src.serena.utilities.ensemble_variation

File for the ensemble variation code to live

  1"""
  2File for the ensemble variation code to live
  3"""
  4from typing import List, Dict
  5from dataclasses import dataclass
  6
  7from serena.utilities.ensemble_structures import Sara2SecondaryStructure, Sara2StructureList
  8
  9@dataclass
 10class EV:
 11    """
 12    Returns the different version of enemble variation
 13    """
 14    ev_normalized: float = -1
 15    ev_threshold_norm: float = -1
 16    ev_structure: float = -1
 17
 18@dataclass
 19class EVResult():
 20    """
 21    Class for holding the ev values for ensemble groups 
 22    """
 23    ev_values:List[EV]
 24
 25class EVToken():
 26    """
 27    Class for the token that is used to pass data between thread
 28    and caller to record ev's and comlete flags
 29    """
 30    def __init__(self, num_groups: int) -> None:
 31        self._group_results: List[EV] = num_groups * [EV()]
 32        self._group_dict: Dict[int,EV] = {}
 33        self._group_values: List[str] = num_groups * ['']
 34        self._group_done_status: List[bool] = num_groups * [False]
 35
 36    @property
 37    def group_dict(self)->Dict[int,EV]:
 38        """
 39        Return the group dictionary of ev's for index
 40        will be depreciated soon probably
 41        """
 42        return self._group_dict
 43
 44    def set_group_dict(self, index:int, value:EV):
 45        """
 46        Sets the group dictionary of ev's with index
 47        will be depreciated soon probably
 48        """
 49        self._group_dict[index]=value
 50
 51    @property
 52    def group_results(self)->List[EV]:
 53        """
 54        Return the ev results for the groups as a list of EV's
 55        """
 56        return self._group_results
 57
 58    @property
 59    def ev_results(self) -> EVResult:
 60        """
 61        Return the ev results for the groups as a EVREsult
 62        """
 63        result: EVResult = EVResult(ev_values=self.group_results)
 64        return result
 65
 66    def set_group_result(self, index:int, value:EV):
 67        """
 68        Set the ev group results for the groups
 69        """
 70        self._group_results[index]=value
 71
 72    @property
 73    def group_values(self)->List[str]:
 74        """
 75        Return the values of the energy groups as a list of str
 76        """
 77        return self._group_values
 78
 79    def set_group_values(self, index:int, value:str):
 80        """
 81        Set the values of the energy groups as a list of str by index
 82        """
 83        self._group_values[index]=value
 84
 85    @property
 86    def group_done_status(self)->List[bool]:
 87        """
 88        Return the list of bools that denote if a group is
 89        done with its algorithm processing
 90        """
 91        return self._group_done_status
 92
 93    def set_group_done_status(self, index:int, state:bool):
 94        """
 95        Sets and inded in the list of bools that denote if a group is
 96        done with its algorithm processing true or false
 97        """
 98        self._group_done_status[index]=state
 99
100    @property
101    def is_done(self)->bool:
102        """
103        Returns the overall status of the EV processing of all groups
104        """
105        is_completed:bool = False
106        if self._group_done_status.count(False) == 0:
107            #its done
108            is_completed = True
109        return is_completed
110
class EVShuttle:
    """
    Bundle handed to an EV thread: the structures to analyze, the
    reference structure, the index of the group and the EVToken used to
    pass results and status back
    """
    def __init__(
        self,
        structs_list: Sara2StructureList,
        mfe: Sara2SecondaryStructure,
        group_index: int,
        token: EVToken,
    ) -> None:
        self._token: EVToken = token
        self._group_index: int = group_index
        self._sara_mfestructure: Sara2SecondaryStructure = mfe
        self._kcal_group_structures_list: Sara2StructureList = structs_list

    @property
    def kcal_group_structures_list(self) -> Sara2StructureList:
        """
        The list of structures that is being analyzed
        """
        return self._kcal_group_structures_list

    @kcal_group_structures_list.setter
    def kcal_group_structures_list(self, replacement: Sara2StructureList):
        """
        Swap in a different list of structures to analyze
        """
        self._kcal_group_structures_list = replacement

    @property
    def sara_mfestructure(self) -> Sara2SecondaryStructure:
        """
        The secondary structure used as the reference structure
        """
        return self._sara_mfestructure

    @sara_mfestructure.setter
    def sara_mfestructure(self, replacement: Sara2SecondaryStructure):
        """
        Swap in a different reference secondary structure
        """
        self._sara_mfestructure = replacement

    @property
    def group_index(self) -> int:
        """
        The group index of this shuttle
        """
        return self._group_index

    @group_index.setter
    def group_index(self, replacement: int):
        """
        Change the group index of this shuttle
        """
        self._group_index = replacement

    @property
    def token(self) -> EVToken:
        """
        The token that is fed between the threads
        """
        return self._token

    @token.setter
    def token(self, replacement: EVToken):
        """
        Swap in a different token to feed between the threads
        """
        self._token = replacement
177
class EnsembleVariation:
    """
    Ensemble Variation algorithm that gives an estimated
    stability of the RNA controlling for nucleotide numbers
    and number of structures in the ensemble analyzed
    """

    def __init__(self) -> None:
        pass

    def thread_ev(self, shuttle: EVShuttle):
        """
        Access point for using multithreading to get EV quicker.

        Runs the algorithm on the shuttle's structure list and reference
        structure, then records the outcome on the shuttle's token under
        the shuttle's group index.
        """
        token: EVToken = shuttle.token
        group_num: int = shuttle.group_index
        result: EV = self.ensemble_variation_algorithm(
            kcal_group_structures_list=shuttle.kcal_group_structures_list,
            ref_structure=shuttle.sara_mfestructure)
        # Go through the token's setters rather than writing into the
        # lists its properties expose.
        token.set_group_result(group_num, result)
        token.set_group_dict(group_num, result)
        # The done flag is written only after both result stores.
        token.set_group_done_status(group_num, True)

    def ensemble_variation_algorithm(
        self,
        kcal_group_structures_list: Sara2StructureList,
        ref_structure: Sara2SecondaryStructure,
    ) -> EV:
        """
        This is the actual ensemble variation algorithm.

        For every nucleotide position, the number of structures whose
        character at that position equals the reference's character is
        subtracted from the group's structure count, and the difference is
        divided by the structure count. The per-position fractions are
        then summed.

        Args:
            kcal_group_structures_list: group to score; its `num_structures`,
                `nuc_count` and `sara_stuctures` attributes are read.
            ref_structure: reference whose `structure` is indexed by position.

        Returns:
            EV whose `ev_normalized` is the summed fractions, or -1 when the
            group reports zero structures. `ev_threshold_norm` and
            `ev_structure` are always 0.
        """
        num_structs: int = kcal_group_structures_list.num_structures
        if num_structs == 0:
            # empty group: nothing to compare, report -1
            return EV(ev_normalized=-1, ev_threshold_norm=0, ev_structure=0)

        nuc_count: int = kcal_group_structures_list.nuc_count
        reference = ref_structure.structure

        # Count, per position, how many structures agree with the reference.
        # One running counter per position replaces a full table of every
        # character of every structure.
        match_counts: List[int] = [0] * nuc_count
        for sara_structure in kcal_group_structures_list.sara_stuctures:
            candidate = sara_structure.structure
            for position in range(nuc_count):
                if candidate[position] == reference[position]:
                    match_counts[position] += 1

        # True division makes these floats; they are summed in position order.
        fractions: List[float] = [
            (num_structs - matches) / num_structs for matches in match_counts
        ]
        return EV(ev_normalized=sum(fractions), ev_threshold_norm=0, ev_structure=0)
251    
@dataclass
class EV:
@dataclass
class EV:
    """
    Holds the three ensemble variation (EV) values kept for one group.
    Each value stays at -1 until something else is assigned to it.
    """
    ev_normalized: float = -1
    ev_threshold_norm: float = -1
    ev_structure: float = -1

Returns the different versions of ensemble variation

EV( ev_normalized: float = -1, ev_threshold_norm: float = -1, ev_structure: float = -1)
ev_normalized: float = -1
ev_threshold_norm: float = -1
ev_structure: float = -1
@dataclass
class EVResult:
@dataclass
class EVResult:
    """
    Wrapper around the list of EV entries collected for the ensemble groups
    """
    ev_values: List[EV]

Class for holding the ev values for ensemble groups

EVResult(ev_values: List[src.serena.utilities.ensemble_variation.EV])
class EVToken:
 26class EVToken():
 27    """
 28    Class for the token that is used to pass data between thread
 29    and caller to record ev's and comlete flags
 30    """
 31    def __init__(self, num_groups: int) -> None:
 32        self._group_results: List[EV] = num_groups * [EV()]
 33        self._group_dict: Dict[int,EV] = {}
 34        self._group_values: List[str] = num_groups * ['']
 35        self._group_done_status: List[bool] = num_groups * [False]
 36
 37    @property
 38    def group_dict(self)->Dict[int,EV]:
 39        """
 40        Return the group dictionary of ev's for index
 41        will be depreciated soon probably
 42        """
 43        return self._group_dict
 44
 45    def set_group_dict(self, index:int, value:EV):
 46        """
 47        Sets the group dictionary of ev's with index
 48        will be depreciated soon probably
 49        """
 50        self._group_dict[index]=value
 51
 52    @property
 53    def group_results(self)->List[EV]:
 54        """
 55        Return the ev results for the groups as a list of EV's
 56        """
 57        return self._group_results
 58
 59    @property
 60    def ev_results(self) -> EVResult:
 61        """
 62        Return the ev results for the groups as a EVREsult
 63        """
 64        result: EVResult = EVResult(ev_values=self.group_results)
 65        return result
 66
 67    def set_group_result(self, index:int, value:EV):
 68        """
 69        Set the ev group results for the groups
 70        """
 71        self._group_results[index]=value
 72
 73    @property
 74    def group_values(self)->List[str]:
 75        """
 76        Return the values of the energy groups as a list of str
 77        """
 78        return self._group_values
 79
 80    def set_group_values(self, index:int, value:str):
 81        """
 82        Set the values of the energy groups as a list of str by index
 83        """
 84        self._group_values[index]=value
 85
 86    @property
 87    def group_done_status(self)->List[bool]:
 88        """
 89        Return the list of bools that denote if a group is
 90        done with its algorithm processing
 91        """
 92        return self._group_done_status
 93
 94    def set_group_done_status(self, index:int, state:bool):
 95        """
 96        Sets and inded in the list of bools that denote if a group is
 97        done with its algorithm processing true or false
 98        """
 99        self._group_done_status[index]=state
100
101    @property
102    def is_done(self)->bool:
103        """
104        Returns the overall status of the EV processing of all groups
105        """
106        is_completed:bool = False
107        if self._group_done_status.count(False) == 0:
108            #its done
109            is_completed = True
110        return is_completed

Class for the token that is used to pass data between thread and caller to record ev's and complete flags

EVToken(num_groups: int)
31    def __init__(self, num_groups: int) -> None:
32        self._group_results: List[EV] = num_groups * [EV()]
33        self._group_dict: Dict[int,EV] = {}
34        self._group_values: List[str] = num_groups * ['']
35        self._group_done_status: List[bool] = num_groups * [False]

Return the group dictionary of ev's for index; will probably be deprecated soon

def set_group_dict(self, index: int, value: src.serena.utilities.ensemble_variation.EV):
45    def set_group_dict(self, index:int, value:EV):
46        """
47        Sets the group dictionary of ev's with index
48        will be depreciated soon probably
49        """
50        self._group_dict[index]=value

Sets the group dictionary of ev's with index; will probably be deprecated soon

Return the ev results for the groups as a list of EV's

Return the ev results for the groups as an EVResult

def set_group_result(self, index: int, value: src.serena.utilities.ensemble_variation.EV):
67    def set_group_result(self, index:int, value:EV):
68        """
69        Set the ev group results for the groups
70        """
71        self._group_results[index]=value

Set the ev group results for the groups

group_values: List[str]

Return the values of the energy groups as a list of str

def set_group_values(self, index: int, value: str):
80    def set_group_values(self, index:int, value:str):
81        """
82        Set the values of the energy groups as a list of str by index
83        """
84        self._group_values[index]=value

Set the values of the energy groups as a list of str by index

group_done_status: List[bool]

Return the list of bools that denote if a group is done with its algorithm processing

def set_group_done_status(self, index: int, state: bool):
94    def set_group_done_status(self, index:int, state:bool):
95        """
96        Sets and inded in the list of bools that denote if a group is
97        done with its algorithm processing true or false
98        """
99        self._group_done_status[index]=state

Sets an index in the list of bools that denote if a group is done with its algorithm processing to true or false

is_done: bool

Returns the overall status of the EV processing of all groups

class EVShuttle:
class EVShuttle:
    """
    Bundle handed to an EV thread: the structures to analyze, the
    reference structure, the index of the group and the EVToken used to
    pass results and status back
    """
    def __init__(
        self,
        structs_list: Sara2StructureList,
        mfe: Sara2SecondaryStructure,
        group_index: int,
        token: EVToken,
    ) -> None:
        self._token: EVToken = token
        self._group_index: int = group_index
        self._sara_mfestructure: Sara2SecondaryStructure = mfe
        self._kcal_group_structures_list: Sara2StructureList = structs_list

    @property
    def kcal_group_structures_list(self) -> Sara2StructureList:
        """
        The list of structures that is being analyzed
        """
        return self._kcal_group_structures_list

    @kcal_group_structures_list.setter
    def kcal_group_structures_list(self, replacement: Sara2StructureList):
        """
        Swap in a different list of structures to analyze
        """
        self._kcal_group_structures_list = replacement

    @property
    def sara_mfestructure(self) -> Sara2SecondaryStructure:
        """
        The secondary structure used as the reference structure
        """
        return self._sara_mfestructure

    @sara_mfestructure.setter
    def sara_mfestructure(self, replacement: Sara2SecondaryStructure):
        """
        Swap in a different reference secondary structure
        """
        self._sara_mfestructure = replacement

    @property
    def group_index(self) -> int:
        """
        The group index of this shuttle
        """
        return self._group_index

    @group_index.setter
    def group_index(self, replacement: int):
        """
        Change the group index of this shuttle
        """
        self._group_index = replacement

    @property
    def token(self) -> EVToken:
        """
        The token that is fed between the threads
        """
        return self._token

    @token.setter
    def token(self, replacement: EVToken):
        """
        Swap in a different token to feed between the threads
        """
        self._token = replacement

This is the controller, so to speak, for the EVTokens to talk back and forth between the EV threads to pass results and status

EVShuttle( structs_list: serena.utilities.ensemble_structures.Sara2StructureList, mfe: serena.utilities.ensemble_structures.Sara2SecondaryStructure, group_index: int, token: src.serena.utilities.ensemble_variation.EVToken)
117    def __init__(self, structs_list: Sara2StructureList, mfe:Sara2SecondaryStructure, group_index:int, token:EVToken) -> None:#pylint: disable=line-too-long
118        self._kcal_group_structures_list: Sara2StructureList = structs_list
119        self._sara_mfestructure:Sara2SecondaryStructure = mfe
120        self._group_index:int = group_index
121        self._token:EVToken = token
kcal_group_structures_list: serena.utilities.ensemble_structures.Sara2StructureList

Returns the list of structures that is being analyzed

sara_mfestructure: serena.utilities.ensemble_structures.Sara2SecondaryStructure

Return the secondary structure used as the reference structure

group_index: int

Returns the group index of this shuttle

Returns the token that is fed between the threads

class EnsembleVariation:
class EnsembleVariation:
    """
    Ensemble Variation algorithm that gives an estimated
    stability of the RNA controlling for nucleotide numbers
    and number of structures in the ensemble analyzed
    """

    def __init__(self) -> None:
        pass

    def thread_ev(self, shuttle: EVShuttle):
        """
        Access point for using multithreading to get EV quicker.

        Runs the algorithm on the shuttle's structure list and reference
        structure, then records the outcome on the shuttle's token under
        the shuttle's group index.
        """
        token: EVToken = shuttle.token
        group_num: int = shuttle.group_index
        result: EV = self.ensemble_variation_algorithm(
            kcal_group_structures_list=shuttle.kcal_group_structures_list,
            ref_structure=shuttle.sara_mfestructure)
        # Go through the token's setters rather than writing into the
        # lists its properties expose.
        token.set_group_result(group_num, result)
        token.set_group_dict(group_num, result)
        # The done flag is written only after both result stores.
        token.set_group_done_status(group_num, True)

    def ensemble_variation_algorithm(
        self,
        kcal_group_structures_list: Sara2StructureList,
        ref_structure: Sara2SecondaryStructure,
    ) -> EV:
        """
        This is the actual ensemble variation algorithm.

        For every nucleotide position, the number of structures whose
        character at that position equals the reference's character is
        subtracted from the group's structure count, and the difference is
        divided by the structure count. The per-position fractions are
        then summed.

        Args:
            kcal_group_structures_list: group to score; its `num_structures`,
                `nuc_count` and `sara_stuctures` attributes are read.
            ref_structure: reference whose `structure` is indexed by position.

        Returns:
            EV whose `ev_normalized` is the summed fractions, or -1 when the
            group reports zero structures. `ev_threshold_norm` and
            `ev_structure` are always 0.
        """
        num_structs: int = kcal_group_structures_list.num_structures
        if num_structs == 0:
            # empty group: nothing to compare, report -1
            return EV(ev_normalized=-1, ev_threshold_norm=0, ev_structure=0)

        nuc_count: int = kcal_group_structures_list.nuc_count
        reference = ref_structure.structure

        # Count, per position, how many structures agree with the reference.
        # One running counter per position replaces a full table of every
        # character of every structure.
        match_counts: List[int] = [0] * nuc_count
        for sara_structure in kcal_group_structures_list.sara_stuctures:
            candidate = sara_structure.structure
            for position in range(nuc_count):
                if candidate[position] == reference[position]:
                    match_counts[position] += 1

        # True division makes these floats; they are summed in position order.
        fractions: List[float] = [
            (num_structs - matches) / num_structs for matches in match_counts
        ]
        return EV(ev_normalized=sum(fractions), ev_threshold_norm=0, ev_structure=0)

Ensemble Variation algorithm that gives a estimated stability of the RNA controlling for nucleotide numbers and number of structures in the ensemble analyzed

def thread_ev(self, shuttle: src.serena.utilities.ensemble_variation.EVShuttle):
189    def thread_ev(self, shuttle: EVShuttle):
190        """
191        Access point for using multithreading to get
192        EV quicker 
193        """
194        token:EVToken = shuttle.token
195        group_num:int = shuttle.group_index
196        structs_list:Sara2StructureList = shuttle.kcal_group_structures_list
197        result: EV =  self.ensemble_variation_algorithm(kcal_group_structures_list=structs_list,
198                                                        ref_structure=shuttle.sara_mfestructure )
199        token.group_results[group_num]= result
200        token.group_dict[group_num] = result
201        token.group_done_status[group_num] = True

Access point for using multithreading to get EV quicker

def ensemble_variation_algorithm( self, kcal_group_structures_list: serena.utilities.ensemble_structures.Sara2StructureList, ref_structure: serena.utilities.ensemble_structures.Sara2SecondaryStructure) -> src.serena.utilities.ensemble_variation.EV:
203    def ensemble_variation_algorithm(self, kcal_group_structures_list: Sara2StructureList, ref_structure:Sara2SecondaryStructure)->EV:#pylint: disable=line-too-long, too-many-locals
204        """
205        This is the actual ensemble variation algorithm
206        """
207        total_ev_subscore1:int = 0
208        structure_element_count = kcal_group_structures_list.num_structures
209
210        if structure_element_count != 0:
211            #need to do each char abd then structure
212            #walk through each nucleotide but first prep containers grab what is needed
213
214            #setup constants
215            nuc_count = kcal_group_structures_list.nuc_count
216            structure_element_count = kcal_group_structures_list.num_structures
217
218            #add the step to get nuc array here
219            #get all the data out of it
220
221            #first initialize the lists
222            list_of_nuc_lists: List[List[str]] = []
223
224            num_nucs: int = kcal_group_structures_list.nuc_count
225            for index in range(num_nucs):
226                temp_list:List[str] = []
227                list_of_nuc_lists.append(temp_list)
228
229            #now go throught everything
230            for sara_structure in kcal_group_structures_list.sara_stuctures:
231                for index in range(num_nucs):
232                    character: str = sara_structure.structure[index]
233                    list_of_nuc_lists[index].append(character)
234
235            list_of_nuc_scores_base: List[int] = [0]*nuc_count
236            list_of_nuc_scores_subscores: List[int] = [0]*nuc_count
237            num_structs:int = kcal_group_structures_list.num_structures
238
239            for nuc_index in range(nuc_count):
240                mfe_nuc=ref_structure.structure[nuc_index]
241                num_chars = list_of_nuc_lists[nuc_index].count(mfe_nuc)
242                num_diff:int = num_structs - num_chars
243                list_of_nuc_scores_base[nuc_index] = num_diff
244                list_of_nuc_scores_subscores[nuc_index] = list_of_nuc_scores_base[nuc_index] / structure_element_count#pylint: disable=line-too-long
245
246            total_ev_subscore1 = sum(list_of_nuc_scores_subscores)
247        else:
248            total_ev_subscore1 = -1
249
250        result: EV =  EV(ev_normalized=total_ev_subscore1, ev_threshold_norm=0, ev_structure=0)
251        return result

This is the actual ensemble variation algorithm