src.serena.utilities.ensemble_variation
Module containing the ensemble variation (EV) code
"""
Module containing the ensemble variation (EV) data containers and algorithm
"""
from typing import List, Dict
from dataclasses import dataclass

from serena.utilities.ensemble_structures import Sara2SecondaryStructure, Sara2StructureList


@dataclass
class EV:
    """
    Container for the different versions of the ensemble variation score.
    Every field defaults to -1.
    """
    ev_normalized: float = -1
    ev_threshold_norm: float = -1
    ev_structure: float = -1


@dataclass
class EVResult():
    """
    Class for holding the ev values for ensemble groups
    """
    ev_values: List[EV]


class EVToken():
    """
    Token that is passed between an EV thread and its caller to record
    the per-group EV results and completion flags
    """
    def __init__(self, num_groups: int) -> None:
        # Build a separate EV per slot. `num_groups * [EV()]` would put the
        # SAME mutable EV instance in every slot, so an in-place change to
        # one placeholder would show up in all of them.
        self._group_results: List[EV] = [EV() for _ in range(num_groups)]
        self._group_dict: Dict[int, EV] = {}
        self._group_values: List[str] = [''] * num_groups
        self._group_done_status: List[bool] = [False] * num_groups

    @property
    def group_dict(self) -> Dict[int, EV]:
        """
        Return the dictionary of EVs keyed by index;
        will probably be deprecated soon
        """
        return self._group_dict

    def set_group_dict(self, index: int, value: EV):
        """
        Store an EV in the group dictionary under index;
        will probably be deprecated soon
        """
        self._group_dict[index] = value

    @property
    def group_results(self) -> List[EV]:
        """
        Return the ev results for the groups as a list of EVs
        """
        return self._group_results

    @property
    def ev_results(self) -> EVResult:
        """
        Return the ev results for the groups wrapped in an EVResult
        """
        return EVResult(ev_values=self.group_results)

    def set_group_result(self, index: int, value: EV):
        """
        Set the ev result of the group at index
        """
        self._group_results[index] = value

    @property
    def group_values(self) -> List[str]:
        """
        Return the values of the energy groups as a list of str
        """
        return self._group_values

    def set_group_values(self, index: int, value: str):
        """
        Set the value (a str) of the energy group at index
        """
        self._group_values[index] = value

    @property
    def group_done_status(self) -> List[bool]:
        """
        Return the list of bools that denote if a group is
        done with its algorithm processing
        """
        return self._group_done_status

    def set_group_done_status(self, index: int, state: bool):
        """
        Set the done flag of the group at index to state
        """
        self._group_done_status[index] = state

    @property
    def is_done(self) -> bool:
        """
        Return True when no group's done flag is False
        (also True when there are no groups)
        """
        return False not in self._group_done_status


class EVShuttle():
    """
    Bundle handed to an EV thread: the structures to analyze, the reference
    structure, the group index and the EVToken used to pass results and
    status back
    """
    def __init__(self, structs_list: Sara2StructureList, mfe: Sara2SecondaryStructure,
                 group_index: int, token: EVToken) -> None:
        self._kcal_group_structures_list: Sara2StructureList = structs_list
        self._sara_mfestructure: Sara2SecondaryStructure = mfe
        self._group_index: int = group_index
        self._token: EVToken = token

    @property
    def kcal_group_structures_list(self) -> Sara2StructureList:
        """
        Returns the list of structures that is being analyzed
        """
        return self._kcal_group_structures_list

    @kcal_group_structures_list.setter
    def kcal_group_structures_list(self, new_list: Sara2StructureList):
        """
        Sets the list of structures that is being analyzed
        """
        self._kcal_group_structures_list = new_list

    @property
    def sara_mfestructure(self) -> Sara2SecondaryStructure:
        """
        Return the secondary structure used as the reference structure
        """
        return self._sara_mfestructure

    @sara_mfestructure.setter
    def sara_mfestructure(self, new_strucr: Sara2SecondaryStructure):
        """
        Sets the secondary structure used as the reference structure
        """
        self._sara_mfestructure = new_strucr

    @property
    def group_index(self) -> int:
        """
        Returns the group index of this shuttle
        """
        return self._group_index

    @group_index.setter
    def group_index(self, new_index: int):
        """
        Sets the group index of this shuttle
        """
        self._group_index = new_index

    @property
    def token(self) -> EVToken:
        """
        Returns the token that is fed between the threads
        """
        return self._token

    @token.setter
    def token(self, new_token: EVToken):
        """
        Sets the token that is fed between the threads
        """
        self._token = new_token


class EnsembleVariation():
    """
    Ensemble Variation algorithm that gives an estimated
    stability of the RNA controlling for nucleotide numbers
    and number of structures in the ensemble analyzed
    """

    def __init__(self) -> None:
        pass

    def thread_ev(self, shuttle: EVShuttle):
        """
        Access point for using multithreading to get EV quicker.

        Runs the EV algorithm on the shuttle's structures against its
        reference structure, then records the result on the shuttle's token
        under the shuttle's group index.
        """
        token: EVToken = shuttle.token
        group_num: int = shuttle.group_index
        result: EV = self.ensemble_variation_algorithm(
            kcal_group_structures_list=shuttle.kcal_group_structures_list,
            ref_structure=shuttle.sara_mfestructure,
        )
        token.group_results[group_num] = result
        token.group_dict[group_num] = result
        # the done flag is written last, after both results are stored
        token.group_done_status[group_num] = True

    def ensemble_variation_algorithm(self, kcal_group_structures_list: Sara2StructureList,
                                     ref_structure: Sara2SecondaryStructure) -> EV:
        """
        This is the actual ensemble variation algorithm.

        For every nucleotide position, the number of structures whose
        character at that position differs from the reference structure's
        character is divided by the number of structures; the per-position
        fractions are then summed.

        Returns an EV whose ev_normalized is that sum (or -1 when the group
        reports zero structures); ev_threshold_norm and ev_structure are 0.
        """
        num_structs: int = kcal_group_structures_list.num_structures
        if num_structs == 0:
            # no structures in the group: report -1 instead of a score
            return EV(ev_normalized=-1, ev_threshold_norm=0, ev_structure=0)

        nuc_count: int = kcal_group_structures_list.nuc_count

        # one row of characters per structure, limited to nuc_count positions
        rows: List[List[str]] = [
            [sara_structure.structure[position] for position in range(nuc_count)]
            for sara_structure in kcal_group_structures_list.sara_stuctures
        ]

        fractions: List[float] = []
        for position in range(nuc_count):
            ref_char: str = ref_structure.structure[position]
            matching: int = [row[position] for row in rows].count(ref_char)
            fractions.append((num_structs - matching) / num_structs)

        return EV(ev_normalized=sum(fractions), ev_threshold_norm=0, ev_structure=0)
@dataclass
class EV:
    """
    Container for the different versions of the ensemble variation score.
    Every field defaults to -1.
    """
    # values passed as ev_normalized
    ev_normalized: float = -1
    # values passed as ev_threshold_norm
    ev_threshold_norm: float = -1
    # values passed as ev_structure
    ev_structure: float = -1
Holds the different versions of the ensemble variation score
@dataclass
class EVResult:
    """
    Holder for the list of EV values of the ensemble groups
    """
    # one EV entry per group
    ev_values: List[EV]
Class for holding the ev values for ensemble groups
class EVToken():
    """
    Token that is passed between an EV thread and its caller to record
    the per-group EV results and completion flags
    """
    def __init__(self, num_groups: int) -> None:
        # Build a separate EV per slot. `num_groups * [EV()]` would put the
        # SAME mutable EV instance in every slot, so an in-place change to
        # one placeholder would show up in all of them.
        self._group_results: List[EV] = [EV() for _ in range(num_groups)]
        self._group_dict: Dict[int, EV] = {}
        self._group_values: List[str] = [''] * num_groups
        self._group_done_status: List[bool] = [False] * num_groups

    @property
    def group_dict(self) -> Dict[int, EV]:
        """
        Return the dictionary of EVs keyed by index;
        will probably be deprecated soon
        """
        return self._group_dict

    def set_group_dict(self, index: int, value: EV):
        """
        Store an EV in the group dictionary under index;
        will probably be deprecated soon
        """
        self._group_dict[index] = value

    @property
    def group_results(self) -> List[EV]:
        """
        Return the ev results for the groups as a list of EVs
        """
        return self._group_results

    @property
    def ev_results(self) -> EVResult:
        """
        Return the ev results for the groups wrapped in an EVResult
        """
        return EVResult(ev_values=self.group_results)

    def set_group_result(self, index: int, value: EV):
        """
        Set the ev result of the group at index
        """
        self._group_results[index] = value

    @property
    def group_values(self) -> List[str]:
        """
        Return the values of the energy groups as a list of str
        """
        return self._group_values

    def set_group_values(self, index: int, value: str):
        """
        Set the value (a str) of the energy group at index
        """
        self._group_values[index] = value

    @property
    def group_done_status(self) -> List[bool]:
        """
        Return the list of bools that denote if a group is
        done with its algorithm processing
        """
        return self._group_done_status

    def set_group_done_status(self, index: int, state: bool):
        """
        Set the done flag of the group at index to state
        """
        self._group_done_status[index] = state

    @property
    def is_done(self) -> bool:
        """
        Return True when no group's done flag is False
        (also True when there are no groups)
        """
        return False not in self._group_done_status
Class for the token that is used to pass data between a thread and its caller to record EVs and completion flags
Return the group dictionary of EVs keyed by index; will probably be deprecated soon
def set_group_dict(self, index: int, value: EV) -> None:
    """
    Store value in the group dictionary under the key index;
    will probably be deprecated soon
    """
    group_lookup = self._group_dict
    group_lookup[index] = value
Sets the EV stored under the given index in the group dictionary; will probably be deprecated soon
Return the ev results for the groups as a list of EV's
Return the ev results for the groups as an EVResult
def set_group_result(self, index: int, value: EV) -> None:
    """
    Replace the ev result stored for the group at index
    """
    results = self._group_results
    results[index] = value
Set the ev group results for the groups
def set_group_values(self, index: int, value: str) -> None:
    """
    Replace the value (a str) stored for the energy group at index
    """
    values = self._group_values
    values[index] = value
Set the value of an energy group (a str) by index
Return the list of bools that denote if a group is done with its algorithm processing
def set_group_done_status(self, index: int, state: bool) -> None:
    """
    Set the done flag of the group at index to state (True or False)
    """
    flags = self._group_done_status
    flags[index] = state
Sets an index in the list of bools that denote if a group is done with its algorithm processing to true or false
class EVShuttle:
    """
    Bundle handed to an EV thread: the structures to analyze, the reference
    structure, the group index and the EVToken used to pass results and
    status back
    """
    def __init__(
        self,
        structs_list: Sara2StructureList,
        mfe: Sara2SecondaryStructure,
        group_index: int,
        token: EVToken,
    ) -> None:
        self._token: EVToken = token
        self._group_index: int = group_index
        self._sara_mfestructure: Sara2SecondaryStructure = mfe
        self._kcal_group_structures_list: Sara2StructureList = structs_list

    @property
    def kcal_group_structures_list(self) -> Sara2StructureList:
        """
        The list of structures that is being analyzed
        """
        return self._kcal_group_structures_list

    @kcal_group_structures_list.setter
    def kcal_group_structures_list(self, new_list: Sara2StructureList):
        """
        Replace the list of structures that is being analyzed
        """
        self._kcal_group_structures_list = new_list

    @property
    def sara_mfestructure(self) -> Sara2SecondaryStructure:
        """
        The secondary structure used as the reference structure
        """
        return self._sara_mfestructure

    @sara_mfestructure.setter
    def sara_mfestructure(self, new_strucr: Sara2SecondaryStructure):
        """
        Replace the secondary structure used as the reference structure
        """
        self._sara_mfestructure = new_strucr

    @property
    def group_index(self) -> int:
        """
        The group index of this shuttle
        """
        return self._group_index

    @group_index.setter
    def group_index(self, new_index: int):
        """
        Replace the group index of this shuttle
        """
        self._group_index = new_index

    @property
    def token(self) -> EVToken:
        """
        The token that is fed between the threads
        """
        return self._token

    @token.setter
    def token(self, new_token: EVToken):
        """
        Replace the token that is fed between the threads
        """
        self._token = new_token
This is the controller, so to speak, for the EVTokens to talk back and forth between the EV threads to pass results and status
def __init__(
    self,
    structs_list: Sara2StructureList,
    mfe: Sara2SecondaryStructure,
    group_index: int,
    token: EVToken,
) -> None:
    """
    Store the structures to analyze, the reference structure, the group
    index and the token on the shuttle
    """
    self._token: EVToken = token
    self._group_index: int = group_index
    self._sara_mfestructure: Sara2SecondaryStructure = mfe
    self._kcal_group_structures_list: Sara2StructureList = structs_list
Returns the list of structures that is being analyzed
Return the secondary structure used as the reference structure
class EnsembleVariation:
    """
    Ensemble Variation algorithm that gives an estimated
    stability of the RNA controlling for nucleotide numbers
    and number of structures in the ensemble analyzed
    """

    def __init__(self) -> None:
        pass

    def thread_ev(self, shuttle: EVShuttle):
        """
        Entry point used when EV is computed with multithreading.

        Runs the EV algorithm on the shuttle's structures against its
        reference structure, then records the result on the shuttle's token
        under the shuttle's group index.
        """
        shared: EVToken = shuttle.token
        slot: int = shuttle.group_index
        outcome: EV = self.ensemble_variation_algorithm(
            kcal_group_structures_list=shuttle.kcal_group_structures_list,
            ref_structure=shuttle.sara_mfestructure,
        )
        shared.group_results[slot] = outcome
        shared.group_dict[slot] = outcome
        # the done flag is written last, after both results are stored
        shared.group_done_status[slot] = True

    def ensemble_variation_algorithm(self, kcal_group_structures_list: Sara2StructureList,
                                     ref_structure: Sara2SecondaryStructure) -> EV:
        """
        The actual ensemble variation algorithm.

        For every nucleotide position, the number of structures whose
        character at that position differs from the reference structure's
        character is divided by the number of structures; the per-position
        fractions are then summed.

        Returns an EV whose ev_normalized is that sum (or -1 when the group
        reports zero structures); ev_threshold_norm and ev_structure are 0.
        """
        num_structs = kcal_group_structures_list.num_structures
        if num_structs == 0:
            # no structures in the group: report -1 instead of a score
            return EV(ev_normalized=-1, ev_threshold_norm=0, ev_structure=0)

        nuc_count = kcal_group_structures_list.nuc_count

        # one row of characters per structure, limited to nuc_count positions
        rows = [
            [sara_structure.structure[position] for position in range(nuc_count)]
            for sara_structure in kcal_group_structures_list.sara_stuctures
        ]

        fractions = []
        for position in range(nuc_count):
            ref_char = ref_structure.structure[position]
            matching = [row[position] for row in rows].count(ref_char)
            fractions.append((num_structs - matching) / num_structs)

        return EV(ev_normalized=sum(fractions), ev_threshold_norm=0, ev_structure=0)
Ensemble Variation algorithm that gives an estimated stability of the RNA controlling for nucleotide numbers and number of structures in the ensemble analyzed
def thread_ev(self, shuttle: EVShuttle):
    """
    Entry point used when EV is computed with multithreading.

    Runs the EV algorithm on the shuttle's structures against its
    reference structure, then records the result on the shuttle's token
    under the shuttle's group index.
    """
    shared: EVToken = shuttle.token
    slot: int = shuttle.group_index
    outcome: EV = self.ensemble_variation_algorithm(
        kcal_group_structures_list=shuttle.kcal_group_structures_list,
        ref_structure=shuttle.sara_mfestructure,
    )
    shared.group_results[slot] = outcome
    shared.group_dict[slot] = outcome
    # the done flag is written last, after both results are stored
    shared.group_done_status[slot] = True
Access point for using multithreading to get EV quicker
def ensemble_variation_algorithm(self, kcal_group_structures_list: Sara2StructureList,
                                 ref_structure: Sara2SecondaryStructure) -> EV:
    """
    The actual ensemble variation algorithm.

    For every nucleotide position, the number of structures whose
    character at that position differs from the reference structure's
    character is divided by the number of structures; the per-position
    fractions are then summed.

    Returns an EV whose ev_normalized is that sum (or -1 when the group
    reports zero structures); ev_threshold_norm and ev_structure are 0.
    """
    num_structs = kcal_group_structures_list.num_structures
    if num_structs == 0:
        # no structures in the group: report -1 instead of a score
        return EV(ev_normalized=-1, ev_threshold_norm=0, ev_structure=0)

    nuc_count = kcal_group_structures_list.nuc_count

    # one row of characters per structure, limited to nuc_count positions
    rows = [
        [sara_structure.structure[position] for position in range(nuc_count)]
        for sara_structure in kcal_group_structures_list.sara_stuctures
    ]

    fractions = []
    for position in range(nuc_count):
        ref_char = ref_structure.structure[position]
        matching = [row[position] for row in rows].count(ref_char)
        fractions.append((num_structs - matching) / num_structs)

    return EV(ev_normalized=sum(fractions), ev_threshold_norm=0, ev_structure=0)
This is the actual ensemble variation algorithm