src.serena.utilities.weighted_structures
"""
Helpers for collapsing an ensemble of secondary structures into one weighted
structure and comparing that structure against bound/unbound MFE structures.
"""
from dataclasses import dataclass, field
from typing import List
import collections

from serena.utilities.ensemble_structures import Sara2StructureList, Sara2SecondaryStructure
from serena.utilities.ensemble_groups import MultipleEnsembleGroups, SingleEnsembleGroup


@dataclass
class WeightedEnsembleResult():
    """
    Class that holds the results from weighted structures as Sara2SecondaryStructures
    """
    structs: List[Sara2SecondaryStructure]


@dataclass
class WeightedNucCounts():
    """
    Class for the weighted structure counts
    """
    num_bound: float = -1
    num_unbound: float = -1
    num_both: float = -1
    num_dot: float = -1
    num_nucs: int = -1


@dataclass
class WeightedComparisonResult():
    """
    Holds the results from weighting the structures
    """
    comp_struct: str = ''
    # default_factory gives every result its own default objects. A plain
    # instance default would be created once and shared by all results, and
    # Python 3.11+ rejects unhashable defaults such as WeightedNucCounts()
    # with a ValueError at class-definition time.
    unbound_mfe_struct: Sara2SecondaryStructure = field(default_factory=Sara2SecondaryStructure)
    bound_mfe_struct: Sara2SecondaryStructure = field(default_factory=Sara2SecondaryStructure)
    weighted_nuc_counts: WeightedNucCounts = field(default_factory=WeightedNucCounts)


class WeightedStructure():
    """
    Builds a weighted structure from a list of structures and compares it
    against the unbound and bound MFE structures.
    """

    def __init__(self) -> None:
        pass

    def make_weighted_struct(self, structure_list: Sara2StructureList) -> Sara2SecondaryStructure:
        """
        Collapse every structure in structure_list into one weighted structure.

        Each position is scored over all structures: any symbol other than '.'
        adds 2 and a '.' subtracts 1. A position counts as bonded when its
        score is greater than structure_list.num_structures. Bonded positions
        get the most common symbol seen at that position, falling back to the
        second most common when the most common is '.'. All other positions
        get '.'.

        The sequence of the result is taken from the first structure in the
        list, so indexing fails if structure_list.sara_stuctures is empty.
        """
        is_bond_value: int = 2
        not_bond_value: int = -1

        struct_count: int = structure_list.num_structures
        nuc_count: int = structure_list.nuc_count

        position_scores: List[int] = [0] * nuc_count
        symbols_per_position: List[List[str]] = [[] for _ in range(nuc_count)]

        for struct in structure_list.sara_stuctures:
            for nuc_index in range(nuc_count):
                symbol: str = struct.structure[nuc_index]
                symbols_per_position[nuc_index].append(symbol)
                position_scores[nuc_index] += not_bond_value if symbol == '.' else is_bond_value

        weighted_symbols: List[str] = []
        for nuc_index in range(nuc_count):
            chosen: str = '.'
            if position_scores[nuc_index] > struct_count:
                ranked = collections.Counter(symbols_per_position[nuc_index]).most_common(2)
                chosen = ranked[0][0]
                if chosen == '.' and len(ranked) > 1:
                    # the position is bonded, so prefer the best non-dot symbol
                    chosen = ranked[1][0]
            weighted_symbols.append(chosen)

        return Sara2SecondaryStructure(sequence=structure_list.sara_stuctures[0].sequence,
                                       structure=''.join(weighted_symbols))

    def compair_weighted_structure(self, unbound_mfe_struct: Sara2SecondaryStructure, bound_mfe_struct: Sara2SecondaryStructure, weighted_result: Sara2SecondaryStructure, nuc_count: int) -> WeightedComparisonResult:
        """
        Compare the weighted structure against the bound and unbound MFE
        structures, one position at a time, for the first nuc_count positions.

        Symbol written for each position:
          '-' the weighted symbol matches the bound MFE only
          '|' the weighted symbol matches the unbound MFE only
          '+' the weighted symbol matches both
          '.' the weighted symbol matches neither

        Returns a WeightedComparisonResult holding the comparison string, the
        two MFE structures that were passed in, and the per-symbol counts.
        """
        unbound: str = '|'
        bound: str = '-'
        both: str = '+'
        dot: str = '.'

        num_unbound: int = 0
        num_bound: int = 0
        num_both: int = 0
        num_dot: int = 0
        compared_symbols: List[str] = []

        for nuc_index in range(nuc_count):
            weighted_nuc: str = weighted_result.structure[nuc_index]
            matches_unbound: bool = weighted_nuc == unbound_mfe_struct.structure[nuc_index]
            matches_bound: bool = weighted_nuc == bound_mfe_struct.structure[nuc_index]

            if matches_bound and matches_unbound:
                compared_symbols.append(both)
                num_both += 1
            elif matches_bound:
                compared_symbols.append(bound)
                num_bound += 1
            elif matches_unbound:
                compared_symbols.append(unbound)
                num_unbound += 1
            else:
                compared_symbols.append(dot)
                num_dot += 1

        # Built once after the loop so that nuc_count == 0 yields all-zero
        # counts instead of leaving the variable unassigned.
        weighted_nuc_counts: WeightedNucCounts = WeightedNucCounts(num_unbound=num_unbound,
                                                                    num_bound=num_bound,
                                                                    num_both=num_both,
                                                                    num_dot=num_dot,
                                                                    num_nucs=nuc_count)

        return WeightedComparisonResult(comp_struct=''.join(compared_symbols),
                                        unbound_mfe_struct=unbound_mfe_struct,
                                        bound_mfe_struct=bound_mfe_struct,
                                        weighted_nuc_counts=weighted_nuc_counts)
@dataclass
class
WeightedEnsembleResult:
@dataclass
class WeightedEnsembleResult:
    """
    Container for the weighted structures, stored as a list of
    Sara2SecondaryStructure objects.
    """

    # the weighted structures held by this result
    structs: List[Sara2SecondaryStructure]
Class that holds the results from weighted structures as Sara2SecondaryStructures
@dataclass
class
WeightedNucCounts:
@dataclass
class WeightedNucCounts:
    """
    Per-symbol counts produced when a weighted structure is compared against
    the bound and unbound MFE structures. Every field defaults to -1.
    """

    # positions where the weighted symbol matched the bound MFE only
    num_bound: float = -1
    # positions where the weighted symbol matched the unbound MFE only
    num_unbound: float = -1
    # positions where the weighted symbol matched both MFE structures
    num_both: float = -1
    # positions where the weighted symbol matched neither MFE structure
    num_dot: float = -1
    # number of positions that were compared
    num_nucs: int = -1
Class for the weighted structure counts
@dataclass
class
WeightedComparisonResult:
# `field` is needed for default_factory below; imported here so this block
# stands on its own next to the file's existing `dataclass` import.
from dataclasses import field


@dataclass
class WeightedComparisonResult():
    """
    Holds the results from weighting the structures
    """
    # comparison string, one symbol per compared position
    comp_struct: str = ''
    # default_factory gives every result its own default objects. A plain
    # instance default would be created once and shared by all results, and
    # Python 3.11+ rejects unhashable defaults such as WeightedNucCounts()
    # with a ValueError at class-definition time.
    unbound_mfe_struct: Sara2SecondaryStructure = field(default_factory=Sara2SecondaryStructure)
    bound_mfe_struct: Sara2SecondaryStructure = field(default_factory=Sara2SecondaryStructure)
    weighted_nuc_counts: WeightedNucCounts = field(default_factory=WeightedNucCounts)
Holds the results from weighting the structures
WeightedComparisonResult( comp_struct: str = '', unbound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure = <serena.utilities.ensemble_structures.Sara2SecondaryStructure object>, bound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure = <serena.utilities.ensemble_structures.Sara2SecondaryStructure object>, weighted_nuc_counts: src.serena.utilities.weighted_structures.WeightedNucCounts = WeightedNucCounts(num_bound=-1, num_unbound=-1, num_both=-1, num_dot=-1, num_nucs=-1))
unbound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure =
<serena.utilities.ensemble_structures.Sara2SecondaryStructure object>
bound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure =
<serena.utilities.ensemble_structures.Sara2SecondaryStructure object>
weighted_nuc_counts: src.serena.utilities.weighted_structures.WeightedNucCounts =
WeightedNucCounts(num_bound=-1, num_unbound=-1, num_both=-1, num_dot=-1, num_nucs=-1)
class
WeightedStructure:
class WeightedStructure():
    """
    Builds a weighted structure from a list of structures and compares it
    against the unbound and bound MFE structures.
    """

    def __init__(self) -> None:
        pass

    def make_weighted_struct(self, structure_list: Sara2StructureList) -> Sara2SecondaryStructure:
        """
        Collapse every structure in structure_list into one weighted structure.

        Each position is scored over all structures: any symbol other than '.'
        adds 2 and a '.' subtracts 1. A position counts as bonded when its
        score is greater than structure_list.num_structures. Bonded positions
        get the most common symbol seen at that position, falling back to the
        second most common when the most common is '.'. All other positions
        get '.'.

        The sequence of the result is taken from the first structure in the
        list, so indexing fails if structure_list.sara_stuctures is empty.
        """
        is_bond_value: int = 2
        not_bond_value: int = -1

        struct_count: int = structure_list.num_structures
        nuc_count: int = structure_list.nuc_count

        position_scores: List[int] = [0] * nuc_count
        symbols_per_position: List[List[str]] = [[] for _ in range(nuc_count)]

        for struct in structure_list.sara_stuctures:
            for nuc_index in range(nuc_count):
                symbol: str = struct.structure[nuc_index]
                symbols_per_position[nuc_index].append(symbol)
                position_scores[nuc_index] += not_bond_value if symbol == '.' else is_bond_value

        weighted_symbols: List[str] = []
        for nuc_index in range(nuc_count):
            chosen: str = '.'
            if position_scores[nuc_index] > struct_count:
                ranked = collections.Counter(symbols_per_position[nuc_index]).most_common(2)
                chosen = ranked[0][0]
                if chosen == '.' and len(ranked) > 1:
                    # the position is bonded, so prefer the best non-dot symbol
                    chosen = ranked[1][0]
            weighted_symbols.append(chosen)

        return Sara2SecondaryStructure(sequence=structure_list.sara_stuctures[0].sequence,
                                       structure=''.join(weighted_symbols))

    def compair_weighted_structure(self, unbound_mfe_struct: Sara2SecondaryStructure, bound_mfe_struct: Sara2SecondaryStructure, weighted_result: Sara2SecondaryStructure, nuc_count: int) -> WeightedComparisonResult:
        """
        Compare the weighted structure against the bound and unbound MFE
        structures, one position at a time, for the first nuc_count positions.

        Symbol written for each position:
          '-' the weighted symbol matches the bound MFE only
          '|' the weighted symbol matches the unbound MFE only
          '+' the weighted symbol matches both
          '.' the weighted symbol matches neither

        Returns a WeightedComparisonResult holding the comparison string, the
        two MFE structures that were passed in, and the per-symbol counts.
        """
        unbound: str = '|'
        bound: str = '-'
        both: str = '+'
        dot: str = '.'

        num_unbound: int = 0
        num_bound: int = 0
        num_both: int = 0
        num_dot: int = 0
        compared_symbols: List[str] = []

        for nuc_index in range(nuc_count):
            weighted_nuc: str = weighted_result.structure[nuc_index]
            matches_unbound: bool = weighted_nuc == unbound_mfe_struct.structure[nuc_index]
            matches_bound: bool = weighted_nuc == bound_mfe_struct.structure[nuc_index]

            if matches_bound and matches_unbound:
                compared_symbols.append(both)
                num_both += 1
            elif matches_bound:
                compared_symbols.append(bound)
                num_bound += 1
            elif matches_unbound:
                compared_symbols.append(unbound)
                num_unbound += 1
            else:
                compared_symbols.append(dot)
                num_dot += 1

        # Built once after the loop so that nuc_count == 0 yields all-zero
        # counts instead of leaving the variable unassigned.
        weighted_nuc_counts: WeightedNucCounts = WeightedNucCounts(num_unbound=num_unbound,
                                                                    num_bound=num_bound,
                                                                    num_both=num_both,
                                                                    num_dot=num_dot,
                                                                    num_nucs=nuc_count)

        return WeightedComparisonResult(comp_struct=''.join(compared_symbols),
                                        unbound_mfe_struct=unbound_mfe_struct,
                                        bound_mfe_struct=bound_mfe_struct,
                                        weighted_nuc_counts=weighted_nuc_counts)
def
make_weighted_struct( self, structure_list: serena.utilities.ensemble_structures.Sara2StructureList) -> serena.utilities.ensemble_structures.Sara2SecondaryStructure:
def make_weighted_struct(self, structure_list: Sara2StructureList) -> Sara2SecondaryStructure:
    """
    Collapse every structure in structure_list into one weighted structure.

    Each position is scored over all structures: any symbol other than '.'
    adds 2 and a '.' subtracts 1. A position counts as bonded when its score
    is greater than structure_list.num_structures. Bonded positions get the
    most common symbol seen at that position, falling back to the second most
    common when the most common is '.'. All other positions get '.'.

    The sequence of the result is taken from the first structure in the list,
    so indexing fails if structure_list.sara_stuctures is empty.
    """
    bonded_score: int = 2
    unbonded_score: int = -1

    total_structs: int = structure_list.num_structures
    position_count: int = structure_list.nuc_count

    scores: List[int] = [0] * position_count
    symbols_by_position: List[List[str]] = [[] for _ in range(position_count)]

    # tally the score and the raw symbols for every position
    for candidate in structure_list.sara_stuctures:
        for position in range(position_count):
            symbol: str = candidate.structure[position]
            symbols_by_position[position].append(symbol)
            scores[position] += unbonded_score if symbol == '.' else bonded_score

    consensus: List[str] = []
    for position in range(position_count):
        chosen: str = '.'
        if scores[position] > total_structs:
            ranked = collections.Counter(symbols_by_position[position]).most_common(2)
            chosen = ranked[0][0]
            if chosen == '.' and len(ranked) > 1:
                # bonded position: take the runner-up instead of the dot
                chosen = ranked[1][0]
        consensus.append(chosen)

    return Sara2SecondaryStructure(sequence=structure_list.sara_stuctures[0].sequence,
                                   structure=''.join(consensus))
def
compair_weighted_structure( self, unbound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure, bound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure, weighted_result: serena.utilities.ensemble_structures.Sara2SecondaryStructure, nuc_count: int):
def compair_weighted_structure(self, unbound_mfe_struct: Sara2SecondaryStructure, bound_mfe_struct: Sara2SecondaryStructure, weighted_result: Sara2SecondaryStructure, nuc_count: int) -> WeightedComparisonResult:
    """
    Compare the weighted structure against the bound and unbound MFE
    structures, one position at a time, for the first nuc_count positions.

    Symbol written for each position:
      '-' the weighted symbol matches the bound MFE only
      '|' the weighted symbol matches the unbound MFE only
      '+' the weighted symbol matches both
      '.' the weighted symbol matches neither

    Returns a WeightedComparisonResult holding the comparison string, the two
    MFE structures that were passed in, and the per-symbol counts.
    """
    unbound: str = '|'
    bound: str = '-'
    both: str = '+'
    dot: str = '.'

    num_unbound: int = 0
    num_bound: int = 0
    num_both: int = 0
    num_dot: int = 0
    compared_symbols: List[str] = []

    for nuc_index in range(nuc_count):
        weighted_nuc: str = weighted_result.structure[nuc_index]
        matches_unbound: bool = weighted_nuc == unbound_mfe_struct.structure[nuc_index]
        matches_bound: bool = weighted_nuc == bound_mfe_struct.structure[nuc_index]

        if matches_bound and matches_unbound:
            compared_symbols.append(both)
            num_both += 1
        elif matches_bound:
            compared_symbols.append(bound)
            num_bound += 1
        elif matches_unbound:
            compared_symbols.append(unbound)
            num_unbound += 1
        else:
            compared_symbols.append(dot)
            num_dot += 1

    # Built once after the loop so that nuc_count == 0 yields all-zero counts
    # instead of leaving the variable unassigned.
    weighted_nuc_counts: WeightedNucCounts = WeightedNucCounts(num_unbound=num_unbound,
                                                                num_bound=num_bound,
                                                                num_both=num_both,
                                                                num_dot=num_dot,
                                                                num_nucs=nuc_count)

    return WeightedComparisonResult(comp_struct=''.join(compared_symbols),
                                    unbound_mfe_struct=unbound_mfe_struct,
                                    bound_mfe_struct=bound_mfe_struct,
                                    weighted_nuc_counts=weighted_nuc_counts)
Compare the weighted structure against the folded and not-folded MFEs. If an element matches only the folded (bound) MFE it gets a '-'; if it matches only the unbound MFE it gets a '|'; if it matches both it gets a '+', and if it matches neither it gets a '.'. The idea is that if you have a straight line in the list then it is very close to the folded MFE, and if it is not straight then it is more like the unbound MFE.