src.serena.utilities.weighted_structures

import collections
from dataclasses import dataclass, field
from typing import List

from serena.utilities.ensemble_structures import Sara2StructureList, Sara2SecondaryStructure
from serena.utilities.ensemble_groups import MultipleEnsembleGroups, SingleEnsembleGroup
  7
  8@dataclass
  9class WeightedEnsembleResult():
 10    """
 11    Class that holds the resuls from weighted structurs as sara2SecondaryStructures
 12    """
 13    structs: List[Sara2SecondaryStructure]
 14
 15@dataclass
 16class WeightedNucCounts():
 17    """
 18    Class for the weighted structure counts
 19    """
 20    num_bound:float = -1
 21    num_unbound:float = -1
 22    num_both:float = -1
 23    num_dot:float = -1
 24    num_nucs:int = -1
 25
 26@dataclass
 27class WeightedComparisonResult():
 28    """
 29    Holds the results from weighting the structures
 30    """
 31    comp_struct: str = ''
 32    unbound_mfe_struct:Sara2SecondaryStructure = Sara2SecondaryStructure()
 33    bound_mfe_struct: Sara2SecondaryStructure = Sara2SecondaryStructure()
 34    weighted_nuc_counts:WeightedNucCounts = WeightedNucCounts()
 35
 36class WeightedStructure():
 37
 38    def __init__(self) -> None:
 39        pass
 40
 41    def make_weighted_struct(self, structure_list: Sara2StructureList)->Sara2SecondaryStructure:
 42        is_bond_value: int = 2
 43        not_bond_value: int = -1
 44
 45        nuc_poistion_values: List[int] = []
 46        nuc_pairs_comp_list: List[List[str]] = []
 47        good_nucs_each_pos: List[bool] = []
 48
 49        struct_count: int = structure_list.num_structures
 50
 51        for nucIndex in range(structure_list.nuc_count):
 52            nuc_poistion_values.append(0)
 53            pairs_list: List[str] = []            
 54            nuc_pairs_comp_list.append(pairs_list)
 55            #good_nucs_each_pos.append(False)
 56
 57        for struct in structure_list.sara_stuctures:
 58            for nucIndex in range(structure_list.nuc_count):
 59                nuc_bond_type:str = struct.structure[nucIndex]
 60                nuc_pairs_comp_list[nucIndex].append(nuc_bond_type)
 61                adder: int = 0
 62                if nuc_bond_type == '.':
 63                    adder = not_bond_value
 64                else:
 65                    adder = is_bond_value
 66                nuc_poistion_values[nucIndex] = nuc_poistion_values[nucIndex] + adder
 67
 68        #now record if the nuc position has a weghted bond
 69        for nucIndex in range(structure_list.nuc_count):
 70            is_weighted_bond=False
 71            if nuc_poistion_values[nucIndex] > struct_count:
 72                is_weighted_bond = True
 73            good_nucs_each_pos.append(is_weighted_bond)
 74
 75        weighted_structure:str = ''
 76        for nucIndex in range(structure_list.nuc_count):
 77            is_bonded = good_nucs_each_pos[nucIndex]
 78            new_counter: collections.Counter = collections.Counter(nuc_pairs_comp_list[nucIndex])
 79            most_common_char: str= '.'
 80            if is_bonded is True:
 81                #most_common_char = '|'
 82                new_char:str = new_counter.most_common(2)[0][0]
 83                length = len(new_counter.most_common(2))
 84                if new_char == '.' and length > 1:
 85                    #then get second most common
 86                    new_char = new_counter.most_common(2)[1][0]
 87                most_common_char = new_char
 88            weighted_structure = weighted_structure + most_common_char
 89
 90        weighted_structure: Sara2SecondaryStructure = Sara2SecondaryStructure(sequence=structure_list.sara_stuctures[0].sequence,
 91                                                                                structure=weighted_structure,)
 92
 93        return weighted_structure
 94
 95    def compair_weighted_structure(self, unbound_mfe_struct:Sara2SecondaryStructure, bound_mfe_struct:Sara2SecondaryStructure, weighted_result:Sara2SecondaryStructure, nuc_count:int):
 96        """
 97        Compaire the weighted structure against the folded and not-folded mfe's.
 98        If a element is present in the folded mfe then it gets a '-'
 99        if element is in unbound only then it gets a '|'.
100        The idea is that if you have a straight line in the list then it is very close to the
101        folded mfe and if it is not straight then it is more like the unbound mfe.
102        """
103        unbound:str = '|'
104        num_unbound:int = 0
105        bound:str = '-'
106        num_bound:int = 0
107        both:str = '+'
108        num_both:int = 0
109        dot:str = '.'
110        num_dot:int = 0
111        compared_struct:str = ''            
112
113        for nuc_index in range(nuc_count):
114            weighted_nuc:str = weighted_result.structure[nuc_index]
115            unbound_nuc:str = unbound_mfe_struct.structure[nuc_index]
116            bound_nuc: str = bound_mfe_struct.structure[nuc_index]
117
118            comp_nuc_symbol:str = ''
119
120            if weighted_nuc == bound_nuc and weighted_nuc != unbound_nuc:
121                comp_nuc_symbol = bound
122                num_bound += 1
123            elif weighted_nuc != bound_nuc and weighted_nuc == unbound_nuc:
124                comp_nuc_symbol = unbound
125                num_unbound += 1
126            elif weighted_nuc == bound_nuc and weighted_nuc == unbound_nuc:
127                comp_nuc_symbol = both
128                num_both += 1
129            else:
130                comp_nuc_symbol = dot
131                num_dot += 1
132
133            weighted_nuc_counts:WeightedNucCounts = WeightedNucCounts(num_unbound=num_unbound,
134                                                                        num_bound=num_bound,
135                                                                        num_both=num_both,
136                                                                        num_dot=num_dot,
137                                                                        num_nucs=nuc_count
138                                                                        )
139            compared_struct = compared_struct + comp_nuc_symbol
140
141            weighted_nuc_counts.num_nucs = nuc_count
142
143        compared_data: WeightedComparisonResult = WeightedComparisonResult(comp_struct=compared_struct,
144                                                                           unbound_mfe_struct=unbound_mfe_struct,
145                                                                           bound_mfe_struct=bound_mfe_struct,
146                                                                           weighted_nuc_counts=weighted_nuc_counts)    
147        return compared_data
@dataclass
class WeightedEnsembleResult:
@dataclass
class WeightedEnsembleResult:
    """
    Container for the results of weighting structures, stored as a list of
    Sara2SecondaryStructure objects.
    """
    # The weighted structures; required (no default value).
    structs: List[Sara2SecondaryStructure]

Class that holds the results from weighted structures as Sara2SecondaryStructures

WeightedEnsembleResult( structs: List[serena.utilities.ensemble_structures.Sara2SecondaryStructure])
structs: List[serena.utilities.ensemble_structures.Sara2SecondaryStructure]
@dataclass
class WeightedNucCounts:
@dataclass
class WeightedNucCounts:
    """
    Per-category nucleotide counts for a weighted structure comparison.

    Every field defaults to -1 (presumably a "not yet computed" marker --
    confirm against callers).
    """
    # Field order is part of the positional constructor signature.
    num_bound: float = -1    # positions counted as "bound"
    num_unbound: float = -1  # positions counted as "unbound"
    num_both: float = -1     # positions counted as "both"
    num_dot: float = -1      # positions counted as "dot"
    num_nucs: int = -1       # total number of nucleotides

Class for the weighted structure counts

WeightedNucCounts( num_bound: float = -1, num_unbound: float = -1, num_both: float = -1, num_dot: float = -1, num_nucs: int = -1)
num_bound: float = -1
num_unbound: float = -1
num_both: float = -1
num_dot: float = -1
num_nucs: int = -1
@dataclass
class WeightedComparisonResult:
@dataclass
class WeightedComparisonResult:
    """
    Holds the results from weighting the structures.

    The object-valued fields are created through ``default_factory`` so each
    instance gets its own default objects. A plain instance default is
    evaluated once at class-definition time and shared by every result, and
    an unhashable default (such as a ``WeightedNucCounts`` instance) is
    rejected by ``dataclass`` on Python 3.11+.
    """
    # Comparison string, one symbol per nucleotide position.
    comp_struct: str = ''
    # Unbound MFE structure the weighted structure was compared against.
    unbound_mfe_struct: Sara2SecondaryStructure = field(default_factory=Sara2SecondaryStructure)
    # Bound MFE structure the weighted structure was compared against.
    bound_mfe_struct: Sara2SecondaryStructure = field(default_factory=Sara2SecondaryStructure)
    # Per-category counts backing comp_struct.
    weighted_nuc_counts: WeightedNucCounts = field(default_factory=WeightedNucCounts)

Holds the results from weighting the structures

WeightedComparisonResult( comp_struct: str = '', unbound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure = <serena.utilities.ensemble_structures.Sara2SecondaryStructure object>, bound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure = <serena.utilities.ensemble_structures.Sara2SecondaryStructure object>, weighted_nuc_counts: src.serena.utilities.weighted_structures.WeightedNucCounts = WeightedNucCounts(num_bound=-1, num_unbound=-1, num_both=-1, num_dot=-1, num_nucs=-1))
comp_struct: str = ''
unbound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure = <serena.utilities.ensemble_structures.Sara2SecondaryStructure object>
bound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure = <serena.utilities.ensemble_structures.Sara2SecondaryStructure object>
weighted_nuc_counts: src.serena.utilities.weighted_structures.WeightedNucCounts = WeightedNucCounts(num_bound=-1, num_unbound=-1, num_both=-1, num_dot=-1, num_nucs=-1)
class WeightedStructure:
 38class WeightedStructure():
 39
 40    def __init__(self) -> None:
 41        pass
 42
 43    def make_weighted_struct(self, structure_list: Sara2StructureList)->Sara2SecondaryStructure:
 44        is_bond_value: int = 2
 45        not_bond_value: int = -1
 46
 47        nuc_poistion_values: List[int] = []
 48        nuc_pairs_comp_list: List[List[str]] = []
 49        good_nucs_each_pos: List[bool] = []
 50
 51        struct_count: int = structure_list.num_structures
 52
 53        for nucIndex in range(structure_list.nuc_count):
 54            nuc_poistion_values.append(0)
 55            pairs_list: List[str] = []            
 56            nuc_pairs_comp_list.append(pairs_list)
 57            #good_nucs_each_pos.append(False)
 58
 59        for struct in structure_list.sara_stuctures:
 60            for nucIndex in range(structure_list.nuc_count):
 61                nuc_bond_type:str = struct.structure[nucIndex]
 62                nuc_pairs_comp_list[nucIndex].append(nuc_bond_type)
 63                adder: int = 0
 64                if nuc_bond_type == '.':
 65                    adder = not_bond_value
 66                else:
 67                    adder = is_bond_value
 68                nuc_poistion_values[nucIndex] = nuc_poistion_values[nucIndex] + adder
 69
 70        #now record if the nuc position has a weghted bond
 71        for nucIndex in range(structure_list.nuc_count):
 72            is_weighted_bond=False
 73            if nuc_poistion_values[nucIndex] > struct_count:
 74                is_weighted_bond = True
 75            good_nucs_each_pos.append(is_weighted_bond)
 76
 77        weighted_structure:str = ''
 78        for nucIndex in range(structure_list.nuc_count):
 79            is_bonded = good_nucs_each_pos[nucIndex]
 80            new_counter: collections.Counter = collections.Counter(nuc_pairs_comp_list[nucIndex])
 81            most_common_char: str= '.'
 82            if is_bonded is True:
 83                #most_common_char = '|'
 84                new_char:str = new_counter.most_common(2)[0][0]
 85                length = len(new_counter.most_common(2))
 86                if new_char == '.' and length > 1:
 87                    #then get second most common
 88                    new_char = new_counter.most_common(2)[1][0]
 89                most_common_char = new_char
 90            weighted_structure = weighted_structure + most_common_char
 91
 92        weighted_structure: Sara2SecondaryStructure = Sara2SecondaryStructure(sequence=structure_list.sara_stuctures[0].sequence,
 93                                                                                structure=weighted_structure,)
 94
 95        return weighted_structure
 96
 97    def compair_weighted_structure(self, unbound_mfe_struct:Sara2SecondaryStructure, bound_mfe_struct:Sara2SecondaryStructure, weighted_result:Sara2SecondaryStructure, nuc_count:int):
 98        """
 99        Compaire the weighted structure against the folded and not-folded mfe's.
100        If a element is present in the folded mfe then it gets a '-'
101        if element is in unbound only then it gets a '|'.
102        The idea is that if you have a straight line in the list then it is very close to the
103        folded mfe and if it is not straight then it is more like the unbound mfe.
104        """
105        unbound:str = '|'
106        num_unbound:int = 0
107        bound:str = '-'
108        num_bound:int = 0
109        both:str = '+'
110        num_both:int = 0
111        dot:str = '.'
112        num_dot:int = 0
113        compared_struct:str = ''            
114
115        for nuc_index in range(nuc_count):
116            weighted_nuc:str = weighted_result.structure[nuc_index]
117            unbound_nuc:str = unbound_mfe_struct.structure[nuc_index]
118            bound_nuc: str = bound_mfe_struct.structure[nuc_index]
119
120            comp_nuc_symbol:str = ''
121
122            if weighted_nuc == bound_nuc and weighted_nuc != unbound_nuc:
123                comp_nuc_symbol = bound
124                num_bound += 1
125            elif weighted_nuc != bound_nuc and weighted_nuc == unbound_nuc:
126                comp_nuc_symbol = unbound
127                num_unbound += 1
128            elif weighted_nuc == bound_nuc and weighted_nuc == unbound_nuc:
129                comp_nuc_symbol = both
130                num_both += 1
131            else:
132                comp_nuc_symbol = dot
133                num_dot += 1
134
135            weighted_nuc_counts:WeightedNucCounts = WeightedNucCounts(num_unbound=num_unbound,
136                                                                        num_bound=num_bound,
137                                                                        num_both=num_both,
138                                                                        num_dot=num_dot,
139                                                                        num_nucs=nuc_count
140                                                                        )
141            compared_struct = compared_struct + comp_nuc_symbol
142
143            weighted_nuc_counts.num_nucs = nuc_count
144
145        compared_data: WeightedComparisonResult = WeightedComparisonResult(comp_struct=compared_struct,
146                                                                           unbound_mfe_struct=unbound_mfe_struct,
147                                                                           bound_mfe_struct=bound_mfe_struct,
148                                                                           weighted_nuc_counts=weighted_nuc_counts)    
149        return compared_data
def make_weighted_struct( self, structure_list: serena.utilities.ensemble_structures.Sara2StructureList) -> serena.utilities.ensemble_structures.Sara2SecondaryStructure:
43    def make_weighted_struct(self, structure_list: Sara2StructureList)->Sara2SecondaryStructure:
44        is_bond_value: int = 2
45        not_bond_value: int = -1
46
47        nuc_poistion_values: List[int] = []
48        nuc_pairs_comp_list: List[List[str]] = []
49        good_nucs_each_pos: List[bool] = []
50
51        struct_count: int = structure_list.num_structures
52
53        for nucIndex in range(structure_list.nuc_count):
54            nuc_poistion_values.append(0)
55            pairs_list: List[str] = []            
56            nuc_pairs_comp_list.append(pairs_list)
57            #good_nucs_each_pos.append(False)
58
59        for struct in structure_list.sara_stuctures:
60            for nucIndex in range(structure_list.nuc_count):
61                nuc_bond_type:str = struct.structure[nucIndex]
62                nuc_pairs_comp_list[nucIndex].append(nuc_bond_type)
63                adder: int = 0
64                if nuc_bond_type == '.':
65                    adder = not_bond_value
66                else:
67                    adder = is_bond_value
68                nuc_poistion_values[nucIndex] = nuc_poistion_values[nucIndex] + adder
69
70        #now record if the nuc position has a weghted bond
71        for nucIndex in range(structure_list.nuc_count):
72            is_weighted_bond=False
73            if nuc_poistion_values[nucIndex] > struct_count:
74                is_weighted_bond = True
75            good_nucs_each_pos.append(is_weighted_bond)
76
77        weighted_structure:str = ''
78        for nucIndex in range(structure_list.nuc_count):
79            is_bonded = good_nucs_each_pos[nucIndex]
80            new_counter: collections.Counter = collections.Counter(nuc_pairs_comp_list[nucIndex])
81            most_common_char: str= '.'
82            if is_bonded is True:
83                #most_common_char = '|'
84                new_char:str = new_counter.most_common(2)[0][0]
85                length = len(new_counter.most_common(2))
86                if new_char == '.' and length > 1:
87                    #then get second most common
88                    new_char = new_counter.most_common(2)[1][0]
89                most_common_char = new_char
90            weighted_structure = weighted_structure + most_common_char
91
92        weighted_structure: Sara2SecondaryStructure = Sara2SecondaryStructure(sequence=structure_list.sara_stuctures[0].sequence,
93                                                                                structure=weighted_structure,)
94
95        return weighted_structure
def compair_weighted_structure( self, unbound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure, bound_mfe_struct: serena.utilities.ensemble_structures.Sara2SecondaryStructure, weighted_result: serena.utilities.ensemble_structures.Sara2SecondaryStructure, nuc_count: int):
 97    def compair_weighted_structure(self, unbound_mfe_struct:Sara2SecondaryStructure, bound_mfe_struct:Sara2SecondaryStructure, weighted_result:Sara2SecondaryStructure, nuc_count:int):
 98        """
 99        Compaire the weighted structure against the folded and not-folded mfe's.
100        If a element is present in the folded mfe then it gets a '-'
101        if element is in unbound only then it gets a '|'.
102        The idea is that if you have a straight line in the list then it is very close to the
103        folded mfe and if it is not straight then it is more like the unbound mfe.
104        """
105        unbound:str = '|'
106        num_unbound:int = 0
107        bound:str = '-'
108        num_bound:int = 0
109        both:str = '+'
110        num_both:int = 0
111        dot:str = '.'
112        num_dot:int = 0
113        compared_struct:str = ''            
114
115        for nuc_index in range(nuc_count):
116            weighted_nuc:str = weighted_result.structure[nuc_index]
117            unbound_nuc:str = unbound_mfe_struct.structure[nuc_index]
118            bound_nuc: str = bound_mfe_struct.structure[nuc_index]
119
120            comp_nuc_symbol:str = ''
121
122            if weighted_nuc == bound_nuc and weighted_nuc != unbound_nuc:
123                comp_nuc_symbol = bound
124                num_bound += 1
125            elif weighted_nuc != bound_nuc and weighted_nuc == unbound_nuc:
126                comp_nuc_symbol = unbound
127                num_unbound += 1
128            elif weighted_nuc == bound_nuc and weighted_nuc == unbound_nuc:
129                comp_nuc_symbol = both
130                num_both += 1
131            else:
132                comp_nuc_symbol = dot
133                num_dot += 1
134
135            weighted_nuc_counts:WeightedNucCounts = WeightedNucCounts(num_unbound=num_unbound,
136                                                                        num_bound=num_bound,
137                                                                        num_both=num_both,
138                                                                        num_dot=num_dot,
139                                                                        num_nucs=nuc_count
140                                                                        )
141            compared_struct = compared_struct + comp_nuc_symbol
142
143            weighted_nuc_counts.num_nucs = nuc_count
144
145        compared_data: WeightedComparisonResult = WeightedComparisonResult(comp_struct=compared_struct,
146                                                                           unbound_mfe_struct=unbound_mfe_struct,
147                                                                           bound_mfe_struct=bound_mfe_struct,
148                                                                           weighted_nuc_counts=weighted_nuc_counts)    
149        return compared_data

Compare the weighted structure against the folded and not-folded MFEs. If an element is present in the folded MFE then it gets a '-'; if an element is in the unbound MFE only then it gets a '|'. The idea is that if you have a straight line in the list then it is very close to the folded MFE, and if it is not straight then it is more like the unbound MFE.