src.serena.interfaces.nupack4_0_28_wsl2_interface

File that provides an interface to a standard Python nupack4 install on Linux. The intention is to be able to access this via a Docker image, accessible on Docker Hub, that has nupack4 set up and ready for this project to consume.

  1"""
  2File that provides an interface to a standard Python nupack4 install on Linux.
  3The intention is to be able to access this via a Docker image that will be accessible on Docker Hub
  4and that has nupack4 set up and ready for this project to consume.
  5"""
  6from typing import List, Dict
  7from dataclasses import dataclass
  8from datetime import datetime
  9
 10from enum import Enum
 11from nupack import *
 12
 13from serena.utilities.ensemble_structures import  Sara2SecondaryStructure, Sara2StructureList, MakeSecondaryStructures
 14from serena.utilities.ensemble_groups import SingleEnsembleGroup, MultipleEnsembleGroups, MakeEnsembleGroups, EnsembleSwitchStateMFEStructs
 15
 16
 17class MaterialParameter(Enum):
 18    """
 19    Enumerator for Nupack material properties
 20    """
 21    NONE = 0
 22    #"Based on [Mathews99] and [Lu06] with additional parameters [Xia98,Zuker03] including coaxial stacking [Mathews99,Turner10] and dangle stacking [Serra95,Zuker03,Turner10] in 1M Na+."
 23    rna06_nupack4 = 1
 24    #"Based on [Serra95] with additional parameters [Zuker03] including coaxial stacking [Mathews99,Turner10] and dangle stacking [Serra95,Zuker03,Turner10] in 1M Na+."
 25    rna95_nupack4 = 2    
 26    #"Parameters from [Mathews99] with terminal mismatch free energies in exterior loops and multiloops replaced by two dangle stacking free energies. Parameters are provided only for 37 ∘C."
 27    rna99_nupack3 = 3    
 28    #"Same as rna95 except that terminal mismatch free energies in exterior loops and multiloops are replaced by two dangle stacking free energies."
 29    rna95_nupack3 = 4
 30
 31@dataclass
 32class NupackSettings():
 33    """
 34    Class for passing the setting needed for nupack to run
 35    """  
 36    material_param:MaterialParameter = MaterialParameter.NONE
 37    temp_C: int = 0
 38    kcal_span_from_mfe:int = 0
 39    Kcal_unit_increments: float = 0
 40    sequence:str = ''
 41
 42
 43class NUPACK4Interface():
 44    """
 45    Class for nupack4 interface for sara2 logic intended for serena package
 46    """
 47
 48    def __init__(self) -> None:
 49        pass
 50
 51    def select_material_parameters(self, parameters:MaterialParameter):
 52        """
 53        Function that selects the correct parameters to pass to nupack
 54        based on the enum passed to it
 55        """
 56        param:str = ''        
 57        if parameters == MaterialParameter.rna06_nupack4:
 58            param = "rna06"
 59        elif parameters == MaterialParameter.rna95_nupack4:
 60            param = "rna95"
 61        elif parameters == MaterialParameter.rna99_nupack3:
 62            param = "rna99-nupack3"
 63        elif parameters == MaterialParameter.rna95_nupack3:
 64            param = "rna95-nupack3"            
 65        return param                                                      
 66
 67    def select_model(self, material_param:MaterialParameter, temp_C:int):
 68        """
 69        Select the model that nupack will use for folding based on material params and temp
 70        """
 71        param: str = self.select_material_parameters(material_param)
 72        my_model = Model(material=param, celsius=temp_C)
 73        return my_model
 74
 75    def get_subopt_energy_gap(self, material_param:MaterialParameter, temp_C:int, sequence_string:str, energy_delta_from_MFE: int, bail_num:int=500000):
 76        """
 77        Function to get the subopt result from nupack as a Sara2StructureList
 78        """
 79        #run through subopt
 80        my_model = self.select_model(material_param, temp_C)
 81        kcal_group_structures_list: Sara2StructureList = Sara2StructureList()
 82        ensemble_kcal_group= subopt(strands=sequence_string, model=my_model, energy_gap=energy_delta_from_MFE)
 83
 84        for i,kcal_group_elementInfo in enumerate(ensemble_kcal_group):
 85
 86            #get all the structures and energis pulled and prepped for proccessing and add them tot eh dict and the list               
 87            structure = str(kcal_group_elementInfo.structure)
 88            freeEnergy = float(kcal_group_elementInfo.energy)
 89            stackEnergy = float(kcal_group_elementInfo.stack_energy)
 90            structure_info: Sara2SecondaryStructure = Sara2SecondaryStructure(sequence=sequence_string, structure=structure, 
 91                                                                              free_energy=freeEnergy, stack_energy=stackEnergy)
 92            kcal_group_structures_list.add_structure(structure_info)
 93            if i > bail_num:
 94                break
 95
 96        return kcal_group_structures_list
 97
 98    def load_nupack_subopt_as_ensemble(self, span_structures:Sara2StructureList, kcal_span_from_mfe:float, Kcal_unit_increments:float, switch_state:EnsembleSwitchStateMFEStructs):
 99        """
100        Function that return a MultipleEnsembleGroup from an ensemble generated by nupack in Sara2StructurseList formate
101        """
102        make_ensemble: MakeEnsembleGroups = MakeEnsembleGroups()
103        make_structs: MakeSecondaryStructures = MakeSecondaryStructures()
104        mfe_energy:float =  span_structures.mfe_free_energy
105
106        #this is for increments of 1 kcal need to do fraction
107        num_groups: int = int(kcal_span_from_mfe / Kcal_unit_increments)
108        remainder: int = kcal_span_from_mfe % Kcal_unit_increments
109
110        groups_list : List[Sara2StructureList] = []
111        groups_index_used: List[bool] = []
112        groups_dict: Dict[int, Sara2StructureList] = {}
113        group_values: List[float] = []
114
115        #this fills up the list of energy deltas to publich EV's for
116        current_energy: float = mfe_energy
117        group_values.append(current_energy)
118        for index in range(num_groups):
119            current_energy = current_energy + Kcal_unit_increments
120            group_values.append(current_energy)
121
122        #now initialize the groups_list
123        for index in range(len(group_values)-1):
124            group: Sara2StructureList = Sara2StructureList()
125            groups_list.append(group)
126            groups_index_used.append(False)
127            groups_dict[index+1] = group
128
129        num_sara_struct: int = span_structures.num_structures
130        for sara_index in range(0,num_sara_struct):
131            sara_structure: Sara2SecondaryStructure = span_structures.sara_stuctures[sara_index]
132            current_energy = sara_structure.free_energy
133
134            #need to do this because there are two indexes need to look at each 
135            #loop and want to avoid triggering a list index overrun
136            for group_index in range(len(group_values)-1):
137                #remember we are dealing with neg kcal so its you want to 
138                min_energy: float = group_values[group_index]
139                max_energy: float = group_values[group_index+1]
140                if current_energy >= min_energy and current_energy < max_energy:
141                    groups_list[group_index].add_structure(sara_structure)
142                    groups_index_used[group_index] = True            
143
144        single_groups: List[SingleEnsembleGroup] = []
145
146        for group_index in range(len(groups_list)):
147            group = groups_list[group_index]
148            start_value = group_values[group_index] - Kcal_unit_increments
149            end_value = group_values[group_index]
150            this_group:SingleEnsembleGroup = make_ensemble.make_singel_ensemble_group(ensemble_structures=group,
151                                                                                      mfe_switch_structures=switch_state,
152                                                                                      kcal_start=start_value,
153                                                                                      kcal_end=end_value)
154            single_groups.append(this_group)
155
156        ensemble_groups: MultipleEnsembleGroups = make_ensemble.make_multiple_ensemple_groups(ensemble_groups=single_groups,
157                                                                                              mfe_switch_structures=switch_state)
158        return ensemble_groups
class MaterialParameter(enum.Enum):
class MaterialParameter(Enum):
    """
    Enumeration of the Nupack material (energy parameter set) choices.

    The per-member notes below are the parameter-set descriptions carried
    over from the original comments.
    """

    # Default member; no parameter set is associated with it.
    NONE = 0

    # rna06: based on [Mathews99] and [Lu06] with additional parameters
    # [Xia98,Zuker03] including coaxial stacking [Mathews99,Turner10] and
    # dangle stacking [Serra95,Zuker03,Turner10] in 1M Na+.
    rna06_nupack4 = 1

    # rna95: based on [Serra95] with additional parameters [Zuker03]
    # including coaxial stacking [Mathews99,Turner10] and dangle stacking
    # [Serra95,Zuker03,Turner10] in 1M Na+.
    rna95_nupack4 = 2

    # rna99-nupack3: parameters from [Mathews99] with terminal mismatch free
    # energies in exterior loops and multiloops replaced by two dangle
    # stacking free energies. Parameters are provided only for 37 °C.
    rna99_nupack3 = 3

    # rna95-nupack3: same as rna95 except that terminal mismatch free
    # energies in exterior loops and multiloops are replaced by two dangle
    # stacking free energies.
    rna95_nupack3 = 4

Enumerator for Nupack material properties

rna06_nupack4 = <MaterialParameter.rna06_nupack4: 1>
rna95_nupack4 = <MaterialParameter.rna95_nupack4: 2>
rna99_nupack3 = <MaterialParameter.rna99_nupack3: 3>
rna95_nupack3 = <MaterialParameter.rna95_nupack3: 4>
Inherited Members
enum.Enum
name
value
@dataclass
class NupackSettings:
@dataclass
class NupackSettings:
    """
    Container for the settings that are handed to nupack for a run.

    Every field has a neutral default, so an instance can be created empty
    and filled in afterwards.
    """
    # Which Nupack material parameter set to use.
    material_param: MaterialParameter = MaterialParameter.NONE
    # Temperature; presumably degrees Celsius going by the name - confirm with callers.
    temp_C: int = 0
    # Size of the energy window above the MFE; presumably kcal - confirm with callers.
    kcal_span_from_mfe: int = 0
    # Width of one energy group inside that window; presumably kcal - confirm with callers.
    Kcal_unit_increments: float = 0
    # Sequence to fold.
    sequence: str = ''

Class for passing the settings needed for nupack to run

NupackSettings( material_param: src.serena.interfaces.nupack4_0_28_wsl2_interface.MaterialParameter = <MaterialParameter.NONE: 0>, temp_C: int = 0, kcal_span_from_mfe: int = 0, Kcal_unit_increments: float = 0, sequence: str = '')
temp_C: int = 0
kcal_span_from_mfe: int = 0
Kcal_unit_increments: float = 0
sequence: str = ''
class NUPACK4Interface:
 44class NUPACK4Interface():
 45    """
 46    Class for nupack4 interface for sara2 logic intended for serena package
 47    """
 48
 49    def __init__(self) -> None:
 50        pass
 51
 52    def select_material_parameters(self, parameters:MaterialParameter):
 53        """
 54        Function that selects the correct parameters to pass to nupack
 55        based on the enum passed to it
 56        """
 57        param:str = ''        
 58        if parameters == MaterialParameter.rna06_nupack4:
 59            param = "rna06"
 60        elif parameters == MaterialParameter.rna95_nupack4:
 61            param = "rna95"
 62        elif parameters == MaterialParameter.rna99_nupack3:
 63            param = "rna99-nupack3"
 64        elif parameters == MaterialParameter.rna95_nupack3:
 65            param = "rna95-nupack3"            
 66        return param                                                      
 67
 68    def select_model(self, material_param:MaterialParameter, temp_C:int):
 69        """
 70        Select the model that nupack will use for folding based on material params and temp
 71        """
 72        param: str = self.select_material_parameters(material_param)
 73        my_model = Model(material=param, celsius=temp_C)
 74        return my_model
 75
 76    def get_subopt_energy_gap(self, material_param:MaterialParameter, temp_C:int, sequence_string:str, energy_delta_from_MFE: int, bail_num:int=500000):
 77        """
 78        Function to get the subopt result from nupack as a Sara2StructureList
 79        """
 80        #run through subopt
 81        my_model = self.select_model(material_param, temp_C)
 82        kcal_group_structures_list: Sara2StructureList = Sara2StructureList()
 83        ensemble_kcal_group= subopt(strands=sequence_string, model=my_model, energy_gap=energy_delta_from_MFE)
 84
 85        for i,kcal_group_elementInfo in enumerate(ensemble_kcal_group):
 86
 87            #get all the structures and energis pulled and prepped for proccessing and add them tot eh dict and the list               
 88            structure = str(kcal_group_elementInfo.structure)
 89            freeEnergy = float(kcal_group_elementInfo.energy)
 90            stackEnergy = float(kcal_group_elementInfo.stack_energy)
 91            structure_info: Sara2SecondaryStructure = Sara2SecondaryStructure(sequence=sequence_string, structure=structure, 
 92                                                                              free_energy=freeEnergy, stack_energy=stackEnergy)
 93            kcal_group_structures_list.add_structure(structure_info)
 94            if i > bail_num:
 95                break
 96
 97        return kcal_group_structures_list
 98
 99    def load_nupack_subopt_as_ensemble(self, span_structures:Sara2StructureList, kcal_span_from_mfe:float, Kcal_unit_increments:float, switch_state:EnsembleSwitchStateMFEStructs):
100        """
101        Function that return a MultipleEnsembleGroup from an ensemble generated by nupack in Sara2StructurseList formate
102        """
103        make_ensemble: MakeEnsembleGroups = MakeEnsembleGroups()
104        make_structs: MakeSecondaryStructures = MakeSecondaryStructures()
105        mfe_energy:float =  span_structures.mfe_free_energy
106
107        #this is for increments of 1 kcal need to do fraction
108        num_groups: int = int(kcal_span_from_mfe / Kcal_unit_increments)
109        remainder: int = kcal_span_from_mfe % Kcal_unit_increments
110
111        groups_list : List[Sara2StructureList] = []
112        groups_index_used: List[bool] = []
113        groups_dict: Dict[int, Sara2StructureList] = {}
114        group_values: List[float] = []
115
116        #this fills up the list of energy deltas to publich EV's for
117        current_energy: float = mfe_energy
118        group_values.append(current_energy)
119        for index in range(num_groups):
120            current_energy = current_energy + Kcal_unit_increments
121            group_values.append(current_energy)
122
123        #now initialize the groups_list
124        for index in range(len(group_values)-1):
125            group: Sara2StructureList = Sara2StructureList()
126            groups_list.append(group)
127            groups_index_used.append(False)
128            groups_dict[index+1] = group
129
130        num_sara_struct: int = span_structures.num_structures
131        for sara_index in range(0,num_sara_struct):
132            sara_structure: Sara2SecondaryStructure = span_structures.sara_stuctures[sara_index]
133            current_energy = sara_structure.free_energy
134
135            #need to do this because there are two indexes need to look at each 
136            #loop and want to avoid triggering a list index overrun
137            for group_index in range(len(group_values)-1):
138                #remember we are dealing with neg kcal so its you want to 
139                min_energy: float = group_values[group_index]
140                max_energy: float = group_values[group_index+1]
141                if current_energy >= min_energy and current_energy < max_energy:
142                    groups_list[group_index].add_structure(sara_structure)
143                    groups_index_used[group_index] = True            
144
145        single_groups: List[SingleEnsembleGroup] = []
146
147        for group_index in range(len(groups_list)):
148            group = groups_list[group_index]
149            start_value = group_values[group_index] - Kcal_unit_increments
150            end_value = group_values[group_index]
151            this_group:SingleEnsembleGroup = make_ensemble.make_singel_ensemble_group(ensemble_structures=group,
152                                                                                      mfe_switch_structures=switch_state,
153                                                                                      kcal_start=start_value,
154                                                                                      kcal_end=end_value)
155            single_groups.append(this_group)
156
157        ensemble_groups: MultipleEnsembleGroups = make_ensemble.make_multiple_ensemple_groups(ensemble_groups=single_groups,
158                                                                                              mfe_switch_structures=switch_state)
159        return ensemble_groups

Class for nupack4 interface for sara2 logic intended for serena package

def select_material_parameters( self, parameters: src.serena.interfaces.nupack4_0_28_wsl2_interface.MaterialParameter):
52    def select_material_parameters(self, parameters:MaterialParameter):
53        """
54        Function that selects the correct parameters to pass to nupack
55        based on the enum passed to it
56        """
57        param:str = ''        
58        if parameters == MaterialParameter.rna06_nupack4:
59            param = "rna06"
60        elif parameters == MaterialParameter.rna95_nupack4:
61            param = "rna95"
62        elif parameters == MaterialParameter.rna99_nupack3:
63            param = "rna99-nupack3"
64        elif parameters == MaterialParameter.rna95_nupack3:
65            param = "rna95-nupack3"            
66        return param                                                      

Function that selects the correct parameters to pass to nupack based on the enum passed to it

def select_model( self, material_param: src.serena.interfaces.nupack4_0_28_wsl2_interface.MaterialParameter, temp_C: int):
68    def select_model(self, material_param:MaterialParameter, temp_C:int):
69        """
70        Select the model that nupack will use for folding based on material params and temp
71        """
72        param: str = self.select_material_parameters(material_param)
73        my_model = Model(material=param, celsius=temp_C)
74        return my_model

Select the model that nupack will use for folding based on material params and temp

def get_subopt_energy_gap( self, material_param: src.serena.interfaces.nupack4_0_28_wsl2_interface.MaterialParameter, temp_C: int, sequence_string: str, energy_delta_from_MFE: int, bail_num: int = 500000):
76    def get_subopt_energy_gap(self, material_param:MaterialParameter, temp_C:int, sequence_string:str, energy_delta_from_MFE: int, bail_num:int=500000):
77        """
78        Function to get the subopt result from nupack as a Sara2StructureList
79        """
80        #run through subopt
81        my_model = self.select_model(material_param, temp_C)
82        kcal_group_structures_list: Sara2StructureList = Sara2StructureList()
83        ensemble_kcal_group= subopt(strands=sequence_string, model=my_model, energy_gap=energy_delta_from_MFE)
84
85        for i,kcal_group_elementInfo in enumerate(ensemble_kcal_group):
86
87            #get all the structures and energis pulled and prepped for proccessing and add them tot eh dict and the list               
88            structure = str(kcal_group_elementInfo.structure)
89            freeEnergy = float(kcal_group_elementInfo.energy)
90            stackEnergy = float(kcal_group_elementInfo.stack_energy)
91            structure_info: Sara2SecondaryStructure = Sara2SecondaryStructure(sequence=sequence_string, structure=structure, 
92                                                                              free_energy=freeEnergy, stack_energy=stackEnergy)
93            kcal_group_structures_list.add_structure(structure_info)
94            if i > bail_num:
95                break
96
97        return kcal_group_structures_list

Function to get the subopt result from nupack as a Sara2StructureList

def load_nupack_subopt_as_ensemble( self, span_structures: serena.utilities.ensemble_structures.Sara2StructureList, kcal_span_from_mfe: float, Kcal_unit_increments: float, switch_state: serena.utilities.ensemble_groups.EnsembleSwitchStateMFEStructs):
 99    def load_nupack_subopt_as_ensemble(self, span_structures:Sara2StructureList, kcal_span_from_mfe:float, Kcal_unit_increments:float, switch_state:EnsembleSwitchStateMFEStructs):
100        """
101        Function that return a MultipleEnsembleGroup from an ensemble generated by nupack in Sara2StructurseList formate
102        """
103        make_ensemble: MakeEnsembleGroups = MakeEnsembleGroups()
104        make_structs: MakeSecondaryStructures = MakeSecondaryStructures()
105        mfe_energy:float =  span_structures.mfe_free_energy
106
107        #this is for increments of 1 kcal need to do fraction
108        num_groups: int = int(kcal_span_from_mfe / Kcal_unit_increments)
109        remainder: int = kcal_span_from_mfe % Kcal_unit_increments
110
111        groups_list : List[Sara2StructureList] = []
112        groups_index_used: List[bool] = []
113        groups_dict: Dict[int, Sara2StructureList] = {}
114        group_values: List[float] = []
115
116        #this fills up the list of energy deltas to publich EV's for
117        current_energy: float = mfe_energy
118        group_values.append(current_energy)
119        for index in range(num_groups):
120            current_energy = current_energy + Kcal_unit_increments
121            group_values.append(current_energy)
122
123        #now initialize the groups_list
124        for index in range(len(group_values)-1):
125            group: Sara2StructureList = Sara2StructureList()
126            groups_list.append(group)
127            groups_index_used.append(False)
128            groups_dict[index+1] = group
129
130        num_sara_struct: int = span_structures.num_structures
131        for sara_index in range(0,num_sara_struct):
132            sara_structure: Sara2SecondaryStructure = span_structures.sara_stuctures[sara_index]
133            current_energy = sara_structure.free_energy
134
135            #need to do this because there are two indexes need to look at each 
136            #loop and want to avoid triggering a list index overrun
137            for group_index in range(len(group_values)-1):
138                #remember we are dealing with neg kcal so its you want to 
139                min_energy: float = group_values[group_index]
140                max_energy: float = group_values[group_index+1]
141                if current_energy >= min_energy and current_energy < max_energy:
142                    groups_list[group_index].add_structure(sara_structure)
143                    groups_index_used[group_index] = True            
144
145        single_groups: List[SingleEnsembleGroup] = []
146
147        for group_index in range(len(groups_list)):
148            group = groups_list[group_index]
149            start_value = group_values[group_index] - Kcal_unit_increments
150            end_value = group_values[group_index]
151            this_group:SingleEnsembleGroup = make_ensemble.make_singel_ensemble_group(ensemble_structures=group,
152                                                                                      mfe_switch_structures=switch_state,
153                                                                                      kcal_start=start_value,
154                                                                                      kcal_end=end_value)
155            single_groups.append(this_group)
156
157        ensemble_groups: MultipleEnsembleGroups = make_ensemble.make_multiple_ensemple_groups(ensemble_groups=single_groups,
158                                                                                              mfe_switch_structures=switch_state)
159        return ensemble_groups

Function that returns a MultipleEnsembleGroups from an ensemble generated by nupack in Sara2StructureList format