Skip to content

info

FragmentInfo

FragmentInfo(source: str = utils.path.biological_interfaces, fragment_length: int = 5, sql: bool = False, **kwargs)

Stores all Fragment metadata for a particular FragmentDatabase

Parameters:

  • source (str, default: biological_interfaces ) –

    Which type of information to use

  • fragment_length (int, default: 5 ) –

    What is the length of the fragment database

  • sql (bool, default: False ) –

    Whether the database is stored in a SQL table

  • **kwargs
Source code in symdesign/structure/fragment/info.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def __init__(self, source: str = utils.path.biological_interfaces, fragment_length: int = 5, sql: bool = False,
             **kwargs):
    """Construct the instance

    Args:
        source: Which type of information to use
        fragment_length: What is the length of the fragment database
        sql: Whether the database is stored in a SQL table. SQL storage isn't implemented, so passing True raises
            NotImplementedError
        **kwargs: Accepted so callers may pass additional keyword arguments. Not used by this constructor
    Raises:
        NotImplementedError: If sql is True
    """
    super().__init__()  # object
    self.cluster_info_path = utils.path.intfrag_cluster_info_dirpath
    self.fragment_length = fragment_length
    self.fragment_range = parameterize_frag_length(fragment_length)
    self.info = {}
    self.source = source
    self.statistics = {}
    # {cluster_id: [[mapped, paired, {max_weight_counts}, ...], ..., frequencies: {'A': 0.11, ...}}
    #  ex: {'1_0_0': [[0.540, 0.486, {-2: 67, -1: 326, ...}, {-2: 166, ...}], 2749]

    if sql:
        # Nothing may follow this raise. Once SQL support exists, set self.db = True here instead
        raise NotImplementedError("Can't connect to SQL database yet")

    # Todo initialize as local directory
    self.db = False

    self._load_db_statistics()

location property

location: AnyStr | None

Provide the location where fragments are stored

aa_frequencies property

aa_frequencies: dict[protein_letters_alph1, float]

Retrieve database specific amino acid representation frequencies

Returns:

  • dict[protein_letters_alph1, float]

    {'A': 0.11, 'C': 0.03, 'D': 0.53, ...}

retrieve_cluster_info

retrieve_cluster_info(cluster: str = None, source: source_literal = None, index: str = None) -> dict[str, int | float | str | dict[int, dict[protein_letters_literal | str, float | tuple[int, float]]]]

Return information from the fragment information database by cluster_id, information source, and source index

Parameters:

  • cluster (str, default: None ) –

    A cluster_id to get information about

  • source (source_literal, default: None ) –

    The source of information to retrieve. Must be one of 'size', 'rmsd', 'rep', 'mapped', or 'paired'

  • index (str, default: None ) –

    The index to gather information from. Source must be one of 'mapped' or 'paired' to use

Returns: {'size': ..., 'rmsd': ..., 'rep': ..., 'mapped': indexed_frequencies, 'paired': indexed_frequencies} Where indexed_frequencies has format {-2: {'A': 0.1, 'C': 0., ..., 'info': (12, 0.41)}, -1: {}, ..., 2: {}}

Source code in symdesign/structure/fragment/info.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def retrieve_cluster_info(self, cluster: str = None, source: source_literal = None, index: str = None) -> \
        dict[str, int | float | str | dict[int, dict[sequence.protein_letters_literal | str, float | tuple[int, float]]]]:
    """Return information from the fragment information database by cluster_id, information source, and source index

    If the cluster isn't present in self.info, it is loaded with self.load_cluster_info() first

    Args:
        cluster: A cluster_id to get information about. Either the string form, ex: '1_2_123', or the key under
            which the cluster is stored in self.info
        source: The source of information to retrieve. Must be one of 'size', 'rmsd', 'rep', 'mapped', or 'paired'
        index: The index to gather information from. Source must be one of 'mapped' or 'paired' to use
    Returns:
        {'size': ..., 'rmsd': ..., 'rep': ..., 'mapped': indexed_frequencies, 'paired': indexed_frequencies}
        Where indexed_frequencies has format {-2: {'A': 0.1, 'C': 0., ..., 'info': (12, 0.41)}, -1: {}, ..., 2: {}}
        When source is provided, only that entry is returned. When index is also provided, only that index of
        the source entry is returned
    Raises:
        KeyError: If the cluster can't be found after loading, or the source or the index isn't available
        IndexError: If the index is outside the range of a sequence-like source
        TypeError: If an index is requested from a source that can't be indexed
    """
    # load_cluster_info() stores each entry under a tuple of integers such as (1, 2, 123), so a string identifier
    # such as '1_2_123' is searched in that form too
    lookup_keys = [cluster]
    if isinstance(cluster, str):
        try:
            lookup_keys.append(tuple(map(int, cluster.split('_'))))
        except ValueError:  # Not an underscore delimited integer identifier. Only the raw key can match
            pass

    def get_loaded_cluster():
        """Return the first entry of self.info stored under one of the lookup keys"""
        for key in lookup_keys:
            try:
                return self.info[key]
            except KeyError:
                continue
        raise KeyError(cluster)

    try:
        cluster_data = get_loaded_cluster()
    except KeyError:
        self.load_cluster_info(ids=[cluster])
        cluster_data = get_loaded_cluster()

    if source is None:
        return cluster_data

    # Resolve the source separately from the index so that each failure reports its actual cause
    try:
        source_data = cluster_data[source]
    except KeyError:
        raise KeyError(f'The source {source} is not available. '
                       f'Try one of {", ".join(get_args(source_literal))}') from None

    if index is None:  # Must check for None, index can be 0
        return source_data

    # source in ['mapped', 'paired']:
    try:
        return source_data[index]
    except KeyError:
        raise KeyError(f'The index {index} is not available for the source {source}. '
                       f'Try one of {", ".join(map(str, source_data))}') from None
    except IndexError:
        # The keys are integers, so they must be converted before they can be joined
        raise IndexError(f'The index {index} is outside of the fragment range. '
                         f'Try one of {", ".join(map(str, cluster_data["mapped"].keys()))}') from None
    except TypeError:
        raise TypeError('You must provide "mapped" or "paired" if you wish to use an index') from None

load_cluster_info

load_cluster_info(ids: Sequence[str] = None)

Load cluster information from the fragment database source into attribute .info

Parameters:

  • ids (Sequence[str], default: None ) –

    ['1_2_123', ...]

Sets

self.info (dict[tuple[int, ...], ClusterInfo]): {(1, 2, 123): ClusterInfo, ...}, where each key is the cluster id split on '_' and converted to integers

Source code in symdesign/structure/fragment/info.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def load_cluster_info(self, ids: Sequence[str] = None):
    """Populate the .info attribute with cluster information read from pickled cluster files

    Args:
        ids: The cluster identifiers to load, ex: ['1_2_123', ...]. Each must consist of exactly three '_'
            separated fields. If None, every '.pkl' file found under self.location is loaded
    Sets:
        self.info (dict[tuple[int, ...], ClusterInfo]): Each entry is stored under the integer form of its
            cluster identifier, ex: {(1, 2, 123): ClusterInfo, ...}
    Raises:
        NotImplementedError: If self.db is set
    """
    if self.db:
        raise NotImplementedError("Can't connect to MySQL database yet")

    name_and_file = []
    if ids is None:  # Load all data
        for pickle_file in utils.get_file_paths_recursively(self.location, extension='.pkl'):
            file_name = os.path.basename(pickle_file)
            name_and_file.append((os.path.splitext(file_name)[0], pickle_file))
    else:
        for _id in ids:
            # Only the first two fields are needed to locate the parent directories
            i_type, j_type, _ = str.split(_id, '_')
            pickle_file = os.path.join(self.location, i_type, f'{i_type}_{j_type}', _id, f'{_id}.pkl')
            name_and_file.append((_id, pickle_file))

    # Read every file before touching self.info so that a failed read leaves it unmodified
    # NOTE(review): unpickling is only safe for trusted files - confirm the database location is trusted
    loaded_clusters = {}
    for cluster_name, pickle_file in name_and_file:
        cluster_key = tuple(map(int, cluster_name.split('_')))
        loaded_clusters[cluster_key] = ClusterInfo(name=cluster_name, **utils.unpickle(pickle_file))

    self.info.update(loaded_clusters)

load_cluster_info_from_text

load_cluster_info_from_text(ids: Sequence[str] = None)

Load cluster information from the fragment database source text files into attribute .info

Parameters:

  • ids (Sequence[str], default: None ) –

    ['1_2_123', ...]

Sets

self.info (dict[tuple[int, ...], ClusterInfo]): {(1, 2, 123): ClusterInfo, ...}, where each key is the cluster id split on '_' and converted to integers

Source code in symdesign/structure/fragment/info.py
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
def load_cluster_info_from_text(self, ids: Sequence[str] = None):
    """Populate the .info attribute with cluster information read from the text file form of the database

    Args:
        ids: The cluster identifiers to load, ex: ['1_2_123', ...]. Each must consist of exactly three '_'
            separated fields. If None, every file found under self.cluster_info_path is loaded
    Sets:
        self.info (dict[tuple[int, ...], ClusterInfo]): Each entry is stored under the integer form of its
            cluster identifier, ex: {(1, 2, 123): ClusterInfo, ...}
    Raises:
        NotImplementedError: If self.db is set
    """
    if self.db:
        raise NotImplementedError("Can't connect to MySQL database yet")

    name_and_file = []
    if ids is None:  # Load all data
        for text_file in utils.get_file_paths_recursively(self.cluster_info_path):
            file_name = os.path.basename(text_file)
            name_and_file.append((os.path.splitext(file_name)[0], text_file))
    else:
        for _id in ids:
            # Only the first two fields are needed to locate the parent directories
            i_type, j_type, _ = str.split(_id, '_')
            text_file = os.path.join(self.cluster_info_path, i_type, f'{i_type}_{j_type}', _id, f'{_id}.txt')
            name_and_file.append((_id, text_file))

    # Parse every file before touching self.info so that a failed parse leaves it unmodified
    loaded_clusters = {}
    for cluster_name, text_file in name_and_file:
        cluster_key = tuple(map(int, cluster_name.split('_')))
        loaded_clusters[cluster_key] = ClusterInfo.from_file(text_file)

    self.info.update(loaded_clusters)

get_cluster_id staticmethod

get_cluster_id(cluster_id: str, index: int = 3) -> str

Returns the cluster identification string according to the specified index

Parameters:

  • cluster_id (str) –

    The id of the fragment cluster. Ex: "1_2_123"

  • index (int, default: 3 ) –

    The number of leading '_'-separated fields to keep; dropped fields are replaced with '0'. Ex: index=2 gives 1_2_0

Returns: The cluster_id modified by the requested index_number

Source code in symdesign/structure/fragment/info.py
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
@staticmethod
def get_cluster_id(cluster_id: str, index: int = 3) -> str:  # Todo Unused, DEPRECIATE
    """Returns the cluster identification string according the specified index

    Args:
        cluster_id: The id of the fragment cluster. Ex: "1_2_123"
        index: The index on which to return. Ex: index_number=2 gives 1_2
    Returns:
        The cluster_id modified by the requested index_number
    """
    while len(cluster_id) < 3:
        cluster_id += '0'

    cluster_id_split = cluster_id.split('_')
    if len(cluster_id_split) == 1:  # in case of 12123? -> ['12123', '']
        id_l = [cluster_id[:1], cluster_id[1:2], cluster_id[2:]]
    else:
        id_l = cluster_id_split

    info = id_l[:index]

    while len(info) < 3:  # Ensure the returned string has at least 3 indices
        info.append('0')

    return '_'.join(info)

parameterize_frag_length

parameterize_frag_length(length: int) -> tuple[int, int]

Generate fragment length range parameters for use in fragment functions

Parameters:

  • length (int) –

    The length of the fragment

Returns: The tuple that provides the range for the specified length, centered around 0. Ex: length=5 -> (-2, 3), length=6 -> (-3, 3)

Source code in symdesign/structure/fragment/info.py
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
def parameterize_frag_length(length: int) -> tuple[int, int]:
    """Convert a fragment length into the pair of index bounds that span the fragment around a central index of 0

    Args:
        length: The length of the fragment
    Returns:
        The (lower, upper) bounds. The lower bound is the negated floor of half the length. The upper bound is
            that floor plus one when the length is odd and the floor itself when the length is even
            ex: length=5 -> (-2, 3), length=6 -> (-3, 3)
    """
    is_even = length % 2 != 1
    if is_even:
        logger.warning(f"{parameterize_frag_length.__name__}: {length} is an even integer which isn't symmetric about "
                       'a single residue. Ensure this is what you want')

    # The number of residues extending to each side of the central index
    flank = math.floor(length / 2)
    # Odd lengths extend the upper bound by one so that the bounds span every index of the fragment
    upper = flank if is_even else flank + 1

    return -flank, upper