info

FragmentInfo

FragmentInfo(source: str = utils.path.biological_interfaces, fragment_length: int = 5, sql: bool = False, **kwargs)

Stores all Fragment metadata for a particular FragmentDatabase

Parameters:

source (str, default: biological_interfaces ) –

Which type of information to use
fragment_length (int, default: 5 ) –

What is the length of the fragment database
sql (bool, default: False ) –

Whether the database is stored in an SQL table
**kwargs –

Source code in symdesign/structure/fragment/info.py

def __init__(self, source: str = utils.path.biological_interfaces, fragment_length: int = 5, sql: bool = False,
             **kwargs):
    """Construct the instance, set up empty information containers and load the database statistics

    Args:
        source: Which type of information to use
        fragment_length: What is the length of the fragment database
        sql: Whether the database is stored in SQL table. Not supported yet, so True always raises
        **kwargs: Accepted so that extra keyword arguments don't fail. Not used by this constructor
    Raises:
        NotImplementedError: If sql is True
    """
    super().__init__()  # object
    self.cluster_info_path = utils.path.intfrag_cluster_info_dirpath
    self.fragment_length = fragment_length
    self.fragment_range = parameterize_frag_length(fragment_length)
    # Cluster records. Filled by load_cluster_info() and load_cluster_info_from_text()
    self.info = {}
    self.source = source
    self.statistics = {}
    # {cluster_id: [[mapped, paired, {max_weight_counts}, ...], ..., frequencies: {'A': 0.11, ...}}
    #  ex: {'1_0_0': [[0.540, 0.486, {-2: 67, -1: 326, ...}, {-2: 166, ...}], 2749]

    if sql:
        # No SQL backend is implemented. self.db is deliberately left unset on this path
        raise NotImplementedError("Can't connect to SQL database yet")

    # self.source == 'directory':
    # Todo initialize as local directory
    self.db = False

    self._load_db_statistics()

location `property`

location: AnyStr | None

Provide the location where fragments are stored

aa_frequencies `property`

aa_frequencies: dict[protein_letters_alph1, float]

Retrieve database specific amino acid representation frequencies

Returns:

dict[protein_letters_alph1, float] –

{'A': 0.11, 'C': 0.03, 'D': 0.53, ...}

retrieve_cluster_info

retrieve_cluster_info(cluster: str = None, source: source_literal = None, index: str = None) -> dict[str, int | float | str | dict[int, dict[protein_letters_literal | str, float | tuple[int, float]]]]

Return information from the fragment information database by cluster_id, information source, and source index

Parameters:

cluster (str, default: None ) –

A cluster_id to get information about
source (source_literal, default: None ) –

The source of information to retrieve. Must be one of 'size', 'rmsd', 'rep', 'mapped', or 'paired'
index (str, default: None ) –

The index to gather information from. Source must be one of 'mapped' or 'paired' to use

Returns: {'size': ..., 'rmsd': ..., 'rep': ..., 'mapped': indexed_frequencies, 'paired': indexed_frequencies} Where indexed_frequencies has format {-2: {'A': 0.1, 'C': 0., ..., 'info': (12, 0.41)}, -1: {}, ..., 2: {}}

Source code in symdesign/structure/fragment/info.py

def retrieve_cluster_info(self, cluster: str = None, source: source_literal = None, index: str = None) -> \
        dict[str, int | float | str | dict[int, dict[sequence.protein_letters_literal | str, float | tuple[int, float]]]]:
    """Return information from the fragment information database by cluster_id, information source, and source index

    The cluster is loaded with load_cluster_info() when it isn't present in .info yet

    Args:
        cluster: A cluster_id to get information about
        source: The source of information to retrieve. Must be one of 'size', 'rmsd', 'rep', 'mapped', or 'paired'
        index: The index to gather information from. Source must be one of 'mapped' or 'paired' to use
    Returns:
        {'size': ..., 'rmsd': ..., 'rep': ..., 'mapped': indexed_frequencies, 'paired': indexed_frequencies}
        Where indexed_frequencies has format {-2: {'A': 0.1, 'C': 0., ..., 'info': (12, 0.41)}, -1: {}, ..., 2: {}}
        When source is given, only that entry is returned. When index is given too, only that index of the entry
    Raises:
        KeyError: If the cluster can't be found after loading, or the source/index lookup misses a key
        IndexError: If index is out of range for the requested source
        TypeError: If the requested source can't be indexed
    """
    # load_cluster_info() files each entry under a tuple of ints, ex: '1_2_123' -> (1, 2, 123),
    # so a str cluster has to be checked under both spellings
    keys = [cluster]
    if isinstance(cluster, str):
        try:
            keys.append(tuple(map(int, cluster.split('_'))))
        except ValueError:
            pass  # Not an 'i_j_k' style id. Let load_cluster_info() report the problem

    if not any(key in self.info for key in keys):
        self.load_cluster_info(ids=[cluster])

    for key in keys:
        if key in self.info:
            cluster_data = self.info[key]
            break
    else:
        raise KeyError(cluster)

    if source is None:
        return cluster_data
    if index is None:  # Must check for None, index can be 0
        return cluster_data[source]

    # source in ['mapped', 'paired']:
    try:
        return cluster_data[source][index]
    except KeyError as error:
        raise KeyError(f'The source {source} is not available. '
                       f'Try one of {", ".join(get_args(source_literal))}') from error
    except IndexError as error:
        # The keys are converted with str() as str.join() only accepts strings
        raise IndexError(f'The index {index} is outside of the fragment range. '
                         f'Try one of {", ".join(map(str, cluster_data["mapped"].keys()))}') from error
    except TypeError as error:
        raise TypeError('You must provide "mapped" or "paired" if you wish to use an index') from error

load_cluster_info

load_cluster_info(ids: Sequence[str] = None)

Load cluster information from the fragment database source into attribute .info

Parameters:

ids (Sequence[str], default: None ) –

['1_2_123', ...]

Sets

self.info (dict): keyed by the cluster id converted to a tuple of ints, ex: {(1, 2, 123): {'size': , 'rmsd': , 'rep': , 'mapped': , 'paired': }, ...}

Source code in symdesign/structure/fragment/info.py

def load_cluster_info(self, ids: Sequence[str] = None):
    """Load cluster information from the fragment database source into attribute .info

    Args:
        ids: The cluster identifiers to load, ex: ['1_2_123', ...]. If not provided, every '.pkl' file found
            under self.location is loaded
    Sets:
        self.info (dict): Each loaded ClusterInfo is stored under its identifier converted to a tuple of ints
            ex: {(1, 2, 123): ClusterInfo, ...}
    Raises:
        NotImplementedError: If self.db is set
    """
    if self.db:
        raise NotImplementedError("Can't connect to MySQL database yet")

    identified_files = []
    if ids is None:  # Load all data
        for cluster_file in utils.get_file_paths_recursively(self.location, extension='.pkl'):
            # The file name without its extension is the cluster identifier
            cluster_id = os.path.splitext(os.path.basename(cluster_file))[0]
            identified_files.append((cluster_id, cluster_file))
    else:
        # Traverse ids a single time so that any iterable, not only a re-iterable sequence, pairs up correctly
        for cluster_id in ids:
            # An identifier 'i_j_k' is stored at <location>/i/i_j/i_j_k/i_j_k.pkl
            c_id1, c_id2, _ = cluster_id.split('_')
            identified_files.append(
                (cluster_id,
                 os.path.join(self.location, c_id1, f'{c_id1}_{c_id2}', cluster_id, f'{cluster_id}.pkl')))

    # NOTE(review): utils.unpickle presumably deserializes with pickle - only point this at trusted files
    # The mapping is built completely before updating, so .info is untouched if any file fails to load
    self.info.update({tuple(map(int, cluster_id.split('_'))):
                      ClusterInfo(name=cluster_id, **utils.unpickle(cluster_file))
                      for cluster_id, cluster_file in identified_files})

load_cluster_info_from_text

load_cluster_info_from_text(ids: Sequence[str] = None)

Load cluster information from the fragment database source text files into attribute .info

Parameters:

ids (Sequence[str], default: None ) –

['1_2_123', ...]

Sets

self.info (dict): keyed by the cluster id converted to a tuple of ints, ex: {(1, 2, 123): {'size': , 'rmsd': , 'rep': , 'mapped': , 'paired': }, ...}

Source code in symdesign/structure/fragment/info.py

def load_cluster_info_from_text(self, ids: Sequence[str] = None):
    """Load cluster information from the fragment database source text files into attribute .info

    Args:
        ids: The cluster identifiers to load, ex: ['1_2_123', ...]. If not provided, every file found under
            self.cluster_info_path is loaded
    Sets:
        self.info (dict): Each loaded ClusterInfo is stored under its identifier converted to a tuple of ints
            ex: {(1, 2, 123): ClusterInfo, ...}
    Raises:
        NotImplementedError: If self.db is set
    """
    if self.db:
        raise NotImplementedError("Can't connect to MySQL database yet")

    identified_files = []
    if ids is None:  # Load all data
        for cluster_file in utils.get_file_paths_recursively(self.cluster_info_path):
            # The file name without its extension is the cluster identifier
            cluster_id = os.path.splitext(os.path.basename(cluster_file))[0]
            identified_files.append((cluster_id, cluster_file))
    else:
        # Traverse ids a single time so that any iterable, not only a re-iterable sequence, pairs up correctly
        for cluster_id in ids:
            # An identifier 'i_j_k' is stored at <cluster_info_path>/i/i_j/i_j_k/i_j_k.txt
            c_id1, c_id2, _ = cluster_id.split('_')
            identified_files.append(
                (cluster_id,
                 os.path.join(self.cluster_info_path, c_id1, f'{c_id1}_{c_id2}', cluster_id, f'{cluster_id}.txt')))

    # The mapping is built completely before updating, so .info is untouched if any file fails to parse
    self.info.update({tuple(map(int, cluster_id.split('_'))): ClusterInfo.from_file(cluster_file)
                      for cluster_id, cluster_file in identified_files})

get_cluster_id `staticmethod`

get_cluster_id(cluster_id: str, index: int = 3) -> str

Returns the cluster identification string according to the specified index

Parameters:

cluster_id (str) –

The id of the fragment cluster. Ex: "1_2_123"
index (int, default: 3 ) –

The number of leading cluster fields to keep; dropped fields are replaced by 0. Ex: index=2 turns "1_2_123" into "1_2_0"

Returns: The cluster_id modified by the requested index

Source code in symdesign/structure/fragment/info.py

@staticmethod
def get_cluster_id(cluster_id: str, index: int = 3) -> str:  # Todo Unused, DEPRECIATE
    """Truncate a cluster identifier to its leading fields and fill the remainder with '0'

    Args:
        cluster_id: The id of the fragment cluster. Ex: "1_2_123"
        index: How many leading fields of the id to keep. Ex: index=2 turns "1_2_123" into "1_2_0"
    Returns:
        The underscore joined id, always containing at least three fields
    """
    # Right-pad short input with '0' so that at least three characters are available
    padded_id = cluster_id.ljust(3, '0')

    fields = padded_id.split('_')
    if len(fields) == 1:
        # No separator present, ex: '12123'. Use the first two characters and the remainder as the fields
        fields = [padded_id[:1], padded_id[1:2], padded_id[2:]]

    kept_fields = fields[:index]
    # Top up with '0' fields until three are present
    kept_fields.extend('0' for _ in range(3 - len(kept_fields)))

    return '_'.join(kept_fields)

parameterize_frag_length

parameterize_frag_length(length: int) -> tuple[int, int]

Generate fragment length range parameters for use in fragment functions

Parameters:

length (int) –

The length of the fragment

Returns: The tuple that provides the range for the specified length centered around 0, ex: length=5 -> (-2, 3), length=6 -> (-3, 3)

Source code in symdesign/structure/fragment/info.py

def parameterize_frag_length(length: int) -> tuple[int, int]:
    """Compute the lower and upper index bounds of a fragment with the given length, centered on index 0

    An even length can't be centered on a single residue, so a warning is logged in that case

    Args:
        length: The length of the fragment
    Returns:
        The (lower, upper) bounds for the specified length
            ex: length=5 -> (-2, 3), length=6 -> (-3, 3)
    """
    is_odd = length % 2 == 1
    if not is_odd:
        logger.warning(f"{parameterize_frag_length.__name__}: {length} is an even integer which isn't symmetric about "
                       'a single residue. Ensure this is what you want')

    # The number of residues extending to each side of the central index
    half_width = math.floor(length / 2)
    # Only an odd length gets the extra position on the upper bound
    upper_bound = half_width + 1 if is_odd else half_width

    return -half_width, upper_bound

info

FragmentInfo

location property

aa_frequencies property

retrieve_cluster_info

load_cluster_info

load_cluster_info_from_text

get_cluster_id staticmethod

parameterize_frag_length

location `property`

aa_frequencies `property`

get_cluster_id `staticmethod`