foldcomp_utils#

Utilities for processing and manipulating protein structures from foldcomp.

class prxteinmpnn.utils.foldcomp_utils.FoldCompDatabaseEnum(value)[source]#

Bases: Enum

Enum for FoldComp databases.

ESMATLAS_FULL = 'esmatlas'#
ESMATLAS_v2023_02 = 'esmatlas_v2023_02'#
ESMATLAS_HIGH_QUALITY = 'highquality_clust30'#
AFDB_UNIPROT_V4 = 'afdb_uniprot_v4'#
AFDB_SWISSPROT_V4 = 'afdb_swissprot_v4'#
AFDB_REP_V4 = 'afdb_rep_v4'#
AFDB_REP_DARK_V4 = 'afdb_rep_dark_v4'#
AFDB_H_SAPIENS = 'afdb_h_sapiens'#
AFDB_A_THALIANA = 'a_thaliana'#
AFDB_C_ALBICANS = 'c_albicans'#
AFDB_C_ELEGANS = 'c_elegans'#
AFDB_D_DISCOIDEUM = 'd_discoideum'#
AFDB_D_MELANOGASTER = 'd_melanogaster'#
AFDB_D_RERIO = 'd_rerio'#
AFDB_E_COLI = 'e_coli'#
AFDB_G_MAX = 'g_max'#
AFDB_M_JANNASCHII = 'm_jannaschii'#
AFDB_M_MUSCULUS = 'm_musculus'#
AFDB_O_SATIVA = 'o_sativa'#
AFDB_R_NORVEGICUS = 'r_norvegicus'#
AFDB_S_CEREVISIAE = 's_cerevisiae'#
AFDB_S_POMBE = 's_pombe'#
AFDB_Z_MAYS = 'z_mays'#
prxteinmpnn.utils.foldcomp_utils._setup_foldcomp_database(database)[source]#

Set up the FoldComp database, handling sync and async contexts.

Parameters:

database (FoldCompDatabaseEnum) – The FoldCompDatabase enum value specifying which database to set up.

Return type:

None

Returns:

None

Example

>>> _setup_foldcomp_database(FoldCompDatabaseEnum.ESMATLAS_FULL)
prxteinmpnn.utils.foldcomp_utils._from_fcz(proteins)[source]#

Retrieve protein structures from the given FoldComp database object.

Parameters:

proteins (FoldcompDatabase) – The FoldComp protein database object.

Return type:

Iterator[ProteinStructure]

Returns:

An iterator over ProteinStructure objects for the proteins in the given FoldComp database object.

prxteinmpnn.utils.foldcomp_utils.get_protein_structures(protein_ids, database=FoldCompDatabaseEnum.AFDB_REP_V4)[source]#

Retrieve protein structures from the FoldComp database.

Parameters:
  • protein_ids (Sequence[str]) – A sequence of protein IDs to retrieve.

  • database (FoldCompDatabaseEnum) – The FoldCompDatabase enum value specifying which database to use.

Return type:

Iterator[ProteinStructure]

Returns:

An iterator over ProteinStructure objects containing the structure data for the specified protein IDs.

Example

>>> ids = ["P12345", "Q67890"]
>>> structures = get_protein_structures(ids)
>>> for struct in structures:
...     print(struct)
prxteinmpnn.utils.foldcomp_utils.model_from_id(protein_ids, model_weights=None, model_version=None)[source]#

Get the MPNN model and inputs for specific protein IDs.

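Parameters:
  • protein_ids – The protein ID or IDs to retrieve structures for (the example below passes a single ID string).

  • model_weights – Optional model weights setting. Defaults to None; the accepted values are not documented on this page.

  • model_version – Optional model version setting. Defaults to None; the accepted values are not documented on this page.
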
Return type:

tuple[PyTree[str, 'P'], Iterator[ModelInputs]]

Returns:

A tuple containing the MPNN model parameters and model inputs.

Raises:

ValueError – If no protein structures are found for the given IDs.

Example

>>> model, inputs = model_from_id("P12345")
>>> # Use model and inputs for inference