Source code for pdb_numpy.coor

#!/usr/bin/env python3
# coding: utf-8

import logging
import os
import copy
import numpy as np
from urllib.error import HTTPError


from .data.res_dict import AA_DICT, AA_NA_DICT
from . import geom
from .format import mmcif, pdb, pqr, gro
from .geom import distance_matrix


# Logging
logger = logging.getLogger(__name__)


[docs] class Coor: """The Coor class is a coordinate and topology object based on coordinates like pdb or gro files. It has attributes such as 'models', 'crystal_pack', and 'active_model' that store a list of Model objects, the crystal packing as a string, and the index of the active model, respectively. The class provides methods to read pdb, pqr, mmcif files, parse pdb lines, download pdb files from the PDB database, and write pdb and pqr files. Additionally, it has methods to select atoms, retrieve amino acid sequences, and change the order of atoms in the model. Attributes ---------- models : list List of Model objects crystal_pack : str Crystal Packing as a string active_model : int Index of the active model conect : dict Dictionary of atom connections data_mmCIF : dict Dictionary of mmCIF data symmetry : str Symmetry information transformation : str Transformation information Methods ------- read(file_in) Read a pdb file and store atom informations as a dictionnary od numpy array. The fonction can also read pqr files if the file extension is .pqr select_atoms(select) Return a list of atom index corresponding to the selection simple_select_atoms(select) Return a list of atom index corresponding to a simple selection select_index(select) Return a list of atom index corresponding to the selection dist_under_index(index, dist) Return a list of atom index corresponding to the distance get_index_select(select) Return a list of atom index corresponding to the selection get_aa_seq(model_num=0) Return a string of amino acid sequence get_aa_DL_seq(model_num=0) Return a string of amino acid sequence with disulfide bonds """ @property def len(self): return self.models[self.active_model].len @property def total_len(self): total_len = 0 for model in self.models: total_len += model.len return total_len @property def model_num(self): return len(self.models) @property def field(self): return self.models[self.active_model].atom_dict["field"] @property def num(self): return self.models[self.active_model].atom_dict["num_resid_uniqresid"][:, 0] @property def name(self): return self.models[self.active_model].atom_dict["name_resname_elem"][:, 0] @property def resname(self): return self.models[self.active_model].atom_dict["name_resname_elem"][:, 1] @property def alterloc(self): return self.models[self.active_model].atom_dict["alterloc_chain_insertres"][ :, 0 ] @property def chain(self): return self.models[self.active_model].atom_dict["alterloc_chain_insertres"][ :, 1 ] @property def insertres(self): return self.models[self.active_model].atom_dict["alterloc_chain_insertres"][ :, 2 ] @property def elem(self): return self.models[self.active_model].atom_dict["name_resname_elem"][:, 2] @property def resid(self): return self.models[self.active_model].atom_dict["num_resid_uniqresid"][:, 1] @property def uniq_resid(self): return self.models[self.active_model].atom_dict["num_resid_uniqresid"][:, 2] @property def residue(self): return self.models[self.active_model].atom_dict["num_resid_uniqresid"][:, 2] @property def occ(self): return self.models[self.active_model].atom_dict["occ_beta"][:, 0] @property def beta(self): return self.models[self.active_model].atom_dict["occ_beta"][:, 1] @property def xyz(self): return self.models[self.active_model].atom_dict["xyz"] @property def x(self): return self.models[self.active_model].atom_dict["xyz"][:, 0] @property def y(self): return self.models[self.active_model].atom_dict["xyz"][:, 1] @property def z(self): return self.models[self.active_model].atom_dict["xyz"][:, 2] @property def com(self): """Return the center of mass of the active model""" return np.average(self.models[self.active_model].atom_dict["xyz"], axis=0) def __init__(self, coor_in=None, pdb_lines=None, pdb_id=None): self.active_model = 0 self.models = [] self.crystal_pack = "" self.data_mmCIF = None self.symmetry = "" self.transformation = "" self.data_mmCIF = {} self.conect = {} if coor_in is not None: self.read(coor_in) elif pdb_lines is not None: pdb_coor = pdb.parse(pdb_lines=pdb_lines) self.models = pdb_coor.models self.crystal_pack = pdb_coor.crystal_pack self.transformation = pdb_coor.transformation self.symmetry = pdb_coor.symmetry elif pdb_id is not None: try: pdb_coor = pdb.fetch(pdb_ID=pdb_id) except HTTPError: logger.warning( f"PDB ID {pdb_id} not found in `.pdb` format, fetching the mmCIF format." ) pdb_coor = mmcif.fetch(pdb_ID=pdb_id) self.models = pdb_coor.models self.crystal_pack = pdb_coor.crystal_pack self.transformation = pdb_coor.transformation self.symmetry = pdb_coor.symmetry
[docs] def read(self, file_in): """Read a pdb/pqr/gro/cif file and return atom information as a Coor object. It determines the file format based on the file extension and parses the lines accordingly. If the file extension is not recognized, it assumes it is a pdb file. Parameters ---------- file_in : str Path of the pdb file to read Returns ------- None Examples -------- >>> prot_coor = Coor('1y0m.pdb') Succeed to read file ...1y0m.pdb , 648 atoms found >>> prot_coor.read('1y0m.gro') Succeed to read file ...1y0m.gro , 648 atoms found """ file_lines = open(file_in) lines = file_lines.readlines() if str(file_in).endswith(".gro"): gro_coor = gro.parse(gro_lines=lines) self.models = gro_coor.models self.crystal_pack = gro_coor.crystal_pack elif str(file_in).endswith(".pqr"): pdb_coor = pqr.parse(pqr_lines=lines) self.models = pdb_coor.models self.crystal_pack = pdb_coor.crystal_pack self.transformation = pdb_coor.transformation self.symmetry = pdb_coor.symmetry elif str(file_in).endswith(".pdb"): pdb_coor = pdb.parse(pdb_lines=lines) self.models = pdb_coor.models self.crystal_pack = pdb_coor.crystal_pack self.transformation = pdb_coor.transformation self.symmetry = pdb_coor.symmetry self.conect = pdb_coor.conect elif str(file_in).endswith(".cif"): mmcif_coor = mmcif.parse(mmcif_lines=lines) self.data_mmCIF = mmcif_coor.data_mmCIF self.models = mmcif_coor.models self.crystal_pack = mmcif_coor.crystal_pack self.transformation = mmcif_coor.transformation self.symmetry = mmcif_coor.symmetry else: logger.warning( "File name doesn't finish with .pdb" " read it as .pdb anyway" ) pdb_coor = pdb.parse(pdb_lines=lines) self.models = pdb_coor.models self.crystal_pack = pdb_coor.crystal_pack self.transformation = pdb_coor.transformation self.symmetry = pdb_coor.symmetry logger.info( f"Succeed to read file { os.path.relpath(file_in)} \n" f"{self.len} atoms found" )
[docs] def write(self, file_out, overwrite=False): """Write a pdb/pqr/gro/cif file from a Coor object. It determines the file format based on the file extension and writes the lines accordingly. If the file extension is not recognized, it assumes it is a pdb file. Parameters ---------- file_out : str Path of the pdb file to write overwrite : bool If False, check if the file exists and don't overwrite it. If True, overwrite the file without asking for confirmation. Returns ------- None """ if str(file_out).endswith(".pdb"): pdb.write(self, file_out, overwrite) elif str(file_out).endswith(".pqr"): pqr.write(self, file_out, overwrite) elif str(file_out).endswith(".cif"): mmcif.write(self, file_out, overwrite) elif str(file_out).endswith(".gro"): gro.write(self, file_out, overwrite) else: logger.warning( "File name doesn't finish with pdb/pqr/cif/gro read it as ``.pdb``." ) pdb.write(self, file_out, overwrite)
[docs] def change_order(self, field, order_list): """Change the order of the atoms in the model. The `change_order()` function takes in two arguments, `field` and `order_list`, which are used to change the order of the atoms in the model. The `field` argument specifies which field to change the order by, while `order_list` is a list of the new order. The function then modifies the atom dictionary in each model to match the new order. Parameters ---------- field : str The field to change the order by. order_list : list List of the new order Returns ------- None Change the order of the atoms in the model Examples -------- >>> test = Coor(pdb_id='1jd4') >>> test.change_order('chain', ['B', 'C', 'A']) """ keyword_dict = { "num": ["num_resid_uniqresid", 0], "resname": ["name_resname_elem", 1], "chain": ["alterloc_chain_insertres", 1], "name": ["name_resname_elem", 0], "altloc": ["alterloc_chain_insertres", 0], "resid": ["num_resid_uniqresid", 1], "resid": ["num_resid_uniqresid", 2], "beta": ["occ_beta", 1], "occupancy": ["occ_beta", 0], "x": ["xyz", 0], "y": ["xyz", 1], "z": ["xyz", 2], } if field not in keyword_dict: raise ValueError("Field not found") keyword = keyword_dict[field][0] index = keyword_dict[field][1] field_uniqs = np.unique(self.models[0].atom_dict[keyword][:, index]) for field_uniq in field_uniqs: if field_uniq not in order_list: logger.info( f"Field {field_uniq} not found in order list, will be added at the end" ) order_list.append(field_uniq) if isinstance(order_list[0], str): order_list = np.array(order_list, dtype="U") new_order = np.array([], dtype=np.int32) for value in order_list: new_order = np.append( new_order, np.where(self.models[0].atom_dict[keyword][:, index] == value)[0], ) assert len(new_order) == self.len, "Inconsistent number of atoms" for model in self.models: for key in [ "alterloc_chain_insertres", "name_resname_elem", "num_resid_uniqresid", "xyz", "occ_beta", ]: model.atom_dict[key] = model.atom_dict[key][new_order, :] self.reset_residue_index() return
[docs] def reset_residue_index(self): """Reset the residue index to the original index of the pdb file. This function resets the residue index in the model to the original index of the pdb file. It loops through each model in the Coor object and for each atom in the model, it compares the residue number with the last residue number seen. If they are different, it increments the residue number, and sets the residue number of the current atom to the new residue number. This effectively resets the residue index to the original index of the pdb file. The function does not return anything, it simply modifies the residue number of each atom in the `Coor` object. Returns ------- None Change the residue index in the model """ residue = -1 last_residue = self.models[0].atom_dict["num_resid_uniqresid"][0, 2] for model in self.models: for i in range(model.atom_dict["num_resid_uniqresid"].shape[0]): if model.atom_dict["num_resid_uniqresid"][i, 2] != last_residue: residue += 1 last_residue = model.atom_dict["num_resid_uniqresid"][i, 2] # model.atom_dict["num_resid_uniqresid"][i, 1] = residue model.atom_dict["num_resid_uniqresid"][i, 2] = residue return
[docs] def select_index(self, indexes): """The `select_index()` function selects atoms from a Coor object based on the provided `indexes` and returns a new `Coor` object with the selected atoms. Parameters ---------- self : Coor Coor object indexes : list List of indexes frame : int Frame number for the selection, default is 0 Returns ------- Coor a new Coor object with the selected atoms """ # new_coor = copy.deepcopy(self) # for i in range(len(new_coor.models)): # new_coor.models[i] = new_coor.models[i].select_index(indexes) new_coor = Coor() new_coor.models = [model.select_index(indexes) for model in self.models] new_coor.active_model = self.active_model new_coor.crystal_pack = self.crystal_pack new_coor.data_mmCIF = self.data_mmCIF new_coor.symmetry = self.symmetry new_coor.transformation = self.transformation new_coor.data_mmCIF = self.data_mmCIF # Renumber the atom indexes starting from 1 for model in new_coor.models: model.num = np.arange(1, model.len + 1) new_coor.conect = {} # print(self.num[indexes]) index_dict = { old_idx: new_idx + 1 for new_idx, old_idx in enumerate(self.num[indexes]) } for key in self.conect: if key in index_dict: new_key = index_dict[key] new_values = [ index_dict[val] for val in self.conect[key] if val in index_dict ] if len(new_values) > 0: new_coor.conect[new_key] = new_values return new_coor
[docs] def get_index_select(self, selection, frame=0): """Return index from the `Coor` object based on the selection string. Parameters ---------- self : Coor Coor object selection : str Selection string frame : int Frame number for the selection, default is 0 Returns ------- list a list of indexes """ indexes = self.models[frame].get_index_select(selection) return indexes
[docs] def select_atoms(self, selection, frame=0): """This method allows selecting atoms from the PDB file based on a selection string. The selection string follows a syntax similar to that used in the VMD molecular visualization software. Parameters ---------- self : Coor Coor object selection : str Selection string frame : int Frame number for the selection, default is 0 Returns ------- Coor a new Coor object with the selected atoms """ indexes = self.models[frame].get_index_select(selection) return self.select_index(indexes)
[docs] def get_aa_seq(self, gap_in_seq=True, frame=0): """Get the amino acid sequence from a `Coor` object. This function takes a Coor object, selects the CA atoms using the `select_atoms()` method with the argument name CA, and returns a dictionary with the amino acid sequence of each chain in the protein, where the key is the chain identifier and the value is the sequence. The function first creates empty dictionaries `seq_dict` and `aa_num_dict`. These will be used to store the sequence and the number of the last amino acid in each chain, respectively. Then, it loops through each CA atom in the selected atoms and retrieves the chain identifier, residue name, and residue number. If the chain identifier is not in `seq_dict`, the function initializes an empty string for the sequence of that chain and stores the residue number in `aa_num_dict`. Then, if the residue name is recognized as a standard amino acid in `AA_DICT`, the function adds the corresponding one-letter code to the sequence string for the corresponding chain in `seq_dict`, and updates the last amino acid number in aa_num_dict. If the residue name is not recognized, the function logs a warning message. If the residue number is not consecutive to the previous one and `gap_in_seq` is set to True, the function adds gaps to the sequence. Finally, the function returns `seq_dict`, the dictionary with the amino acid sequences of each chain in the protein. Parameters ---------- self : Coor Coor object gap_in_seq : bool, optional if True, add gaps in the sequence, by default True frame : int Frame number for the selection, default is 0 Returns ------- dict Dictionary with chain as key and sequence as value. Examples -------- >>> prot_coor = Coor('1y0m.pdb') >>> prot_coor.get_aa_seq() {'A': 'TFKSAVKALFDYKAQREDELTFTKSAIIQNVEKQDGGWWRGDYGGKKQLWFPSNYVEEMIN'} .. warning:: If atom chains are not arranged sequentially (A,A,A,B,B,A,A,A ...), the first atom seq will be overwritten by the last one. """ # Get CA atoms CA_sel = self.select_atoms("name CA and not altloc B C D E F", frame=frame) seq_dict = {} aa_num_dict = {} for chain, res_name, resid in zip( CA_sel.models[frame].atom_dict["alterloc_chain_insertres"][:, 1], CA_sel.models[frame].atom_dict["name_resname_elem"][:, 1], CA_sel.models[frame].atom_dict["num_resid_uniqresid"][:, 1], ): if chain not in seq_dict: seq_dict[chain] = "" aa_num_dict[chain] = resid if res_name in AA_DICT: if resid != aa_num_dict[chain] + 1 and len(seq_dict[chain]) != 0: logger.info( f"Residue {chain}:{res_name}:{resid} is " f"not consecutive, there might be missing " f"residues" ) if gap_in_seq: seq_dict[chain] += "-" * (resid - aa_num_dict[chain] - 1) seq_dict[chain] += AA_DICT[res_name] aa_num_dict[chain] = resid else: logger.warning(f"Residue {res_name} in chain {chain} not recognized") return seq_dict
[docs] def get_aa_na_seq(self, gap_in_seq=True, frame=0): """Get the amino acid sequence from a `Coor` object. This function takes a Coor object, selects the CA/P atoms using the `select_atoms()` method with the argument name CA/P, and returns a dictionary with the amino acid sequence of each chain in the protein, where the key is the chain identifier and the value is the sequence. The function first creates empty dictionaries `seq_dict` and `aa_num_dict`. These will be used to store the sequence and the number of the last amino acid in each chain, respectively. Then, it loops through each CA atom in the selected atoms and retrieves the chain identifier, residue name, and residue number. If the chain identifier is not in `seq_dict`, the function initializes an empty string for the sequence of that chain and stores the residue number in `aa_num_dict`. Then, if the residue name is recognized as a standard amino acid in `AA_DICT`, the function adds the corresponding one-letter code to the sequence string for the corresponding chain in `seq_dict`, and updates the last amino acid number in aa_num_dict. If the residue name is not recognized, the function logs a warning message. If the residue number is not consecutive to the previous one and `gap_in_seq` is set to True, the function adds gaps to the sequence. Finally, the function returns `seq_dict`, the dictionary with the amino acid sequences of each chain in the protein. Parameters ---------- self : Coor Coor object gap_in_seq : bool, optional if True, add gaps in the sequence, by default True frame : int Frame number for the selection, default is 0 Returns ------- dict Dictionary with chain as key and sequence as value. Examples -------- >>> prot_coor = Coor('1y0m.pdb') >>> prot_coor.get_aa_seq() {'A': 'TFKSAVKALFDYKAQREDELTFTKSAIIQNVEKQDGGWWRGDYGGKKQLWFPSNYVEEMIN'} .. warning:: If atom chains are not arranged sequentially (A,A,A,B,B,A,A,A ...), the first atom seq will be overwritten by the last one. """ # Get CA atoms CA_sel = self.select_atoms( "((protein and name CA) or (dna and name P)) and not altloc B C D E F", frame=frame, ) seq_dict = {} aa_num_dict = {} for chain, res_name, resid in zip( CA_sel.models[frame].atom_dict["alterloc_chain_insertres"][:, 1], CA_sel.models[frame].atom_dict["name_resname_elem"][:, 1], CA_sel.models[frame].atom_dict["num_resid_uniqresid"][:, 1], ): if chain not in seq_dict: seq_dict[chain] = "" aa_num_dict[chain] = resid if res_name in AA_NA_DICT: if resid != aa_num_dict[chain] + 1 and len(seq_dict[chain]) != 0: logger.info( f"Residue {chain}:{res_name}:{resid} is " f"not consecutive, there might be missing " f"residues" ) if gap_in_seq: seq_dict[chain] += "-" * (resid - aa_num_dict[chain] - 1) seq_dict[chain] += AA_NA_DICT[res_name] aa_num_dict[chain] = resid else: logger.warning(f"Residue {res_name} in chain {chain} not recognized") return seq_dict
[docs] def get_aa_DL_seq(self, gap_in_seq=True, frame=0): """Get the amino acid sequence from a coor object. The function calculates the amino acid sequence based on the CA atoms in the structure. L or D form is determined using CA-N-C-CB angle Angle should take values around +34° and -34° for L- and D-amino acid residues. if amino acid is in D form it will be in lower case. Reference: https://onlinelibrary.wiley.com/doi/full/10.1002/prot.10320 Parameters ---------- self : Coor Coor object gap_in_seq : bool, optional if True, add gaps in the sequence, by default True frame : int Frame number for the selection, default is 0 Returns ------- dict Dictionary with chain as key and sequence as value. Examples -------- >>> prot_coor = Coor('1y0m.pdb') Succeed to read file 1y0m.pdb , 648 atoms found >>> prot_coor.get_aa_DL_seq() {'A': 'TFKSAVKALFDYKAQREDELTFTKSAIIQNVEKQDGGWWRGDYGGKKQLWFPSNYVEEMIN'} >>> prot_coor = Coor('6be9_frame_0.pdb') Succeed to read file 6be9_frame_0.pdb , 104 atoms found >>> prot_coor.get_aa_DL_seq() Residue K2 is in D form Residue N6 is in D form Residue P7 is in D form {'A': 'TkNDTnp'} .. warning:: If atom chains are not arranged sequentially (A,A,A,B,B,A,A,A ...), the first atom seq will be overwritten by the last one. """ # Get CA atoms CA_index = self.get_index_select( "name CA and not altloc B C D E F", frame=frame ) CA_sel = self.select_atoms("name CA", frame=frame) N_C_CB_sel = self.select_atoms( "name N C CB and not altloc B C D E F", frame=frame ) seq_dict = {} aa_num_dict = {} for i, chain, res_name, resid, uniq_resid in zip( CA_index, CA_sel.models[frame].atom_dict["alterloc_chain_insertres"][:, 1], CA_sel.models[frame].atom_dict["name_resname_elem"][:, 1], CA_sel.models[frame].atom_dict["num_resid_uniqresid"][:, 1], CA_sel.models[frame].atom_dict["num_resid_uniqresid"][:, 2], ): """ " for i in CA_index: chain = ( self.models[frame] .atom_dict["alterloc_chain_insertres"][i, 1] .astype(np.str_) ) res_name = ( self.models[frame].atom_dict["name_resname_elem"][i, 1].astype(np.str_) ) resid = self.models[frame].atom_dict["num_resid_uniqresid"][i, 1] uniq_resid = self.models[frame].atom_dict["num_resid_uniqresid"][i, 2] """ if chain not in seq_dict: seq_dict[chain] = "" aa_num_dict[chain] = resid if res_name in AA_DICT: if resid != aa_num_dict[chain] + 1 and len(seq_dict[chain]) != 0: logger.warning( f"Residue {chain}:{res_name}:{resid} is " "not consecutive, there might be missing " "residues" ) if gap_in_seq: seq_dict[chain] += "-" * (resid - aa_num_dict[chain] - 1) if res_name == "GLY": seq_dict[chain] += "G" else: N_index = N_C_CB_sel.get_index_select( f"name N and residue {uniq_resid}", frame=frame )[0] C_index = N_C_CB_sel.get_index_select( f"name C and residue {uniq_resid}", frame=frame )[0] CB_index = N_C_CB_sel.get_index_select( f"name CB and residue {uniq_resid}", frame=frame )[0] dihed = geom.atom_dihed_angle( self.models[frame].atom_dict["xyz"][i], N_C_CB_sel.models[frame].atom_dict["xyz"][N_index], N_C_CB_sel.models[frame].atom_dict["xyz"][C_index], N_C_CB_sel.models[frame].atom_dict["xyz"][CB_index], ) if dihed > 0: seq_dict[chain] += AA_DICT[res_name] else: logger.warning( f"Residue {AA_DICT[res_name]}{resid} is in D form" ) seq_dict[chain] += AA_DICT[res_name].lower() aa_num_dict[chain] = resid else: logger.warning(f"Residue {res_name} in chain {chain} not " "recognized") return seq_dict
[docs] def merge_models(self): """Merge all models into the first model. Returns ------- None """ field_list = [ "field", "num_resid_uniqresid", "name_resname_elem", "alterloc_chain_insertres", "occ_beta", "xyz", ] if len(self.models) > 1: uniqresid = 0 for model in self.models: model.atom_dict["num_resid_uniqresid"][:, 2] += uniqresid uniqresid = np.max(model.atom_dict["num_resid_uniqresid"][:, 2]) + 1 for field in field_list: self.models[0].atom_dict[field] = np.concatenate( [model.atom_dict[field] for model in self.models] ) self.models[0].atom_dict["num_resid_uniqresid"][:, 0] = np.arange( 1, self.models[0].atom_dict["num_resid_uniqresid"].shape[0] + 1 ) for i in range(len(self.models) - 1, 0, -1): del self.models[i] self.active_model = 0
[docs] def compute_chains_CA(self, Ca_cutoff=4.2): """Correct the chain ID's of a coor object, by checking consecutive Calphas atoms distance. If the distance is higher than ``Ca_cutoff`` , the former atoms are considered as in a different chain. Parameters ---------- Ca_cutoff : float, optional Cutoff distance between consecutive Calpha atoms, by default 4.5 Angstrom Returns ------- None """ # 65:90 CAP letters # 97:122 letters # Remove x y z for selection issues # 48:57 Digits # 192-> 1000 # Others chain_letters = np.array( [chr(i) for i in range(65, 91)] + [chr(i) for i in range(97, 120)] + [chr(i) for i in range(48, 58)] + [chr(i) for i in range(192, 1000)] ) CA_sel = self.select_atoms("name CA and not altloc B C D") for i, model in enumerate(CA_sel.models): # Identify Chain uniq_resid last_CA_xyz = model.atom_dict["xyz"][0] uniqresid = model.atom_dict["num_resid_uniqresid"][0, 2] last_chain = 0 chain_res_dict = {last_chain: [uniqresid]} for j in range(1, model.atom_dict["xyz"].shape[0]): chain = model.atom_dict["alterloc_chain_insertres"][j, 1] uniqresid = model.atom_dict["num_resid_uniqresid"][j, 2] if np.linalg.norm(model.atom_dict["xyz"][j] - last_CA_xyz) > Ca_cutoff: last_chain += 1 chain_res_dict[last_chain] = [uniqresid] else: chain_res_dict[last_chain].append(uniqresid) last_CA_xyz = model.atom_dict["xyz"][j] # Change chain ID : for chain_id in chain_res_dict: chain_index = self.models[i].get_index_select( f"residue {' '.join([str(i) for i in chain_res_dict[chain_id]])}" ) self.models[i].atom_dict["alterloc_chain_insertres"][ chain_index, 1 ] = chain_letters[chain_id % chain_letters.shape[0]]
[docs] def apply_transformation(self, index_list=None): """Apply the transformation matrix to the coordinates. Add a new model with the transformed coordinates. Parameters ---------- index_list : list, optional Index list of the transformation matrix to apply, by default all Returns ------- Coor A new Coor object with the transformation added. """ if index_list is None: index_list = self.transformation.keys() if len(self.models) != 1: logger.warning("Only one model is allowed.") return self if self.transformation == "": logger.warning("No transformation matrix found.") return self new_coor = copy.deepcopy(self) for index in index_list: matrix = np.array(self.transformation[index]["matrix"]) # indexes = np.argwhere( # np.isin(self.models[0].chain, self.transformation[index]["chains"]) # ).ravel() indexes = np.isin( self.models[0].chain, self.transformation[index]["chains"] ) model_num = matrix.shape[0] // 3 for i in range(model_num): local_matrix = matrix[i * 3 : (i + 1) * 3, 1:4] local_translation = matrix[i * 3 : (i + 1) * 3, 4] if ( not (local_matrix == np.eye(3)).all() or (local_translation != 0.0).any() ): logger.info(f"Add transformation {i}") local_model = copy.deepcopy(self.models[0]) local_model.xyz[indexes, :] = np.dot( local_model.xyz[indexes, :], local_matrix ) local_model.xyz[indexes, :] += local_translation new_coor.models.append(local_model) new_coor.merge_models() return new_coor
[docs] def add_symmetry(self): """Apply the symmetry matrix to the coordinates. Add a model for each symmetry matrix. Parameters ---------- None Returns ------- Coor A new Coor object with the symmetry added. """ if self.symmetry == "": logger.warning("No symmetry matrix found.") return self elif len(self.models) != 1: logger.warning("Only one model is allowed.") return self new_coor = copy.deepcopy(self) matrix = np.array(new_coor.symmetry["matrix"]) model_num = matrix.shape[0] // 3 for i in range(model_num): local_matrix = matrix[i * 3 : (i + 1) * 3, 1:4] local_translation = matrix[i * 3 : (i + 1) * 3, 4] if ( not (local_matrix == np.eye(3)).all() or (local_translation != 0.0).any() ): logger.info(f"Add symmetry {i}") local_model = copy.deepcopy(self.models[0]) local_model.xyz = ( np.dot(local_model.xyz, local_matrix) + matrix[i * 3 : (i + 1) * 3, 4] ) new_coor.models.append(local_model) new_coor.merge_models() return new_coor
[docs] def remove_overlap_chain(self, cutoff=1.0, frame=0): """Remove atoms that are closer than ``cutoff`` from another atom. Parameters ---------- cutoff : float, optional Cutoff distance, by default 1.0 Angstrom Returns ------- Coor A new Coor object with the overlapping atoms removed. """ chain_list = np.unique(self.models[0].chain) center_list = [] for chain in chain_list: # print(f'chain {chain}') chain_sel = self.models[frame].select_atoms(f"protein and chain {chain}") avg = np.average(chain_sel.xyz, axis=0) center_list.append(avg) center_list = np.array(center_list) dist_matrix = distance_matrix(center_list, center_list) mask = dist_matrix < cutoff # Remove lower triangle and i, (k=0) mask[np.tril_indices_from(mask, k=0)] = False indices = np.argwhere(mask) remove_chains = [] for indice in indices: indice.sort() remove_chains.append(indice[1]) remove_chains = [chain_list[i] for i in remove_chains] keep_chains = [chain for chain in chain_list if chain not in remove_chains] return self.select_atoms(f'chain {" ".join(keep_chains)}')
[docs] def copy_box(self, x=1, y=1, z=1): """Copy the box in the x, y and z direction. Parameters ---------- x : int, optional Number of copy in the x direction, by default 1 y : int, optional Number of copy in the y direction, by default 1 z : int, optional Number of copy in the z direction, by default 1 Returns ------- Coor A new Coor object with the box copied. """ if len(self.models) != 1: logger.warning("Only one model is allowed.") return self if self.crystal_pack == "": logger.warning("No crystal pack found.") return self new_coor = copy.deepcopy(self) a, b, c, alpha, beta, gamma = [float(i) for i in self.crystal_pack.split()[1:7]] v1, v2, v3 = geom.compute_unit_cell_vectors(alpha, beta, gamma, a, b, c) for i in range(x): for j in range(y): for k in range(z): if i == 0 and j == 0 and k == 0: continue else: logger.info(f"Add copy {i:2d} {j:2d} {k:2d}") local_model = copy.deepcopy(new_coor.models[0]) translation = i * v1 + j * v2 + k * v3 local_model.xyz = local_model.xyz + translation new_coor.models.append(local_model) # new_coor.crystal_pack = crystal_pack new_coor.merge_models() return new_coor