#!/usr/bin/env python3
# coding: utf-8
import logging
import os
import copy
import numpy as np
from . import select
from .geom import distance_matrix
# Logging
logger = logging.getLogger(__name__)
# Selection keyword -> [key of the backing array in ``Model.atom_dict``,
# column index inside that array].
KEYWORD_DICT = dict(
    num=["num_resid_uniqresid", 0],
    resname=["name_resname_elem", 1],
    chain=["alterloc_chain_insertres", 1],
    name=["name_resname_elem", 0],
    altloc=["alterloc_chain_insertres", 0],
    resid=["num_resid_uniqresid", 1],
    residue=["num_resid_uniqresid", 2],
    beta=["occ_beta", 1],
    occupancy=["occ_beta", 0],
    x=["xyz", 0],
    y=["xyz", 1],
    z=["xyz", 2],
)
# Container class holding the per-atom arrays of a single model.
class Model:
    """Model class for pdb_numpy.

    All per-atom data live in ``atom_dict``, a dictionary of numpy arrays
    whose first axis is the atom index.  The keys used by this class are
    ``"field"``, ``"num_resid_uniqresid"``, ``"name_resname_elem"``,
    ``"alterloc_chain_insertres"``, ``"occ_beta"`` and ``"xyz"``; the
    properties below expose individual columns of those arrays.

    Attributes
    ----------
    atom_dict : dict
        Dictionary containing the atom information
    len : int
        Number of atoms in the model
    field : numpy.ndarray
        Array containing the field of the atom
    num : numpy.ndarray
        Array containing the atom number
    name : numpy.ndarray
        Array containing the atom name
    resname : numpy.ndarray
        Array containing the residue name
    alterloc : numpy.ndarray
        Array containing the alternate location
    chain : numpy.ndarray
        Array containing the chain
    insertres : numpy.ndarray
        Array containing the insertion code
    elem : numpy.ndarray
        Array containing the element
    resid : numpy.ndarray
        Array containing the residue number
    uniq_resid : numpy.ndarray
        Array containing the unique residue id
    residue : numpy.ndarray
        Alias of ``uniq_resid``
    x : numpy.ndarray
        Array containing the x coordinate
    y : numpy.ndarray
        Array containing the y coordinate
    z : numpy.ndarray
        Array containing the z coordinate
    occ : numpy.ndarray
        Array containing the occupancy
    beta : numpy.ndarray
        Array containing the bfactor
    xyz : numpy.ndarray
        Array containing the x, y and z coordinates

    Methods
    -------
    select_atoms(selection)
        Select atoms from the model
    get_index_select(selection)
        Select atoms from the model and return their indexes
    select_index(indexes)
        Return a new model restricted to the given indexes
    """

    def __init__(self):
        self.atom_dict = {}

    @property
    def len(self):
        """Number of atoms (first dimension of ``atom_dict["field"]``)."""
        return self.atom_dict["field"].shape[0]

    # ------------------------------------------------------------------
    # Column accessors.  Except for ``field`` and ``xyz`` (whose setters
    # replace the whole array), setters assign in place into one column.
    # ------------------------------------------------------------------
    @property
    def field(self):
        return self.atom_dict["field"]

    @field.setter
    def field(self, value):
        self.atom_dict["field"] = value

    @property
    def num(self):
        return self.atom_dict["num_resid_uniqresid"][:, 0]

    @num.setter
    def num(self, value):
        self.atom_dict["num_resid_uniqresid"][:, 0] = value

    @property
    def resid(self):
        return self.atom_dict["num_resid_uniqresid"][:, 1]

    @resid.setter
    def resid(self, value):
        self.atom_dict["num_resid_uniqresid"][:, 1] = value

    @property
    def uniq_resid(self):
        return self.atom_dict["num_resid_uniqresid"][:, 2]

    @uniq_resid.setter
    def uniq_resid(self, value):
        self.atom_dict["num_resid_uniqresid"][:, 2] = value

    @property
    def residue(self):
        return self.atom_dict["num_resid_uniqresid"][:, 2]

    @residue.setter
    def residue(self, value):
        self.atom_dict["num_resid_uniqresid"][:, 2] = value

    @property
    def name(self):
        return self.atom_dict["name_resname_elem"][:, 0]

    @name.setter
    def name(self, value):
        # Same array as the getter (the key is "name_resname_elem").
        self.atom_dict["name_resname_elem"][:, 0] = value

    @property
    def resname(self):
        return self.atom_dict["name_resname_elem"][:, 1]

    @resname.setter
    def resname(self, value):
        self.atom_dict["name_resname_elem"][:, 1] = value

    @property
    def elem(self):
        return self.atom_dict["name_resname_elem"][:, 2]

    @elem.setter
    def elem(self, value):
        # The element is the third column of "name_resname_elem".
        self.atom_dict["name_resname_elem"][:, 2] = value

    @property
    def alterloc(self):
        return self.atom_dict["alterloc_chain_insertres"][:, 0]

    @alterloc.setter
    def alterloc(self, value):
        self.atom_dict["alterloc_chain_insertres"][:, 0] = value

    @property
    def chain(self):
        return self.atom_dict["alterloc_chain_insertres"][:, 1]

    @chain.setter
    def chain(self, value):
        self.atom_dict["alterloc_chain_insertres"][:, 1] = value

    @property
    def insertres(self):
        return self.atom_dict["alterloc_chain_insertres"][:, 2]

    @insertres.setter
    def insertres(self, value):
        self.atom_dict["alterloc_chain_insertres"][:, 2] = value

    @property
    def occ(self):
        return self.atom_dict["occ_beta"][:, 0]

    @occ.setter
    def occ(self, value):
        self.atom_dict["occ_beta"][:, 0] = value

    @property
    def beta(self):
        return self.atom_dict["occ_beta"][:, 1]

    @beta.setter
    def beta(self, value):
        self.atom_dict["occ_beta"][:, 1] = value

    @property
    def xyz(self):
        return self.atom_dict["xyz"]

    @xyz.setter
    def xyz(self, value):
        self.atom_dict["xyz"] = value

    @property
    def x(self):
        return self.atom_dict["xyz"][:, 0]

    @x.setter
    def x(self, value):
        self.atom_dict["xyz"][:, 0] = value

    @property
    def y(self):
        return self.atom_dict["xyz"][:, 1]

    @y.setter
    def y(self, value):
        self.atom_dict["xyz"][:, 1] = value

    @property
    def z(self):
        return self.atom_dict["xyz"][:, 2]

    @z.setter
    def z(self, value):
        self.atom_dict["xyz"][:, 2] = value

    def simple_select_atoms(self, column, values, operator="=="):
        """Select atoms from the PDB file based on the selection tokens.

        Selection tokens are simple selection containing only one
        keyword, operator, and values.

        The keywords :
        - `"num"`
        - `"resname"`
        - `"chain"`
        - `"name"`
        - `"altloc"`
        - `"resid"`
        - `"residue"`
        - `"beta"`
        - `"occupancy"`
        - `"x"`, `"y"`, `"z"`.

        The operators are:
        - `"=="`
        - `"!="`
        - `">"`
        - `">="`
        - `"<"`
        - `"<="`
        - `"isin"`

        For the text keywords, a single value ending with ``*``
        (e.g. ``["H*"]``) selects every atom whose field starts with the
        given prefix.

        Parameters
        ----------
        self : Model
            Model object
        column : str
            Keyword for the selection
        values : list or str
            Value(s) for the selection.  Any other type is compared as is.
        operator : str
            Operator for the selection

        Returns
        -------
        numpy.ndarray
            boolean mask with one value per atom

        Raises
        ------
        ValueError
            If the keyword or the operator is not recognized, or if the
            operator cannot be used with several values.
        """
        if column not in KEYWORD_DICT:
            raise ValueError(f"Column {column} not recognized")
        col, index = KEYWORD_DICT[column]

        text_columns = ("resname", "chain", "name", "altloc")

        if isinstance(values, list):
            if column in text_columns:
                values = np.array(values, dtype="U")
                # deal with case with "name H*"; endswith() is safe on an
                # empty string, unlike indexing with [-1]
                if len(values) == 1 and values[0].endswith("*"):
                    operator = "startswith"
                    values = str(values[0][:-1])
            elif column in ("num", "resid", "residue"):
                values = np.array(values, dtype=int)
            elif column in ("beta", "occupancy", "x", "y", "z"):
                values = np.array(values, dtype=float)
            else:
                raise ValueError(f"Column {column} not recognized")

            # For "startswith", `values` is now the prefix string, so its
            # length is a character count and must not be read as a
            # number of values.
            if operator != "startswith" and len(values) > 1:
                if operator == "==":
                    operator = "isin"
                elif operator != "isin":
                    raise ValueError(
                        f"Wrong operator {operator} with multiple values"
                    )
        elif isinstance(values, str):
            # Remove the "." before checking if the string is numeric
            # Also remove the "-" if it is the first character
            # Text columns are never converted: a chain or residue name
            # such as "1" has to stay a string to match the stored values.
            if (
                column not in text_columns
                and values.replace(".", "", 1).lstrip("-").isnumeric()
            ):
                if values.find(".") == -1:
                    values = int(values)
                else:
                    values = float(values)
            else:
                values = np.array([values], dtype="U")

        data = self.atom_dict[col][:, index]

        if operator == "==":
            bool_val = data == values
        elif operator == "!=":
            bool_val = data != values
        elif operator == ">":
            bool_val = data > values
        elif operator == ">=":
            bool_val = data >= values
        elif operator == "<":
            bool_val = data < values
        elif operator == "<=":
            bool_val = data <= values
        elif operator == "isin":
            bool_val = np.isin(data, values)
        elif operator == "startswith":
            # dtype=bool keeps the mask boolean even when there is no atom
            bool_val = np.array(
                [x.startswith(values) for x in data], dtype=bool
            )
        else:
            raise ValueError(f"Operator {operator} not recognized")

        return bool_val

    def select_tokens(self, tokens):
        """Select atoms from the PDB file based on the selection tokens.

        Selection tokens are a list of tokens that can be either
        simple selection or nested selection.
        A simple selection contains only one keyword, operator, and values.
        A nested selection contains a list or sub-list of tokens.

        Parameters
        ----------
        self : Model
            Model object
        tokens : list
            List of nested tokens

        Returns
        -------
        numpy.ndarray
            boolean mask with one value per atom

        Raises
        ------
        ValueError
            If a ``within`` selection does not have exactly four tokens.
        """
        # Case for simple selection
        if select.is_simple_list(tokens):
            if tokens[1] in ("==", "!=", ">", ">=", "<", "<="):
                return self.simple_select_atoms(
                    column=tokens[0], values=tokens[2], operator=tokens[1]
                )
            return self.simple_select_atoms(column=tokens[0], values=tokens[1:])

        # Case for within selection: the second token is the distance and
        # the last token is the nested reference selection
        if tokens[0] == "within":
            if len(tokens) != 4:
                raise ValueError("within selection must have 3 arguments")
            ref_mask = self.select_tokens(tokens[-1])
            distance = float(tokens[1])
            sel_2 = self.select_index(np.where(ref_mask)[0])
            return self.dist_under_index(sel_2, cutoff=distance)

        bool_list = []  # mask accumulated before the pending and/or
        new_bool_list = []  # mask of the most recently evaluated operand
        logical = None  # pending "and" / "or", if any
        not_flag = False  # True when the next operand must be negated

        for token in tokens:
            if token in ("and", "or"):
                logical = token
                bool_list = new_bool_list
                new_bool_list = []
                continue
            if token == "not":
                not_flag = True
                continue

            new_bool_list = self.select_tokens(token)
            if not_flag:
                new_bool_list = np.logical_not(new_bool_list)
                not_flag = False
            if len(new_bool_list) > 0 and logical in ("and", "or"):
                if logical == "and":
                    new_bool_list = np.logical_and(bool_list, new_bool_list)
                else:
                    new_bool_list = np.logical_or(bool_list, new_bool_list)
                logical = None

        return new_bool_list

    def select_index(self, indexes):
        """Select atoms from the PDB file based on the selection indexes.

        Parameters
        ----------
        self : Model
            Model object
        indexes : list
            List of indexes (anything accepted as a numpy index on the
            first axis of the arrays)

        Returns
        -------
        Model
            a new Model object with the selected atoms
        """
        new_model = Model()
        new_model.atom_dict = {
            key: array[indexes] for key, array in self.atom_dict.items()
        }
        return new_model

    def get_index_select(self, selection):
        """Return index from the PDB file based on the selection string.

        Parameters
        ----------
        self : Model
            Model object
        selection : str
            Selection string

        Returns
        -------
        numpy.ndarray
            indexes of the selected atoms
        """
        tokens = select.parse_selection(selection)
        sel_list = self.select_tokens(tokens)
        return np.where(sel_list)[0]

    def select_atoms(self, selection):
        """Select atoms from the PDB file based on the selection string.

        Parameters
        ----------
        self : Model
            Model object
        selection : str
            Selection string

        Returns
        -------
        Model
            a new Model object with the selected atoms
        """
        return self.select_index(self.get_index_select(selection))

    def dist_under_index(self, sel_2, cutoff):
        """Select atoms from the PDB file based on distance.

        An atom of ``self`` is selected when its smallest distance to the
        atoms of ``sel_2`` is strictly below ``cutoff``.

        Parameters
        ----------
        self : Model
            Model object for the first selection
        sel_2 : Model
            Model object for the second selection
        cutoff : float
            Cutoff distance for the selection

        Returns
        -------
        numpy.ndarray
            boolean mask with one value per atom of ``self``
        """
        n_atoms = self.xyz.shape[0]
        # Nothing to measure: return an all-False boolean mask (empty when
        # the model itself has no atom)
        if n_atoms == 0 or sel_2.xyz.shape[0] == 0:
            return np.zeros(n_atoms, dtype=bool)

        # Compute distance matrix
        dist_mat = distance_matrix(self.xyz, sel_2.xyz)

        # Keep the atoms whose closest atom in sel_2 is under the cutoff
        return dist_mat.min(1) < cutoff

    def add_atom(
        self,
        index,
        name,
        resname,
        num,
        resid,
        uniq_resid,
        chain,
        xyz,
        bfactor=0,
        occupancy=0,
        altloc="",
        insertres="",
        elem="",
    ):
        """Add an atom to the Model object.

        A new row is inserted at position ``index`` in every array of
        ``atom_dict``; the ``field`` of the new atom is ``"ATOM"``.

        Parameters
        ----------
        self : Model
            Model object
        index : int
            Position at which the atom is inserted
        name : str
            Atom name
        resname : str
            Residue name
        num : int
            Atom number
        resid : int
            Residue number
        uniq_resid : int
            Unique residue id
        chain : str
            Chain
        xyz : array-like
            x, y and z coordinates
        bfactor : float
            B-factor, default is 0
        occupancy : float
            Occupancy, default is 0
        altloc : str
            Alternate location, default is ""
        insertres : str
            Insertion code, default is ""
        elem : str
            Element, default is ""
        """
        new_rows = {
            "field": ["ATOM"],
            "num_resid_uniqresid": [num, resid, uniq_resid],
            "name_resname_elem": [name, resname, elem],
            "alterloc_chain_insertres": [altloc, chain, insertres],
            # Column 0 is the occupancy and column 1 the b-factor, matching
            # the `occ` and `beta` properties.
            "occ_beta": [occupancy, bfactor],
            "xyz": xyz,
        }
        for key, row in new_rows.items():
            self.atom_dict[key] = np.insert(
                self.atom_dict[key], index, row, axis=0
            )

        # With a single atom, force the arrays into their
        # (n_atoms, n_columns) layout; "field" stays one-dimensional.
        if len(self.atom_dict["field"]) == 1:
            widths = {
                "num_resid_uniqresid": 3,
                "name_resname_elem": 3,
                "alterloc_chain_insertres": 3,
                "occ_beta": 2,
                "xyz": 3,
            }
            for key, width in widths.items():
                self.atom_dict[key] = self.atom_dict[key].reshape((1, width))