#!/usr/bin/env python3
# coding: utf-8
import os
import urllib.request
import logging
import numpy as np
import gzip
from .. import geom as geom
from ..model import Model
from .. import coor
# Logging
logger = logging.getLogger(__name__)
FIELD_DICT = {"A": "ATOM ", "H": "HETATM"}
[docs]def parse(pdb_lines, pqr_format=False):
"""Parse the pdb lines and return atom information's as a dictionary
Parameters
----------
pdb_lines : list
list of pdb lines
pqr_format : bool, optional
if True, parse pqr format, by default False
Returns
-------
Coor
Coor object
"""
pdb_coor = coor.Coor()
# To parse hexadecimal resid:
resid_base = 10
atom_index = 0
uniq_resid = -1
old_resid = -np.inf
old_insert_res = " "
model_num = 1
index_list = []
field_list = [] # 6 char
num_resid_uniqresid_list = [] # int 5 digits (+1 with Chimera)
alter_chain_insert_list = [] # 1 char
name_resname_elem_list = [] # 4 / 3 char (+1 with Chimera) = 4
xyz_list = [] # real (8.3)
occ_beta_list = [] # real (6.2)
transformation = ""
symmetry = ""
for line in pdb_lines:
if line.startswith("CRYST1"):
pdb_coor.crystal_pack = line
elif line.startswith("REMARK 350 "):
transformation += line
elif line.startswith("REMARK 290 "):
symmetry += line
elif line.startswith("MODEL"):
# print('Read Model {}'.format(model_num))
model_num += 1
elif line.startswith("ENDMDL") or line.startswith("END"):
if len(field_list) > 0:
local_model = Model()
local_model.atom_dict = {
"field": np.array(field_list, dtype="|U1"),
"num_resid_uniqresid": np.array(
num_resid_uniqresid_list, dtype="int32"
),
"name_resname_elem": np.array(name_resname_elem_list, dtype="|U4"),
"alterloc_chain_insertres": np.array(
alter_chain_insert_list, dtype="|U2"
),
"xyz": np.array(xyz_list, dtype="float32"),
"occ_beta": np.array(occ_beta_list, dtype="float32"),
}
if (
len(pdb_coor.models) > 1
and local_model.len != pdb_coor.models[-1].len
):
logger.warning(
f"The atom number is not the same in the model {len(pdb_coor.models)-1} and the model {len(pdb_coor.models)}."
"\nSkip this model."
)
else:
pdb_coor.models.append(local_model)
atom_index = 0
uniq_resid = -1
old_resid = -np.inf
old_insert_res = " "
model_num = 1
index_list = []
field_list = [] # 6 char
num_resid_uniqresid_list = [] # int 5 digits (+1 with Chimera)
alter_chain_insert_list = [] # 1 char
name_resname_elem_list = [] # 4 / 3 char (+1 with Chimera) = 4
xyz_list = [] # real (8.3)
occ_beta_list = [] # real (6.2)
elif line.startswith("ATOM") or line.startswith("HETATM"):
field = line[:6].strip()
atom_num = int(line[6:11])
atom_name = line[12:16].strip()
res_name = line[17:20].strip()
chain = line[21]
# To parse hexadecimal resid:
resid = int(line[22:26], base=resid_base)
# If resid is hexadecimal, resid_base is set to 16
if resid >= 9999:
resid_base = 16
insert_res = line[26:27].strip()
xyz = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
if pqr_format:
alter_loc = ""
res_name = line[16:20].strip()
occ, beta = line[54:62].strip(), line[62:70].strip()
elem_symbol = ""
else:
alter_loc = line[16:17].strip()
res_name = line[17:21].strip()
occ, beta = line[54:60].strip(), line[60:66].strip()
elem_symbol = line[76:78].strip()
if occ == "":
occ = 0.0
else:
occ = float(occ)
if beta == "":
beta = 0.0
else:
beta = float(beta)
if resid != old_resid or insert_res != old_insert_res:
uniq_resid += 1
old_resid = resid
old_insert_res = insert_res
field_list.append(field[0])
num_resid_uniqresid_list.append([atom_num, resid, uniq_resid])
index_list.append(atom_index)
name_resname_elem_list.append([atom_name, res_name, elem_symbol])
alter_chain_insert_list.append([alter_loc, chain, insert_res])
xyz_list.append(xyz)
occ_beta_list.append([occ, beta])
atom_index += 1
if len(field_list) > 0:
logger.warning("No ENDMDL in the pdb file.")
local_model = Model()
local_model.atom_dict = {
"field": np.array(field_list, dtype="|U1"),
"num_resid_uniqresid": np.array(num_resid_uniqresid_list, dtype="int32"),
"name_resname_elem": np.array(name_resname_elem_list, dtype="|U4"),
"alterloc_chain_insertres": np.array(alter_chain_insert_list, dtype="|U2"),
"xyz": np.array(xyz_list, dtype="float32"),
"occ_beta": np.array(occ_beta_list, dtype="float32"),
}
if len(pdb_coor.models) > 1 and local_model.len != pdb_coor.models[-1].len:
logger.warning(
f"The atom number is not the same in the model {len(pdb_coor.models)-1} and the model {len(pdb_coor.models)}."
"\nSkip this model."
)
else:
pdb_coor.models.append(local_model)
if transformation != "":
pdb_coor.transformation = parse_transformation(transformation)
if symmetry != "":
pdb_coor.symmetry = parse_symmetry(symmetry)
return pdb_coor
[docs]def parse_symmetry(text):
"""Parse the `REMARK 290 SMTRY` information from a pdb file.
Parameters
----------
text : str
pdb file
Returns
-------
symetry_dict : dict
symetry information
"""
symmetry_dict = {"matrix": []}
for line in text.split("\n"):
if line.startswith("REMARK 290 SMTRY"):
symmetry_dict["matrix"] += [[float(x) for x in line[19:].split()]]
return symmetry_dict
[docs]def fetch(pdb_ID):
"""Get a pdb file from the PDB using its ID
and return a Coor object.
Parameters
----------
pdb_ID : str
pdb ID
Returns
-------
Coor
Coor object
Examples
--------
>>> prot_coor = Coor()
>>> prot_coor.get_PDB('3EAM')
"""
# Get the pdb file from the PDB:
with urllib.request.urlopen(
f"http://files.rcsb.org/download/{pdb_ID}.pdb"
) as response:
pdb_lines = response.read().decode("utf-8").splitlines(True)
return parse(pdb_lines)
[docs]def fetch_BioAssembly(pdb_ID, index=1):
"""Get a Bio Assembly pdb file from the PDB using its ID
and return a Coor object.
Parameters
----------
pdb_ID : str
pdb ID
index : int
Bio Assembly index
Returns
-------
Coor
Coor object
Examples
--------
>>> prot_coor = Coor()
>>> prot_coor.get_PDB('3EAM')
"""
# Get the pdb file from the PDB:
req = urllib.request.Request(
f"http://files.rcsb.org/download/{pdb_ID}.pdb{index}.gz"
)
req.add_header("Accept-Encoding", "gzip")
with urllib.request.urlopen(req) as response:
pdb_lines = gzip.decompress(response.read()).decode("utf-8").splitlines(True)
return parse(pdb_lines)
[docs]def get_pdb_string(pdb_coor):
"""Return a coor object as a pdb string.
Parameters
----------
pdb_coor : Coor
Coor object
Returns
-------
str
Coor object as a pdb string
Examples
--------
>>> prot_coor = Coor()
>>> prot_coor.read_file(os.path.join(TEST_PATH, '1y0m.pdb'))\
#doctest: +ELLIPSIS
Succeed to read file ...1y0m.pdb , 648 atoms found
>>> pdb_str = prot_coor.get_structure_string()
>>> print(f'Number of caracters: {len(pdb_str)}')
Number of caracters: 51264
"""
str_out = ""
if pdb_coor.crystal_pack != "":
str_out += geom.cryst_convert(pdb_coor.crystal_pack, format_out="pdb")
elif pdb_coor.data_mmCIF is not None:
str_out += geom.cryst_convert_mmCIF(pdb_coor.data_mmCIF, format_out="pdb")
for model_index, model in enumerate(pdb_coor.models):
str_out += f"MODEL {model_index:4d}\n"
for i in range(model.len):
# Atom name should start a column 14, with the type of atom ex:
# - with atom type 'C': ' CH3'
# for 2 letters atom type, it should start at coulumn 13 ex:
# - with atom type 'FE': 'FE1'
name = model.atom_dict["name_resname_elem"][i, 0].astype(np.str_)
if len(name) <= 3 and name[0] in ["C", "H", "O", "N", "S", "P"]:
name = " " + name
# To use resid > 9999, we need to convert the resid in hexa
resid = model.atom_dict["num_resid_uniqresid"][i, 1]
if resid > 9999:
resid = hex(resid).lstrip("0x")
else:
resid = str(resid)
# Note : Here we use 4 letter residue name.
str_out += (
"{:6s}{:5d} {:4s}{:1s}{:4s}{:1s}{:>4s}{:1s}"
" {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f}"
" {:2s}\n".format(
FIELD_DICT[model.atom_dict["field"][i]],
i + 1,
name,
model.atom_dict["alterloc_chain_insertres"][i, 0].astype(np.str_),
model.atom_dict["name_resname_elem"][i, 1].astype(np.str_),
model.atom_dict["alterloc_chain_insertres"][i, 1].astype(np.str_),
resid,
model.atom_dict["alterloc_chain_insertres"][i, 2].astype(np.str_),
model.atom_dict["xyz"][i, 0],
model.atom_dict["xyz"][i, 1],
model.atom_dict["xyz"][i, 2],
model.atom_dict["occ_beta"][i, 0],
model.atom_dict["occ_beta"][i, 1],
model.atom_dict["name_resname_elem"][i, 2].astype(np.str_),
)
)
str_out += "ENDMDL\n"
str_out += "END\n"
return str_out
[docs]def write(coor, pdb_out, overwrite=False):
"""Write a pdb file.
Parameters
----------
coor : Coor
Coor object
pdb_out : str
path of the pdb file to write
overwrite : bool, optional, default=False
flag to overwrite or not if file has already been created.
Returns
-------
None
Examples
--------
>>> prot_coor = Coor(os.path.join(TEST_PATH, '1y0m.pdb'))
>>> prot_coor.write_pdb(os.path.join(TEST_OUT, 'tmp.pdb'))
Succeed to save file tmp.pdb
"""
if not overwrite and os.path.exists(pdb_out):
logger.info(f"PDB file {pdb_out} already exist, file not saved")
return
filout = open(pdb_out, "w")
filout.write(get_pdb_string(coor))
filout.close()
logger.info(f"Succeed to save file {os.path.relpath(pdb_out)}")
return