Source code for pdb_numpy.analysis

#!/usr/bin/env python3
# coding: utf-8

import numpy as np
import logging

# Logging
logger = logging.getLogger(__name__)

from . import alignement
from .select import remove_incomplete_backbone_residues
from .geom import distance_matrix



[docs]
def rmsd(coor_1, coor_2, selection="name CA", index_list=None, frame_ref=0):
    r"""Compute RMSD between two sets of coordinates.

    The RMSD (Root Mean Square Deviation) measures the similarity between two sets of coordinates by calculating the
    average distance between the corresponding atoms. It is given by the formula:

    .. math::
        RMSD(v,w) = \sqrt{ \frac{1}{N} \sum_{i=1}^{N} (v_i - w_i)^2 }

    where `v` and `w` are the two sets of coordinates, and `N` is the total number of atoms.

    Parameters
    ----------
    coor_1 : Coor
        First set of coordinates.
    coor_2 : Coor
        Second set of coordinates.
    selection : str, optional
        A selection string specifying which atoms to include in the RMSD calculation. By default, it selects the alpha
        carbon atoms ('name CA').
    index_list : list, optional
        A list of two arrays containing the indices of the atoms to include in the RMSD calculation. This option is
        provided as an alternative to the `selection` argument, and can be used if you need to calculate the RMSD for a
        custom set of atoms. If this argument is provided, the `selection` argument is ignored.
    frame_ref : int, optional
        The frame number of the reference structure. By default, it is set to 0.

    Returns
    -------
    float
        RMSD value

    """

    assert (
        0 <= frame_ref < len(coor_2.models)
    ), "Reference frame index is larger than the number of frame in the reference structure"

    if index_list is None:
        index_1 = coor_1.get_index_select(selection)
        index_2 = coor_2.get_index_select(selection)
    else:
        index_1 = index_list[0]
        index_2 = index_list[1]

    rmsd_list = []

    for model in coor_1.models:
        diff = model.xyz[index_1] - coor_2.models[frame_ref].xyz[index_2]
        rmsd_list.append(np.sqrt((diff * diff).sum() / len(index_1)))

    return rmsd_list




[docs]
def interface_rmsd(
    coor,
    coor_native,
    rec_chains_native,
    lig_chains_native,
    cutoff=10.0,
    back_atom=["CA", "N", "C", "O"],
):
    """Compute the interface RMSD between two models.
    The interface is defined as the distance between the ligand and the receptor
    below the cutoff distance. The RMSD is computed between the ligand and the
    receptor of the two models.


    Parameters
    ----------
    coor : Coor
        First coordinates
    coor_native : Coor
        Second coordinates
    rec_chains_native : list
        List of receptor chains
    lig_chains_native : list
        List of ligand chains
    cutoff : float, default=10.0
        Cutoff distance for the interface
    back_atom : list, default=['CA', 'N', 'C', 'O']
        List of backbone atoms to use for the RMSD calculation

    Returns
    -------
    float
        Interface RMSD value

    Notes
    -----
    Both coor object must have equivalent residues number in both chains.
    Chain order must also be equivalent.

    """

    lig_interface = coor_native.select_atoms(
        f'chain {" ".join(lig_chains_native)} and within {cutoff} of chain {" ".join(rec_chains_native)}'
    )
    rec_interface = coor_native.select_atoms(
        f'chain {" ".join(rec_chains_native)} and within {cutoff} of chain {" ".join(lig_chains_native)}'
    )

    lig_interface_residues = np.unique(lig_interface.models[0].residue)
    rec_interface_residues = np.unique(rec_interface.models[0].residue)

    interface_residues = np.concatenate(
        (
            lig_interface_residues,
            rec_interface_residues,
        )
    )

    if len(interface_residues) == 0:
        logger.info("No interface residues found")
        return [None] * len(coor.models)

    # print(f"lig_interface_resids= {lig_interface_residues} rec_interface_resids= {rec_interface_residues}")
    index = coor.get_index_select(
        f'residue {" ".join([str(i) for i in interface_residues])} and name {" ".join(back_atom)}'
    )
    index_native = coor_native.get_index_select(
        f'residue {" ".join([str(i) for i in interface_residues])} and name {" ".join(back_atom)}'
    )

    # print(f"index= {index} index_native= {index_native}")
    # print(f"index= {len(index)} index_native= {len(index_native)}")
    assert len(index) == len(
        index_native
    ), "The number of atoms in the interface is not the same in the two models"

    alignement.coor_align(coor, coor_native, index, index_native, frame_ref=0)

    return rmsd(coor, coor_native, index_list=[index, index_native])




[docs]
def native_contact(
    coor,
    native_coor,
    rec_chains,
    lig_chains,
    native_rec_chains,
    native_lig_chains,
    cutoff=5.0,
):
    """Compute the native contact score between a model and a native structure.

    The function returns two lists: `fnat_list` and `fnonnat_list`. `fnat_list`
    contains the fraction of native contacts, and `fnonnat_list` contains the
    fraction of non-native contacts.

    The function first selects the atoms within the cutoff distance from the
    native receptor-ligand interface in the native structure, and then selects
    the atoms within the cutoff distance from the receptor-ligand interface in
    the model structure. It then compares the selected atoms in the model
    structure with the native atoms to determine the native contacts and
    non-native contacts.

    The function uses the `np.unique()` function to get unique residue numbers,
    and then iterates over these residue numbers to compute the native contacts.
    It then computes the fraction of native contacts and non-native contacts
    for each model and adds them to the respective lists.

    Finally, the function returns the `fnat_list` and `fnonnat_list`.

    Parameters
    ----------
    coor : Coor
        Model coordinates
    native_coor : Coor
        Native coordinates
    rec_chains : list
        List of receptor chains
    lig_chains : list
        List of ligand chains
    native_rec_chains : list
        List of native receptor chains
    native_lig_chains : list
        List of native ligand chains
    cutoff : float, default=5.0
        Cutoff distance for the native contact

    Returns
    -------
    float
        Native contact score
    """

    native_rec_lig_interface = native_coor.select_atoms(
        f'(chain {" ".join(native_rec_chains)} and within {cutoff} of chain {" ".join(native_lig_chains)}) or'
        f'(chain {" ".join(native_lig_chains)} and within {cutoff} of chain {" ".join(native_rec_chains)})'
    )
    native_rec_interface = native_rec_lig_interface.select_atoms(
        f'chain {" ".join(native_rec_chains)} and within {cutoff} of chain {" ".join(native_lig_chains)}'
    )

    native_contact_list = []
    native_rec_residue = native_rec_interface.models[0].residue

    for residue in np.unique(native_rec_residue):
        res_lig = native_rec_lig_interface.select_atoms(
            f'chain {" ".join(native_lig_chains)} and within {cutoff} of residue {residue}'
        )
        native_contact_list += [
            [residue, lig_residue]
            for lig_residue in np.unique(res_lig.models[0].residue)
        ]

    fnat_list = []
    fnonnat_list = []
    for model in coor.models:
        rec_lig_interface = model.select_atoms(
            f'(chain {" ".join(rec_chains)} and within {cutoff} of chain {" ".join(lig_chains)}) or '
            f'(chain {" ".join(lig_chains)} and within {cutoff} of chain {" ".join(rec_chains)})'
        )
        rec_interface = rec_lig_interface.select_atoms(
            f'(chain {" ".join(rec_chains)} and within {cutoff} of chain {" ".join(lig_chains)})'
        )

        model_rec_residue = np.unique(rec_interface.residue)

        native_contact_num = 0
        non_native_contact_num = 0
        model_contact_list = []

        for residue in model_rec_residue:
            residue_lig = rec_lig_interface.select_atoms(
                f'chain {" ".join(lig_chains)} and within {cutoff} of residue {residue}'
            )

            for lig_residue in np.unique(residue_lig.residue):
                # print([residue, lig_residue])
                if [residue, lig_residue] in native_contact_list:
                    native_contact_num += 1
                else:
                    non_native_contact_num += 1

                model_contact_list.append([residue, lig_residue])

        if native_contact_num > 0:
            fnat = native_contact_num / len(native_contact_list)
        else:
            fnat = 0.0
        logger.info(
            f"Fnat {fnat:.3f} {native_contact_num} correct of {len(native_contact_list)} native contacts"
        )

        if non_native_contact_num > 0:
            fnonnat = non_native_contact_num / len(model_contact_list)
        else:
            fnonnat = 1.0
        logger.info(
            f"Fnonnat {fnonnat:.3f} {non_native_contact_num} non-native of {len(model_contact_list)} model contacts"
        )

        fnat_list.append(fnat)
        fnonnat_list.append(fnonnat)

    return (fnat_list, fnonnat_list)




[docs]
def dockQ(
    coor,
    native_coor,
    rec_chains=None,
    lig_chains=None,
    native_rec_chains=None,
    native_lig_chains=None,
    back_atom=["CA", "N", "C", "O"],
):
    """The dockQ function computes the docking quality score between a model
    and a native structure. It takes as input the Cartesian coordinates of the
    model (`coor`) and the native structure (`native_coor`) and various
    optional parameters such as the receptor chain (`rec_chain`), the ligand
    chain (`lig_chain`), the native receptor chain (`native_rec_chain`),
    the native ligand chain (`native_lig_chain`) and the list of backbone atoms
    (`back_atom`).

    The docking quality score is computed as follows:

    1. Align the receptor on the native receptor using the backbone atoms
    2. Compute the RMSD between the ligand and the native ligand
    3. Compute the RMSD between the receptor and the native receptor

    The function returns the docking quality score as a float value.

    The function first gets the shortest chain's sequence to identify the
    peptide and receptor chains for both the model and native structure. It
    then removes hydrogens and non-protein atoms as well as alternate
    locations from the coordinates of both the model and the native structure.

    The function then removes incomplete backbone residues from both the model
    and native structure coordinates. The ligand chain is then put at the end
    of the dictionary. The model is then aligned on the native structure using
    the model back atoms. The function computes the RMSD between the ligand and
    the native ligand and the RMSD between the receptor and the native
    receptor.

    The same residue in common is set and the interface coordinates and
    interface native coordinates are selected. Finally, the docking quality
    score is returned.


    Parameters
    ----------
    coor : Coor
        Model coordinates
    native_coor : Coor
        Native coordinates
    rec_chains : list, default=None
        List of receptor chains
    lig_chains : list, default=None
        List of ligand chains
    native_rec_chains : list, default=None
        List of native receptor chains
    native_lig_chains : list, default=None
        List of native ligand chains
    back_atom : list, default=['CA', 'N', 'C', 'O']
        List of backbone atoms

    Returns
    -------
    float
        Docking quality score

    """

    # Get shortest chain's sequence to identify peptide and receptor chains
    model_seq = coor.get_aa_seq()
    native_seq = native_coor.get_aa_seq()

    if lig_chains is None:
        lig_chains = [
            min(model_seq.items(), key=lambda x: len(x[1].replace("-", "")))[0]
        ]
    logger.info(f'Model ligand chains : {" ".join(lig_chains)}')
    if rec_chains is None:
        rec_chains = [chain for chain in model_seq if chain not in lig_chains]
    logger.info(f'Model receptor chains : {" ".join(rec_chains)}')

    if native_lig_chains is None:
        native_lig_chains = [
            min(native_seq.items(), key=lambda x: len(x[1].replace("-", "")))[0]
        ]
    logger.info(f'Native ligand chains : {" ".join(native_lig_chains)}')
    if native_rec_chains is None:
        native_rec_chains = [
            chain for chain in native_seq if chain not in native_lig_chains
        ]
    logger.info(f'Native receptor chains : {" ".join(native_rec_chains)}')

    # Remove hydrogens and non protein atoms as well as altloc
    clean_coor = coor.select_atoms(
        f"protein and not altloc B C D and chain {' '.join(rec_chains + lig_chains)}"
    )
    clean_native_coor = native_coor.select_atoms(
        f"protein and not altloc B C D and chain {' '.join(native_rec_chains + native_lig_chains)}"
    )

    # print("1:", clean_native_coor.models[0].resid[:10])

    # Remove incomplete backbone residue:
    clean_coor = remove_incomplete_backbone_residues(clean_coor)
    clean_native_coor = remove_incomplete_backbone_residues(clean_native_coor)
    # print("2:", clean_native_coor.models[0].resid[:10])

    # Put lig chain at the end of the dict:
    clean_coor.change_order("chain", rec_chains + lig_chains)
    clean_native_coor.change_order("chain", native_rec_chains + native_lig_chains)

    # Align model on native structure using model back atoms:
    rmsd_prot_list, [
        align_rec_index,
        align_rec_native_index,
    ] = alignement.align_seq_based(
        clean_coor,
        clean_native_coor,
        chain_1=rec_chains,
        chain_2=native_rec_chains,
        back_names=back_atom,
    )
    logger.info(f"Receptor RMSD: {rmsd_prot_list[0]:.3f} A")
    logger.info(
        f"Found {len(align_rec_index)//len(back_atom):d} residues in common (receptor)"
    )
    # print("4:", clean_native_coor.models[0].resid[:10])

    ########################
    # Compute ligand RMSD: #
    ########################

    lrmsd_list, [align_lig_index, align_lig_native_index] = alignement.rmsd_seq_based(
        clean_coor,
        clean_native_coor,
        chain_1=lig_chains,
        chain_2=native_lig_chains,
        back_names=back_atom,
    )
    logger.info(f"Ligand   RMSD: {lrmsd_list[0]:.3f} A")
    logger.info(
        f"Found {len(align_lig_index)//len(back_atom):d} residues in common (ligand)"
    )
    # print("5:", clean_native_coor.models[0].resid[:10])

    #############################
    # Set same resid in common: #
    #############################

    coor_residue = clean_coor.models[0].residue[align_rec_index + align_lig_index]

    native_residue = clean_native_coor.models[0].residue[
        align_rec_native_index + align_lig_native_index
    ]

    coor_residue_unique = np.unique(coor_residue)
    native_residue_unique = np.unique(native_residue)

    assert len(coor_residue_unique) == len(native_residue_unique)

    interface_coor = clean_coor.select_atoms(
        f'residue {" ".join([str(i) for i in coor_residue_unique])}'
    )
    interface_native_coor = clean_native_coor.select_atoms(
        f'residue {" ".join([str(i) for i in native_residue_unique])}'
    )

    # for model in interface_coor.models:
    #    model.resid[:] = -1
    #    for res, nat_res in zip(coor_residue_unique, native_resid_unique):
    #        model.resid[model.residue == res] = nat_res

    interface_coor.reset_residue_index()
    interface_native_coor.reset_residue_index()

    irmsd_list = interface_rmsd(
        interface_coor,
        interface_native_coor,
        native_rec_chains,
        native_lig_chains,
        cutoff=10.0,
        back_atom=back_atom,
    )
    logger.info(f"Interface   RMSD: {irmsd_list[0]} A")

    fnat_list, fnonnat_list = native_contact(
        interface_coor,  # clean_coor,
        interface_native_coor,  # clean_native_coor,
        rec_chains,
        lig_chains,
        native_rec_chains,
        native_lig_chains,
        cutoff=5.0,
    )
    logger.info(f"Fnat: {fnat_list[0]:.3f}      Fnonnat: {fnonnat_list[0]:.3f}")

    def scale_rms(rms, d):
        if rms is None:
            return 0.0
        return 1.0 / (1 + (rms / d) ** 2)

    d1 = 8.5
    d2 = 1.5

    dockq_list = [
        (fnat + scale_rms(lrmsd, d1) + scale_rms(irmsd, d2)) / 3
        for fnat, lrmsd, irmsd in zip(fnat_list, lrmsd_list, irmsd_list)
    ]

    logger.info(f"DockQ Score pdb_manip: {dockq_list[0]:.3f} ")

    return {
        "Fnat": fnat_list,
        "Fnonnat": fnonnat_list,
        "rRMS": rmsd_prot_list,
        "iRMS": irmsd_list,
        "LRMS": lrmsd_list,
        "DockQ": dockq_list,
    }




[docs]
def compute_pdockQ(
    coor,
    rec_chains=None,
    lig_chains=None,
    cutoff=8.0,
    L=0.724,
    x0=152.611,
    k=0.052,
    b=0.018,
):
    r"""Compute the pdockQ score as define in [1]_.

    pDockQ is computed using this equation [1]_:

    .. math::
        pDockQ = \frac{L}{1 + e^{-k (x-x_{0})}} + b

    where

    .. math::
        x = \overline{plDDT_{interface}} \cdot log(number \: of \: interface \: contacts)

    :math:`L = 0.724` is the maximum value of the sigmoid,
    :math:`k = 0.052` is the slope of the sigmoid, :math:`x_{0} = 152.611`
    is the midpoint of the sigmoid, and :math:`b = 0.018` is the y-intercept
    of the sigmoid.

    For the *multiple* pdockQ or `mpDockQ` [2]_ you should use this values:

    :math:`L = 0.728`, :math:`x0 = 309.375`, :math:`k = 0.098` and :math:`b = 0.262`.

    Implementation was inspired from https://gitlab.com/ElofssonLab/FoldDock/-/blob/main/src/pdockq.py

    Parameters
    ----------
    coor : Coor
        object containing the coordinates of the model
    rec_chains : list
        list of receptor chains
    lig_chains : list
        list of ligand chains
    cutoff : float
        cutoff for native contacts, default is 8.0 A
    L : float
        maximum value of the sigmoid, default is 0.728
    x0 : float
        midpoint of the sigmoid, default is 309.375
    k : float
        slope of the sigmoid, default is 0.098
    b : float
        y-intercept of the sigmoid, default is 0.262


    Returns
    -------
    list
        pdockQ scores


    References
    ----------
    .. [1] Bryant P, Pozzati G and Elofsson A. Improved prediction of
        protein-protein interactions using AlphaFold2. *Nature Communications*.
        vol. 13, 1265 (2022)
        https://www.nature.com/articles/s41467-022-28865-w
    .. [2] Bryant P, Pozzati G, Zhu W, Shenoy A, Kundrotas P & Elofsson A. Predicting
        the structure of large protein complexes using AlphaFold and Monte Carlo
        tree search. *Nature Communications*.
        vol. 13, 6028 (2022)
        https://www.nature.com/articles/s41467-022-33729-4
    """

    # coor_CA_CB = coor.select_atoms("name CB or (resname GLY and name CA)")
    coor_CA_CB = coor.select_atoms(
        "(protein and (name CB or (resname GLY and name CA))) or (dna and name P)"
    )

    model_seq = coor.get_aa_na_seq()

    if lig_chains is None:
        lig_chains = [
            min(model_seq.items(), key=lambda x: len(x[1].replace("-", "")))[0]
        ]
    assert len(lig_chains) >= 1, "At least one ligand chain is allowed"
    logger.info(f'Model ligand chain : {" ".join(lig_chains)}')

    if rec_chains is None:
        rec_chains = [chain for chain in model_seq if chain not in lig_chains]
    assert len(rec_chains) >= 1, "At least one receptor chain is allowed"
    logger.info(f'Model receptor chain : {" ".join(rec_chains)}')

    pdockq_list = []

    for model in coor_CA_CB.models:
        rec_in_contact = model.select_atoms(
            f"chain {' '.join(rec_chains)} and within {cutoff} of chain {' '.join(lig_chains)}"
        )

        lig_in_contact = model.select_atoms(
            f"chain {' '.join(lig_chains)} and within {cutoff} of chain {' '.join(rec_chains)}"
        )

        dist_mat = distance_matrix(rec_in_contact.xyz, lig_in_contact.xyz)
        contact_num = np.sum(dist_mat < cutoff)

        if contact_num == 0:
            pdockq = 0.0
            pdockq_list.append(pdockq)
            continue

        avg_plddt = rec_in_contact.len * np.average(
            rec_in_contact.beta
        ) + lig_in_contact.len * np.average(lig_in_contact.beta)
        avg_plddt /= rec_in_contact.len + lig_in_contact.len

        x = avg_plddt * np.log10(contact_num)
        pdockq = L / (1 + np.exp(-k * (x - x0))) + b

        pdockq_list.append(pdockq)

    return pdockq_list




[docs]
def compute_pdockQ_sel(
    coor,
    rec_sel,
    lig_sel,
    cutoff=8.0,
    L=0.724,
    x0=152.611,
    k=0.052,
    b=0.018,
):
    r"""Compute the pdockQ score as define in [1]_. Using two selection strings.
    pDockQ is computed using this equation [1]_:

    .. math::
        pDockQ = \frac{L}{1 + e^{-k (x-x_{0})}} + b

    where

    .. math::
        x = \overline{plDDT_{interface}} \cdot log(number \: of \: interface \: contacts)

    :math:`L = 0.724` is the maximum value of the sigmoid,
    :math:`k = 0.052` is the slope of the sigmoid, :math:`x_{0} = 152.611`
    is the midpoint of the sigmoid, and :math:`b = 0.018` is the y-intercept
    of the sigmoid.

    For the *multiple* pdockQ or `mpDockQ` [2]_ you should use this values:

    :math:`L = 0.728`, :math:`x0 = 309.375`, :math:`k = 0.098` and :math:`b = 0.262`.

    Implementation was inspired from https://gitlab.com/ElofssonLab/FoldDock/-/blob/main/src/pdockq.py

    Parameters
    ----------
    coor : Coor
        object containing the coordinates of the model
    rec_sel : str
        selection string for receptor
    lig_sel : str
        selection string for ligand
    cutoff : float
        cutoff for native contacts, default is 8.0 A
    L : float
        maximum value of the sigmoid, default is 0.728
    x0 : float
        midpoint of the sigmoid, default is 309.375
    k : float
        slope of the sigmoid, default is 0.098
    b : float
        y-intercept of the sigmoid, default is 0.262

    Returns
    -------
    float
        pdockQ score



    References
    ----------
    .. [1] Bryant P, Pozzati G and Elofsson A. Improved prediction of
        protein-protein interactions using AlphaFold2. *Nature Communications*.
        vol. 13, 1265 (2022)
        https://www.nature.com/articles/s41467-022-28865-w
    .. [2] Bryant P, Pozzati G, Zhu W, Shenoy A, Kundrotas P & Elofsson A. Predicting
        the structure of large protein complexes using AlphaFold and Monte Carlo
        tree search. *Nature Communications*.
        vol. 13, 6028 (2022)
        https://www.nature.com/articles/s41467-022-33729-4
    """

    # models_CA = coor.select_atoms("name CB or (resname GLY and name CA)")
    coor_CA_CB = coor.select_atoms(
        "(protein and (name CB or (resname GLY and name CA))) or (dna and name P)"
    )

    pdockq_list = []

    for model in coor_CA_CB.models:
        rec_in_contact = model.select_atoms(
            f"({rec_sel}) and within {cutoff} of ({lig_sel})"
        )

        lig_in_contact = model.select_atoms(
            f"({lig_sel}) and within {cutoff} of ({rec_sel})"
        )

        dist_mat = distance_matrix(rec_in_contact.xyz, lig_in_contact.xyz)
        contact_num = np.sum(dist_mat < cutoff)

        if contact_num == 0:
            pdockq = 0.0
            pdockq_list.append(pdockq)
            continue

        avg_plddt = rec_in_contact.len * np.average(
            rec_in_contact.beta
        ) + lig_in_contact.len * np.average(lig_in_contact.beta)
        avg_plddt /= rec_in_contact.len + lig_in_contact.len

        x = avg_plddt * np.log10(contact_num)
        pdockq = L / (1 + np.exp(-k * (x - x0))) + b

        pdockq_list.append(pdockq)

    return pdockq_list




[docs]
def compute_pdockQ2(
    coor,
    pae_array,
    cutoff=8.0,
    L=1.31034849e00,
    x0=8.47326239e01,
    k=7.47157696e-02,
    b=5.01886443e-03,
    d0=10.0,
    sel="(protein and name CA) or (dna and name P) or ions or resname LIG",
):
    r"""Compute the pdockQ2 score as define in [1]_.

    .. math::
        pDockQ_2 = \frac{L}{1 + exp [-k*(X_i-X_0)]} + b

    with:

    .. math::
        X_i = \langle \frac{1}{1+(\frac{PAE_{int}}{d_0})^2} \rangle * \langle pLDDT \rangle_{int}

    :math:`L = 1.31` is the maximum value of the sigmoid,
    :math:`k = 0.075` is the slope of the sigmoid, :math:`x_{0} = 84.733`
    is the midpoint of the sigmoid, and :math:`b = 0.005` is the y-intercept
    of the sigmoid.

    Implementation was inspired from https://gitlab.com/ElofssonLab/afm-benchmark/-/blob/main/src/pdockq2.py

    Parameters
    ----------
    coor : Coor
        object containing the coordinates of the model
    rec_chain : list
        list of receptor chain
    lig_chain : list
        list of ligand chain
    cutoff : float
        cutoff for native contacts, default is 8.0 A
    L : float
        maximum value of the sigmoid, default is ~1.31
    x0 : float
        midpoint of the sigmoid, default is ~84.733
    k : float
        slope of the sigmoid, default is ~0.075
    b : float
        y-intercept of the sigmoid, default is ~0.005
    d0 : float
        default is 10.0
    sel : str
        selection string for atoms to consider for the calculation


    Returns
    -------
    list
        pdockQ scores

    References
    ----------
    .. [1] Zhu W, Shenoy A, Kundrotras P and Elofsson A. Evaluation of AlphaFold-Multimer prediction
        on multi-chain protein complexes. *Bioinformatics*.
        vol. 39, 7 (2023) btad424
        https://academic.oup.com/bioinformatics/article/39/7/btad424/7219714
    """

    models_CA = coor.select_atoms(sel)
    models_chains = np.unique(models_CA.chain)

    assert pae_array.shape == (
        models_CA.len,
        models_CA.len,
    ), "PAE array shape mismatch with CA atoms number"

    pdockq2_list = []
    for chain in models_chains:
        pdockq2_list.append([])

    for model in models_CA.models:
        for i, chain in enumerate(models_chains):
            chain_sel = model.select_atoms(
                f"(chain {chain} and within {cutoff} of not chain {chain})"
            )
            inter_chain_sel = model.select_atoms(
                f"(not chain {chain} {chain} and within {cutoff} of chain {chain})"
            )

            if chain_sel.len == 0 or inter_chain_sel.len == 0:
                pdockq2_list[i].append(0.0)
                logger.info(
                    "No interface residues found for pdockq2 calculation, 0 value return."
                )
                continue

            dist_mat = distance_matrix(chain_sel.xyz, inter_chain_sel.xyz)

            indexes = np.where(dist_mat < cutoff)
            x_indexes = chain_sel.uniq_resid[indexes[0]]
            y_indexes = inter_chain_sel.uniq_resid[indexes[1]]
            pae_sel = pae_array[x_indexes, y_indexes]

            norm_if_interpae = np.mean(1 / (1 + (pae_sel / d0) ** 2))

            plddt_avg = np.mean(model.beta[x_indexes])

            x = norm_if_interpae * plddt_avg
            y = L / (1 + np.exp(-k * (x - x0))) + b
            pdockq2_list[i].append(y)

    return pdockq2_list




[docs]
def compute_piTM(
    coor,
    pae_array,
    cutoff=8.0,
):
    r"""Compute the piTM score as define in [2]_.

    .. math::
        piTM = \max_{i \in \mathcal{I}} \frac{1}{I} \sum_{j \in \mathcal{I}}  \frac{1}{1 + [\langle e_{ij} \rangle / d_0 (I)]^2}

    with:

    .. math::
        d_0(I) = \begin{cases} 1.25 \sqrt[3]{I -15} -1.8\text{,} & \text{if } I \geq 22 \\ 0.02 I \text{,} & \text{if } I < 22  \end{cases}


    Implementation was inspired from `predicted_tm_score_v1()` in https://github.com/FreshAirTonight/af2complex/blob/main/src/alphafold/common/confidence.py

    Parameters
    ----------
    coor : Coor
        object containing the coordinates of the model
    pae_array : np.array
        array of predicted PAE
    cutoff : float
        cutoff for native contacts, default is 8.0 A

    Returns
    -------
    list
        piTM scores

    References
    ----------
    .. [2] Mu Gao, Davi Nakajima An, Jerry M. Parks & Jeffrey Skolnick.
        AF2Complex predicts direct physical interactions in multimeric proteins with deep learning
        *Nature Communications*. volume 13, Article number: 1744 (2022).
        https://www.nature.com/articles/s41467-022-29394-2

    """

    models_CA = coor.select_atoms("name CA")
    models_chains = np.unique(models_CA.chain)

    # print(models_chains, pae_array.shape, models_CA.len)

    assert pae_array.shape == (
        models_CA.len,
        models_CA.len,
    ), "PAE array shape mismatch with CA atoms number"

    piTM_list = []

    piTM_Score_list = []
    for chain in models_chains:
        piTM_Score_list.append([])

    for model in models_CA.models:
        # Compute I and d0
        interface_index = []
        for i, chain in enumerate(models_chains):
            sel_ndx = model.get_index_select(
                f"(within {cutoff} of chain {chain}) and (within {cutoff} of not chain {chain})"
            )
            interface_index += list(sel_ndx)
        interface_index = set(interface_index)

        I = len(interface_index)
        if I == 0:
            piTM_list.append(0)
            for i in range(len(models_chains)):
                piTM_Score_list[i].append(0)
            continue
        d0 = 1.25 * (I - 15) ** (1 / 3) - 1.8 if I >= 22 else 0.02 * I

        # Add 0 in case there is no interface residues
        piTM_local = [0]
        for i in interface_index:
            # print(pae_array[:10, :10])
            local_score = 0
            for j in interface_index:
                if i != j:
                    local_score += 1 / (1 + (pae_array[i, j] / d0) ** 2)
            piTM_local.append(local_score)
        piTM_list.append(max(piTM_local) / I)

        for i, chain in enumerate(models_chains):
            chain_sel_ndx = model.get_index_select(
                f"(chain {chain} and within {cutoff} of not chain {chain})"
            )
            inter_chain_ndx = model.get_index_select(
                f"(not chain {chain} {chain} and within {cutoff} of chain {chain})"
            )
            # Add 0 in case there is no interface residues
            chain_score = [0]
            for j in inter_chain_ndx:
                local_score = 0
                for k in chain_sel_ndx:
                    local_score += 1 / (1 + (pae_array[k, j].mean() / d0) ** 2)
                chain_score.append(local_score)

            piTM_Score_list[i].append(max(chain_score) / I)
            # piTM_Score_list[i].append(max(chain_score)/len(chain_sel_ndx))

    return piTM_list, piTM_Score_list