Source code for moldesign.molecules.residue

# Copyright 2016 Autodesk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import moldesign as mdt
from moldesign import utils, data

from . import Entity, AtomList, toplevel


@toplevel
[docs]class Residue(Entity):
    """ A biomolecular residue - most often an amino acid, a nucleic base, or a solvent
    molecule. In PDB structures, also often refers to non-biochemical molecules.

    Its children are almost always residues.

    Attributes:
        parent (mdt.Molecule): the molecule this residue belongs to
        chain (Chain): the chain this residue belongs to
    """

[docs]    def copy(self):
        newatoms = super(Residue, self).copy()
        return newatoms[0].residue
    copy.__doc__ = Entity.copy.__doc__

[docs]    def to_json(self):
        js = mdt.chemjson.jsonify(self, 'index resname name pdbindex'.split())
        js['chain'] = self.chain.index
        js['atoms'] = [atom.index for atom in self.atoms]
        return js

    @utils.args_from(Entity)
    def __init__(self, **kwargs):
        """ Initialization
        Args:
            **kwargs ():
        """
        self.chain = kwargs.get('chain', None)
        super(Residue, self).__init__(**kwargs)
        if self.index is None and self.molecule is not None:
            self.index = self.molecule.residues.index(self)
        self.chainindex = None
        self._backbone = None
        self._sidechain = None
        self._template_name = None
        if self.name is None and self.pdbname is not None:
            self.name = self.pdbname + str(self.pdbindex)

    @property
    def atoms(self):
        return self.children

[docs]    def add(self, atom, key=None):
        """Deals with atom name clashes within a residue - common for small molecules"""
        if atom.residue is not None:
            assert atom.residue is self, 'Atom already assigned to a residue'
        atom.residue = self
        if atom.chain is None:
            atom.chain = self.chain
        else:
            assert atom.chain == self.chain, "Atom's chain does not match residue's chain"

        if key is not None or atom.name not in self.children:
            return super(Residue, self).add(atom, key=key)
        else:
            return super(Residue, self).add(atom, key='%s%s' % (atom.name, len(self)))
    add.__doc__ = Entity.add.__doc__

    @property
    def is_n_terminal(self):
        """bool: this is the last residue in a peptide

        Raises:
            ValueError: if this residue is not an amino acid
        """
        if self.type != 'protein':
            raise ValueError('%s is not a recognized peptide monomer' % self)
        return self._is_starting_residue

    @property
    def is_c_terminal(self):
        """bool: this is the first residue in a peptide

        Raises:
            ValueError: if this residue is not an amino acid
        """
        if self.type != 'protein':
            raise ValueError('%s is not a recognized peptide monomer' % self)
        return self._is_ending_residue

    @property
    def is_5prime_end(self):
        """bool: this is the first base in a strand

        Raises:
            ValueError: if this residue is not a DNA base
        """
        if self.type != 'dna':
            raise ValueError('%s is not a recognized nucleic acid monomer' % self)
        return self._is_starting_residue

    @property
    def is_3prime_end(self):
        """bool: this is the last base in a strand

        Raises:
            ValueError: if this residue is not a DNA base
        """
        if self.type != 'dna':
            raise ValueError('%s is not a recognized nucleic acid monomer' % self)
        return self._is_ending_residue

    @property
    def is_monomer(self):
        """bool: this residue is not part of a biopolymer
        """
        return self._is_ending_residue and self._is_starting_residue

    @property
    def _is_ending_residue(self):
        """bool: this is the last residue in a polymer"""
        try:
            nextres = self.next_residue
        except StopIteration:
            return True
        else:
            return False

    @property
    def _is_starting_residue(self):
        """bool: this is the first residue in a polymer"""
        try:
            prevres = self.prev_residue
        except StopIteration:
            return True
        else:
            return False

[docs]    def assign_template_bonds(self):
        """Assign bonds from bioresidue templates.

        Only assigns bonds that are internal to this residue (does not connect different residues).
        The topologies here assume pH7.4 and may need to be corrected for other pHs

        See Also:
            :ref:`moldesign.Chain.assign_biopolymer_bonds` for assigning inter-residue bonds

        Raises:
            ValueError: if ``residue.resname`` is not in bioresidue templates
            KeyError: if an atom in this residue is not recognized """
        try:
            resname = self.resname
            if self.type == 'protein':
                if self.is_n_terminal:
                    resname = self.resname + '_LSN3'  # the protonated form (with NH3+ on the end)
                elif self.is_c_terminal:
                    resname = self.resname + '_LEO2H'  # deprotonated form (COO-)
                elif self.is_monomer:
                    resname = self.resname + '_LFZW'  # free zwitterion form

            bonds_by_name = data.RESIDUE_BONDS[resname]
            self._template_name = resname
        except KeyError:
            if len(self) == 1:
                print 'INFO: no bonds assigned to residue %s' % self
                return
            else:
                raise KeyError("No bonding template for residue '%s'" % resname)

        # intra-residue bonds
        bond_graph = {atom: {} for atom in self}
        for atom in self:
            for nbrname, order in bonds_by_name.get(atom.name, {}).iteritems():
                try:
                    nbr = self[nbrname]
                except KeyError:  # missing atoms are normal (often hydrogen)
                    pass
                else:
                    bond_graph[atom][nbr] = bond_graph[nbr][atom] = order

        # copy bonds into the right structure (do this last to avoid mangling the graph)
        for atom in bond_graph:
            atom.bond_graph.update(bond_graph[atom])

    @property
    def next_residue(self):
        """Residue:
            The next residue in the chain (in the C-direction for proteins, 3'
            direction for nucleic acids)

        Raises:
            NotImplementedError: If we don't know how to deal with this type of biopolymer
            StopIteration: If there isn't a next residue (i.e. it's a 3'- or C-terminus)
        """
        if self.chain.type == 'protein':
            return self._get_neighbor('C', 'C-terminus')
        elif self.chain.type == 'dna':
            return self._get_neighbor("O3'", "3' end")
        else:
            raise NotImplementedError('We only deal with dna and amino acids right now')

    @property
    def prev_residue(self):
        """Residue: The next residue in the chain (in the N-direction for proteins, 5' direction for
            nucleic acids)

        Raises:
            NotImplementedError: If we don't know how to deal with this type of biopolymer
            StopIteration: If there isn't a previous residue (i.e. it's a 5'- or N-terminus)
        """
        if self.chain.type == 'protein':
            return self._get_neighbor('N', 'N-terminus')
        elif self.chain.type == 'dna':
            return self._get_neighbor("P", "5' end")
        else:
            raise NotImplementedError('We only deal with dna and amino acids right now')

    def _get_neighbor(self, atomname, name):
        """Return the first residue found that's bound to the passed atom name
        """
        conn_atom = self[atomname]
        for nbr in conn_atom.bond_graph:
            if nbr.residue is not self:
                return nbr.residue
        else:
            raise StopIteration('%s reached' % name)

    @property
    def resname(self):
        """str: Synonym for pdbname"""
        return self.pdbname

    @resname.setter
    def resname(self, val):
        self.pdbname = val

    @property
    def type(self):
        """str: Classification of the residue (protein, solvent, dna, water, unknown)"""
        return data.RESIDUE_TYPES.get(self.resname, 'unknown')

    @property
    def code(self):
        """str: one-letter amino acid code or two letter nucleic acid code, or '?' otherwise"""
        return data.RESIDUE_ONE_LETTER.get(self.pdbname, '?')

    @property
    def atomnames(self):
        """Residue: synonym for ```self``` for for the sake of readability:
            ```molecule.chains['A'].residues[123].atomnames['CA']```
        """
        return self

    @property
    def backbone(self):
        """ AtomList: all backbone atoms for nucleic and protein residues
            (indentified using PDB names); returns None for other residue types
        """
        if self._backbone is None:
            if self.type not in data.BACKBONES:
                return None
            self._backbone = AtomList()
            for name in data.BACKBONES[self.type]:
                try: self._backbone.append(self[name])
                except KeyError: pass
        return self._backbone

    @property
    def sidechain(self):
        """ AtomList: all sidechain atoms for nucleic and protein residues
            (defined as non-backbone atoms); returns None for other residue types
        """
        if self._sidechain is None:
            if self.backbone is None:
                return None
            bb = set(self.backbone)
            self._sidechain = [atom for atom in self if atom not in bb]
        return self._sidechain

    @property
    def is_standard_residue(self):
        """ bool: this residue is a "standard residue" for the purposes of a PDB entry.

        In PDB files, this will be stored using 'ATOM' if this is a standard residue
        and 'HETATM' records if not.

        Note:
            We currently define "standard" residues as those whose 3 letter residue code appears in
            the ``moldesign.data.RESIDUE_DESCRIPTIONS`` dictionary. Although this seems to work
            well, we'd welcome a PR with a less hacky method.

        References:
            PDB format guide: http://www.wwpdb.org/documentation/file-format
        """
        return self.resname in mdt.data.RESIDUE_DESCRIPTIONS

    def __str__(self):
        return 'Residue %s (index %d, chain %s)' % (self.name, self.index,
                                                    self.chain.name)

    def _repr_markdown_(self):
        return self.markdown_summary()

[docs]    def markdown_summary(self):
        """ Markdown-formatted information about this residue

        Returns:
            str: markdown-formatted string
        """
        if self.type == 'placeholder':
            return '`%s`' % repr(self)

        if self.molecule is None:
            lines = ["<h3>Residue %s</h3>" % self.name]
        else:
            lines = ["<h3>Residue %s (index %d)</h3>" % (self.name, self.index)]

        if self.type == 'protein':
            lines.append('**Residue codes**: %s / %s' % (self.resname, self.code))
        else:
            lines.append("**Residue code**: %s" % self.resname)
        lines.append('**Type**: %s' % self.type)
        if self.resname in data.RESIDUE_DESCRIPTIONS:
            lines.append('**Description**: %s' % data.RESIDUE_DESCRIPTIONS[self.resname])

        lines.append('**<p>Chain:** %s' % self.chain.name)

        lines.append('**Sequence number**: %d' % self.pdbindex)

        terminus = None
        if self.type == 'dna':
            if self.is_3prime_end:
                terminus = "3' end"
            elif self.is_5prime_end:
                terminus = "5' end"
        elif self.type == 'protein':
            if self.is_n_terminal:
                terminus = 'N-terminus'
            elif self.is_c_terminal:
                terminus = 'C-terminus'
        if terminus is not None:
            lines.append('**Terminal residue**: %s of chain %s' % (terminus, self.chain.name))

        if self.molecule is not None:
            lines.append("**Molecule**: %s" % self.molecule.name)

        lines.append("**<p>Number of atoms**: %s" % self.num_atoms)
        if self.backbone:
            lines.append("**Backbone atoms:** %s" % ', '.join(x.name for x in self.backbone))
            lines.append("**Sidechain atoms:** %s" % ', '.join(x.name for x in self.sidechain))
        else:
            lines.append("**Atom:** %s" % ', '.join(x.name for x in self.atoms))

        return '<br>'.join(lines)