# Source code for moldesign.molecules.residue

# Copyright 2016 Autodesk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import moldesign as mdt
from moldesign import utils, data

from . import Entity, AtomList, toplevel


@toplevel
class Residue(Entity):
    """ A biomolecular residue - most often an amino acid, a nucleic base, or a solvent
    molecule. In PDB structures, also often refers to non-biochemical molecules.

    Its children are almost always atoms.

    Attributes:
        parent (mdt.Molecule): the molecule this residue belongs to
        chain (Chain): the chain this residue belongs to
    """

    def copy(self):
        # The parent implementation returns the copied atoms; the new residue is
        # recovered from the first of them.
        newatoms = super(Residue, self).copy()
        return newatoms[0].residue
    copy.__doc__ = Entity.copy.__doc__

    def to_json(self):
        """Build a JSON-serializable description of this residue.

        Returns:
            the object returned by ``mdt.chemjson.jsonify`` for the ``index``, ``resname``,
            ``name`` and ``pdbindex`` fields, with ``'chain'`` set to the chain's index and
            ``'atoms'`` set to the list of atom indices
        """
        js = mdt.chemjson.jsonify(self, 'index resname name pdbindex'.split())
        js['chain'] = self.chain.index
        js['atoms'] = [atom.index for atom in self.atoms]
        return js

    @utils.args_from(Entity)
    def __init__(self, **kwargs):
        """ Initialization

        Args:
            **kwargs (): forwarded to ``Entity.__init__``; ``chain`` (optional) is also
                stored as ``self.chain``
        """
        self.chain = kwargs.get('chain', None)
        super(Residue, self).__init__(**kwargs)
        # Look the index up from the parent molecule if it wasn't passed explicitly
        if self.index is None and self.molecule is not None:
            self.index = self.molecule.residues.index(self)
        self.chainindex = None
        # Lazily-computed caches (see the ``backbone`` and ``sidechain`` properties)
        self._backbone = None
        self._sidechain = None
        # Set by ``assign_template_bonds`` once a bonding template has been found
        self._template_name = None
        # Default display name is the PDB residue name followed by its sequence number
        if self.name is None and self.pdbname is not None:
            self.name = self.pdbname + str(self.pdbindex)

    @property
    def atoms(self):
        """The atoms in this residue (synonym for ``self.children``)"""
        return self.children

    def add(self, atom, key=None):
        """Deals with atom name clashes within a residue - common for small molecules"""
        if atom.residue is not None:
            assert atom.residue is self, 'Atom already assigned to a residue'
        atom.residue = self
        if atom.chain is None:
            atom.chain = self.chain
        else:
            assert atom.chain == self.chain, "Atom's chain does not match residue's chain"

        if key is not None or atom.name not in self.children:
            return super(Residue, self).add(atom, key=key)
        else:
            # Name clash: disambiguate by appending the current number of children
            return super(Residue, self).add(atom, key='%s%s' % (atom.name, len(self)))
    add.__doc__ = Entity.add.__doc__

    @property
    def is_n_terminal(self):
        """bool: this is the first residue in a peptide

        Raises:
            ValueError: if this residue is not an amino acid
        """
        if self.type != 'protein':
            raise ValueError('%s is not a recognized peptide monomer' % self)
        return self._is_starting_residue

    @property
    def is_c_terminal(self):
        """bool: this is the last residue in a peptide

        Raises:
            ValueError: if this residue is not an amino acid
        """
        if self.type != 'protein':
            raise ValueError('%s is not a recognized peptide monomer' % self)
        return self._is_ending_residue

    @property
    def is_5prime_end(self):
        """bool: this is the first base in a strand

        Raises:
            ValueError: if this residue is not a DNA base
        """
        if self.type != 'dna':
            raise ValueError('%s is not a recognized nucleic acid monomer' % self)
        return self._is_starting_residue

    @property
    def is_3prime_end(self):
        """bool: this is the last base in a strand

        Raises:
            ValueError: if this residue is not a DNA base
        """
        if self.type != 'dna':
            raise ValueError('%s is not a recognized nucleic acid monomer' % self)
        return self._is_ending_residue

    @property
    def is_monomer(self):
        """bool: this residue is not part of a biopolymer
        """
        return self._is_ending_residue and self._is_starting_residue

    @property
    def _is_ending_residue(self):
        """bool: this is the last residue in a polymer"""
        try:
            self.next_residue
        except StopIteration:
            return True
        return False

    @property
    def _is_starting_residue(self):
        """bool: this is the first residue in a polymer"""
        try:
            self.prev_residue
        except StopIteration:
            return True
        return False

    def assign_template_bonds(self):
        """Assign bonds from bioresidue templates.

        Only assigns bonds that are internal to this residue (does not connect different residues).
        The topologies here assume pH7.4 and may need to be corrected for other pHs

        See Also:
            :ref:`moldesign.Chain.assign_biopolymer_bonds` for assigning inter-residue bonds

        Raises:
            KeyError: if no bonding template is found for this residue and it contains more
                than one atom (single-atom residues just print a message and return)
        """
        resname = self.resname
        try:
            if self.type == 'protein':
                if self.is_n_terminal:
                    resname = self.resname + '_LSN3'  # the protonated form (with NH3+ on the end)
                elif self.is_c_terminal:
                    resname = self.resname + '_LEO2H'  # deprotonated form (COO-)
                elif self.is_monomer:
                    # NOTE(review): a monomer is both a starting and an ending residue, so
                    # ``is_n_terminal`` is already True for it and this branch never runs as
                    # ordered. Left unchanged because checking ``is_monomer`` first would change
                    # which template free amino acids receive - confirm the intended precedence.
                    resname = self.resname + '_LFZW'  # free zwitterion form
            bonds_by_name = data.RESIDUE_BONDS[resname]
            self._template_name = resname
        except KeyError:
            if len(self) == 1:
                # Single-argument parenthesized form prints identically on python 2 and 3
                print('INFO: no bonds assigned to residue %s' % self)
                return
            else:
                raise KeyError("No bonding template for residue '%s'" % resname)

        # intra-residue bonds
        bond_graph = {atom: {} for atom in self}
        for atom in self:
            for nbrname, order in bonds_by_name.get(atom.name, {}).items():
                try:
                    nbr = self[nbrname]
                except KeyError:  # missing atoms are normal (often hydrogen)
                    continue
                bond_graph[atom][nbr] = bond_graph[nbr][atom] = order

        # copy bonds into the right structure (do this last to avoid mangling the graph)
        for atom in bond_graph:
            atom.bond_graph.update(bond_graph[atom])

    @property
    def next_residue(self):
        """Residue: The next residue in the chain (in the C-direction for proteins, 3'
        direction for nucleic acids)

        Raises:
            NotImplementedError: If we don't know how to deal with this type of biopolymer
            StopIteration: If there isn't a next residue (i.e. it's a 3'- or C-terminus)
        """
        if self.chain.type == 'protein':
            return self._get_neighbor('C', 'C-terminus')
        elif self.chain.type == 'dna':
            return self._get_neighbor("O3'", "3' end")
        else:
            raise NotImplementedError('We only deal with dna and amino acids right now')

    @property
    def prev_residue(self):
        """Residue: The previous residue in the chain (in the N-direction for proteins, 5'
        direction for nucleic acids)

        Raises:
            NotImplementedError: If we don't know how to deal with this type of biopolymer
            StopIteration: If there isn't a previous residue (i.e. it's a 5'- or N-terminus)
        """
        if self.chain.type == 'protein':
            return self._get_neighbor('N', 'N-terminus')
        elif self.chain.type == 'dna':
            return self._get_neighbor("P", "5' end")
        else:
            raise NotImplementedError('We only deal with dna and amino acids right now')

    def _get_neighbor(self, atomname, name):
        """Return the first residue found that's bound to the passed atom name

        Args:
            atomname (str): name of the atom in this residue whose bonds are searched
            name (str): description of the chain end, used in the error message

        Raises:
            StopIteration: if that atom has no bonds to atoms in other residues
        """
        conn_atom = self[atomname]
        for nbr in conn_atom.bond_graph:
            if nbr.residue is not self:
                return nbr.residue
        raise StopIteration('%s reached' % name)

    @property
    def resname(self):
        """str: Synonym for pdbname"""
        return self.pdbname

    @resname.setter
    def resname(self, val):
        self.pdbname = val

    @property
    def type(self):
        """str: Classification of the residue (protein, solvent, dna, water, unknown)"""
        return data.RESIDUE_TYPES.get(self.resname, 'unknown')

    @property
    def code(self):
        """str: one-letter amino acid code or two letter nucleic acid code, or '?' otherwise"""
        return data.RESIDUE_ONE_LETTER.get(self.pdbname, '?')

    @property
    def atomnames(self):
        """Residue: synonym for ```self``` for the sake of readability:
            ```molecule.chains['A'].residues[123].atomnames['CA']```
        """
        return self

    @property
    def backbone(self):
        """ AtomList: all backbone atoms for nucleic and protein residues
        (identified using PDB names); returns None for other residue types
        """
        if self._backbone is None:
            if self.type not in data.BACKBONES:
                return None
            self._backbone = AtomList()
            for name in data.BACKBONES[self.type]:
                try:
                    self._backbone.append(self[name])
                except KeyError:  # atoms missing from this residue are skipped
                    pass
        return self._backbone

    @property
    def sidechain(self):
        """ AtomList: all sidechain atoms for nucleic and protein residues
        (defined as non-backbone atoms); returns None for other residue types
        """
        if self._sidechain is None:
            if self.backbone is None:
                return None
            bb = set(self.backbone)
            # Built the same way as ``backbone`` so both properties return an AtomList
            sidechain = AtomList()
            for atom in self:
                if atom not in bb:
                    sidechain.append(atom)
            self._sidechain = sidechain
        return self._sidechain

    @property
    def is_standard_residue(self):
        """ bool: this residue is a "standard residue" for the purposes of a PDB entry.

        In PDB files, this will be stored using 'ATOM' if this is a standard residue
        and 'HETATM' records if not.

        Note:
            We currently define "standard" residues as those whose 3 letter residue code appears in
            the ``moldesign.data.RESIDUE_DESCRIPTIONS`` dictionary. Although this seems to work
            well, we'd welcome a PR with a less hacky method.

        References:
            PDB format guide: http://www.wwpdb.org/documentation/file-format
        """
        return self.resname in data.RESIDUE_DESCRIPTIONS

    def __str__(self):
        # ``%s`` (not ``%d``) and the getattr guard keep this usable when ``index`` or
        # ``chain`` is still None; the output is unchanged for integer indices.
        return 'Residue %s (index %s, chain %s)' % (self.name, self.index,
                                                    getattr(self.chain, 'name', None))

    def _repr_markdown_(self):
        return self.markdown_summary()

    def markdown_summary(self):
        """ Markdown-formatted information about this residue

        Returns:
            str: markdown-formatted string
        """
        if self.type == 'placeholder':
            return '`%s`' % repr(self)

        if self.molecule is None:
            lines = ["<h3>Residue %s</h3>" % self.name]
        else:
            lines = ["<h3>Residue %s (index %d)</h3>" % (self.name, self.index)]

        if self.type == 'protein':
            lines.append('**Residue codes**: %s / %s' % (self.resname, self.code))
        else:
            lines.append("**Residue code**: %s" % self.resname)
        lines.append('**Type**: %s' % self.type)
        if self.resname in data.RESIDUE_DESCRIPTIONS:
            lines.append('**Description**: %s' % data.RESIDUE_DESCRIPTIONS[self.resname])

        lines.append('**<p>Chain:** %s' % self.chain.name)

        lines.append('**Sequence number**: %d' % self.pdbindex)

        # Report which end of the chain this residue sits on, if any
        terminus = None
        if self.type == 'dna':
            if self.is_3prime_end:
                terminus = "3' end"
            elif self.is_5prime_end:
                terminus = "5' end"
        elif self.type == 'protein':
            if self.is_n_terminal:
                terminus = 'N-terminus'
            elif self.is_c_terminal:
                terminus = 'C-terminus'
        if terminus is not None:
            lines.append('**Terminal residue**: %s of chain %s' % (terminus, self.chain.name))

        if self.molecule is not None:
            lines.append("**Molecule**: %s" % self.molecule.name)

        lines.append("**<p>Number of atoms**: %s" % self.num_atoms)
        if self.backbone:
            lines.append("**Backbone atoms:** %s" % ', '.join(x.name for x in self.backbone))
            lines.append("**Sidechain atoms:** %s" % ', '.join(x.name for x in self.sidechain))
        else:
            lines.append("**Atom:** %s" % ', '.join(x.name for x in self.atoms))

        return '<br>'.join(lines)