# Source code for moldesign.molecules.chain

# Copyright 2016 Autodesk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections

import moldesign as mdt
from moldesign import utils, data

from . import Entity, toplevel


@toplevel
class Chain(Entity):
    """ Biomolecular chain class - its children are almost always residues.

    Attributes:
        parent (mdt.Molecule): the molecule this chain belongs to
    """
    # NOTE(review): the original docstring also listed a ``chain`` attribute ("the chain this
    # residue belongs to"), apparently copied from the residue class - confirm whether a chain
    # really exposes it before re-documenting it.

    @utils.args_from(Entity)
    def __init__(self, pdbname=None, **kwargs):
        super(Chain, self).__init__(pdbname=pdbname, **kwargs)

        # fall back to the PDB name when no explicit name was given
        if self.name is None:
            self.name = self.pdbname

        # NOTE(review): this replaces an explicitly supplied pdbindex with pdbname; the test
        # was possibly meant to be ``is None`` (mirroring the ``name`` fallback above).
        # Behavior is left unchanged pending confirmation.
        if self.pdbindex is not None:
            self.pdbindex = self.pdbname

        # lazily computed caches - see ``type`` and ``_get_chain_end``
        self._type = None
        self._5p_end = self._3p_end = self._n_terminal = self._c_terminal = None

    @property
    def type(self):
        """ str: the type of chain - protein, DNA, solvent, etc.

        This field returns the type of chain, classified by the following rules:

        1) If the chain contains only one type of residue, it is given that classification
           (so a chain containing only ions has type "ion")
        2) If the chain contains a biopolymer + ligands and solvent, it is classified as a
           biopolymer (i.e. 'protein', 'dna', or 'rna'). This is the most common case with
           .pdb files from the PDB.
        3) If the chain contains multiple biopolymer types, it will be given a hybrid
           classification (e.g. 'dna/rna', 'protein/dna') - this is rare!
        4) If it contains multiple kinds of non-biopolymer residues, it will be called
           "solvent" (if all non-bio residues are water/solvent/ion) or given a hybrid name
           as in 3)

        The result is cached in ``self._type``; ``add`` clears the cache.
        """
        if self._type is not None:
            return self._type

        counts = collections.Counter(res.type for res in self.residues)
        unique_types = sum(bool(v) for v in counts.values())

        if unique_types == 1:
            restype = self.residues[0].type
            if self.num_residues == 1:
                # a lone residue may have a dedicated monomer name
                self._type = data.CHAIN_MONOMER_NAMES.get(restype, restype)
            else:
                self._type = restype
        else:
            present_polymers = [k for k in data.BIOPOLYMER_TYPES if counts[k]]
            if present_polymers:
                # one entry: the most common case - a polymer + solvent/ligands
                # several entries: rare hybrids, e.g. "dna/rna"
                self._type = '/'.join(present_polymers)
            elif counts['unknown'] > 0:  # some molecule + solvent
                self._type = '/'.join(k for k in counts if counts[k])
            else:  # just solvent
                self._type = 'solvent'

        return self._type

    @property
    def polymer_residues(self):
        """Iterator over this chain's biopolymer ('dna', 'rna' or 'protein') residues"""
        # 'rna' is included so that this agrees with ``type`` and ``sequence``
        for res in self.residues:
            if res.type in ('dna', 'rna', 'protein'):
                yield res

    @property
    def solvent_residues(self):
        """Iterator over this chain's 'water', 'solvent' and 'ion' residues"""
        for res in self.residues:
            if res.type in ('water', 'solvent', 'ion'):
                yield res

    @property
    def unclassified_residues(self):
        """Iterator over this chain's residues whose type is 'unknown'"""
        for res in self.residues:
            if res.type == 'unknown':
                yield res

    def get_ligand(self):
        """ Return a (single) ligand if it exists; raises ValueError if there's not exactly one

        This is a utility routine to get a single ligand from a chain. If there's exactly one
        unclassified residue, it is returned. If not, ValueError is raised - use
        :meth:`Chain.unclassified_residues` to get an iterator over all unclassified residues.

        Returns:
            moldesign.Residue: ligand residue

        Raises:
            ValueError: if the chain does not contain exactly one unclassifiable residue
        """
        iterator = self.unclassified_residues
        try:
            ligand = next(iterator)
        except StopIteration:
            raise ValueError('This chain does not appear to contain any ligands')

        # a second hit means the choice of ligand would be ambiguous
        try:
            next(iterator)
        except StopIteration:
            return ligand
        else:
            raise ValueError('Multiple ligands detected. Use `chain.unclassified_residues` to '
                             'iterate over them')

    def to_json(self):
        """Return this chain's JSON representation

        The ``index``, ``name`` and ``pdbindex`` fields are passed to ``mdt.chemjson.jsonify``;
        a ``'residues'`` entry holding the index of each residue is then added to its result.
        """
        js = mdt.chemjson.jsonify(self, 'index name pdbindex'.split())
        js['residues'] = [res.index for res in self.residues]
        return js

    def copy(self):
        # the parent's copy is indexed like a sequence of atoms; the new chain is reached
        # through the first of them (so this raises IndexError if the result is empty)
        newatoms = super(Chain, self).copy()
        return newatoms[0].chain
    copy.__doc__ = Entity.copy.__doc__

    @property
    def num_residues(self):
        """int: number of residues in this chain (``len(self)``)"""
        return len(self)
    nresidues = numresidues = num_residues

    @property
    def residues(self):
        """ChildList: list of residues in this chain """
        return self.children

    def add(self, residue, **kwargs):
        """Add a residue to this chain

        A residue that has no chain yet is assigned to this one; a residue that already
        belongs to a different chain is rejected. All keyword arguments are forwarded to the
        parent class's ``add``, whose return value is returned.

        Raises:
            AssertionError: if ``residue.chain`` is set to a different chain
        """
        if residue.chain is None:
            residue.chain = self
        else:
            assert residue.chain is self, "Residue is not a member of this chain"

        # the cached classification may no longer describe the chain's contents
        self._type = None
        return super(Chain, self).add(residue, **kwargs)

    def _get_chain_end(self, restype, selfattr, test):
        """Look up (and cache) a terminal residue of this chain

        Args:
            restype (str): residue type to consider (e.g. 'protein')
            selfattr (str): name of the attribute on ``self`` that caches the result
            test (str): name of the boolean residue attribute identifying the chain end

        Returns:
            the cached residue, or ``None`` if none has been found
        """
        currval = getattr(self, selfattr)
        # search again if nothing is cached or the cached residue no longer passes the test
        if currval is None or not getattr(currval, test):
            for residue in self.residues:
                if residue.type != restype:
                    continue
                if getattr(residue, test):
                    setattr(self, selfattr, residue)
                    break
        return getattr(self, selfattr)

    @property
    def c_terminal(self):
        """moldesign.Residue: The chain's C-terminus (or ``None`` if it does not exist)"""
        return self._get_chain_end('protein', '_c_terminal', 'is_c_terminal')

    @property
    def n_terminal(self):
        """moldesign.Residue: The chain's N-terminus (or ``None`` if it does not exist)"""
        return self._get_chain_end('protein', '_n_terminal', 'is_n_terminal')

    @property
    def fiveprime_end(self):
        """moldesign.Residue: The chain's 5' base (or ``None`` if it does not exist)"""
        return self._get_chain_end('dna', '_5p_end', 'is_5prime_end')

    @property
    def threeprime_end(self):
        """moldesign.Residue: The chain's 3' base (or ``None`` if it does not exist)"""
        return self._get_chain_end('dna', '_3p_end', 'is_3prime_end')

    def assign_biopolymer_bonds(self):
        """Connect bonds between residues in this chain.

        Residues are ordered by integer ``pdbindex``; consecutive residues of the same type
        with contiguous indices are joined (C-N for protein, O3'-P for DNA).

        See Also:
            :ref:`moldesign.Residue.assign_template_bonds`

        Raises:
            ValueError: if ``residue.resname`` is not in bioresidue templates
            KeyError: if an atom in this residue is not recognized
            NotImplementedError: if two contiguous RNA residues are encountered
        """
        residues = sorted(self, key=lambda x: int(x.pdbindex))

        bond_graph = {}
        for r1, r2 in zip(residues, residues[1:]):
            # don't assign bonds unless these are contiguous bioresidues
            # (indices are compared as ints, consistent with the sort key above)
            if int(r1.pdbindex) + 1 != int(r2.pdbindex):
                continue
            restype = r1.type
            if r2.type != restype:
                continue

            # Create the bonds
            if restype == 'protein':
                bond_graph[r1['C']] = {r2['N']: 1}
                bond_graph[r2['N']] = {r1['C']: 1}
            elif restype == 'dna':
                bond_graph[r1["O3'"]] = {r2['P']: 1}
                bond_graph[r2['P']] = {r1["O3'"]: 1}
            elif restype == 'rna':
                raise NotImplementedError('RNA not yet implemented')

        # copy bonds into the right structure (do this last to avoid mangling the graph)
        for atom in bond_graph:
            atom.bond_graph.update(bond_graph[atom])

    @property
    def sequence(self):
        """str: this chain's residue sequence with one-letter residue codes

        Only 'protein', 'dna' and 'rna' residues are included, ordered by ``pdbindex``.
        Gaps in the numbering are shown as one '.' per missing index; residues whose code is
        '?' are written out as their ``pdbname``, delimited by commas.
        """
        missing = '.'  # don't do this
        outputs = []
        last_idx = None
        for res in sorted(self, key=lambda x: x.pdbindex):
            if res.type not in ('protein', 'dna', 'rna'):
                continue

            if last_idx is not None:
                num_missing = res.pdbindex - last_idx - 1
                if num_missing > 0:
                    outputs.append(missing * num_missing)

            if res.code != '?':
                outputs.append(res.code)
            else:
                # open a comma-delimited name unless the previous piece already ends in ','
                if outputs and not outputs[-1].endswith(','):
                    outputs.append(',')
                outputs.append(res.pdbname + ',')

            last_idx = res.pdbindex
        return ''.join(outputs)

    def __str__(self):
        return 'Chain %s' % self.name