Source code for moldesign.interfaces.openbabel

# Copyright 2016 Autodesk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import

import os
import string

import collections
import moldesign.molecules.atomcollections

try:
    import pybel as pb
    import openbabel as ob
    # WARNING: this is the real library, not our interface - this works because of absolute
    # imports. We should probably rename this interface
except ImportError:
    force_remote = True
else:  # this should be configurable
    force_remote = False  # debugging

import moldesign as mdt
from moldesign.compute.runsremotely import runsremotely
import moldesign.molecules.atoms
from moldesign import units as u


[docs]def read_file(filename, name=None, format=None): """ Read a molecule from a file Note: Currently only reads the first conformation in a file Args: filename (str): path to file name (str): name to assign to molecule format (str): File format: pdb, sdf, mol2, bbll, etc. Returns: moldesign.Molecule: parsed result """ # TODO: check for openbabel molecule name? if format is None: format = filename.split('.')[-1] if force_remote: with open(filename, 'r') as infile: mol = read_string(infile.read(), format, name=name) return mol else: pbmol = pb.readfile(format=format, filename=filename).next() if name is None: name = filename mol = pybel_to_mol(pbmol, name=os.path.basename(name)) mol.filename = filename return mol
[docs]def read_stream(filelike, format, name=None): """ Read a molecule from a file-like object Note: Currently only reads the first conformation in a file Args: filelike: a file-like object to read a file from format (str): File format: pdb, sdf, mol2, bbll, etc. name (str): name to assign to molecule Returns: moldesign.Molecule: parsed result """ molstring = filelike.read() return read_string(molstring, format, name=name)
@runsremotely(enable=force_remote)
[docs]def read_string(molstring, format, name=None): """ Read a molecule from a file-like object Note: Currently only reads the first conformation in a file Args: molstring (str): string containing file contents format (str): File format: pdb, sdf, mol2, bbll, etc. name (str): name to assign to molecule Returns: moldesign.Molecule: parsed result """ pbmol = pb.readstring(format, molstring) mol = pybel_to_mol(pbmol, name=name) return mol
@runsremotely(enable=force_remote)
[docs]def write_string(mol, format): """ Create a file from the passed molecule Args: mol (moldesign.Molecule): molecule to write format (str): File format: pdb, sdf, mol2, bbll, etc. Returns: str: contents of the file """ pbmol = mol_to_pybel(mol) outstr = pbmol.write(format=format) return outstr
[docs]def write_file(mol, filename=None, mode='w', format=None): """ Write molecule to a file Args: mol (moldesign.Molecule): molecule to write filename (str): File to write to mode (str): Writing mode (e.g. 'w' to overwrite, the default, or 'a' to append) format (str): File format: pdb, sdf, mol2, bbll, etc. """ if format is None: format = filename.split('.')[-1] outstr = write_string(mol, format) if filename is None: return outstr else: with open(filename, mode) as wrf: print >> wrf, outstr
@runsremotely(enable=force_remote)
[docs]def guess_bond_orders(mol): """Use OpenBabel to guess bond orders using geometry and functional group templates. Args: mol (moldesign.Molecule): Molecule to perceive the bonds of Returns: moldesign.Molecule: New molecule with assigned bonds """ # TODO: pH, formal charges pbmol = mol_to_pybel(mol) pbmol.OBMol.PerceiveBondOrders() newmol = pybel_to_mol(pbmol) return newmol
@runsremotely(enable=force_remote)
[docs]def add_hydrogen(mol, ph=None): """Add hydrogens to saturate atomic valences. Args: mol (moldesign.Molecule): Molecule to saturate ph (float): Assign formal charges and protonation using pH model; if None (the default), neutral protonation will be assigned where possible. Returns: moldesign.Molecule: New molecule with all valences saturated """ pbmol = mol_to_pybel(mol) pbmol.OBMol.AddHydrogens(False, ph is not None,) newmol = pybel_to_mol(pbmol, reorder_atoms_by_residue=True) mdt.helpers.assign_unique_hydrogen_names(newmol) return newmol
[docs]def mol_to_pybel(mdtmol): """ Translate a moldesign molecule object into a pybel molecule object. Note: The focus is on translating topology and biomolecular structure - we don't translate any metadata. Args: mdtmol (moldesign.Molecule): molecule to translate Returns: pybel.Molecule: translated molecule """ obmol = ob.OBMol() obmol.BeginModify() atommap = {} resmap = {} for atom in mdtmol.atoms: obatom = obmol.NewAtom() obatom.SetAtomicNum(atom.atnum) atommap[atom] = obatom pos = atom.position.value_in(u.angstrom) obatom.SetVector(*pos) if atom.residue and atom.residue not in resmap: obres = obmol.NewResidue() resmap[atom.residue] = obres obres.SetChain(bytes( mdt.utils.if_not_none(atom.chain.name, 'Z')[0] )) obres.SetName(bytes( mdt.utils.if_not_none(atom.residue.pdbname, 'UNL') )) obres.SetNum(mdt.utils.if_not_none(atom.residue.pdbindex, '0')) else: obres = resmap[atom.residue] obres.AddAtom(obatom) obres.SetHetAtom(obatom, not atom.residue.is_standard_residue) obres.SetAtomID(obatom, bytes(atom.name)) obres.SetSerialNum(obatom, mdt.utils.if_not_none(atom.pdbindex, atom.index+1)) for atom in mdtmol.bond_graph: a1 = atommap[atom] for nbr, order in mdtmol.bond_graph[atom].iteritems(): a2 = atommap[nbr] if a1.GetIdx() > a2.GetIdx(): obmol.AddBond(a1.GetIdx(), a2.GetIdx(), order) obmol.EndModify() pbmol = pb.Molecule(obmol) for atom in atommap: idx = atommap[atom].GetIdx() obatom = obmol.GetAtom(idx) obatom.SetFormalCharge(int(atom.formal_charge.value_in(u.q_e))) return pbmol
[docs]def pybel_to_mol(pbmol, atom_names=True, reorder_atoms_by_residue=False, primary_structure=True, **kwargs): """ Translate a pybel molecule object into a moldesign object. Note: The focus is on translating topology and biomolecular structure - we don't translate any metadata. Args: pbmol (pybel.Molecule): molecule to translate atom_names (bool): use pybel's atom names (default True) reorder_atoms_by_residue (bool): change atom order so that all atoms in a residue are stored contiguously primary_structure (bool): translate primary structure data as well as atomic data **kwargs (dict): keyword arguments to moldesign.Molecule __init__ method Returns: moldesign.Molecule: translated molecule """ newatom_map = {} newresidues = {} newchains = {} newatoms = moldesign.molecules.atomcollections.AtomList([]) backup_chain_names = list(string.ascii_uppercase) for pybatom in pbmol.atoms: obres = pybatom.OBAtom.GetResidue() if atom_names: name = obres.GetAtomID(pybatom.OBAtom).strip() else: name = None if pybatom.atomicnum == 67: print ("WARNING: openbabel parsed atom serial %d (name:%s) as Holmium; " "correcting to hydrogen. ") % (pybatom.OBAtom.GetIdx(), name) atnum = 1 elif pybatom.atomicnum == 0: print "WARNING: openbabel failed to parse atom serial %d (name:%s); guessing %s. " % ( pybatom.OBAtom.GetIdx(), name, name[0]) atnum = moldesign.data.ATOMIC_NUMBERS[name[0]] else: atnum = pybatom.atomicnum mdtatom = moldesign.molecules.atoms.Atom(atnum=atnum, name=name, formal_charge=pybatom.formalcharge * u.q_e, pdbname=name, pdbindex=pybatom.OBAtom.GetIdx()) newatom_map[pybatom.OBAtom.GetIdx()] = mdtatom mdtatom.position = pybatom.coords * u.angstrom if primary_structure: obres = pybatom.OBAtom.GetResidue() resname = obres.GetName() residx = obres.GetIdx() chain_id = obres.GetChain() chain_id_num = obres.GetChainNum() if chain_id_num not in newchains: # create new chain if not mdt.utils.is_printable(chain_id.strip()) or not chain_id.strip(): chain_id = backup_chain_names.pop() print 'WARNING: assigned name %s to unnamed chain object @ %s' % ( chain_id, hex(chain_id_num)) chn = mdt.Chain(pdbname=str(chain_id)) newchains[chain_id_num] = chn else: chn = newchains[chain_id_num] if residx not in newresidues: # Create new residue pdb_idx = obres.GetNum() res = mdt.Residue(pdbname=resname, pdbindex=pdb_idx) newresidues[residx] = res chn.add(res) res.chain = chn else: res = newresidues[residx] # Assign the atom if mdtatom.name in res: mdtatom.name = '%s%d' % (mdtatom.name, pybatom.idx) # prevent name clashes res.add(mdtatom) newatoms.append(mdtatom) newtopo = {} for ibond in xrange(pbmol.OBMol.NumBonds()): obbond = pbmol.OBMol.GetBond(ibond) a1 = newatom_map[obbond.GetBeginAtomIdx()] a2 = newatom_map[obbond.GetEndAtomIdx()] order = obbond.GetBondOrder() if a1 not in newtopo: newtopo[a1] = {} if a2 not in newtopo: newtopo[a2] = {} newtopo[a1][a2] = order newtopo[a2][a1] = order if reorder_atoms_by_residue and primary_structure: resorder = {} for atom in newatoms: resorder.setdefault(atom.residue, len(resorder)) newatoms.sort(key=lambda a: resorder[a.residue]) return mdt.Molecule(newatoms, bond_graph=newtopo, **kwargs)
@runsremotely(enable=force_remote)
[docs]def from_smiles(smi, name=None): """ Translate a smiles string to a 3D structure. This method uses OpenBabel to generate a plausible 3D conformation of the 2D SMILES topology. We only use the first result from the conformation generator. Args: smi (str): smiles string name (str): name to assign to molecule (default - the smiles string) Returns: moldesign.Molecule: the translated molecule """ if name is None: name = smi pbmol = pb.readstring('smi', smi) pbmol.addh() pbmol.make3D() mol = pybel_to_mol(pbmol, name=name, atom_names=False, primary_structure=False) for atom in mol.atoms: atom.name = atom.elem + str(atom.index) return mol