Source code for moldesign.interfaces.ambertools

# Copyright 2016 Autodesk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import re

import traitlets

import moldesign as mdt
import pyccc
from moldesign import compute, uibase, utils
from moldesign import units as u

IMAGE = 'ambertools'

AmberParameters = collections.namedtuple('AmberParameters',
                                         'prmtop inpcrd job')
ExtraAmberParameters = collections.namedtuple('GAFFParameters',
                                              'lib frcmod job')

DNA_FF = {ff: 'leaprc.DNA.%s'%ff for ff in ['OL15', 'bsc1']}
RNA_FF = {ff: 'leaprc.RNA.%s'%ff for ff in ['OL3', 'YIL']}
PROTEIN_FF = {ff: 'leaprc.protein.%s'%ff for ff in
              ['ff14SB',
               'ff14SBonlysc',
               'ff14SB.redq',
               'ff15ipq',
               'ff15ipq-vac',
               'ff03.r1',
               'ff03ua',
               'fb15']}
LIPID_FF = {'lipid14': 'leaprc.lipid14'}
WATER_FF = {ff: 'leaprc.water.%s'%ff for ff in
            ['tip3p', 'tip4pew', 'spce', 'opc']}
CARB_FF = {ff: 'leaprc.%s'%ff for ff in ['GLYCAM_06EPb', 'GLYCAM_06j-1']}
ORGANIC_FF = {ff: 'leaprc.%s'%ff for ff in ['gaff', 'gaff2']}
OTHER_FF = {ff: 'leaprc.%s'%ff for ff in ['phosaa10', 'modrna08', 'xFPchromophores']}
LEAPRCFILES = {}
for ffiles in (DNA_FF, RNA_FF, PROTEIN_FF, LIPID_FF, CARB_FF, WATER_FF, ORGANIC_FF, OTHER_FF):
    LEAPRCFILES.update(ffiles)


[docs]class ParameterizationError(Exception):
    pass


@utils.kwargs_from(mdt.compute.run_job)
[docs]def calc_am1_bcc_charges(mol, **kwargs):
    """Calculate am1 bcc charges

    Args:
        mol (moldesign.Molecule): assign partial charges to this molecule
            (they will be stored at ``mol.properties['am1-bcc']``)

    Note:
        This will implicity run an AM1 energy minimization before calculating the final
        partial charges. For more control over this process, use the
        ``moldesign.models.SQMPotential`` energy model to calculate the charges.

    Returns:
        Mapping[moldesign.Atom, units.Scalar[charge]]: AM1-BCC partial charges on each atom
    """
    return _antechamber_calc_charges(mol, 'bcc', 'am1-bcc', kwargs)


@utils.kwargs_from(mdt.compute.run_job)
[docs]def calc_gasteiger_charges(mol, **kwargs):
    """Calculate gasteiger charges

    Args:
        mol (moldesign.Molecule): assign partial charges to this molecule

    Returns:
        Mapping[moldesign.Atom, units.Scalar[charge]]: gasteiger partial charges on each atom
             (they will be stored at ``mol.properties['gasteiger']``)
    """
    return _antechamber_calc_charges(mol, 'gas', 'gasteiger', kwargs)


def _antechamber_calc_charges(mol, ambname, chargename, kwargs):
    charge = utils.if_not_none(mol.charge, 0)
    command = 'antechamber -fi pdb -i mol.pdb -fo mol2 -o out.mol2 -c %s -an n'%ambname
    if charge != 0:
        command += ' -nc %d'%charge.value_in(u.q_e)

    def finish_job(job):
        """Callback to complete the job"""
        lines = iter(job.get_output('out.mol2').read().split('\n'))
        charges = utils.DotDict(type='atomic')

        line = lines.next()
        while line.strip()[:len('@<TRIPOS>ATOM')] != '@<TRIPOS>ATOM':
            line = lines.next()

        line = lines.next()
        while line.strip()[:len('@<TRIPOS>BOND')] != '@<TRIPOS>BOND':
            fields = line.split()
            idx = int(fields[0])-1
            assert mol.atoms[idx].name == fields[1]
            charges[mol.atoms[idx]] = u.q_e*float(fields[-1])
            line = lines.next()

        mol.properties[chargename] = charges
        return charges

    job = pyccc.Job(image=mdt.compute.get_image_path(IMAGE),
                    command=command,
                    name="%s, %s"%(chargename, mol.name),
                    inputs={'mol.pdb': mol.write(format='pdb')},
                    when_finished=finish_job)
    return compute.run_job(job, _return_result=True, **kwargs)


@utils.kwargs_from(mdt.compute.run_job)
[docs]def build_bdna(sequence, **kwargs):
    """ Uses Ambertools' Nucleic Acid Builder to build a 3D double-helix B-DNA structure.

    Args:
        sequence (str): DNA sequence for one of the strands (a complementary sequence will
            automatically be created)
        **kwargs: arguments for :meth:`compute.run_job`

    Returns:
        moldesign.Molecule: B-DNA double helix
    """
    print('DeprecationWarning: build_bdna is deprecated. '
          "Use `build_dna_helix(sequence, helix_type='b')` instead")
    return build_dna_helix(sequence, helix_type='b', **kwargs)


@utils.kwargs_from(mdt.compute.run_job)
[docs]def build_dna_helix(sequence, helix_type='B', **kwargs):
    """ Uses Ambertools' Nucleic Acid Builder to build a 3D DNA double-helix.

    Args:
        sequence (str): DNA sequence for one of the strands (a complementary sequence will
            automatically be created)
        helix_type (str): Type of helix - 'A'=Arnott A-DNA
                                          'B'=B-DNA (from standard templates and helical params),
                                          'LB'=Langridge B-DNA,
                                          'AB'=Arnott B-DNA,
                                          'SB'=Sasisekharan left-handed B-DNA
        **kwargs: arguments for :meth:`compute.run_job`

    All helix types except 'B' are taken from fiber diffraction data (see the refernce for details)

    Returns:
        moldesign.Molecule: B-DNA double helix

    References:
        See NAB / AmberTools documentation: http://ambermd.org/doc12/Amber16.pdf, pg 771-2
    """
    infile = ['molecule m;']
    if helix_type.lower() == 'b':
        infile.append('m = bdna( "%s" );' % sequence.lower())
    else:
        infile.append('m = fd_helix( "%sdna",  "%s", "dna" );'
                      % (helix_type.lower(), sequence.lower()))
    infile.append('putpdb( "helix.pdb", m, "-wwpdb");\n')

    def finish_job(job):
        mol = mdt.fileio.read_pdb(job.get_output('helix.pdb').open(), assign_ccd_bonds=False)
        if mol.num_chains == 1:
            assert mol.num_residues % 2 == 0
            oldchain = mol.chains[0]
            oldchain.name = oldchain.pdbindex = oldchain.pdbname = 'A'
            newchain = mdt.Chain('B')
            for residue in mol.residues[mol.num_residues//2:]:
                residue.chain = newchain
                for atom in residue:
                    atom.chain = newchain
            mol = mdt.Molecule(mol)
        mdt.helpers.assign_biopolymer_bonds(mol)

        mol.name = '%s-DNA Helix: %s' % (helix_type.upper(), sequence)
        return mol

    job = pyccc.Job(command='nab -o buildbdna build.nab && ./buildbdna',
                    image=mdt.compute.get_image_path(IMAGE),
                    inputs={'build.nab': '\n'.join(infile)},
                    name='NAB_build_dna',
                    when_finished=finish_job)

    return mdt.compute.run_job(job, _return_result=True, **kwargs)


@utils.kwargs_from(compute.run_job)
[docs]def run_tleap(mol, forcefields=None, parameters=None, **kwargs):
    """
    Drives tleap to create a prmtop and inpcrd file. Specifically uses the AmberTools 16
    tleap distribution.

    Defaults are as recommended in the ambertools manual.

    Args:
        mol (moldesign.Molecule): Molecule to set up
        forcefields (List[str]): list of the names of forcefields to use
            (see AmberTools manual for descriptions)
        parameters (List[ExtraAmberParameters]): (optional) list of amber parameters
            for non-standard residues
        **kwargs: keyword arguments to :meth:`compute.run_job`

    References:
        Ambertools Manual, http://ambermd.org/doc12/Amber16.pdf. See page 33 for forcefield
        recommendations.
    """
    # Prepare input for tleap
    if forcefields is None: forcefields = mdt.forcefields.ffdefaults.values()
    leapstr = ['source %s' % LEAPRCFILES[ff] for ff in forcefields]
    inputs = {'input.pdb': mdt.helpers.insert_ter_records(mol, mol.write(format='pdb'))}

    if parameters:
        if isinstance(parameters, ExtraAmberParameters):
            parameters = [parameters]
        for ipmtr, p in enumerate(parameters):
            frcname = 'res%d.frcmod' % ipmtr
            libname = 'res%d.lib' % ipmtr
            inputs[frcname] = p.frcmod
            inputs[libname] = p.lib
            leapstr.append('loadamberparam %s' % frcname)
            leapstr.append('loadoff %s' % libname)

    leapstr.append('mol = loadpdb input.pdb\n'
                   "check mol\n"
                   "saveamberparm mol output.prmtop output.inpcrd\n"
                   "savepdb mol output.pdb\n"
                   "quit\n")

    inputs['input.leap'] = '\n'.join(leapstr)

    job = pyccc.Job(image=compute.get_image_path(IMAGE),
                    command='tleap -f input.leap',
                    inputs=inputs,
                    name="tleap, %s" % mol.name)

    return compute.run_job(job, **kwargs)


@utils.args_from(run_tleap)
[docs]def assign_forcefield(mol, **kwargs):
    """ see run_tleap docstring """
    from moldesign.widgets.parameterization import ParameterizationDisplay
    job = run_tleap(mol, **kwargs)

    if 'output.inpcrd' in job.get_output():
        prmtop = job.get_output('output.prmtop')
        inpcrd = job.get_output('output.inpcrd')
        params = AmberParameters(prmtop, inpcrd, job)
        newmol = mdt.read_amber(params.prmtop, params.inpcrd)
        newmol.ff.amber_params = params
    else:
        newmol = None

    errors = _parse_tleap_errors(job, mol)

    try:
        report = ParameterizationDisplay(errors, mol, molout=newmol)
        uibase.display_log(report, title='ERRORS/WARNINGS', show=True)
    except traitlets.TraitError:
        print 'Forcefield assignment: %s' % ('Success' if newmol is not None else 'Failure')
        for err in errors:
            print err.desc

    if newmol is not None:
        return newmol
    else:
        raise ParameterizationError('TLeap failed to assign force field parameters for %s' % mol, job)


@utils.kwargs_from(mdt.compute.run_job)
[docs]def parameterize(mol, charges='esp', ffname='gaff2', **kwargs):
    """Parameterize ``mol``, typically using GAFF parameters.

    This will both assign a forcefield to the molecule (at ``mol.ff``) and produce the parameters
    so that they can be used in other systems (e.g., so that this molecule can be simulated
    embedded in a larger protein)

    Note:
        'am1-bcc' and 'gasteiger' partial charges will be automatically computed if necessary.
        Other charge types must be precomputed.

    Args:
        mol (moldesign.Molecule):
        charges (str or dict): what partial charges to use? Can be a dict (``{atom:charge}``) OR
            a string, in which case charges will be read from
           ``mol.properties.[charges name]``; typical values will be 'esp', 'mulliken',
           'am1-bcc', etc. Use 'zero' to set all charges to 0 (for QM/MM and testing)
        ffname (str): Name of the gaff-like forcefield file (default: gaff2)

    Returns:
        ExtraAmberParameters: Parameters for the molecule; this object can be used to create
            forcefield parameters for other systems that contain this molecule
    """
    assert mol.num_residues == 1
    if mol.residues[0].resname is None:
        mol.residues[0].resname = 'UNL'
        print 'Assigned residue name "UNL" to %s' % mol
    resname = mol.residues[0].resname

    if charges == 'am1-bcc' and 'am1-bcc' not in mol.properties:
        calc_am1_bcc_charges(mol)
    elif charges == 'gasteiger' and 'gasteiger' not in mol.properties:
        calc_gasteiger_charges(mol)

    if charges == 'zero':
        charge_array = [0.0 for atom in mol.atoms]
    elif isinstance(charges, basestring):
        charge_array = u.array([mol.properties[charges][atom] for atom in mol.atoms])
        if not charge_array.dimensionless:  # implicitly convert floats to fundamental charge units
            charge_array = charge_array.to(u.q_e).magnitude
    else:
        charge_array = [charges[atom] for atom in mol.atoms]

    inputs = {'mol.mol2': mol.write(format='mol2'),
              'mol.charges': '\n'.join(map(str, charge_array))}

    cmds = ['antechamber -i mol.mol2 -fi mol2 -o mol_charged.mol2 -fo mol2 -c rc -cf mol.charges',
            'parmchk -i mol_charged.mol2 -f mol2 -o mol.frcmod', 'tleap -f leap.in']

    inputs['leap.in'] = '\n'.join(["source leaprc.%s" % ffname,
                                   "%s = loadmol2 mol_charged.mol2" % resname,
                                   "fmod = loadamberparams mol.frcmod",
                                   "check %s" % resname,
                                   "saveoff %s mol.lib" % resname,
                                   "saveamberparm %s mol.prmtop mol.inpcrd" % resname,
                                   "quit\n"])

    def finish_job(j):
        param = ExtraAmberParameters(j.get_output('mol.lib'),
                                     j.get_output('mol.frcmod'),
                                     j)
        tempmol = mdt.assign_forcefield(mol, parameters=param)
        mol.ff = tempmol.ff
        return param

    job = pyccc.Job(image=mdt.compute.get_image_path(IMAGE),
                    command=' && '.join(cmds),
                    inputs=inputs,
                    when_finished=finish_job,
                    name="GAFF assignment: %s" % mol.name)

    return mdt.compute.run_job(job, _return_result=True, **kwargs)


ATOMSPEC = re.compile(r'\.R<(\S+) (\d+)>\.A<(\S+) (\d+)>')


def _parse_tleap_errors(job, molin):
    from moldesign.widgets.parameterization import UnusualBond, UnknownAtom, UnknownResidue

    # TODO: special messages for known problems (e.g. histidine)
    msg = []
    unknown_res = set()  # so we can print only one error per unkonwn residue
    lineiter = iter(job.stdout.split('\n'))
    offset = utils.if_not_none(molin.residues[0].pdbindex, 0)
    reslookup = {str(i+offset): r for i,r in enumerate(molin.residues)}

    def _atom_from_re(s):
        resname, residx, atomname, atomidx = s
        r = reslookup[residx]
        a = r[atomname]
        return a

    def unusual_bond(l):
        atomre1, atomre2 = ATOMSPEC.findall(l)
        try:
            a1, a2 = _atom_from_re(atomre1), _atom_from_re(atomre2)
        except KeyError:
            a1 = a2 = None
        r1 = reslookup[atomre1[1]]
        r2 = reslookup[atomre2[1]]
        return UnusualBond(l, (a1, a2), (r1, r2))

    while True:
        try:
            line = lineiter.next()
        except StopIteration:
            break

        fields = line.split()
        if fields[0:2] == ['Unknown','residue:']:
            # EX: "Unknown residue: 3TE   number: 499   type: Terminal/beginning"
            res = molin.residues[int(fields[4])]
            unknown_res.add(res)
            msg.append(UnknownResidue(line,res))

        elif fields[:4] == 'Warning: Close contact of'.split():
            # EX: "Warning: Close contact of 1.028366 angstroms between .R<DC5 1>.A<HO5' 1> and .R<DC5 81>.A<P 9>"
            msg.append(unusual_bond(line))

        elif fields[:6] == 'WARNING: There is a bond of'.split():
            # Matches two lines, EX:
            # "WARNING: There is a bond of 34.397700 angstroms between:"
            # "-------  .R<DG 92>.A<O3' 33> and .R<DG 93>.A<P 1>"
            nextline = lineiter.next()
            msg.append(unusual_bond(line + nextline))

        elif fields[:5] == 'Created a new atom named:'.split():
            # EX: "Created a new atom named: P within residue: .R<DC5 81>"
            residue = reslookup[fields[-1][:-1]]
            if residue in unknown_res: continue  # suppress atoms from an unknown res ...
            atom = residue[fields[5]]
            msg.append(UnknownAtom(line, residue, atom))


    return msg