madscatt / zazmol

Library for defining molecular objects to create simulation and analysis programs To install: python setup.py install dependencies: numpy, mocker
GNU General Public License v3.0
0 stars 2 forks source link

The align code is rough and poorly documented #7

Open StevenCHowell opened 7 years ago

StevenCHowell commented 7 years ago

It took some work and testing to figure out how to align sasmol objects. It was also not intuitive that adding more residues to the alignment region could lead to worse rather than better results. This was more clear when I understood the alignment was only based on coordinates, and did not consider the properties of the atoms being aligned.

I think the first step would be to improve the documentation and either wrap the current module in a more intuitive setup method or modify the current code. I will add the code I wrote as a wrapper in the comments below.

As a second step, perhaps consider writing a method that uses atom properties, perhaps using atom type, to align sasmol objects. This could be used to separate a match region to force C atoms to align with C atoms and O atoms to align with O atoms.

StevenCHowell commented 7 years ago

The alignment wrapper I wrote is contained in sassie/calculate/convergence_test.py because it was required to do before performing a spatial convergence test. It relies on the AlignInputs class:

class AlignInputs(object):

    def __init__(self, goal_pdb, move, ref_pdb, out_fname, **kwargs):
        self.goal_pdb = goal_pdb
        self.ref_pdb = ref_pdb
        self.move = move
        self.out_fname = out_fname
        self.path = kwargs.get('path', './')
        self.basis_atoms = kwargs.get('basis_atoms', 'CA')
        self.seg_or_chain = kwargs.get('seg_or_chain', 'segname')
        self.seg_chain = kwargs.get('seg_chain', 'GAG')
        self.min_resid = kwargs.get('min_resid', 20)
        self.max_resid = kwargs.get('max_resid', 30)
        default_filter = ('(({}[i] == "{}") and (name[i] == "{}") and '
                          '(resid[i] >= {}) and (resid[i] <= {}))'.format(
                              self.seg_or_chain, self.seg_chain,
                              self.basis_atoms, self.min_resid, self.max_resid))
        self.goal_filter = kwargs.get('goal_filter', default_filter)
        self.move_filter = kwargs.get('move_filter', default_filter)
        logging.debug('goal_pdb: {}'.format(self.goal_pdb))
        logging.debug('ref_pdb: {}'.format(self.ref_pdb))
        logging.debug('move: {}'.format(self.move))
        logging.debug('out_fname: {}'.format(self.out_fname))
        logging.debug('path: {}'.format(self.path))
        logging.debug('goal_filter: {}'.format(self.goal_filter))
        logging.debug('move_filter: {}'.format(self.move_filter))
StevenCHowell commented 7 years ago

Here is the complete alignment method that wraps the sasmol align method that I think is now in sasmol/src/python/sasop.py. This

  1. parses user input for the match regions
  2. creates the separate sasmol objects to use for alignment
  3. then calls the sasmol align method to perform the alignment I put this method in sassie because it was required for the convergence test and this where I was able to commit changes.
def align(inputs):
    '''
    input:
    ------
        inputs: object should contain the following attributes
            goal:    goal pdb
            ref:     reference pdb containing molecule info for moving pdb/dcd
            move:    pdb/dcd to align
            out:     output dcd file
            path:    output path
            goal_filter:     goal basis filter
            move_filter:     move basis filter

    note: inputs.ref and inputs.move can ofter be the same pdb
    '''

    aa_goal_pdb = inputs.goal_pdb
    aa_move_pdb = inputs.ref_pdb
    aa_move_fname = inputs.move
    save_fname = inputs.out_fname
    path = inputs.path

    if save_fname == aa_move_fname:
        in_place = True
        save_fname = 'temp' + save_fname[-4:]

    try:
        goal_filter = inputs.goal_filter
    except:
        basis_atoms = inputs.basis_atoms
        goal_seg_or_ch = inputs.goal_seg_or_chain
        goal_segname = inputs.goal_seg_chain
        goal_res_max = inputs.goal_max
        goal_res_min = inputs.goal_min

    try:
        move_filter = inputs.move_filter
    except:
        basis_atoms = inputs.basis_atoms
        move_seg_or_ch = inputs.move_seg_or_chain
        move_segname = inputs.move_seg_chain
        move_res_max = inputs.move_max
        move_res_min = inputs.move_min
        move_filter = ('((%s[i] == "%s") and (name[i] == "%s") and '
                       '(resid[i] >= %s) and (resid[i] <= %s))' % (
                           move_seg_or_ch, move_segname, basis_atoms,
                           move_res_min, move_res_max))

    # check input
    assert os.path.exists(aa_move_fname), ('ERROR: no such file - %s' %
                                          aa_move_fname)
    assert os.path.exists(aa_move_pdb), ('ERROR: no such file - %s' %
                                         aa_move_pdb)
    assert os.path.exists(aa_goal_pdb), ('ERROR: no such file - %s' %
                                         aa_goal_pdb)

    # create the SasMol objects
    sub_goal = sasmol.SasMol(0)
    sub_move = sasmol.SasMol(0)
    aa_goal = sasmol.SasMol(0)
    aa_move = sasmol.SasMol(0)

    aa_goal.read_pdb(aa_goal_pdb)
    aa_move.read_pdb(aa_move_pdb)

    if aa_move_fname[-3:] == 'pdb':
        aa_move.read_pdb(aa_move_fname)
        n_frames = aa_move.number_of_frames()
        in_type = 'pdb'
    elif aa_move_fname[-3:] == 'dcd':
        dcd_file = aa_move.open_dcd_read(aa_move_fname)
        n_frames = dcd_file[2]
        in_type = 'dcd'
    else:
        message = "\n~~~ ERROR, unknown input type ~~~\n"
        print_failure(message, txtOutput)
        return

    out_type = save_fname[-3:].lower()
    if 'dcd' == out_type:
        dcd_out_file = aa_move.open_dcd_write(path + save_fname)
    elif 'pdb' == out_type:
        dcd_out_file = None

    error, goal_seg_mask = aa_goal.get_subset_mask(goal_filter)
    assert not error, error
    error, move_seg_mask = aa_move.get_subset_mask(move_filter)
    assert not error, error

    error = aa_goal.copy_molecule_using_mask(sub_goal, goal_seg_mask, 0)
    assert not error, error
    error = aa_move.copy_molecule_using_mask(sub_move, move_seg_mask, 0)
    assert not error, error

    # calculate the center of mass of the subset of m1
    com_sub_goal = sub_goal.calccom(0)
    sub_goal.center(0)                         # center the m1 coordinates
    # get the m1 centered coordinates
    coor_sub_goal = sub_goal.coor()[0]

    for i in xrange(n_frames):
        if in_type == 'dcd':
            aa_move.read_dcd_step(dcd_file, i)
            # move m2 to be centered at the origin
            aa_move.center(0)
            error, sub_move.coor = aa_move.get_coor_using_mask(
                0, move_seg_mask)
            sub_move.setCoor(sub_move.coor)
            # calculate the center of mass of the subset of m2
            com_sub_move = sub_move.calccom(0)
            # move the subset of m2 to be centered at the origin
            sub_move.center(0)
            # get the new coordinates of the subset of m2
            coor_sub_move = sub_move.coor[0]
            # align m2 using the transformation from sub_m2 to sub_m1
            aa_move.align(
                0, coor_sub_move, com_sub_move, coor_sub_goal, com_sub_goal)
        elif in_type == 'pdb':
            # move m2 to be centered at the origin
            aa_move.center(i)
            error, sub_move.coor = aa_move.get_coor_using_mask(
                i, move_seg_mask)
            sub_move.setCoor(sub_move.coor)
            # calculate the center of mass of the subset of m2
            com_sub_move = sub_move.calccom(0)
            # move the subset of m2 to be centered at the origin
            sub_move.center(0)
            # get the new coordinates of the subset of m2
            coor_sub_move = sub_move.coor[0]
            # align m2 using the transformation from sub_m2 to sub_m1
            aa_move.align(
                i, coor_sub_move, com_sub_move, coor_sub_goal, com_sub_goal)

        aa_move.write_dcd_step(dcd_out_file, 0, i + 1)

    if in_type == 'dcd':
        aa_move.close_dcd_read(dcd_file[0])
    if out_type == 'dcd':
        aa_move.close_dcd_write(dcd_out_file)

    if in_place:
        os.remove(aa_move_fname)
        os.rename(save_fname, aa_move_fname)

    logging.info('Alingment of {} complete. \m/ >.< \m/'.format(aa_move_fname))