Open StevenCHowell opened 7 years ago
The alignment wrapper I wrote is contained in sassie/calculate/convergence_test.py
because it was required to do before performing a spatial convergence test. It relies on the AlignInputs
class:
class AlignInputs(object):
def __init__(self, goal_pdb, move, ref_pdb, out_fname, **kwargs):
self.goal_pdb = goal_pdb
self.ref_pdb = ref_pdb
self.move = move
self.out_fname = out_fname
self.path = kwargs.get('path', './')
self.basis_atoms = kwargs.get('basis_atoms', 'CA')
self.seg_or_chain = kwargs.get('seg_or_chain', 'segname')
self.seg_chain = kwargs.get('seg_chain', 'GAG')
self.min_resid = kwargs.get('min_resid', 20)
self.max_resid = kwargs.get('max_resid', 30)
default_filter = ('(({}[i] == "{}") and (name[i] == "{}") and '
'(resid[i] >= {}) and (resid[i] <= {}))'.format(
self.seg_or_chain, self.seg_chain,
self.basis_atoms, self.min_resid, self.max_resid))
self.goal_filter = kwargs.get('goal_filter', default_filter)
self.move_filter = kwargs.get('move_filter', default_filter)
logging.debug('goal_pdb: {}'.format(self.goal_pdb))
logging.debug('ref_pdb: {}'.format(self.ref_pdb))
logging.debug('move: {}'.format(self.move))
logging.debug('out_fname: {}'.format(self.out_fname))
logging.debug('path: {}'.format(self.path))
logging.debug('goal_filter: {}'.format(self.goal_filter))
logging.debug('move_filter: {}'.format(self.move_filter))
Here is the complete alignment method that wraps the sasmol align
method that I think is now in sasmol/src/python/sasop.py
. This
def align(inputs):
'''
input:
------
inputs: object should contain the following attributes
goal: goal pdb
ref: reference pdb containing molecule info for moving pdb/dcd
move: pdb/dcd to align
out: output dcd file
path: output path
goal_filter: goal basis filter
move_filter: move basis filter
note: inputs.ref and inputs.move can ofter be the same pdb
'''
aa_goal_pdb = inputs.goal_pdb
aa_move_pdb = inputs.ref_pdb
aa_move_fname = inputs.move
save_fname = inputs.out_fname
path = inputs.path
if save_fname == aa_move_fname:
in_place = True
save_fname = 'temp' + save_fname[-4:]
try:
goal_filter = inputs.goal_filter
except:
basis_atoms = inputs.basis_atoms
goal_seg_or_ch = inputs.goal_seg_or_chain
goal_segname = inputs.goal_seg_chain
goal_res_max = inputs.goal_max
goal_res_min = inputs.goal_min
try:
move_filter = inputs.move_filter
except:
basis_atoms = inputs.basis_atoms
move_seg_or_ch = inputs.move_seg_or_chain
move_segname = inputs.move_seg_chain
move_res_max = inputs.move_max
move_res_min = inputs.move_min
move_filter = ('((%s[i] == "%s") and (name[i] == "%s") and '
'(resid[i] >= %s) and (resid[i] <= %s))' % (
move_seg_or_ch, move_segname, basis_atoms,
move_res_min, move_res_max))
# check input
assert os.path.exists(aa_move_fname), ('ERROR: no such file - %s' %
aa_move_fname)
assert os.path.exists(aa_move_pdb), ('ERROR: no such file - %s' %
aa_move_pdb)
assert os.path.exists(aa_goal_pdb), ('ERROR: no such file - %s' %
aa_goal_pdb)
# create the SasMol objects
sub_goal = sasmol.SasMol(0)
sub_move = sasmol.SasMol(0)
aa_goal = sasmol.SasMol(0)
aa_move = sasmol.SasMol(0)
aa_goal.read_pdb(aa_goal_pdb)
aa_move.read_pdb(aa_move_pdb)
if aa_move_fname[-3:] == 'pdb':
aa_move.read_pdb(aa_move_fname)
n_frames = aa_move.number_of_frames()
in_type = 'pdb'
elif aa_move_fname[-3:] == 'dcd':
dcd_file = aa_move.open_dcd_read(aa_move_fname)
n_frames = dcd_file[2]
in_type = 'dcd'
else:
message = "\n~~~ ERROR, unknown input type ~~~\n"
print_failure(message, txtOutput)
return
out_type = save_fname[-3:].lower()
if 'dcd' == out_type:
dcd_out_file = aa_move.open_dcd_write(path + save_fname)
elif 'pdb' == out_type:
dcd_out_file = None
error, goal_seg_mask = aa_goal.get_subset_mask(goal_filter)
assert not error, error
error, move_seg_mask = aa_move.get_subset_mask(move_filter)
assert not error, error
error = aa_goal.copy_molecule_using_mask(sub_goal, goal_seg_mask, 0)
assert not error, error
error = aa_move.copy_molecule_using_mask(sub_move, move_seg_mask, 0)
assert not error, error
# calculate the center of mass of the subset of m1
com_sub_goal = sub_goal.calccom(0)
sub_goal.center(0) # center the m1 coordinates
# get the m1 centered coordinates
coor_sub_goal = sub_goal.coor()[0]
for i in xrange(n_frames):
if in_type == 'dcd':
aa_move.read_dcd_step(dcd_file, i)
# move m2 to be centered at the origin
aa_move.center(0)
error, sub_move.coor = aa_move.get_coor_using_mask(
0, move_seg_mask)
sub_move.setCoor(sub_move.coor)
# calculate the center of mass of the subset of m2
com_sub_move = sub_move.calccom(0)
# move the subset of m2 to be centered at the origin
sub_move.center(0)
# get the new coordinates of the subset of m2
coor_sub_move = sub_move.coor[0]
# align m2 using the transformation from sub_m2 to sub_m1
aa_move.align(
0, coor_sub_move, com_sub_move, coor_sub_goal, com_sub_goal)
elif in_type == 'pdb':
# move m2 to be centered at the origin
aa_move.center(i)
error, sub_move.coor = aa_move.get_coor_using_mask(
i, move_seg_mask)
sub_move.setCoor(sub_move.coor)
# calculate the center of mass of the subset of m2
com_sub_move = sub_move.calccom(0)
# move the subset of m2 to be centered at the origin
sub_move.center(0)
# get the new coordinates of the subset of m2
coor_sub_move = sub_move.coor[0]
# align m2 using the transformation from sub_m2 to sub_m1
aa_move.align(
i, coor_sub_move, com_sub_move, coor_sub_goal, com_sub_goal)
aa_move.write_dcd_step(dcd_out_file, 0, i + 1)
if in_type == 'dcd':
aa_move.close_dcd_read(dcd_file[0])
if out_type == 'dcd':
aa_move.close_dcd_write(dcd_out_file)
if in_place:
os.remove(aa_move_fname)
os.rename(save_fname, aa_move_fname)
logging.info('Alingment of {} complete. \m/ >.< \m/'.format(aa_move_fname))
It took some work and testing to figure out how to align sasmol objects. It was also not intuitive that adding more residues to the alignment region could lead to worse rather than better results. This was more clear when I understood the alignment was only based on coordinates, and did not consider the properties of the atoms being aligned.
I think the first step would be to improve the documentation and either wrap the current module in a more intuitive setup method or modify the current code. I will add the code I wrote as a wrapper in the comments below.
As a second step, perhaps consider writing a method that uses atom properties, perhaps using atom type, to align sasmol objects. This could be used to separate a match region to force C atoms to align with C atoms and O atoms to align with O atoms.