Wednesday, August 12, 2009

Python simulation of Needleman-Wunsch 4

Finally, here is the code to organize everything. If run without a command-line argument, it does three short comparisons. Here is the output:


TGCTCGTA
T  TC TA
T--TCATA

HEAGAWGHE-E
    AW HE E
--P-AW-HEAE

HKKLYLVFEFLDLD-RYMEGIPKE
+ ++YLVFE+LDL+ ++M+  P++
N-RIYLVFEYLDLETKFMDSCPED


And here is the code:


import string,sys
from BLOSUM import loadMatrix,showM
from Utils import pprint,report
from Utils import printAlignment,load
from Inits import newDict,setUp,bases
from Inits import seqType,initScore
from Algorithm import doScoring,trackback

def run(s1,s2,kind=None,v=False):
    """Align two sequences and print the resulting alignment.

    The work is delegated to the project helpers imported at the top of
    the file: build the score box, fill it in, trace back through it and
    print what the traceback returns.

    s1, s2 -- the two sequences to compare (strings).
    kind   -- 'protein' selects the substitution matrix loaded from
              'blosum50.txt'; any other value selects the simple
              nucleotide matrix built below.  When omitted (or otherwise
              falsy) it is obtained from seqType(s1,s2).
    v      -- when false, the traceback result is printed as
              newline-joined lines followed by a blank line; when true,
              it is handed to printAlignment() instead.

    Returns None; the alignment is only printed.
    """
    if not kind:
        kind = seqType(s1,s2)
    sc = initScore()
    # set up the box
    L = setUp(s1,s2,sc)
    if kind == 'protein':
        # m = blosum; aaNames is only needed when debugging with
        # showM(m,aaNames)
        aaNames,m = loadMatrix(fn='blosum50.txt')
    else:
        # Nucleotide scoring keyed by the two-letter pair:
        # 5 for identical bases, -2 for any mismatch.
        m = dict()
        for n1 in bases:
            for n2 in bases:
                m[n1 + n2] = 5 if n1 == n2 else -2

    doScoring(L,s1,s2,m,sc)
    t = trackback(L,s1,s2,m)
    if not v:
        # A single parenthesised argument prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print('\n'.join(t) + '\n')
    else:
        printAlignment(t)

def main():
    """Run the three built-in demonstration comparisons, in order.

    Each pair is passed to run() with its default arguments.
    """
    examples = (
        ('TGCTCGTA', 'TTCATA'),
        ('HEAGAWGHEE', 'PAWHEAE'),
        ('HKKLYLVFEFLDLDRYMEGIPKE', 'NRIYLVFEYLDLETKFMDSCPED'),
    )
    for first, second in examples:
        run(first, second)

if __name__ == "__main__":
    # An optional first command-line argument is passed to load() to
    # obtain the two sequences; without it (or if it is empty) the
    # built-in examples in main() are run instead.
    fn = sys.argv[1] if len(sys.argv) > 1 else None
    if not fn:
        main()
    else:
        s1,s2 = load(fn)
        run(s1,s2,v=True)