1
2
3
4 import fileinput
5 import re
6 import getopt
7 import sys
8
9 from obitools import complement
10 from obitools.fasta import fastaIterator,writeFasta
11 from obitools.fast import Fast
12
14 hashd = Fast(seqref,kup)
15 hashr = Fast(seqref.complement(),kup)
16
17 def isDirect(seq):
18 sdirect,p =hashd(seq)
19 sreverse,p=hashr(seq)
20 return sdirect > sreverse,sdirect,sreverse
21
22 return isDirect
23
25 print "-----------------------------------"
26 print " fastaGrep.py"
27 print "-----------------------------------"
28 print "fastaGrep.py [option] <argument>"
29 print "-----------------------------------"
30 print "-h --help : print this help"
31 print "-r --reference=<fasta> : fasta file containing reference sequence"
32 print "-k --kup=## : word size used in fastn algorithm"
33 print "-----------------------------------"
34
35
36 if __name__=='__main__':
37
38 o,filenames = getopt.getopt(sys.argv[1:],
39 'hr:k:',
40 ['help',
41 'reference=',
42 'kup:'])
43
44 kup = 6
45
46 sys.argv[1:]=filenames
47
48 for name,value in o:
49 if name in ('-h','--help'):
50 printHelp()
51 exit()
52
53 elif name in ('-r','--reference'):
54 rseq=fastaIterator(value).next()
55
56 elif name in ('-k','--kup'):
57 kup = int(value)
58
59 else:
60 raise ValueError,'Unknown option %s' % name
61
62
63 isDirect=strandOnRefGenerator(rseq,kup)
64
65 fasta = fastaIterator(fileinput.input())
66
67 for seq in fasta:
68 d,ds,rs=isDirect(seq)
69 seq['isDirect']=str(d)
70 seq['directScore']=str(ds)
71 seq['reverseScore']=str(rs)
72 seq['onReference']=rid
73 print writeFasta(seq)
74