1
2
3
4 import fileinput
5 import re
6 import getopt
7 import sys
8
9 from obitools.fasta import fastaIterator,writeFasta
10
11
def annoteFastaGenerator():
    """Build the sequence annotator described by the command-line options.

    The options are read from ``sys.argv[1:]`` with getopt.  Once parsed,
    ``sys.argv[1:]`` is overwritten with the remaining non-option arguments.

    Selection options (every criterion given must hold):
      -s/--sequence=<pattern>          regex searched in str(seq), ignoring case
      -i/--identifier=<pattern>        regex searched in seq.id
      -d/--definition=<pattern>        regex searched in seq.definition
      -a/--attribute=<name>:<pattern>  seq must contain <name> and seq[<name>]
                                       must match <pattern> (repeatable)
      -l/--lmin=<int>                  len(seq) >= lmin
      -L/--lmax=<int>                  len(seq) <= lmax
      -v                               invert the outcome of the selection

    Tagging option:
      -t/--tag=<name>:<value>          tag handed back for every selected
                                       sequence (repeatable, at least one
                                       is required)

    -h/--help prints the usage message and exits.

    Returns:
        A function taking one sequence and returning a new dict: the
        requested tags when the sequence is selected, an empty dict otherwise.

    Raises:
        getopt.GetoptError: an option is unknown or lacks its argument.
        ValueError: an -a/-t value has no ':' separator, or -l/-L is not
            an integer.
        AssertionError: no --tag option was given.
    """
    options, filenames = getopt.getopt(
        sys.argv[1:],
        'hs:i:d:a:l:L:t:v',
        ['help', 'sequence=', 'identifier=', 'definition=',
         'attribute=', 'lmin=', 'lmax=', 'tag='])

    # Keep only the file names in sys.argv: fileinput.input() called without
    # arguments reads the files listed in sys.argv[1:].
    sys.argv[1:] = filenames

    # None (or an empty dict) means "criterion not requested".
    sequencePattern = None
    identifierPattern = None
    definitionPattern = None
    attributePatterns = {}
    tags = {}
    lmin = None
    lmax = None
    isInverted = False

    def splitPair(option, text):
        # Split '<name>:<data>' on the first ':' only, so <data> may itself
        # contain colons.
        key, separator, data = text.partition(':')
        if not separator:
            raise ValueError('Option %s expects <name>:<value>, got %r'
                             % (option, text))
        return key, data

    for name, value in options:
        if name in ('-h', '--help'):
            printHelp()
            sys.exit()

        elif name in ('-s', '--sequence'):
            # Sequence patterns are matched case-insensitively.
            sequencePattern = re.compile(value, re.IGNORECASE)

        elif name in ('-d', '--definition'):
            definitionPattern = re.compile(value)

        elif name in ('-i', '--identifier'):
            identifierPattern = re.compile(value)

        elif name in ('-a', '--attribute'):
            attribute, pattern = splitPair(name, value)
            attributePatterns[attribute] = re.compile(pattern)

        elif name in ('-t', '--tag'):
            attribute, data = splitPair(name, value)
            tags[attribute] = data.strip()

        elif name in ('-l', '--lmin'):
            lmin = int(value)

        elif name in ('-L', '--lmax'):
            lmax = int(value)

        elif name == '-v':
            isInverted = True

        else:
            raise ValueError('Unknown option %s' % name)

    assert tags, 'You must specified at least one --tag option'

    def sequenceAnnotator(seq):
        """Return the tags to apply to seq, or an empty dict if not selected.

        The criteria are evaluated in a fixed order and evaluation stops at
        the first one that fails; -v then inverts the final outcome.
        """
        good = True

        if sequencePattern is not None:
            good = bool(sequencePattern.search(str(seq)))

        if good and identifierPattern is not None:
            good = bool(identifierPattern.search(seq.id))

        if good and definitionPattern is not None:
            good = bool(definitionPattern.search(seq.definition))

        if good and attributePatterns:
            # Every requested attribute must be present AND match its pattern.
            good = all(p in seq and attributePatterns[p].search(seq[p])
                       for p in attributePatterns)

        if good and lmin is not None:
            good = len(seq) >= lmin

        if good and lmax is not None:
            good = len(seq) <= lmax

        if isInverted:
            good = not good

        # A fresh dict per call, so callers may modify the result freely.
        info = {}
        if good:
            info.update(tags)

        return info

    return sequenceAnnotator
122
123
def printHelp():
    """Print the command-line usage summary of fastaTag.py on standard output."""
    usage = (
        "-----------------------------------",
        " fastaTag.py",
        "-----------------------------------",
        "fastaTag.py [option] <argument>",
        "-----------------------------------",
        "-h --help : print this help",
        "-s --sequence=<pattern> : match the sequence with a regular pattern",
        "-i --identifier=<pattern> : match the sequence identifier with a regular pattern",
        "-d --definition=<pattern> : match the sequence definition with a regular pattern",
        "-a --attribute=<name>:<pattern> : match the sequence attribute <name> with a regular pattern",
        "-l --lmin=## : keep sequences longer than lmin",
        "-L --lmax=## : keep sequences shorter than lmax",
        # --tag is mandatory for this tool, so it has to appear in the help.
        "-t --tag=<name>:<value> : tag the selected sequences with attribute <name> set to <value>",
        "-v : revert the sequence selection",
        "-----------------------------------",
    )
    # One line per entry, each terminated by a newline.
    print("\n".join(usage))
139
140
if __name__=='__main__':

    # Build the annotator from the command-line options.
    annoteFasta = annoteFastaGenerator()

    # fileinput.input() without arguments reads the files named in
    # sys.argv[1:], or standard input when none is given.
    fasta = fastaIterator(fileinput.input())

    for seq in fasta:
        info = annoteFasta(seq)
        # Copy the returned tags onto the record before writing it; without
        # this step the annotator's result is discarded and the output is
        # never tagged.
        # NOTE(review): assumes obitools sequences accept item assignment,
        # mirroring the seq[name] reads used for attribute matching - confirm.
        for key, value in (info or {}).items():
            seq[key] = value
        print(writeFasta(seq))
150