1 import sys
2 import re
3
4 from obitools.options.taxonomyfilter import addTaxonomyFilterOptions
5 from obitools.options.taxonomyfilter import taxonomyFilterGenerator
6
# Compile the option argument with re.I (case-insensitive matching) and
# store the compiled pattern on the parser's values as `sequencePattern`.
parser.values.sequencePattern = re.compile(value,re.I)
9
12
15
# Create the attribute -> compiled-pattern mapping the first time the option
# is seen. The guard inspects parser.values, the object the mapping is stored
# on, so patterns from earlier occurrences of the option are kept instead of
# being discarded by a fresh dict.
if not hasattr(parser.values, 'attributePatterns'):
    parser.values.attributePatterns = {}
# The option argument has the form "<attribute>:<pattern>". Split on the
# first ':' only, so the pattern itself may contain colons.
attribute, pattern = value.split(':', 1)
# No flags are passed: attribute patterns are matched case-sensitively.
parser.values.attributePatterns[attribute] = re.compile(pattern)
21
26
27
29
# --- pattern filters ------------------------------------------------------
# The four options below delegate to callbacks that receive the raw option
# argument; the callbacks themselves are defined elsewhere in the module.
optionManager.add_option('-s', '--sequence',
                         action="callback", callback=_sequenceOptionCallback,
                         metavar="<REGULAR_PATTERN>",
                         type="string",
                         help="regular expression pattern used to select "
                              "the sequence. The pattern is case insensitive")

optionManager.add_option('-D', '--definition',
                         action="callback", callback=_defintionOptionCallback,
                         type="string",
                         metavar="<REGULAR_PATTERN>",
                         help="regular expression pattern matched against "
                              "the definition of the sequence. "
                              "The pattern is case sensitive")

optionManager.add_option('-I', '--identifier',
                         action="callback", callback=_identifierOptionCallback,
                         type="string",
                         metavar="<REGULAR_PATTERN>",
                         help="regular expression pattern matched against "
                              "the identifier of the sequence. "
                              "The pattern is case sensitive")

# Adjacent string literals are concatenated verbatim, so every fragment that
# is followed by another one must end with its own separating space.
optionManager.add_option('-a', '--attribute',
                         action="callback", callback=_attributeOptionCallback,
                         type="string",
                         metavar="<ATTRIBUTE_NAME>:<REGULAR_PATTERN>",
                         help="regular expression pattern matched against "
                              "the attributes of the sequence. "
                              "the value of this atribute is of the form : "
                              "attribute_name:regular_pattern. "
                              "The pattern is case sensitive. "
                              "Several -a option can be used on the same "
                              "commande line.")

# --- attribute presence / python predicates ------------------------------
# default=[] lets consumers iterate options.has_attribute unconditionally.
optionManager.add_option('-A', '--has-attribute',
                         action="append",
                         type="string",
                         dest="has_attribute",
                         default=[],
                         metavar="<ATTRIBUTE_NAME>",
                         help="select sequence with attribute <ATTRIBUTE_NAME> "
                              "defined")

# No default is given: options.predicats stays None until -p is used.
optionManager.add_option('-p', '--predicat',
                         action="append", dest="predicats",
                         metavar="<PYTHON_EXPRESSION>",
                         help="python boolean expression to be evaluated in the "
                              "sequence context. The attribute name can be "
                              "used in the expression as variable name. "
                              "An extra variable named 'sequence' refers "
                              "to the sequence object itself. "
                              "Several -p option can be used on the same "
                              "commande line.")

# --- length bounds ---------------------------------------------------------
optionManager.add_option('-L', '--lmax',
                         action='store',
                         metavar="<##>",
                         type="int", dest="lmax",
                         help="keep sequences shorter than lmax")

optionManager.add_option('-l', '--lmin',
                         action='store',
                         metavar="<##>",
                         type="int", dest="lmin",
                         help="keep sequences longer than lmin")

# --- selection inversion ---------------------------------------------------
optionManager.add_option('-v', '--inverse-match',
                         action='store_true',
                         default=False,
                         dest="invertedFilter",
                         help="revert the sequence selection "
                              "[default : %default]")

# Let the taxonomy filter module register its own options on the same manager.
addTaxonomyFilterOptions(optionManager)
105
106
107
108
109
# Build the taxonomy predicate once from the parsed options; the
# sequenceFilter function defined just below calls it as its last test.
taxfilter = taxonomyFilterGenerator(options)
112
def sequenceFilter(seq):
    """Tell whether *seq* is selected by the filters configured in ``options``.

    ``options`` and ``taxfilter`` are free variables resolved from the
    enclosing scope. The tests are applied in the order below and the
    chain stops being evaluated as soon as one of them fails:

    1. ``options.sequencePattern`` searched in ``str(seq)``;
    2. ``options.identifierPattern`` searched in ``seq.id``;
    3. ``options.definitionPattern`` searched in ``seq.definition``;
    4. every name of ``options.has_attribute`` is a key of ``seq``;
    5. every ``options.attributePatterns`` entry names a key of ``seq``
       whose value matches the associated compiled pattern;
    6. every ``options.predicats`` expression evaluates to a true value;
    7. ``len(seq) >= options.lmin`` and ``len(seq) <= options.lmax``;
    8. ``taxfilter(seq)``.

    Optional settings that are missing from ``options`` (or set to None
    for predicats/lmin/lmax) are skipped. When ``options.invertedFilter``
    is true the final verdict is negated.

    :param seq: sequence object; must support ``str()``, ``len()``,
                ``in`` / ``[]`` attribute access and expose ``id`` and
                ``definition``.
    :return: the verdict of the last executed test (a boolean for every
             test implemented here; the taxonomy filter result is
             returned as is).
    """
    good = True

    if hasattr(options, 'sequencePattern'):
        good = bool(options.sequencePattern.search(str(seq)))

    if good and hasattr(options, 'identifierPattern'):
        good = bool(options.identifierPattern.search(seq.id))

    if good and hasattr(options, 'definitionPattern'):
        good = bool(options.definitionPattern.search(seq.definition))

    if good:
        # all() is a builtin on Python 2.5+ and Python 3, whereas reduce()
        # is no longer a builtin on Python 3.
        good = all(name in seq for name in options.has_attribute)

    if good and hasattr(options, 'attributePatterns'):
        # A constrained attribute must be present AND match its pattern.
        good = all(name in seq and bool(pattern.search(seq[name]))
                   for name, pattern in options.attributePatterns.items())

    if good and getattr(options, 'predicats', None) is not None:
        # NOTE(review): eval() runs arbitrary Python taken from the -p
        # option; never feed it expressions from an untrusted source.
        # The sequence doubles as the local namespace, so its attributes
        # are visible as plain variable names inside the expression.
        good = all(bool(eval(expression, {'sequence': seq}, seq))
                   for expression in options.predicats)

    if good and getattr(options, 'lmin', None) is not None:
        good = len(seq) >= options.lmin

    if good and getattr(options, 'lmax', None) is not None:
        good = len(seq) <= options.lmax

    if good:
        good = taxfilter(seq)

    # The inversion is applied last, to the combined verdict.
    if getattr(options, 'invertedFilter', False):
        good = not good

    return good
161
# Hand the sequenceFilter function defined above back to the caller.
return sequenceFilter
163
# Build the per-sequence predicate from the parsed options.
# NOTE(review): the name `filter` shadows the builtin of the same name in
# this scope; the function defined below relies on this binding.
filter = filterGenerator(options)
166
def sequenceFilterIterator(seqIterator):
    """Lazily yield the items of *seqIterator* accepted by ``filter``.

    ``filter`` is resolved from the enclosing scope and is called with
    each item in turn; items for which it returns a false value are
    dropped, the others are yielded unchanged and in their original order.
    """
    for candidate in seqIterator:
        if not filter(candidate):
            continue
        yield candidate
171
# Hand the sequenceFilterIterator generator function back to the caller.
return sequenceFilterIterator
173