Package obitools :: Package options :: Module bioseqfilter
[hide private]
[frames | no frames]

Source Code for Module obitools.options.bioseqfilter

  1  import sys 
  2  import re 
  3   
  4  from obitools.options.taxonomyfilter import addTaxonomyFilterOptions 
  5  from obitools.options.taxonomyfilter import taxonomyFilterGenerator 
  6       
7 -def _sequenceOptionCallback(options,opt,value,parser):
8 parser.values.sequencePattern = re.compile(value,re.I)
9
10 -def _defintionOptionCallback(options,opt,value,parser):
11 parser.values.definitionPattern = re.compile(value)
12
13 -def _identifierOptionCallback(options,opt,value,parser):
14 parser.values.identifierPattern = re.compile(value)
15
16 -def _attributeOptionCallback(options,opt,value,parser):
17 if not hasattr(options, 'attributePatterns'): 18 parser.values.attributePatterns={} 19 attribute,pattern=value.split(':',1) 20 parser.values.attributePatterns[attribute]=re.compile(pattern)
21
22 -def _predicatOptionCallback(options,opt,value,parser):
23 if not hasattr(options, 'predicats'): 24 options.predicats=[] 25 parser.values.predicats.append(value)
26 27
def addSequenceFilteringOptions(optionManager):
    """Register the sequence filtering options on *optionManager*.

    *optionManager* must provide an optparse-style ``add_option`` method.
    The options added are -s, -D, -I, -a, -A, -p, -L, -l and -v; the
    taxonomy filtering options are then added through
    ``addTaxonomyFilterOptions``.

    The -s, -D, -I and -a options use callbacks that store compiled regular
    expressions, so their destination attributes exist only when the option
    was actually given on the command line.
    """
    optionManager.add_option('-s', '--sequence',
                             action="callback",
                             callback=_sequenceOptionCallback,
                             metavar="<REGULAR_PATTERN>",
                             type="string",
                             help="regular expression pattern used to select "
                                  "the sequence. The pattern is case "
                                  "insensitive")

    optionManager.add_option('-D', '--definition',
                             action="callback",
                             callback=_defintionOptionCallback,
                             type="string",
                             metavar="<REGULAR_PATTERN>",
                             help="regular expression pattern matched against "
                                  "the definition of the sequence. "
                                  "The pattern is case sensitive")

    optionManager.add_option('-I', '--identifier',
                             action="callback",
                             callback=_identifierOptionCallback,
                             type="string",
                             metavar="<REGULAR_PATTERN>",
                             help="regular expression pattern matched against "
                                  "the identifier of the sequence. "
                                  "The pattern is case sensitive")

    # Help fragments below each end with a space so the concatenated text
    # does not glue two words or sentences together.
    optionManager.add_option('-a', '--attribute',
                             action="callback",
                             callback=_attributeOptionCallback,
                             type="string",
                             metavar="<ATTRIBUTE_NAME>:<REGULAR_PATTERN>",
                             help="regular expression pattern matched against "
                                  "the attributes of the sequence. "
                                  "The value of this option is of the form: "
                                  "attribute_name:regular_pattern. "
                                  "The pattern is case sensitive. "
                                  "Several -a options can be used on the same "
                                  "command line.")

    optionManager.add_option('-A', '--has-attribute',
                             action="append",
                             type="string",
                             dest="has_attribute",
                             default=[],
                             metavar="<ATTRIBUTE_NAME>",
                             help="select sequence with attribute "
                                  "<ATTRIBUTE_NAME> defined")

    optionManager.add_option('-p', '--predicat',
                             action="append", dest="predicats",
                             metavar="<PYTHON_EXPRESSION>",
                             help="python boolean expression to be evaluated "
                                  "in the sequence context. The attribute "
                                  "names can be used in the expression as "
                                  "variable names. "
                                  "An extra variable named 'sequence' refers "
                                  "to the sequence object itself. "
                                  "Several -p options can be used on the same "
                                  "command line.")

    # The length bounds are inclusive in the filter (<= lmax, >= lmin).
    optionManager.add_option('-L', '--lmax',
                             action='store',
                             metavar="<##>",
                             type="int", dest="lmax",
                             help="keep sequences of length lower than or "
                                  "equal to lmax")

    optionManager.add_option('-l', '--lmin',
                             action='store',
                             metavar="<##>",
                             type="int", dest="lmin",
                             help="keep sequences of length greater than or "
                                  "equal to lmin")

    optionManager.add_option('-v', '--inverse-match',
                             action='store_true',
                             default=False,
                             dest="invertedFilter",
                             help="revert the sequence selection "
                                  "[default : %default]")

    addTaxonomyFilterOptions(optionManager)
105 106 107 108 109
def filterGenerator(options):
    """Build a sequence predicate from the parsed filtering *options*.

    The returned function ``sequenceFilter(seq)`` applies the criteria
    below in order and stops at the first one that fails.  A criterion is
    skipped when its attribute is missing from *options* (or is ``None`` /
    empty where noted):

    * ``sequencePattern``   -- regex searched in ``str(seq)``
    * ``identifierPattern`` -- regex searched in ``seq.id``
    * ``definitionPattern`` -- regex searched in ``seq.definition``
    * ``has_attribute``     -- every listed key must satisfy ``key in seq``
      (skipped when missing or empty)
    * ``attributePatterns`` -- every key must be in ``seq`` and its regex
      must match ``seq[key]``
    * ``predicats``         -- every expression must evaluate to a true
      value (skipped when ``None``)
    * ``lmin`` / ``lmax``   -- inclusive bounds on ``len(seq)`` (skipped
      when ``None``)
    * the taxonomy filter returned by ``taxonomyFilterGenerator(options)``

    If ``options.invertedFilter`` is true the final result is negated.

    ``all()`` is used for the multi-valued criteria, so evaluation stops at
    the first failing element; the criteria do not rely on the ``reduce``
    builtin.
    """
    taxfilter = taxonomyFilterGenerator(options)

    def _selected(seq):
        # Return the un-inverted verdict; leave at the first failing test.
        if hasattr(options, 'sequencePattern'):
            if not options.sequencePattern.search(str(seq)):
                return False

        if hasattr(options, 'identifierPattern'):
            if not options.identifierPattern.search(seq.id):
                return False

        if hasattr(options, 'definitionPattern'):
            if not options.definitionPattern.search(seq.definition):
                return False

        # has_attribute may be absent when the options were not built by
        # addSequenceFilteringOptions; treat that like an empty list.
        required = getattr(options, 'has_attribute', None) or ()
        if not all(k in seq for k in required):
            return False

        patterns = getattr(options, 'attributePatterns', None)
        if patterns:
            # A constrained attribute that is missing rejects the sequence.
            if not all(p in seq and bool(rx.search(seq[p]))
                       for p, rx in patterns.items()):
                return False

        predicats = getattr(options, 'predicats', None)
        if predicats is not None:
            # SECURITY NOTE(review): the expressions come from the -p
            # option and are passed to eval() as-is, with the sequence
            # used as the local namespace.  Never feed this untrusted input.
            if not all(bool(eval(p, {'sequence': seq}, seq))
                       for p in predicats):
                return False

        lmin = getattr(options, 'lmin', None)
        if lmin is not None and not len(seq) >= lmin:
            return False

        lmax = getattr(options, 'lmax', None)
        if lmax is not None and not len(seq) <= lmax:
            return False

        return taxfilter(seq)

    def sequenceFilter(seq):
        good = _selected(seq)

        if hasattr(options, 'invertedFilter') and options.invertedFilter:
            good = not good

        return good

    return sequenceFilter
def sequenceFilterIteratorGenerator(options):
    """Return a generator function that filters an iterable of sequences.

    The predicate is built once with ``filterGenerator(options)``.  The
    returned ``sequenceFilterIterator(seqIterator)`` lazily yields, in
    order, the items of *seqIterator* for which the predicate is true.
    """
    accept = filterGenerator(options)

    def sequenceFilterIterator(seqIterator):
        for candidate in seqIterator:
            if not accept(candidate):
                continue
            yield candidate

    return sequenceFilterIterator