Module fastaGrep
[hide private]
[frames] | no frames]

Source Code for Module fastaGrep

  1  #!/usr/local/bin/python 
  2  """\ 
  3  -------------------------------------------------------------------- 
  4   fastaGrep.py 
  5  -------------------------------------------------------------------- 
  6  fastaGrep.py [option] <argument> 
  7  -------------------------------------------------------------------- 
  8  -h    --help                       : print this help 
  9   
 10  -s    --sequence=<pattern>         : match the sequence with  
 11                                       a regular pattern 
 12                                        
 13  -i    --identifier=<pattern>       : match the sequence  
 14                                       identifier with a regular  
 15                                       pattern 
 16                                        
 17  -d    --definition=<pattern>       : match the sequence definition  
 18                                       with a regular pattern 
 19                                        
 20  -a    --attribute=<name>:<pattern> : match the sequence attribute  
 21                                       <name> with a regular pattern 
 22                                        
 23  -l    --lmin=##                    : keep sequences longer than  
 24                                       lmin 
 25   
 26  -L    --lmax=##                    : keep sequences shorter than  
 27                                       lmax 
 28   
 29  -v                                 : invert the sequence selection 
 30  -------------------------------------------------------------------- 
 31  """ 
 32   
 33  import fileinput 
 34  import re 
 35  import getopt 
 36  import sys 
 37   
 38  from obitools.fasta import fastaIterator,writeFasta 
 39  from obitools.utils import checkHelpOption 
 40   
 41           
def goodFastaGenerator():
    """Build a sequence predicate from the options found in ``sys.argv``.

    The command line is parsed with ``getopt``.  Recognised options are
    consumed and ``sys.argv[1:]`` is replaced by the remaining positional
    arguments, so that a later ``fileinput.input()`` only sees file names.

    Returns:
        A function ``sequenceSelector(seq)`` returning ``True`` when ``seq``
        passes every requested filter (or fails one of them when ``-v`` was
        given), ``False`` otherwise.

    Raises:
        getopt.GetoptError: on an option ``getopt`` does not recognise.
        ValueError: on an option this function does not handle (including
            ``-h``/``--help`` if it reaches this point), on a non-integer
            ``--lmin``/``--lmax`` value, or on an ``--attribute`` value
            that is not of the form ``<name>:<pattern>``.
        re.error: on an invalid regular expression.
    """
    options, filenames = getopt.getopt(
        sys.argv[1:],
        'hs:i:d:a:l:L:v',
        ['help',
         'sequence=',
         'identifier=',
         'definition=',
         'attribute=',
         'lmin=',
         'lmax='])

    # Keep only the positional arguments for fileinput.input().
    sys.argv[1:] = filenames

    # None means "this filter was not requested".
    sequencePattern = None
    identifierPattern = None
    definitionPattern = None
    attributePatterns = {}
    lmin = None
    lmax = None
    isInverted = False

    for name, value in options:
        if name in ('-s', '--sequence'):
            # The sequence pattern is the only case-insensitive one.
            sequencePattern = re.compile(value, re.IGNORECASE)

        elif name in ('-d', '--definition'):
            definitionPattern = re.compile(value)

        elif name in ('-i', '--identifier'):
            identifierPattern = re.compile(value)

        elif name in ('-a', '--attribute'):
            if ':' not in value:
                raise ValueError(
                    'Attribute option must be <name>:<pattern>, got %s'
                    % value)
            # Split on the first ':' only: the pattern may contain colons.
            attribute, pattern = value.split(':', 1)
            attributePatterns[attribute] = re.compile(pattern)

        elif name in ('-l', '--lmin'):
            lmin = int(value)

        elif name in ('-L', '--lmax'):
            lmax = int(value)

        elif name == '-v':
            isInverted = True

        else:
            raise ValueError('Unknown option %s' % name)

    def sequenceSelector(seq):
        """Tell whether ``seq`` is selected by the parsed options.

        ``seq`` is presumably a sequence object as yielded by
        ``obitools.fasta.fastaIterator`` (to be confirmed); only
        ``str(seq)``, ``len(seq)``, ``seq.id``, ``seq.definition``,
        ``name in seq`` and ``seq[name]`` are used here.
        """
        good = True

        if sequencePattern is not None:
            good = bool(sequencePattern.search(str(seq)))

        if good and identifierPattern is not None:
            good = bool(identifierPattern.search(seq.id))

        if good and definitionPattern is not None:
            good = bool(definitionPattern.search(seq.definition))

        if good and attributePatterns:
            # Every requested attribute must be present AND match.
            good = all(attribute in seq
                       and bool(pattern.search(seq[attribute]))
                       for attribute, pattern in attributePatterns.items())

        # Both length bounds are inclusive.
        if good and lmin is not None:
            good = len(seq) >= lmin

        if good and lmax is not None:
            good = len(seq) <= lmax

        if isInverted:
            good = not good

        return good

    return sequenceSelector


if __name__ == '__main__':

    # Help is handled before option parsing by the obitools helper
    # (presumably prints the usage text and exits on -h/--help).
    checkHelpOption(__doc__)

    goodFasta = goodFastaGenerator()

    # goodFastaGenerator() left only file names in sys.argv, so
    # fileinput reads those files (or standard input when there are none).
    fasta = fastaIterator(fileinput.input())

    for seq in fasta:
        if goodFasta(seq):
            print(writeFasta(seq))