Module fastaTag
[hide private]
[frames | no frames]

Source Code for Module fastaTag

  1  #!/usr/local/bin/python 
  2   
  3   
  4  import fileinput 
  5  import re 
  6  import getopt 
  7  import sys 
  8   
  9  from obitools.fasta import fastaIterator,writeFasta 
 10   
 11           
def annoteFastaGenerator():
    """Build a sequence annotator from the command-line options.

    The options are read from ``sys.argv[1:]`` with ``getopt``; afterwards
    ``sys.argv[1:]`` is replaced by the remaining positional arguments.

    Recognised options:

    * ``-h/--help``: call ``printHelp()`` and exit.
    * ``-s/--sequence``: case-insensitive pattern searched in ``str(seq)``.
    * ``-i/--identifier``: pattern searched in ``seq.id``.
    * ``-d/--definition``: pattern searched in ``seq.definition``.
    * ``-a/--attribute name:pattern``: ``name`` must be in ``seq`` and
      ``seq[name]`` must match ``pattern`` (may be given several times).
    * ``-l/--lmin``, ``-L/--lmax``: inclusive bounds on ``len(seq)``.
    * ``-v``: invert the result of the selection.
    * ``-t/--tag name:value``: tag written on every selected sequence
      (at least one is required).

    Returns:
        A function ``sequenceAnnotator(seq)`` that evaluates the filters on
        ``seq``, stores every tag with ``seq[name] = value`` when the
        sequence is selected, and returns ``seq``.

    Raises:
        getopt.GetoptError: propagated from ``getopt.getopt``.
        ValueError: on an unhandled option name, a non-integer length, or a
            ``-a``/``-t`` value without a ``:`` separator.
        AssertionError: when no ``--tag`` option was given.
    """
    options, filenames = getopt.getopt(
        sys.argv[1:],
        'hs:i:d:a:l:L:t:v',
        ['help',
         'sequence=',
         'identifier=',
         'definition=',
         'attribute=',
         'lmin=',
         'lmax=',
         'tag='])

    # Leave only the positional arguments behind for whoever reads
    # sys.argv next.
    sys.argv[1:] = filenames

    # A filter left at None (or an empty dict) is simply not applied.
    sequencePattern = None
    identifierPattern = None
    definitionPattern = None
    attributePatterns = {}
    lmin = None
    lmax = None
    isInverted = False
    tags = {}

    for name, value in options:
        if name in ('-h', '--help'):
            printHelp()
            sys.exit()
        elif name in ('-s', '--sequence'):
            # The original used the non-existent ``re.Ignore`` flag.
            sequencePattern = re.compile(value, re.IGNORECASE)
        elif name in ('-d', '--definition'):
            definitionPattern = re.compile(value)
        elif name in ('-i', '--identifier'):
            identifierPattern = re.compile(value)
        elif name in ('-a', '--attribute'):
            attribute, pattern = value.split(':', 1)
            attributePatterns[attribute] = re.compile(pattern)
        elif name in ('-t', '--tag'):
            attribute, data = value.split(':', 1)
            tags[attribute] = data.strip()
        elif name in ('-l', '--lmin'):
            lmin = int(value)
        elif name in ('-L', '--lmax'):
            lmax = int(value)
        elif name == '-v':
            # ``name in ('-v')`` was a substring test against the string
            # '-v', not a tuple membership test.
            isInverted = True
        else:
            raise ValueError('Unknown option %s' % name)

    # Explicit raise (same exception type and message as the former
    # ``assert``) so the check is not stripped when running with -O.
    if not tags:
        raise AssertionError('You must specified at least one --tag option')

    def sequenceAnnotator(seq):
        """Tag ``seq`` if it passes every configured filter; return ``seq``."""
        good = True

        if sequencePattern is not None:
            good = bool(sequencePattern.search(str(seq)))

        if good and identifierPattern is not None:
            good = bool(identifierPattern.search(seq.id))

        if good and definitionPattern is not None:
            # The original read the undefined name ``seqdefinition``.
            # NOTE(review): assumes the record exposes ``.definition``
            # next to ``.id`` -- confirm against obitools.fasta.
            good = bool(definitionPattern.search(seq.definition))

        if good and attributePatterns:
            # Every requested attribute must be present and must match.
            good = all(attribute in seq and regex.search(seq[attribute])
                       for attribute, regex in attributePatterns.items())

        if good and lmin is not None:
            good = len(seq) >= lmin

        if good and lmax is not None:
            good = len(seq) <= lmax

        if isInverted:
            good = not good

        if good:
            # The original updated and returned an undefined ``info``
            # (NameError on every call).
            # NOTE(review): assumes the record supports item assignment,
            # as it already supports ``seq[name]`` / ``name in seq``.
            for key, data in tags.items():
                seq[key] = data

        return seq

    return sequenceAnnotator
def printHelp():
    """Print the command-line usage summary of fastaTag.py on standard output.

    The usage line names this script (it previously said ``fastaGrep.py``)
    and the listing includes the mandatory ``-t/--tag`` option, which was
    missing.
    """
    rule = "-----------------------------------"
    lines = [
        rule,
        " fastaTag.py",
        rule,
        "fastaTag.py [option] <argument>",
        rule,
        "-h --help : print this help",
        "-s --sequence=<pattern> : match the sequence with a regular pattern",
        "-i --identifier=<pattern> : match the sequence identifier with a regular pattern",
        "-d --definition=<pattern> : match the sequence definition with a regular pattern",
        "-a --attribute=<name>:<pattern> : match the sequence attribute <name> with a regular pattern",
        "-l --lmin=## : keep sequences longer than lmin",
        "-L --lmax=## : keep sequences shorter than lmax",
        "-v : revert the sequence selection",
        "-t --tag=<name>:<value> : add the attribute <name> with value <value> to the selected sequences",
        rule,
    ]
    # A single-argument print call behaves the same as a statement
    # (Python 2) and as a function (Python 3).
    print("\n".join(lines))
if __name__ == '__main__':

    # Parse the options first: annoteFastaGenerator() rewrites sys.argv[1:]
    # to the remaining positional arguments before fileinput.input() is
    # called below.
    annoteFasta = annoteFastaGenerator()

    fasta = fastaIterator(fileinput.input())

    for seq in fasta:
        # The annotator's return value was bound to a module-level ``info``
        # that nothing read; only the record itself is written out.
        annoteFasta(seq)
        # Single-argument call form: same output under Python 2 and 3.
        print(writeFasta(seq))