Package obitools :: Package tagmatcher :: Module parser
[hide private]
[frames | no frames]

Source Code for Module obitools.tagmatcher.parser

 1  import re 
 2  import sys 
 3   
 4  from obitools import tagmatcher 
 5  from obitools.seqdb import nucEntryIterator 
 6  from obitools.location.feature import Feature 
 7  from obitools.location import locationGenerator 
 8   
 9  _seqMatcher    = re.compile('(?<=TG   )[acgtrymkwsbdhvnACGTRYMKWSBDHVN]+') 
10  _cdMatcher     = re.compile('(?<=CD   ) *([^:]+?) +: +([0-9]+)') 
11  _loMatcher     = re.compile('(?<=LO   ) *([ACGTRYMKWSBDHVN]+) +([^ ]+) +([^ ]+) +\(([0-9]+)\)') 
12  _dmMatcher     = re.compile('(?<=DM   )[0-9]+') 
13  _rmMatcher     = re.compile('(?<=RM   )[0-9]+') 
14   
15   
def __tagmatcherparser(text):
    """Split the text of one tagmatcher entry into its components.

    Returns the tuple ``(seq, cd, locs, dm, rm)`` where:

    - ``seq`` is the string matched by ``_seqMatcher`` (TG line);
    - ``cd`` is a dict mapping each name captured by ``_cdMatcher`` to its
      integer value (CD lines);
    - ``locs`` is a list with one ``Feature`` per match of ``_loMatcher``
      (LO lines), each carrying the ``'error'``, ``'match'`` and
      ``'contig'`` keys;
    - ``dm`` and ``rm`` are the integers from the DM and RM lines.

    If an ``AttributeError`` is raised while parsing -- typically because a
    mandatory TG, DM or RM line is absent, so ``search`` returns ``None`` --
    the offending entry is dumped on stderr between two separator lines and
    the exception is re-raised.
    """
    try:
        seq = _seqMatcher.search(text).group()
        cd = dict((name, int(count))
                  for (name, count) in _cdMatcher.findall(text))

        locs = []
        for (match, ac, loc, err) in _loMatcher.findall(text):
            feat = Feature('location', locationGenerator(loc))
            feat['error'] = int(err)
            feat['match'] = match
            feat['contig'] = ac
            locs.append(feat)

        dm = int(_dmMatcher.search(text).group())
        rm = int(_rmMatcher.search(text).group())

    except AttributeError:
        # Show the entry that could not be parsed.  sys.stderr.write is used
        # instead of the print statement so the code is valid on every
        # Python version.
        separator = '======================================================='
        sys.stderr.write('%s\n%s\n%s\n' % (separator, text, separator))
        # Bare raise keeps the original traceback pointing at the failing
        # line instead of at this handler.
        raise

    return (seq, cd, locs, dm, rm)
39
def tagMatcherParser(text):
    """Build a ``tagmatcher.TagMatcherSequence`` from the text of one entry.

    The entry is split by ``__tagmatcherparser`` and the five resulting
    values are passed, in order, as positional arguments to the
    ``TagMatcherSequence`` constructor.
    """
    seq, cd, locs, dm, rm = __tagmatcherparser(text)
    return tagmatcher.TagMatcherSequence(seq, cd, locs, dm, rm)
42 43
class TagMatcherIterator(object):
    """Iterate over the records of a tagmatcher file.

    The first entry produced by ``nucEntryIterator`` is kept as the file
    header in ``self.header``; the condition names found in it are stored,
    in order of appearance, in ``self.conditions``.  Every following entry
    is returned after being parsed by ``tagMatcherParser``.

    The class supports both the Python 2 (``next``) and the Python 3
    (``__next__``) iterator protocols.
    """

    # Captures the name part of each "condition <number> : <name>" line.
    _cdheadparser = re.compile('condition [0-9]+ : (.+)')

    def __init__(self, file):
        """Consume the header entry of *file* and extract the condition names.

        *file* is handed unchanged to ``nucEntryIterator``.
        """
        self._ni = nucEntryIterator(file)
        # The built-in next() works whichever iterator protocol the
        # underlying entry iterator implements.
        self.header = next(self._ni)
        self.conditions = TagMatcherIterator._cdheadparser.findall(self.header)

    def next(self):
        """Return the next entry, parsed by ``tagMatcherParser``.

        ``StopIteration`` raised by the underlying entry iterator is left
        to propagate and ends the iteration.
        """
        return tagMatcherParser(next(self._ni))

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def __iter__(self):
        """Return the iterator itself."""
        return self
57
def formatTagMatcher(tmseq, reader=None):
    """Return the textual tagmatcher representation of *tmseq*.

    - If *tmseq* is a ``TagMatcherIterator``, its ``header`` is returned
      unchanged.
    - Otherwise *tmseq* must be a ``tagmatcher.TagMatcherSequence``
      (checked with an assertion).  The result is a newline-joined entry
      made of the TG line, the CD lines (if any), the LO lines (if any),
      the TM, DM and RM lines and the ``//`` terminator.

    *reader*, when given, supplies through ``reader.conditions`` the names
    and order of the CD lines; without it the keys of
    ``tmseq['conditions']`` are used in their iteration order.

    Each two-letter tag is followed by three spaces, which is what the
    look-behind patterns of this module's parser (``_seqMatcher``,
    ``_cdMatcher``, ``_loMatcher``, ``_dmMatcher``, ``_rmMatcher``) require;
    with a single space the produced text could not be parsed back.  The TM
    line has no parser pattern here and simply follows the same layout.
    """
    if isinstance(tmseq, TagMatcherIterator):
        return tmseq.header

    assert isinstance(tmseq, tagmatcher.TagMatcherSequence), 'Only TagMatcherSequence can be used'

    lo = '\n'.join(['LO   %s %s %s (%d)' % (l['match'], l['contig'], l.locStr(), l['error'])
                    for l in tmseq['locations']])

    conditions = tmseq['conditions']
    if reader is not None:
        # Follow the condition names and order declared by the reader.
        names = reader.conditions
    else:
        names = conditions
    cd = '\n'.join(['CD   %s : %d' % (name, conditions[name])
                    for name in names])

    entry = ['TG   %s' % str(tmseq)]
    # Empty CD / LO sections are skipped so no blank line is emitted.
    if cd:
        entry.append(cd)
    if lo:
        entry.append(lo)

    entry.extend(('TM   %d' % tmseq['tm'],
                  'DM   %d' % tmseq['dm'],
                  'RM   %d' % tmseq['rm'],
                  '//'))

    return '\n'.join(entry)
87