1 import re
2 import sys
3
4 from obitools import tagmatcher
5 from obitools.seqdb import nucEntryIterator
6 from obitools.location.feature import Feature
7 from obitools.location import locationGenerator
8
9 _seqMatcher = re.compile('(?<=TG )[acgtrymkwsbdhvnACGTRYMKWSBDHVN]+')
10 _cdMatcher = re.compile('(?<=CD ) *([^:]+?) +: +([0-9]+)')
11 _loMatcher = re.compile('(?<=LO ) *([ACGTRYMKWSBDHVN]+) +([^ ]+) +([^ ]+) +\(([0-9]+)\)')
12 _dmMatcher = re.compile('(?<=DM )[0-9]+')
13 _rmMatcher = re.compile('(?<=RM )[0-9]+')
14
15
17 try:
18 seq = _seqMatcher.search(text).group()
19 cd = dict((x[0],int(x[1])) for x in _cdMatcher.findall(text))
20 locs = []
21
22 for (match,ac,loc,err) in _loMatcher.findall(text):
23 feat = Feature('location', locationGenerator(loc))
24 feat['error']=int(err)
25 feat['match']=match
26 feat['contig']=ac
27 locs.append(feat)
28
29 dm = int(_dmMatcher.search(text).group())
30 rm = int(_rmMatcher.search(text).group())
31
32 except AttributeError,e:
33 print >>sys.stderr,'======================================================='
34 print >>sys.stderr,text
35 print >>sys.stderr,'======================================================='
36 raise e
37
38 return (seq,cd,locs,dm,rm)
39
42
43
45 _cdheadparser = re.compile('condition [0-9]+ : (.+)')
46
51
54
57
87