1 import re
2 import sys
3
4 import obitools.seqdb.genbank as gb
5 from obitools.seqdb import nucEntryIterator,aaEntryIterator
6
7 _featureMatcher = re.compile('^FEATURES.+\n(?=ORIGIN)',re.DOTALL + re.M)
8
9 _headerMatcher = re.compile('^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M)
10 _seqMatcher = re.compile('(?<=ORIGIN).+(?=//\n)', re.DOTALL + re.M)
11 _cleanSeq = re.compile('[ \n0-9]+')
12 _acMatcher = re.compile('(?<=^ACCESSION ).+',re.M)
13 _deMatcher = re.compile('(?<=^DEFINITION ).+\n( .+\n)*',re.M)
14 _cleanDe = re.compile('\n *')
15
17 try:
18 header = _headerMatcher.search(text).group()
19 ft = _featureMatcher.search(text).group()
20 seq = _seqMatcher.search(text).group()
21 seq = _cleanSeq.sub('',seq).upper()
22 acs = _acMatcher.search(text).group()
23 acs = acs.split()
24 ac = acs[0]
25 acs = acs[1:]
26 de = _deMatcher.search(header).group()
27 de = _cleanDe.sub(' ',de).strip().strip('.')
28 except AttributeError,e:
29 print >>sys.stderr,'======================================================='
30 print >>sys.stderr,text
31 print >>sys.stderr,'======================================================='
32 raise e
33
34 return (ac,seq,de,header,ft,acs)
35
38
39
43
44
47
48
52