Package obitools :: Package seqdb :: Package genbank :: Module parser
[hide private]
[frames | no frames]

Source Code for Module obitools.seqdb.genbank.parser

 1  import re 
 2  import sys 
 3   
 4  import obitools.seqdb.genbank as gb 
 5  from obitools.seqdb import nucEntryIterator,aaEntryIterator 
 6   
 7  _featureMatcher = re.compile('^FEATURES.+\n(?=ORIGIN)',re.DOTALL + re.M) 
 8   
 9  _headerMatcher = re.compile('^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M) 
10  _seqMatcher    = re.compile('(?<=ORIGIN).+(?=//\n)', re.DOTALL + re.M) 
11  _cleanSeq      = re.compile('[ \n0-9]+') 
12  _acMatcher     = re.compile('(?<=^ACCESSION   ).+',re.M) 
13  _deMatcher     = re.compile('(?<=^DEFINITION  ).+\n( .+\n)*',re.M) 
14  _cleanDe       = re.compile('\n *') 
15   
def __gbparser(text):
    """Split the text of one GenBank-style flat-file entry into its parts.

    The module-level patterns are applied to *text* to extract:

    - ``ac``     -- first token of the ACCESSION line
    - ``seq``    -- text between ORIGIN and the closing ``//`` with blanks,
                    newlines and digits removed, upper-cased
    - ``de``     -- DEFINITION text (taken from the header) with continuation
                    lines joined by one space and surrounding whitespace and
                    dots stripped
    - ``header`` -- text from LOCUS up to the line before FEATURES
    - ``ft``     -- text from FEATURES up to the ORIGIN line
    - ``acs``    -- list of the remaining tokens of the ACCESSION line

    Returns the tuple ``(ac, seq, de, header, ft, acs)``.

    Raises AttributeError when one of the patterns does not match (its
    ``search()`` returns None); the offending entry is first written to
    stderr between two separator lines.
    """
    try:
        header = _headerMatcher.search(text).group()
        ft = _featureMatcher.search(text).group()
        seq = _seqMatcher.search(text).group()
        seq = _cleanSeq.sub('', seq).upper()
        acs = _acMatcher.search(text).group()
        acs = acs.split()
        ac = acs[0]
        acs = acs[1:]
        # The definition is looked up in the header only, not the whole entry.
        de = _deMatcher.search(header).group()
        de = _cleanDe.sub(' ', de).strip().strip('.')
    except AttributeError:
        # A failed search() returned None.  Dump the entry for diagnosis,
        # using plain write() so the code is valid on Python 2 and 3.
        bar = '======================================================='
        sys.stderr.write('%s\n%s\n%s\n' % (bar, text, bar))
        # Bare raise keeps the original traceback (``raise e`` resets it
        # on Python 2).
        raise

    return (ac, seq, de, header, ft, acs)
35
def genbankParser(text):
    """Parse the text of one GenBank entry into a ``gb.GbSequence``.

    The fields produced by ``__gbparser`` are handed to the constructor
    positionally, in the order they are returned.
    """
    fields = __gbparser(text)
    return gb.GbSequence(*fields)
38 39
def genbankIterator(file):
    """Yield one parsed sequence per entry delivered by ``nucEntryIterator``.

    Each entry text obtained from ``nucEntryIterator(file)`` is converted
    with ``genbankParser`` and yielded in order.
    """
    entries = nucEntryIterator(file)
    for entry in entries:
        yield genbankParser(entry)
43 44
def genpepParser(text):
    """Parse the text of one GenPept entry into a ``gb.GpepSequence``.

    The fields produced by ``__gbparser`` are handed to the constructor
    positionally, in the order they are returned.
    """
    fields = __gbparser(text)
    return gb.GpepSequence(*fields)
47 48
def genpepIterator(file):
    """Yield one parsed sequence per entry delivered by ``aaEntryIterator``.

    Each entry text obtained from ``aaEntryIterator(file)`` is converted
    with ``genpepParser`` and yielded in order.
    """
    entries = aaEntryIterator(file)
    for entry in entries:
        yield genpepParser(entry)
52