1 import os
2 import re
3
4 from obitools.fasta import formatFasta
5
7
class SsearchParser(object):
    """Parse the plain-text report of an ssearch/fasta-style search.

    NOTE(review): the ``class`` line and the ``def __init__`` line were not
    present in the reviewed text; they are restored here from the way the
    rest of the file uses the class (``SsearchParser(...)`` called with one
    argument, ``SsearchParser._matchQuery``) -- confirm the base class
    against the upstream file.

    Attributes set by the constructor:
        data        -- the complete report text.
        query       -- identifier captured after the ``>`` run on the line
                       following ``Query:``.
        queryLength -- integer found just before `` nt`` on that same line.
        props       -- list of dicts, one per line of the
                       "The best scores are:" table, with the keys ``ac``,
                       ``identity``, ``subjectlength`` and ``matchlength``.
                       Empty when the report has no such table.
    """

    # Raw strings: "\d" inside a plain literal is an invalid escape sequence
    # (a warning on recent Python 3).  The patterns match exactly the same
    # text as before.
    _matchQuery = re.compile(r"^Query:.+\n.+?>+([^ ]+)", re.MULTILINE)
    _matchLQuery = re.compile(r"^Query:.+\n.+?(\d+)(?= nt\n)", re.MULTILINE)
    _matchProp = re.compile(r"^The best scores are:.*\n(.+?)>>>",
                            re.DOTALL | re.MULTILINE)

    def __init__(self, file):
        """Read and parse a report.

        Parameters:
            file -- either a path (``str``) or an object with a ``read()``
                    method returning the whole report as text.

        Raises:
            AttributeError -- when the ``Query:`` header cannot be found
                              (the regex search returns ``None``).
            ValueError     -- when a table line does not contain exactly one
                              tab or a numeric field cannot be converted.
            IndexError     -- when a table line has too few columns.
        """
        if isinstance(file, str):
            # Open and close the file ourselves; the 'rU' mode used before is
            # rejected by Python 3.11+, so newlines are normalised by hand to
            # keep the universal-newline behaviour on every version.
            with open(file) as handle:
                text = handle.read()
            self.data = text.replace('\r\n', '\n').replace('\r', '\n')
        else:
            self.data = file.read()

        self.query = SsearchParser._matchQuery.search(self.data).group(1)
        self.queryLength = int(
            SsearchParser._matchLQuery.search(self.data).group(1))

        # Always define props, so a report without a score table yields an
        # empty list instead of a missing attribute or iterating None.
        self.props = []
        table = SsearchParser._matchProp.search(self.data)
        if table is None:
            return

        # Drop the header line, and the trailing blank line plus the ">>>"
        # remainder that closes the matched region.
        for line in table.group(0).split('\n')[1:-2]:
            subject, tab = line.split('\t')
            tab = tab.split()
            ssp = subject.split()
            ac = ssp[0]
            # Fifth field from the end looks like "250)": strip the ")".
            dbl = int(ssp[-5][:-1])
            ident = float(tab[0])
            # Inclusive span between the 5th and 6th tab-side columns
            # (presumably alignment begin/end coordinates -- TODO confirm).
            matchlen = int(tab[5]) - int(tab[4]) + 1
            self.props.append({"ac": ac,
                               "identity": ident,
                               "subjectlength": dbl,
                               'matchlength': matchlen})
33
def run(seq,database,program='fasta35',opts=''):
    """Search one sequence against a database and parse the report.

    The sequence is formatted with ``formatFasta`` and written to the
    standard input of ``program``; the program's standard output is handed
    to ``SsearchParser``.

    Parameters:
        seq      -- the query sequence, passed to ``formatFasta``.
        database -- database argument appended to the command line.
        program  -- executable to launch (default ``'fasta35'``).
        opts     -- extra command-line options, as a single string.

    Returns:
        A ``(seq, result)`` tuple where ``result`` is the ``SsearchParser``
        instance built from the program output.

    Raises:
        Whatever ``SsearchParser`` raises on an unparsable report; the
        pipes are closed and the child process reaped in every case.
    """
    # Local import so this function does not depend on the file's import
    # block; subprocess replaces os.popen2, which no longer exists on
    # Python 3.
    import subprocess

    # NOTE: the command is still interpreted by the shell, exactly as
    # os.popen2 did, so `program`, `opts` and `database` must come from a
    # trusted source.
    process = subprocess.Popen("%s %s %s" % (program, opts, database),
                               shell=True,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    try:
        # Same bytes as the former `print >>pipe, text`: text plus newline.
        process.stdin.write("%s\n" % formatFasta(seq))
        # Closing stdin signals end of input before the output is read.
        process.stdin.close()
        result = SsearchParser(process.stdout)
    finally:
        if not process.stdin.closed:
            process.stdin.close()
        process.stdout.close()
        # Reap the child so it does not linger as a zombie.
        process.wait()
    return seq, result
40
# Body of a generator: for every item of `sequenceIterator` it calls run()
# with the same `database`, `program` and `opts` values and yields what
# run() returns.
# NOTE(review): the enclosing `def` line is not visible in this excerpt --
# confirm the function's name and parameter defaults against the full file.
42 for seq in sequenceIterator:
43 yield run(seq,database,program,opts)
44