1 from obitools.format.genericparser import GenericParser
2 from obitools.utils import universalOpen
3 from obitools.fasta import parseFastaDescription
4 from obitools import NucSequence
5
6 from itertools import imap
7
8 import sys
9
# Module-private parser. It is built with the pattern '^CO ' (presumably the
# marker GenericParser uses to recognise the start of a contig entry -- confirm
# against GenericParser) and then given one named regular expression per field.
_contigIterator = GenericParser('^CO ')

# Registered in this order: AF, RD, DS, CO.
#   AF: a line "AF <token> <U|C> <integer, optionally negative>"
#   RD: a line "RD <token> <int> <int> <int>" followed by a block made of
#       letters, newlines and '*' characters, ended by an empty line
#   DS: the remainder of a line starting with "DS "
#   CO: the first whitespace-free token after a leading "CO "
for _field, _regex in (
    ('AF', '\nAF +(\S+) +([UC]) +(-?[0-9]+)'),
    ('RD', '\nRD +(\S+) +([0-9]+) +([0-9]+) +([0-9]+) *\n([A-Za-z\n*]+?)\n\n'),
    ('DS', '\nDS +(.+)'),
    ('CO', '^CO (\S+)'),
):
    _contigIterator.addParseAction(_field, _regex)
# Do not leave the loop helpers behind in the module namespace.
del _field, _regex
16
18 file = universalOpen(file)
19 for entry in _contigIterator(file):
20 contig=[]
21 for rd,ds,af in map(None,entry['RD'],entry['DS'],entry['AF']):
22 id = rd[0]
23 shift = int(af[2])
24 if shift < 0:
25 print >> sys.stderr,"Sequence %s in contig %s has a negative paddng value %d : skipped" % (id,entry['CO'][0],shift)
26
27
28 definition,info = parseFastaDescription(ds)
29 info['shift']=shift
30 seq = rd[4].replace('\n','').replace('*','-').strip()
31 contig.append(NucSequence(id,seq,definition,**info))
32
33 maxlen = max(len(x)+x['shift'] for x in contig)
34 minshift=min(x['shift'] for x in contig)
35 rep = []
36
37 for s in contig:
38 info = s.getTags()
39 info['shift']-=minshift-1
40 head = '-' * (info['shift']-1)
41
42 tail = (maxlen + minshift - len(s) - info['shift'] - 1)
43 info['tail']=tail
44 newseq = NucSequence(s.id,head + str(s)+ '-' * tail,s.definition,**info)
45 rep.append(newseq)
46
47 yield entry['CO'][0],rep
48