Package obitools :: Package alignment :: Module ace
[hide private]
[frames | no frames]

Source Code for Module obitools.alignment.ace

 1  from obitools.format.genericparser import GenericParser 
 2  from obitools.utils import universalOpen 
 3  from obitools.fasta import parseFastaDescription 
 4  from obitools import NucSequence 
 5   
 6  from itertools import imap 
 7   
 8  import sys 
 9   
10  _contigIterator=GenericParser('^CO ') 
11   
12  _contigIterator.addParseAction('AF', '\nAF +(\S+) +([UC]) +(-?[0-9]+)') 
13  _contigIterator.addParseAction('RD', '\nRD +(\S+) +([0-9]+) +([0-9]+) +([0-9]+) *\n([A-Za-z\n*]+?)\n\n') 
14  _contigIterator.addParseAction('DS', '\nDS +(.+)') 
15  _contigIterator.addParseAction('CO',  '^CO (\S+)') 
16   
17 -def contigIterator(file):
18 file = universalOpen(file) 19 for entry in _contigIterator(file): 20 contig=[] 21 for rd,ds,af in map(None,entry['RD'],entry['DS'],entry['AF']): 22 id = rd[0] 23 shift = int(af[2]) 24 if shift < 0: 25 print >> sys.stderr,"Sequence %s in contig %s has a negative paddng value %d : skipped" % (id,entry['CO'][0],shift) 26 #continue 27 28 definition,info = parseFastaDescription(ds) 29 info['shift']=shift 30 seq = rd[4].replace('\n','').replace('*','-').strip() 31 contig.append(NucSequence(id,seq,definition,**info)) 32 33 maxlen = max(len(x)+x['shift'] for x in contig) 34 minshift=min(x['shift'] for x in contig) 35 rep = [] 36 37 for s in contig: 38 info = s.getTags() 39 info['shift']-=minshift-1 40 head = '-' * (info['shift']-1) 41 42 tail = (maxlen + minshift - len(s) - info['shift'] - 1) 43 info['tail']=tail 44 newseq = NucSequence(s.id,head + str(s)+ '-' * tail,s.definition,**info) 45 rep.append(newseq) 46 47 yield entry['CO'][0],rep
48