Commit 089f0892 by Eric Coissac

Add management of obitools tags on generated sequences

parent dfb33d60
......@@ -222,14 +222,12 @@ cpdef dict buildstem(Assembler assembler,
'weight' : int(w),
'circle' : bool(circle),
'stemid' : 0,
'label' : None
'label' : None,
'head' : assembler._index.getRead(first,
0,
assembler._index.getReadSize())
}
if is_begining(assembler._graph,first):
s["head"]=assembler._index.getRead(first,
0,
assembler._index.getReadSize())
if circle:
getConfiguration()['orgasm']['logger'].info(" Circle : %6d bp coverage : %6dx" %
(length,int(w)))
......
......@@ -5,14 +5,15 @@ Created on 26 nov. 2014
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, genesincontig
from orgasm.utils import tags2str
import os
import sys
__title__="Build a fasta file from the assembling graph"
default_config = {
default_config = { 'tags' : None,
'extension' : None
}
def addOptions(parser):
......@@ -24,8 +25,21 @@ def addOptions(parser):
default=None,
help='output prefix' )
parser.add_argument('--set-tag','-S', dest ='fasta:tags',
metavar='tag',
action='append',
default=[],
type=str,
help='Allows to add a tag in the OBITools format '
'to the header of the fasta sequences')
parser.add_argument('--no-5ext', dest='fasta:extension',
action='store_false',
default=True,
help="Do not add the 5' end of the sequences")
def fastaFormat(edge, title=None, nchar=60):
def fastaFormat(edge, title=None, nchar=60, extension=False,tags=[]):
if title is None:
title = 'Seq'
......@@ -36,13 +50,13 @@ def fastaFormat(edge, title=None, nchar=60):
lheader.append('%s=%s'%(k, edge[k]))
l = ['; '.join(lheader)+";"]
l = ['; '.join(lheader)+"; " + tags2str(tags)]
l[0] = '>%s_%d %s'%(title, edge['stemid'], l[0])
seq = edge['sequence']
if "head" in edge:
if extension:
seq=edge['head'].lower() + seq
lseq = len(seq)
i=0
......@@ -77,7 +91,10 @@ def run(config):
head, tail = os.path.split(config['orgasm']['outputfilename'])
c=1
for e in edges:
print(fastaFormat(e, "%s_%d" % (tail,c)),
print(fastaFormat(e, "%s_%d" % (tail,c),
extension=config['fasta']['extension'],
tags=config['fasta']['tags']
),
file=fastaout)
c+=1
......
......@@ -43,6 +43,15 @@ def addOptions(parser):
help='the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]')
parser.add_argument('--set-tag','-S', dest ='fasta:tags',
metavar='tag',
action='append',
default=[],
type=str,
help='Allows to add a tag in the OBITools format '
'to the header of the fasta sequences')
def run(config):
......@@ -90,10 +99,12 @@ def run(config):
logger.info('Built path : %s' % str(path))
fa = path2fasta(asm,cg,path,
identifier="%s_%d" % (seqid,c),
back=back,
minlink=config['orgasm']['minlink'],
logger=logger)
identifier="%s_%d" % (seqid,c),
back=back,
minlink=config['orgasm']['minlink'],
logger=logger,
tags=config['fasta']['tags'])
print(fa,file=fastaout)
print(" ".join([str(x) for x in path]),file=pathout)
......
......@@ -41,6 +41,14 @@ def addOptions(parser):
help='the number of bases taken at the end of '
'contigs to jump with paired-ends [default: <estimated>]')
parser.add_argument('--set-tag','-S', dest ='fasta:tags',
metavar='tag',
action='append',
default=[],
type=str,
help='Allows to add a tag in the OBITools format '
'to the header of the fasta sequences')
def run(config):
......@@ -117,7 +125,8 @@ def run(config):
identifier="%s_%d" % (seqid,c),
back=back,
minlink=config['orgasm']['minlink'],
logger=logger)
logger=logger,
tags=config['fasta']['tags'])
print(fa,file=fastaout)
print(" ".join([str(x) for x in path]),file=pathout)
......
......@@ -31,6 +31,14 @@ def addOptions(parser):
help='the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]')
parser.add_argument('--set-tag','-S', dest ='fasta:tags',
metavar='tag',
action='append',
default=[],
type=str,
help='Allows to add a tag in the OBITools format '
'to the header of the fasta sequences')
def selectGoodComponent(cg):
......@@ -108,7 +116,8 @@ def run(config):
identifier="%s_%d" % (seqid,c),
back=back,
minlink=config['orgasm']['minlink'],
logger=logger)
logger=logger,
tags=config['fasta']['tags'])
print(fa, file=fastaout)
print(" ".join([str(x) for x in path]),file=pathout)
......
......@@ -24,6 +24,7 @@ from orgasm.multialign import multiAlignReads, consensus # @UnresolvedImport
from orgasm.assembler import Assembler # @UnresolvedImport
from orgasm.assembler import buildstem # @UnresolvedImport
from orgasm.assembler import tango # @UnresolvedImport
from orgasm.utils import tags2str
from time import time
import math
......@@ -883,7 +884,7 @@ def coverageEstimate(self,matches=None,index=None,timeout=60.0):
return maxpath,cumlength,maxpath/float(cumlength)
def path2fasta(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=200,logger=None):
def path2fasta(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back=200,logger=None,tags=[]):
'''
Convert a path in an compact assembling graph in a fasta formated sequences.
......@@ -1129,8 +1130,9 @@ def path2fasta(self,assgraph,path,identifier="contig",minlink=10,nlength=20,back
length = sum(slength)
mcov = sum(a * b for a,b in map(lambda a,b:(a,b),slength,coverage)) / float(length)
fasta=[">%s seq_length=%d; coverage=%5.1f; circular=%s; %s" % (identifier,length,
fasta=[">%s seq_length=%d; coverage=%5.1f; circular=%s; %s%s" % (identifier,length,
mcov,circular,
tags2str(tags) + " ",
'.'.join(label))]
fasta.extend(sequence[i:i+60].decode('ascii') for i in range(0,len(sequence),60))
return "\n".join(fasta)
......
......@@ -23,3 +23,7 @@ def bytes2str( string):
"""
return string.decode('ascii')
def tags2str(taglist):
    """
    Format a list of ``key:value`` strings as ``key=value;`` header tags.

    Each entry is split on its first ':' only, so the value part may itself
    contain colons. Entries without any ':' are silently ignored. The
    formatted tags are joined with a single space.

    @param taglist: iterable of strings of the form ``key:value``, or None
                    when no tag was configured
    @return: the formatted tags as a single string, or an empty string when
             there is nothing to format
    """
    # The configuration default for the tags may be None rather than an
    # empty list; treat it as "no tags" instead of failing on iteration.
    if taglist is None:
        return ""

    pairs = (v.split(':', 1) for v in taglist if ':' in v)
    return " ".join("%s=%s;" % (key, value) for key, value in pairs)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment