Commit 720501e7 authored by Eric Coissac's avatar Eric Coissac

Print fasta output of the unfold, fasta and unfoldrrna on the standard

output
parent 81c6e0f1
'''
Created on 26 nov. 2014
@author: boyer
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, genesincontig
import os
__title__="Build a fasta file from the assembling graph"
default_config = {
}
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='<index>',
help='index root filename (produced by the oa index command)')
parser.add_argument(dest='orgasm:outputfilename', metavar='<output>',
nargs='?',
default=None,
help='output prefix' )
def fastaFormat(edge, title=None, nchar=60):
if title is None:
title = 'Seq'
lheader = []
for k in ('weight', 'label', 'length', 'stemid', 'ingene'):
lheader.append('%s=%s'%(k, edge[k]))
l = ['; '.join(lheader)+";"]
l[0] = '>%s_%d %s'%(title, edge['stemid'], l[0])
seq = edge['sequence']
lseq = len(edge['sequence'])
i=0
while i < lseq:
l.append(seq[i:i+60].decode('ascii'))
i += 60
return '\n'.join(l)
def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
r = getIndex(config)
ecoverage,x,newprobes = getSeeds(r,config)
asm = restoreGraph(output+'.oax',r,x)
cg = asm.compactAssembling(verbose=False)
genesincontig(cg,r,x)
# fastaout = open(output+".fasta","w")
fastaout = sys.stdout
logger.info("Print the result as a fasta file")
edges = [cg.getEdgeAttr(*i) for i in cg.edgeIterator(edgePredicate = lambda e : cg.getEdgeAttr(*e)['stemid']>0)]
head, tail = os.path.split(output)
for e in edges:
print(fastaFormat(e, tail),file=fastaout)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
pathConstraints, scaffold, selectGoodComponent, unfoldAssembling, path2fasta
__title__="Universal assembling graph unfolder"
default_config = { 'circular' : False,
'force' : False
}
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='index',
help='index root filename (produced by the oa index command)')
parser.add_argument(dest='orgasm:outputfilename', metavar='output',
nargs='?',
default=None,
help='output prefix' )
parser.add_argument('--circular', dest='unfold:circular',
action='store_true',
default=None,
help='Wish a circular unfolding')
parser.add_argument('--force', dest='unfold:force',
action='store_true',
default=None,
help='Force circular unfolding')
parser.add_argument('--back', dest='orgasm:back',
type=int,
action='store',
default=None,
help='the number of bases taken at the end of '
'contigs to jump with paired-ends [default: <estimated>]')
def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
if config['unfold']['force']:
config['unfold']['circular']= True
r = getIndex(config)
coverage,x,newprobes = getSeeds(r,config)
asm = restoreGraph(output+'.oax',r,x)
logger.info("Evaluate fragment length")
meanlength,sdlength = estimateFragmentLength(asm)
if meanlength is not None:
logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength,sdlength))
if config['orgasm']['back'] is not None:
back = config['orgasm']['back']
elif config['orgasm']['back'] is None and meanlength is not None:
back = int(meanlength + 4 * sdlength)
if back > 500:
back=500
else:
back = 300
logger.info("Evaluate pair-end constraints")
cg = asm.compactAssembling(verbose=False)
genesincontig(cg,r,x)
minlink=config['orgasm']['minlink']
constraints = pathConstraints(asm,cg,back=int(back),minlink=minlink)
scaffold(asm,cg,minlink=minlink,
back=int(back),addConnectedLink=False)
#fastaout = open(output+".fasta","w")
fastaout = sys.stdout
pathout = open(output+".path","w")
logger.info("Select the good connected components")
gcc = selectGoodComponent(cg)
logger.info("Print the result as a fasta file")
if config['unfold']['circular']:
if config['unfold']['force']:
logger.info("Force circular sequence")
else:
logger.info("Unfolding in circular mode")
c=1
for seeds in gcc:
path = unfoldAssembling(asm,cg,
seeds=seeds,
constraints=constraints,
circular=config['unfold']['circular'],
force=config['unfold']['force'])
path = path[-1][0]
fa = path2fasta(asm,cg,path,
identifier="Seq_%d" % c,
back=back,
minlink=config['orgasm']['minlink'],
logger=logger)
print(fa,file=fastaout)
print(" ".join([str(x) for x in path]),file=pathout)
c+=1
with open(output +'.path.gml','w') as gmlfile:
print(cg.gml(),file=gmlfile)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, path2fasta, unfoldmarker
__title__="Assembling graph unfolder for the nuclear rDNA complex"
default_config = {
}
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='index',
help='index root filename (produced by the oa index command)')
parser.add_argument(dest='orgasm:outputfilename', metavar='output',
nargs='?',
default=None,
help='output prefix' )
parser.add_argument('--back', dest='orgasm:back',
type=int,
action='store',
default=None,
help='the number of bases taken at the end of '
'contigs to jump with pared-ends [default: <estimated>]')
def selectGoodComponent(cg):
def geneincc(cc):
return any(cg.getEdgeAttr(*e)['ingene']>0 for e in cc)
cc = list(cg.connectedComponentIterator())
goodcc=[]
for cc1 in cc:
ccp = set(-i for i in cc1)
if ccp not in goodcc:
goodcc.append(cc1)
gcc = [list(cg.edgeIterator(nodePredicate=lambda n:n in c,
edgePredicate=lambda e: 'stemid' in cg.getEdgeAttr(*e)))
for c in goodcc]
gcc = [cc for cc in gcc if geneincc(cc)]
return gcc
def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
r = getIndex(config)
coverage,x,newprobes = getSeeds(r,config)
asm = restoreGraph(output+'.oax',r,x)
logger.info("Evaluate fragment length")
meanlength,sdlength = estimateFragmentLength(asm)
if meanlength is not None:
logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength,sdlength))
if config['orgasm']['back'] is not None:
back = config['orgasm']['back']
elif config['orgasm']['back'] is None and meanlength is not None:
back = int(meanlength + 4 * sdlength)
if back > 500:
back=500
else:
back = 300
logger.info("Evaluate pair-end constraints")
cg = asm.compactAssembling(verbose=False)
genesincontig(cg,r,x)
scaffold(asm,cg,minlink=5,back=int(back),addConnectedLink=False)
fastaout = open(output+".fasta","w")
fastaout = sys.stdout
pathout = open(output+".path","w")
logger.info("Select the good connected components")
gcc = selectGoodComponent(cg)
logger.info("Print the result as a fasta file")
c=1
for seeds in gcc:
path = unfoldmarker(cg,seeds=seeds)
fa = path2fasta(asm,cg,path,
identifier="Seq_%d" % c,
back=back,
minlink=config['orgasm']['minlink'],
logger=logger)
print(fa, file=fastaout)
print(" ".join([str(x) for x in path]),file=pathout)
c+=1
print(cg.gml(),file=open(output +'.path.gml','w'))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment