Commit 1c745a3e by Eric Coissac

Patches some minor bugs

parent 3fadd975
......@@ -25,16 +25,27 @@ def getOutput(config):
#
# Reformatting output according to the new format
#
# Try to load the index to test its format
index=getIndex(config)
os.makedirs(dirname)
for extension in ['oax','omx','gml','stats',
'intermediate.gml','.intermediate.oax']:
'intermediate.gml','.intermediate.oax',
'chloroplast.path.gml',
'path']:
if os.path.exists('%s.%s' % (config['orgasm']['outputfilename'],
extension)):
os.renames('%s.%s' % (config['orgasm']['outputfilename'],
extension),
'%s.oas/assembling.%s' % (config['orgasm']['outputfilename'],
extension))
# Try to load the seeds to test its format
seeds=getSeeds(index, config)
sys.exit(0)
else:
#
# Exit with an error because the format is obsolete.
......
'''
Created on 28 sept. 2014
@author: coissac
'''
import orgasm.samples
from orgasm import getIndex, getSeeds,getOutput
from orgasm.tango import cutLowCoverage, cutSNPs,\
estimateDeadBrancheLength, estimateFragmentLength,\
genesincontig, scaffold, fillGaps, dumpGraph, restoreGraph
import sys
# Title string of this command module.
__title__ = "Recompact the assembling graph"

# Module-level configuration defaults: a single 'seeds' entry set to None.
default_config = dict(seeds=None)
def addOptions(parser):
    """Declare the command-line arguments of this command on ``parser``.

    Three arguments are registered through ``parser.add_argument``:

    * a mandatory positional stored under ``orgasm:indexfilename``;
    * an optional positional (``nargs='?'``, default ``None``) stored
      under ``orgasm:outputfilename``;
    * ``--back``, an integer option (default ``None``) stored under
      ``orgasm:back``.

    :param parser: object providing an argparse-style ``add_argument``
                   method.
    """
    parser.add_argument(dest='orgasm:indexfilename', metavar='index',
                        help='index root filename (produced by the oa index command)')

    parser.add_argument(dest='orgasm:outputfilename', metavar='output',
                        nargs='?',
                        default=None,
                        help='output prefix')

    # Help text spelling fixed: "paired-ends" (was "pared-ends").
    parser.add_argument('--back', dest='orgasm:back',
                        type=int,
                        action='store',
                        default=None,
                        help='the number of bases taken at the end of '
                             'contigs to jump with paired-ends [default: <estimated>]')
def run(config):
    """Recompute the compact graph of a stored assembly and write it as GML.

    In order: resolve the output prefix with ``getOutput``, load the index
    and the seeds, call ``restoreGraph`` on ``<output>.oax``, pick the
    ``back`` distance, compact the assembly, scaffold it, run
    ``genesincontig`` and print ``gml()`` of the compact graph to
    ``<output>.gml``.

    ``back`` is ``config['orgasm']['back']`` when it is not ``None``;
    otherwise ``mean + 4 * sd`` of the estimated fragment length capped at
    500, or 300 when no estimate is available.

    :param config: nested mapping; this function reads
                   ``config['orgasm']['logger']`` and
                   ``config['orgasm']['back']`` directly.
    """
    log = config['orgasm']['logger']
    prefix = getOutput(config)

    index = getIndex(config)
    _, seeds, _ = getSeeds(index, config)
    graph = restoreGraph(prefix + '.oax', index, seeds)

    mean, sd = estimateFragmentLength(graph)

    back = config['orgasm']['back']
    if back is None:
        if mean is None:
            back = 300
        else:
            back = min(int(mean + 4 * sd), 500)

    if mean is not None:
        log.info("Fragment length estimated : %f pb (sd: %f)" % (mean, sd))

    compact = graph.compactAssembling(verbose=False)

    log.info("Scaffold the assembly")
    scaffold(graph, compact, minlink=5, back=int(back), addConnectedLink=False)
    genesincontig(compact, index, seeds)

    with open(prefix + '.gml', 'w') as gmlfile:
        print(compact.gml(), file=gmlfile)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, cutLowCoverage, estimateDeadBrancheLength, dumpGraph
# Title string of this command module.
__title__ = "Cut low coverage edge in an assembling graph"

# Module-level configuration defaults: both entries start as None.
default_config = dict(coverage=None,
                      smallbranches=None)
def addOptions(parser):
    """Declare the command-line arguments of this command on ``parser``.

    Registered through ``parser.add_argument``:

    * a mandatory positional stored under ``orgasm:indexfilename``;
    * an optional positional (default ``None``) stored under
      ``orgasm:outputfilename``;
    * ``--coverage``, a required integer stored under ``cutlow:coverage``;
    * ``--smallbranches``, an integer (default ``None``) stored under
      ``cutlow:smallbranches``;
    * ``--back``, an integer (default ``None``) stored under
      ``orgasm:back``.

    :param parser: object providing an argparse-style ``add_argument``
                   method.
    """
    parser.add_argument(dest='orgasm:indexfilename', metavar='index',
                        help='index root filename (produced by the oa index command)')

    parser.add_argument(dest='orgasm:outputfilename', metavar='output',
                        nargs='?',
                        default=None,
                        help='output prefix')

    parser.add_argument('--coverage', dest='cutlow:coverage',
                        required=True,
                        type=int,
                        action='store',
                        default=None,
                        help='All edges with a coverage below this value will be deleted')

    parser.add_argument('--smallbranches', dest='cutlow:smallbranches',
                        type=int,
                        action='store',
                        default=None,
                        help='maximum length of the branches to cut during '
                             'the cleaning process [default: <estimated>]')

    # Help text spelling fixed: "paired-ends" (was "pared-ends").
    parser.add_argument('--back', dest='orgasm:back',
                        type=int,
                        action='store',
                        default=None,
                        help='the number of bases taken at the end of '
                             'contigs to jump with paired-ends [default: <estimated>]')
def run(config):
    """Cut low-coverage edges of a stored assembly and save the result.

    In order: restore the graph from ``<output>.oax``, estimate the
    fragment length, pick the ``back`` distance, call ``cutLowCoverage``
    with ``config['cutlow']['coverage']``, clean dead branches, compact,
    run ``genesincontig`` and ``scaffold``, print the compact graph's
    ``gml()`` to ``<output>.gml`` and call ``dumpGraph`` on
    ``<output>.oax``.

    ``back`` is ``config['orgasm']['back']`` when it is not ``None``;
    otherwise ``mean + 4 * sd`` of the estimated fragment length capped at
    500, or 300 when no estimate is available.  The dead-branch length is
    ``config['cutlow']['smallbranches']`` when set, else the value returned
    by ``estimateDeadBrancheLength`` (which is then logged).

    :param config: nested mapping; reads the ``orgasm`` and ``cutlow``
                   sections.
    """
    log = config['orgasm']['logger']
    prefix = getOutput(config)

    index = getIndex(config)
    _, seeds, _ = getSeeds(index, config)
    graph = restoreGraph(prefix + '.oax', index, seeds)

    log.info("Evaluate fragment length")
    mean, sd = estimateFragmentLength(graph)
    if mean is not None:
        log.info("Fragment length estimated : %f pb (sd: %f)" % (mean, sd))

    back = config['orgasm']['back']
    if back is None:
        back = 300 if mean is None else min(int(mean + 4 * sd), 500)

    log.info("Cut low coverage")
    cutLowCoverage(graph, config['cutlow']['coverage'], terminal=False)

    branchmax = config['cutlow']['smallbranches']
    if branchmax is None:
        branchmax = estimateDeadBrancheLength(graph)
        log.info("Dead branch length setup to : %d bp" % branchmax)

    graph.cleanDeadBranches(maxlength=branchmax)

    compact = graph.compactAssembling(verbose=False)
    genesincontig(compact, index, seeds)
    scaffold(graph, compact, minlink=5, back=int(back), addConnectedLink=False)

    with open(prefix + '.gml', 'w') as gmlfile:
        print(compact.gml(), file=gmlfile)

    dumpGraph(prefix + '.oax', graph)
'''
Created on 26 nov. 2014
@author: boyer
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, genesincontig
import os
# Title string of this command module.
__title__ = "Build a fasta file from the assembling graph"

# Module-level configuration defaults: none for this command.
default_config = dict()
def addOptions(parser):
    """Declare the two positional arguments of this command on ``parser``.

    The first positional is mandatory and stored under
    ``orgasm:indexfilename``; the second is optional (``nargs='?'``,
    default ``None``) and stored under ``orgasm:outputfilename``.

    :param parser: object providing an argparse-style ``add_argument``
                   method.
    """
    register = parser.add_argument

    register(metavar='<index>',
             dest='orgasm:indexfilename',
             help='index root filename (produced by the oa index command)')

    register(metavar='<output>',
             dest='orgasm:outputfilename',
             nargs='?',
             default=None,
             help='output prefix')
def fastaFormat(edge, title=None, nchar=60):
    """Format one edge as a fasta record and return it as a string.

    The header line is ``>{title}_{stemid}`` followed by the ``weight``,
    ``label``, ``length``, ``stemid`` and ``ingene`` entries of ``edge``
    written as ``key=value`` pairs separated by ``"; "`` and closed by a
    final ``";"``.  The sequence follows, split into lines of at most
    ``nchar`` characters.  The result carries no trailing newline.

    :param edge: mapping providing the keys listed above plus
                 ``'sequence'``, a bytes-like object decodable as ASCII.
    :param title: identifier prefix; ``'Seq'`` when ``None``.
    :param nchar: maximum number of sequence characters per line.  It was
                  previously ignored in favour of a hard-coded 60.
    :raises ValueError: if ``nchar`` is lower than 1.
    """
    if nchar < 1:
        raise ValueError("nchar must be a positive integer")

    if title is None:
        title = 'Seq'

    annotations = '; '.join('%s=%s' % (key, edge[key])
                            for key in ('weight', 'label', 'length',
                                        'stemid', 'ingene'))
    lines = ['>%s_%d %s;' % (title, edge['stemid'], annotations)]

    seq = edge['sequence']
    lines.extend(seq[start:start + nchar].decode('ascii')
                 for start in range(0, len(seq), nchar))

    return '\n'.join(lines)
def run(config):
    """Write the edges of the compact assembling graph to ``<output>.fasta``.

    The graph is restored from ``<output>.oax``, compacted and passed to
    ``genesincontig``.  Every edge whose ``stemid`` attribute is greater
    than 0 is then formatted with ``fastaFormat``, using the last path
    component of the output prefix as title, and printed to the fasta file.

    The fasta file is now opened in a ``with`` block so that it is flushed
    and closed even when formatting an edge fails.

    :param config: nested mapping; reads ``config['orgasm']['logger']``
                   directly.
    """
    logger = config['orgasm']['logger']
    output = getOutput(config)

    r = getIndex(config)
    ecoverage, x, newprobes = getSeeds(r, config)
    asm = restoreGraph(output + '.oax', r, x)

    cg = asm.compactAssembling(verbose=False)
    genesincontig(cg, r, x)

    # Only the last component of the prefix is used to name the sequences.
    title = os.path.split(output)[1]

    with open(output + ".fasta", "w") as fastaout:
        logger.info("Print the result as a fasta file")

        edges = [cg.getEdgeAttr(*i)
                 for i in cg.edgeIterator(
                     edgePredicate=lambda e: cg.getEdgeAttr(*e)['stemid'] > 0)]

        for e in edges:
            print(fastaFormat(e, title), file=fastaout)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, selectGoodComponent
# Title string of this command module.
__title__ = "Print some statistics about the assembling graph"

# Module-level configuration defaults: none for this command.
default_config = dict()
def addOptions(parser):
    """Declare the command-line arguments of this command on ``parser``.

    Registered through ``parser.add_argument``:

    * a mandatory positional stored under ``orgasm:indexfilename``;
    * an optional positional (default ``None``) stored under
      ``orgasm:outputfilename``;
    * ``--back``, an integer (default ``None``) stored under
      ``orgasm:back``.

    :param parser: object providing an argparse-style ``add_argument``
                   method.
    """
    parser.add_argument(dest='orgasm:indexfilename', metavar='index',
                        help='index root filename (produced by the oa index command)')

    parser.add_argument(dest='orgasm:outputfilename', metavar='<output>',
                        nargs='?',
                        default=None,
                        help='output prefix')

    # Help text spelling fixed: "paired-ends" (was "pared-ends").
    parser.add_argument('--back', dest='orgasm:back',
                        metavar='<insert size>',
                        type=int,
                        action='store',
                        default=None,
                        help='the number of bases taken at the end of '
                             'contigs to jump with paired-ends [default: <estimated>]')
def run(config):
    """Write summary statistics of a stored assembly to ``<output>.stats``.

    The graph is restored from ``<output>.oax``, compacted, passed to
    ``genesincontig`` and scaffolded.  One ``Name: value`` line is then
    printed per statistic.

    Changes compared with the previous version:

    * the stats file is opened in a ``with`` block (it used to be left
      open, and its handle replaced the ``output`` prefix variable);
    * ``GoodConnectedComponents`` now reports ``len(gcc)``, the number of
      components returned by ``selectGoodComponent``; it used to repeat the
      ``UniqueConnectedComponents`` value.

    ``back`` is ``config['orgasm']['back']`` when it is not ``None``;
    otherwise ``mean + 4 * sd`` of the estimated fragment length capped at
    500, or 300 when no estimate is available.

    :param config: nested mapping; reads ``config['orgasm']['logger']`` and
                   ``config['orgasm']['back']`` directly.
    """
    logger = config['orgasm']['logger']
    output = getOutput(config)

    r = getIndex(config)
    ecoverage, x, newprobes = getSeeds(r, config)
    asm = restoreGraph(output + '.oax', r, x)

    logger.info("Evaluate fragment length")
    meanlength, sdlength = estimateFragmentLength(asm)

    back = config['orgasm']['back']
    if back is None:
        if meanlength is not None:
            back = min(int(meanlength + 4 * sdlength), 500)
        else:
            back = 300

    cg = asm.compactAssembling(verbose=False)
    genesincontig(cg, r, x)
    scaffold(asm, cg, minlink=5, back=int(back), addConnectedLink=False)

    ccs = list(cg.connectedComponentIterator())
    gcc = selectGoodComponent(cg)

    # Both end points of every edge belonging to a good component.
    gnode = set()
    for cc in gcc:
        for e in cc:
            gnode.add(e[0])
            gnode.add(e[1])

    # A component is kept only if the set of its negated node ids has not
    # been kept already.
    ucc = set()
    for cc in ccs:
        mirrored = frozenset(-node for node in cc)
        if mirrored not in ucc:
            ucc.add(frozenset(cc))

    with open(output + ".stats", "w") as statsfile:
        # NOTE: '/' is true division, so this value is printed as a float.
        print("AssembledBasePairs:", len(asm) / 2, file=statsfile)
        print("TotalConnectedComponents:", len(ccs), file=statsfile)
        print("UniqueConnectedComponents:", len(ucc), file=statsfile)
        print("GoodConnectedComponents:", len(gcc), file=statsfile)
        print("CompactNodes:", len(cg), file=statsfile)
        print("GoodCompactNodes:", len(gnode), file=statsfile)
        print("CompactEdges:", cg.edgeCount(), file=statsfile)
        print("GoodCompactEdges:", sum(len(cc) for cc in gcc), file=statsfile)
        print("FragmentMeanLength:", meanlength, file=statsfile)
        print("FragmentSdLength:", sdlength, file=statsfile)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getIndex
# Title string of this command module.
__title__ = "List information about a read index"

# Module-level configuration defaults: none for this command.
default_config = dict()
def addOptions(parser):
    """Declare the single positional argument of this command on ``parser``.

    The value is stored under ``orgasm:indexfilename``.

    :param parser: object providing an argparse-style ``add_argument``
                   method.
    """
    index_argument = dict(
        dest='orgasm:indexfilename',
        metavar='index',
        help='index root filename (produced by the oa index command)',
    )
    parser.add_argument(**index_argument)
def run(config):
    """Print ``len()`` of the index and its ``getReadSize()`` on one line.

    :param config: configuration object handed to ``getIndex``.
    """
    index = getIndex(config)
    size = len(index)
    readsize = index.getReadSize()
    print(size, readsize)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, path2fasta
# Title string of this command module.
__title__ = "Build a fasta file from a path across the assembling graph"

# Module-level configuration defaults: none for this command.
default_config = dict()
def addOptions(parser):
    """Declare the command-line arguments of this command on ``parser``.

    Registered through ``parser.add_argument``:

    * a mandatory positional stored under ``orgasm:indexfilename``;
    * an optional positional (default ``None``) stored under
      ``orgasm:outputfilename``;
    * ``--path``, a required list of one or more integers stored under
      ``path:path``;
    * ``--back``, an integer (default ``None``) stored under
      ``orgasm:back``.

    :param parser: object providing an argparse-style ``add_argument``
                   method.
    """
    parser.add_argument(dest='orgasm:indexfilename', metavar='<index>',
                        help='index root filename (produced by the oa index command)')

    parser.add_argument(dest='orgasm:outputfilename', metavar='<output>',
                        nargs='?',
                        default=None,
                        help='output prefix')

    parser.add_argument('--path', dest='path:path',
                        action='store',
                        metavar='<edgeid>',
                        type=int,
                        nargs='+',
                        required=True,
                        default=None,
                        help='A list of edge id separated by space add -- at the end of the path')

    # Help text spelling fixed: "paired-ends" (was "pared-ends").
    parser.add_argument('--back', dest='orgasm:back',
                        metavar='<insert size>',
                        type=int,
                        action='store',
                        default=None,
                        help='the number of bases taken at the end of '
                             'contigs to jump with paired-ends [default: <estimated>]')
def run(config):
    """Write the sequence of a user-supplied path as fasta.

    The graph is restored from ``<output>.oax``, compacted, passed to
    ``genesincontig`` and scaffolded.  ``path2fasta`` is then called with
    the path found in ``config['path']['path']`` and the identifier
    ``Seq_1``.  Three files are written:

    * ``<output>.fasta``: the value returned by ``path2fasta``;
    * ``<output>.path``: the path items separated by single spaces;
    * ``<output>.path.gml``: ``gml()`` of the compact graph.

    All three files are now opened in ``with`` blocks; they used to be left
    open, the GML one through an anonymous ``open()`` passed to ``print``.

    ``back`` is ``config['orgasm']['back']`` when it is not ``None``;
    otherwise ``mean + 4 * sd`` of the estimated fragment length capped at
    500, or 300 when no estimate is available.

    :param config: nested mapping; reads ``logger``, ``back`` and
                   ``minlink`` from the ``orgasm`` section and ``path``
                   from the ``path`` section.
    """
    logger = config['orgasm']['logger']
    output = getOutput(config)

    r = getIndex(config)
    ecoverage, x, newprobes = getSeeds(r, config)
    asm = restoreGraph(output + '.oax', r, x)

    logger.info("Evaluate fragment length")
    meanlength, sdlength = estimateFragmentLength(asm)
    if meanlength is not None:
        logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength, sdlength))

    back = config['orgasm']['back']
    if back is None:
        if meanlength is not None:
            back = min(int(meanlength + 4 * sdlength), 500)
        else:
            back = 300

    cg = asm.compactAssembling(verbose=False)
    genesincontig(cg, r, x)

    minlink = config['orgasm']['minlink']
    scaffold(asm, cg, minlink=minlink,
             back=int(back), addConnectedLink=False)

    with open(output + ".fasta", "w") as fastaout, \
            open(output + ".path", "w") as pathout:
        logger.info("Print the result as a fasta file")

        path = config['path']['path']
        logger.info('Built path : %s' % str(path))

        # A single sequence is produced, hence the fixed number 1.
        fa = path2fasta(asm, cg, path,
                        identifier="Seq_%d" % 1,
                        back=back,
                        minlink=minlink,
                        logger=logger)

        print(fa, file=fastaout)
        print(" ".join(str(edgeid) for edgeid in path), file=pathout)

    with open(output + '.path.gml', 'w') as gmlfile:
        print(cg.gml(), file=gmlfile)
'''
Created on 28 sept. 2014
@author: coissac
'''
import orgasm.samples
from orgasm import getOutput,getIndex, getSeeds, getAdapters
from orgasm.tango import matchtoseed, cutLowCoverage, cutSNPs,\
estimateDeadBrancheLength, estimateFragmentLength,\
genesincontig, scaffold, fillGaps, dumpGraph, restoreGraph
from orgasm.assembler import Assembler,tango
import sys
# Title string of this command module.
__title__ = "Build the set of seed reads"

# Module-level configuration defaults: a single 'reformat' entry set to None.
default_config = dict(reformat=None)
def addOptions(parser):
    """Declare the command-line arguments of this command on ``parser``.

    Registered through ``parser.add_argument``:

    * a mandatory positional stored under ``orgasm:indexfilename``;
    * an optional positional (default ``None``) stored under
      ``orgasm:outputfilename``;
    * ``--seeds``, a repeatable string option appended to the list stored
      under ``orgasm:seeds``; its help text lists the names found in
      ``orgasm.samples`` that start with ``prot`` or ``nuc``;
    * ``--kup``, an integer (default ``None``) stored under ``orgasm:kup``;
    * ``--reformat``, a flag (default ``None``) stored under
      ``seeds:reformat``.

    :param parser: object providing an argparse-style ``add_argument``
                   method.
    """
    register = parser.add_argument

    register(dest='orgasm:indexfilename', metavar='index',
             help='index root filename (produced by the oa index command)')

    register(dest='orgasm:outputfilename', metavar='output',
             nargs='?',
             default=None,
             help='output prefix')

    internal_seeds = [name for name in dir(orgasm.samples)
                      if name.startswith('prot') or name.startswith('nuc')]

    register('--seeds', dest='orgasm:seeds',
             metavar='seeds',
             action='append',
             default=[],
             type=str,
             help='protein or nucleic seeds; either a fasta file containing '
                  'seed sequences or the name of one of the internal set of seeds '
                  'among %s' % str(internal_seeds))

    register('--kup', dest='orgasm:kup',
             type=int,
             action='store',
             default=None,
             help='The word size used to identify the seed reads '
                  '[default: protein=4, DNA=12]')

    register("--reformat",
             dest="seeds:reformat",
             action='store_true',
             default=None,
             help='Asks for reformatting an old sequence index to the new format')
def run(config):
    """Look up the seed reads and log the coverage returned by ``getSeeds``.

    :param config: nested mapping; reads ``config['orgasm']['logger']`` and
                   ``config['orgasm']['progress']`` directly.
    """
    orgasm_section = config['orgasm']
    log = orgasm_section['logger']
    # Looked up as before although the value is not used in this function.
    progress = orgasm_section['progress']

    # Called for its effects; the returned prefix is not used here.
    output = getOutput(config)

    log.info("Looking for the seed reads")

    index = getIndex(config)
    estimated, _, _ = getSeeds(index, config)

    log.info('Coverage estimated from probe matches at : %d' % estimated)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
pathConstraints, scaffold, selectGoodComponent, unfoldAssembling, path2fasta
# Title string of this command module.
__title__ = "Universal assembling graph unfolder"

# Module-level configuration defaults: both flags start as False.
default_config = dict(circular=False,
                      force=False)
def addOptions(parser):
    """Declare the command-line arguments of this command on ``parser``.

    Registered through ``parser.add_argument``:

    * a mandatory positional stored under ``orgasm:indexfilename``;
    * an optional positional (default ``None``) stored under
      ``orgasm:outputfilename``;
    * ``--circular`` and ``--force``, two flags (default ``None``) stored
      under ``unfold:circular`` and ``unfold:force``;
    * ``--back``, an integer (default ``None``) stored under
      ``orgasm:back``.

    :param parser: object providing an argparse-style ``add_argument``
                   method.
    """
    register = parser.add_argument

    register(dest='orgasm:indexfilename', metavar='index',
             help='index root filename (produced by the oa index command)')

    register(dest='orgasm:outputfilename', metavar='output',
             nargs='?',
             default=None,
             help='output prefix')

    # The two flags share everything but their name, destination and help.
    for flag, dest, text in (
            ('--circular', 'unfold:circular', 'Wish a circular unfolding'),
            ('--force', 'unfold:force', 'Force circular unfolding')):
        register(flag, dest=dest,
                 action='store_true',
                 default=None,
                 help=text)

    register('--back', dest='orgasm:back',
             type=int,
             action='store',
             default=None,
             help='the number of bases taken at the end of '
                  'contigs to jump with paired-ends [default: <estimated>]')
def run(config):
    """Unfold each good connected component and write the resulting sequences.

    The graph is restored from ``<output>.oax``, compacted, passed to
    ``genesincontig`` and scaffolded.  For every item returned by
    ``selectGoodComponent``, ``unfoldAssembling`` is called, the first
    element of the last item of its result is taken as the path, and
    ``path2fasta`` turns that path into a record named ``Seq_<n>`` with
    ``n`` counted from 1.  Three files are written:

    * ``<output>.fasta``: one ``path2fasta`` result per component;
    * ``<output>.path``: one line per component, path items separated by
      single spaces;
    * ``<output>.path.gml``: ``gml()`` of the compact graph, written once
      after all components have been processed.

    The fasta and path files are now opened in a ``with`` block; they used
    to be left open.

    ``config['unfold']['circular']`` is set to ``True`` in place when
    ``config['unfold']['force']`` is true.  ``back`` is
    ``config['orgasm']['back']`` when it is not ``None``; otherwise
    ``mean + 4 * sd`` of the estimated fragment length capped at 500, or
    300 when no estimate is available.

    :param config: nested mapping; reads ``logger``, ``back`` and
                   ``minlink`` from the ``orgasm`` section and ``circular``
                   and ``force`` from the ``unfold`` section.
    """
    logger = config['orgasm']['logger']
    output = getOutput(config)

    # Forcing implies circular mode.
    if config['unfold']['force']:
        config['unfold']['circular'] = True

    r = getIndex(config)
    coverage, x, newprobes = getSeeds(r, config)
    asm = restoreGraph(output + '.oax', r, x)

    logger.info("Evaluate fragment length")
    meanlength, sdlength = estimateFragmentLength(asm)
    if meanlength is not None:
        logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength, sdlength))

    back = config['orgasm']['back']
    if back is None:
        if meanlength is not None:
            back = min(int(meanlength + 4 * sdlength), 500)
        else:
            back = 300

    logger.info("Evaluate pair-end constraints")
    cg = asm.compactAssembling(verbose=False)
    genesincontig(cg, r, x)

    minlink = config['orgasm']['minlink']
    constraints = pathConstraints(asm, cg, back=int(back), minlink=minlink)
    scaffold(asm, cg, minlink=minlink,
             back=int(back), addConnectedLink=False)

    with open(output + ".fasta", "w") as fastaout, \
            open(output + ".path", "w") as pathout:
        logger.info("Select the good connected components")
        gcc = selectGoodComponent(cg)

        logger.info("Print the result as a fasta file")
        if config['unfold']['circular']:
            if config['unfold']['force']:
                logger.info("Force circular sequence")
            else:
                logger.info("Unfolding in circular mode")

        for c, seeds in enumerate(gcc, 1):
            path = unfoldAssembling(asm, cg,
                                    seeds=seeds,
                                    constraints=constraints,
                                    circular=config['unfold']['circular'],
                                    force=config['unfold']['force'])
            path = path[-1][0]

            fa = path2fasta(asm, cg, path,
                            identifier="Seq_%d" % c,
                            back=back,
                            minlink=minlink,
                            logger=logger)

            print(fa, file=fastaout)
            print(" ".join(str(edgeid) for edgeid in path), file=pathout)

    with open(output + '.path.gml', 'w') as gmlfile:
        print(cg.gml(), file=gmlfile)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, path2fasta, unfoldmarker
# Title string of this command module.
__title__ = "Assembling graph unfolder for the nuclear rDNA complex"

# Module-level configuration defaults: none for this command.
default_config = dict()
def addOptions(parser):
parser.add_argument(dest='orgasm:indexfilename', metavar='index',
help='index root filename (produced by the oa index command)')