Commit ae99d8d3 by Eric Coissac

Renames the command sub-module to commands

parent 720501e7
'''
Created on 28 sept. 2014
@author: coissac
'''
import orgasm.samples
from orgasm import getIndex, getSeeds,getOutput
from orgasm.tango import cutLowCoverage, cutSNPs,\
estimateDeadBrancheLength, estimateFragmentLength,\
genesincontig, scaffold, fillGaps, dumpGraph, restoreGraph
import sys
# Short human-readable description of this command.
__title__ = "Recompact the assembling graph"

# Default values declared by this command for its configuration.
default_config = {'seeds': None}
def addOptions(parser):
    '''
    Declare the command-line arguments of this command on *parser*.

    Registers the mandatory ``index`` positional, the optional ``output``
    positional and the ``--back`` integer option. Every value is stored
    under an ``orgasm:``-prefixed destination name.

    @param parser: object exposing an argparse-style ``add_argument`` method
    '''
    add = parser.add_argument

    # Mandatory positional: root file name of the read index.
    add(dest='orgasm:indexfilename',
        metavar='index',
        help='index root filename (produced by the oa index command)')

    # Optional positional: output prefix (None when omitted).
    add(dest='orgasm:outputfilename',
        metavar='output',
        nargs='?',
        default=None,
        help='output prefix')

    # Optional integer; None means the value is estimated later by run().
    add('--back',
        dest='orgasm:back',
        type=int,
        action='store',
        default=None,
        help='the number of bases taken at the end of '
             'contigs to jump with pared-ends [default: <estimated>]')
def run(config):
    '''
    Reload a saved assembling graph, scaffold it and export it as GML.

    The graph is restored from ``<output>.oax``, compacted, scaffolded and
    annotated with ``genesincontig``; the compact graph is then written to
    ``<output>.gml``.

    @param config: configuration mapping; reads ``config['orgasm']['logger']``
                   and ``config['orgasm']['back']`` and is passed to the
                   ``getOutput``, ``getIndex`` and ``getSeeds`` helpers
    '''
    logger = config['orgasm']['logger']
    prefix = getOutput(config)

    index = getIndex(config)
    # Only the second element returned by getSeeds is used here.
    _coverage, seeds, _newprobes = getSeeds(index, config)

    graph = restoreGraph(prefix + '.oax', index, seeds)

    meanlength, sdlength = estimateFragmentLength(graph)

    # Jump length: explicit option first, then an estimate capped at 500,
    # and 300 when no fragment length could be estimated.
    requested_back = config['orgasm']['back']
    if requested_back is not None:
        back = requested_back
    elif meanlength is None:
        back = 300
    else:
        back = min(int(meanlength + 4 * sdlength), 500)

    if meanlength is not None:
        logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength, sdlength))

    compact = graph.compactAssembling(verbose=False)

    logger.info("Scaffold the assembly")
    scaffold(graph, compact, minlink=5, back=int(back), addConnectedLink=False)
    genesincontig(compact, index, seeds)

    with open(prefix + '.gml', 'w') as gmlfile:
        print(compact.gml(), file=gmlfile)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, cutLowCoverage, estimateDeadBrancheLength, dumpGraph
# Short human-readable description of this command.
__title__ = "Cut low coverage edge in an assembling graph"

# Default values declared by this command for its configuration.
default_config = {
    'coverage': None,
    'smallbranches': None,
}
def addOptions(parser):
    '''
    Declare the command-line arguments of the low-coverage cutting command.

    Registers the ``index`` and ``output`` positionals, the mandatory
    ``--coverage`` option and the optional ``--smallbranches`` and
    ``--back`` options (all three are integers).

    @param parser: object exposing an argparse-style ``add_argument`` method
    '''
    # Settings shared by the three integer options below.
    int_option = dict(type=int, action='store', default=None)

    parser.add_argument(
        dest='orgasm:indexfilename',
        metavar='index',
        help='index root filename (produced by the oa index command)',
    )
    parser.add_argument(
        dest='orgasm:outputfilename',
        metavar='output',
        nargs='?',
        default=None,
        help='output prefix',
    )
    parser.add_argument(
        '--coverage',
        dest='cutlow:coverage',
        required=True,
        help='All edges with a coverage below this value will be deleted',
        **int_option
    )
    parser.add_argument(
        '--smallbranches',
        dest='cutlow:smallbranches',
        help='maximum length of the branches to cut during '
             'the cleaning process [default: <estimated>]',
        **int_option
    )
    parser.add_argument(
        '--back',
        dest='orgasm:back',
        help='the number of bases taken at the end of '
             'contigs to jump with pared-ends [default: <estimated>]',
        **int_option
    )
def run(config):
    '''
    Remove low-coverage edges from a saved assembling graph and save it back.

    The graph is restored from ``<output>.oax``, edges are cut with
    ``cutLowCoverage`` using ``config['cutlow']['coverage']``, dead branches
    are cleaned, and the result is compacted, annotated and scaffolded.
    The compact graph is written to ``<output>.gml`` and the modified graph
    is dumped back to ``<output>.oax``.

    @param config: configuration mapping; reads the ``orgasm`` keys
                   ``logger`` and ``back`` and the ``cutlow`` keys
                   ``coverage`` and ``smallbranches``
    '''
    logger = config['orgasm']['logger']
    prefix = getOutput(config)

    index = getIndex(config)
    # Only the second element returned by getSeeds is used here.
    _coverage, seeds, _newprobes = getSeeds(index, config)

    graph = restoreGraph(prefix + '.oax', index, seeds)

    logger.info("Evaluate fragment length")
    meanlength, sdlength = estimateFragmentLength(graph)
    if meanlength is not None:
        logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength, sdlength))

    # Jump length: explicit option first, then an estimate capped at 500,
    # and 300 when no fragment length could be estimated.
    requested_back = config['orgasm']['back']
    if requested_back is not None:
        back = requested_back
    elif meanlength is None:
        back = 300
    else:
        back = min(int(meanlength + 4 * sdlength), 500)

    logger.info("Cut low coverage")
    cutLowCoverage(graph, config['cutlow']['coverage'], terminal=False)

    # Fall back to an estimated dead-branch length when none was supplied.
    smallbranches = config['cutlow']['smallbranches']
    if smallbranches is None:
        smallbranches = estimateDeadBrancheLength(graph)
        logger.info("Dead branch length setup to : %d bp" % smallbranches)

    graph.cleanDeadBranches(maxlength=smallbranches)

    compact = graph.compactAssembling(verbose=False)
    genesincontig(compact, index, seeds)
    scaffold(graph, compact, minlink=5, back=int(back), addConnectedLink=False)

    with open(prefix + '.gml', 'w') as gmlfile:
        print(compact.gml(), file=gmlfile)

    dumpGraph(prefix + '.oax', graph)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, selectGoodComponent
# Short human-readable description of this command.
__title__ = "Print some statistics about the assembling graph"

# This command declares no default configuration values of its own.
default_config = {}
def addOptions(parser):
    '''
    Declare the command-line arguments of the statistics command.

    Registers the mandatory ``index`` positional, the optional ``<output>``
    positional and the ``--back`` integer option, all stored under
    ``orgasm:``-prefixed destination names.

    @param parser: object exposing an argparse-style ``add_argument`` method
    '''
    add = parser.add_argument

    add(dest='orgasm:indexfilename',
        metavar='index',
        help='index root filename (produced by the oa index command)')

    # Optional positional: output prefix (None when omitted).
    add(dest='orgasm:outputfilename',
        metavar='<output>',
        nargs='?',
        default=None,
        help='output prefix')

    # Optional integer; None means the value is estimated later by run().
    add('--back',
        dest='orgasm:back',
        metavar='<insert size>',
        type=int,
        action='store',
        default=None,
        help='the number of bases taken at the end of '
             'contigs to jump with pared-ends [default: <estimated>]')
def run(config):
    '''
    Write summary statistics about a saved assembling graph.

    The graph is restored from ``<output>.oax``, compacted, annotated with
    ``genesincontig`` and scaffolded. One ``Key: value`` line per statistic
    is then written to ``<output>.stats``.

    Changes compared with the previous implementation:
      - the statistics file is opened in a ``with`` block so it is always
        closed (it used to be left open, and rebound the ``output`` name);
      - ``GoodConnectedComponents`` reports the number of components
        returned by ``selectGoodComponent``; it used to repeat the
        ``UniqueConnectedComponents`` value.

    @param config: configuration mapping; reads ``config['orgasm']['logger']``
                   and ``config['orgasm']['back']`` and is passed to the
                   ``getOutput``, ``getIndex`` and ``getSeeds`` helpers
    '''
    logger = config['orgasm']['logger']
    output = getOutput(config)

    r = getIndex(config)
    # Only the second element returned by getSeeds is used here.
    _ecoverage, x, _newprobes = getSeeds(r, config)

    asm = restoreGraph(output + '.oax', r, x)

    logger.info("Evaluate fragment length")
    meanlength, sdlength = estimateFragmentLength(asm)

    # Jump length: explicit option first, then an estimate capped at 500,
    # and 300 when no fragment length could be estimated.
    back = config['orgasm']['back']
    if back is None:
        if meanlength is not None:
            back = min(int(meanlength + 4 * sdlength), 500)
        else:
            back = 300

    cg = asm.compactAssembling(verbose=False)
    genesincontig(cg, r, x)
    scaffold(asm, cg, minlink=5, back=int(back), addConnectedLink=False)

    ccs = list(cg.connectedComponentIterator())
    # Materialised so it can be both counted and iterated several times.
    gcc = list(selectGoodComponent(cg))

    # Both end points of every edge belonging to a "good" component.
    gnode = set()
    for cc in gcc:
        for edge in cc:
            gnode.add(edge[0])
            gnode.add(edge[1])

    # A component is skipped when the set of its negated members has
    # already been recorded
    # (NOTE(review): presumably the opposite-strand copy - confirm).
    ucc = set()
    for cc in ccs:
        mirrored = frozenset([-member for member in cc])
        if mirrored not in ucc:
            ucc.add(frozenset(cc))

    with open(output + ".stats", "w") as stats:
        print("AssembledBasePairs:", len(asm) / 2, file=stats)
        print("TotalConnectedComponents:", len(ccs), file=stats)
        print("UniqueConnectedComponents:", len(ucc), file=stats)
        print("GoodConnectedComponents:", len(gcc), file=stats)
        print("CompactNodes:", len(cg), file=stats)
        print("GoodCompactNodes:", len(gnode), file=stats)
        print("CompactEdges:", cg.edgeCount(), file=stats)
        print("GoodCompactEdges:", sum(len(cc) for cc in gcc), file=stats)
        print("FragmentMeanLength:", meanlength, file=stats)
        print("FragmentSdLength:", sdlength, file=stats)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getIndex
# Short human-readable description of this command.
__title__ = "List information about a read index"

# This command declares no default configuration values of its own.
default_config = {}
def addOptions(parser):
    '''
    Declare the single command-line argument of this command on *parser*.

    @param parser: object exposing an argparse-style ``add_argument`` method
    '''
    # Mandatory positional: root file name of the read index.
    index_argument = dict(
        dest='orgasm:indexfilename',
        metavar='index',
        help='index root filename (produced by the oa index command)',
    )
    parser.add_argument(**index_argument)
def run(config):
    '''
    Print two values describing the read index on one line.

    The first is ``len()`` of the index, the second is the value returned
    by its ``getReadSize()`` method.

    @param config: configuration mapping handed to ``getIndex``
    '''
    index = getIndex(config)
    entry_count = len(index)
    read_size = index.getReadSize()
    print(entry_count, read_size)
'''
Created on 28 sept. 2014
@author: coissac
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, path2fasta
# Short human-readable description of this command.
__title__ = "Build a fasta file from a path across the assembling graph"

# This command declares no default configuration values of its own.
default_config = {}
def addOptions(parser):
    '''
    Declare the command-line arguments of the path-to-fasta command.

    Registers the ``<index>`` and ``<output>`` positionals, the mandatory
    ``--path`` option (one or more integer edge ids) and the optional
    ``--back`` integer option.

    @param parser: object exposing an argparse-style ``add_argument`` method
    '''
    add = parser.add_argument

    add(dest='orgasm:indexfilename',
        metavar='<index>',
        help='index root filename (produced by the oa index command)')

    # Optional positional: output prefix (None when omitted).
    add(dest='orgasm:outputfilename',
        metavar='<output>',
        nargs='?',
        default=None,
        help='output prefix')

    # Mandatory list of one or more integer edge ids.
    add('--path',
        dest='path:path',
        action='store',
        metavar='<edgeid>',
        type=int,
        nargs='+',
        required=True,
        default=None,
        help='A list of edge id separated by space add -- at the end of the path')

    # Optional integer; None means the value is estimated later by run().
    add('--back',
        dest='orgasm:back',
        metavar='<insert size>',
        type=int,
        action='store',
        default=None,
        help='the number of bases taken at the end of '
             'contigs to jump with pared-ends [default: <estimated>]')
def run(config):
    '''
    Export the sequence of a user-supplied path of the assembling graph.

    The graph is restored from ``<output>.oax``, compacted, annotated with
    ``genesincontig`` and scaffolded. The path given in
    ``config['path']['path']`` is converted with ``path2fasta`` and three
    files are written:
      - ``<output>.fasta``: the value returned by ``path2fasta``;
      - ``<output>.path``: the edge ids of the path, space separated;
      - ``<output>.path.gml``: the compact graph in GML.

    All three files are now handled by ``with`` blocks; they were
    previously opened and never explicitly closed.

    @param config: configuration mapping; reads the ``orgasm`` keys
                   ``logger``, ``back`` and ``minlink`` and the ``path``
                   key ``path``
    '''
    logger = config['orgasm']['logger']
    output = getOutput(config)

    r = getIndex(config)
    # Only the second element returned by getSeeds is used here.
    _ecoverage, x, _newprobes = getSeeds(r, config)

    asm = restoreGraph(output + '.oax', r, x)

    logger.info("Evaluate fragment length")
    meanlength, sdlength = estimateFragmentLength(asm)
    if meanlength is not None:
        logger.info("Fragment length estimated : %f pb (sd: %f)" % (meanlength, sdlength))

    # Jump length: explicit option first, then an estimate capped at 500,
    # and 300 when no fragment length could be estimated.
    back = config['orgasm']['back']
    if back is None:
        if meanlength is not None:
            back = min(int(meanlength + 4 * sdlength), 500)
        else:
            back = 300

    cg = asm.compactAssembling(verbose=False)
    genesincontig(cg, r, x)
    scaffold(asm, cg, minlink=config['orgasm']['minlink'],
             back=int(back), addConnectedLink=False)

    # Both result files are opened before the path is processed, as
    # before, but are now closed even if path2fasta raises.
    with open(output + ".fasta", "w") as fastaout, \
            open(output + ".path", "w") as pathout:
        logger.info("Print the result as a fasta file")

        # Only one sequence is produced, so its identifier is "Seq_1".
        c = 1
        path = config['path']['path']
        logger.info('Built path : %s' % str(path))

        fa = path2fasta(asm, cg, path,
                        identifier="Seq_%d" % c,
                        back=back,
                        minlink=config['orgasm']['minlink'],
                        logger=logger)

        print(fa, file=fastaout)
        print(" ".join([str(edge) for edge in path]), file=pathout)

    # Render first so the GML file is only created once the text exists,
    # matching the original evaluation order.
    gml = cg.gml()
    with open(output + '.path.gml', 'w') as gmlfile:
        print(gml, file=gmlfile)
'''
Created on 28 sept. 2014
@author: coissac
'''
import orgasm.samples
from orgasm import getOutput,getIndex, getSeeds, getAdapters
from orgasm.tango import matchtoseed, cutLowCoverage, cutSNPs,\
estimateDeadBrancheLength, estimateFragmentLength,\
genesincontig, scaffold, fillGaps, dumpGraph, restoreGraph
from orgasm.assembler import Assembler,tango
import sys
# Short human-readable description of this command.
__title__ = "Build the set of seed reads"

# Default values declared by this command for its configuration.
default_config = {"reformat": None}
def addOptions(parser):
    '''
    Declare the command-line arguments of the seed selection command.

    Registers the ``index`` and ``output`` positionals, the repeatable
    ``--seeds`` option, the ``--kup`` integer option and the ``--reformat``
    flag.

    @param parser: object exposing an argparse-style ``add_argument`` method
    '''
    # Names exported by orgasm.samples that start with 'prot' or 'nuc';
    # they are listed in the --seeds help text.
    internal_seeds = [name for name in dir(orgasm.samples)
                      if name.startswith('prot') or name.startswith('nuc')]

    add = parser.add_argument

    add(dest='orgasm:indexfilename',
        metavar='index',
        help='index root filename (produced by the oa index command)')

    # Optional positional: output prefix (None when omitted).
    add(dest='orgasm:outputfilename',
        metavar='output',
        nargs='?',
        default=None,
        help='output prefix')

    # May be given several times; each value is appended to the list.
    add('--seeds',
        dest='orgasm:seeds',
        metavar='seeds',
        action='append',
        default=[],
        type=str,
        help='protein or nucleic seeds; either a fasta file containing '
             'seed sequences or the name of one of the internal set of seeds '
             'among %s' % str(internal_seeds))

    add('--kup',
        dest='orgasm:kup',
        type=int,
        action='store',
        default=None,
        help='The word size used to identify the seed reads '
             '[default: protein=4, DNA=12]')

    # Flag: True when present, None (not False) when absent.
    add("--reformat",
        dest="seeds:reformat",
        action='store_true',
        default=None,
        help='Asks for reformatting an old sequence index to the new format')
def run(config):
    '''
    Look up the seed reads and log the coverage value returned with them.

    @param config: configuration mapping; reads ``config['orgasm']['logger']``
                   and ``config['orgasm']['progress']`` and is passed to the
                   ``getOutput``, ``getIndex`` and ``getSeeds`` helpers
    '''
    logger = config['orgasm']['logger']
    # Read but not used in the visible body; the lookups and the getOutput
    # call are kept so any error or side effect they have is unchanged.
    _progress = config['orgasm']['progress']
    _prefix = getOutput(config)

    logger.info("Looking for the seed reads")

    index = getIndex(config)
    seed_result = getSeeds(index, config)
    # getSeeds must yield exactly three values; only the first is reported.
    estimated_coverage, _seeds, _newprobes = seed_result

    logger.info('Coverage estimated from probe matches at : %d' % estimated_coverage)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment