Commit 8a6c8b59 by Eric Coissac

Adds the --force-scaffold to the unfold, unfoldrdna and path subcommand

parent 058ae863
...@@ -8,7 +8,7 @@ import sys ...@@ -8,7 +8,7 @@ import sys
from orgasm import getOutput,getIndex, getSeeds from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\ from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, path2fasta scaffold, path2fasta, parseFocedScaffold
__title__="Build a fasta file from a path across the assembling graph" __title__="Build a fasta file from a path across the assembling graph"
...@@ -35,6 +35,11 @@ def addOptions(parser): ...@@ -35,6 +35,11 @@ def addOptions(parser):
help='A list of edge id separated by space add -- at the end of the path') help='A list of edge id separated by space add -- at the end of the path')
parser.add_argument('--force-scaffold', dest='unfold:fscaffold',
action='append',
default=None,
help='Force circular unfolding')
parser.add_argument('--back', dest='orgasm:back', parser.add_argument('--back', dest='orgasm:back',
metavar='<insert size>', metavar='<insert size>',
type=int, type=int,
...@@ -58,8 +63,10 @@ def run(config): ...@@ -58,8 +63,10 @@ def run(config):
logger=config['orgasm']['logger'] logger=config['orgasm']['logger']
output = getOutput(config) output = getOutput(config)
forcedscaffold = parseFocedScaffold(config['unfold']['fscaffold'])
r = getIndex(config) r = getIndex(config)
ecoverage,x,newprobes = getSeeds(r,config) ecoverage,x,newprobes = getSeeds(r,config) # @UnusedVariable
asm = restoreGraph(output+'.oax',r,x) asm = restoreGraph(output+'.oax',r,x)
...@@ -83,8 +90,12 @@ def run(config): ...@@ -83,8 +90,12 @@ def run(config):
genesincontig(cg,r,x) genesincontig(cg,r,x)
scaffold(asm,cg,minlink=config['orgasm']['minlink'], scaffold(asm,
back=int(back),addConnectedLink=False, cg,
minlink=config['orgasm']['minlink'],
back=int(back),
addConnectedLink=False,
forcedLink=forcedscaffold,
logger=logger) logger=logger)
# fastaout = open(output+".fasta","w") # fastaout = open(output+".fasta","w")
......
...@@ -64,13 +64,12 @@ def addOptions(parser): ...@@ -64,13 +64,12 @@ def addOptions(parser):
def run(config): def run(config):
logger=config['orgasm']['logger'] logger=config['orgasm']['logger']
progress = config['orgasm']['progress'] output = getOutput(config) # @UnusedVariable
output = getOutput(config)
logger.info("Looking for the seed reads") logger.info("Looking for the seed reads")
r = getIndex(config) r = getIndex(config)
ecoverage,x,newprobes = getSeeds(r,config) ecoverage,x,newprobes = getSeeds(r,config) # @UnusedVariable
logger.info('Coverage estimated from probe matches at : %d' % ecoverage) logger.info('Coverage estimated from probe matches at : %d' % ecoverage)
...@@ -7,13 +7,14 @@ import sys ...@@ -7,13 +7,14 @@ import sys
from orgasm import getOutput,getIndex, getSeeds from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\ from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
pathConstraints, scaffold, selectGoodComponent, unfoldAssembling, path2fasta,\ pathConstraints, scaffold, selectGoodComponent, unfoldAssembling, path2fasta,\
path2sam path2sam, parseFocedScaffold
__title__="Universal assembling graph unfolder" __title__="Universal assembling graph unfolder"
default_config = { 'circular' : False, default_config = { 'circular' : False,
'force' : False, 'force' : False,
'format' : "fasta" 'format' : "fasta",
"fscaffold": []
} }
def addOptions(parser): def addOptions(parser):
...@@ -48,6 +49,11 @@ def addOptions(parser): ...@@ -48,6 +49,11 @@ def addOptions(parser):
default=None, default=None,
help='Force circular unfolding') help='Force circular unfolding')
parser.add_argument('--force-scaffold', dest='unfold:fscaffold',
action='append',
default=None,
help='Force circular unfolding')
parser.add_argument('--back', dest='orgasm:back', parser.add_argument('--back', dest='orgasm:back',
type=int, type=int,
action='store', action='store',
...@@ -64,7 +70,6 @@ def addOptions(parser): ...@@ -64,7 +70,6 @@ def addOptions(parser):
'to the header of the fasta sequences') 'to the header of the fasta sequences')
def run(config): def run(config):
logger=config['orgasm']['logger'] logger=config['orgasm']['logger']
...@@ -73,7 +78,9 @@ def run(config): ...@@ -73,7 +78,9 @@ def run(config):
if config['unfold']['force']: if config['unfold']['force']:
config['unfold']['circular']= True config['unfold']['circular']= True
forcedscaffold = parseFocedScaffold(config['unfold']['fscaffold'])
r = getIndex(config) r = getIndex(config)
coverage,x,newprobes = getSeeds(r,config) # @UnusedVariable coverage,x,newprobes = getSeeds(r,config) # @UnusedVariable
...@@ -103,7 +110,9 @@ def run(config): ...@@ -103,7 +110,9 @@ def run(config):
minlink=config['orgasm']['minlink'] minlink=config['orgasm']['minlink']
scaffold(asm,cg,minlink=minlink, scaffold(asm,cg,minlink=minlink,
back=int(back),addConnectedLink=False, back=int(back),
addConnectedLink=False,
forcedLink=forcedscaffold,
logger=logger) logger=logger)
constraints = pathConstraints(asm,cg,back=int(back),minlink=minlink) constraints = pathConstraints(asm,cg,back=int(back),minlink=minlink)
......
...@@ -7,7 +7,7 @@ import sys ...@@ -7,7 +7,7 @@ import sys
from orgasm import getOutput,getIndex, getSeeds from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\ from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, path2fasta, unfoldmarker scaffold, path2fasta, unfoldmarker, parseFocedScaffold
__title__="Assembling graph unfolder for the nuclear rDNA complex" __title__="Assembling graph unfolder for the nuclear rDNA complex"
...@@ -24,6 +24,11 @@ def addOptions(parser): ...@@ -24,6 +24,11 @@ def addOptions(parser):
help='output prefix' ) help='output prefix' )
parser.add_argument('--force-scaffold', dest='unfold:fscaffold',
action='append',
default=None,
help='Force circular unfolding')
parser.add_argument('--back', dest='orgasm:back', parser.add_argument('--back', dest='orgasm:back',
type=int, type=int,
action='store', action='store',
...@@ -67,8 +72,10 @@ def run(config): ...@@ -67,8 +72,10 @@ def run(config):
logger=config['orgasm']['logger'] logger=config['orgasm']['logger']
output = getOutput(config) output = getOutput(config)
forcedscaffold = parseFocedScaffold(config['unfold']['fscaffold'])
r = getIndex(config) r = getIndex(config)
coverage,x,newprobes = getSeeds(r,config) coverage,x,newprobes = getSeeds(r,config) # @UnusedVariable
asm = restoreGraph(output+'.oax',r,x) asm = restoreGraph(output+'.oax',r,x)
...@@ -94,7 +101,12 @@ def run(config): ...@@ -94,7 +101,12 @@ def run(config):
genesincontig(cg,r,x) genesincontig(cg,r,x)
scaffold(asm,cg,minlink=5,back=int(back),addConnectedLink=False, scaffold(asm,
cg,
minlink=5,
back=int(back),
addConnectedLink=False,
forcedLink=forcedscaffold,
logger=logger) logger=logger)
fastaout = sys.stdout fastaout = sys.stdout
......
...@@ -346,6 +346,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink= ...@@ -346,6 +346,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
''' '''
frglen,frglensd = estimateFragmentLength(self) frglen,frglensd = estimateFragmentLength(self)
readsize=self.index.getReadSize()
if frglen is None: if frglen is None:
logger.warning("Insert size cannot be estimated") logger.warning("Insert size cannot be estimated")
...@@ -420,7 +421,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink= ...@@ -420,7 +421,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
overlap = testOverlap(eiat[etid[e1]]['sequence'], overlap = testOverlap(eiat[etid[e1]]['sequence'],
eiat[eiid[e2]]['sequence'], eiat[eiid[e2]]['sequence'],
eiat[eiid[e2]]['head'], eiat[eiid[e2]]['head'],
self.index.getReadSize()) readsize)
if overlap >= 0: if overlap >= 0:
s.append(('o',et[e1][1],ei[e2][0],linkedby,etid[e1], eiid[e2],"#FF00FF",overlap,0,[],first,last)) s.append(('o',et[e1][1],ei[e2][0],linkedby,etid[e1], eiid[e2],"#FF00FF",overlap,0,[],first,last))
logOrPrint(logger, logOrPrint(logger,
...@@ -446,7 +447,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink= ...@@ -446,7 +447,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr = assgraph.addEdge(x,y) attr = assgraph.addEdge(x,y)
if kind=="s": if kind=="s":
glengths = [frglen - i - self.index.getReadSize() glengths = [frglen - i - 2 * readsize
for i in delta] for i in delta]
pglengths = [i for i in glengths if i >=0] pglengths = [i for i in glengths if i >=0]
if len(pglengths) > 1: if len(pglengths) > 1:
...@@ -464,7 +465,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink= ...@@ -464,7 +465,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr['gappairs']=len(glengths) attr['gappairs']=len(glengths)
attr['gaplength']=int(glength) attr['gaplength']=int(glength)
attr['gapsd']=int(math.sqrt(frglensd**2+glengthsd**2)) attr['gapsd']=int(math.sqrt(frglensd**2+glengthsd**2))
attr['gapdeltas']=[frglen - i - self.index.getReadSize() for i in delta] attr['gapdeltas']=[frglen - i - 2 * readsize for i in delta]
attr['pairendlink']=z attr['pairendlink']=z
attr['ingene']=0 attr['ingene']=0
attr['link']=(s1,s2) attr['link']=(s1,s2)
...@@ -494,7 +495,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink= ...@@ -494,7 +495,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr['path']=[] attr['path']=[]
attr['class']='scaffold:overlap' attr['class']='scaffold:overlap'
elif kind=="f": elif kind=="f":
glengths = [frglen - i - self.index.getReadSize() glengths = [frglen - i - 2 * readsize
for i in delta] for i in delta]
pglengths = [i for i in glengths if i >=0] pglengths = [i for i in glengths if i >=0]
...@@ -513,7 +514,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink= ...@@ -513,7 +514,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr['gappairs']=len(glengths) attr['gappairs']=len(glengths)
attr['gaplength']=int(glength) attr['gaplength']=int(glength)
attr['gapsd']=int(math.sqrt(frglensd**2+glengthsd**2)) attr['gapsd']=int(math.sqrt(frglensd**2+glengthsd**2))
attr['gapdeltas']=[frglen - i - self.index.getReadSize() for i in delta] attr['gapdeltas']=[frglen - i - readsize for i in delta]
attr['pairendlink']=z attr['pairendlink']=z
attr['ingene']=0 attr['ingene']=0
attr['link']=(s1,s2) attr['link']=(s1,s2)
...@@ -2397,3 +2398,10 @@ def stem2fasta(self): ...@@ -2397,3 +2398,10 @@ def stem2fasta(self):
return '\n'.join(seqs) return '\n'.join(seqs)
def parseFocedScaffold(parametters):
f = [x.split(":") for x in parametters]
f = set((int(x[0]),int(x[1])) for x in f if len(x)==2)
r = set((-x[1],-x[0]) for x in f)
return f | r
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment