Commit 8a6c8b59 by Eric Coissac

Adds the --force-scaffold to the unfold, unfoldrdna and path subcommand

parent 058ae863
......@@ -8,7 +8,7 @@ import sys
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, path2fasta
scaffold, path2fasta, parseFocedScaffold
__title__="Build a fasta file from a path across the assembling graph"
......@@ -35,6 +35,11 @@ def addOptions(parser):
help='A list of edge id separated by space add -- at the end of the path')
parser.add_argument('--force-scaffold', dest='unfold:fscaffold',
action='append',
default=None,
help='Force circular unfolding')
parser.add_argument('--back', dest='orgasm:back',
metavar='<insert size>',
type=int,
......@@ -58,8 +63,10 @@ def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
forcedscaffold = parseFocedScaffold(config['unfold']['fscaffold'])
r = getIndex(config)
ecoverage,x,newprobes = getSeeds(r,config)
ecoverage,x,newprobes = getSeeds(r,config) # @UnusedVariable
asm = restoreGraph(output+'.oax',r,x)
......@@ -83,8 +90,12 @@ def run(config):
genesincontig(cg,r,x)
scaffold(asm,cg,minlink=config['orgasm']['minlink'],
back=int(back),addConnectedLink=False,
scaffold(asm,
cg,
minlink=config['orgasm']['minlink'],
back=int(back),
addConnectedLink=False,
forcedLink=forcedscaffold,
logger=logger)
# fastaout = open(output+".fasta","w")
......
......@@ -64,13 +64,12 @@ def addOptions(parser):
def run(config):
logger=config['orgasm']['logger']
progress = config['orgasm']['progress']
output = getOutput(config)
output = getOutput(config) # @UnusedVariable
logger.info("Looking for the seed reads")
r = getIndex(config)
ecoverage,x,newprobes = getSeeds(r,config)
ecoverage,x,newprobes = getSeeds(r,config) # @UnusedVariable
logger.info('Coverage estimated from probe matches at : %d' % ecoverage)
......@@ -7,13 +7,14 @@ import sys
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
pathConstraints, scaffold, selectGoodComponent, unfoldAssembling, path2fasta,\
path2sam
path2sam, parseFocedScaffold
__title__="Universal assembling graph unfolder"
default_config = { 'circular' : False,
'force' : False,
'format' : "fasta"
'format' : "fasta",
"fscaffold": []
}
def addOptions(parser):
......@@ -48,6 +49,11 @@ def addOptions(parser):
default=None,
help='Force circular unfolding')
parser.add_argument('--force-scaffold', dest='unfold:fscaffold',
action='append',
default=None,
help='Force circular unfolding')
parser.add_argument('--back', dest='orgasm:back',
type=int,
action='store',
......@@ -64,7 +70,6 @@ def addOptions(parser):
'to the header of the fasta sequences')
def run(config):
logger=config['orgasm']['logger']
......@@ -73,7 +78,9 @@ def run(config):
if config['unfold']['force']:
config['unfold']['circular']= True
forcedscaffold = parseFocedScaffold(config['unfold']['fscaffold'])
r = getIndex(config)
coverage,x,newprobes = getSeeds(r,config) # @UnusedVariable
......@@ -103,7 +110,9 @@ def run(config):
minlink=config['orgasm']['minlink']
scaffold(asm,cg,minlink=minlink,
back=int(back),addConnectedLink=False,
back=int(back),
addConnectedLink=False,
forcedLink=forcedscaffold,
logger=logger)
constraints = pathConstraints(asm,cg,back=int(back),minlink=minlink)
......
......@@ -7,7 +7,7 @@ import sys
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, path2fasta, unfoldmarker
scaffold, path2fasta, unfoldmarker, parseFocedScaffold
__title__="Assembling graph unfolder for the nuclear rDNA complex"
......@@ -24,6 +24,11 @@ def addOptions(parser):
help='output prefix' )
parser.add_argument('--force-scaffold', dest='unfold:fscaffold',
action='append',
default=None,
help='Force circular unfolding')
parser.add_argument('--back', dest='orgasm:back',
type=int,
action='store',
......@@ -67,8 +72,10 @@ def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
forcedscaffold = parseFocedScaffold(config['unfold']['fscaffold'])
r = getIndex(config)
coverage,x,newprobes = getSeeds(r,config)
coverage,x,newprobes = getSeeds(r,config) # @UnusedVariable
asm = restoreGraph(output+'.oax',r,x)
......@@ -94,7 +101,12 @@ def run(config):
genesincontig(cg,r,x)
scaffold(asm,cg,minlink=5,back=int(back),addConnectedLink=False,
scaffold(asm,
cg,
minlink=5,
back=int(back),
addConnectedLink=False,
forcedLink=forcedscaffold,
logger=logger)
fastaout = sys.stdout
......
......@@ -346,6 +346,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
'''
frglen,frglensd = estimateFragmentLength(self)
readsize=self.index.getReadSize()
if frglen is None:
logger.warning("Insert size cannot be estimated")
......@@ -420,7 +421,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
overlap = testOverlap(eiat[etid[e1]]['sequence'],
eiat[eiid[e2]]['sequence'],
eiat[eiid[e2]]['head'],
self.index.getReadSize())
readsize)
if overlap >= 0:
s.append(('o',et[e1][1],ei[e2][0],linkedby,etid[e1], eiid[e2],"#FF00FF",overlap,0,[],first,last))
logOrPrint(logger,
......@@ -446,7 +447,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr = assgraph.addEdge(x,y)
if kind=="s":
glengths = [frglen - i - self.index.getReadSize()
glengths = [frglen - i - 2 * readsize
for i in delta]
pglengths = [i for i in glengths if i >=0]
if len(pglengths) > 1:
......@@ -464,7 +465,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr['gappairs']=len(glengths)
attr['gaplength']=int(glength)
attr['gapsd']=int(math.sqrt(frglensd**2+glengthsd**2))
attr['gapdeltas']=[frglen - i - self.index.getReadSize() for i in delta]
attr['gapdeltas']=[frglen - i - 2 * readsize for i in delta]
attr['pairendlink']=z
attr['ingene']=0
attr['link']=(s1,s2)
......@@ -494,7 +495,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr['path']=[]
attr['class']='scaffold:overlap'
elif kind=="f":
glengths = [frglen - i - self.index.getReadSize()
glengths = [frglen - i - 2 * readsize
for i in delta]
pglengths = [i for i in glengths if i >=0]
......@@ -513,7 +514,7 @@ def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False,forcedLink=
attr['gappairs']=len(glengths)
attr['gaplength']=int(glength)
attr['gapsd']=int(math.sqrt(frglensd**2+glengthsd**2))
attr['gapdeltas']=[frglen - i - self.index.getReadSize() for i in delta]
attr['gapdeltas']=[frglen - i - readsize for i in delta]
attr['pairendlink']=z
attr['ingene']=0
attr['link']=(s1,s2)
......@@ -2397,3 +2398,10 @@ def stem2fasta(self):
return '\n'.join(seqs)
def parseFocedScaffold(parametters):
f = [x.split(":") for x in parametters]
f = set((int(x[0]),int(x[1])) for x in f if len(x)==2)
r = set((-x[1],-x[0]) for x in f)
return f | r
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment