Commit 17e6a496 by Eric Coissac

Adds possibility to remove edges based on the count of matches with

seeds
parent 76a2b9c9
......@@ -5,12 +5,14 @@ Created on 28 sept. 2014
'''
from orgasm import getOutput,getIndex, getSeeds
from orgasm.tango import restoreGraph, estimateFragmentLength, genesincontig,\
scaffold, cutLowCoverage, estimateDeadBrancheLength, dumpGraph
scaffold, cutLowCoverage, estimateDeadBrancheLength, dumpGraph, cutLowSeeds
import sys
__title__="Cut low coverage edge in an assembling graph"
default_config = { 'coverage' : None,
'smallbranches' : None
'smallbranches' : None,
'lowseeds' : None
}
def addOptions(parser):
......@@ -24,7 +26,12 @@ def addOptions(parser):
parser.add_argument('--coverage', dest='cutlow:coverage',
required=True,
type=int,
action='store',
default=None,
help='All edges with a coverage below this value will be deleted')
parser.add_argument('--seeds', dest='cutlow:lowseeds',
type=int,
action='store',
default=None,
......@@ -50,6 +57,12 @@ def run(config):
logger=config['orgasm']['logger']
output = getOutput(config)
if (config['cutlow']['coverage'] is None and
config['cutlow']['lowseeds'] is None
):
logger.error("No threshold specified")
sys.exit(1)
r = getIndex(config)
xxx,x,newprobes = getSeeds(r,config)
......@@ -74,7 +87,11 @@ def run(config):
logger.info("Cut low coverage")
cutLowCoverage(asm,config['cutlow']['coverage'],terminal=False)
if config['cutlow']['coverage'] is not None:
cutLowCoverage(asm,config['cutlow']['coverage'],terminal=False)
if config['cutlow']['lowseeds'] is not None:
cutLowSeeds(asm,config['cutlow']['lowseeds'],x,terminal=False)
if config['cutlow']['smallbranches'] is not None:
smallbranches = config['cutlow']['smallbranches']
......
......@@ -172,6 +172,148 @@ def cutLowCoverage(self,mincov,terminal=True):
return ilength - len(self)
def cutLowSeeds(self,minseeds,seeds,terminal=True):
'''
Remove sequences in the assembling graph with a coverage below ``mincov``.
.. code-block :: ipython
In [159]: asm = Assembler(r)
In [160]: s = matchtoseed(m,r)
In [161]: a = tango(asm,s,mincov=1,minread=10,minoverlap=30,maxjump=0,cycle=1)
In [162]: asm.cleanDeadBranches(maxlength=10)
Remaining edges : 424216 node : 423896
Out[162]: 34821
In [162]: cutLowCoverage(asm,10,terminal=False)
:param mincov: coverage threshold
:type mincov: :py:class:`int`
:param terminal: if set to ``True`` only terminal edges are removed from the assembling graph
:type terminal: :py:class:`bool`
:return: the count of deleted node
:rtype: :py:class:`int`
:seealso: :py:meth:`~orgasm.assambler.Assembler.cleanDeadBranches`
'''
def isTerminal(g,n):
return len(list(g.parentIterator(n)))==0 or len(list(g.neighbourIterator(n)))==0
def endnodeset(g):
return set(g.nodeIterator(predicate = lambda n : (len(list(g.neighbourIterator(n)))==0)))
def startnodeset(g):
return set(g.nodeIterator(predicate = lambda n : (len(list(g.parentIterator(n)))==0)))
ontty = sys.stderr.isatty()
if terminal:
tstates=[True]
else:
tstates=[True,False]
index = self.index
readSize = index.getReadSize()
ilength=len(self)
cg = self.compactAssembling(verbose=False)
genesincontig(cg,index,seeds)
for terminal in tstates:
print('',file=sys.stderr)
if terminal:
print("Deleting terminal branches",file=sys.stderr)
else:
print("Deleting internal branches",file=sys.stderr)
extremities = endnodeset(cg) | startnodeset(cg)
go = True
while go:
go = False
stems = [x for x in cg.edgeIterator()
if (not terminal
or (isTerminal(cg, x[0]) or isTerminal(cg, x[1])))
and 'ingene' in cg.getEdgeAttr(*x)
]
if stems:
stems.sort(key=lambda i:cg.getEdgeAttr(*i)['ingene'],reverse=True)
lightest = stems.pop()
lattr = cg.getEdgeAttr(*lightest)
if lattr['ingene'] < minseeds:
if stems:
go=True
for n in lattr['path'][1:-1]:
if n in self.graph:
try:
del self.graph[n]
except KeyError:
pass
if lightest[0] in extremities and lightest[0] in self.graph:
del self.graph[lightest[0]]
if lightest[1] in extremities and lightest[1] in self.graph:
del self.graph[lightest[1]]
if ontty:
print("Remaining edges : %d node : %d\r" % (self.graph.edgeCount(),len(self)),
end='\r',
file=sys.stderr)
cg.deleteEdge(*lightest)
tojoin=[]
if lightest[0] in extremities:
del cg[lightest[0]]
else:
tojoin.append(lightest[0])
if lightest[1] in extremities:
del cg[lightest[1]]
else:
tojoin.append(lightest[1])
# print >>sys.stderr,lightest[0] in extremities,lightest[1] in extremities,tojoin
for c in tojoin:
if c in cg:
begin = list(cg.parentIterator(c))
end = list(cg.neighbourIterator(c))
if len(begin)==1 and len(end)==1:
begin = begin[0]
end = end[0]
e1s = list(cg.edgeIterator(edgePredicate = lambda e:e[0]==begin and e[1]==c))
e2s = list(cg.edgeIterator(edgePredicate = lambda e:e[0]==c and e[1]==end))
if len(e1s)==1 and len(e2s)==1:
e1 = e1s[0]
e2 = e2s[0]
attr1 = cg.getEdgeAttr(*e1)
attr2 = cg.getEdgeAttr(*e2)
sequence=attr1['sequence'] + attr2['sequence']
path=attr1['path'] + attr2['path'][1:]
stem = buildstem(self,
begin,
end,
sequence,
path,
False)
stem['graphics']={'width':int(stem['weight']/readSize)+1,
'arrow':'last'}
cg.addStem(stem)
cg.deleteEdge(*e2)
cg.deleteEdge(*e1)
del cg[c]
if ontty:
print('',file=sys.stderr)
return ilength - len(self)
def cutSNPs(self,maxlength=500):
ontty = sys.stderr.isatty()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment