Commit 7042f912 by Eric Coissac

Swith the tango function to cython in the assembler submodule

parent 8d076d3e
......@@ -15,3 +15,4 @@ which manage the assembling process.
from ._assembler import Assembler
from ._assembler import buildstem
from ._tango import tango
......@@ -163,7 +163,7 @@ cdef class AsmbGraph(DiGraph):
try:
inode = self._readIndex.getIds(node)[0]
except IndexError as e:
raise IndexError("%s --> index requested %d" % (e.args[0],node))
inode = node
return DiGraph.getNodeAttr(self, inode)
......
......@@ -17,9 +17,14 @@ import sys
from cpython.sequence cimport PySequence_GetSlice
from cpython.bytes cimport PyBytes_GET_SIZE
def cmp(a, b):
return (a > b) - (a < b)
cdef int iabs(int x):
return x if x > 0 else -x
@cython.nonecheck(True)
cdef tuple findDeepestRead(Index index, bytes seed):
cdef list frg
......@@ -115,17 +120,25 @@ cdef bint is_junction(AsmbGraph graph,int node): # @Duplicated
or len(parents(graph,node)) != 1
)
cdef tuple normalizePath(list path):
cdef tuple normalizePath(list path, bint *direction):
cdef int c = path[0]
cdef int r = -path[-1]
cdef bint d = c < r
cdef tuple cpath
# TODO: verify the property : if more than two nodes are reverse complement then the stem is palindromic
if c==r:
c = path[1]
r = -path[-2]
d = c < r
if d:
cpath=tuple(path)
else:
cpath=tuple(-i for i in reversed(path))
direction[0]=d
return cpath
cdef bint isPalindrome(list path):
......@@ -148,10 +161,12 @@ cdef int weight(Assembler assembler,
cdef int x
cdef AsmbGraph graph = assembler._graph
cdef dict nattr
cdef int nodemax = len(assembler._index) +1
for x in path:
nattr = graph.getNodeAttr(x)
weight+=nattr.get('coverage',0.0)
if iabs(x) < nodemax:
nattr = graph.getNodeAttr(x)
weight+=nattr.get('coverage',0.0)
weight/= len(path)
......@@ -234,7 +249,6 @@ cdef class CompactAssembling(DiGraphMultiEdge):
cdef double minweight
cdef AsmbGraph graph
cdef dict stem
cdef list stems
DiGraphMultiEdge.__init__(self,'compact')
......@@ -254,8 +268,7 @@ cdef class CompactAssembling(DiGraphMultiEdge):
minweight=1000000.
stems = list(StemIterator(assembler))
for stem in stems:
for stem in StemIterator(assembler):
self.addStem(stem)
lcontig+=stem['length']
weight = stem['weight']
......@@ -291,8 +304,9 @@ cdef class CompactAssembling(DiGraphMultiEdge):
cdef int eid
cdef int first = stem['first']
cdef int last = stem['last']
cdef bint d
cpath = normalizePath(stem['path'])
cpath = normalizePath(stem['path'],&d)
self._paths[cpath] = self._paths.get(cpath,0) + 1
self._stemidOk=False
......@@ -323,9 +337,9 @@ cdef class CompactAssembling(DiGraphMultiEdge):
cpdef deleteEdge(self, int node1, int node2, int edge=-1):
cdef dict stem
cdef bint d
stem = self.getEdgeAttr(node1,node2,edge)
cpath = normalizePath(stem['path'])
cpath = normalizePath(stem['path'],&d)
self._paths[cpath]-=1
if self._paths[cpath]==0:
del self._paths[cpath]
......@@ -337,14 +351,11 @@ cdef class CompactAssembling(DiGraphMultiEdge):
cdef int getStemid(self, dict stem) except 0:
cdef list path = stem['path']
cdef int c = path[0]
cdef int r = -path[-1]
cdef bint d = c < r
cdef bint d
cdef tuple cpath
cdef int stemid
cdef list sstems
if not self._stemidOk:
sstems=list(self._paths)
sstems.sort()
......@@ -352,7 +363,7 @@ cdef class CompactAssembling(DiGraphMultiEdge):
self._stemid[sstems[i]]=i+1
self._stemidOk=True
cpath = normalizePath(path)
cpath = normalizePath(path,&d)
stemid = self._stemid[cpath]
if not d:
......@@ -400,19 +411,12 @@ cdef class StemIterator:
cdef dict stem
cdef int lcontig
cdef AsmbGraph graph = self._graph
cdef set junctions = set(graph.nodeIterator(lambda x: is_junction(graph,x)))
cdef set junctions = set(int(y) for y in graph.nodeIterator(lambda x: is_junction(graph,x)))
pjunction=set(x for x in junctions if x > 0)
njunction=set(-x for x in junctions if x < 0)
print("paired junctions : p %s n %s " % ((pjunction - njunction),(njunction - pjunction)))
print("\nnb junctions (in) = %d" % len(junctions),file=sys.stderr)
nn=0
for first in junctions:
nn+=1
branches=sons(graph,first)
nnn=0
for n in branches:
nnn+=1
path = [first,n]
brothers=sons(graph,n)
lsequence=[graph.getEdgeAttr(first,n)['ext']]
......
../../../src/orgasm.h
../../../fiboheap/fibo.h
......@@ -7,10 +7,11 @@ Created on 28 sept. 2014
import orgasm.samples
from orgasm import getIndex, getProbes, getSeeds, getAdapters
from orgasm.tango import matchtoseed, tango, cutLowCoverage, cutSNPs,\
from orgasm.tango import matchtoseed, cutLowCoverage, cutSNPs,\
estimateDeadBrancheLength, coverageEstimate, estimateFragmentLength,\
genesincontig, scaffold, fillGaps, dumpGraph, restoreGraph
from orgasm.assembler import Assembler
from orgasm.assembler import Assembler,tango
import sys
......@@ -266,17 +267,9 @@ def run(config):
progress=progress,
logger=logger)
cg = asm.compactAssembling()
print(cg.gml(),file=open('raw.gml','w'))
# Clean small unsuccessful extensions
asm.cleanDeadBranches(maxlength=10)
cg = asm.compactAssembling()
print(cg.gml(),file=open('clean.gml','w'))
# and too low covered assembling
if coverageset:
cutLowCoverage(asm,int(coverage),terminal=True)
......
......@@ -45,14 +45,14 @@ cdef class Index:
cpdef lookForSeeds(self, dict proteins, int kup=?, int mincov=?, object logger=?)
cdef c_array.array lookForString(self, bytes word)
cpdef dict checkedExtensions(self, bytes probe,
tuple adapters5=?,
tuple adapters3=?,
int minread=?,
cpdef dict checkedExtensions(self, bytes probe,
tuple adapters5=?,
tuple adapters3=?,
int minread=?,
double minratio=?,
int mincov=?,
int minoverlap=?,
int extlength=?,
bint lowfilter=?,
int mincov=?,
int minoverlap=?,
int extlength=?,
bint lowfilter=?,
object restrict=?,
bint exact=?)
bint exact=?)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment