Commit c3beb1a2 by Eric Coissac

Patch the evaluation of the coverage from seed matches.

Adds a --identity option to the buildgraph command
parent 2f5fe190
......@@ -31,6 +31,7 @@ default_config = {
'seeds' : None,
'seedmincov' : 1,
'kup' : None,
'identity' : 0.5,
'version' : False,
'progress' : True
}
......
......@@ -100,6 +100,7 @@ def getSeeds(index,config,extension=".omx"):
seeds = index.lookForSeeds(p,
mincov=config['orgasm']['seedmincov'],
kup=kup,
identity=config['orgasm']['identity'],
logger=logger)
probes[probename][1]=seeds
......@@ -133,10 +134,10 @@ def getSeeds(index,config,extension=".omx"):
p = probes[probename][0]
s = probes[probename][1]
nuc = all([isDNA(k) for k in p.values()])
#print(s[list(s.keys())[0]])
nbmatch = [(k,
len(s[k]),
len(s[k])*index.getReadSize() / len(p[k]) / (1 if nuc else 3)) for k in s]
sum(x[2] for x in s[k]),
sum(x[2] for x in s[k])*index.getReadSize() / len(p[k]) / (1 if nuc else 3)) for k in s]
nbmatch.sort(key=lambda x:-x[2])
......
......@@ -359,17 +359,6 @@ cpdef AsmbGraph tango(Assembler self,
if PyList_GET_SIZE(lastseeds) > 1000:
lastseeds=lastseeds[-1000:]
# if addedseed==0 and maxjump > 0 and restrict is None:
# if maxanchor < maxjump and readid < readmax and readid in graph:
# maxanchor+=1
# seeds.append((0,(None,)))
# anchors = list(map(isanchors,index.getIds(readid)[2]))
#
# print ("\n\nJumpGap on read %d\n" % readid,file=sys.stderr)
# for a in anchors:
# if a not in graph:
# seeds.append((-a,(None,)))
return graph
......@@ -142,6 +142,13 @@ def addOptions(parser):
help='The word size used to identify the seed reads '
'[default: protein=4, DNA=12]')
parser.add_argument('--identity', dest='orgasm:identity',
type=float,
action='store',
default=0.5,
help='The fraction of word'
'[default: 0.5]')
def estimateMinRead(index,minoverlap,coverage):
MINREAD=10
......
......@@ -42,7 +42,10 @@ cdef class Index:
cpdef tuple getIds(self, int32_t id)
cpdef int32_t getPairedRead(self, int32_t id)
cpdef tuple normalizedPairedEndsReads(self, int32_t id)
cpdef dict lookForSeeds(self, dict proteins, int kup=?, int mincov=?, object logger=?)
cpdef dict lookForSeeds(self, dict sequences,
int kup=?, int mincov=?,
float identity=?,
object logger=?)
cdef c_array.array lookForString(self, bytes word)
cpdef dict checkedExtensions(self, bytes probe,
......
# cython: language_level=3
from asyncio.log import logger
cimport cython
from ._orgasm cimport *
......@@ -349,7 +350,10 @@ cdef class Index:
return nid,paired
cpdef dict lookForSeeds(self, dict sequences, int kup=-1, int mincov=1,object logger=None):
cpdef dict lookForSeeds(self, dict sequences,
int kup=-1, int mincov=1,
float identity=0.5,
object logger=None):
cdef AhoCorasick patterns
cdef dict matches
......@@ -376,8 +380,9 @@ cdef class Index:
patterns.finalize()
#minmatch = 50 if nuc else 15
minmatch = int(self.getReadSize() // (2 if nuc else 6))
minmatch = int((self.getReadSize() // (1 if nuc else 3)) * identity)
logger.info('Minimum word matches = %d ' % minmatch)
matches = patterns.scanIndex(self,minmatch,-1,mincov)
return matches
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment