Commit b6f5096c by Eric Coissac

Patch some bugs and switch to version alpha9

parent b1e6c937
1.0.00-alpha7
\ No newline at end of file
1.0.00-alpha9
\ No newline at end of file
......@@ -15,4 +15,4 @@ which manage the assembling process.
from ._assembler import Assembler # @UnresolvedImport
from ._assembler import buildstem # @UnresolvedImport
from ._tango import tango,getusedreads # @UnresolvedImport
from ._tango import tango,getusedreads,resetusedreads # @UnresolvedImport
......@@ -8,5 +8,5 @@ cdef class AsmbGraph(DiGraph):
cpdef dict addNode(self,int node)
cpdef dict addEdge(self,int node1, int node2)
cpdef dict getNodeAttr(self, int node)
#def dict getNodeAttr(self, int node)
......@@ -158,7 +158,7 @@ cdef class AsmbGraph(DiGraph):
DiGraph.deleteNode(self, node)
DiGraph.deleteNode(self, -node)
cpdef dict getNodeAttr(self, int node):
def getNodeAttr(self, int node):
cdef int inode
try:
inode = self._readIndex.getIds(node)[0]
......
......@@ -172,8 +172,11 @@ cdef int weight(Assembler assembler,
for x in path:
if iabs(x) < nodemax:
nattr = graph.getNodeAttr(x)
weight+=nattr.get('coverage',0.0)
try:
nattr = graph.getNodeAttr(x)
weight+=nattr.get('coverage',0.0)
except KeyError:
pass
weight/= len(path)
......
......@@ -39,6 +39,9 @@ cdef set __used_reads__ = set()
cpdef set getusedreads():
return __used_reads__
cpdef resetusedreads():
__used_reads__.clear()
cpdef AsmbGraph tango(Assembler self,
list seeds,
int minread=10, float minratio=0.1,
......
......@@ -11,7 +11,7 @@ from orgasm.tango import matchtoseed, cutLowCoverage, cutSNPs,\
estimateDeadBrancheLength, coverageEstimate, estimateFragmentLength,\
genesincontig, scaffold, fillGaps, dumpGraph, restoreGraph
from orgasm.assembler import Assembler,tango
from orgasm.assembler import Assembler,tango,resetusedreads
import sys
import os.path
......@@ -214,12 +214,12 @@ def addOptions(parser):
def estimateMinRead(index,minoverlap,coverage,minreadcor):
def estimateMinRead(index,minoverlap,coverage):
MINREAD=10
MINOVERLAP=50
minread = (index.getReadSize() - minoverlap) * coverage / index.getReadSize() / minreadcor
minread = (index.getReadSize() - minoverlap) * coverage / index.getReadSize()
if minread < MINREAD:
minoverlap = index.getReadSize() - (MINREAD * minreadcor * index.getReadSize() / coverage)
minoverlap = index.getReadSize() - (MINREAD * index.getReadSize() / coverage)
minread = MINREAD
if minoverlap< MINOVERLAP:
minread = MINREAD
......@@ -238,7 +238,8 @@ def run(config):
assmax = config['buildgraph']['assmax']*2
covratio = config['buildgraph']['covratio']
covratiofill = config['buildgraph']['covratiofill']
mincov = config['buildgraph']['mincov']
......@@ -260,6 +261,7 @@ def run(config):
else:
coverage = ecoverage
mincoverage = coverage // covratio
if config['buildgraph']['clean'] or not os.path.exists("%s.oax" % output):
......@@ -285,8 +287,7 @@ def run(config):
if coverageset:
logger.info('Coverage forced by user at : %d' % coverage)
minread,minoverlap = estimateMinRead(r,config['buildgraph']['minoverlap'],
coverage,
covratio)
mincoverage)
else:
##########################
......@@ -299,8 +300,7 @@ def run(config):
if config['buildgraph']['minread'] is None:
minread,minoverlap = estimateMinRead(r,config['buildgraph']['minoverlap'],
coverage,
covratio)
mincoverage)
minread//=4
if minread<5:
minread=5
......@@ -308,9 +308,11 @@ def run(config):
logger.info('Assembling of %d pb for estimating actual coverage' % config['buildgraph']['testrun'])
s = matchtoseed(x,r,newprobes)
#logger.info("minread = %d mincov = %d coverage = %d %d" % (minread,mincov,coverage,minoverlap))
# Run the first assembling pass
a = tango(asm,s,mincov=minread, # @UnusedVariable
a = tango(asm,s,mincov=mincov, # @UnusedVariable
minread=minread,
minoverlap=minoverlap,
lowfilter=lowfilter,
......@@ -329,7 +331,7 @@ def run(config):
if coverageset:
cutLowCoverage(asm,int(coverage),terminal=True)
else:
cutLowCoverage(asm,int(coverage/4),terminal=True)
cutLowCoverage(asm,int(mincoverage),terminal=True)
if snp:
cutSNPs(asm)
......@@ -347,11 +349,12 @@ def run(config):
score,length,ecoverage = coverageEstimate(asm,x,r) # @UnusedVariable
if not coverageset:
coverage = ecoverage
mincoverage = coverage // covratio
minread,minoverlap = estimateMinRead(r,config['buildgraph']['minoverlap'],
coverage,
covratio)
mincoverage)
logger.info("coverage estimated : %dx based on %d bp (minread: %d)" %(coverage,length/2,minread))
else:
logger.error('Nothing assembled - Assembling aborted')
sys.exit(1)
......@@ -385,8 +388,12 @@ def run(config):
# If no coverage specified coverage = estimated coverage
if not coverageset:
coverage = ecoverage
cutLowCoverage(asm,int(coverage/4),terminal=True)
coverage = ecoverage
mincoverage = coverage // covratio
if not coverageset:
cutLowCoverage(asm,int(mincoverage),terminal=True)
else:
cutLowCoverage(asm,int(coverage),terminal=True)
......@@ -411,6 +418,8 @@ def run(config):
if not coverageset:
coverage = ecoverage
mincoverage = coverage // covratio
# cleanup snp bubble in the graph
if snp:
cutSNPs(asm)
......@@ -419,15 +428,13 @@ def run(config):
if config['buildgraph']['minread'] is None:
minread,minoverlap = estimateMinRead(r,config['buildgraph']['minoverlap'],
coverage,
covratio)
mincoverage)
if minread<5:
minread=5
else:
minread,minoverlap = estimateMinRead(r,config['buildgraph']['minoverlap'],
coverage,
covratio)
mincoverage)
minread=config['buildgraph']['minread']
......@@ -466,11 +473,14 @@ def run(config):
# We now run the main assembling process
#
#############################################
resetusedreads()
logger.info('Starting the assembling')
# logger.info("minread = %d mincov = %d coverage = %d %d" % (minread,mincov,coverage,minoverlap))
# Run the first assembling pass
a = tango(asm,s,mincov=coverage/4, #@UnusedVariable
a = tango(asm,s,mincov=mincov, #@UnusedVariable
minread=minread,
minoverlap=minoverlap,
lowfilter=lowfilter,
......@@ -489,7 +499,7 @@ def run(config):
if coverageset:
cutLowCoverage(asm,int(coverage),terminal=True)
else:
cutLowCoverage(asm,int(coverage/4),terminal=True)
cutLowCoverage(asm,int(mincoverage),terminal=True)
# cleanup snp bubble in the graph
......@@ -515,6 +525,8 @@ def run(config):
if not coverageset:
coverage = ecoverage
mincoverage = coverage // covratio
# if coverage < 30:
# sys.exit()
......@@ -559,11 +571,12 @@ def run(config):
#
###################################################
mincoverage = coverage // covratiofill
if config['buildgraph']['minread'] is None:
logger.info("Estimating minread")
minread,minoverlap = estimateMinRead(r,config['buildgraph']['minoverlap'],
coverage,
covratiofill)
mincoverage)
if minread<5:
minread=5
......@@ -584,8 +597,8 @@ def run(config):
maxjump=0,
minoverlap=minoverlap,
cmincov=1,
emincov=int(coverage/4),
gmincov=int(coverage/4),
emincov=mincoverage,
gmincov=mincoverage,
lowfilter=lowfilter,
adapters5 = adapterSeq5,
adapters3 = adapterSeq3,
......@@ -655,9 +668,9 @@ def run(config):
if coverageset:
cutLowCoverage(asm,int(coverage),terminal=False)
else:
cutLowCoverage(asm,int(coverage/2),terminal=True)
cutLowCoverage(asm,int(mincoverage),terminal=True)
logger.info("Clean low coverage internal branches")
cutLowCoverage(asm,int(coverage/3),terminal=False)
cutLowCoverage(asm,int(mincoverage),terminal=False)
logger.info("Saving the assembling graph")
dumpGraph(output+'.oax',asm)
......
......@@ -21,7 +21,8 @@ default_config = { 'circular' : False,
"path" : None,
"contigs" : False,
'tags' : None,
'extension' : None
'extension' : None,
'1x-coverage' : None
}
def addOptions(parser):
......@@ -81,6 +82,12 @@ def addOptions(parser):
help='the number of bases taken at the end of '
'contigs to jump with paired-ends [default: <estimated>]')
parser.add_argument('--1x-coverage', dest='unfold:1x-coverage',
type=int,
action='store',
default=None,
help='The expected coverage for not duplicated regions [default: <estimated>]')
parser.add_argument('--set-tag','-S', dest ='unfold:tags',
metavar='tag',
action='append',
......@@ -112,6 +119,7 @@ def buildPath(asm,compactgraph,back,minlink,config):
constraints=constraints,
circular=config['unfold']['circular'],
force=config['unfold']['force'],
cov1x=config['unfold']['1x-coverage'],
logger=logger)
paths.append(path[-1][0])
......
......@@ -1850,6 +1850,7 @@ def unfoldAssembling(self,assgraph,constraints=None,
minlink=5, limitSize=0,
circular=False,
force=False,
cov1x=None,
logger=None):
'''
......@@ -1928,8 +1929,12 @@ def unfoldAssembling(self,assgraph,constraints=None,
# - [3] the weight of the first stem in the path
# - [4] the total length of the path
covone=oneXcoverage(assgraph)
logOrPrint(logger,"Coverage 1x = %d" % covone)
if cov1x is None:
covone=oneXcoverage(assgraph)
logOrPrint(logger,"Coverage 1x estimated = %d" % covone)
else:
covone=cov1x
logOrPrint(logger,"Coverage set by user 1x = %d" % covone)
# paths = [[dict(weight),0,(i,),0,0] for i in si]
paths = [[dict(weight),0,(i,),covone,0] for i in si]
......
major = 1
minor = '0'
serial= '00-alpha7'
serial= '00-alpha9'
version = "%d.%s.%s" % (major,minor,serial)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment