Commit 7fc8d22d by Eric Coissac

--no commit message

parent f081c832
In [110]: r = Index('../../samples/AA')
In [111]: r = Index('../../samples/AA')
Loading global data...
......@@ -26,8 +26,6 @@ Fast indexing reverse reads...
Done.
In [112]: p = fasta('../../samples/arabidopsis.chloro.prot.fasta')
In [113]: p.keys()
Out[113]:
['petD', 'petG', 'psbN', 'petA', 'rpl2', 'petB', 'petL',
......@@ -43,7 +41,17 @@ Out[113]:
'ndhJ', 'rps4', 'rps3', 'rps2', 'psbL', 'rps8', 'ndhH',
'psbZ']
In [114]: gp = ['rbcL','matK','psbA','ndhA','rpoA','atpA']
In [113]: gp = ['rbcL','matK','ndhA',
'rpoA','atpA','atpH',
'rpoA','atpB','psbN',
'psbT','psbB','psbD',
'psbE','psbF','psbA',
'psbC','psbM','psbI',
'psbH','psbK','psbJ',
'psbL','psbZ']
In [114]: p2=dict(x for x in protChloroArabidopsis.items() if x[0] in gp)
In [115]: p2=dict(x for x in p.items() if x[0] in gp)
......
......@@ -2,6 +2,7 @@ from orgasm.indexer._orgasm cimport *
from _asmbgraph cimport *
from orgasm.graph._graphmultiedge cimport *
from orgasm.utils._progress cimport progressBar
import math
import sys
......@@ -285,7 +286,7 @@ cdef class Assembler:
yield first,last,lseq,sequence,path
def compactAssembling(self):
def compactAssembling(self, bint verbose=True):
cdef DiGraph graph = DiGraphMultiEdge('compact')
cdef int n=0
cdef int x
......@@ -293,6 +294,7 @@ cdef class Assembler:
cdef int first
cdef int last
cdef int lseq
cdef int lgraph
cdef bytes sequence
cdef dict attr
cdef double weight
......@@ -309,9 +311,13 @@ cdef class Assembler:
def is_junction(int node):
return len(sons(node)) > 1 or len(parents(node)) > 1 or len(parents(node))==0
print >>sys.stderr,b"Compacting graph :"
lgraph=len(self._graph)
if verbose:
print >>sys.stderr,b"Compacting graph :"
else:
progressBar(1,lgraph,True,b"Compacting graph")
minweight=1000000.
......@@ -344,9 +350,10 @@ cdef class Assembler:
if weight < minweight:
minweight=weight
print >>sys.stderr,b" Stem %9d : %6d bp (total : %6d) coverage : %6.2f" % (stemid,lseq,lcontig,weight * readSize)
if verbose:
print >>sys.stderr,b" Stem %9d : %6d bp (total : %6d) coverage : %6.2f" % (stemid,lseq,lcontig,weight * readSize)
else:
progressBar(lcontig,lgraph,False,b"Compacting graph")
if lseq > 10:
attr[b'label']=b"%d : %s->(%d)->%s [%d]" % (stemid,sequence[0:5],lseq,sequence[-5:],attr[b'weight'])
......
......@@ -346,13 +346,16 @@ def consensus(aln,index,covmin=5):
coverage = [i[1] for i in seq]
seq = ''.join([i[0] for i in seq])
i=0
while coverage[i] < covmin:
lseq=len(seq)
while i < lseq and coverage[i] < covmin:
i+=1
j=len(coverage)-1
while coverage[j] < covmin:
while j >= 0 and coverage[j] < covmin:
j-=1
if i < j:
seq = seq[i:j+1]
else:
seq = ""
return seq
......@@ -34,6 +34,9 @@ cpdef object progressBar(object pos,
if reset:
del delta[:]
step[0]=1
step[1]=0
step[2]=0
if not delta:
delta.append(clock())
delta.append(clock())
......
......@@ -20,7 +20,7 @@ import pickle
#p = fasta('../../samples/arabidopsis.chloro.prot.fasta')
prefix="Cardru"
prefix="Coespi"
gp = ['rbcL','matK','ndhA',
'rpoA','atpA','atpH',
'rpoA','atpB','psbN',
......@@ -34,21 +34,21 @@ p2=dict(x for x in protChloroArabidopsis.items() if x[0] in gp)
r = Index('../../samples/'+prefix)
m = r.lookForSeeds(p2)
pickle.dump(m,open(prefix+'.match.omx',"w"))
%ppickle.dump(m,open(prefix+'.match.omx',"w"))
s = matchtoseed(m,r)
asm = Assembler(r)
a = tango(asm,s,mincov=1,minread=15,minoverlap=40,maxjump=0,cycle=1)
a = tango(asm,s,mincov=1,minread=10,minoverlap=40,maxjump=0,cycle=1)
asm.cleanDeadBranches(maxlength=10)
score,length,coverage = coverageEstimate(asm)
cutLowCoverage(asm,10,terminal=False)
while cutLowCoverage(asm,int(coverage/3),terminal=False):
pass
score,length,coverage = coverageEstimate(asm)
cutLowCoverage(asm,int(coverage/3),terminal=False)
while fillGaps(asm,back=300,minread=15,maxjump=0,minoverlap=40,cmincov=4,gmincov=int(coverage/3)):
while fillGaps(asm,back=300,minread=10,maxjump=0,minoverlap=40,cmincov=2,gmincov=int(coverage/3)) > 0:
pass
cg = asm.compactAssembling()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment