Commit 8bda6f34 by Eric Coissac

--no commit message

parent 62a86bd8
......@@ -11,6 +11,7 @@ Contents:
.. toctree::
:maxdepth: 2
Algorithms <algorithms>
Assembling a mitochondrion genome <mitochondrion>
User API <userapi>
The organelle assembler indexer <orgasmi>
......
import pickle
import sys
from backtranslate.fasta import fasta
import samples
def getProbes(config):
try:
p = fasta(config.seedname)
config.logger.info("Load protein sequences from file : %s" % config.seedname)
except IOError:
p = getattr(samples,config.seedname)
config.logger.info("Load protein internal dataset : %s" % config.seedname)
if "matK" in p:
gp = ['accD',
'atpA','atpB','atpE','atpH',
'ccsA',
'cemA',
'clpP',
'matK',
'ndhA',
'petA','petG',
'psbA','psbB','psbC','psbD','psbE','psbF','psbH','psbI','psbJ','psbK','psbL','psbM','psbN','psbT','psbZ',
'psaA','psaB','psaC',
'rbcL',
'rpl12','rpl14','rpl16','rpl23','rpl32','rpl36',
'rpoA','rpoB','rpoC1','rpoC2',
"rps2","rps4","rps7",'rps8','rps12',"rps18",'rpl14',
'ycf1',
]
p=dict(x for x in p.items() if x[0] in gp)
return p
def getSeeds(index,protein,config):
# looks if the internal blast was already run
try:
# yes, load the previous results
seeds = pickle.load(open(config.outputname+'.omx'))
config.logger.info("Load protein match from previous run : %d matches" % sum(len(seeds[i]) for i in seeds))
except:
# no, run it and save the results
config.logger.info("Running protein matching against reads...")
seeds = index.lookForSeeds(protein,mincov=config.seedmincov)
config.logger.info("==> %d matches" % sum(len(seeds[i]) for i in seeds))
pickle.dump(seeds,open(config.outputname+'.omx',"w"))
return seeds
\ No newline at end of file
......@@ -34,7 +34,7 @@ cdef class Index:
cpdef tuple getIds(self, int32_t id)
cpdef int32_t getPairedRead(self, int32_t id)
cpdef tuple normalizedPairedEndsReads(self, int32_t id)
cpdef lookForSeeds(self, dict proteins, int kup=?, int covmin=?)
cpdef lookForSeeds(self, dict proteins, int kup=?, int mincov=?)
cpdef dict checkedExtensions(self, bytes probe,
int minread=?,
......
......@@ -259,7 +259,7 @@ cdef class Index:
return nid,paired
cpdef lookForSeeds(self, dict proteins, int kup=4, int covmin=1):
cpdef lookForSeeds(self, dict proteins, int kup=4, int mincov=1):
cdef AhoCarasick patterns = AhoCarasick()
cdef dict matches
......@@ -270,7 +270,7 @@ cdef class Index:
patterns.finalize()
matches = patterns.scanIndex(self,15,-1,covmin)
matches = patterns.scanIndex(self,15,-1,mincov)
return matches
......
......@@ -164,7 +164,7 @@ def tango(self,seeds,
minread+delta, minratio,
mincov, minoverlap+delta,
1,
restrict=restrict)
restrict=restrict,exact=True)
#
# Build a bijective relationship for the right extensions
......@@ -175,7 +175,7 @@ def tango(self,seeds,
minread+delta, minratio,
mincov, minoverlap+delta,
1,
restrict=restrict)
restrict=restrict,exact=True)
back = set(reduce(lambda p,q:p+q,[backe[y][1] for y in backe],[]))
if -readid not in back:
......@@ -187,7 +187,7 @@ def tango(self,seeds,
minread+delta, minratio,
mincov, minoverlap+delta,
1,
restrict=restrict)
restrict=restrict,exact=True)
#
# Build a bijective relationship for the left extensions
......@@ -198,7 +198,7 @@ def tango(self,seeds,
minread+delta, minratio,
mincov, minoverlap+delta,
1,
restrict=restrict)
restrict=restrict,exact=True)
back = set(reduce(lambda p,q:p+q,[backe[y][1] for y in backe],[]))
......@@ -624,6 +624,45 @@ def matchtoseed(matches,index):
return s
def matchtogene(matches):
genes = {}
k = matches.keys()
for x in k:
for m in matches[x]:
genes[abs(m[0])]=x
return genes
def genesincontig(cg,index,matches):
def vread(i):
if abs(i) > len(index):
v=0
elif abs(i) in genes:
v=1
else:
v=-1
return v
genes = matchtogene(matches)
ei = cg.edgeIterator(edgePredicate=lambda e: 'path' in cg.getEdgeAttr(*e))
for e in ei:
ea = cg.getEdgeAttr(*e)
path = ea['path']
eg = [vread(i) for i in path]
g = sum(i for i in eg if i > 0)
if g > 0:
color=b"#00FF00"
else:
color=b"#0000FF"
graphics = ea.get(b'graphics',{})
graphics[b'arrow']=b'last'
graphics[b'fill']=color
ea[b'graphics']=graphics
ea[b'ingene']=g
def scaffold(self,assgraph,minlink=5,back=200,addConnectedLink=False):
'''
Add relationships between edges of the assembling graph related to the
......
......@@ -49,7 +49,7 @@ if __name__ == '__main__':
# Load the read index
r = Index(args.indexFilename)
x = pickle.load(open(args.resultFilename+'.match.omx'))
x = pickle.load(open(args.resultFilename+'.omx'))
asm = restoreGraph(args.resultFilename+'.oax',r,x)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment