Commit f38ccae6 by Eric Coissac

Add a --minimum-circle option to ecotag and a cache for the self

alignment scores, holding up to 1000000 pairwise scores
parent b9988181
......@@ -19,7 +19,7 @@ from os import path
PACKAGE = "OBITools"
VERSION = "1.1.18"
VERSION = "1.1.19"
AUTHOR = 'Eric Coissac'
EMAIL = 'eric@coissac.eu'
URL = 'metabarcoding.org/obitools'
......
......@@ -46,6 +46,8 @@ from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDat
from obitools.options import getOptionManager
from obitools.format.options import addInOutputOption, sequenceWriterGenerator
from collections import OrderedDict
import sys
import math
import os.path
......@@ -76,6 +78,13 @@ def addSearchOptions(optionManager):
default=0.0,
help="minimum identity to consider.")
optionManager.add_option('--minimum-circle',
action="store", dest="circle",
metavar="identity",
type="float",
default=1.0,
help="minimum identity considered for the assignment circle.")
# optionManager.add_option('-S','--normalized-smallest',
# action="store_false", dest="large",
# default=True,
......@@ -190,6 +199,26 @@ def myLenlcs(s1, s2, minid, normalized, reference):
return lcs, lali
def cachedLenLCS(s1,s2,minid,normalized,reference):
    """Return ``lenlcs(s1, s2, minid, normalized, reference)`` through a bounded cache.

    Results are stored in the module-level ``__LCSCache__`` ordered mapping,
    which the script entry point creates before any call is made.

    The key combines the unordered pair of sequence ids with ``minid``,
    ``normalized`` and ``reference``: the stored value was computed by
    ``lenlcs`` with those arguments, so it must only be reused for a call
    made with the same ones.

    @param s1: first sequence; only its ``id`` attribute is read here
    @param s2: second sequence; only its ``id`` attribute is read here
    @param minid: threshold forwarded unchanged to ``lenlcs``
    @param normalized: flag forwarded unchanged to ``lenlcs``
    @param reference: reference mode forwarded unchanged to ``lenlcs``

    @return: whatever ``lenlcs`` returned for these arguments (callers
             unpack it as ``lcs, lali``)
    """
    global __LCSCache__

    # frozenset makes (s1, s2) and (s2, s1) share one entry.
    key = (frozenset((s1.id, s2.id)), minid, normalized, reference)

    try:
        # Remove on hit so the re-insertion below moves the entry to the
        # most-recently-used end of the ordered mapping.
        rep = __LCSCache__.pop(key)
    except KeyError:
        rep = lenlcs(s1, s2, minid, normalized, reference)

    __LCSCache__[key] = rep

    # Keep at most 1000000 entries: evict the least recently used one.
    if len(__LCSCache__) > 1000000:
        __LCSCache__.popitem(last=False)

    return rep
#def lcsIterator(entries,db,options):
#
# for seq in entries:
......@@ -233,7 +262,7 @@ def lcsIteratorSelf(entries,db,options):
maxid = ([],0.0)
minid = options.minimum
for d in db:
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN)
lcs,lali = myLenlcs(seq,d,minid,normalized=True,reference=ALILEN) # @UnusedVariable
if lcs > maxid[1] and lcs > options.minimum:
maxid = ([d],lcs)
minid = maxid[1]
......@@ -241,11 +270,13 @@ def lcsIteratorSelf(entries,db,options):
maxid[0].append(d)
if maxid[0]:
if maxid[1] > options.circle:
maxid[1]=options.circle
results.extend([(s,maxid[1]) for s in maxid[0]])
for d in db:
for s in maxid[0]:
if d.id != s.id:
lcs,lali = lenlcs(s,d,maxid[1],normalized=True,reference=ALILEN)
lcs,lali = cachedLenLCS(s,d,maxid[1],normalized=True,reference=ALILEN) # @UnusedVariable
if lcs >= maxid[1]:
results.append((d,lcs))
......@@ -253,6 +284,8 @@ def lcsIteratorSelf(entries,db,options):
if __name__=='__main__':
__LCSCache__=OrderedDict()
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
(options, entries) = optionParser()
......
major = 1
minor = 1
serial= '18'
serial= '19'
version = "%2d.%02d %s" % (major,minor,serial)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment