Commit aa064dda by Eric Coissac

Add two general options for limiting the analysis of a file to only a

sub-part of the data:

   - --skip <N>
   - --only <N>
   
Respectively, they skip the first N sequences and then restrict the
analysis to only the next <N> sequences.
parent f38ccae6
......@@ -201,15 +201,19 @@ def myLenlcs(s1, s2, minid, normalized, reference):
def cachedLenLCS(s1,s2,minid,normalized,reference):
global __LCSCache__
global __INCache__
global __OUTCache__
pair=frozenset((s1.id,s2.id))
if pair in __LCSCache__:
rep=__LCSCache__[pair]
del __LCSCache__[pair]
__INCache__+=1.0
else:
rep=lenlcs(s1,s2,minid,normalized,reference)
__OUTCache__+=1.0
__LCSCache__[pair]=rep
......@@ -271,7 +275,7 @@ def lcsIteratorSelf(entries,db,options):
if maxid[0]:
if maxid[1] > options.circle:
maxid[1]=options.circle
maxid=(maxid[0],options.circle)
results.extend([(s,maxid[1]) for s in maxid[0]])
for d in db:
for s in maxid[0]:
......@@ -285,6 +289,8 @@ def lcsIteratorSelf(entries,db,options):
if __name__=='__main__':
__LCSCache__=OrderedDict()
__INCache__=1.0
__OUTCache__=1.0
optionParser = getOptionManager([addSearchOptions,addTaxonomyDBOptions,addInOutputOption],progdoc=__doc__)
......@@ -418,6 +424,7 @@ if __name__=='__main__':
else:
seq['species_name']=None
print >>sys.stderr,'\rCache size : %5.3f ' % (__INCache__/__OUTCache__),
writer(seq)
......
......@@ -18,7 +18,7 @@
'''
from obitools.options import getOptionManager
from obitools.format.options import addInOutputOption
from obitools.format.options import addInputFormatOption
def addCountOptions(optionManager):
group=optionManager.add_option_group('Obicount specific options')
......@@ -36,7 +36,7 @@ def addCountOptions(optionManager):
if __name__ == '__main__':
optionParser = getOptionManager([addCountOptions,addInOutputOption], progdoc=__doc__)
optionParser = getOptionManager([addCountOptions,addInputFormatOption], progdoc=__doc__)
(options, entries) = optionParser()
......
......@@ -29,11 +29,12 @@ import sys
import re
from obitools.ecopcr import EcoPCRFile
from obitools.format.sequence import skipOnErrorIterator
from obitools.format.sequence import skipOnErrorIterator, skipfirst, only
from obitools import BioSequence
from obitools.utils import FakeFile
from glob import glob
from test.test_compiler import Toto
def binarySequenceIterator(lineiterator):
......@@ -52,6 +53,23 @@ def binarySequenceIterator(lineiterator):
def addInputFormatOption(optionManager):
group = optionManager.add_option_group("Restriction to a sub-part options",
"Allow to limit analysis to a sub-part of the data file")
group.add_option('--skip',
action="store", dest="skip",
metavar='<N>',
default=None,
type='int',
help="skip the N first sequences")
group.add_option('--only',
action="store", dest="only",
metavar='<N>',
default=None,
type='int',
help="treat only N sequences")
group = optionManager.add_option_group("Input format options",
"If not specified, a test is done to determine the file format")
......@@ -294,6 +312,14 @@ def autoEntriesIterator(options):
if options.skiperror:
reader = skipOnErrorIterator(reader)
if options.skip is not None:
print >>sys.stderr,"Skipping %d sequences" % options.skip
reader = skipfirst(reader,options.skip)
if options.only is not None:
print >>sys.stderr,"Analysing only %d sequences" % options.only
reader = only(reader,options.only)
return reader
......
......@@ -21,6 +21,33 @@ def skipOnErrorIterator(seqIterator):
continue
return internal
def skipfirst(seqIterator,n):
    """Wrap a sequence-iterator factory so its first ``n`` items are dropped.

    :param seqIterator: callable taking the input data and returning an
                        iterable of sequences.
    :param n: number of leading sequences to discard; zero or a negative
              value discards nothing.
    :return: a generator function with the same calling convention as
             ``seqIterator`` (one ``inputdata`` argument).
    """
    # Local import: the module's import block is not visible from this
    # definition, so the dependency is kept inside the function.
    from itertools import islice

    def internal(inputdata):
        # islice drops the first n items and then passes the rest through,
        # with no per-record counter. max() keeps a negative n meaning
        # "skip nothing" (islice itself rejects negative bounds).
        for seq in islice(seqIterator(inputdata), max(n, 0), None):
            yield seq
        # Emit a bare newline on stderr once the input is exhausted
        # (same output as the Python 2 statement ``print >>sys.stderr``).
        sys.stderr.write('\n')

    return internal
def only(seqIterator,n):
    """Wrap a sequence-iterator factory so at most ``n`` items are produced.

    Exactly ``n`` items are pulled from the underlying iterator: it is never
    asked for item ``n + 1``, so no extra record is parsed and a piped input
    is not waited on after the limit is reached.

    :param seqIterator: callable taking the input data and returning an
                        iterable of sequences.
    :param n: maximum number of sequences to yield; zero or a negative
              value yields nothing.
    :return: a generator function with the same calling convention as
             ``seqIterator`` (one ``inputdata`` argument).
    """
    # Local import: the module's import block is not visible from this
    # definition, so the dependency is kept inside the function.
    from itertools import islice

    def internal(inputdata):
        # islice stops after n items without fetching one more from the
        # source. max() keeps a negative n meaning "yield nothing"
        # (islice itself rejects negative bounds).
        for seq in islice(seqIterator(inputdata), max(n, 0)):
            yield seq
        # Emit a bare newline on stderr once the limit or the end of input
        # is reached (same output as ``print >>sys.stderr`` in Python 2).
        sys.stderr.write('\n')

    return internal
def autoSequenceIterator(file):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment