Commit f87e9208 by Frédéric Boyer

[MOD] keep taxid in every case + some doc

parent 1c2c67be
#!/usr/local/bin/python
"""
:py:mod:`obiselect` : Selects representative sequences from a sequence file
===========================================================================
:py:mod:`obiselect` : Selects representative sequence records
=============================================================
.. codeauthor:: Eric Coissac <eric.coissac@metabarcoding.org>
......@@ -9,18 +9,6 @@
file by describing sequence record groups and defining how many and which sequence records
from each group must be retrieved.
Example:
.. code-block:: bash
> obiselect ...
The above command splits the sequence input file according to the ``mode`` attribute.
This attribute is created by the :py:mod:`solexapairend` tool and its value can be set to
either ``joined`` or ``alignment``. The prefix ``experiment_`` is put before
each subfile name. Two subfiles will thus be created: ``experiment_joined`` and
``experiment_alignment``.
"""
from obitools.format.options import addInOutputOption, sequenceWriterGenerator,\
addInputFormatOption
......@@ -48,64 +36,70 @@ def median(seqs):
def addSelectOptions(optionManager):
optionManager.add_option('-c','--category-attribute',
group = optionManager.add_option_group('obiselect specific options')
group.add_option('-c','--category-attribute',
action="append", dest="categories",
metavar="<Attribute Name>",
default=[],
help="Add one attribute to the list of"
" attribute used for categorizing sequences")
" attribute used for categorizing sequence records")
optionManager.add_option('-n','--number',
group.add_option('-n','--number',
action="store", dest="number",
metavar="",
type="int",
default=1,
help="select sequence in each group minimizing the function")
help="number of sequence records to keep in each category")
optionManager.add_option('-f','--function',
group.add_option('-f','--function',
action="store", dest="function",
metavar="",
default="random",
help="select sequence in each group minimizing the function")
help="python code evaluated for each sequence record [default: random value]")
optionManager.add_option('-m','--min',
group.add_option('-m','--min',
action="store_const", dest="method",
metavar="",
default=maximum,
const=minimum,
help="select sequence in each group minimizing the function")
help="select sequence record in each group minimizing the function"
" (exclusive with -M, -a, --median)")
optionManager.add_option('-M','--max',
group.add_option('-M','--max',
action="store_const", dest="method",
metavar="<Attribute Name>",
metavar="",
default=maximum,
const=maximum,
help="select sequence in each group maximizing the function")
help="select sequence record in each group maximizing the function"
" (exclusive with -m, -a, --median)")
optionManager.add_option('-a','--mean',
group.add_option('-a','--mean',
action="store_const", dest="method",
metavar="<Attribute Name>",
metavar="",
default=maximum,
const=mean,
help="select sequence in each group closest to the mean of the function")
help="select sequence record in each group closest to the mean of the function"
" (exclusive with -m, -M, --median)")
optionManager.add_option('--median',
group.add_option('--median',
action="store_const", dest="method",
metavar="<Attribute Name>",
default=maximum,
const=median,
help="select sequence in each group closest to the median of the function")
help="select sequence record in each group closest to the median of the function"
" (exclusive with -m, -M, -a)")
optionManager.add_option('--merge',
group.add_option('--merge',
action="append", dest="merge",
metavar="<TAG NAME>",
type="string",
default=[],
help="attributes to merge")
help="attributes to merge within each group")
optionManager.add_option('--merge-ids',
group.add_option('--merge-ids',
action="store_true", dest="mergeids",
default=False,
help="add the merged id data to output")
......@@ -154,7 +148,7 @@ if __name__ == '__main__':
if hasattr(options, 'taxonomy') and options.taxonomy is not None:
environ = {'taxonomy' : options.taxonomy,'sequence':s,'random':random()}
else:
environ = {'sequence':s}
environ = {'sequence':s, 'random':random()}
try:
select = eval(options.function,environ,s)
......@@ -194,7 +188,7 @@ if __name__ == '__main__':
s[mkey]={}
if key in s:
s[mkey][s[key]]=s[mkey].get(s[key],0)+1
del(s[key])
#del(s[key])
if 'count' not in s:
s['count']=1
......@@ -222,10 +216,10 @@ if __name__ == '__main__':
else:
s[mkey][skey]=seq[mkey][skey]
for key in seq.iterkeys():
# Merger proprement l'attribut merged s'il exist
if key in s and s[key]!=seq[key] and key!='count' and key[0:7]!='merged_' and key!='merged' and key!='select':
del(s[key])
#for key in seq.iterkeys():
# # Merger proprement l'attribut merged s'il exist
# if key in s and s[key]!=seq[key] and key!='count' and key[0:7]!='merged_' and key!='merged' and key!='select':
# del(s[key])
if mergeIds:
......@@ -234,12 +228,8 @@ if __name__ == '__main__':
if taxonomy is not None:
mergeTaxonomyClassification(seqs, taxonomy)
for s in seqs[0:options.number]:
s['class']=c
del s['select']
writer(s)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment