Commit bac706c9 by Aurélie Bonin

--no commit message

parent 69f7371b
#!/usr/local/bin/python
'''
:py:mod:`ecotaxspecificity`: Evaluates barcode resolution
=========================================================
.. codeauthor:: Eric Coissac <eric.coissac@metabarcoding.org>
The :py:mod:`ecotaxspecificity` command evaluates barcode resolution at different
taxonomic ranks.
As inputs, it takes a sequence record file annotated with taxids in the sequence
header, and a database formated as an ecopcr database (see :doc:`obitaxonomy
<obitaxonomy>`) or a NCBI taxdump (see NCBI ftp site).
An example of output is reported below::
Number of sequences added in graph: 284
Number of nodes in all components: 269
Number of sequences lost: 15!
rank taxon_ok taxon_total percent
order 8 8 100.00
superfamily 1 1 100.00
parvorder 1 1 100.00
subkingdom 1 1 100.00
superkingdom 1 1 100.00
kingdom 3 3 100.00
phylum 5 5 100.00
infraorder 1 1 100.00
subfamily 3 3 100.00
class 6 6 100.00
species 35 176 19.89
superorder 1 1 100.00
suborder 1 1 100.00
subtribe 1 1 100.00
subclass 3 3 100.00
genus 9 15 60.00
superclass 1 1 100.00
family 10 10 100.00
tribe 2 2 100.00
subphylum 1 1 100.00
In this example, the input sequence file contains 284 sequence records, but only
269 have been examined, because taxonomic information was not recovered for the
the 15 remaining ones.
"Taxon_total" refers to the number of different taxa observed at this rank
in the sequence record file (when taxonomic information is available at this
rank), and "taxon_ok" corresponds to the number of taxa that the barcode sequence
identifies unambiguously in the taxonomic database. In this example, the sequence
records correspond to 176 different species, but only 35 of these have specific
barcodes. "percent" is the percentage of unambiguously identified taxa among
the total number of taxa (taxon_ok/taxon_total*100).
'''
import math
import sys
......@@ -16,13 +70,14 @@ from obitools.ecopcr.options import addTaxonomyDBOptions, loadTaxonomyDatabase
def addSpecificityOptions(optionManager):
optionManager.add_option('-e','--errors',
group = optionManager.add_option_group('ecotaxspecificity specific options')
group.add_option('-e','--errors',
action="store", dest="dist",
metavar="###",
type="int",
default=1,
help="Maximum errors between two sequences")
optionManager.add_option('-q','--quorum',
group.add_option('-q','--quorum',
action="store", dest="quorum",
type="float",
default=0.0,
......@@ -31,7 +86,7 @@ def addSpecificityOptions(optionManager):
if __name__=='__main__':
optionParser = getOptionManager([addInOutputOption,addTaxonomyDBOptions,addSpecificityOptions])
optionParser = getOptionManager([addInputFormatOption,addTaxonomyDBOptions,addSpecificityOptions])
(options, entries) = optionParser()
......@@ -110,9 +165,9 @@ if __name__=='__main__':
indexbyseq[s].add(seq)
yy = yy + 1
print "Total Sequences added in graph: " + str(xx)
print "Total nodes in all components: " + str (yy)
print "Lost sequences: " + str (xx-yy) + "!"
print "Number of sequences added in graph: " + str(xx)
print "Number of nodes in all components: " + str (yy)
print "Number of sequences lost: " + str (xx-yy) + "!"
# since multiple different sequences have one key, we need to know what that key is for each sequence
indexbykey={} #it will have elements like: {"seq1":key, "seq2":key, ...} where 'key' is the component key to which 'seqx' belongs
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment