Commit 078a1c83 by Eric Coissac

Add the ability for the taxonomy to manage extensions through the notion of local taxa

parent a0fca4cf
......@@ -7,8 +7,8 @@ Created on 2 fevr. 2010
from obitools.options import getOptionManager
from obitools.format.options import addInputFormatOption
import sys
from obitools.ecopcr.options import addTaxonomyDBOptions, loadTaxonomyDatabase
import math
def addStatOptions(optionManager):
optionManager.add_option('-c','--category-attribute',
......@@ -36,6 +36,18 @@ def addStatOptions(optionManager):
default=[],
help="compute mean of attribute")
optionManager.add_option('-v','--variance',
action="append", dest="var",
metavar="<Attribute Name>",
default=[],
help="compute the estimator of variance of attribute")
optionManager.add_option('-s','--std-dev',
action="append", dest="sd",
metavar="<Attribute Name>",
default=[],
help="compute the estimator of standard deviation of attribute")
def statistics(values,attribute,func):
stat={}
......@@ -69,6 +81,19 @@ def mean(values,options):
return statistics(values, options.mean, average)
def variance(v):
    """Return the unbiased (n - 1 denominator) estimator of the variance of v.

    v is a sized sequence of numbers (it is measured with len() and
    iterated twice).

    Returns NaN when v holds fewer than two values, because the estimator
    is undefined there; this keeps a report going when one group contains
    a single observation instead of aborting on a ZeroDivisionError.
    """
    n = len(v)
    if n < 2:
        return float('nan')
    mean = sum(v) / float(n)
    # Two-pass form: summing squared deviations from the mean avoids the
    # catastrophic cancellation of sum(x**2) - sum(x)**2 / n when the
    # values are large compared with their spread.
    return sum((x - mean) ** 2 for x in v) / (n - 1)
def var(values, options):
    """Run the variance estimator over the attributes named in options.var.

    The work is delegated to statistics(); its result is returned as is
    (the script unpacks it as a pair).
    """
    attributes = options.var
    return statistics(values, attributes, variance)
def sd(values, options):
    """Run the standard-deviation estimator over the attributes in options.sd.

    The work is delegated to statistics(); its result is returned as is
    (the script unpacks it as a pair).
    """
    def stddev(v):
        # Standard deviation is the square root of the variance estimator.
        spread = variance(v)
        return math.sqrt(spread)

    attributes = options.sd
    return statistics(values, attributes, stddev)
if __name__ == "__main__":
optionParser = getOptionManager([addStatOptions,addInputFormatOption,addTaxonomyDBOptions])
......@@ -114,9 +139,11 @@ if __name__ == "__main__":
values[var][category].append(v)
mini,lmini = minimum(values, options)
maxi,lmaxi = maximum(values, options)
avg ,lavg = mean(values, options)
mini,lmini = minimum(values, options)
maxi,lmaxi = maximum(values, options)
avg ,lavg = mean(values, options)
varp ,lvarp = var(values, options)
sigma,lsigma= sd(values, options)
pcat = "%%-%ds" % lcat
......@@ -135,10 +162,22 @@ if __name__ == "__main__":
else:
meanvar= "%s"
if options.var:
varvar= "var_%%-%ds" % max(len(x) for x in options.var)
else:
varvar= "%s"
if options.sd:
sdvar= "sd_%%-%ds" % max(len(x) for x in options.sd)
else:
sdvar= "%s"
hcat = "\t".join([pcat % x for x in options.categories]) + "\t" +\
"\t".join([minvar % x for x in options.minimum]) + "\t" +\
"\t".join([maxvar % x for x in options.maximum]) + "\t" +\
"\t".join([meanvar % x for x in options.mean]) + \
"\t".join([meanvar % x for x in options.mean]) + "\t" +\
"\t".join([varvar % x for x in options.var]) + "\t" +\
"\t".join([sdvar % x for x in options.sd]) + \
"\t count" + \
"\t total"
print hcat
......@@ -151,6 +190,10 @@ if __name__ == "__main__":
print (("%%%dd" % lmaxi[m]) % maxi[m][c])+"\t",
for m in options.mean:
print (("%%%df" % lavg[m]) % avg[m][c])+"\t",
for m in options.var:
print (("%%%df" % lvarp[m]) % varp[m][c])+"\t",
for m in options.sd:
print (("%%%df" % lsigma[m]) % sigma[m][c])+"\t",
print "%7d" %catcount[c],
print "%9d" %totcount[c]
......
......@@ -7,15 +7,77 @@ Created on 13 oct. 2009
from obitools.options.taxonomyfilter import addTaxonomyDBOptions,loadTaxonomyDatabase
from obitools.options import getOptionManager
from obitools.ecopcr.taxonomy import ecoTaxonomyWriter
def editTaxonomyOptions(optionManager):
    """Register the taxonomy-editing command line options on optionManager.

    optionManager must expose an optparse-style add_option() method.
    Options added:
      -a/--add-taxon          repeatable, collected in options.newtaxon
      -s/--add-species        repeatable, collected in options.newspecies
      -f/--add-favorite-name  repeatable, collected in options.newname
      -m/--min-taxid          integer stored in options.taxashift
    """
    optionManager.add_option('-a','--add-taxon',
                             action="append", dest="newtaxon",
                             metavar="<taxon_name>:rank:parent",
                             default=[],
                             help="Add a new taxon to the taxonomy. The new taxon "
                                  "is described by three values separated by colon. "
                                  "the scientific name, the rank of the new taxon, "
                                  "the taxid of the parent taxon")

    optionManager.add_option('-s','--add-species',
                             action="append", dest="newspecies",
                             metavar="<species name>",
                             default=[],
                             help="Add a new species to the taxonomy. The new species "
                                  "is described by its scientific name")

    optionManager.add_option('-f','--add-favorite-name',
                             action="append", dest="newname",
                             metavar="<taxon_name>:taxid",
                             default=[],
                             help="Add a new favorite name to the taxonomy. The new name "
                                  "is described by two values separated by colon. "
                                  "the new favorite name and the taxid of the taxon")

    # type="int" keeps a user-supplied value the same type as the integer
    # default; without it the command line value is stored as a string.
    optionManager.add_option('-m','--min-taxid',
                             action="store", dest="taxashift",
                             type="int",
                             metavar="####",
                             default=10000000,
                             help="minimal taxid for the newly added taxid")
if __name__ == '__main__':

    def _printAddedTaxon(taxonomy, taxid):
        # Print the one-line summary of a newly added taxon and its parent.
        taxon = taxonomy.findTaxonByTaxid(taxid)
        parent = taxonomy._taxonomy[taxon[2]]
        print("added : %-40s\t%-15s\t%-8d\t->\t%s [%d] (%s)" % (taxon[3],taxonomy._ranks[taxon[1]],
                                                                taxon[0],
                                                                parent[3],parent[0],taxonomy._ranks[parent[1]]))

    optionParser = getOptionManager([addTaxonomyDBOptions,editTaxonomyOptions])

    (options, entries) = optionParser()

    loadTaxonomyDatabase(options)

    localdata=False

    # --add-taxon entries have the form "<taxon_name>:rank:parent".
    for t in options.newtaxon:
        # Split the entry itself on ':'; the separator is the only argument.
        tx = t.split(':')
        # NOTE(review): the parent taxid is passed as the raw string taken
        # from the command line - confirm addLocalTaxon accepts that.
        taxid = options.taxonomy.addLocalTaxon(tx[0].strip(),tx[1],tx[2],options.taxashift)
        _printAddedTaxon(options.taxonomy, taxid)
        localdata=True

    # --add-species entries are scientific names; the first word is looked
    # up as the parent taxon (a name without a space raises ValueError).
    for t in options.newspecies:
        genus,species = t.split(" ",1)
        parent = options.taxonomy.findTaxonByName(genus)
        taxid = options.taxonomy.addLocalTaxon(t,'species',parent[0],options.taxashift)
        _printAddedTaxon(options.taxonomy, taxid)
        localdata=True

    # --add-favorite-name entries have the form "<taxon_name>:taxid".
    for n in options.newname:
        # Parse the current entry n, not the variable left over from the
        # loops above.
        tx = n.split(':')
        # NOTE(review): the taxid is passed as the raw string taken from
        # the command line - confirm addPreferedName accepts that.
        taxid = options.taxonomy.addPreferedName(tx[0].strip(),tx[1])
        print("name : %8d\t->\t%s" % (taxid,options.taxonomy.getPreferedName(taxid)))

    # NOTE(review): localdata is set above but never read; the writer is
    # always called with onlyLocal=True.
    ecoTaxonomyWriter(options.ecodb,options.taxonomy,onlyLocal=True)
\ No newline at end of file
/* Generated by Cython 0.14.1 on Thu Apr 14 22:36:32 2011 */
/* Generated by Cython 0.14.1 on Fri Jun 24 12:21:16 2011 */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment