Commit 05efb38f by Eric Coissac

initial commit

parents
/man/
/vignettes/
/Read-and-delete-me
Package: ROBITaxonomy
Type: Package
Title: Metabarcoding data biodiversity analysis
Version: 0.1
Date: 2012-08-23
Author: LECA - Laboratoire d'ecologie alpine
Maintainer: LECA OBITools team <obitools@metabarcoding.org>
Description: More about what it does (maybe more than one line)
License: CeCILL v2.0
LazyLoad: yes
Roxygen: list(wrap = FALSE)
Collate:
'ROBITaxonomy.R'
'taxonomy.R'
'basic.R'
'default.R'
'distance.R'
'lca.R'
'rank.R'
RoxygenNote: 5.0.1
# Generated by roxygen2: do not edit by hand
export(default.taxonomy)
export(distance.taxonomy)
export(ecofind)
export(family)
export(genus)
export(is.obitools.taxonomy)
export(is.subcladeof)
export(kingdom)
export(length.obitools.taxonomy)
export(longest.path)
export(lowest.common.ancestor)
export(max.obitools.taxonomy)
export(parent)
export(path)
export(rank.list)
export(read.taxonomy)
export(scientificname)
export(species)
export(superkingdom)
export(taxid.list)
export(taxonatrank)
export(taxonomicrank)
export(validate)
exportClasses(obitools.taxonomy)
exportClasses(obitools.taxonomyOrNULL)
useDynLib(ROBITaxonomy)
#' @include taxonomy.R
NULL
#' @export
setGeneric(
  name = "scientificname",
  def = function(taxonomy, taxid) {
    # Generic entry point: the actual work is done by the method selected
    # by S4 dispatch.
    return(standardGeneric("scientificname"))
  }
)
#' Returns the scientific name corresponding to a \emph{NCBI taxid}
#'
#' \code{scientificname} function in package \pkg{\link{ROBITaxonomy}} returns the
#' scientific name corresponding to a \emph{NCBI taxid}.
#'
#' @param taxonomy a \code{\link{obitools.taxonomy}} instance
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#' @return The scientific name of the corresponding taxon as a string or a
#' vector of string if the \code{taxid} argument is itself a vector
#'
#' @examples
#' # load the default taxonomy database included in the ROBITaxonomy library
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the scientific names corresponding to these taxids
#' scientificname(taxo,sp.taxid)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}}
#'
#'
#' @docType methods
#' @rdname scientificname-methods
#' @aliases scientificname-methods,obitools.taxonomy
#' @author Eric Coissac
#'
setMethod("scientificname", "obitools.taxonomy", function(taxonomy, taxid) {
  # Resolves a single identifier. A missing identifier is passed through
  # as NA without calling the native routine.
  lookup_one <- function(id) {
    if (!is.na(id)) {
      return(.Call('R_get_scientific_name',
                   taxonomy,
                   id,
                   PACKAGE = "ROBITaxonomy"))
    }
    NA
  }

  # Identifiers are coerced to integer before being handed to the native code.
  ids <- as.integer(taxid)
  sapply(ids, lookup_one)
})
######################################################################
######################################################################
#' @export
setGeneric(
  name = "parent",
  def = function(taxonomy, taxid, name = FALSE) {
    # Generic entry point: the actual work is done by the method selected
    # by S4 dispatch.
    return(standardGeneric("parent"))
  }
)
#' Returns the parent taxon corresponding to a \emph{NCBI taxid}
#'
#' \code{parent} function in package \pkg{\link{ROBITaxonomy}} returns the
#' parent taxon corresponding to a \emph{NCBI taxid}.
#'
#' @param taxonomy a \code{\link{obitools.taxonomy}} instance
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating if the
#' method returns a taxid or a scientific name.
#'
#' @return \describe{ \item{If \code{name==FALSE}}{the taxid of the
#' parent taxon as an integer or a vector of
#' integers if the \code{taxid} argument is itself
#' a vector}
#' \item{If \code{name==TRUE}}{the scientific name of the
#' parent taxon as a string or a vector of
#' string if the \code{taxid} argument is itself a
#' vector} }
#'
#' @examples
#' # load the default taxonomy database included in the ROBITaxonomy library
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the parent taxa corresponding to these taxids
#' parent(taxo,sp.taxid)
#'
#' # same things but scientific names are returned
#' parent(taxo,sp.taxid,TRUE)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}}
#'
#'
#' @docType methods
#' @rdname parent-methods
#' @aliases parent-methods,obitools.taxonomy
#' @author Eric Coissac
#'
setMethod("parent", "obitools.taxonomy", function(taxonomy, taxid, name = FALSE) {
  ids <- as.integer(taxid)

  # Only the first element of `name` is used, coerced to a logical.
  want_name <- as.logical(name[1])

  # Resolves the parent of a single identifier. A missing identifier is
  # passed through as NA without calling the native routine.
  parent_of <- function(id) {
    if (!is.na(id)) {
      return(.Call('R_get_parent',
                   taxonomy,
                   as.integer(id),
                   want_name,
                   PACKAGE = "ROBITaxonomy"))
    }
    NA
  }

  sapply(ids, parent_of)
})
######################################################################
######################################################################
#' @export
setGeneric(
  name = "taxid.list",
  def = function(taxonomy) {
    # Generic entry point: the actual work is done by the method selected
    # by S4 dispatch.
    return(standardGeneric("taxid.list"))
  }
)
#' Returns the list of all taxids belonging to the taxonomy.
#'
#' \code{taxid.list} returns the list of all taxids included in the
#' instance of the class \code{\linkS4class{obitools.taxonomy}}
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @return an \code{integer} vector containing the list of taxids.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the count of taxa described in the taxonomy
#' length(taxo)
#'
#' # extracts the list of all valid taxids
#' good = taxid.list(taxo)
#'
#' # returns the size of the returned list
#' length(good)
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname taxid.list-method
#' @aliases taxid.list
#'
setMethod("taxid.list", "obitools.taxonomy",
          function(taxonomy) {
            # The enumeration is entirely delegated to the native routine.
            all_taxids <- .Call('R_taxid_list',
                                taxonomy,
                                PACKAGE = "ROBITaxonomy")
            return(all_taxids)
          })
######################################################################
######################################################################
#' Returns the count of taxa in the taxonomy.
#'
#' \code{length} returns the count of taxa included in the
#' instance of the class \code{\linkS4class{obitools.taxonomy}}
#'
#' @param x the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @return an \code{integer} corresponding to the count of taxa.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the count of taxa described in the taxonomy
#' length(taxo)
#'
#' @seealso \code{\link{length}}, \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export length.obitools.taxonomy
#'
length.obitools.taxonomy <- function(x) {
  # The count is obtained from the native routine of the package.
  taxon_count <- .Call('R_length_taxonomy',
                       x,
                       PACKAGE = "ROBITaxonomy")
  return(taxon_count)
}
######################################################################
######################################################################
# Make the existing `max` function available as an S4 generic.
setGeneric(name = "max")
#' Returns the maximum taxid in the taxonomy.
#'
#' \code{max} returns the maximum taxid included in the
#' instance of the class \code{\linkS4class{obitools.taxonomy}}
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#' @param na.rm included for compatibility purpose, this parameter has
#' no effect on this implementation of \code{max}
#'
#' @return an \code{integer} corresponding to the largest taxid of the taxonomy.
#'
#' @examples
#' # load the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # gets the larger taxid of the database
#' max(taxo)
#'
#' @seealso \code{\link{max}}, \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export max.obitools.taxonomy
#'
max.obitools.taxonomy <- function(taxonomy, na.rm = FALSE) {
  # `na.rm` is accepted but never read by this implementation.
  largest_taxid <- .Call('R_max_taxid',
                         taxonomy,
                         PACKAGE = "ROBITaxonomy")
  return(largest_taxid)
}
#' @export
setGeneric(
  name = "ecofind",
  def = function(taxonomy, patterns, rank = NULL, alternative = FALSE) {
    # Generic entry point: the actual work is done by the method selected
    # by S4 dispatch.
    return(standardGeneric("ecofind"))
  }
)
#' Returns taxids associated to the names
#'
#' Return the set of taxids having their name matching the given pattern.
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#' @param patterns one or several regular patterns used to select the taxa.
#' @param rank a \code{character} indicating a taxonomic rank. If not \code{NULL}
#' only taxids corresponding to this rank are returned.
#' @param alternative A logical value \code{TRUE} or \code{FALSE} indicating
#' if the function must only look for a scientific name.
#'
#' @return if just one pattern is given, an integer vector is returned with the
#' corresponding taxids. If a list of patterns is given, the function
#' returns a list of integer vectors, each vector containing the taxids
#' corresponding to a pattern. The returned list is in the same order
#' as the given pattern list.
#'
#' @examples
#' # load the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # retrieves the Vertebrata taxid
#' taxid = ecofind(taxo,"Vertebrata")
#'
#' taxid
#' scientificname(taxo,taxid)
#'
#'
#' taxid = ecofind(taxo,"^Vertebrata$")
#'
#' taxid
#' scientificname(taxo,taxid)
#'
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname ecofind-method
#' @aliases ecofind,obitools.taxonomy
#'
setMethod("ecofind", "obitools.taxonomy", function(taxonomy, patterns, rank = NULL, alternative = FALSE) {
  # Searches a single pattern and removes duplicated taxids from the hits.
  # A missing pattern is passed through as NA without calling the native
  # routine.
  search_one <- function(pattern) {
    if (!is.na(pattern)) {
      hits <- .Call('R_ecofind',
                    taxonomy,
                    pattern,
                    rank,
                    alternative,
                    PACKAGE = "ROBITaxonomy")
      return(unique(hits))
    }
    NA
  }

  matches <- lapply(as.character(patterns), search_one)

  # A single pattern yields its result directly instead of a list of one.
  if (length(matches) == 1) {
    return(matches[[1]])
  }
  return(matches)
})
#' @export
setGeneric(
  name = "validate",
  def = function(taxonomy, taxid) {
    # Generic entry point: the actual work is done by the method selected
    # by S4 dispatch.
    return(standardGeneric("validate"))
  }
)
#' Checks that a \emph{taxid} is really present in taxonomy
#'
#' \code{validate} function in package \pkg{\link{ROBITaxonomy}} checks
#' that a \emph{taxid} is declared in the considered taxonomy.
#'
#' @param taxonomy a \code{\link{obitools.taxonomy}} instance
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#'
#' @return The taxid if it exists, NA otherwise. If the input taxid is a
#' vector of integer returns an integer vector composed of validated
#' taxids and NA values.
#'
#' @examples
#' # load the default taxonomy database included in the ROBITaxonomy library
#' taxo=default.taxonomy()
#'
#' # build a vector of 101 taxids
#' sp.taxid=c(7000:7100)
#'
#' # checks the list of taxids
#' validate(taxo,sp.taxid)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}}
#'
#'
#' @docType methods
#' @rdname validate-methods
#' @aliases validate-methods,obitools.taxonomy
#' @author Eric Coissac
#'
setMethod("validate", "obitools.taxonomy", function(taxonomy, taxid) {
  # Checks a single identifier. A missing identifier is passed through
  # as NA without calling the native routine.
  check_one <- function(id) {
    if (!is.na(id)) {
      return(.Call('R_validate_taxid',
                   taxonomy,
                   id,
                   PACKAGE = "ROBITaxonomy"))
    }
    NA
  }

  ids <- as.integer(taxid)
  sapply(ids, check_one)
})
#' @include taxonomy.R
NULL
#
#
# Manages the loading of the default taxonomy
#
#
# Starts out empty (NULL) when this file is loaded.
.__default__taxonomy__ <- NULL
#' Returns the default taxonomy
#'
#' Returns a \code{\linkS4class{obitools.taxonomy}} instance corresponding
#' to a NCBI taxonomy included by default in the \pkg{\link{ROBITaxonomy}} package.
#'
#' @return a \code{\linkS4class{obitools.taxonomy}} instance.
#'
#' @examples
#'
#' # Load the default taxonomy
#' taxo = default.taxonomy()
#'
#' # and use it for requesting a scientific name
#' scientificname(taxo,7742)
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export
#'
default.taxonomy <- function() {
  # The loaded taxonomy is cached under this name in the global environment.
  # The lookup has to target that same environment: a lookup starting from
  # the function's own environment reaches the package-level NULL
  # placeholder first, which forced the database to be re-read on each call.
  cache.name <- ".__default__taxonomy__"

  cached <- exists(cache.name, envir = globalenv(), inherits = FALSE) &&
    !is.null(get(cache.name, envir = globalenv(), inherits = FALSE))

  if (!cached) {
    # Location of the 'ncbitaxo' database shipped in the package 'extdata'
    # directory.
    taxonomy.path <- file.path(system.file("extdata",
                                           package = "ROBITaxonomy"),
                               "ncbitaxo")
    assign(cache.name,
           read.taxonomy(taxonomy.path),
           envir = globalenv())
  }

  return(get(cache.name, envir = globalenv(), inherits = FALSE))
}
#' Tests if an object is an \code{obitools.taxonomy} instance
#'
#' @param taxonomy the object to test.
#'
#' @return \code{TRUE} if \code{taxonomy} inherits from the
#'         \code{obitools.taxonomy} class, \code{FALSE} otherwise.
#'
#' @examples
#' # an integer is not a taxonomy
#' is.obitools.taxonomy(1L)
#'
#' @export
#'
is.obitools.taxonomy <- function(taxonomy) {
  # The argument itself has to be tested. `t` is base R's transpose
  # function, so testing class(t) always answered FALSE.
  inherits(taxonomy, "obitools.taxonomy")
}
#' @include taxonomy.R
NULL
#' @export
setGeneric(
  name = "longest.path",
  def = function(taxonomy, taxid) {
    # Generic entry point: the actual work is done by the method selected
    # by S4 dispatch.
    return(standardGeneric("longest.path"))
  }
)
#' Returns the longest path from a taxon.
#'
#' The method \code{longest.path} returns the length of the
#' path linking a taxid to the farthest leaf belonging to this taxid.
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @param taxid an \code{integer} vector containing the list of taxids.
#'
#' @return an \code{integer} vector containing the list length.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the longest path in the taxonomy (from the root node)
#' longest.path(taxo,1)
#'
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname longest.path-method
#' @aliases longest.path,obitools.taxonomy
#'
setMethod("longest.path", "obitools.taxonomy",
          function(taxonomy, taxid) {
            # Measures a single identifier. A missing identifier is passed
            # through as NA without calling the native routine.
            measure_one <- function(id) {
              if (!is.na(id)) {
                return(.Call('R_longest_path',
                             taxonomy,
                             id,
                             PACKAGE = "ROBITaxonomy"))
              }
              NA
            }

            ids <- as.integer(taxid)
            sapply(ids, measure_one)
          })
#' @export
setGeneric(
  name = "distance.taxonomy",
  def = function(taxonomy, taxid1, taxid2 = NULL, name = FALSE) {
    # Generic entry point: the actual work is done by the method selected
    # by S4 dispatch. The default of `name` is spelled FALSE because `F`
    # is an ordinary variable that can be reassigned.
    return(standardGeneric("distance.taxonomy"))
  }
)
#' Computes a distance matrix between taxids
#'
#' The method \code{distance.taxonomy} computes a distance matrix between a
#' set of taxids. The distance between two taxa is based on the topology of
#' the taxonomy tree.
#'
#' \deqn{ d(Taxon_A,Taxon_B) = \frac{longest.path(lca(Taxon_A,Taxon_B))}{max(longest.path(Taxon_A),longest.path(Taxon_B))}}
#' { longest.path(lca(Taxon_A,Taxon_B)) / max(longest.path(Taxon_A),longest.path(Taxon_B)) }
#'
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @param taxid1 an \code{integer} vector containing a list of taxids.
#'
#' @param taxid2 an \code{integer} vector containing a list of taxids.
#' If \code{taxid2} is set to \code{NULL} (its default value)
#' then the \code{taxid2} list is considered as equal to
#' \code{taxid1} list.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return distance matrix annotated by taxids or
#' by scientific names.
#'
#' @return the distance matrix between taxids specified in the \code{taxid1}
#' set and the \code{taxid2} set.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # computes the distance matrix between taxids
#' distance.taxonomy(taxo,sp.taxid)
#'
#' # Same thing but the matrix is annotated by scientific names
#' distance.taxonomy(taxo,sp.taxid,name=TRUE)
#'
#' @seealso \code{\link{longest.path}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname distance.taxonomy-method
#' @aliases taxonomy.distance,obitools.taxonomy
#'
setMethod("distance.taxonomy", "obitools.taxonomy",
          function(taxonomy, taxid1, taxid2 = NULL, name = FALSE) {

            # For each pair of taxa the distance is lb / (lb + llca), where
            # llca is the number of leading path elements shared by both
            # taxa and lb is the longest.path() value of the path element
            # found at that position.

            # Taxids rejected by validate() are discarded.
            taxid1 <- taxid1[!is.na(validate(taxonomy, taxid1))]
            t1 <- path(taxonomy, taxid1)

            # Without a second set, distances are computed within taxid1
            # and returned as a "dist" object.
            same <- is.null(taxid2)

            if (same) {
              ntaxon <- length(taxid1)

              # Enumerate every unordered pair once: (1,2), (1,3), ...,
              # (2,3), ... seq_len() keeps the enumeration empty when fewer
              # than two taxids remain, where 2:ntaxon would count
              # backwards.
              heads <- seq_len(max(ntaxon - 1L, 0L))
              first <- rep(heads, rev(heads))
              second <- unlist(lapply(heads,
                                      function(i) (i + 1L):ntaxon))

              t2 <- t1[second]
              t1 <- t1[first]
            } else {
              taxid2 <- taxid2[!is.na(validate(taxonomy, taxid2))]
              t2 <- path(taxonomy, taxid2)

              nt1 <- length(taxid1)
              nt2 <- length(taxid2)

              # Full cross product, taxid1 varying fastest so that the
              # values fill an nt1 x nt2 matrix column by column.
              t1 <- t1[rep(seq_len(nt1), nt2)]
              t2 <- t2[rep(seq_len(nt2), rep(nt1, nt2))]
            }

            if (length(t1) == 0) {
              # No pair to compare: the arithmetic below cannot be applied
              # to empty lists.
              d <- double(0)
            } else {
              # Length of the shorter path of each pair.
              lmin <- mapply(function(a, b) min(length(a), length(b)),
                             t1,
                             t2)

              # Count of positions holding the same element in both paths.
              # NOTE(review): assumes paths of validated taxids are never
              # empty, otherwise 1:l would count backwards - confirm.
              llca <- mapply(function(x, y, l) sum(x[1:l] == y[1:l]),
                             t1,
                             t2,
                             lmin)

              # Path element found at position llca of the first path.
              lb <- longest.path(taxonomy,
                                 mapply(function(x, y) x[y], t1, llca))

              d <- as.double(lb / (lb + llca))
            }

            if (same) {
              attr(d, "Size") <- ntaxon
              if (name) {
                attr(d, "Labels") <- scientificname(taxonomy, taxid1)
              } else {
                attr(d, "Labels") <- as.character(taxid1)
              }
              attr(d, "Diag") <- FALSE
              attr(d, "Upper") <- FALSE
              attr(d, "method") <- NULL
              attr(d, "call") <- match.call()
              class(d) <- "dist"
            } else {
              if (name) {
                d <- matrix(d, nt1, nt2,
                            dimnames = list(scientificname(taxonomy, taxid1),
                                            scientificname(taxonomy, taxid2)))
              } else {
                d <- matrix(d, nt1, nt2,
                            dimnames = list(as.character(taxid1),
                                            as.character(taxid2)))
              }
            }

            return(d)
          })
#' @include taxonomy.R
NULL
#' @export
setGeneric(
  name = "lowest.common.ancestor",
  def = function(taxonomy, taxid, threshold = 1.0, error = 0, name = FALSE) {
    # Generic entry point: the actual work is done by the method selected
    # by S4 dispatch.
    return(standardGeneric("lowest.common.ancestor"))
  }
)
#' Computes the lowest common ancestor in the taxonomy tree between a set of taxa
#'
#' The \code{lowest.common.ancestor} function in package \pkg{ROBITaxonomy} computes
#' the lowest common ancestor of a set of taxids. The lowest common ancestor (LCA)
#' is the most precise taxonomic group shared by all the considered taxa. The
#' \code{lowest.common.ancestor} function implemented in the \pkg{ROBITaxonomy}
#' package, considers a fuzzy definition of the LCA as the most precise
#' taxonomic group shared by a quorum of the considered taxa.
#'
#' @param taxonomy an instance of \code{\linkS4class{obitools.taxonomy}}
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#' @param threshold a numeric value between 0.0 and 1.0 indicating the minimum
#' quorum of taxid that must belong the LCA.
#' @param error an integer value indicating the maximum count of taxids that
#' have not to belong the returned taxid. A \code{threshold} below 1.0 have
#' priority on the \code{error} parameter.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating if the
#' method return a \emph{taxid} or a scientific name.
#'
#' @return Depending on the value of the \code{name} argument, set by default
#' to \code{FALSE} the method returns :
#' \describe{
#' \item{If \code{name==FALSE}}{ the taxid of the taxon corresponding
#' to the LCA as an integer value}
#' \item{If \code{name==TRUE}}{ the scientific name of the taxon
#' corresponding to the LCA as a string}
#' }
#'
#' @examples
#' require(ROBITaxonomy)
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
#'
#' # read the taxonomy database
#'
#' taxo=read.taxonomy('ncbitaxo')
#'
#' # build a vector of 6 taxids corresponding to species
#'
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the lowest common ancestor taxids
#'
#' lowest.common.ancestor(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE)
#'
#' # If we accept that 2 or 1 taxa do not belong to the LCA
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=2)
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=1)
#'
#' # Partial LCA can also be specified as the minimal frequency of
#' # taxa belonging the LCA
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,threshold=0.8)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{path}}, \code{\link{parent}},
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname lowest.common.ancestor-method
#' @aliases lowest.common.ancestor,obitools.taxonomy
#'
setMethod("lowest.common.ancestor", "obitools.taxonomy",
function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
if (threshold != 1.0)
error=as.integer(floor(length(taxid) * (1-threshold)))
#
# Remove nod valid taxid
#
taxid = validate(taxonomy,taxid)
if (any(is.na(taxid)))
return(NA)