Commit 05efb38f by Eric Coissac

initial commit

parents
/man/
/vignettes/
/Read-and-delete-me
Package: ROBITaxonomy
Type: Package
Title: Metabarcoding data biodiversity analysis
Version: 0.1
Date: 2012-08-23
Author: LECA - Laboratoire d'ecologie alpine
Maintainer: LECA OBITools team <obitools@metabarcoding.org>
Description: More about what it does (maybe more than one line)
License: CeCILL v2.0
LazyLoad: yes
Roxygen: list(wrap = FALSE)
Collate:
'ROBITaxonomy.R'
'taxonomy.R'
'basic.R'
'default.R'
'distance.R'
'lca.R'
'rank.R'
RoxygenNote: 5.0.1
# Generated by roxygen2: do not edit by hand
export(default.taxonomy)
export(distance.taxonomy)
export(ecofind)
export(family)
export(genus)
export(is.obitools.taxonomy)
export(is.subcladeof)
export(kingdom)
export(length.obitools.taxonomy)
export(longest.path)
export(lowest.common.ancestor)
export(max.obitools.taxonomy)
export(parent)
export(path)
export(rank.list)
export(read.taxonomy)
export(scientificname)
export(species)
export(superkingdom)
export(taxid.list)
export(taxonatrank)
export(taxonomicrank)
export(validate)
exportClasses(obitools.taxonomy)
exportClasses(obitools.taxonomyOrNULL)
useDynLib(ROBITaxonomy)
This diff is collapsed. Click to expand it.
#' @include taxonomy.R
NULL
#
#
# Manage le loading of the default taxonomy
#
#
.__default__taxonomy__ = NULL
#' Returns the default taxonomy
#'
#' Returns a \code{\linkS4class{obitools.taxonomy}} instance corresponding
#' to a NCBI taxonomy included by default in the \pkg{\link{ROBITaxonomy}} package.
#'
#' @return a \code{\linkS4class{obitools.taxonomy}} instance.
#'
#' @examples
#'
#' # Load the default taxonomy
#' taxo = default.taxonomy()
#'
#' # and use it for requesting a scientific name
#' scientificname(taxo,7742)
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export
#'
default.taxonomy = function() {
if (is.null(get(".__default__taxonomy__",envir = environment())))
assign(".__default__taxonomy__",
read.taxonomy(paste(system.file("extdata",
package="ROBITaxonomy"),
'ncbitaxo',
sep='/')),
envir=globalenv())
return(get(".__default__taxonomy__",envir = globalenv()))
}
#' @export
#'
is.obitools.taxonomy = function(taxonomy) {
class(t)[1] == "obitools.taxonomy"
}
#' @include taxonomy.R
NULL
#' @export
setGeneric("longest.path", function(taxonomy,taxid) {
return(standardGeneric("longest.path"))
})
#' Returns the longuest path from a taxon.
#'
#' The method \code{longest.path} returns the length of the
#' path linking a taxid to the farest leaf belonging this taxid.
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @param taxid an \code{integer} vector containing the list of taxids.
#'
#' @return an \code{integer} vector containing the list length.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # returns the longest path in the taxonomy (from the root node)
#' longest.path(taxo,1)
#'
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname longest.path-method
#' @aliases longest.path,obitools.taxonomy
#'
setMethod("longest.path", "obitools.taxonomy",
function(taxonomy,taxid) {
getp = function(t) {
if (is.na(t))
return(NA)
else
return(.Call('R_longest_path',
taxonomy,
t,
PACKAGE="ROBITaxonomy"))
}
taxid = as.integer(taxid)
sapply(taxid,getp)
})
#' @export
setGeneric("distance.taxonomy", function(taxonomy,taxid1,taxid2=NULL,name=F) {
return(standardGeneric("distance.taxonomy"))
})
#' Computes a distance matrix between taxids
#'
#' The method \code{taxonomy.distance} computes a distance matrix between a
#' set of taxids. The distance between two taxa is based on the topology of
#' the taxonomomy tree.
#'
#' \deqn{ d(Taxon_A,Taxon_B) = \frac{longest.path(lca(Taxon_A,Taxon_B))}{max(longest.path(Taxon_A),longest.path(Taxon_B))}}
#' { longest.path(lca(Taxon_A,Taxon_B)) / max(longest.path(Taxon_A),longest.path(Taxon_B)) }
#'
#'
#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
#'
#' @param taxid1 an \code{integer} vector containing a list of taxids.
#'
#' @param taxid2 an \code{integer} vector containing a list of taxids.
#' If \code{taxid2} is set to \code{NULL} (it's default value)
#' then the \code{taxid2} list is considered as equal to
#' \code{taxid1} list.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating
#' if the method return distance matrix annotated by taxids or
#' by scientific names.
#'
#' @return the distance matrix between taxids specified in the \code{taxid1}
#' set and the \code{taxid2} set.
#'
#' @examples
#' # loads the default taxonomy database
#' taxo=default.taxonomy()
#'
#' # build a vector of 6 taxids corresponding to species
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # computes the distance matrix between taxids
#' distance.taxonomy(taxo,sp.taxid)
#'
#' # Same thing but the matrix is annotated by scientific names
#' distance.taxonomy(taxo,sp.taxid,name=TRUE)
#'
#' @seealso \code{\link{longest.path}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname distance.taxonomy-method
#' @aliases taxonomy.distance,obitools.taxonomy
#'
setMethod("distance.taxonomy", "obitools.taxonomy",
function(taxonomy,taxid1,taxid2=NULL,name=F) {
taxdist = function(r)
{
t1=r[1]
t2=r[2]
if (is.na(t1) | is.na(t2))
return(NA)
p1 = path(taxonomy,t1)
p2 = path(taxonomy,t2)
minp = min(length(p1),length(p2))
common = sum(p1[1:minp] == p2[1:minp])
lca = p1[common]
lp = longest.path(taxonomy,lca)
return(lp/(lp+common))
}
multitaxdist=function(t1,t2) {
apply(data.frame(t1,t2),1,taxdist)
}
taxid1 = taxid1[! is.na(validate(taxonomy,taxid1))]
t1 = path(taxonomy,taxid1)
same = is.null(taxid2)
if (same)
{
ntaxon = length(taxid1)
t2 = t1[unlist(sapply(2:ntaxon,
function(x) x:ntaxon))]
t1 = t1[rep(1:(ntaxon-1),(ntaxon-1):1)]
}
else
{
taxid2 = taxid2[! is.na(validate(taxonomy,taxid2))]
t2 = path(taxonomy,taxid2)
nt1 = length(taxid1)
nt2 = length(taxid2)
t1 = t1[rep(1:nt1,nt2)]
t2 = t2[rep(1:nt2,rep(nt1,nt2))]
}
lmin = mapply(function(a,b) min(length(a),length(b)),
t1,
t2)
llca = mapply(function(x,y,l) sum(x[1:l]==y[1:l]),
t1,
t2,
lmin)
lb = longest.path(taxonomy,mapply(function(x,y) x[y],t1,llca))
d = as.double(lb / (lb + llca))
if (same) {
attr(d, "Size") <- ntaxon
if (name)
attr(d, "Labels") <- scientificname(taxonomy,taxid1)
else
attr(d, "Labels") <- as.character(taxid1)
attr(d, "Diag") <- FALSE
attr(d, "Upper") <- FALSE
attr(d, "method") <- NULL
attr(d, "call") <- match.call()
class(d) <- "dist"
}
else {
if (name)
d = matrix(d,nt1,nt2,
dimnames=list(scientificname(taxonomy,taxid1),
scientificname(taxonomy,taxid2)))
else
d = matrix(d,nt1,nt2,
dimnames=list(as.character(taxid1),
as.character(taxid2)))
}
return(d)
})
#' @include taxonomy.R
NULL
#' @export
setGeneric("lowest.common.ancestor", function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
return(standardGeneric("lowest.common.ancestor"))
})
#' Computes the lowest common ancestor in the taxonomy tree between a set of taxa
#'
#' The \code{lowest.common.ancestor} function in package \pkg{ROBITaxonomy} computes
#' the lowest common ancestor of a set of taxids. The lowest common ancestor (LCA)
#' is the most precise taxonomic group shared by all the considered taxa. Tha
#' \code{lowest.common.ancestor} function implemented in the \pkg{ROBITaxonomy}
#' package, considers a fuzzy definition of the LCA as the most precise
#' taxonomic group shared by a quorum of the considered taxa.
#'
#' @param taxonomy an instance of \code{\linkS4class{obitools.taxonomy}}
#' @param taxid an integer value or a vector of integer representing NCBI
#' taxonomic identifiers.
#' @param threshold a numeric value between 0.0 and 1.0 indicating the minimum
#' quorum of taxid that must belong the LCA.
#' @param error an integer value indicating the maximum count of taxids that
#' have not to belong the returned taxid. A \code{threshold} below 1.0 have
#' priority on the \code{error} parameter.
#' @param name A logical value \code{TRUE} or \code{FALSE} indicating if the
#' method return a \emph{taxid} or a scientific name.
#'
#' @return Depending on the value of the \code{name} argument, set by default
#' to \code{FALSE} the method returns :
#' \describe{
#' \item{If \code{name==FALSE}}{ the taxid of the taxon corresponding
#' to the LCA as an integer value}
#' \item{If \code{name==TRUE}}{ the scientific name of the taxon
#' corresponding to the LCA as a string}
#' }
#'
#' @examples
#' require(ROBITaxonomy)
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
#'
#' # read the taxonomy database
#'
#' taxo=read.taxonomy('ncbitaxo')
#'
#' # build a vector of 6 taxids corresponding to species
#'
#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
#'
#' # look for the lowest common ancestor taxids
#'
#' lowest.common.ancestor(taxo,sp.taxid)
#'
#' # same thing but returns results as a vector of scientific names
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE)
#'
#' # If we accept than 2 or 1 taxa do not belong the LCA
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=2)
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=1)
#'
#' # Partial LCA can also be speciefied as the minimal frequency of
#' # taxa belonging the LCA
#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,threshold=0.8)
#'
#' @seealso class \code{\linkS4class{obitools.taxonomy}},
#' and methods \code{\link{path}}, \code{\link{parent}},
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @docType methods
#' @rdname lowest.common.ancestor-method
#' @aliases lowest.common.ancestor,obitools.taxonomy
#'
setMethod("lowest.common.ancestor", "obitools.taxonomy",
function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
if (threshold != 1.0)
error=as.integer(floor(length(taxid) * (1-threshold)))
#
# Remove nod valid taxid
#
taxid = validate(taxonomy,taxid)
if (any(is.na(taxid)))
return(NA)
ntaxid=length(taxid)
nok = ntaxid - error
if (ntaxid==1)
return(taxid)
allpath = path(taxonomy,taxid)
minlength= min(vapply(allpath,length,0))
lca=NA
for (i in 1:minlength) {
n = vapply(allpath,function(x) x[i],0)
nt = table(n)
mt = max(nt)
if (mt >= nok) {
p = nt[nt==mt]
if (length(p)==1)
lca=as.integer(names(p)[1])
else
break
}
else
break
}
if (name)
return(scientificname(taxonomy,lca))
else
return(lca)
})
This diff is collapsed. Click to expand it.
#' @include ROBITaxonomy.R
#' @useDynLib ROBITaxonomy
NULL
#' Gives access to a taxonomy preformated by OBITools
#'
#' A S4 class describing a taxonomy. It allows access to
#' taxonomy formated for OBITools.
#'
#' @references \describe{
#' \item{NCBI Taxonomy : }{\url{http://www.ncbi.nlm.nih.gov/taxonomy}}
#' \item{OBITools : }{\url{http://metabarcoding/obitools/doc}}
#' }
#'
#' @seealso \code{\link{read.taxonomy}}
#'
#' @name obitools.taxonomy
#' @rdname obitools-taxonomy-class
#' @keywords taxonomy
#' @author Eric Coissac
#' @exportClass obitools.taxonomy
#'
setClass("obitools.taxonomy",
#
# Attribute declaration
#
# data.frame containing the counts of reads per samples
# 1 samples per line
# 1 sequence per column
representation(
# An external pointer structure to
# the C taxonomy structure
pointer = "externalptr",
# the name of the database on the hard disk
dbname = 'character',
# the working directory when the taxonomy
# object is created.
# This inforation combined with bname allows
# to reload taxonomy from disk
workingdir = 'character',
# Indicate if the taxonomy is saved in a file
# Taxonomy created in R or modified in R are
# not saved ==> This have to be take into
# consideration but how ???
saved = 'logical'
),
#
# Check object structure
#
validity = function(object) {
return(TRUE)
}
)
#' obitools.taxonomy constructor
#'
#' --> this constructor have not to be called directly
#' use the read.obitools.taxonomy function to
#' create a new instance of taxonomy
#'
#' @docType methods
#' @rdname initialize-methods-obitools.taxonomy
#' @aliases initialize-methods,obitools.taxonomy
setMethod("initialize",
"obitools.taxonomy",
function(.Object, pointer,dbname,workingdir,saved) {
.Object@pointer <- pointer
.Object@dbname <- dbname
.Object@workingdir <- workingdir
.Object@saved <- saved
validObject(.Object) ## valide l'objet
return(.Object)
})
#' @exportClass obitools.taxonomyOrNULL
setClassUnion("obitools.taxonomyOrNULL",c("obitools.taxonomy","NULL"))
#' @export
setGeneric("path", function(taxonomy,taxid,name=FALSE) {
return(standardGeneric("path"))
})
setMethod("path", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
getp = function(t) {
if (is.na(t))
return(NA)
else
{
path=c()
t=.Call('R_validate_taxid',
taxonomy,
as.integer(t),
PACKAGE="ROBITaxonomy")
if (is.na(t))
return(NA)
repeat {
if (name)
path = c(scientificname(taxonomy,t),path)
else
path = c(t,path)
t = .Call('R_get_parent',
taxonomy,
t,
FALSE,
PACKAGE="ROBITaxonomy")
if (is.na(t))
break
}
return(path)
}
}
taxid=as.integer(taxid)
name=as.logical(name)
p = lapply(taxid,getp)
d = dim(p)
if (!is.null(d))
if (d[2]==1)
p = as.vector(p)
return(p)
})
#' @export
setGeneric("is.subcladeof", function(taxonomy,taxid,parent) {
return(standardGeneric("is.subcladeof"))
})
setMethod("is.subcladeof", "obitools.taxonomy",function(taxonomy,taxid,parent) {
taxid = as.integer(taxid)
parent= as.integer(parent)
return(.Call('R_is_under_taxon',
taxonomy,
taxid,
parent,
PACKAGE="ROBITaxonomy"))
})
build.taxonomy = function(pointer,dbname,workingdir,saved) {
rd <- new('obitools.taxonomy',
pointer=pointer,
dbname=dbname,
workingdir=workingdir,
saved=saved
)
return(rd)
}
#' Reads a taxonomy
#'
#' \code{read.taxonomy} reads a taxonomy formated by OBITools.
#' NCBI taxonomy can be download from the NCBI FTP site in taxdump format.
#' The taxdump must be formated using the obitaxonomy command from OBITools
#' before being used in R. A OBITools formated taxonomy is composed of 3 files
#' with the same prefix name and suffixes .tdx, .rdx, .ndx, two extra files
#' suffixed .adx and .ldx can also be present.
#'
#' @param dbname A character string containing the file name of the database
#'
#' @return an instance of the class \code{\linkS4class{obitools.taxonomy}}
#'
#' @examples
#'
#' \dontshow{# switch the working directory to the data package directory}
#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
#'
#' # read the taxonomy ncbi
#' ncbi = read.taxonomy("ncbitaxo")
#'
#' # and use it for requesting a scientific name
#' scientificname(ncbi,7742)
#'
#' @seealso \code{\linkS4class{obitools.taxonomy}}
#'
#' @author Eric Coissac
#' @keywords taxonomy
#' @export
read.taxonomy = function(dbname) {
t <- .Call('R_read_taxonomy',dbname,TRUE,PACKAGE="ROBITaxonomy")
return(build.taxonomy(t,dbname,getwd(),TRUE))
}
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: knitr
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
/*
* print the message given as argument and exit the program
* @param error error number
* @param message the text explaining what's going on
* @param filename the file source where the program failed
* @param linenumber the line where it has failed
* filename and linenumber are written at pre-processing
* time by a macro
*/
void ecoError(int32_t errorcode,
const char* message,
const char * filename,
int linenumber)
{
error("Error %d in file %s line %d : %s",
errorcode,
filename,
linenumber,
message);
}
File added
#include "ecoPCR.h"
#include <stdio.h>
#include <stdlib.h>
#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
(((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
int32_t is_big_endian()
{
int32_t i=1;
return (int32_t)((char*)&i)[0];
}
int32_t swap_int32_t(int32_t i)
{
return SWAPINT32(i);
}
/**
* Read part of the file
* @param *f the database
* @param recordSize the size to be read
*
* @return buffer
*/
void *read_ecorecord(FILE *f,int32_t *recordSize)
{
static void *buffer =NULL;
int32_t buffersize=0;
int32_t read;
if (!recordSize)
ECOERROR(ECO_ASSERT_ERROR,
"recordSize cannot be NULL");
read = fread(recordSize,
1,
sizeof(int32_t),
f);