Commit 9e33d6a1 by Eric Coissac

Initial commit

parents
Package: ROBIBarcodes
Type: Package
Title: Metabarcoding barcode database
Version: 0.1
Date: 2013-10-24
Author: LECA - Laboratoire d'ecologie alpine
Maintainer: LECA OBITools team <obitools@metabarcoding.org>
Description: More about what it does (maybe more than one line)
License: CeCILL v2.0
LazyLoad: yes
RoxygenNote: 5.0.1
Collate:
'ROBIBarcodes.R'
'xmlMods.R'
'bibliography.R'
'ecopcr.R'
'emptydb.R'
'logo.R'
'mismatchplot.R'
'piexy.R'
'primer_table.R'
'primers.R'
'resolution.R'
'taxonomy.R'
'taxonomy_table.R'
'xmlspare.R'
# Generated by roxygen2: do not edit by hand
export(add.reference.barcodedb)
export(add.taxon.barcodedb)
export(addmodsnamespace)
export(bezier3)
export(bib2mods)
export(dna.shanon)
export(dnalogo)
export(dnalogoplot)
export(ecopcr.forward.frequencies)
export(ecopcr.forward.shanon)
export(ecopcr.reverse.frequencies)
export(ecopcr.reverse.shanon)
export(has.bibliography)
export(metabarcodedb)
export(metabarcodedb.taxonomy)
export(mismatchplot)
export(pie.xy)
export(plotDNAletter)
export(primers.data.frame)
export(read.ecopcr.result)
export(resolution)
export(taxonomy.data.frame)
export(whitepaper)
import(ROBITaxonomy)
import(XML)
useDynLib(ROBIBarcodes)
#' A package to manipulate DNA metabarcoding data.
#'
#' This package was written as a following of the OBITools.
#'
#' \tabular{ll}{
#' Package: \tab ROBIBarcodes\cr
#' Type: \tab Package\cr
#' Version: \tab 0.1\cr
#' Date: \tab 2013-06-27\cr
#' License: \tab CeCILL 2.0\cr
#' LazyLoad: \tab yes\cr
#'}
#'
#' @name ROBIBarcodes-package
#' @aliases ROBIBarcodes
#' @docType package
#' @title A package to manipulate DNA metabarcoding marker database.
#' @author Frederic Boyer
#' @author Aurelie Bonin
#' @author Lucie Zinger
#' @author Eric Coissac
#'
#' @references http://metabarcoding.org/obitools
#'
NULL
#'@include xmlMods.R
#'@import XML
#'
NULL
#' Tests if the metabarcode database has at least one bibliography reference
#'
#' @export
has.bibliography = function(barcodedb) {
length(getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes")))>0
}
#' @export
add.reference.barcodedb = function(barcodedb,bibfile,bibutils='bib2xml') {
if (! has.bibliography(barcodedb)) {
# We create the bibliography node
metabarcode = getNodeSet(barcodedb,
path='/obi:obimetabarcodedb',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
spare = sparexmltree()
bibliography = getNodeSet(spare,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
bibliography = xmlClone(bibliography)
addChildren(metadata,bibliography,
at = NA)
}
bibliography=getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:bibliography',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))
ref = bib2mods(bibfile,bibutils)
hidden=addChildren(bibliography[[1]],kids=ref)
}
\ No newline at end of file
#'@include ROBIBarcodes.R
NULL
# column 1 : accession number
# column 2 : sequence length
# column 3 : taxonomic id
# column 4 : rank
# column 5 : species taxonomic id
# column 6 : scientific name
# column 7 : genus taxonomic id
# column 8 : genus name
# column 9 : family taxonomic id
# column 10 : family name
# column 11 : super kingdom taxonomic id
# column 12 : super kingdom name
# column 13 : strand (direct or reverse)
# column 14 : first oligonucleotide
# column 15 : number of errors for the first strand
# column 16 : Tm for hybridization of primer 1 at this site
# column 17 : second oligonucleotide
# column 18 : number of errors for the second strand
# column 19 : Tm for hybridization of primer 1 at this site
# column 20 : amplification length
# column 21 : sequence
# column 22 : definition
#' Read the result file produced by the ecoPCR program.
#'
#' @export
read.ecopcr.result = function(file)
{
split.line = function(line) {
l = strsplit(line,split=" +\\| +")[[1]]
l = c(l[1:21],paste(l[-c(1:21)],sep="|"))
return(l)
}
if (missing(file) && !missing(text)) {
file <- textConnection(text)
on.exit(close(file))
}
if (is.character(file)) {
file <- file(file, "rt")
on.exit(close(file))
}
if (!inherits(file, "connection"))
stop("'file' must be a character string or connection")
if (!isOpen(file, "rt")) {
open(file, "rt")
on.exit(close(file))
}
line = readLines(file,1)
while (length(grep('^#',line))==1) {
line = readLines(file,1)
}
pushBack(line,file)
lines = lapply(readLines(file),split.line)
nlines = length(lines)
AC = sapply(1:nlines,function(x) lines[[x]][1])
seq_length = as.integer(sapply(1:nlines,function(x) lines[[x]][2]))
taxid = as.integer(sapply(1:nlines,function(x) lines[[x]][3]))
rank = as.factor(sapply(1:nlines,function(x) lines[[x]][4]))
species = type.convert(sapply(1:nlines,function(x) lines[[x]][5]),na.string="###")
species_name = sapply(1:nlines,function(x) lines[[x]][6])
genus = type.convert(sapply(1:nlines,function(x) lines[[x]][7]),na.string="###")
genus_name = sapply(1:nlines,function(x) lines[[x]][8])
family = type.convert(sapply(1:nlines,function(x) lines[[x]][9]),na.string="###")
family_name = sapply(1:nlines,function(x) lines[[x]][10])
superkingdom = type.convert(sapply(1:nlines,function(x) lines[[x]][11]),na.string="###")
superkingdom_name = sapply(1:nlines,function(x) lines[[x]][12])
strand = as.factor(sapply(1:nlines,function(x) lines[[x]][13]))
forward_match = sapply(1:nlines,function(x) lines[[x]][14])
forward_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][15]))
forward_tm = as.double(sapply(1:nlines,function(x) lines[[x]][16]))
reverse_match = sapply(1:nlines,function(x) lines[[x]][17])
reverse_mismatch = as.integer(sapply(1:nlines,function(x) lines[[x]][18]))
reverse_tm = as.double(sapply(1:nlines,function(x) lines[[x]][19]))
amplicon_length = as.integer(sapply(1:nlines,function(x) lines[[x]][20]))
sequence = sapply(1:nlines,function(x) lines[[x]][21])
definition = sapply(1:nlines,function(x) lines[[x]][22])
eco = data.frame(AC,seq_length,taxid,rank,
species,species_name,
genus,genus_name,
family,family_name,
superkingdom,superkingdom_name,
strand,
forward_match,forward_mismatch,forward_tm,
reverse_match,reverse_mismatch,reverse_tm,
amplicon_length,sequence,definition
)
return(eco)
}
ecopcr.frequencies = function(matches,group=NULL) {
compute = function(matches) {
w = as.matrix(do.call(rbind,strsplit(as.character(matches),'')))
d = dim(w)
w=factor(w,levels=c('A','C','G','T'))
dim(w)=d
w=t(w)
freq = mapply(function(x) table(w[x,]),1:d[2])
freq = freq[c('A','C','G','T'),]
csum = colSums(freq)
freq = sweep(freq,2,csum,'/')
attr(freq,'count')=length(w)
return(freq)
}
if (is.null(group))
return(compute(matches))
else {
lmatches = aggregate(matches,by=list(group=as.factor(group)),as.character)
w = lmatches$x
names(w)=lmatches$group
lf = lapply(w,compute)
return(lf)
}
}
#' @export
ecopcr.forward.frequencies = function(ecopcr,group=NULL) {
return(ecopcr.frequencies(ecopcr$forward_match,group))
}
#' @export
ecopcr.reverse.frequencies = function(ecopcr,group=NULL) {
return(ecopcr.frequencies(ecopcr$reverse_match,group))
}
#' @export
dna.shanon = function(freq,base=2) {
shanon = log(4)/log(base) - colSums(-freq *log(freq) / log(base),na.rm=TRUE)
return(sweep(freq,2,shanon,'*'))
}
ecopcr.shanon = function(matches,group=NULL,base=2) {
if (is.null(group)) {
freq = ecopcr.frequencies(matches)
return(dna.shanon(freq))
}
else {
lf = lapply(ecopcr.frequencies(matches,group),dna.shanon)
return(lf)
}
}
#' @export
ecopcr.forward.shanon = function(ecopcr,group=NULL,base=2) {
return(ecopcr.shanon(ecopcr$forward_match,group,base))
}
#' @export
ecopcr.reverse.shanon = function(ecopcr,group=NULL,base=2) {
return(ecopcr.shanon(ecopcr$reverse_match,group,base))
}
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
#' Creates a new empty metabarcode database.
#'
#' @export
metabarcodedb = function() {
emptyfile = paste(system.file("extdata",
package="ROBIBarcodes"),
'empty.xml',
sep='/')
empty = xmlParseDoc(emptyfile)
return(empty)
}
\ No newline at end of file
This diff is collapsed. Click to expand it.
#'@include ROBIBarcodes.R
#'@include logo.R
NULL
#' Draw a scatter plot of the reverse mismatches as a function of forward mismatches.
#'
#' The \code{mismatchplot} function draws a scatter plot of the number of mismatches
#' observed in an ecoPCR result for the reverse primer as a function of the mismatches
#' for the reverse primer. Each point for a pair (forward_mismatch,reverse_mismatch) is
#' drawn as a circle having a surface proportional to the aboundance of this pair in the
#' results. If a grouping factor is specified, then the circle is replaced by a pie chart.
#'
#' @param ecopcr an ecoPCR result data.frame as returned by the \code{\link{read.ecopcr.result}}
#' function.
#'
#' @param group a factor decribing classes amongst the amplicon described in the ecoPCR
#' result
#'
#' @param col a vector describing the colored used for the bubble or the pie charts
#'
#' @param legend a character vector describing the legend for each modality of the
#' grouping factor. By default the factor levels are used for the legend
#'
#' @param legend.cex the expension factor for the legend text
#'
#' @param inset the distance to the margin of the legend box (see the \code{\link{legend}}
#' documentation)
#'
#' @examples
#'
#' # Load the ROBITools library
#' library(ROBITools)
#'
#' # Load the default taxonomy
#' taxo = default.taxonomy()
#'
#' # Load the sample ecoPCR data file
#' data(GH.ecopcr)
#'
#' # Computes classes associated to each taxid
#' orders = as.factor(taxonatrank(taxo,GH.ecopcr$taxid,'order',name=T))
#'
#' # Plot the graph
#' mismatchplot(GH.ecopcr,group=orders)
#'
#' @seealso \code{\link{read.ecopcr.result}}
#' @author Eric Coissac
#' @export
mismatchplot = function(ecopcr,group=NULL,
col=NULL,legend=NULL,
legend.cex=0.7,inset=c(0.02,0.02)) {
maxforward_error = max(ecopcr$forward_mismatch)
maxreverse_error = max(ecopcr$reverse_mismatch)
maxerror=max(maxforward_error,maxreverse_error)
if (is.null(group))
group=factor(rep("all",dim(ecopcr)[1]))
else
group=as.factor(group)
if (is.null(legend))
legend = levels(group)
actualheight= maxerror + 1
actualwidth = maxerror + 1
if (length(levels(group)) > 1)
actualwidth = actualwidth + 2
whitepaper(actualwidth,actualheight,xmin=-0.5,ymin=-0.5,asp=1)
axis(1,at=0:maxerror,
labels=0:maxerror)
axis(2,at=0:maxerror,
labels=0:maxerror)
data = aggregate(group,by=list(forward=ecopcr$forward_mismatch,
reverse=ecopcr$reverse_mismatch),
table)
data <- data[rowSums(data[,c(-1,-2),drop=FALSE])>0, , drop=FALSE]
if (is.null(col))
col <- c("white", "lightblue", "mistyrose", "lightcyan",
"lavender", "cornsilk")
value=data[,c(-1,-2),drop=FALSE]
x = as.integer(data[,1])
y = as.integer(data[,2])
diam = sqrt(rowSums(value))
radius = diam / max(diam) / 2
hide = mapply(pie.xy,x,y,
data=lapply(1:(dim(value)[1]),function(y) value[y,]),
radius=radius,
label="",MoreArgs=list(col=col))
if (length(levels(group)) > 1)
legend('topright',legend=legend,fill=col, cex=legend.cex, inset=inset)
}
\ No newline at end of file
#'@include ROBIBarcodes.R
NULL
#' @export
pie.xy = function (x,y,data, labels = names(x), edges = 200, radius = 0.8, clockwise = FALSE,
init.angle = if (clockwise) 90 else 0, density = NULL, angle = 45,
col = NULL, border = NULL, lty = NULL, ...)
{
if (!is.numeric(data) || any(is.na(data) | data < 0))
stop("'data' values must be positive.")
if (is.null(labels))
labels <- as.character(seq_along(data))
else labels <- as.graphicsAnnot(labels)
data <- c(0, cumsum(data)/sum(data))
dx <- diff(data)
nx <- length(dx)
# plot.new()
# pin <- par("pin")
xlim <- ylim <- c(-1, 1)
# if (pin[1L] > pin[2L])
# xlim <- (pin[1L]/pin[2L]) * xlim
# else ylim <- (pin[2L]/pin[1L]) * ylim
# dev.hold()
# on.exit(dev.flush())
# plot.window(xlim, ylim, "", asp = 1)
if (is.null(col))
col <- if (is.null(density))
c("white", "lightblue", "mistyrose", "lightcyan",
"lavender", "cornsilk")
else par("fg")
if (!is.null(col))
col <- rep_len(col, nx)
if (!is.null(border))
border <- rep_len(border, nx)
if (!is.null(lty))
lty <- rep_len(lty, nx)
angle <- rep(angle, nx)
if (!is.null(density))
density <- rep_len(density, nx)
twopi <- if (clockwise)
-2 * pi
else 2 * pi
t2xy <- function(t) {
t2p <- twopi * t + init.angle * pi/180
list(x = radius * cos(t2p) , y = radius * sin(t2p))
}
for (i in 1L:nx) {
n <- max(2, floor(edges * dx[i]))
P <- t2xy(seq.int(data[i], data[i + 1], length.out = n))
if (nx>1)
polygon(c(P$x + x, x), c(P$y + y , y),
density = density[i], angle = angle[i],
border = border[i], col = col[i], lty = lty[i])
else
polygon(P$x + x, P$y + y,
density = density[i], angle = angle[i],
border = border[i], col = col[i], lty = lty[i])
P <- t2xy(mean(data[i + 0:1]))
lab <- as.character(labels[i])
if (!is.na(lab) && nzchar(lab)) {
lines(c(1 , 1.05) * P$x + x, c(1, 1.05) * P$y + y)
text(1.1 * P$x + x , 1.1 * P$y + y, labels[i], xpd = TRUE,
adj = ifelse(P$x < 0, 1, 0), ...)
}
}
# title(main = main, ...)
invisible(NULL)
}
\ No newline at end of file
#Commentaires lus par roxygen
#'@include ROBIBarcodes.R
#'@import XML
#'
NULL
#NULL termine le commentaire pour roxygen
extractPrimers <-function(primer){
id=xmlAttrs(primer)["ID"]
name=xmlValue(xmlChildren(xmlChildren(primer)$name)$text)
sequence=xmlValue(xmlChildren(xmlChildren(primer)$sequence)$text)
coding=as.logical(xmlValue(xmlChildren(xmlChildren(primer)$coding)$text))
p=list(id=id, name=name, sequence=sequence, coding=coding)
return(p)
}
#Export pour rendre publique la fonction
#' Builds primer data frame from metabarcodedb
#'
#' The \code{primers.data.frame} function extracts all the primer information
#' from the \code{metabarcodedb} database.
#'
#' @param barcodedb a xml document containing a metabarcodedb.
#'
#' @return a \code{data.frame} describing primers.
#'
#' @examples
#' # load the XML library
#' library(XML)
#'
#' # load the example metabarcodedb database
#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes"))
#'
#' # extracts the primer table
#' primers.data.frame(db)
#'
#' @author Aurelie Bonin
#' @keywords metabarcodes
#'
#' @export
primers.data.frame <-function(barcodedb){
p=getNodeSet(db,
path="/obi:obimetabarcodedb/obi:primers/obi:primer" ,
namespaces=c(obi="http://metabarcoding.org/OBIMetabarcodes"))
primerTable=as.data.frame(do.call(rbind,lapply(p,extractPrimers)))
rownames(primerTable)=primerTable$id
primerTable=primerTable[,-1]
return(primerTable)
}
#'@include xmlMods.R
#'@import XML
#'
NULL
add.primer.barcodedb = function(barcodedb,
name,
sequence,
coding,
documentation) {
' <primer ID="PR.XXX">
<name>Xxx</name>
<sequence>CGATCGATGCTAGCTAGCTGAT</sequence>
<coding>false</coding>
</primer>'
}
\ No newline at end of file
#'@import ROBITaxonomy
#'@include ROBIBarcodes.R
NULL
#'@export
resolution = function(taxonomy,ecopcr) {
l = aggregate(ecopcr$taxid,
by=list(barcode=ecopcr$sequence),
function(x) lowest.common.ancestor(taxo,x))
r = taxonomicrank(taxo,l$x)
names(r)=as.character(l$barcode)
return(r[as.character(ecopcr$sequence)])
}
\ No newline at end of file
#'@include xmlMods.R
#'@import XML
#'@import ROBITaxonomy
#'@include ROBIBarcodes.R
NULL
taxon.data.frame = function(taxonomy,taxids,strict=TRUE,known.taxid=c()) {
taxids = as.integer(sub("TX.","",as.character(taxids)))
good.taxid = validate(taxonomy,taxids)
if (strict & any(is.na(good.taxid)))
stop(sprintf("The following taxids are absent from the taxonomy : %s",
toString(taxids[is.na(good.taxid)])))
good.taxid = good.taxid[! is.na(good.taxid)]
all.path = path(taxonomy,good.taxid)
all.taxid = Reduce(union,all.path)
all.taxid = sort(union(all.taxid,known.taxid))[-1]
all.parent = sprintf("TX.%d",parent(taxonomy,all.taxid))
all.rank = taxonomicrank(taxonomy,all.taxid)
all.scientificname = scientificname(taxonomy,all.taxid)
all.id = sprintf("TX.%d",all.taxid)
rep = data.frame(taxid=all.id,
name=all.scientificname,
rank=all.rank,
partof=all.parent,
stringsAsFactors=FALSE)
return(rep)
}
build.taxon.node = function(taxid,name,rank,partof) {
nodes = lapply(sprintf('\n<taxon ID="%s"><name>%s</name><rank>%s</rank><partof>%s</partof></taxon>',
taxid,
name,
rank,
partof),
xmlParseString)
return(nodes)
}
#'@export
add.taxon.barcodedb = function(barcodedb,
taxonomy,
taxids) {
taxonomy.node = getNodeSet(barcodedb,
path='/obi:obimetabarcodedb/obi:taxonomy',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
known.taxid = as.character(
getNodeSet(
taxonomy.node,
path="./obi:taxon/@ID",
c(obi="http://metabarcoding.org/OBIMetabarcodes")))
known.taxid = as.integer(sub("TX.","",known.taxid))
taxon = taxon.data.frame(taxonomy,taxids,strict=TRUE,known.taxid)
taxon.nodes = c(xmlChildren(taxonomy.node)$root,
build.taxon.node(taxon$taxid,
taxon$name,
taxon$rank,
taxon$partof))
spare = sparexmltree()
new.taxonomy.node = getNodeSet(spare,
path='/obi:obimetabarcodedb/obi:taxonomy',
c(obi="http://metabarcoding.org/OBIMetabarcodes"))[[1]]
replaceNodes(taxonomy.node,new.taxonomy.node)
hidden = addChildren(new.taxonomy.node,kids=taxon.nodes,append=FALSE)
}
#' @include ROBIBarcodes.R
#' @import ROBITaxonomy
#' @import XML
#' @useDynLib ROBIBarcodes
#'
NULL
extractTaxa <-function(taxon){
id=xmlAttrs(taxon)["ID"]
name=xmlValue(xmlChildren(xmlChildren(taxon)$name)$text)
rank=xmlValue(xmlChildren(xmlChildren(taxon)$rank)$text)
partof=xmlValue(xmlChildren(xmlChildren(taxon)$partof)$text)
p=list(id=id, name=name, rank=rank, partof=partof)
return(p)
}
#' Builds taxa data frame from metabarcodedb
#'
#' The \code{taxonomy.data.frame} function extracts all the taxon information
#' from the \code{metabarcodedb} database.
#'
#' @param barcodedb a xml document containing a metabarcodedb.
#'
#' @return a \code{data.frame} describing taxa.
#'
#' @examples
#' # load the XML library
#' library(XML)
#'
#' # load the example metabarcodedb database
#' db = xmlParseDoc(system.file("extdata/barcodedb.xml", package="ROBIBarcodes"))
#'
#' # extracts the taxonomy table
#' taxonomy.data.frame(db)
#'
#' @author Eric Coissac
#' @keywords metabarcodes
#'
#' @export