Commit 11f8457c by Eric Coissac

Writing of the documentation

parent b55a343b
......@@ -30,7 +30,17 @@ registerDoParallel(1)
#'
.Trace <- function(X) sum(diag(X))
#' Convert a covariance matrix to a correlation matric
#'
#' @param c a square covariance/variance matrix
#' @return correlation matrix corresponding to \code{c}
#'
#' @note Internal function do not use.
#'
#' @rdname internal.var2cor
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#'
.var2cor <- function(c) {
v <- sqrt(diag(c))
vv <- v %o% v
......@@ -159,11 +169,6 @@ varls <- function(...,
xs <- ortho(xs)
# xxs <- as.procmod.frame(mapply(tcrossprod,
# xs,
# SIMPLIFY = FALSE
# ))
nx <- length(xs)
n <- nrow(xs)
n1 <- n - 1
......@@ -173,7 +178,6 @@ varls <- function(...,
# Computes the standard Covls covariance matrix
for (i in seq_len(nx))
for (j in i:nx)
#cov_xxs[i, j] <- Re(.Trace(expm::sqrtm(xxs[[i]] %*% xxs[[j]])))
cov_xxs[i, j] <- sum(svd(crossprod(xs[[i]],xs[[j]]))$d) / n1
cov_xxs <- as.matrix(Matrix::forceSymmetric(cov_xxs, uplo = "U"))
......@@ -186,27 +190,6 @@ varls <- function(...,
for (i in seq_len(nx))
v_xs[[i]] <- var(xs[[i]])
# s_cov_xxs <- array(0, dim = c(nx, nx, nrand))
# for(k in seq_len(nrand)) {
# #s1_cov_xxs <- matrix(0, nrow = nx, ncol = nx)
# r_xs <- vector(mode = "list", nx)
# r_ys <- vector(mode = "list", nx)
# for (i in seq_len(nx)) {
# r_xs[[i]] <- MASS::mvrnorm(n,
# rep(0,nrow(v_xs[[i]])),
# Sigma = v_xs[[i]],
# empirical = TRUE)
# r_ys[[i]] <- MASS::mvrnorm(n,
# rep(0,nrow(v_xs[[i]])),
# Sigma = v_xs[[i]],
# empirical = TRUE)
# }
#
# for (i in seq_len(nx))
# for (j in i:nx)
# s_cov_xxs[i, j, k] <- sum(svd(crossprod(r_xs[[i]],r_ys[[j]]))$d)
# }
if (! getDoParRegistered()) registerDoParallel(1)
s_cov_xxs <- foreach(k = seq_len(nrand),
......@@ -263,7 +246,6 @@ varls <- function(...,
colnames(p_values) <- x_names
rownames(p_values) <- x_names
#attr(cov_xxs, "scovls") <- s_cov_xxs
attr(cov_xxs, "rcovls") <- as.matrix(r_cov_xxs)
attr(cov_xxs, "p.value") <- p_values
attr(cov_xxs, "nrand") <- nrand
......
......@@ -45,7 +45,7 @@ as.data.frame.dist <- function(x, row.names = NULL, optional = FALSE, ...) {
#' an eucleadean space using the Kruskal's Non-metric
#' Multidimensional Scaling. This function is mainly a simplified
#' interface on the \code{\link[MASS]{isoMDS}} function using as
#' much as possible dimensionsto limit the stress. The aims of this
#' much as possible dimensions to limit the stress. The aims of this
#' NDMS being only to project point in an orthogonal space therefore
#' without any correlation between axis. Because a non-metric method
#' is used no condition is required on the used distance.
......@@ -58,6 +58,10 @@ as.data.frame.dist <- function(x, row.names = NULL, optional = FALSE, ...) {
#' @param tol convergence tolerance.
#' @param p Power for Minkowski distance in the configuration space.
#'
#' @return a numeric matrix with at most \code{n-1} dimensions, with
#' \code{n} the number pf observations. This matrix defines the
#' coordinates of each point in the orthogonal space.
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
......@@ -105,6 +109,14 @@ nmds <- function(distances,
#' is used the used distance must be euclidean
#' (cf \code{\link[ProcMod]{is.euclid}}).
#'
#' @param distances a \code{\link[stats]{dist}} object or a
#' \code{\link[base]{matrix}}
#' object representing a distance matrix.
#'
#' @return a numeric matrix with at most \code{n-1} dimensions, with
#' \code{n} the number pf observations. This matrix defines the
#' coordinates of each point in the orthogonal space.
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
......@@ -125,6 +137,23 @@ pcoa <- function(distances) {
#' Project a set of points in a euclidean space (PCA).
#'
#' Project a set of points defined by a set of numeric variables in
#' an eucleadean space using the pricipal componant analysis.
#' This function is mainly a simplified
#' interface on the \code{\link[stats]{prcomp}} function using as
#' much as possible dimensions to keep all the variation. The aims of this
#' PCA being only to project point in an orthogonal space therefore
#' without any correlation between axis. Data are centered by not scaled by
#' default.
#'
#' @param data a numeric matrix describing the points
#' @param scale a \code{logical} value indicating if the dimensions must be scaled
#' to force for every column that \code{sd=1}. \code{FALSE} by default.
#'
#' @return a numeric matrix with at most \code{n-1} dimensions, with
#' \code{n} the number pf observations. This matrix defines the
#' coordinates of each point in the orthogonal space.
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
......@@ -150,6 +179,8 @@ pca <- function(data, scale = FALSE) {
#' Inspired from the algorithm described in stackoverflow
#' \url{https://stackoverflow.com/questions/43639063/double-centering-in-r}
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
bicenter <- function(m) {
......@@ -167,6 +198,11 @@ bicenter <- function(m) {
#' Actually a simplified version of the ADE4 implementation
#' (\code{\link[ade4]{is.euclid}}).
#'
#' @param distances an object of class 'dist'
#' @param tol a tolerance threshold : an eigenvalue is
#' considered positive if it is larger than
#' -tol*lambda1 where lambda1 is the largest eigenvalue.
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
......@@ -204,6 +240,17 @@ is.orthogonal <- function(x) {
#' When points are described by a set of variable the
#' \code{\link[ProcMod]{nmds}} is used.
#'
#' @param data a numeric matrix describing the points
#' @param tol a tolerance threshold : an eigenvalue is
#' considered positive if it is larger than
#' -tol*lambda1 where lambda1 is the largest eigenvalue.
#' @param scale a \code{logical} value indicating if the dimensions must be scaled
#' to force for every column that \code{sd=1}. \code{FALSE} by default.
#'
#' @return a numeric matrix with at most \code{n-1} dimensions, with
#' \code{n} the number pf observations. This matrix defines the
#' coordinates of each point in the orthogonal space.
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
......@@ -211,8 +258,7 @@ ortho <- function(data, ...) {
UseMethod("ortho", data)
}
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @rdname ortho
#' @export
ortho.dist <- function(x, tol = 1e-7) {
if (ProcMod::is.euclid(x, tol = tol)) {
......@@ -222,15 +268,13 @@ ortho.dist <- function(x, tol = 1e-7) {
ProcMod::nmds(x)
}
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @rdname ortho
#' @export
ortho.matrix <- function(x, scale = FALSE) {
pca(x, scale = scale)
}
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @rdname ortho
#' @export
ortho.data.frame <- function(x, scale = FALSE) {
if (!is.null(attributes(x)$is.dist) &&
......@@ -242,8 +286,7 @@ ortho.data.frame <- function(x, scale = FALSE) {
}
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @rdname ortho
#' @export
ortho.procmod.frame <- function(x) {
if (is.orthogonal(x)) {
......
......@@ -14,6 +14,7 @@ NULL
#'
#' @return a new matrix with the same number of columns but with `nrow`
#' rows.
#' @note Internal function do not use.
#'
#' @author Eric Coissac <eric.coissac@metabarcoding.org>
#' @author Christelle Gonindard-Melodelima <christelle.gonindard@metabarcoding.org>
......@@ -49,6 +50,7 @@ NULL
#' of the returned matrix
#'
#' @return a new numeric matrix with correct size.
#' @note Internal function do not use.
#'
#' @author Eric Coissac <eric.coissac@metabarcoding.org>
#' @author Christelle Gonindard-Melodelima <christelle.gonindard@metabarcoding.org>
......@@ -164,6 +166,37 @@ NULL
#' The procmod.frame data structure.
#'
#' A \code{procmod.frame} can be considered as the analog of a
#' \code{data.frame} for vector data. In a \code{procmod.frame}
#' each element, equivalent to a column in a \code{data.frame}
#' is a numeric matrix or a distance matrix object (\code{dist}).
#' Every element must describe the same number of individuals.
#' Therefore every numeric matrix must have the same number of row
#' (\code{nrow}) and every distance matrix must have the same size
#' (\code{attr(d,"Size")}). A \code{procmod.frame} can simultaneously
#' contain both types of data, numeric and distance matrix.
#'
#'
#' @examples
#' library(vegan)
#' data(bacteria)
#' data(eukaryotes)
#' data(soil)
#'
#' dataset <- procmod.frame(euk = vegdist(decostand(eukaryotes,
#' method = "hellinger"),
#' method = "euclidean"),
#' bac = vegdist(decostand(bacteria,
#' method = "hellinger"),
#' method = "euclidean"),
#' soil = scale(soil,
#' center = TRUE,
#' scale = TRUE))
#' length(dataset)
#' nrow(dataset)
#' ncol(dataset)
#' dataset$euk
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
......
......@@ -3,10 +3,29 @@ NULL
#' Simulate n points of dimension p.
#'
#' Points are simulated using the \code{\link[mvtnorm]{rmvnorm}} from
#' the \code{mvtnorm}. The mean of each dimension is set to 0 and all
#' the variances to 1 with all the covariances set to 0.
#' Points are simulated by drawing values of each dimension from a normal
#' distribution of mean 0 and standard deviation equals to 1.
#' The mean of each dimension is forced to 0 (data are centred).
#' By default variable are also scaled to enforce a strandard deviation
#' strictly equal to 1. Covariances between dimensions are not controled.
#' Therefore they are expected to be equal to 0 and reflect only the
#' random distribution of the covariance between two random vectors.
#'
#' @param n an \code{int} value indicating the number of observations.
#' @param p an \code{int} value indicating the number of dimensions (variables)
#' simulated
#' @param equal.var a \code{logical} value indicating if the dimensions must be scaled
#' to force \code{sd=1}. \code{TRUE} by default.
#'
#' @return a numeric matrix of \code{n} rows and \code{p} columns
#'
#' @examples
#' sim1 <- simulate_matrix(25,10)
#' class(sim1)
#' dim(sim1)
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
simulate_matrix <- function(n, p, equal.var = TRUE) {
new <- rnorm(n * p, mean = 0, sd = 1)
......@@ -24,6 +43,37 @@ simulate_matrix <- function(n, p, equal.var = TRUE) {
#' Simulate n points of dimension p correlated with a reference matrix.
#'
#' Simulates a set of point correlated to another set according to the
#' procrustean correlation definition.
#' Points are simulated by drawing values of each dimension from a normal
#' distribution of mean 0 and standard deviation equals to 1.
#' The mean of each dimension is forced to 0 (data are centred).
#' By default variable are also scaled to enforce a strandard deviation
#' strictly equal to 1. Covariances between dimensions are not controled.
#' Therefore they are expected to be equal to 0 and reflect only the
#' random distribution of the covariance between two random vectors.
#' The intensity of the correlation is determined by the \code{r2}
#' parameter.
#'
#' @param reference a numeric matrix to which the simulated data will be correlated
#' @param p an \code{int} value indicating the number of dimensions (variables)
#' simulated
#' @param r2 the fraction of variation shared between the \code{reference} and the
#' simulated data
#' @param equal.var a \code{logical} value indicating if the dimensions must be scaled
#' to force \code{sd=1}. \code{TRUE} by default.
#'
#' @return a numeric matrix of \code{nrow(reference)} rows and \code{p} columns
#'
#' @examples
#' sim1 <- simulate_matrix(25,10)
#' class(sim1)
#' dim(sim1)
#' sim2 <- simulate_correlation(sim1,20,0.8)
#' corls(sim1, sim2)^2
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
simulate_correlation <- function(reference, p, r2, equal.var = TRUE) {
......
require(vegan)
bacteria <- read.csv("data-raw/bacteria.csv",
header = TRUE,
row.names = 1,
......@@ -28,8 +30,53 @@ geography <- read.csv("data-raw/geography.csv",
na.strings = "NA"
)
devtools::use_data(bacteria, overwrite = TRUE)
devtools::use_data(eukaryotes, overwrite = TRUE)
devtools::use_data(climat, overwrite = TRUE)
devtools::use_data(soil, overwrite = TRUE)
devtools::use_data(geography, overwrite = TRUE)
usethis::use_data(bacteria, overwrite = TRUE)
usethis::use_data(eukaryotes, overwrite = TRUE)
usethis::use_data(climat, overwrite = TRUE)
usethis::use_data(soil, overwrite = TRUE)
usethis::use_data(geography, overwrite = TRUE)
#
# MicroCurvulatum datasets
#
mc.bacteria <- read.table("data-raw/MicroCurvulatum/rarefied_bacterial_community_matrix.txt",
row.names = 1,
header = 1,
na.strings = "NA"
)
mc.bacteria = as.matrix(mc.bacteria)
mc.fungi <- read.table("data-raw/MicroCurvulatum/rarefied_fungal_community_matrix.txt",
row.names = 1,
header = 1,
na.strings = "NA"
)
mc.fungi <- as.matrix(mc.fungi[row.names(mc.bacteria),])
mc.plants <- read.table("data-raw/MicroCurvulatum/Phytosocio.txt",
row.names = 1,
header = 1,
na.strings = "NA"
)
mc.plants <- as.matrix(mc.plants[substr(row.names(mc.bacteria),
1,
nchar(row.names(mc.bacteria))-1),])
mc.soil <- read.table("data-raw/MicroCurvulatum/SOIL_CHEM.txt",
row.names = 1,
header = 1,
na.strings = "NA"
)
mc.soil <- as.matrix(mc.soil[row.names(mc.bacteria),5:10])
MicroCurvulatum = ProcMod::procmod.frame(bacteria = mc.bacteria,
fungi = mc.fungi,
plants = mc.plants,
soil = mc.soil)
usethis::use_data(MicroCurvulatum, overwrite = TRUE)
......@@ -13,3 +13,8 @@ colSums and rowSums of the returned matrix are all equal to zero.
Inspired from the algorithm described in stackoverflow
\url{https://stackoverflow.com/questions/43639063/double-centering-in-r}
}
\author{
Eric Coissac
Christelle Gonindard-Melodelima
}
......@@ -19,6 +19,9 @@ a new numeric matrix with correct size.
Transforme the \code{x} value into a \code{numeric matrix} of
the correct size or into a \code{dist} object.
}
\note{
Internal function do not use.
}
\author{
Eric Coissac <eric.coissac@metabarcoding.org>
......
......@@ -22,6 +22,9 @@ repeats several times the rows of a matrix
final row count must be a multiple of the
initial row count
}
\note{
Internal function do not use.
}
\author{
Eric Coissac <eric.coissac@metabarcoding.org>
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/covls.R
\name{.var2cor}
\alias{.var2cor}
\title{Convert a covariance matrix to a correlation matric}
\usage{
.var2cor(c)
}
\arguments{
\item{c}{a square covariance/variance matrix}
}
\value{
correlation matrix corresponding to \code{c}
}
\description{
Convert a covariance matrix to a correlation matric
}
\note{
Internal function do not use.
}
\author{
Eric Coissac
Christelle Gonindard-Melodelima
}
......@@ -6,6 +6,13 @@
\usage{
is.euclid(distances, tol = 1e-07)
}
\arguments{
\item{distances}{an object of class 'dist'}
\item{tol}{a tolerance threshold : an eigenvalue is
considered positive if it is larger than
-tol*lambda1 where lambda1 is the largest eigenvalue.}
}
\description{
Actually a simplified version of the ADE4 implementation
(\code{\link[ade4]{is.euclid}}).
......
......@@ -19,12 +19,17 @@ object representing a distance matrix.}
\item{p}{Power for Minkowski distance in the configuration space.}
}
\value{
a numeric matrix with at most \code{n-1} dimensions, with
\code{n} the number pf observations. This matrix defines the
coordinates of each point in the orthogonal space.
}
\description{
Project a set of points defined by a distance matrix in
an eucleadean space using the Kruskal's Non-metric
Multidimensional Scaling. This function is mainly a simplified
interface on the \code{\link[MASS]{isoMDS}} function using as
much as possible dimensionsto limit the stress. The aims of this
much as possible dimensions to limit the stress. The aims of this
NDMS being only to project point in an orthogonal space therefore
without any correlation between axis. Because a non-metric method
is used no condition is required on the used distance.
......
......@@ -2,9 +2,36 @@
% Please edit documentation in R/multivariate.R
\name{ortho}
\alias{ortho}
\alias{ortho.dist}
\alias{ortho.matrix}
\alias{ortho.data.frame}
\alias{ortho.procmod.frame}
\title{Project a dataset in a euclidean space.}
\usage{
ortho(data, ...)
\method{ortho}{dist}(x, tol = 1e-07)
\method{ortho}{matrix}(x, scale = FALSE)
\method{ortho}{data.frame}(x, scale = FALSE)
\method{ortho}{procmod.frame}(x)
}
\arguments{
\item{data}{a numeric matrix describing the points}
\item{tol}{a tolerance threshold : an eigenvalue is
considered positive if it is larger than
-tol*lambda1 where lambda1 is the largest eigenvalue.}
\item{scale}{a \code{logical} value indicating if the dimensions must be scaled
to force for every column that \code{sd=1}. \code{FALSE} by default.}
}
\value{
a numeric matrix with at most \code{n-1} dimensions, with
\code{n} the number pf observations. This matrix defines the
coordinates of each point in the orthogonal space.
}
\description{
Project a set of points defined by a distance matrix
......
......@@ -6,8 +6,26 @@
\usage{
pca(data, scale = FALSE)
}
\arguments{
\item{data}{a numeric matrix describing the points}
\item{scale}{a \code{logical} value indicating if the dimensions must be scaled
to force for every column that \code{sd=1}. \code{FALSE} by default.}
}
\value{
a numeric matrix with at most \code{n-1} dimensions, with
\code{n} the number pf observations. This matrix defines the
coordinates of each point in the orthogonal space.
}
\description{
Project a set of points in a euclidean space (PCA).
Project a set of points defined by a set of numeric variables in
an eucleadean space using the pricipal componant analysis.
This function is mainly a simplified
interface on the \code{\link[stats]{prcomp}} function using as
much as possible dimensions to keep all the variation. The aims of this
PCA being only to project point in an orthogonal space therefore
without any correlation between axis. Data are centered by not scaled by
default.
}
\author{
Eric Coissac
......
......@@ -6,6 +6,16 @@
\usage{
pcoa(distances)
}
\arguments{
\item{distances}{a \code{\link[stats]{dist}} object or a
\code{\link[base]{matrix}}
object representing a distance matrix.}
}
\value{
a numeric matrix with at most \code{n-1} dimensions, with
\code{n} the number pf observations. This matrix defines the
coordinates of each point in the orthogonal space.
}
\description{
Project a set of points defined by a distance matrix in
an eucleadean space using the Principal Coordinates Analysis
......
......@@ -14,11 +14,36 @@ is.procmod.frame(x)
as.procmod.frame(data, ...)
}
\description{
The procmod.frame data structure.
A \code{procmod.frame} can be considered as the analog of a
\code{data.frame} for vector data. In a \code{procmod.frame}
each element, equivalent to a column in a \code{data.frame}
is a numeric matrix or a distance matrix object (\code{dist}).
Every element must describe the same number of individuals.
Therefore every numeric matrix must have the same number of row
(\code{nrow}) and every distance matrix must have the same size
(\code{attr(d,"Size")}). A \code{procmod.frame} can simultaneously
contain both types of data, numeric and distance matrix.
}
\examples{
library(vegan)
data(bacteria)
data(eukaryotes)
data(soil)
Check if an object is a ProcMod Frame.
dataset <- procmod.frame(euk = vegdist(decostand(eukaryotes,
method = "hellinger"),
method = "euclidean"),
bac = vegdist(decostand(bacteria,
method = "hellinger"),
method = "euclidean"),
soil = scale(soil,
center = TRUE,
scale = TRUE))
length(dataset)
nrow(dataset)
ncol(dataset)
dataset$euk
Coerce to a ProcMod Frame.
}
\author{
Eric Coissac
......
......@@ -6,6 +6,44 @@
\usage{
simulate_correlation(reference, p, r2, equal.var = TRUE)
}
\arguments{
\item{reference}{a numeric matrix to which the simulated data will be correlated}
\item{p}{an \code{int} value indicating the number of dimensions (variables)
simulated}
\item{r2}{the fraction of variation shared between the \code{reference} and the
simulated data}
\item{equal.var}{a \code{logical} value indicating if the dimensions must be scaled
to force \code{sd=1}. \code{TRUE} by default.}
}
\value{
a numeric matrix of \code{nrow(reference)} rows and \code{p} columns
}
\description{
Simulate n points of dimension p correlated with a reference matrix.
Simulates a set of point correlated to another set according to the
procrustean correlation definition.
Points are simulated by drawing values of each dimension from a normal
distribution of mean 0 and standard deviation equals to 1.
The mean of each dimension is forced to 0 (data are centred).
By default variable are also scaled to enforce a strandard deviation
strictly equal to 1. Covariances between dimensions are not controled.
Therefore they are expected to be equal to 0 and reflect only the
random distribution of the covariance between two random vectors.
The intensity of the correlation is determined by the \code{r2}
parameter.
}
\examples{
sim1 <- simulate_matrix(25,10)
class(sim1)
dim(sim1)
sim2 <- simulate_correlation(sim1,20,0.8)
corls(sim1, sim2)^2
}
\author{
Eric Coissac
Christelle Gonindard-Melodelima
}
......@@ -6,8 +6,35 @@
\usage{
simulate_matrix(n, p, equal.var = TRUE)
}
\arguments{
\item{n}{an \code{int} value indicating the number of observations.}
\item{p}{an \code{int} value indicating the number of dimensions (variables)
simulated}
\item{equal.var}{a \code{logical} value indicating if the dimensions must be scaled
to force \code{sd=1}. \code{TRUE} by default.}
}
\value{
a numeric matrix of \code{n} rows and \code{p} columns
}
\description{
Points are simulated using the \code{\link[mvtnorm]{rmvnorm}} from
the \code{mvtnorm}. The mean of each dimension is set to 0 and all
the variances to 1 with all the covariances set to 0.
Points are simulated by drawing values of each dimension from a normal
distribution of mean 0 and standard deviation equals to 1.
The mean of each dimension is forced to 0 (data are centred).
By default variable are also scaled to enforce a strandard deviation
strictly equal to 1. Covariances between dimensions are not controled.
Therefore they are expected to be equal to 0 and reflect only the
random distribution of the covariance between two random vectors.
}
\examples{
sim1 <- simulate_matrix(25,10)
class(sim1)
dim(sim1)
}
\author{
Eric Coissac
Christelle Gonindard-Melodelima
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment