Commit c1e99829 by Eric Coissac

Add docs to the package

Patch small typos in manuscripts
Outline of the discussion
parent 9e71f8cb
......@@ -13,6 +13,7 @@ S3method(as.procmod.frame,matrix)
S3method(as.procmod.frame,pm)
S3method(as.procmod.frame,procmod.frame)
S3method(dim,procmod.frame)
S3method(names,procmod.corls)
S3method(names,procmod.varls)
S3method(ortho,data.frame)
S3method(ortho,dist)
......
......@@ -6,40 +6,66 @@ NULL
#' Generate permutation matrix according to a schema.
#'
#' @param permutations a list of control values for the permutations as returned
#'        by the function \code{\link[permute]{how}}, or the number of
#'        permutations required.
#' @param n numeric; the number of observations in the sample set.
#'        May also be any object that nobs knows about;
#'        see \code{\link[permute]{nobs}} methods.
#' @param strata a factor, or an object that can be coerced to a
#'        factor via \code{as.factor}, specifying the strata for permutation.
#'
#' @note Internal function, do not use directly.
#'
#' @rdname internal.getPermuteMatrix
#' The permutation schema is defined using the `how` function.
#' The implementation of this function is inspired
#' by the vegan package and reproduced here to avoid an extra
#' dependency on a hidden vegan function.
#'
.getPermuteMatrix = function(permutations, n, strata = NULL)
{
  if (length(permutations) == 1) {
    permutations <- permute::how(nperm = permutations)
  }

  if (!missing(strata) && !is.null(strata)) {
    if (inherits(permutations, "how") && is.null(permute::getBlocks(permutations)))
      permute::setBlocks(permutations) <- strata
  }

  if (inherits(permutations, "how"))
    permutations <- permute::shuffleSet(n, control = permutations)
  else {
    if (!is.integer(permutations) && !all(permutations == round(permutations)))
      stop("permutation matrix must be strictly integers: use round()")
  }

  if (is.null(attr(permutations, "control")))
    attr(permutations, "control") <- structure(list(within = list(type = "supplied matrix"),
                                                    nperm = nrow(permutations)),
                                               class = "how")
  permutations
}
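
For orientation, a minimal sketch of the two calling conventions this internal helper accepts (illustration only: .getPermuteMatrix is not exported, the strata factor here is invented for the example, and the permute package must be installed):

# A plain number of permutations...
pm1 <- .getPermuteMatrix(99, 10)
dim(pm1)                    # 99 x 10 matrix of permuted row indices

# ...or an explicit schema built with permute::how(), here with strata
ctrl <- permute::how(nperm = 49)
pm2 <- .getPermuteMatrix(ctrl, 10, strata = gl(2, 5))
attr(pm2, "control")        # the "how" schema attached to the result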
#' Monte-Carlo Test on the sum of the singular values of a procrustean rotation.
#'
#' Performs a Monte-Carlo test on the sum of the singular values of a
#' procrustean rotation (see \code{\link[ade4]{procuste.rtest}}).
#'
#'
#' @param ... the set of matrices or a \code{\link[ProcMod]{procmod.frame}}
#' object.
#' @param permutations a list of control values for the permutations as returned
#' by the function \code{\link[permute]{how}}, or the number of
#' permutations required.
#' @param p.adjust.method the multiple testing correction method used
#'        to adjust p values (see \code{\link[stats]{p.adjust}}).
#'        Must be one of the following values: \code{"holm"},
#'        \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"},
#'        \code{"BH"}, \code{"BY"}, \code{"fdr"}, \code{"none"}.
#'        The default is \code{"holm"}.
#'
#' @references {
#' \insertRef{Jackson:95:00}{ProcMod}
#' }
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
......@@ -73,7 +99,7 @@ corls.test <- function(...,
n <- nrow(xs)
nx <- length(xs)
pmatrix <- .getPermuteMatrix(permutations, n)
if (ncol(pmatrix) != n) {
stop(gettextf(
......
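
As a reading aid, a hedged sketch of how corls.test is meant to be called, based on the signature documented above (the matrices are arbitrary toy data, not taken from the package examples):

# Toy illustration (assumes the ProcMod package is loaded):
# two matrices describing the same 20 individuals
A <- matrix(rnorm(20 * 10), nrow = 20)
B <- matrix(rnorm(20 * 15), nrow = 20)

# Monte-Carlo test with 999 permutations; p values adjusted with Holm's method
corls.test(A = A, B = B,
           permutations = permute::how(nperm = 999),
           p.adjust.method = "holm")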
......@@ -37,20 +37,38 @@ registerDoParallel(1)
c / vv
}
#' Procrustean Correlation, and Variance / Covariance Matrices.
#'
#' \code{varls}, \code{corls}, and \code{corls.partial} compute the procrustean
#' variance / covariance, correlation, or partial correlation matrices
#' among a set of real matrices and \code{\link[stats]{dist}} objects.
#'
#' Procrustean covariance between two matrices X and Y is defined as the sum
#' of the singular values of the X'Y matrix \insertCite{Gower:71:00,Lingoes:74:00}{ProcMod}.
#' Both the X and Y matrices must have the same number of rows.
#'
#' The variances, covariances, and correlations are corrected
#' to avoid overfitting \insertCite{Coissac-Eric:19:00}{ProcMod}.
#'
#' Partial correlation coefficients are computed by inverting the correlation matrix,
#' followed by a normalisation by the diagonal of the inverted matrix.
#'
#' The inputs must be numeric matrices or \code{\link[stats]{dist}} objects.
#' The set of input matrices can be aggregated in a
#' \code{\link[ProcMod]{procmod.frame}}.
#'
#' Before computing the coefficients, matrices are projected into an
#' orthogonal space using the \code{\link[ProcMod]{ortho}} function.
#'
#' The denominator n - 1 is used, which gives an unbiased estimator of the
#' (co)variance for i.i.d. observations.
#'
#' Scaling a covariance matrix into a correlation matrix can be achieved in many
#' ways: mathematically the most appealing is multiplication by a diagonal matrix
#' from the left and the right; more efficient is using sweep(..., FUN = "/") twice.
#' The \code{\link[stats]{cov2cor}} function is even a bit more efficient,
#' and is provided mostly for didactical reasons.
#'
#' @references{
#' \insertRef{Gower:71:00}{ProcMod}
#'
......@@ -104,8 +122,15 @@ registerDoParallel(1)
#' varls(A = A, B = B, C = C)
#' data = procmod.frame(A = A, B = B, C = C)
#' varls(data)
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#'
#' @rdname varls
#' @name varls
#' @aliases varls
#' @aliases corls
#' @aliases corls.partial
#' @export
varls <- function(...,
nrand = 100,
......@@ -182,6 +207,8 @@ varls <- function(...,
# s_cov_xxs[i, j, k] <- sum(svd(crossprod(r_xs[[i]],r_ys[[j]]))$d)
# }
if (! getDoParRegistered()) registerDoParallel(1)
s_cov_xxs <- foreach(k = seq_len(nrand),
.combine = cbind) %dopar% {
s1_cov_xxs <- matrix(0, nrow = nx, ncol = nx)
......@@ -249,10 +276,7 @@ varls <- function(...,
}
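
As a reading aid, the covariance definition above (the sum of the singular values of X'Y, cf. the commented loop) can be transcribed directly; a minimal, unoptimised sketch, assuming centred numeric matrices with the same number of rows, and without the over-fitting correction that varls applies:

# Direct transcription of the procrustean covariance definition:
# the sum of the singular values of X'Y, scaled by n - 1
# (equivalent to trace((XX'YY')^(1/2)) / (n - 1)).
covls_direct <- function(X, Y) {
  stopifnot(nrow(X) == nrow(Y))
  sum(svd(crossprod(X, Y))$d) / (nrow(X) - 1)
}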
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @rdname varls
#' @export
corls <- function(..., nrand = 100,
p.adjust.method = "holm") {
......@@ -263,20 +287,17 @@ corls <- function(..., nrand = 100,
s <- sqrt(diag(cov))
vv <- s %o% s
rls <- cov / vv
class(rls) <- "matrix"
if (!is.null(attr(cov, "rcovls"))) {
attr(rls, "nrand") <- attr(cov, "nrand")
attr(rls, "rcorls") <- attr(cov, "rcovls") / vv
}
make_subS3Class(rls, "procmod.corls")
}
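
The normalisation in corls above is the classical covariance-to-correlation scaling discussed in the documentation; as a sanity check on a plain toy covariance matrix (invented for this example), the outer-product division matches stats::cov2cor:

# A toy 2 x 2 covariance matrix
v <- matrix(c(4, 2, 2, 9), nrow = 2)
s <- sqrt(diag(v))                           # standard deviations
all.equal(v / (s %o% s), stats::cov2cor(v))  # TRUE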
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @rdname varls
#' @export
corls.partial <- function(..., nrand = 100) {
rls <- corls(..., nrand = nrand)
......@@ -291,7 +312,7 @@ corls.partial <- function(..., nrand = 100) {
make_subS3Class(rp, "procmod.corls")
}
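
The inversion-and-rescaling step described in the details section can be sketched as follows (a hypothetical helper for illustration; it follows the manuscript's normalisation P_ij / sqrt(P_ii P_jj) of the precision matrix, whereas the classical partial correlation definition negates the off-diagonal terms):

# Sketch: partial correlations from a correlation matrix R,
# following the normalisation described above.
partial_from_cor <- function(R) {
  P <- solve(R)        # precision matrix, the inverse of R
  d <- sqrt(diag(P))
  P / (d %o% d)        # rescale by the diagonal of the inverse
}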
#' Print procrustean variance / covariance matrix.
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
......@@ -315,12 +336,14 @@ print.procmod.varls <- function(x, ...) {
#' @author Christelle Gonindard-Melodelima
#' @export
names.procmod.varls <- function(x) {
  n <- names(attributes(x))
  bn <- grep(pattern = "^(dim|class)",
             x = n)
  n[-bn]
}
#' Print procrustean correlation matrix.
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
......@@ -341,3 +364,15 @@ print.procmod.corls <- function(x, ...) {
attr(x,name)
}
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
names.procmod.corls <- function(x) {
  n <- names(attributes(x))
  bn <- grep(pattern = "^(dim|class)",
             x = n)
  n[-bn]
}
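
For illustration, a hedged sketch of what these two names methods expose (toy data; the exact attribute names depend on how the object was built, and the "^(dim|class)" pattern also hides "dimnames"):

# Attributes other than dim*/class become visible through names()
A <- matrix(rnorm(200), nrow = 20)
B <- matrix(rnorm(300), nrow = 20)
rls <- corls(A = A, B = B, nrand = 100)
names(rls)             # e.g. "nrand" and "rcorls"
attr(rls, "nrand")     # individual attributes remain reachable via attr()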
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/covls.R
\name{corls}
\alias{corls}
\title{Compute the Pearson correlation matrix of K coordinate matrices}
\usage{
corls(..., nrand = 100, p.adjust.method = "holm")
}
\description{
Compute the Pearson correlation matrix of K coordinate matrices
}
\author{
Eric Coissac
Christelle Gonindard-Melodelima
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/covls.R
\name{corls.partial}
\alias{corls.partial}
\title{Compute the Pearson partial correlation matrix of K coordinate matrices}
\usage{
corls.partial(..., nrand = 100)
}
\description{
Compute the Pearson partial correlation matrix of K coordinate matrices
}
\author{
Eric Coissac
Christelle Gonindard-Melodelima
}
......@@ -2,13 +2,34 @@
% Please edit documentation in R/corls_test.R
\name{corls.test}
\alias{corls.test}
\title{Monte-Carlo Test on the sum of the singular values of a procrustean rotation.}
\usage{
corls.test(..., permutations = permute::how(nperm = 999),
p.adjust.method = "holm")
}
\arguments{
\item{...}{the set of matrices or a \code{\link[ProcMod]{procmod.frame}}
object.}
\item{permutations}{a list of control values for the permutations as returned
by the function \code{\link[permute]{how}}, or the number of
permutations required.}
\item{p.adjust.method}{the multiple testing correction method used
to adjust p values (see \code{\link[stats]{p.adjust}}).
Must be one of the following values: \code{"holm"},
\code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"},
\code{"BH"}, \code{"BY"}, \code{"fdr"}, \code{"none"}.
The default is \code{"holm"}.}
}
\description{
Performs a Monte-Carlo test on the sum of the singular values of a
procrustean rotation (see \code{\link[ade4]{procuste.rtest}}).
}
\references{
{
\insertRef{Jackson:95:00}{ProcMod}
}
}
\author{
Eric Coissac
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/corls_test.R
\name{getPermuteMatrix}
\alias{getPermuteMatrix}
\title{Generate permutation matrix according to a schema.}
\usage{
getPermuteMatrix(perm, n, strata = NULL)
}
\arguments{
\item{perm}{a list of control values for the permutations as returned
by the function \code{\link[permute]{how}}, or the number of
permutations required.}

\item{n}{numeric; the number of observations in the sample set.
May also be any object that nobs knows about;
see \code{\link[permute]{nobs}} methods.}

\item{strata}{a factor, or an object that can be coerced to a
factor via as.factor, specifying the strata for permutation.}
}
\description{
Generate permutation matrix according to a schema. The permutation schema
is defined using the \code{\link[permute]{how}} function. The implementation
is inspired by the vegan package and reproduced here to avoid an extra
dependency on a hidden vegan function.
}
......@@ -2,12 +2,12 @@
% Please edit documentation in R/covls.R
\name{print.procmod.corls}
\alias{print.procmod.corls}
\title{Print procrustean correlation matrix.}
\usage{
\method{print}{procmod.corls}(x, ...)
}
\description{
Print procrustean correlation matrix.
}
\author{
Eric Coissac
......
......@@ -2,12 +2,12 @@
% Please edit documentation in R/covls.R
\name{print.procmod.varls}
\alias{print.procmod.varls}
\title{Print procrustean variance / covariance matrix.}
\usage{
\method{print}{procmod.varls}(x, ...)
}
\description{
Print procrustean variance / covariance matrix.
}
\author{
Eric Coissac
......
......@@ -2,9 +2,15 @@
% Please edit documentation in R/covls.R
\name{varls}
\alias{varls}
\alias{corls}
\alias{corls.partial}
\title{Procrustean Correlation, and Variance / Covariance Matrices.}
\usage{
varls(..., nrand = 100, p.adjust.method = "holm")
corls(..., nrand = 100, p.adjust.method = "holm")
corls.partial(..., nrand = 100)
}
\arguments{
\item{...}{the set of matrices or a \code{\link[ProcMod]{procmod.frame}}
......@@ -46,18 +52,36 @@ a \code{procmod.varls} object which corresponds to a numeric
method specified by the \code{p.adjust.method} parameter.
}
\description{
\code{varls}, \code{corls}, and \code{corls.partial} compute the procrustean
variance / covariance, correlation, or partial correlation matrices
among a set of real matrices and \code{\link[stats]{dist}} objects.
}
\details{
Procrustean covariance between two matrices X and Y is defined as the sum
of the singular values of the X'Y matrix \insertCite{Gower:71:00,Lingoes:74:00}{ProcMod}.
Both the X and Y matrices must have the same number of rows.

The variances, covariances, and correlations are corrected
to avoid overfitting \insertCite{Coissac-Eric:19:00}{ProcMod}.

Partial correlation coefficients are computed by inverting the correlation matrix,
followed by a normalisation by the diagonal of the inverted matrix.

The inputs must be numeric matrices or \code{\link[stats]{dist}} objects.
The set of input matrices can be aggregated in a
\code{\link[ProcMod]{procmod.frame}}.

Before computing the coefficients, matrices are projected into an
orthogonal space using the \code{\link[ProcMod]{ortho}} function.

The denominator n - 1 is used, which gives an unbiased estimator of the
(co)variance for i.i.d. observations.

Scaling a covariance matrix into a correlation matrix can be achieved in many
ways: mathematically the most appealing is multiplication by a diagonal matrix
from the left and the right; more efficient is using sweep(..., FUN = "/") twice.
The \code{\link[stats]{cov2cor}} function is even a bit more efficient,
and is provided mostly for didactical reasons.
}
\examples{
# Build Three matrices of 3 rows.
......@@ -69,6 +93,7 @@ varls(A, B, C)
varls(A = A, B = B, C = C)
data = procmod.frame(A = A, B = B, C = C)
varls(data)
}
\references{
{
......
......@@ -112,7 +112,7 @@ set.seed(1)
Multidimensional data and even high-dimensional data, where the number of variables describing each sample is far larger than the sample count, are now regularly produced in functional genomics (\emph{e.g.} transcriptomics, proteomics or metabolomics) and molecular ecology (\emph{e.g.} DNA metabarcoding). Using various techniques, the same sample set can be described by several multidimensional data sets, each of them describing a different facet of the samples. This invites using data analysis methods able to evaluate mutual information shared by these different descriptions. Correlative approaches can be a first and simple way to decipher pairwise relationships of those data sets.
For a long time, several coefficients have been proposed to measure the correlation between two matrices \citep[for a comprehensive review see][]{Ramsay:84:00}. But when applied to high-dimensional data, they suffer from an over-fitting effect leading them to estimate a high correlation even for unrelated data sets. Modified versions of some of these matrix correlation coefficients have already been proposed to tackle this problem. The $\rv_2$ coefficient \citep{Smilde:09:00} corrects the original $\rv$ coefficient \citep{Escoufier:73:00} for over-fitting. Similarly, a modified version of the distance correlation coefficient $\dcor$ \citep{Szekely:07:00} has been proposed by \cite{SzeKely:13:00}. $\dcor$ has the advantage over the other correlation coefficients of not considering only linear relationships. Here we focus on the Procrustes correlation coefficient $\rls$ proposed by \cite{Lingoes:74:00} and by \cite{Gower:71:00}. Let us define $\trace$, a function summing the diagonal elements of a matrix. For an $n \times p$ real matrix $\X$ and a second $n \times q$ real matrix $\Y$, defining respectively two sets of $p$ and $q$ centered variables characterizing $n$ individuals, we define $\covls(\X,\Y)$, an analog of covariance applicable to vectorial data, following Equation~(\ref{eq:CovLs})
\begin{equation}
\covls(\X,\Y) = \frac{\trace((\mathbf{XX}'\mathbf{YY}')^{1/2})}{n-1}
......@@ -172,7 +172,7 @@ and $\irls(\X,\Y)$ the informative Procruste correlation coefficient as follow.
Like $\rls(\X,\Y)$, $\irls(\X,\Y) \in [0;1]$, with the $0$ value corresponding to no correlation and the maximum value $1$ reached for two strictly homothetic data sets.
The corollary of the $\icovls(\X,\Y)$ and $\ivarls(\X)$ definitions is that $\icovls(\X,\Y) \geqslant 0$ and $\ivarls(\X) > 0$. Therefore, for $M=\{\mathbf{M}_1,\mathbf{M}_2,...,\mathbf{M}_k\}$ a set of $k$ matrices with the same number of rows, the informative covariance matrix $\mathbf{C}$, defined as $\mathbf{C}_{i,j} = \icovls(\mathbf{M}_i,\mathbf{M}_j)$, is positive definite and symmetric. This allows for defining the precision matrix $\mathbf{P}=\mathbf{C}^{-1}$ and the related partial correlation coefficient matrix $\irls_{partial}$ using Equation~(\ref{eq:IRls.partial})
\begin{equation}
\irls_{partial}(\mathbf{M}_i,\mathbf{M}_j) = \frac{\mathbf{P}_{i,j}}{\sqrt{P_{i,i}P_{j,j}}}
......@@ -207,7 +207,7 @@ p_qs <- c(10,20,50,100) # Number of variable foreach matrices (Column counts of
n_rands <- c(10,100,1000)
@
For two random vectors $\mathbf{x}$ and $\mathbf{y}$ of length $n$, the average coefficient of determination is $\overline{\rpearson^2}=1/(n-1)$. This value is independent of the distribution of the $\mathbf{x}$ and $\mathbf{y}$ values; but what about the independence of $\overline{\rcovls(\X,\Y)}$ from the distributions of $\X$ and $\Y$? To test this independence, and to assess the reasonable randomization effort needed to estimate $\overline{\rcovls(\X,\Y)}$, this value is estimated for four matrices $\mathbf{K},\,\mathbf{L},\,\mathbf{M},\,\mathbf{N}\,$ of $n=\Sexpr{n_indivdual}$ rows and respectively $\Sexpr{p_qs}$ columns. Values of the four matrices are drawn from a normal or an exponential distribution, and $k \in \{\Sexpr{n_rands}\}$ randomizations are tested to estimate $\overline{\rcovls(\X,\Y)}$ and the respective standard deviation $\sigma(\overline{\rcovls(\X,\Y)})$. The $\varls$ of the generated matrices is equal to $1$, therefore the estimated $\covls$ are equal to $\rls$.
<<estimate_nrand, cache=TRUE, message=FALSE, warning=FALSE, include=FALSE, dependson="estimate_nrand_setting">>=
......@@ -770,7 +770,7 @@ p_qs <- c(10,20,50) # Number of variable foreach matrices (Column counts of the
n_rand <- 1000
@
To assess empirically the $\alpha\text{-risk}$ of the procrustean test based on the randomisations realized during the estimation of $\overline{\rcovls(\X,\Y)}$, the distribution of $P_{value}$ under $H_0$ is compared to a uniform distribution between $0$ and $1$ ($\mathcal{U}(0,1)$). To estimate such an empirical distribution, $k=\Sexpr{n_sim}$ pairs of $n \times p$ random matrices with $n=\Sexpr{n_indivdual}$ and
$p \in \{\Sexpr{p_qs}\}$ are simulated under the null hypothesis of independence. Procrustean correlation between these matrices is tested using three tests: our proposed test ($CovLs.test$), the \texttt{protest} method of the vegan R package, and the \texttt{procuste.rtest} method of the ade4 R package. Conformance of the distribution of each set of $k$ $P_{value}$ to $\mathcal{U}(0,1)$ is assessed using the Cramer-von Mises test \citep{CSoRgo:96:00} implemented in the \texttt{cvm.test} function of the R package \texttt{goftest}.
<<estimate_alpha, cache=TRUE, message=FALSE, warning=FALSE, include=FALSE, dependson="estimate_alpha_setting">>=
......@@ -1011,7 +1011,7 @@ print(tab,
}{\ } % <- we can add a footnote in the last curly braces
\end{table}
Two main parameters can influence the Monte Carlo estimation of $\overline{\rcovls(\X,\Y)}$: the distribution used to generate the random matrices, and $k$, the number of random matrix pairs. Two very different distributions are tested to generate the random matrices: the normal and the exponential distributions. The first one is symmetric whereas the second is not, with a high probability for small values and a long tail of large ones. Despite the use of these contrasted distributions, estimates of $\overline{\rcovls(\X,\Y)}$ and of $\sigma(\overline{\rcovls(\X,\Y)})$ are identical if we assume the normal distribution of the $\overline{\rcovls(\X,\Y)}$ estimator and a $0.95$ confidence interval of $\overline{\rcovls(\X,\Y)} \pm 2 \, \sigma(\overline{\rcovls(\X,\Y)})$ (Table~\ref{tab:mrcovls}).
\subsection{Relative sensitivity of $IRLs(X,Y)$ to overfitting}
......@@ -1125,7 +1125,7 @@ r2_sims_vec_all %>%
\end{figure}
\subsubsection*{Partial coefficients of determination}
The simulated correlation network between the four matrices $\mathbf{A},\,\mathbf{B},\,\mathbf{C},\,\mathbf{D}$ induced, beyond the direct simulated correlations, a network of indirect correlations and therefore shared variances (Figure~\ref{fig:nested_shared_variation}). In such a system, the interest of partial correlation coefficients and their associated partial determination coefficients is to measure the correlation between a pair of variables without accounting for the part of that correlation which is explained by other variables, hence extracting the pure correlation between these two matrices. From Figure~\ref{fig:nested_shared_variation}, the expected partial shared variation between $\mathbf{A}$ and $\mathbf{B}$ is $480/(200+480)=0.706$; between $\mathbf{B}$ and $\mathbf{C}$, $64/(480+120)=0.107$; and between $\mathbf{C}$ and $\mathbf{D}$, $120/800=0.150$. All other partial coefficients are expected to be equal to $0$. The effect of the correction introduced in $\irls$ is clearly weaker on the partial coefficients of determination (Figure~\ref{fig:nested_shared}) than on the full coefficients of determination (Figure~\ref{fig:shared_variation}). The spurious random correlations, constituting the over-fitting effect, are distributed over all the pairs of matrices $\mathbf{A},\,\mathbf{B},\,\mathbf{C},\,\mathbf{D}$.
\begin{figure}[!tpb]%figure1
<<fig_nested_shared, echo=FALSE, message=FALSE, warning=FALSE, fig.height=4, fig.width=6>>=
......@@ -1199,7 +1199,7 @@ print(tab,
\subsection{Power of the test based on randomisation}
The power of the $CovLs$ test based on the estimation of $\overline{\rcovls(\X,\Y)}$ is equivalent to the power estimated for both the \texttt{vegan::protest} and \texttt{ade4::procuste.rtest} tests (Table~\ref{tab:power}). As for the two other tests, power decreases when the number of variables ($p$ or $q$) increases, and increases with the number of individuals and the shared variation. The advantage of the test based on the Monte-Carlo estimation of $\overline{\rcovls(\X,\Y)}$ is to remove the need for running a supplementary set of permutations when $\irls$ is computed.
\begin{table}[!t]
\processtable{Power estimation of the procruste tests for two low level of shared variations $5\%$ and $10\%$.\label{tab:power}} {
......@@ -1263,6 +1263,13 @@ print(tab,
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Discussion}
\begin{itemize}
\item As for the other previously published coefficients, the method efficiently corrects the procrustean correlation coefficient for high-dimensional data.
\item It should be noted that this also works for linear models fitted on small sample sizes.
\item Since its squared value represents shares of shared variation, this coefficient offers the advantage, over the other previously corrected coefficients, of being usable in the framework of an analysis of variance of data tables.
\item The efficiency of the correction is weaker for the estimation of the partial coefficients. However, partial coefficients that are theoretically zero are better predicted by our estimator.
\end{itemize}
Text Text Text Text Text Text Text Text.
Text Text Text Text Text Text Text Text.
Text Text Text Text Text Text Text Text.
......@@ -1285,10 +1292,6 @@ Text Text Text Text Text Text Text Text.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Conclusion}
A common approach to estimating the strength of the relationship between two variables is to estimate the part of shared variation. This single value, ranging from zero to one, is easy to interpret. Such a value can also be computed between two sets of variables, but the estimation is, more than for simple vector data, subject to overestimation because of the over-fitting phenomenon, which is amplified for high-dimensional data. With $\irls$ and its squared value, we propose easy-to-compute correlation and determination coefficients far less biased than the original Procrustean correlation coefficient. All the functions needed to estimate the proposed modified versions of these coefficients are included in an R package, ProcMod, available for download from the Comprehensive R Archive Network (CRAN).
......@@ -1333,7 +1336,7 @@ $\X'$ & The transpose of $\X$. \\
$\X \Y$ & Matrix multiplication of $\X$ and $\Y$. \\
$\diag(\X)$ & A column matrix composed of the diagonal
elements of $\X$. \\
$\trace(\X)$ & The trace of $\X$.
\end{tabular}
......
......@@ -99,7 +99,7 @@ Sz{\'e}kely, G.~J., Rizzo, M.~L., and Bakirov, N.~K. (2007).
\bibitem[Theil {\em et~al.}(1958)Theil, Cramer, Moerman, and
Russchen]{Theil:58:00}
Theil, H., Cramer, J.~S., Moerman, H., and Russchen, A. (1958).
\newblock {\em Economic forecasts and policy\/}.
\newblock North-Holland Publishing Company, Amsterdam.
\end{thebibliography}
......@@ -136,7 +136,7 @@ online.}
Multidimensional data and even high-dimensional data, where the number of variables describing each sample is far larger than the sample count, are now regularly produced in functional genomics (\emph{e.g.} transcriptomics, proteomics or metabolomics) and molecular ecology (\emph{e.g.} DNA metabarcoding). Using various techniques, the same sample set can be described by several multidimensional data sets, each of them describing a different facet of the samples. This invites using data analysis methods able to evaluate mutual information shared by these different descriptions. Correlative approaches can be a first and simple way to decipher pairwise relationships of those data sets.
For a long time, several coefficients have been proposed to measure the correlation between two matrices \citep[for a comprehensive review see][]{Ramsay:84:00}. But when applied to high-dimensional data, they suffer from an over-fitting effect leading them to estimate a high correlation even for unrelated data sets. Modified versions of some of these matrix correlation coefficients have already been proposed to tackle this problem. The $\rv_2$ coefficient \citep{Smilde:09:00} corrects the original $\rv$ coefficient \citep{Escoufier:73:00} for over-fitting. Similarly, a modified version of the distance correlation coefficient $\dcor$ \citep{Szekely:07:00} has been proposed by \cite{SzeKely:13:00}. $\dcor$ has the advantage over the other correlation coefficients of not considering only linear relationships. Here we focus on the Procrustes correlation coefficient $\rls$ proposed by \cite{Lingoes:74:00} and by \cite{Gower:71:00}. Let us define $\trace$, a function summing the diagonal elements of a matrix. For an $n \times p$ real matrix $\X$ and a second $n \times q$ real matrix $\Y$, defining respectively two sets of $p$ and $q$ centered variables characterizing $n$ individuals, we define $\covls(\X,\Y)$, an analog of covariance applicable to vectorial data, following Equation~(\ref{eq:CovLs})
\begin{equation}
\covls(\X,\Y) = \frac{\trace((\mathbf{XX}'\mathbf{YY}')^{1/2})}{n-1}
......@@ -196,7 +196,7 @@ and $\irls(\X,\Y)$ the informative Procruste correlation coefficient as follow.
Like $\rls(\X,\Y)$, $\irls(\X,\Y) \in [0;1]$, with the $0$ value corresponding to no correlation and the maximum value $1$ reached for two strictly homothetic data sets.
The corollary of the $\icovls(\X,\Y)$ and $\ivarls(\X)$ definitions is that $\icovls(\X,\Y) \geqslant 0$ and $\ivarls(\X) > 0$. Therefore, for $M=\{\mathbf{M}_1,\mathbf{M}_2,...,\mathbf{M}_k\}$ a set of $k$ matrices with the same number of rows, the informative covariance matrix $\mathbf{C}$, defined as $\mathbf{C}_{i,j} = \icovls(\mathbf{M}_i,\mathbf{M}_j)$, is positive definite and symmetric. This allows for defining the precision matrix $\mathbf{P}=\mathbf{C}^{-1}$ and the related partial correlation coefficient matrix $\irls_{partial}$ using Equation~(\ref{eq:IRls.partial})
\begin{equation}
\irls_{partial}(\mathbf{M}_i,\mathbf{M}_j) = \frac{\mathbf{P}_{i,j}}{\sqrt{P_{i,i}P_{j,j}}}
......@@ -227,7 +227,7 @@ Even when $\X=\Y$ to estimate $\ivarls(\X)$, $\overline{\rcovls(\X,\X)}$ is esti
For two random vectors $\mathbf{x}$ and $\mathbf{y}$ of length $n$, the average coefficient of determination is $\overline{\rpearson^2}=1/(n-1)$. This value is independent of the distribution of the $\mathbf{x}$ and $\mathbf{y}$ values; but what about the independence of $\overline{\rcovls(\X,\Y)}$ from the distributions of $\X$ and $\Y$? To test this independence, and to assess the reasonable randomization effort needed to estimate $\overline{\rcovls(\X,\Y)}$, this value is estimated for four matrices $\mathbf{K},\,\mathbf{L},\,\mathbf{M},\,\mathbf{N}\,$ of $n=20$ rows and respectively $10, 20, 50, 100$ columns. Values of the four matrices are drawn from a normal or an exponential distribution, and $k \in \{10, 100, 1000\}$ randomizations are tested to estimate $\overline{\rcovls(\X,\Y)}$ and the respective standard deviation $\sigma(\overline{\rcovls(\X,\Y)})$. The $\varls$ of the generated matrices is equal to $1$, therefore the estimated $\covls$ are equal to $\rls$.
......@@ -320,7 +320,7 @@ It is also possible to take advantage of the Monte-Carlo estimation of $\overlin
To assess empirically the $\alpha\text{-risk}$ of the procrustean test based on the randomisations realized during the estimation of $\overline{\rcovls(\X,\Y)}$, the distribution of $P_{value}$ under $H_0$ is compared to a uniform distribution between $0$ and $1$ ($\mathcal{U}(0,1)$). To estimate such an empirical distribution, $k=1000$ pairs of $n \times p$ random matrices with $n=20$ and $p \in \{10, 20, 50\}$ are simulated under the null hypothesis of independence. Procrustean correlation between these matrices is tested using three tests: our proposed test ($CovLs.test$), the \texttt{protest} method of the vegan R package, and the \texttt{procuste.rtest} method of the ade4 R package. Conformance of the distribution of each set of $k$ $P_{value}$ to $\mathcal{U}(0,1)$ is assessed using the Cramer-von Mises test \citep{CSoRgo:96:00} implemented in the \texttt{cvm.test} function of the R package \texttt{goftest}.
......@@ -353,7 +353,7 @@ To evaluate relative power of the three considered tests, pairs of to random mat
\begin{table}[!t]
\processtable{Estimation of $\overline{\rcovls(\X,\Y)}$ according to the number of random matrices (k) aligned.\label{tab:mrcovls}}{
% latex table generated in R 3.5.2 by xtable 1.8-4 package
% Fri Aug 23 14:31:42 2019
\begin{tabular}{rrrrrrr}
\hline
& & \multicolumn{2}{c}{normal} & & \multicolumn{2}{c}{exponential}\\ \cline{3-4} \cline{6-7}p & k &\multicolumn{1}{c}{mean} & \multicolumn{1}{c}{sd} & \multicolumn{1}{c}{ } &\multicolumn{1}{c}{mean} & \multicolumn{1}{c}{sd}\\\hline\multirow{3}{*}{10} & 10 & 0.5746 & $1.3687 \times 10^{-2}$ & & 0.5705 & $1.1714 \times 10^{-2}$ \\
......@@ -374,7 +374,7 @@ To evaluate relative power of the three considered tests, pairs of to random mat
}{\ } % <- we can add a footnote in the last curly braces
\end{table}
Two main parameters can influence the Monte Carlo estimation of $\overline{\rcovls(\X,\Y)}$: the distribution used to generate the random matrices, and $k$, the number of random matrix pairs. Two very different distributions are tested to generate the random matrices: the normal and the exponential distributions. The first one is symmetric whereas the second is not, with a high probability for small values and a long tail of large ones. Despite the use of these contrasted distributions, estimates of $\overline{\rcovls(\X,\Y)}$ and of $\sigma(\overline{\rcovls(\X,\Y)})$ are identical if we assume the normal distribution of the $\overline{\rcovls(\X,\Y)}$ estimator and a $0.95$ confidence interval of $\overline{\rcovls(\X,\Y)} \pm 2 \, \sigma(\overline{\rcovls(\X,\Y)})$ (Table~\ref{tab:mrcovls}).
\subsection{Relative sensitivity of $IRLs(X,Y)$ to overfitting}
......@@ -432,7 +432,7 @@ Vectors can be considered as a single column matrix, and the efficiency of $\irl
\end{figure}
\subsubsection*{Partial coefficients of determination}
The simulated correlation network between the four matrices $\mathbf{A},\,\mathbf{B},\,\mathbf{C},\,\mathbf{D}$ induced, beyond the direct simulated correlations, a network of indirect correlations and therefore shared variances (Figure~\ref{fig:nested_shared_variation}). In such a system, the interest of partial correlation coefficients and their associated partial determination coefficients is to measure the correlation between a pair of variables without accounting for the part of that correlation which is explained by other variables, hence extracting the pure correlation between these two matrices. From Figure~\ref{fig:nested_shared_variation}, the expected partial shared variation between $\mathbf{A}$ and $\mathbf{B}$ is $480/(200+480)=0.706$; between $\mathbf{B}$ and $\mathbf{C}$, $64/(480+120)=0.107$; and between $\mathbf{C}$ and $\mathbf{D}$, $120/800=0.150$. All other partial coefficients are expected to be equal to $0$. The effect of the correction introduced in $\irls$ is clearly weaker on the partial coefficients of determination (Figure~\ref{fig:nested_shared}) than on the full coefficients of determination (Figure~\ref{fig:shared_variation}). The spurious random correlations, constituting the over-fitting effect, are distributed over all the pairs of matrices $\mathbf{A},\,\mathbf{B},\,\mathbf{C},\,\mathbf{D}$.
\begin{figure}[!tpb]%figure1
\begin{knitrout}
......@@ -455,7 +455,7 @@ whatever the $p$ tested (Table~\ref{tab:alpha_pvalue}). This ensure that the pro
of the distribution of $P_{values}$ correlation test to $\mathcal{U}(0,1)$
under the null hypothesis.\label{tab:alpha_pvalue}} {
% latex table generated in R 3.5.2 by xtable 1.8-4 package
% Fri Aug 23 14:31:45 2019
\begin{tabular*}{0.98\linewidth}{@{\extracolsep{\fill}}crrr}
\hline
& \multicolumn{3}{c}{Cramer-Von Mises p.value} \\
......@@ -472,12 +472,12 @@ whatever the $p$ tested (Table~\ref{tab:alpha_pvalue}). This ensure that the pro
\subsection{Power of the test based on randomisation}
The power of the $CovLs$ test based on the estimation of $\overline{\rcovls(\X,\Y)}$ is equivalent to the power estimated for both the \texttt{vegan::protest} and \texttt{ade4::procuste.rtest} tests (Table~\ref{tab:power}). As for the two other tests, power decreases when the number of variables ($p$ or $q$) increases, and increases with the number of individuals and the shared variation. The advantage of the test based on the Monte-Carlo estimation of $\overline{\rcovls(\X,\Y)}$ is to remove the need for running a supplementary set of permutations when $\irls$ is computed.
\begin{table}[!t]
\processtable{Power estimation of the procruste tests for two low level of shared variations $5\%$ and $10\%$.\label{tab:power}} {
% latex table generated in R 3.5.2 by xtable 1.8-4 package
% Fri Aug 23 14:31:45 2019
\begin{tabular}{lcrrrrrrrrr}
\hline
& $R^2$ & \multicolumn{4}{c}{5\%} & &\multicolumn{4}{c}{10\%} \\
......@@ -579,7 +579,7 @@ $\X'$ & The transpose of $\X$. \\
$\X \Y$ & Matrix multiplication of $\X$ and $\Y$. \\
$\diag(\X)$ & A column matrix composed of the diagonal
elements of $\X$. \\
$\trace(\X)$ & The trace of $\X$.
\end{tabular}
......