Commit 8a115d95 by Eric Coissac

Reimplement a new version of procmod corrected by simulation of random matrix…

Reimplement a new version of procmod corrected by simulation of random matrix having the same sizes and the same variances
parent 0f96a9a6
......@@ -5,3 +5,4 @@
inst/doc
.DS_Store
vignettes.ooo
before_trash
......@@ -84,18 +84,20 @@ rp^2
```{r}
library(ProcMod)
r2s = seq(from=0.1, to = 0.9, by = 0.1)
n.sim=20
r2s = seq(from=0.0, to = 0.9, by = 0.1)
n.sim=1000
r2.obs = matrix(0,nrow = n.sim,ncol = length(r2s))
d1 = simulate_matrix(10,2000)
d1 = simulate_matrix(10,2)
for (j in seq_along(r2s)) {
print(c("r2",as.character(r2s[j])))
#print(c("r2",as.character(r2s[j])))
for (i in seq_len(n.sim)) {
print(c("round",as.character(i)))
d2 = simulate_correlation(10,3000,d1,r2s[j])
v = varls2(d1,d2,permutations = 10)
s = sqrt(diag(v))
r2.obs[i,j] = (v / outer(s,s))[1,2]^2
# print(c("round",as.character(i)))
d2 = simulate_correlation(d1,1,r2s[j])
#r2.obs[i,j] = cor5(d1,d2)^2
r2.obs[i,j] = ((max(0,.covls(d1,d2)-.rcovls(d1,d2)))/(sqrt(.covls(d1,d1)-.rcovls(d1,d1))*sqrt(.covls(d2,d2)-.rcovls(d2,d2))))^2
# v = varls(d1,d2,permutations = 10)
# s = sqrt(diag(v))
# r2.obs[i,j] = (v / outer(s,s))[1,2]^2
}
}
......@@ -104,4 +106,307 @@ cbind(R2_theo=r2s,
R2_obs=colMeans(r2.obs),
R2_sd=apply(r2.obs,MARGIN = 2,FUN = sd))
```
```{r}
a= simulate_matrix(40,1)
b= simulate_matrix(40,2)
x = varls(a,b,permutations = NULL)
y = corls(a,b,permutations = 100)
y^2
m = x - y
#(sqrt(outer(diag(x),diag(x)))/m)^2
#sqrtm(outer(diag(x),diag(x)))
```
5 -> 6
10 -> 12 6
20 -> 28 14
30 -> 44 22
40 -> 60 30 20
```{r}
i=c(5,10,20,30,40)
j=c(6,12,28,44,60)
lm(j~i)
plot(i,j,xlim=c(0,50),ylim = c(-10,70))
abline(lm(j~i))
plot(sqrt(i),sqrt(j))
lm(sqrt(j)~sqrt(i))
plot(i^2,j^2)
lm(i^2~j^2)
```
```{r}
a = simulate_matrix(20,1)
b = vector(mode = "list",9)
for (i in seq_along(b)) {
b[[i]] = simulate_correlation(a,1,i/10)
}
b = c(list(a),b)
var(do.call(cbind,b))
var2(do.call(cbind,b))
v.perm=varls(as.procmod.frame(b),permutations = 100,rcovls = TRUE)
v.perm
m = apply(attributes(v.perm)$rcov,MARGIN = c(1,2),mean,na.rm=TRUE)^2
mean(m[upper.tri(m,diag = TRUE)],na.rm=TRUE)
```
```{r}
library(ProcMod)
ps = c(seq(1,10,by=1),seq(15,100,by=5),1000)
qs = ps
ns = seq(5,20, by=5)
n.sim=1000
r2.obs = array(0,dim = c(length(ns),length(ps),length(qs),n.sim))
for (ni in seq_along(ns)) {
N=ns[ni]
for (pi in seq_along(ps)) {
P = ps[pi]
for (qi in seq_along(qs)) {
Q = qs[qi]
for (si in 1:n.sim) {
not_ok = TRUE
while(not_ok) {
d1 = simulate_matrix(N,P,equal.var = TRUE)
d2 = simulate_matrix(N,Q,equal.var = TRUE)
r2=tryCatch({v = varls(d1,d2,permutations = 0)
s = sqrt(diag(v))
(v / outer(s,s))[1,2]^2
},
error=function(cond)
{#message("Redo simulation")
NA}
)
r2.obs[ni,pi,qi,si] = r2
not_ok=is.na(r2)
}
}
}
}
}
```
```{r}
R2_obs=apply(r2.obs,MARGIN = c(1,2,3),FUN = mean)
R2_sd=apply(r2.obs,MARGIN = c(1,2,3),FUN = sd)
dimnames(R2_obs) = list(ns,ps,qs)
N = rep(ns,length(ps)*length(qs))
P = rep(rep(ps,rep(length(ns),length(ps))),length(qs))
Q = rep(qs,rep(length(ns)*length(ps),length(qs)))
R2_obs.tab = tibble(N=N,
P=P,
Q=Q,
Pmin = mapply(min,P,Q),
Qmax = mapply(max,P,Q),
mean=as.numeric(R2_obs),
sd=as.numeric(R2_sd))
R2_obs.tab$NPmin = mapply(min,R2_obs.tab$Pmin,N)
R2_obs.tab$NPmax = mapply(max,R2_obs.tab$Pmin,N)
write_csv2(R2_obs.tab,"R2_H0.csv")
#p = rep(ps,rep(length(qs),length(ps)))
#q = rep(qs,length(ps))
#R2_obs.tn = data.frame(p,q,N=N,mean=as.numeric(R2_obs),sd=as.numeric(R2_sd))
```
```{r}
library(expm)
```
# Modle 1 : l'axe suivant se prend le reste
```{r}
R2.alea.predict.single = function(N,nP,nQ) {
# N : nombre de points
# nP : Nb de var dans petit tableau
# nQ : Nb de var dans grand tableau
# Axes principaux : P = (N-2)/(N-1) ; Q = 1/(N-1)
Axes.main = min(nP,(N-1))
i = 0:(Axes.main-1)
Q = 1/(N-1)
P = (N-2)/(N-1)
Pis = P**i
R2.main = sum(Pis)*Q
# Axes secondaires : Q = Axes.main/(N-1)^2 ; P = 1 - Q ;
Axes.comp = max(nP,nQ) - Axes.main
if (Axes.comp >0) {
i = 0:(Axes.comp-1)
Q = 1/(N-1)^2
P = 1 - Q
Pis = (P**i)
R2.main = P**Axes.comp*R2.main+Q*sum(Pis)
}
return(R2.main)
}
```
# Modle 2 : l'axe suivant se prend le reste
```{r}
R2.alea.predict.single = function(N,nP,nQ) {
# N : nombre de points
# nP : Nb de var dans petit tableau
# nQ : Nb de var dans grand tableau
# Axes principaux : P = (N-2)/(N-1) ; Q = 1/(N-1)
Axes.main = min(nP,(N-1))
Rres = numeric(Axes.main)
R2.main=0
for (i in 1:Axes.main) {
Rres[i]=1-R2.main
if (i==1) k = 0
else k = sum(Rres[1:(i-1)]^2)
# print(k / (N-1)^2 * Rres[i])
R2.main = R2.main + Rres[i] * ( 1/(N-1) - k/(N-1)^2)
}
# R2.main = R2.main - 2*Axes.main / (N-1)^2
# print(2*Axes.main / (N-1)^2)
# Axes secondaires : Q = Axes.main/(N-1)^2 ; P = 1 - Q ;
k=sum(Rres^2) + (1 - R2.main)^2
Axes.comp = max(nP,nQ) - Axes.main
if (Axes.comp >0) {
for (i in 1:Axes.comp) {
#print(R2.main)
R2.main = R2.main + (1-R2.main) * k/(N-1)^2
}
}
return(R2.main)
}
R2.alea.predict =Vectorize(R2.alea.predict.single,
vectorize.args = c("N","nP","nQ"))
```
# Modle 3 : l'axe suivant se prend le reste
```{r}
R2.alea.predict.single = function(N,nP,nQ) {
# N : nombre de points
# nP : Nb de var dans petit tableau
# nQ : Nb de var dans grand tableau
# Axes principaux : P = (N-2)/(N-1) ; Q = 1/(N-1)
Axes.main = min(nP,(N-1))
Rres = numeric(Axes.main)
R2.main=0
for (i in 1:Axes.main) {
Rres[i]=1-R2.main
R2.main = R2.main + Rres[i] * ( 1/(N-1) - (Axes.main - i )/(N-1)^2)
}
# R2.main = R2.main - 2*Axes.main / (N-1)^2
# print(2*Axes.main / (N-1)^2)
# Axes secondaires : Q = Axes.main/(N-1)^2 ; P = 1 - Q ;
k=sum(Rres) + (1 - R2.main)
Axes.comp = max(nP,nQ) - Axes.main
if (Axes.comp >0) {
for (i in 1:Axes.comp) {
#print(R2.main)
R2.main = R2.main + (1-R2.main) * k/(N-1)^2
}
}
return(R2.main)
}
R2.alea.predict =Vectorize(R2.alea.predict.single,
vectorize.args = c("N","nP","nQ"))
```
```{r}
library(tidyverse)
R2_obs.tab = read_csv2("R2_H0.csv")
```
```{r}
R2_obs.tab.PQ = R2_obs.tab %>% filter(Qmax==Pmin)
```
```{r}
with(data = R2_obs.tab.PQ,
plot(mean,
R2.alea.predict(N,Pmin,Qmax),
col=rainbow(4)[N/5],
pch=(1+(Qmax==Pmin)),
cex=0.5,
log="")
)
abline(b=1,a=0)
abline(h=1,lty=2)
abline(v=1,lty=2)
abline(lm(R2.alea.predict(N,Pmin,Qmax)~mean,data = R2_obs.tab.PQ))
#summary(lm(mean~R2.alea.predict(N,Pmin,Qmax),data = R2_obs.tab.PQ))
legend("bottomright",legend = (1:4)*5,fill=rainbow(4),cex = 0.5)
```
```{r}
with(data = R2_obs.tab.PQ,
plot(mean,
R2.alea.predict(N,Pmin,Qmax)-mean,
col=rainbow(4)[N/5],
pch=(1+(Qmax==Pmin)),
cex=0.5,
log="")
)
abline(h=1)
abline(v=2)
abline(v=c(5,10,15,20)*2.5,lty=2)
#abline(lm(log10(R2.alea.predict(N,Pmin,Qmax)/mean)[-1]~log10(Pmin)[-1],data=R2_obs.tab.PQ))
legend("topright",legend = (1:4)*5,fill=rainbow(4),cex = 0.5)
#summary(lm(log10(R2.alea.predict(N,Pmin,Qmax)/mean)[-1]~log10(Pmin)[-1],data=R2_obs.tab.PQ))
```
```{r}
with(data = R2_obs.tab.PQ,
plot(Pmin,
R2.alea.predict(N,Pmin,Qmax)-mean,
col=rainbow(4)[N/5],
pch=(1+(Qmax==Pmin)),
cex=0.5,
log="x")
)
abline(h=1)
abline(v=2)
abline(v=c(5,10,15,20)*4,lty=2,col=rainbow(4))
abline(v=c(5,10,15,20),lty=3,col=rainbow(4))
#abline(lm(log10(R2.alea.predict(N,Pmin,Qmax)/mean)[-1]~log10(Pmin)[-1],data=R2_obs.tab.PQ))
legend("topright",legend = (1:4)*5,fill=rainbow(4),cex = 0.5)
#summary(lm(log10(R2.alea.predict(N,Pmin,Qmax)/mean)[-1]~log10(Pmin)[-1],data=R2_obs.tab.PQ))
```
```{r}
with(data = R2_obs.tab.PQ,
plot(Pmin,
mean,
col=rainbow(4)[N/5],
pch=(1+(Qmax==Pmin)),
cex=0.5,
log="x")
)
abline(h=1)
legend("topright",legend = (1:4)*5,fill=rainbow(4),cex = 0.5)
```
......@@ -29,9 +29,9 @@ Collate:
'model.procmod.R'
'mprocuste.R'
'anova.pm.R'
'mcov.R'
'covls.R'
'permute.R'
'plot.pm.R'
'procuste.R'
'simnull.R'
'simulate.R'
'zzzz.R'
......@@ -24,6 +24,8 @@ S3method(ortho,matrix)
S3method(ortho,procmod.frame)
S3method(plot,pm)
S3method(print,pm)
S3method(print,procmod.corls)
S3method(print,procmod.varls)
S3method(residuals,pm)
S3method(subset,procmod.frame)
export(as.procmod.frame)
......@@ -35,6 +37,7 @@ export(is.pm)
export(is.procmod.frame)
export(model.procmod.default)
export(nmds)
export(null_varls_sim)
export(ortho)
export(pca)
export(pcoa)
......@@ -44,7 +47,6 @@ export(protate)
export(simulate_correlation)
export(simulate_matrix)
export(varls)
export(varls2)
export(vars.procmod)
export(weighted.residuals)
import(MASS)
......
......@@ -31,7 +31,7 @@ anova.pm = function(m, permutations = how(nperm = 999),...) {
SCT = m$SCT
SCR = sum(m$residuals^2)
SCE = SCT * coef.norm * cors.Y
SCE = SCT * abs(coef.norm * cors.Y)
names(SCE) = explicatives
......@@ -65,29 +65,29 @@ anova.pm = function(m, permutations = how(nperm = 999),...) {
}
#' Generate permutation matrix according to a schema.
#'
#'
#' The permutation schema is defined using the `how` function.
#' The implementation of this function is currectly extraced
#' The implementation of this function is currectly extraced
#' from the VEGAN package and just copied pasted here to avoid
#' dependency on an hidden vegan function.
#'
getPermuteMatrix = function (perm, N, strata = NULL)
getPermuteMatrix = function (perm, N, strata = NULL)
{
if (length(perm) == 1) {
perm <- how(nperm = perm)
}
if (!missing(strata) && !is.null(strata)) {
if (inherits(perm, "how") && is.null(getBlocks(perm)))
if (inherits(perm, "how") && is.null(getBlocks(perm)))
setBlocks(perm) <- strata
}
if (inherits(perm, "how"))
if (inherits(perm, "how"))
perm <- shuffleSet(N, control = perm)
else {
if (!is.integer(perm) && !all(perm == round(perm)))
if (!is.integer(perm) && !all(perm == round(perm)))
stop("permutation matrix must be strictly integers: use round()")
}
if (is.null(attr(perm, "control")))
attr(perm, "control") <- structure(list(within = list(type = "supplied matrix"),
if (is.null(attr(perm, "control")))
attr(perm, "control") <- structure(list(within = list(type = "supplied matrix"),
nperm = nrow(perm)), class = "how")
perm
}
......
#' @include procmod.frame.R
#' @include multivariate.R
#'
#' @import expm
#'
#' @author Christelle Gonindard-Melodelima
#' @author Eric Coissac
NULL
#' Compute the trace of a square matrix.
#'
#'
#' @param X a square matrix
#' @return the trace of X
#'
#' @examples
#' m = matrix(1:16,nrow=4)
#' ProcMod:::.Trace(m)
#'
#' @note Internal function do not use.
#'
#' @rdname internal.getPermuteMatrix
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#'
.Trace = function(X) sum(diag(X))
#' Compute the variance, covariance matrix of K coordinate matrices.
#'
#' Covariance between two matrices is defined as the sum of the
#' sigular values of the X'Y matrix. All the matrices must have
#' the same number of rows.
#'
#' @param ... the set of matrices
#' @param permutation
#' @param rcovls
#'
#' @examples
#' # Build Three matrices of 3 rows.
#' A <- matrix(1:9,nrow=3)
#' B <- matrix(10:15,nrow=3)
#' C <- matrix(20:31,nrow=3)
#' # compute the variance covariance matrix
#' varls2(A,B,C)
#' varls2(A=A,B=B,C=C)
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
varls = function(...,nperm = 100,rcovls=FALSE) {
if (is.numeric(nperm) &&
length(nperm)==1 &&
nperm==0)
nperm=NULL
if (is.null(nperm))
rcovls=FALSE
Xs <- list(...)
if (length(Xs)==1) {
x = Xs[[1]]
if (is.procmod.frame(x))
Xs=x
else if (is.pm(x))
return(x$cov)
else
Xs=procmod.frame(x)
}
else
Xs=as.procmod.frame(Xs)
Xnames=names(Xs)
Xs <- ortho(Xs)
XXs = as.procmod.frame(mapply(tcrossprod,
Xs,
SIMPLIFY = FALSE))
nX = length(Xs)
N = nrow(Xs)-1
CovXXs = matrix(0, nrow = nX, ncol = nX)
# Computes the standard Covls covariance matrix
for (i in seq_len(nX))
for (j in i:nX) {
CovXXs[i,j] = Re(.Trace(sqrtm(XXs[[i]] %*% XXs[[j]])))
}
CovXX2s = matrix(0, nrow = nX, ncol = nX)
if (! is.null(nperm)) {
rCovXXs = array(0,dim=c(nX,nX))
for (i in seq_len(nX)) {
for (j in i:nX) {
rCovXXs[i,j] = null_varls_sim(Xs[i],Xs[j],nperm)
rCovXXs[j,i] = rCovXXs[i,j]
CovXXs[i,j] = CovXXs[i,j] - rCovXXs[i,j]
}
}
}
for (i in seq_len(nX))
for (j in i:nX)
CovXXs[j,i] = CovXXs[i,j]
CovXXs = CovXXs / N
colnames(CovXXs)=Xnames
rownames(CovXXs)=Xnames
CovXXs = Re(CovXXs)
if (rcovls)
attr(CovXXs,"rcovls") = rCovXXs / N
attr(CovXXs,"nperm") = nperm
return(make_subS3Class(Re(CovXXs), "procmod.varls"))
}
#' Compute the person correlation matrix of K coordinate matrices
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
corls = function(...,nperm = 100,rcovls=FALSE) {
cov = varls(...,nperm = nperm,rcovls = rcovls)
s = sqrt(diag(cov))
vv= outer(s,s)
rls = make_subS3Class(cov/vv, "procmod.corls")
attr(rls,"nperm")=attr(cov,"nperm")
if (rcovls)
attr(rls,"rcovls") = attr(rls,"rcovls") / vv
return(rls)
}
#' Compute the person partial correlation matrix of K coordinate matrices
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
corls.partial = function(...,nperm = 100,rcovls=FALSE) {
rls = corls(...,nperm = nperm,rcovls = rcovls)
C = solve(rls)
D = sqrt(diag(C) %o% diag(C))
rp = make_subS3Class(C/D,"procmod.corls")
attr(rp,"nperm")=attr(rls,"nperm")
if (rcovls)
attr(rp,"rcovls") = attr(rls,"rcovls")
return(rp)
}
#' Compute the person partial correlation matrix of K coordinate matrices
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
print.procmod.varls = function (x, ...) {
class(x)="matrix"
attr(x,"permutations")=NULL
attr(x,"rcovls") =NULL
print(x)
}
#' Compute the person partial correlation matrix of K coordinate matrices
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
print.procmod.corls = function (x, ...) {
class(x)="matrix"
attr(x,"permutations")=NULL
attr(x,"rcovls") =NULL
print(x)
}
#' @include procmod.frame.R
#' @include multivariate.R
#'
#' @import expm
#'
#' @author Christelle Gonindard-Melodelima
#' @author Eric Coissac
NULL
#' Compute the trace of a square matrix.
#'
#'
#' @param X a square matrix
#' @return the trace of X
#'
#' @examples
#' m = matrix(1:16,nrow=4)
#' ProcMod:::.Trace(m)
#'
#' @note Internal function do not use.
#'
#' @rdname internal.getPermuteMatrix
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#'
.Trace = function(X) sum(diag(X))
#' Compute the variance, covariance matrix of K coordinate matrices.
#'
#' Covariance between two matrices is defined as the sum of the
#' sigular values of the X'Y matrix. All the matrices must have
#' the same number of rows.
#'
#' @param ... the set of matrices
#'
#' @examples
#' # Build Three matrices of 3 rows.
#' A <- matrix(1:9,nrow=3)
#' B <- matrix(10:15,nrow=3)
#' C <- matrix(20:31,nrow=3)
#' # compute the variance covariance matrix
#' mvar(A,B,C)
#' mvar(A=A,B=B,C=C)
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
varls = function(...,permutations = how(nperm = 999)) {
Xs <- list(...)
if (length(Xs)==1) {
x = Xs[[1]]
if (is.procmod.frame(x))
Xs=x
else if (is.pm(x))
return(x$cov)
else
Xs=procmod.frame(x)
}
else
Xs=as.procmod.frame(Xs)
Xnames=names(Xs)
Xs <- ortho(Xs)
XXs = as.procmod.frame(mapply(tcrossprod, Xs,
SIMPLIFY = FALSE))
nX = length(Xs)
N = nrow(Xs)-1
if (! is.null(permutations)) {
pmatrix = .getPermuteMatrix(perm=permutations,N=nrow(Xs))
rCovXXs = matrix(0, nrow = nX * 2, ncol = nX * 2)
pval = matrix(0, nrow = nX * 2, ncol = nX * 2)
for (i in 1:(2*nX))
for (j in 1:(2*nX)) {
if (i %% 2 && j %% 2) {
rCovXXs[i,j]=.Trace(sqrtm(XXs[[ceiling(i / 2)]] %*% XXs[[ceiling(j/2)]])) / N
pval[i,j]=-1
}
else if (i ==j) {
vv = numeric(nrow(pmatrix))
for (k in seq_len(nrow(pmatrix))) {
d = Xs[[ceiling(i / 2)]][pmatrix[k,],]
dd = tcrossprod(d)
vv[k] = Re(.Trace(sqrtm(dd %*% dd)))
}
rCovXXs[i,j]=mean(vv) / N
pval[i,j]=shapiro.test(vv)$p.value
}
else if (i <=j) {
vv = numeric(nrow(pmatrix))
for (k in seq_len(nrow(pmatrix))) {
d = Xs[[ceiling(i / 2)]][pmatrix[k,],]
dd = tcrossprod(d)
vv[k] = Re(.Trace(sqrtm(dd %*% XXs[[ceiling(j/2)]])))
}
rCovXXs[i,j]=mean(vv) / N
rCovXXs[j,i]=rCovXXs[i,j]
pval[i,j]=shapiro.test(vv)$p.value
pval[j,i]=shapiro.test(vv)$p.value
}
}
CovXXs=rCovXXs
Xnames = rep(Xnames,rep(2,length(Xnames)))
Xsuff = rep("",length(Xnames))
Xsuff[seq(from=2,to=length(Xnames),by=2)]="r_"
Xnames = mapply(paste0, Xsuff,Xnames,collapse="")
colnames(CovXXs)=Xnames
rownames(CovXXs)=Xnames
colnames(pval)=Xnames
rownames(pval)=Xnames
print(pval)
}
else {
Xx <- rep(1:nX,nX)
Xy <- rep(1:nX,rep(nX,nX))
CovXXs <- mapply(function(x,y) .Trace(sqrtm(XXs[[x]] %*% XXs[[y]])),
Xx,Xy,
SIMPLIFY = TRUE) / N
dim(CovXXs)=c(nX,nX)
colnames(CovXXs)=Xnames
rownames(CovXXs)=Xnames
}
return(Re(CovXXs))
}
#' Compute the variance, covariance matrix of K coordinate matrices.
#'
#' Covariance between two matrices is defined as the sum of the
#' sigular values of the X'Y matrix. All the matrices must have
#' the same number of rows.
#'
#' @param ... the set of matrices