Commit a7d9dea9 by Christelle Melodelima

### Merge branch 'master' of git.metabarcoding.org:malbio-data/ProcMod

parents 84e846b0 4666fe91
 ... ... @@ -9,6 +9,7 @@ Description: More about what it does (maybe more than one line) License: What license is it under? Encoding: UTF-8 LazyData: true Imports: matlib RoxygenNote: 6.0.1 Suggests: knitr, rmarkdown ... ...
 ... ... @@ -6,3 +6,4 @@ S3method(print,pm) export(mcor) export(mvar) export(pm) import(matlib)
 #' @title ProcMod #' @description blabla #' @author Christelle Gonindard-Melodelima #' @import matlib #' require(matlib) #' Compute the variance, covariance matrix of K coordinate matrices #' Compute the variance, covariance matrix of K coordinate matrices. #' #' Covariance between two matrices is defined as the sum of the #' sigular values of the X'Y matrix. All the matrices must have #' the same number of rows. #' #' @param ... the set of matrices #' #' @examples #' # Build Three matrices of 3 rows. #' A <- matrix(1:9,nrow=3) #' B <- matrix(10:15,nrow=3) #' C <- matrix(20:31,nrow=3) #' # compute the variance covariance matrix #' mvar(A,B,C) #' mvar(A=A,B=B,C=C) #' #' @author Eric Coissac & Christelle Gonindard-Melodelima #' @export mvar = function(...) { Xs <- list(...) if (length(Xs)==1 & is.list(Xs[[1]])) Xs=Xs[[1]] Xnames=names(Xs) nXs= sapply(Xs, nrow) ... ... @@ -42,7 +61,7 @@ mvar = function(...) { return(CovXXs) } #' Compute the correlation matrix of K coordinate matrices #' Compute the person correlation matrix of K coordinate matrices #' #' @author Eric Coissac #' @author Christelle Gonindard-Melodelima ... ... @@ -61,12 +80,13 @@ mcor = function(...) { #' @export pm = function (model,data) { cl=match.call() terms = terms(model) if (missing(data)) data=NULL vars = eval(attr(terms(model),"variables"), terms = terms(model,data=data) vars = eval(attr(terms(model,data=data),"variables"), envir = data, enclos = globalenv()) ... ... @@ -298,123 +318,3 @@ summary.pm = function(object, return(results) } mprocuste = function (Y, ...) { ctrace <- function(MAT) sum(MAT^2) Xs <- list(...) nY = nrow(Y) nXs= sapply(Xs, nrow) if (any(nXs!=nY)) { stop("Matrices have different number of rows: ", nY, " and ", cat(nXs)) } Ymean <- colMeans(Y) Xmeans<- sapply(Xs,colMeans) Y <- scale(Y, scale = FALSE) Xs <- lapply(Xs,scale,scale = FALSE) XYs <- lapply(Xs,function(x) crossprod(Y, x)) sol_yxs <- lapply(XYs,svd) A_xys <- lapply(sol_yxs, function(x) x$v %*% t(x$u)) nX = length(Xs) Xx <- rep(1:nX,nX) Xy <- rep(1:nX,rep(nX,nX)) XXs <- mapply(function(x,y) crossprod(Xs[x], Xs[y]), Xx,Xy, SIMPLIFY = FALSE) sol_xxs <- lapply(XXs,svd) A_xxs <- lapply(sol_xxs, function(x) x$v %*% t(x$u)) Xrots <- mapply(function(x,a) x %*% a, Xs,A_xys, SIMPLIFY = FALSE) CovYXs = lapply(sol_yxs, function(sol) sum(sol$d)) CovX1X2 = sum(sol_x1x2$d) VarY = ctrace(Y) VarX1 = ctrace(X1) VarX2 = ctrace(X2) CovEx = matrix(c(VarX1,CovX1X2,CovX1X2,VarX2),nrow=2) Cov2 = matrix(c(CovYX1,CovYX2),nrow=2) pentes = inv(CovEx) %*% Cov2 print(pentes) SdX1 = sqrt(VarX1) SdX2 = sqrt(VarX2) a1 = (CovYX1 * VarX2 - CovYX2 * CovX1X2)/(VarX1*VarX2-CovX1X2^2) a2 = (CovYX2 - a1 * CovX1X2) / VarX2 b = Ymean - a1 * X1mean %*% A_yx1 - a2 * X2mean %*% A_yx2 SCX1 = sum((X1rot * a1)^2) SCX2 = sum((X2rot * a2)^2) Yhat = X1rot * a1 + X2rot * a2 SCR = sum((Y - Yhat)^2) SCT = VarY SCI = SCT - SCX1 - SCX2 - SCR ddl.t = (nrow(Y)-1)^2 ddl.r = ddl.t - 3 vt = SCT/ddl.t vx1= SCX1 vx2= SCX2 vi = SCI vr = SCR/ddl.r fx1=vx1/vr fx2=vx2/vr fi =vi/vr pf.x1=1-pf(fx1,1,ddl.r) pf.x2=1-pf(fx2,1,ddl.r) pf.i =1-pf(fi ,1,ddl.r) ddl = (nrow(Y)-1)^2-1 sd.a1 = sqrt((VarY/VarX1 - a1^2)/ddl) sd.a2 = sqrt((VarY/VarX2 - a2^2)/ddl) t.a1 = a1/sd.a1 t.a2 = a2/sd.a2 p.a1 = 1 - pt(t.a1,ddl) p.a2 = 1 - pt(t.a2,ddl) res = list(coefficients = list(a1=a1,a2=a2,b=b), sd = list(a1=sd.a1,a2=sd.a2), t = list(a1=t.a1,a2=t.a2), p.value = list(a1=p.a1,a2=p.a2), anova = list(X1=SCX1/SCT, X2=SCX2/SCT, X1X2=SCI/SCT, Res=SCR/SCT), SunSq = list(X1=SCX1, X2=SCX2, X1X2=SCI, Res=SCR), MeanSq = list(X1=vx1,X2=vx2,X1X2=vi,Res=vr), Fvalue = list(X1=fx1,X2=fx2,X1X2=fi), Fvalue = list(X1=pf.x1,X2=pf.x2,X1X2=pf.i) ) class(res) <- "mprocuste" return(res) }
 ... ... @@ -343,19 +343,33 @@ Only numerical environmental variables are kept. {r} env=env[,lapply(env,class)=="numeric"] geovar = which(colnames(env) %in% c('Latitude','Longitude')) soilvar= which(colnames(env) %in% c("KLg", "pH", "AlLg", "FeLg", "PLg", "SLg", "CaLg", "MgLg", "MnLg", "CNratio", "CLg", "NLg")) climvar= which(colnames(env) %in% c("Aspect", "TempSeasonality", "MaxMonTemp", "Elevation", "MeanMonTempRange", "AnnMeanTemp", "Isothemality")) geo = env[,geovar]  Eukaryote and bacterial data are arranged in the same order than environmental data. Environmental data are centered and reduced {r} euk=euk[rownames(env),] bac=bac[rownames(env),] env = scale(env,scale = TRUE) soil = env[,soilvar] climat = env[,climvar]  Environmental data are centered and reduced Eukaryote and bacterial data are arranged in the same order than environmental data. {r} env = scale(env,scale = TRUE) euk=euk[rownames(env),] bac=bac[rownames(env),]  Relative frequency tables for Eukaryota and Bacteria are root square transforms which corresponds to Hellinger transformation. ... ... @@ -382,11 +396,20 @@ euk.pco =dudi.pco(euk.dist,full = TRUE) bac.pco =dudi.pco(bac.dist,full = TRUE)  {r} mat.scale = function(mat) { cmat = scale(mat,scale = FALSE) mat.sd = sqrt(sum(cmat^2)) return (cmat/mat.sd) }  Coordinates of the sites are extracted from the PCoA analysis {r} euk.pco.li = euk.pco$li bac.pco.li = bac.pco$li euk.pco.li = mat.scale(euk.pco$li) bac.pco.li = mat.scale(bac.pco$li)  {r, fig.show='hold'} ... ... @@ -407,12 +430,31 @@ The environmental variable are transformed using a Principal Component Analysis. {r} env.pca = dudi.pca(env,scannf = FALSE,nf=nrow(env)-1) env.pca.li = env.pca$li env.pca.li = mat.scale(env.pca$li) plot(env.pca.li[,1:2],cex=0, main="Environmental data") text(env.pca.li[,1:2], labels = rownames(env.pca.li), cex=0.4) soil.pca = dudi.pca(soil,scannf = FALSE,nf=nrow(soil)-1) soil.pca.li = mat.scale(soil.pca$li) climat.pca = dudi.pca(climat,scannf = FALSE,nf=nrow(climat)-1) climat.pca.li = mat.scale(climat.pca$li) geo.pco = dudi.pco(dist(geo),full = TRUE) geo.pco.li = mat.scale(geo.pco$li)  {r} dh = matrix(1,nrow = 62,ncol=62) diag(dh)=0 dh=as.dist(dh) dh.pco =dudi.pco(dh,full = TRUE) dh.pco.li = mat.scale(dh.pco$li)  ### Using the package to analyse relationship among the tables ... ... @@ -426,7 +468,9 @@ library(ProcMod) #### Computing the variance/covariance matix {r} vars = mvar(euk=euk.pco.li,bac=bac.pco.li,env=env.pca.li) vars = mvar(euk=euk.pco.li,bac=bac.pco.li, climat=climat.pca.li,soil=soil.pca.li, geo=geo.pco.li,hist=dh.pco.li)  {r echo=FALSE} ... ... @@ -437,7 +481,13 @@ knitr::kable(vars) #### Computing the correlation matix {r} cors = mcor(euk=euk.pco.li,bac=bac.pco.li,env=env.pca.li) cors = mcor(euk=euk.pco.li,bac=bac.pco.li, climat=climat.pca.li,soil=soil.pca.li, geo=geo.pco.li,hist=dh.pco.li)  {r echo=FALSE} knitr::kable(cors)  {r echo=FALSE} ... ... @@ -447,7 +497,7 @@ knitr::kable(cors) #### Building the multiprocruste model {r} euk.pm = pm(euk.pco.li ~ bac.pco.li + env.pca.li) euk.pm = pm(euk.pco.li ~ soil.pca.li + climat.pca.li + geo.pco.li + dh.pco.li) euk.pm  ... ... @@ -472,3 +522,48 @@ names(partition)=rownames(euk.anova) partition  {r} bac.pm = pm(bac.pco.li ~ soil.pca.li + climat.pca.li + geo.pco.li + dh.pco.li) #bac.pm = pm(bac.pco.li ~ euk.pco.li + dh.pco.li ) bac.pm  {r} plot(bac.pm)  {r} bac.anova = anova(bac.pm) bac.anova  {r} partition = bac.anova[,"Sum Sq"]/sum(bac.anova[,"Sum Sq"]) names(partition)=rownames(bac.anova) partition  {r} soil.pm = pm( dh.pco.li ~ bac.pco.li + euk.pco.li + climat.pca.li + geo.pco.li + soil.pca.li) soil.pm  {r} plot(soil.pm)  {r} soil.anova = anova(soil.pm) soil.anova  {r} histoire = mat.scale(as.matrix(dh)) bacterie = mat.scale(as.matrix(bac.dist)) eukariote = mat.scale(as.matrix(euk.dist)) pm(eukariote ~ bacterie) cor(bac.dist,euk.dist) `
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!