Commit fd173c3b by Eric Coissac

Add data to estimate impact on the number of randomisation

parent 9525860f
library(vegan)
library(ROBITools)
# Data preparation ----
#
# Loads the four cleaned litter data sets (col, bac, ins, euk) and derives,
# for each of them, a version restricted to the MOTUs that are not rare.

# Read a cleaned data set stored as an RDS file and rebuild it as a ROBITools
# `metabarcoding.data` object from its `reads`, `motus` and `samp` components.
#
# path: path to the `.rds` file to read.
# Returns the object built by `metabarcoding.data()`.
load_metabarcoding <- function(path) {
  raw <- readRDS(path)
  # NOTE(review): `$samp` is kept exactly as in the original script; if the
  # stored component is actually named `samples`, this relies on partial
  # matching by `$` -- confirm against the RDS files.
  metabarcoding.data(
    reads = raw$reads,
    motus = raw$motus,
    samples = raw$samp
  )
}

# Keep only the columns (MOTUs) of `data` whose largest value in the
# `decostand(method = "total")` standardisation of `data$reads` reaches
# `threshold`.
#
# data:      object exposing a `reads` table through `$` and supporting
#            `data[, mask]` column subsetting.
# threshold: minimum standardised abundance a MOTU must reach at least once
#            to be kept (default 0.01, the value used throughout this script).
# Returns `data` restricted to the retained columns.
drop_rare_motus <- function(data, threshold = 0.01) {
  # With its default margin, decostand(method = "total") divides each row by
  # its row total -- presumably one row per sample, so values are relative
  # read frequencies within a sample (TODO confirm row/column orientation).
  relative_reads <- decostand(data$reads, method = "total")
  max_by_motu <- apply(relative_reads, MARGIN = 2, max)
  data[, max_by_motu >= threshold]
}

litere_col_clean <- load_metabarcoding("litere_col_clean.rds")
litere_bac_clean <- load_metabarcoding("litere_bac_clean.rds")
litere_ins_clean <- load_metabarcoding("litere_ins_clean.rds")
litere_euk_clean <- load_metabarcoding("litere_euk_clean.rds")

# Remove rare motus
litere_col_norare <- drop_rare_motus(litere_col_clean)
litere_euk_norare <- drop_rare_motus(litere_euk_clean)
litere_ins_norare <- drop_rare_motus(litere_ins_clean)
litere_bac_norare <- drop_rare_motus(litere_bac_clean)
\relax
\citation{Ramsay:84:00}
\citation{Smilde:09:00}
\citation{Escoufier:73:00}
\citation{Szekely:07:00}
\citation{SzeKely:13:00}
\citation{Lingoes:74:00}
\citation{Gower:71:00}
\citation{Peres-Neto:01:00}
\citation{Bravais:44:00}
\newcplabel{^_1}{1}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}}
\newlabel{eq:CovLs}{{1}{1}}
\newlabel{eq:Rls}{{2}{1}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Approach}{1}}
\citation{Robert:76:00}
\citation{El-Ghaziri:15:00,Mayer:11:00,Smilde:09:00}
\citation{Szekely:07:00}
\citation{SzeKely:13:00}
\citation{Jackson:95:00}
\citation{Peres-Neto:01:00}
\citation{Dixon:03:00}
\citation{Dray:07:00}
\newlabel{eq:SVD}{{3}{2}}
\newlabel{eq:CovLsSVD}{{4}{2}}
\newlabel{eq:ICovLs}{{5}{2}}
\newlabel{eq:IRLs}{{6}{2}}
\newlabel{eq:IRls.partial}{{7}{2}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Methods}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {{3.1}}Monte-Carlo estimation of $\overline {\rcovls (\mathbf {X},\mathbf {Y})}$}{2}}
\newlabel{eq:RCovLsMC}{{8}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {{3.2}}Simulating data for testing sensitivity to overfitting}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {{3.3}}Empirical assessment of the coefficient of determination}{2}}
\citation{CSoRgo:96:00}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Theoretical distribution of the shared variation between the four matrices $\mathbf {A},\tmspace +\thinmuskip {.1667em}\mathbf {B},\tmspace +\thinmuskip {.1667em}\mathbf {C},\tmspace +\thinmuskip {.1667em}\mathbf {D}$, expressed in permille.}}{3}}
\newlabel{fig:nested_shared_variation}{{1}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {{3.4}}Testing significance of $\irls (\mathbf {X},\mathbf {Y})$}{3}}
\newlabel{eq:pvalue}{{9}{3}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Sensitivity to overfitting for various correlation coefficients. (A) Both simulated data sets are matrices of size $(n \times p)$ with $p > 1$. (B) Correlated data sets are vectors ($p=1$) with a varying number of individuals $n$ (vector length). (A \& B) 100 simulations are run for each combination of parameters.}}{3}}
\newlabel{fig:h0_overfit}{{2}{3}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {{4.1}}Relative sensitivity of $IRLs(X,Y)$ to overfitting}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {{4.2}}Evaluating the shared variation}{3}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Shared variation ($R^2$) between two matrices is measured with both the corrected ($\irls $) and the original ($\rls $) versions of the procrustean correlation coefficient. A gradient of $R^2$ is simulated for two population sizes ($n \in \{10,24\}$) and two numbers of descriptive variables ($p \in \{10,100\}$). The black dashed line corresponds to a perfect match where measured $R^2$ equals the simulated one.}}{4}}
\newlabel{fig:shared_variation}{{3}{4}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Estimation error on the partial determination coefficient. Error is defined as the absolute value of the difference between the expected and the estimated partial $R^2$ using the corrected $\irls _{partial}$ and uncorrected $\rls _{partial}$ procrustean correlation coefficients.}}{4}}
\newlabel{fig:nested_shared}{{4}{4}}
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces $P_{values}$ of the Cramer-Von Mises test of conformity of the distribution of $P_{values}$ correlation test to $\mathcal {U}(0,1)$ under the null hypothesis.}}{4}}
\newlabel{tab:alpha_pvalue}{{1}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {{4.3}}$p_{value}$ distribution under null hypothesis}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {{4.4}}Power of the test based on randomisation}{4}}
\bibstyle{natbib}
\bibdata{Document}
\global\@namedef{@lastpage@}{5}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Power estimation of the procrustean tests for two low levels of shared variation, $5\%$ and $10\%$.}}{5}}
\newlabel{tab:power}{{2}{5}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion}{5}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Conclusion}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {A}Notations}{5}}
\begin{thebibliography}{}
\bibitem[Bravais(1844)Bravais]{Bravais:44:00}
Bravais, A. (1844).
\newblock {\em Analyse math{\'e}matique sur les probabilit{\'e}s des erreurs de
situation d'un point\/}.
\newblock Impr. Royale.
\bibitem[Cs{\"o}rg{\H o} and Faraway(1996)Cs{\"o}rg{\H o} and
Faraway]{CSoRgo:96:00}
Cs{\"o}rg{\H o}, S. and Faraway, J.~J. (1996).
\newblock The exact and asymptotic distributions of {Cram{\'e}r-von} {Mises}
statistics.
\bibitem[Dixon(2003)Dixon]{Dixon:03:00}
Dixon, P. (2003).
\newblock {VEGAN}, a package of {R} functions for community ecology.
\newblock {\em J. Veg. Sci.}, {\bf 14}(6), 927--930.
\bibitem[Dray and Dufour(2007)Dray and Dufour]{Dray:07:00}
Dray, S. and Dufour, A.-B. (2007).
\newblock The ade4 package: Implementing the duality diagram for ecologists.
\newblock {\em Journal of Statistical Software, Articles\/}, {\bf 22}(4),
1--20.
\bibitem[El~Ghaziri and Qannari(2015)El~Ghaziri and Qannari]{El-Ghaziri:15:00}
El~Ghaziri, A. and Qannari, E.~M. (2015).
\newblock Measures of association between two datasets; application to sensory
data.
\newblock {\em Food Qual. Prefer.}, {\bf 40}, 116--124.
\bibitem[Escoufier(1973)Escoufier]{Escoufier:73:00}
Escoufier, Y. (1973).
\newblock Le traitement des variables vectorielles.
\newblock {\em Biometrics\/}, pages 751--760.
\bibitem[Gower(1971)Gower]{Gower:71:00}
Gower, J.~C. (1971).
\newblock Statistical methods of comparing different multivariate analyses of
the same data.
\newblock {\em Mathematics in the archaeological and historical sciences\/},
pages 138--149.
\bibitem[Jackson(1995)Jackson]{Jackson:95:00}
Jackson, D.~A. (1995).
\newblock {PROTEST}: A {PROcrustean} randomization {TEST} of community
environment concordance.
\newblock {\em {\'E}coscience\/}, {\bf 2}(3), 297--303.
\bibitem[Lingoes and Sch{\"o}nemann(1974)Lingoes and
Sch{\"o}nemann]{Lingoes:74:00}
Lingoes, J.~C. and Sch{\"o}nemann, P.~H. (1974).
\newblock Alternative measures of fit for the {Sch{\"o}nemann-Carroll} matrix
fitting algorithm.
\newblock {\em Psychometrika\/}, {\bf 39}(4), 423--427.
\bibitem[Mayer {\em et~al.}(2011)Mayer, Lorent, and Horgan]{Mayer:11:00}
Mayer, C.-D., Lorent, J., and Horgan, G.~W. (2011).
\newblock Exploratory analysis of multiple omics datasets using the adjusted
{RV} coefficient.
\newblock {\em Stat. Appl. Genet. Mol. Biol.}, {\bf 10}, Article 14.
\bibitem[Peres-Neto and Jackson(2001)Peres-Neto and Jackson]{Peres-Neto:01:00}
Peres-Neto, P.~R. and Jackson, D.~A. (2001).
\newblock How well do multivariate data sets match? the advantages of a
procrustean superimposition approach over the mantel test.
\newblock {\em Oecologia\/}, {\bf 129}(2), 169--178.
\bibitem[Ramsay {\em et~al.}(1984)Ramsay, ten Berge, and Styan]{Ramsay:84:00}
Ramsay, J.~O., ten Berge, J., and Styan, G. P.~H. (1984).
\newblock Matrix correlation.
\newblock {\em Psychometrika\/}, {\bf 49}(3), 403--423.
\bibitem[Robert and Escoufier(1976)Robert and Escoufier]{Robert:76:00}
Robert, P. and Escoufier, Y. (1976).
\newblock A unifying tool for linear multivariate statistical methods: The
{RV-} coefficient.
\newblock {\em J. R. Stat. Soc. Ser. C Appl. Stat.}, {\bf 25}(3), 257--265.
\bibitem[Smilde {\em et~al.}(2009)Smilde, Kiers, Bijlsma, Rubingh, and van
Erk]{Smilde:09:00}
Smilde, A.~K., Kiers, H. A.~L., Bijlsma, S., Rubingh, C.~M., and van Erk, M.~J.
(2009).
\newblock Matrix correlations for high-dimensional data: the modified
{RV-coefficient}.
\newblock {\em Bioinformatics\/}, {\bf 25}(3), 401--405.
\bibitem[Sz{\'e}kely and Rizzo(2013)Sz{\'e}kely and Rizzo]{SzeKely:13:00}
Sz{\'e}kely, G.~J. and Rizzo, M.~L. (2013).
\newblock The distance correlation t-test of independence in high dimension.
\newblock {\em J. Multivar. Anal.}, {\bf 117}, 193--213.
\bibitem[Sz{\'e}kely {\em et~al.}(2007)Sz{\'e}kely, Rizzo, and
Bakirov]{Szekely:07:00}
Sz{\'e}kely, G.~J., Rizzo, M.~L., and Bakirov, N.~K. (2007).
\newblock Measuring and testing dependence by correlation of distances.
\newblock {\em Ann. Stat.}, {\bf 35}(6), 2769--2794.
\end{thebibliography}
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment