simulate.R 3.81 KB
 Eric Coissac committed Jan 24, 2019 1 2 #' Simulate n points of dimension p. #'  Eric Coissac committed Sep 12, 2019 3 4 5 6 7 8 9 #' Points are simulated by drawing values of each dimension from a normal #' distribution of mean 0 and standard deviation equals to 1. #' The mean of each dimension is forced to 0 (data are centred). #' By default variable are also scaled to enforce a strandard deviation #' strictly equal to 1. Covariances between dimensions are not controled. #' Therefore they are expected to be equal to 0 and reflect only the #' random distribution of the covariance between two random vectors.  Eric Coissac committed Jan 24, 2019 10 #'  Eric Coissac committed Sep 12, 2019 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 #' @param n an \code{int} value indicating the number of observations. #' @param p an \code{int} value indicating the number of dimensions (variables) #' simulated #' @param equal.var a \code{logical} value indicating if the dimensions must be scaled #' to force \code{sd=1}. \code{TRUE} by default. #' #' @return a numeric matrix of \code{n} rows and \code{p} columns #' #' @examples #' sim1 <- simulate_matrix(25,10) #' class(sim1) #' dim(sim1) #' #' @author Eric Coissac #' @author Christelle Gonindard-Melodelima  Eric Coissac committed Jan 24, 2019 26 #' @export  Eric Coissac committed May 27, 2019 27 simulate_matrix <- function(n, p, equal.var = TRUE) {  Eric Coissac committed Apr 30, 2019 28 29  new <- rnorm(n * p, mean = 0, sd = 1) dim(new) <- c(n, p)  Eric Coissac committed Jan 24, 2019 30   Eric Coissac committed Apr 30, 2019 31  new <- scale(new, center = TRUE, scale = equal.var)  Eric Coissac committed Jan 24, 2019 32   Eric Coissac committed Apr 30, 2019 33 34 35 36  attributes(new)$scaled:center <- NULL attributes(new)$scaled:scale <- NULL new.sd <- sqrt(sum(new^2) / (n - 1)) new <- new / new.sd  Eric Coissac committed Jan 24, 2019 37 38 39 40 41 42  return(new) } #' Simulate n points of dimension p correlated with a reference matrix. #'  Eric Coissac committed Sep 12, 2019 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 #' Simulates a set of point correlated to another set according to the #' procrustean correlation definition. #' Points are simulated by drawing values of each dimension from a normal #' distribution of mean 0 and standard deviation equals to 1. #' The mean of each dimension is forced to 0 (data are centred). #' By default variable are also scaled to enforce a strandard deviation #' strictly equal to 1. Covariances between dimensions are not controled. #' Therefore they are expected to be equal to 0 and reflect only the #' random distribution of the covariance between two random vectors. #' The intensity of the correlation is determined by the \code{r2} #' parameter. #' #' @param reference a numeric matrix to which the simulated data will be correlated #' @param p an \code{int} value indicating the number of dimensions (variables) #' simulated #' @param r2 the fraction of variation shared between the \code{reference} and the #' simulated data #' @param equal.var a \code{logical} value indicating if the dimensions must be scaled #' to force \code{sd=1}. \code{TRUE} by default. #' #' @return a numeric matrix of \code{nrow(reference)} rows and \code{p} columns #' #' @examples #' sim1 <- simulate_matrix(25,10) #' class(sim1) #' dim(sim1) #' sim2 <- simulate_correlation(sim1,20,0.8) #' corls(sim1, sim2)^2 #' #' @author Eric Coissac #' @author Christelle Gonindard-Melodelima  Eric Coissac committed Jan 24, 2019 74 #' @export  Eric Coissac committed May 27, 2019 75 76  simulate_correlation <- function(reference, p, r2, equal.var = TRUE) {  Eric Coissac committed Apr 30, 2019 77 78  n <- nrow(reference) maxdim <- max(ncol(reference), p)  Eric Coissac committed Jan 24, 2019 79   Eric Coissac committed Apr 30, 2019 80  noise <- simulate_matrix(n, p, equal.var = equal.var)  Eric Coissac committed Jan 24, 2019 81 82  if (maxdim == p && maxdim > ncol(reference)) {  Eric Coissac committed May 27, 2019 83 84 85 86 87  # noise is the largest matrix YX <- crossprod(noise, reference) svd.YX <- svd(YX) rot <- svd.YX$v %*% t(svd.YX$u) rotr <- svd.YX$u %*% t(svd.YX$v)  Eric Coissac committed Jan 24, 2019 88   Eric Coissac committed May 27, 2019 89 90  print(rot) print(rotr)  Eric Coissac committed Jan 24, 2019 91   Eric Coissac committed May 27, 2019 92 93 94 95 96 97 98 99 100  new = ((reference %*% rot) * sqrt(r2) + noise * sqrt(1 - r2)) %*% rotr } else { # reference is the largest matrix YX <- crossprod(reference, noise) svd.YX <- svd(YX) rot <- svd.YX$v %*% t(svd.YX$u) rotr <- svd.YX$u %*% t(svd.YX$v)  Eric Coissac committed Jan 24, 2019 101   Eric Coissac committed May 27, 2019 102 103  new = (reference * sqrt(r2) + (noise %*% rot) * sqrt(1 - r2)) %*% rotr  Eric Coissac committed Apr 30, 2019 104  }  Eric Coissac committed Jan 24, 2019 105   Eric Coissac committed Apr 30, 2019 106 107 108 109  new <- scale(new, scale = FALSE) attributes(new)\$scaled:center <- NULL new.sd <- sqrt(sum(new^2) / (n - 1)) new <- new / new.sd  Eric Coissac committed Jan 24, 2019 110 111 112 113  return(new) }  Eric Coissac committed May 27, 2019 114 #simulate_matrix_tree