simulate.R 3.74 KB
Newer Older
1 2
#' Simulate n points of dimension p.
#'
3 4 5 6 7 8 9
#' Points are simulated by drawing values of each dimension from a normal
#' distribution of mean 0 and standard deviation equals to 1.
#' The mean of each dimension is forced to 0 (data are centred).
#' By default variable are also scaled to enforce a strandard deviation
#' strictly equal to 1. Covariances between dimensions are not controled.
#' Therefore they are expected to be equal to 0 and reflect only the
#' random distribution of the covariance between two random vectors.
10
#'
11 12 13
#' @param n an \code{int} value indicating the number of observations.
#' @param p an \code{int} value indicating the number of dimensions (variables)
#'        simulated
14
#' @param equal_var a \code{logical} value indicating if the dimensions must be scaled
15 16 17 18 19
#'        to force \code{sd=1}. \code{TRUE} by default.
#'
#' @return a numeric matrix of \code{n} rows and \code{p} columns
#'
#' @examples
20
#' sim1 <- simulate_matrix(25,10)
21 22 23 24 25
#' class(sim1)
#' dim(sim1)
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
26
#' @export
27
simulate_matrix <- function(n, p, equal_var = TRUE) {
28 29
  new <- rnorm(n * p, mean = 0, sd = 1)
  dim(new) <- c(n, p)
30

31
  new <- scale(new, center = TRUE, scale = equal_var)
32

33 34 35
  attributes(new)$`scaled:center` <- NULL
  attributes(new)$`scaled:scale` <- NULL
  new.sd <- sqrt(sum(new^2) / (n - 1))
36
  new / new.sd
37 38
}

39
#' Simulate n points of dimension p correlated to a reference matrix.
40
#'
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
#' Simulates a set of point correlated to another set according to the
#' procrustean correlation definition.
#' Points are simulated by drawing values of each dimension from a normal
#' distribution of mean 0 and standard deviation equals to 1.
#' The mean of each dimension is forced to 0 (data are centred).
#' By default variable are also scaled to enforce a strandard deviation
#' strictly equal to 1. Covariances between dimensions are not controled.
#' Therefore they are expected to be equal to 0 and reflect only the
#' random distribution of the covariance between two random vectors.
#' The intensity of the correlation is determined by the \code{r2}
#' parameter.
#'
#' @param reference a numeric matrix to which the simulated data will be correlated
#' @param p an \code{int} value indicating the number of dimensions (variables)
#'        simulated
#' @param r2 the fraction of variation shared between the \code{reference} and the
#'        simulated data
58
#' @param equal_var a \code{logical} value indicating if the dimensions must be scaled
59 60 61 62 63
#'        to force \code{sd=1}. \code{TRUE} by default.
#'
#' @return a numeric matrix of \code{nrow(reference)} rows and \code{p} columns
#'
#' @examples
64
#' sim1 <- simulate_matrix(25,10)
65 66
#' class(sim1)
#' dim(sim1)
67
#' sim2 <- simulate_correlation(sim1,20,0.8)
68 69 70 71
#' corls(sim1, sim2)^2
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
72
#' @export
73

74
simulate_correlation <- function(reference, p, r2, equal_var = TRUE) {
75 76
  n <- nrow(reference)
  maxdim <- max(ncol(reference), p)
77

78
  noise <- simulate_matrix(n, p, equal_var = equal_var)
79 80

  if (maxdim == p && maxdim > ncol(reference)) {
81 82 83 84 85
    # noise is the largest matrix
    YX <- crossprod(noise, reference)
    svd.YX <- svd(YX)
    rot  <- svd.YX$v %*% t(svd.YX$u)
    rotr <- svd.YX$u %*% t(svd.YX$v)
86

87 88 89 90 91 92 93 94 95
    new = ((reference %*% rot) * sqrt(r2) +
            noise              * sqrt(1 - r2)) %*% rotr
  }
  else {
    # reference is the largest matrix
    YX <- crossprod(reference, noise)
    svd.YX <- svd(YX)
    rot  <- svd.YX$v %*% t(svd.YX$u)
    rotr <- svd.YX$u %*% t(svd.YX$v)
96

97 98
    new = (reference       * sqrt(r2) +
           (noise %*% rot) * sqrt(1 - r2)) %*% rotr
99
  }
100

101 102 103
  new <- scale(new, scale = FALSE)
  attributes(new)$`scaled:center` <- NULL
  new.sd <- sqrt(sum(new^2) / (n - 1))
104
  new / new.sd
105 106
}

107
#simulate_matrix_tree