simulate.R 3.81 KB
Newer Older
1 2
#' Simulate n points of dimension p.
#'
3 4 5 6 7 8 9
#' Points are simulated by drawing values of each dimension from a normal
#' distribution of mean 0 and standard deviation equals to 1.
#' The mean of each dimension is forced to 0 (data are centred).
#' By default variable are also scaled to enforce a strandard deviation
#' strictly equal to 1. Covariances between dimensions are not controled.
#' Therefore they are expected to be equal to 0 and reflect only the
#' random distribution of the covariance between two random vectors.
10
#'
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#' @param n an \code{int} value indicating the number of observations.
#' @param p an \code{int} value indicating the number of dimensions (variables)
#'        simulated
#' @param equal.var a \code{logical} value indicating if the dimensions must be scaled
#'        to force \code{sd=1}. \code{TRUE} by default.
#'
#' @return a numeric matrix of \code{n} rows and \code{p} columns
#'
#' @examples
#' sim1 <- simulate_matrix(25,10)
#' class(sim1)
#' dim(sim1)
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
26
#' @export
27
simulate_matrix <- function(n, p, equal.var = TRUE) {
28 29
  new <- rnorm(n * p, mean = 0, sd = 1)
  dim(new) <- c(n, p)
30

31
  new <- scale(new, center = TRUE, scale = equal.var)
32

33 34 35 36
  attributes(new)$`scaled:center` <- NULL
  attributes(new)$`scaled:scale` <- NULL
  new.sd <- sqrt(sum(new^2) / (n - 1))
  new <- new / new.sd
37 38 39 40 41 42

  return(new)
}

#' Simulate n points of dimension p correlated with a reference matrix.
#'
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
#' Simulates a set of point correlated to another set according to the
#' procrustean correlation definition.
#' Points are simulated by drawing values of each dimension from a normal
#' distribution of mean 0 and standard deviation equals to 1.
#' The mean of each dimension is forced to 0 (data are centred).
#' By default variable are also scaled to enforce a strandard deviation
#' strictly equal to 1. Covariances between dimensions are not controled.
#' Therefore they are expected to be equal to 0 and reflect only the
#' random distribution of the covariance between two random vectors.
#' The intensity of the correlation is determined by the \code{r2}
#' parameter.
#'
#' @param reference a numeric matrix to which the simulated data will be correlated
#' @param p an \code{int} value indicating the number of dimensions (variables)
#'        simulated
#' @param r2 the fraction of variation shared between the \code{reference} and the
#'        simulated data
#' @param equal.var a \code{logical} value indicating if the dimensions must be scaled
#'        to force \code{sd=1}. \code{TRUE} by default.
#'
#' @return a numeric matrix of \code{nrow(reference)} rows and \code{p} columns
#'
#' @examples
#' sim1 <- simulate_matrix(25,10)
#' class(sim1)
#' dim(sim1)
#' sim2 <- simulate_correlation(sim1,20,0.8)
#' corls(sim1, sim2)^2
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
74
#' @export
75 76

simulate_correlation <- function(reference, p, r2, equal.var = TRUE) {
77 78
  n <- nrow(reference)
  maxdim <- max(ncol(reference), p)
79

80
  noise <- simulate_matrix(n, p, equal.var = equal.var)
81 82

  if (maxdim == p && maxdim > ncol(reference)) {
83 84 85 86 87
    # noise is the largest matrix
    YX <- crossprod(noise, reference)
    svd.YX <- svd(YX)
    rot  <- svd.YX$v %*% t(svd.YX$u)
    rotr <- svd.YX$u %*% t(svd.YX$v)
88

89 90
    print(rot)
    print(rotr)
91

92 93 94 95 96 97 98 99 100
    new = ((reference %*% rot) * sqrt(r2) +
            noise              * sqrt(1 - r2)) %*% rotr
  }
  else {
    # reference is the largest matrix
    YX <- crossprod(reference, noise)
    svd.YX <- svd(YX)
    rot  <- svd.YX$v %*% t(svd.YX$u)
    rotr <- svd.YX$u %*% t(svd.YX$v)
101

102 103
    new = (reference       * sqrt(r2) +
           (noise %*% rot) * sqrt(1 - r2)) %*% rotr
104
  }
105

106 107 108 109
  new <- scale(new, scale = FALSE)
  attributes(new)$`scaled:center` <- NULL
  new.sd <- sqrt(sum(new^2) / (n - 1))
  new <- new / new.sd
110 111 112 113

  return(new)
}

114
#simulate_matrix_tree