multivariate.R 9.3 KB
Newer Older
1
#' @include procmod_frame.R
2 3 4 5
#' @import MASS
#'
NULL

6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
#' Converts a \code{dist} object to a \code{data.frame} object.
#'
#' The created \code{data.frame} has a attribute \code{is.dist} set to
#' the logical value \code{TRUE}.
#'
#' @param x the \code{dist} object to be converted
#' @param row.names NULL or a \code{character} vector giving the row
#'                  names for the data frame. Missing values
#'                  are not allowed.
#' @param optional  logical. If \code{TRUE}, setting row names and converting
#'                  column names (to syntactic names: see make.names)
#'                  is optional. Note that all of R's base package
#'                  as.data.frame() methods use optional only for column
#'                  names treatment, basically with the meaning of
#'                  data.frame(*, check.names = !optional).
#'                  See also the make.names argument of the
#'                  \code{\link[base]{matrix}} method.
#' @param ...       additional arguments to be passed to or from methods.
#'
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
as.data.frame.dist <- function(x, row.names = NULL, optional = FALSE, ...) {

  x <- as.data.frame(
    as.matrix(x),
    row.names = row.names,
    optional = optional,
    ...)

  attr(x, "is.dist") <- TRUE

  x
}

41 42 43

#' Project a distance matrix in a euclidean space (NMDS).
#'
Eric Coissac committed
44 45 46 47
#' Project a set of points defined by a distance matrix in
#' an eucleadean space using the Kruskal's Non-metric
#' Multidimensional Scaling. This function is mainly a simplified
#' interface on the \code{\link[MASS]{isoMDS}} function using as
48
#' much as possible dimensions to limit the stress. The aims of this
Eric Coissac committed
49 50 51 52
#' NDMS being only to project point in an orthogonal space therefore
#' without any correlation between axis. Because a non-metric method
#' is used no condition is required on the used distance.
#'
53 54 55 56 57 58 59 60
#' @param distances   a \code{\link[stats]{dist}} object or a
#'                    \code{\link[base]{matrix}}
#'                    object representing a distance matrix.
#' @param maxit       The maximum number of iterations.
#' @param trace       Logical for tracing optimization. Default \code{TRUE}.
#' @param tol         convergence tolerance.
#' @param p           Power for Minkowski distance in the configuration space.
#'
61 62 63 64
#' @return a numeric matrix with at most \code{n-1} dimensions, with
#'         \code{n} the number pf observations. This matrix defines the
#'         coordinates of each point in the orthogonal space.
#'
65 66 67
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
68
nmds <- function(distances,
69
                 maxit = 100, trace = FALSE,
70 71 72 73 74 75
                 tol = 0.001, p = 2) {


  if (inherits(distances, "matrix")) {
    distances <- as.dist(distances)
  }
76

77
  stopifnot(inherits(distances, "dist"))
78

79
  k <- attributes(distances)$Size - 1
80

81 82
  y <- suppressWarnings(cmdscale(distances, k))
  k <- ncol(y)
83

84 85 86 87 88 89 90 91
  n <- isoMDS(distances,
    y = y,
    k = k,
    maxit = maxit,
    trace = trace,
    tol = tol,
    p = p
  )
92

93 94 95
  p <- n$points
  attributes(p)$stress <- n$stress
  attributes(p)$projected <- "nmds"
96

97
  p
98 99 100 101
}

#' Project a distance matrix in a euclidean space (PCOA).
#'
Eric Coissac committed
102 103 104 105 106 107 108 109 110 111
#' Project a set of points defined by a distance matrix in
#' an eucleadean space using the Principal Coordinates Analysis
#' method. This function is mainly a simplified
#' interface on the \code{\link[stats]{cmdscale}} function using as
#' much as possible dimensions for the projection. The aims of this
#' PCoA being only to project point in an orthogonal space therefore
#' without any correlation between axis. Because a metric method
#' is used the used distance must be euclidean
#' (cf \code{\link[ProcMod]{is.euclid}}).
#'
112 113 114 115 116 117 118 119
#' @param distances   a \code{\link[stats]{dist}} object or a
#'                    \code{\link[base]{matrix}}
#'                    object representing a distance matrix.
#'
#' @return a numeric matrix with at most \code{n-1} dimensions, with
#'         \code{n} the number pf observations. This matrix defines the
#'         coordinates of each point in the orthogonal space.
#'
120 121 122
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
123 124 125 126 127
pcoa <- function(distances) {

  if (inherits(distances, "matrix")) {
    distances <- as.dist(distances)
  }
128

129
  stopifnot(inherits(distances, "dist"))
130

131 132 133
  k <- attributes(distances)$Size - 1
  x <- suppressWarnings(cmdscale(distances, k = k))
  attributes(x)$projected <- "pcoa"
134

135
  x
136 137 138 139
}

#' Project a set of points in a euclidean space (PCA).
#'
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
#' Project a set of points defined by a set of numeric variables in
#' an eucleadean space using the pricipal componant analysis.
#' This function is mainly a simplified
#' interface on the \code{\link[stats]{prcomp}} function using as
#' much as possible dimensions to keep all the variation. The aims of this
#' PCA being only to project point in an orthogonal space therefore
#' without any correlation between axis. Data are centered by not scaled by
#' default.
#'
#' @param data a numeric matrix describing the points
#' @param scale a \code{logical} value indicating if the dimensions must be scaled
#'        to force for every column that \code{sd=1}. \code{FALSE} by default.
#'
#' @return a numeric matrix with at most \code{n-1} dimensions, with
#'         \code{n} the number pf observations. This matrix defines the
#'         coordinates of each point in the orthogonal space.
#'
157 158 159
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
160 161 162 163 164 165 166 167 168 169 170 171 172
pca <- function(data, scale = FALSE) {

  p <- prcomp(data,
    retx = TRUE,
    center = TRUE,
    scale. = scale,
    tol = 0
  )

  x <- p$x
  attributes(x)$projected <- "pca"

  x
173 174 175 176 177 178 179 180 181
}

#' Double centering of a matrix.
#'
#' colSums and rowSums of the returned matrix are all equal to zero.
#'
#' Inspired from the algorithm described in stackoverflow
#' \url{https://stackoverflow.com/questions/43639063/double-centering-in-r}
#'
182 183 184 185 186
#' @param m a \code{numeric} matrix
#'
#' @return a \code{numeric} matrix centred by rows
#'           and columns
#'
187 188
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
189
#' @export
190
bicenter <- function(m) {
191 192

  # compute the row-wise and column-wise mean matrices
193 194 195
  m0 <- m * 0
  R <- m0 + rowMeans(m)
  C <- t(m0 + colMeans(m))
196 197

  # substract them and add the grand mean
198
  m - R - C + mean(m[])
199 200 201 202 203 204 205
}

#' Test if the distance matrix is euclidean.
#'
#' Actually a simplified version of the ADE4 implementation
#' (\code{\link[ade4]{is.euclid}}).
#'
206 207 208 209 210
#' @param distances	an object of class 'dist'
#' @param tol	a tolerance threshold : an eigenvalue is
#'        considered positive if it is larger than
#'        -tol*lambda1 where lambda1 is the largest eigenvalue.
#'
211 212 213
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
214 215
is.euclid <- function(distances, tol = 1e-07) {
  if (!inherits(distances, "dist")) {
216
    stop("Object of class 'dist' expected")
217
  }
218

219
  if (any(distances < tol)) {
220
    warning("Zero distance(s)")
221
  }
222 223 224 225 226

  distances <- as.matrix(distances)
  n <- ncol(distances)
  delta <- -0.5 * ProcMod::bicenter(distances * distances)
  lambda <- eigen(delta, symmetric = TRUE, only.values = TRUE)$values
227 228 229
  w0 <- lambda[n] / lambda[1]

  w0 > -tol
230 231
}

232
is.orthogonal <- function(x) {
233
  stopifnot(is.procmod_frame(x))
234 235

  !is.null(attr(x, "projected"))
Eric Coissac committed
236 237 238
}

#' Project a dataset in a euclidean space.
239
#'
Eric Coissac committed
240 241 242 243 244 245 246
#' Project a set of points defined by a distance matrix
#' or a set of variables in an eucleadean space.
#' If the distance matrix is a metric, this is done using
#' the \code{\link[ProcMod]{pcoa}} function,
#' for other distance the \code{\link[ProcMod]{nmds}} is used.
#' When points are described by a set of variable the
#' \code{\link[ProcMod]{nmds}} is used.
247
#'
248 249 250 251 252 253 254
#' @param data a numeric matrix describing the points
#' @param tol	a tolerance threshold : an eigenvalue is
#'        considered positive if it is larger than
#'        -tol*lambda1 where lambda1 is the largest eigenvalue.
#' @param scale a \code{logical} value indicating if the dimensions must be scaled
#'        to force for every column that \code{sd=1}. \code{FALSE} by default.
#'
255 256
#' @param ... other parameters specific to some implementation
#'
257 258 259 260
#' @return a numeric matrix with at most \code{n-1} dimensions, with
#'         \code{n} the number pf observations. This matrix defines the
#'         coordinates of each point in the orthogonal space.
#'
261 262 263
#' @author Eric Coissac
#' @author Christelle Gonindard-Melodelima
#' @export
264 265
ortho <- function(data, ...) {
  UseMethod("ortho", data)
Eric Coissac committed
266
}
267

268
#' @rdname ortho
Eric Coissac committed
269
#' @export
270 271 272
ortho.dist <- function(data, tol = 1e-7, ...) {
  if (ProcMod::is.euclid(data, tol = tol)) {
    return(ProcMod::pcoa(data))
273 274
  }

275
  ProcMod::nmds(data)
Eric Coissac committed
276 277
}

278
#' @rdname ortho
Eric Coissac committed
279
#' @export
280 281
ortho.matrix <- function(data, scale = FALSE, ...) {
  pca(data, scale = scale)
Eric Coissac committed
282 283
}

284
#' @rdname ortho
Eric Coissac committed
285
#' @export
286 287 288 289
ortho.data.frame <- function(data, scale = FALSE, ...) {
  if (!is.null(attributes(data)$is.dist) &&
    attributes(data)$is.dist == TRUE) {
    return(ortho(as.dist(as.matrix(data))))
290
  }
Eric Coissac committed
291

292
  pca(as.matrix(data), scale = scale)
Eric Coissac committed
293 294 295
}


296
#' @rdname ortho
Eric Coissac committed
297
#' @export
298
ortho.procmod_frame <- function(data, ...) {
299 300
  if (is.orthogonal(data)) {
    return(data)
301
  }
Eric Coissac committed
302

303
  n <- ncol(data)
Eric Coissac committed
304

305
  p <- vector(mode = "character", length = n)
Eric Coissac committed
306
  for (i in seq_len(n)) {
307
    xt <- ortho(data[[i]])
308
    p[i] <- attributes(xt)$projected
309
    data[[i]] <- xt
Eric Coissac committed
310 311
  }

312 313
  names(p) <- names(data)
  attributes(data)$projected <- p
Eric Coissac committed
314

315
  data
316
}