Commit 20907a46 authored by Eric Coissac's avatar Eric Coissac

With a first version of robimetabar as a R6 object

parent 8565ceb1
......@@ -13,6 +13,7 @@ RoxygenNote: 7.1.0
VignetteBuilder: knitr
Imports: R6,
tidyverse,
ore,
Rdpack
RdMacros: Rdpack
Suggests: vegan,
......@@ -25,6 +26,7 @@ Collate:
'utils.R'
'robiobject.R'
'dplyr.R'
'dplyr_robimetabar.R'
'entropie.R'
'robicategory.R'
'robiuniqueid.R'
......@@ -35,12 +37,14 @@ Collate:
'oligotag.R'
'randseq.R'
'rdna.R'
'robitag.R'
'read_ngsfilter.R'
'robiseq.R'
'read_obifasta.R'
'read_metabar.R'
'robitag.R'
'read_ngsfilter.R'
'read_obitab.R'
'robidata.R'
'robimotu.R'
'robimetabar.R'
'robimetabar_xlsx.R'
'robiseq_db.R'
......
# Generated by roxygen2: do not edit by hand
S3method("$",robimetabar)
S3method("$",robiseq)
S3method("$<-",robimetabar)
S3method("$<-",robimotu)
S3method("$<-",robisample)
S3method("[",robicategory)
S3method("[",robiseq)
......@@ -20,9 +19,8 @@ S3method("[<-",robitag_reverse)
S3method("[<-",robitaxid)
S3method("[<-",robitaxid_master)
S3method("[<-",robiuniqueid)
S3method("[[",robimetabar)
S3method("[[",robiseq)
S3method("[[<-",robimetabar)
S3method("[[<-",robimotu)
S3method("[[<-",robisample)
S3method("motus<-",robimetabar)
S3method("samples<-",robimetabar)
......@@ -32,6 +30,8 @@ S3method(as_robicategory,character)
S3method(as_robicategory,default)
S3method(as_robicategory,factor)
S3method(as_robicategory,robicategory)
S3method(as_robimotu,default)
S3method(as_robimotu,robimotu)
S3method(as_robiobject,default)
S3method(as_robiobject,robiobject)
S3method(as_robisample,default)
......@@ -74,7 +74,9 @@ S3method(expand_names,robitaxonomy)
S3method(format,robicategory)
S3method(full_taxonomy,robitaxonomy)
S3method(ids,default)
S3method(ids,robimotu)
S3method(ids,robisample)
S3method(ids_colname,robimotu)
S3method(ids_colname,robisample)
S3method(is_full_taxonomy,robitaxonomy)
S3method(lowest_common_ancestor,robitaxonomy)
......@@ -116,6 +118,8 @@ S3method(type_sum,robitag_reverse)
S3method(type_sum,robitaxid)
S3method(type_sum,robitaxid_master)
S3method(type_sum,robiuniqueid)
S3method(validate_robimetabar,robimetabar)
S3method(validate_robimotu,robimotu)
S3method(validate_robisample,robisample)
S3method(validate_robitaxonomy,robitaxonomy)
S3method(write.xlsx,default)
......@@ -130,6 +134,7 @@ export(alternative_taxids)
export(as.robimetabar.robiseq_db)
export(as_robicategory)
export(as_robimetabar)
export(as_robimotu)
export(as_robiobject)
export(as_robisample)
export(as_robitag)
......@@ -140,6 +145,7 @@ export(as_robitaxid_master)
export(as_robitaxonomy)
export(as_robiuniqueid)
export(build.robiseq)
export(build_motu_names)
export(build_sample_names)
export(clique_tag)
export(colors_tol)
......@@ -155,6 +161,7 @@ export(exp_q)
export(expand_names)
export(family)
export(feature_as_matrix)
export(filter_motus)
export(filter_tag_gcmax)
export(filter_tag_homopolymere)
export(filter_tag_homopolymere_min)
......@@ -171,6 +178,7 @@ export(is_full_taxonomy)
export(is_robi_verbose)
export(is_robicategory)
export(is_robidata)
export(is_robimotu)
export(is_robiobject)
export(is_robisample)
export(is_robitag)
......@@ -185,6 +193,8 @@ export(motus)
export(new_robicategory)
export(new_robidata)
export(new_robilca)
export(new_robimetabar)
export(new_robimotu)
export(new_robiobject)
export(new_robipath)
export(new_robisample)
......@@ -204,6 +214,7 @@ export(rainbow_tol)
export(rank.list)
export(rbarcode)
export(read.sequence)
export(read_metabar)
export(read_ncbi_taxdump)
export(read_ngsfilter)
export(read_obifasta)
......@@ -219,6 +230,7 @@ export(robiassert_arg)
export(robicategory)
export(robilca)
export(robimetabar)
export(robimotu)
export(robipath)
export(robisample)
export(robitag)
......@@ -240,6 +252,8 @@ export(taxon.at.rank)
export(taxon.path)
export(taxonomic.rank)
export(unclass_robiobject)
export(validate_robimetabar)
export(validate_robimotu)
export(validate_robisample)
export(validate_robitaxonomy)
export(write.xlsx)
......
#' Title
#'
#' @param .data
#' @param ...
#' @param .preserve
#'
#' @return
#' @export
#'
#' @examples
filter_motus <- function(.data, ..., .preserve = FALSE) {
fcall <-match.call()
object <- eval(fcall[[2]],parent.frame())
fcall[[1]] = quote(filter)
fcall[[2]] = quote(self$motus)
e <- parent.frame()
print(get("filter",e))
eval(fcall, envir = object$.__enclos_env__)
}
#' @include read_obifasta.R
NULL
#' Title
#'
#' @param file
#' @param keys
#' @param motu_ids
#' @param read_count
#' @param obiclean_prefix
#' @param obiclean_status
#' @param sumaclust_prefix
#' @param main_taxid
#' @param other_taxid
#' @param verbose
#'
#' @return
#' @export
#'
#' @examples
read_metabar <- function(file,
keys = NULL,
motu_ids = "id",
read_count = "sample",
obiclean_prefix = "obiclean",
obiclean_status = paste0(obiclean_prefix,"_status"),
sumaclust_prefix = "cluster",
main_taxid = "taxid",
other_taxid = c("family","genus","order","species","taxid_by_db"),
verbose = is_robi_verbose()) {
mfasta <- read_obifasta(file = file,keys = keys, verbose = verbose)
features <- mfasta$features
merged_read_count <- sprintf("merged_%s",read_count)
features$sequence <- mfasta$sequence
features$id <- robiuniqueid(mfasta$id)
data <- feature_as_three_columns(features[[merged_read_count]],
motus_ids = features$id) %>%
rename(count = value)
features[[merged_read_count]] <- NULL
if (obiclean_status %in% colnames(features)) {
data <- data %>%
full_join(features[[obiclean_status]] %>%
feature_as_three_columns(motus_ids = features$id),
by = c("motu", "sample"))
cd <- colnames(data)
cd[cd == "value"] <- obiclean_status
colnames(data) <- cd
data[[obiclean_status]] = factor(data[[obiclean_status]],levels = c("h","s","i"))
features[[obiclean_status]] <- NULL
}
features %>% lapply(function(x) {
if (is(x,"list")) feature_as_matrix(x) %>% as_tibble()
else x}) %>% as_tibble() -> features
obiclean_idx <- which(str_detect(colnames(features),
pattern = paste0('^', obiclean_prefix)))
if (length(obiclean_idx) > 0) {
obiclean <- features[,obiclean_idx]
features <- features[,-obiclean_idx]
features$obiclean <- obiclean
}
sumaclust_idx <- which(str_detect(colnames(features),
pattern = paste0('^', sumaclust_prefix)))
if (length(sumaclust_idx) > 0) {
sumaclust <- features[,sumaclust_idx]
features <- features[,-sumaclust_idx]
features$sumaclust <- sumaclust
}
if (main_taxid %in% colnames(features)) {
features[[main_taxid]] <- robitaxid_master(features[[main_taxid]])
}
for (c in other_taxid)
if (c %in% colnames(features)) {
fs <- features[[c]]
if (is_tibble(fs))
for (j in seq_along(fs))
fs[[j]] <- robitaxid(fs[[j]])
else
fs <- robitaxid(fs)
features[[c]] <- fs
}
robimetabar(motus = robimotu(features %>% select(id,everything())),
samples = robisample(levels(data$sample)),
data = data,
verbose = verbose)
}
......@@ -9,8 +9,20 @@
NULL
features_as_tibble <- function(features) {
all_names <- sort(unique(unlist(lapply(features, names))))
data <- lapply(all_names, function(n) vapply(features,
function(y)
if (is.null(v <- y[n])) NA else v,
FUN.VALUE = "") %>%
.coerce_vector()
)
names(data) <- all_names
return(as_tibble(data))
data <- do.call(rbind,lapply(features,
function(x) {
x <- x[all_names]
......@@ -26,16 +38,16 @@ features_as_tibble <- function(features) {
#' Title
#'
#' @param features
#' @param feature
#' @param key
#'
#' @return
#' @export
#'
#' @examples
feature_as_matrix <- function(features,na_value=NA) {
all_names <- sort(unique(unlist(lapply(features, function(x) names(x)))))
data <- do.call(rbind,lapply(features, function(x) x[all_names]))
feature_as_matrix <- function(feature,na_value=NA) {
all_names <- sort(unique(unlist(lapply(feature, names))))
data <- do.call(rbind,lapply(feature, function(x) x[all_names]))
if (!is.na(na_value))
data[is.na(data)] <- na_value
......@@ -45,68 +57,101 @@ feature_as_matrix <- function(features,na_value=NA) {
data
}
parse_value <- function(value) {
data = .Call("R_parse_python_dict",value)
if (!is.null(data)) {
value <- parse_values(data[[2]],simplify = TRUE)
names(value) <- data[[1]]
return(list(value))
}
feature_as_three_columns <- function(feature,motus_ids=NULL) {
samples <- unlist(lapply(feature, names))
all_names <- sort(unique(samples))
if (is.null(motus_ids))
motus_ids <- build_motu_names(n = seq_along(feature))
if ((str_starts(value,fixed("'")) && str_ends(value,fixed("'"))) ||
(str_starts(value,fixed('"')) && str_ends(value,fixed('"'))))
return(str_sub(value, 2, nchar(value) - 1))
lengths <- vapply(feature, length, 0)
motus <- factor(rep(motus_ids,lengths),levels = levels(motus_ids))
samples <- factor(samples,levels = all_names)
values <- unlist(feature)
if (str_to_upper(value) == "TRUE")
return(TRUE)
tibble(motu = motus,
sample = samples,
value = values)
}
if (str_to_upper(value) == "FALSE")
return(FALSE)
value
}
.coerce_vector <- function(data) {
parse_values <- function(values,simplify = FALSE) {
parsed_values <- suppressWarnings(as.numeric(values))
NAs <- which(is.na(parsed_values))
if (length(NA) > 0) {
parsed_values = as.list(parsed_values)
parsed_values[NAs] <- lapply(values[NAs],parse_value)
if (simplify) {
classes <- vapply(parsed_values, class,"")
if (all(classes == classes[1]))
parsed_values <- simplify2array(parsed_values)
}
}
else
if (!simplify)
parsed_values = as.list(parsed_values)
data[data == "None"] <- NA
converted <- suppressWarnings(as.logical(data))
if (all(!is.na(converted) | is.na(data)))
return(converted)
parsed_values
converted <- suppressWarnings(as.integer(data))
na_conv <- is.na(converted)
if (all(!na_conv | is.na(data))) {
num <- suppressWarnings(as.numeric(data))
if (all(converted == num | na_conv))
return(converted)
else
return(num)
}
converted <- .Call("R_as_named_list",data,TRUE)
if (all(suppressWarnings(vapply(converted,length,0)) > 0 | is.na(data))) {
converted <- lapply(converted, function(x) {
if (length(x) > 0) {
value <- .coerce_vector(x[[2]])
names(value) <- x[[1]]
value
}
else x
})
return(converted)
}
.Call("R_trim_and_unquote_strings",data)
}
build_key_value_pattern <- function(keys=NULL) {
pattern <- if (is.null(keys)) '[^ ]+'
else paste0("(",
paste(keys,collapse = "|"),
")")
.build_key_value_pattern <- function(keys=NULL) {
.or_rx <- function(...) paste0('(',paste(list(...),collapse = "|"),')')
.and_rx <- function(...) paste0(list(...),collapse = "")
.sub_rx <- function(x) paste0("(",x,")")
.eventually_rx <- function(x) paste0(.sub_rx(x),"?")
.eventually_several_rx <- function(x) paste0(.sub_rx(x),"*")
.several_rx <- function(x) paste0(.sub_rx(x),"+")
.sq_string = "'[^']*'"
.dq_string = '"[^"]*"'
.string = .or_rx(.sq_string,.dq_string)
paste0(pattern,' *= *([^;]+|"[^"]+|\'[^\']+)+')
.space = "[ \t]*"
.integer <- "[+-]?[0-9]+"
.exponent <- .and_rx("[eE]",.integer)
.numeric1 <- "[+-]?\\.[0-9]+"
.numeric2 <- .and_rx(.integer,"(\\.[0-9]*)?")
.numeric <- .and_rx(.or_rx(.numeric1,.numeric2),.eventually_rx(.exponent))
.value <- .or_rx(.string,.numeric)
.key_val <- .and_rx(.string,.space,":",.space,.value)
.dict <- .and_rx("\\{",.eventually_rx(.and_rx(.key_val,.eventually_several_rx(.and_rx(.space,",",.space,.key_val)))),"\\}")
.left <- if (is.null(keys))
"[^0-9'\";= \t][^'\";= \t]*"
else
do.call(.or_rx,keys)
.right <- .or_rx(.numeric,.string,.dict,"[^;]+")
ore::ore(.and_rx(.sub_rx(.left),.space,"=",.space,.sub_rx(.right)))
}
parse_sequence_header <- function(title,keys_pattern) {
tags <- str_split_fixed(str_trim(str_extract_all(title,keys_pattern)[[1]],
side = "right"),
pattern = " *= *",
n = 2)
tags <- ore::groups(ore::ore.search(keys_pattern,title,
all = TRUE))[,1:2]
parsed_values <- parse_values(tags[,2])
parsed_values <- tags[,2]
names(parsed_values) <- tags[,1]
parsed_values
......@@ -122,12 +167,10 @@ parse_sequence_header <- function(title,keys_pattern) {
#'
#' @export
read_obifasta <- function(file,
keys = NULL,
read_count = "sample",
feature_as_tibble = TRUE,
verbose = is_robi_verbose()) {
keys = NULL,
verbose = is_robi_verbose()) {
keys_pattern = build_key_value_pattern(keys)
keys_pattern = .build_key_value_pattern(keys)
if (verbose)
message(sprintf("Loading the %s file in memory...", file))
......@@ -137,9 +180,8 @@ read_obifasta <- function(file,
if (verbose)
message("Looking for the beginning of the sequences...", appendLF = FALSE)
mfasta <- .Call("R_parse_fasta",mfasta)
names(mfasta) <- c('id','feature','sequence')
mfasta <- .Call("R_parse_fasta",mfasta)
names(mfasta) <- c('id','features','sequence')
if (verbose)
message(sprintf("%d sequences",
......@@ -156,29 +198,14 @@ read_obifasta <- function(file,
pbar <- progress_estimated(n = length(mfasta[[1]]),
min_time = 5)
features <- mfasta$feature
features <- foreach(seq_idx = seq_along(features),
.combine = c,
.multicombine = TRUE,
.maxcombine = 100) %dp% {
features <- as.list(mfasta$features)
for (seq_idx in seq_along(features)) {
pbar$tick()$print()
list(parse_sequence_header(features[seq_idx],
keys_pattern = keys_pattern))
features[[seq_idx]] <- parse_sequence_header(features[[seq_idx]],
keys_pattern = keys_pattern)
}
merged_read_count <- sprintf("merged_%s",read_count)
reads <-
mfasta$feature <- if (feature_as_tibble)
features_as_tibble(features)
else
features
mfasta$features <- features_as_tibble(features)
as_tibble(mfasta)
}
\ No newline at end of file
}
#' @title robimetabar.R
#'
#' @import R6
#' @import dplyr
#' @import tibble
#'
#' @include metabar_data_class.R
#' @include robisample.R
#' @include robimotu.R
#'
NULL
.robimetabar = R6Class("robimetabar",
public = list(
data = NULL,
samples = NULL,
motus = NULL,
distances = NULL,
initialize = function(data,
samples,
motus,
distances
) {
robiassert_arg(!missing(data),"data",
"the data parametter is mandatory")
robiassert_arg(is_tibble(data),
"data",
"Must be a tibble not a {wrongclass}.",
wrongclass = class(data)[1])
self$data <- data
robiassert_arg(!missing(samples),"samples",
"the samples parametter is mandatory")
robiassert_arg(is_robisample(samples),
"samples",
"Must be a robisample not a {wrongclass}.",
wrongclass = class(samples)[1])
self$samples <- samples
robiassert_arg(!missing(motus),"motus",
"the motus parametter is mandatory")
robiassert_arg(is_robimotu(motus),
"motus",
"Must be a robimotu not a {wrongclass}.",
wrongclass = class(motus)[1])
self$motus <- motus
if (!missing(distances))
self$distances <- distances
validate_robimetabar(self)
},
dim = function()
c(nrow(self$samples), nrow(self$motus)),
dimnames = function()
c(samples = self$samples[[which(vapply(self$samples,is_robiuniqueid))]],
motus = self$samples[[which(vapply(self$motus,is_robiuniqueid))]])
)
)
#' Validates that a taxonomy instance respects some constraints.
#'
#'
#' @param object
#'
#' @return
#'
#' @examples
#' s <- robisample(n=20)
#' check_robisample(s)
#' @author Eric Coissac <eric.coissac@metabarcoding.org>
#' @export
validate_robimetabar <- function(object, verbose=is_robi_verbose()) {
UseMethod("validate_robimetabar", object)
}
#' @detail The \code{robimetabar} version of this methode checks that
#' taxonomy declared as full verified that constaint.
#'
#' @rdname validate_robimetabar
#' @export
validate_robimetabar.robimetabar <- function(object, verbose=TRUE) {
object
}
#' Title
#'
#' @param object
#' @param alternative_names
#' @param reference_taxonomy
#' @param ...