Commit 20907a46 authored by Eric Coissac's avatar Eric Coissac

With a first version of robimetabar as a R6 object

parent 8565ceb1
......@@ -13,6 +13,7 @@ RoxygenNote: 7.1.0
VignetteBuilder: knitr
Imports: R6,
tidyverse,
ore,
Rdpack
RdMacros: Rdpack
Suggests: vegan,
......@@ -25,6 +26,7 @@ Collate:
'utils.R'
'robiobject.R'
'dplyr.R'
'dplyr_robimetabar.R'
'entropie.R'
'robicategory.R'
'robiuniqueid.R'
......@@ -35,12 +37,14 @@ Collate:
'oligotag.R'
'randseq.R'
'rdna.R'
'robitag.R'
'read_ngsfilter.R'
'robiseq.R'
'read_obifasta.R'
'read_metabar.R'
'robitag.R'
'read_ngsfilter.R'
'read_obitab.R'
'robidata.R'
'robimotu.R'
'robimetabar.R'
'robimetabar_xlsx.R'
'robiseq_db.R'
......
# Generated by roxygen2: do not edit by hand
S3method("$",robimetabar)
S3method("$",robiseq)
S3method("$<-",robimetabar)
S3method("$<-",robimotu)
S3method("$<-",robisample)
S3method("[",robicategory)
S3method("[",robiseq)
......@@ -20,9 +19,8 @@ S3method("[<-",robitag_reverse)
S3method("[<-",robitaxid)
S3method("[<-",robitaxid_master)
S3method("[<-",robiuniqueid)
S3method("[[",robimetabar)
S3method("[[",robiseq)
S3method("[[<-",robimetabar)
S3method("[[<-",robimotu)
S3method("[[<-",robisample)
S3method("motus<-",robimetabar)
S3method("samples<-",robimetabar)
......@@ -32,6 +30,8 @@ S3method(as_robicategory,character)
S3method(as_robicategory,default)
S3method(as_robicategory,factor)
S3method(as_robicategory,robicategory)
S3method(as_robimotu,default)
S3method(as_robimotu,robimotu)
S3method(as_robiobject,default)
S3method(as_robiobject,robiobject)
S3method(as_robisample,default)
......@@ -74,7 +74,9 @@ S3method(expand_names,robitaxonomy)
S3method(format,robicategory)
S3method(full_taxonomy,robitaxonomy)
S3method(ids,default)
S3method(ids,robimotu)
S3method(ids,robisample)
S3method(ids_colname,robimotu)
S3method(ids_colname,robisample)
S3method(is_full_taxonomy,robitaxonomy)
S3method(lowest_common_ancestor,robitaxonomy)
......@@ -116,6 +118,8 @@ S3method(type_sum,robitag_reverse)
S3method(type_sum,robitaxid)
S3method(type_sum,robitaxid_master)
S3method(type_sum,robiuniqueid)
S3method(validate_robimetabar,robimetabar)
S3method(validate_robimotu,robimotu)
S3method(validate_robisample,robisample)
S3method(validate_robitaxonomy,robitaxonomy)
S3method(write.xlsx,default)
......@@ -130,6 +134,7 @@ export(alternative_taxids)
export(as.robimetabar.robiseq_db)
export(as_robicategory)
export(as_robimetabar)
export(as_robimotu)
export(as_robiobject)
export(as_robisample)
export(as_robitag)
......@@ -140,6 +145,7 @@ export(as_robitaxid_master)
export(as_robitaxonomy)
export(as_robiuniqueid)
export(build.robiseq)
export(build_motu_names)
export(build_sample_names)
export(clique_tag)
export(colors_tol)
......@@ -155,6 +161,7 @@ export(exp_q)
export(expand_names)
export(family)
export(feature_as_matrix)
export(filter_motus)
export(filter_tag_gcmax)
export(filter_tag_homopolymere)
export(filter_tag_homopolymere_min)
......@@ -171,6 +178,7 @@ export(is_full_taxonomy)
export(is_robi_verbose)
export(is_robicategory)
export(is_robidata)
export(is_robimotu)
export(is_robiobject)
export(is_robisample)
export(is_robitag)
......@@ -185,6 +193,8 @@ export(motus)
export(new_robicategory)
export(new_robidata)
export(new_robilca)
export(new_robimetabar)
export(new_robimotu)
export(new_robiobject)
export(new_robipath)
export(new_robisample)
......@@ -204,6 +214,7 @@ export(rainbow_tol)
export(rank.list)
export(rbarcode)
export(read.sequence)
export(read_metabar)
export(read_ncbi_taxdump)
export(read_ngsfilter)
export(read_obifasta)
......@@ -219,6 +230,7 @@ export(robiassert_arg)
export(robicategory)
export(robilca)
export(robimetabar)
export(robimotu)
export(robipath)
export(robisample)
export(robitag)
......@@ -240,6 +252,8 @@ export(taxon.at.rank)
export(taxon.path)
export(taxonomic.rank)
export(unclass_robiobject)
export(validate_robimetabar)
export(validate_robimotu)
export(validate_robisample)
export(validate_robitaxonomy)
export(write.xlsx)
......
#' Title
#'
#' @param .data
#' @param ...
#' @param .preserve
#'
#' @return
#' @export
#'
#' @examples
filter_motus <- function(.data, ..., .preserve = FALSE) {
fcall <-match.call()
object <- eval(fcall[[2]],parent.frame())
fcall[[1]] = quote(filter)
fcall[[2]] = quote(self$motus)
e <- parent.frame()
print(get("filter",e))
eval(fcall, envir = object$.__enclos_env__)
}
#' @include read_obifasta.R
NULL
#' Title
#'
#' @param file
#' @param keys
#' @param motu_ids
#' @param read_count
#' @param obiclean_prefix
#' @param obiclean_status
#' @param sumaclust_prefix
#' @param main_taxid
#' @param other_taxid
#' @param verbose
#'
#' @return
#' @export
#'
#' @examples
read_metabar <- function(file,
keys = NULL,
motu_ids = "id",
read_count = "sample",
obiclean_prefix = "obiclean",
obiclean_status = paste0(obiclean_prefix,"_status"),
sumaclust_prefix = "cluster",
main_taxid = "taxid",
other_taxid = c("family","genus","order","species","taxid_by_db"),
verbose = is_robi_verbose()) {
mfasta <- read_obifasta(file = file,keys = keys, verbose = verbose)
features <- mfasta$features
merged_read_count <- sprintf("merged_%s",read_count)
features$sequence <- mfasta$sequence
features$id <- robiuniqueid(mfasta$id)
data <- feature_as_three_columns(features[[merged_read_count]],
motus_ids = features$id) %>%
rename(count = value)
features[[merged_read_count]] <- NULL
if (obiclean_status %in% colnames(features)) {
data <- data %>%
full_join(features[[obiclean_status]] %>%
feature_as_three_columns(motus_ids = features$id),
by = c("motu", "sample"))
cd <- colnames(data)
cd[cd == "value"] <- obiclean_status
colnames(data) <- cd
data[[obiclean_status]] = factor(data[[obiclean_status]],levels = c("h","s","i"))
features[[obiclean_status]] <- NULL
}
features %>% lapply(function(x) {
if (is(x,"list")) feature_as_matrix(x) %>% as_tibble()
else x}) %>% as_tibble() -> features
obiclean_idx <- which(str_detect(colnames(features),
pattern = paste0('^', obiclean_prefix)))
if (length(obiclean_idx) > 0) {
obiclean <- features[,obiclean_idx]
features <- features[,-obiclean_idx]
features$obiclean <- obiclean
}
sumaclust_idx <- which(str_detect(colnames(features),
pattern = paste0('^', sumaclust_prefix)))
if (length(sumaclust_idx) > 0) {
sumaclust <- features[,sumaclust_idx]
features <- features[,-sumaclust_idx]
features$sumaclust <- sumaclust
}
if (main_taxid %in% colnames(features)) {
features[[main_taxid]] <- robitaxid_master(features[[main_taxid]])
}
for (c in other_taxid)
if (c %in% colnames(features)) {
fs <- features[[c]]
if (is_tibble(fs))
for (j in seq_along(fs))
fs[[j]] <- robitaxid(fs[[j]])
else
fs <- robitaxid(fs)
features[[c]] <- fs
}
robimetabar(motus = robimotu(features %>% select(id,everything())),
samples = robisample(levels(data$sample)),
data = data,
verbose = verbose)
}
......@@ -9,8 +9,20 @@
NULL
features_as_tibble <- function(features) {
all_names <- sort(unique(unlist(lapply(features, names))))
data <- lapply(all_names, function(n) vapply(features,
function(y)
if (is.null(v <- y[n])) NA else v,
FUN.VALUE = "") %>%
.coerce_vector()
)
names(data) <- all_names
return(as_tibble(data))
data <- do.call(rbind,lapply(features,
function(x) {
x <- x[all_names]
......@@ -26,16 +38,16 @@ features_as_tibble <- function(features) {
#' Title
#'
#' @param features
#' @param feature
#' @param key
#'
#' @return
#' @export
#'
#' @examples
feature_as_matrix <- function(features,na_value=NA) {
all_names <- sort(unique(unlist(lapply(features, function(x) names(x)))))
data <- do.call(rbind,lapply(features, function(x) x[all_names]))
feature_as_matrix <- function(feature,na_value=NA) {
all_names <- sort(unique(unlist(lapply(feature, names))))
data <- do.call(rbind,lapply(feature, function(x) x[all_names]))
if (!is.na(na_value))
data[is.na(data)] <- na_value
......@@ -45,68 +57,101 @@ feature_as_matrix <- function(features,na_value=NA) {
data
}
parse_value <- function(value) {
data = .Call("R_parse_python_dict",value)
if (!is.null(data)) {
value <- parse_values(data[[2]],simplify = TRUE)
names(value) <- data[[1]]
return(list(value))
}
feature_as_three_columns <- function(feature,motus_ids=NULL) {
samples <- unlist(lapply(feature, names))
all_names <- sort(unique(samples))
if (is.null(motus_ids))
motus_ids <- build_motu_names(n = seq_along(feature))
if ((str_starts(value,fixed("'")) && str_ends(value,fixed("'"))) ||
(str_starts(value,fixed('"')) && str_ends(value,fixed('"'))))
return(str_sub(value, 2, nchar(value) - 1))
lengths <- vapply(feature, length, 0)
motus <- factor(rep(motus_ids,lengths),levels = levels(motus_ids))
samples <- factor(samples,levels = all_names)
values <- unlist(feature)
if (str_to_upper(value) == "TRUE")
return(TRUE)
tibble(motu = motus,
sample = samples,
value = values)
}
if (str_to_upper(value) == "FALSE")
return(FALSE)
value
}
.coerce_vector <- function(data) {
parse_values <- function(values,simplify = FALSE) {
parsed_values <- suppressWarnings(as.numeric(values))
NAs <- which(is.na(parsed_values))
if (length(NA) > 0) {
parsed_values = as.list(parsed_values)
parsed_values[NAs] <- lapply(values[NAs],parse_value)
if (simplify) {
classes <- vapply(parsed_values, class,"")
if (all(classes == classes[1]))
parsed_values <- simplify2array(parsed_values)
}
}
else
if (!simplify)
parsed_values = as.list(parsed_values)
data[data == "None"] <- NA
converted <- suppressWarnings(as.logical(data))
if (all(!is.na(converted) | is.na(data)))
return(converted)
parsed_values
converted <- suppressWarnings(as.integer(data))
na_conv <- is.na(converted)
if (all(!na_conv | is.na(data))) {
num <- suppressWarnings(as.numeric(data))
if (all(converted == num | na_conv))
return(converted)
else
return(num)
}
converted <- .Call("R_as_named_list",data,TRUE)
if (all(suppressWarnings(vapply(converted,length,0)) > 0 | is.na(data))) {
converted <- lapply(converted, function(x) {
if (length(x) > 0) {
value <- .coerce_vector(x[[2]])
names(value) <- x[[1]]
value
}
else x
})
return(converted)
}
.Call("R_trim_and_unquote_strings",data)
}
build_key_value_pattern <- function(keys=NULL) {
pattern <- if (is.null(keys)) '[^ ]+'
else paste0("(",
paste(keys,collapse = "|"),
")")
.build_key_value_pattern <- function(keys=NULL) {
.or_rx <- function(...) paste0('(',paste(list(...),collapse = "|"),')')
.and_rx <- function(...) paste0(list(...),collapse = "")
.sub_rx <- function(x) paste0("(",x,")")
.eventually_rx <- function(x) paste0(.sub_rx(x),"?")
.eventually_several_rx <- function(x) paste0(.sub_rx(x),"*")
.several_rx <- function(x) paste0(.sub_rx(x),"+")
.sq_string = "'[^']*'"
.dq_string = '"[^"]*"'
.string = .or_rx(.sq_string,.dq_string)
paste0(pattern,' *= *([^;]+|"[^"]+|\'[^\']+)+')
.space = "[ \t]*"
.integer <- "[+-]?[0-9]+"
.exponent <- .and_rx("[eE]",.integer)
.numeric1 <- "[+-]?\\.[0-9]+"
.numeric2 <- .and_rx(.integer,"(\\.[0-9]*)?")
.numeric <- .and_rx(.or_rx(.numeric1,.numeric2),.eventually_rx(.exponent))
.value <- .or_rx(.string,.numeric)
.key_val <- .and_rx(.string,.space,":",.space,.value)
.dict <- .and_rx("\\{",.eventually_rx(.and_rx(.key_val,.eventually_several_rx(.and_rx(.space,",",.space,.key_val)))),"\\}")
.left <- if (is.null(keys))
"[^0-9'\";= \t][^'\";= \t]*"
else
do.call(.or_rx,keys)
.right <- .or_rx(.numeric,.string,.dict,"[^;]+")
ore::ore(.and_rx(.sub_rx(.left),.space,"=",.space,.sub_rx(.right)))
}
parse_sequence_header <- function(title,keys_pattern) {
tags <- str_split_fixed(str_trim(str_extract_all(title,keys_pattern)[[1]],
side = "right"),
pattern = " *= *",
n = 2)
tags <- ore::groups(ore::ore.search(keys_pattern,title,
all = TRUE))[,1:2]
parsed_values <- parse_values(tags[,2])
parsed_values <- tags[,2]
names(parsed_values) <- tags[,1]
parsed_values
......@@ -122,12 +167,10 @@ parse_sequence_header <- function(title,keys_pattern) {
#'
#' @export
read_obifasta <- function(file,
keys = NULL,
read_count = "sample",
feature_as_tibble = TRUE,
verbose = is_robi_verbose()) {
keys = NULL,
verbose = is_robi_verbose()) {
keys_pattern = build_key_value_pattern(keys)
keys_pattern = .build_key_value_pattern(keys)
if (verbose)
message(sprintf("Loading the %s file in memory...", file))
......@@ -137,9 +180,8 @@ read_obifasta <- function(file,
if (verbose)
message("Looking for the beginning of the sequences...", appendLF = FALSE)
mfasta <- .Call("R_parse_fasta",mfasta)
names(mfasta) <- c('id','feature','sequence')
mfasta <- .Call("R_parse_fasta",mfasta)
names(mfasta) <- c('id','features','sequence')
if (verbose)
message(sprintf("%d sequences",
......@@ -156,29 +198,14 @@ read_obifasta <- function(file,
pbar <- progress_estimated(n = length(mfasta[[1]]),
min_time = 5)
features <- mfasta$feature
features <- foreach(seq_idx = seq_along(features),
.combine = c,
.multicombine = TRUE,
.maxcombine = 100) %dp% {
features <- as.list(mfasta$features)
for (seq_idx in seq_along(features)) {
pbar$tick()$print()
list(parse_sequence_header(features[seq_idx],
keys_pattern = keys_pattern))
features[[seq_idx]] <- parse_sequence_header(features[[seq_idx]],
keys_pattern = keys_pattern)
}
merged_read_count <- sprintf("merged_%s",read_count)
reads <-
mfasta$feature <- if (feature_as_tibble)
features_as_tibble(features)
else
features
mfasta$features <- features_as_tibble(features)
as_tibble(mfasta)
}
\ No newline at end of file
}
This diff is collapsed.
#' @include robiuniqueid.R
#' @include robicategory.R
#' @include ids.R
#' @import tibble
#' @import dplyr
NULL
#' Builds a serie of motu names.
#'
#' motu names are build by concatenating a group name (facutative), a motu
#' set id, a motu number in the set and a PCR repeat id (facutative).
#'
#' @param n an \code{integer} vector indicating the number of motu names to
#' generate per category. If \code{n} has names, they are used as category
#' names. Otherwhise category names are generated by concatenating the word
#' \code{motu} to a serie of letters.
#'
#' @param ndigits a singme integer value indicating on how many digits the motu
#' numerical serie number have to be padded with 0s.
#'
#' @param pcr_replicates a number indicating the number of pcr replicates or a
#' \code{character} vector with the name of the PCR replicates.
#'
#' @param groups a \code{character} vector of motu group names for which the
#' motu design have to be repeated.
#'
#' @param sep a character string to separate the name parts
#' @param verbose if `TRUE` warnings are emitted if the constructor takes
#' some decision because of missing values and estimates default values these
#' missing data.
#'
#' @examples
#' build_motu_names(5)
#'
#' build_motu_names(c(5, 3))
#'
#' build_motu_names(c(SAMP=5, CTRL=3), ndigits = 2)
#'
#' build_motu_names(c(SAMP=5, CTRL=3), ndigits = 2,
#' pcr_replicates = 3)
#'
#' build_motu_names(c(SAMP=5, CTRL=3), ndigits = 2,
#' pcr_replicates = c("A","B","C"))
#'
#' build_motu_names(c(SAMP=5, CTRL=3), ndigits = 2,
#' pcr_replicates = c("A","B","C"),
#' groups=c("S1","S2"))
#'
#' @author Eric Coissac <eric.coissac@metabarcoding.org>
#' @export
build_motu_names <- function(n,
ndigits = max(floor(log10(n)) + 1),
sep = "_",
verbose = is_robi_verbose()) {
if (!is.numeric(n))
{
stop("'n' must be a numeric value")
}
prefixes = names(n)
if (is.null(prefixes)) {
prefixes <- if (length(n) == 1)
"motu"
else
paste0("motu",
combine_LETTERS(length(n)))
}
prefixes <- rep(prefixes,n)
ns <- unlist(lapply(n, seq_len))
snames <- sprintf("%s%0*d",prefixes,ndigits,ns)
robiuniqueid(snames)
}
#' Title
#'
#' @param object
#'
#' @return
#'
#' @examples
#' s <- robimotu(n=20)
#' check_robimotu(s)
#' @author Eric Coissac <eric.coissac@metabarcoding.org>
#' @export
validate_robimotu <- function(object, verbose=TRUE) {
UseMethod("validate_robimotu", object)
}
#' @rdname validate_robimotu
#' @export
validate_robimotu.robimotu <- function(object, verbose=is_robi_verbose()) {
columns <- table(sapply(object,function(i) class(i)[1]))
cc <- function(x) ifelse(is.na(columns[x]),0,columns[x])
if (cc("robiuniqueid") > 1)
stop("A robimotu must have a single robiuniqueid column")
if (cc("robiuniqueid") == 0)
stop("A robimotu must have a robiuniqueid column")
object
}
#' Test if an object belongs \code{robimotu} class
#'
#' @param object the object to test
#' @return a \code{logical} value
#'
#' @examples
#' x <- robimotu(n=10)
#' is_robimotu(x)
#' is_robimotu("toto")
#'
#' @author Eric Coissac <eric.coissac@metabarcoding.org>
#' @export
is_robimotu = function(object) {
inherits(object,'robimotu')
}
#' Title
#'
#' @param object the object to modify
#' @param ... Passed on to \code{\link[base]{structure}}()
#' @param class Subclasses to assign to the new object, default: none
#'
#' @examples
#'
#' @return
#' @export
new_robimotu <- function(object,...,class=NULL) {
validate_robimotu(
new_robidata(object,
...,
class = c(class, "robimotu")
)
)
}
#' Build a \code{robimotu} instance.
#'
#' \code{robimotu} class is a specialisation of \code{\link[dplyr]{tibble}}.
#' It aims to store information on motus
#'
#' @param motus
#' @param n an integer value indicating the number of motu. By default equat
#' @param motu_ids the column name in the `motus` table containing
#' the motu identifiers.
#'
#' @param motu_categories
#' @param verbose if `TRUE` warnings are emitted if the constructor takes
#' some decision because of missing values and estimates
#' default values these missing data.
#'
#' @author Eric Coissac <eric.coissac@metabarcoding.org>
#' @export
robimotu <- function(table,
n = ifelse(is.null(dim(table)),
length(table),
nrow(table)),
motu_ids = "id",
verbose = is_robi_verbose()) {
if (missing(table)) {
if (missing(n)) {
stop("'table' or 'n' must be provided")
}
if (!is.numeric(n))
{
stop("'n' must be a numeric value")
}
motu=build_motu_names(n)
table <- tibble(motu)
colnames(table) <- motu_ids
}
if (is_atomic(table)) {
table = tibble(table)
colnames(table) = motu_ids
}
data <- as_tibble(table)
if (!motu_ids %in% colnames(data))
data[[motu_ids]] <- build_motu_names(nrow(data))
if (!is_robiuniqueid(data[[motu_ids]]))
data[[motu_ids]] <- robiuniqueid(data[[motu_ids]])
new_robimotu(data)
}
#' @rdname ids
#' @export
ids.robimotu <- function(object) {
object[[which(vapply(object, is_robiuniqueid, FUN.VALUE = TRUE))]]
}
#' @rdname ids_colname
#' @export
ids_colname.robimotu <- function(object) {
colnames(object)[which(vapply(object, is_robiuniqueid, FUN.VALUE = TRUE))]
}
#' @export