Commit 27da5432 authored by Eric Coissac's avatar Eric Coissac

Adds a function for writing sequences following the fasta format

parent de9579f3
......@@ -50,6 +50,7 @@ Collate:
'ecotag.R'
'ecotransform.R'
'entropie.R'
'geom_dnalogo.R'
'lowest_common_ancestor.R'
'metabar_data_class.R'
'taxonomy_methods.R'
......
......@@ -335,6 +335,7 @@ export(as_robitaxid)
export(as_robitaxid_master)
export(as_robitaxonomy)
export(as_robiuniqueid)
export(bezier3)
export(build.robiseq)
export(build_motu_names)
export(build_sample_names)
......@@ -530,6 +531,7 @@ export(vec_cast.robiuniqueid)
export(vec_ptype2.robicategory)
export(vec_ptype2.robimutation)
export(vec_ptype2.robiuniqueid)
export(write_obifasta)
export(write_robitaxonomy)
export(write_xlsx)
import(R6)
......@@ -557,6 +559,7 @@ importFrom(rlang,are_na)
importFrom(rlang,inform)
importFrom(rlang,is_atomic)
importFrom(rlang,warn)
importFrom(stringr,str_sub)
importFrom(vctrs,new_factor)
importFrom(vctrs,new_vctr)
importFrom(vctrs,vec_c)
......
#' @importFrom stringr str_sub
NULL
#' Writes a tibble containing sequences in a fasta file.
#'
#' To be writable as a fasta file, a `tibble` must have a `sequence`
#' column containing `character` strings. The other columns that can
#' be used during the formatting are all optional:
#'
#'   - `id` : used as sequence id located just after the `>`
#'     sign; when absent, ids are generated from the `"seq"` prefix,
#'   - `definition` : which is added at the end of the title line,
#'   - `features` : the features must be a `tibble` or a `data.frame`,
#'     each column of the table will be translated to
#'     a *key*, *value* feature following the extended
#'     obifasta format used by *OBITools*
#'
#' Sequences are wrapped at 60 characters per line. A feature whose value
#' is `NA` for a given sequence is omitted from that sequence's title line.
#' An empty or `NA` sequence is written as an empty sequence line.
#'
#' @param data the `tibble` to format
#' @param file a writable `connection` or a character string naming the
#'             file to write to, or NULL to print on stdout.
#' @param make_unique ensure that the ids of the sequences are unique
#' @param verbose if `TRUE` warnings and messages are emitted when the
#'                function takes some decision.
#'
#' @return `NULL`, invisibly. The function is called for its side effect.
#'
#' @export
#'
#' @md
#' @examples
#' filename <- system.file("extdata", "B5.fasta.gz",
#'                         package = "ROBITools2")
#'
#' seqs <- read_obifasta(filename)
#' write_obifasta(seqs)
#'
write_obifasta <- function(data, file = NULL,
                           make_unique = TRUE,
                           verbose = is_robi_verbose()) {
  robiassert("sequence" %in% colnames(data),
             "to be written in fasta format data must contain a `sequence` column",
             .abort = stop)

  # Formats a single feature value (a length-one column slice):
  #   - unnamed list element -> "[a, b, c]"
  #   - named list element   -> "{'k1':v1, 'k2':v2}"
  #   - anything else        -> as.character()
  fmt_data <- function(v) {
    if (is.list(v)) {
      v <- v[[1]]
      if (is.null(names(v))) {
        paste0("[", paste(v, collapse = ", "), "]")
      } else {
        paste0("{",
               paste(paste0("'", names(v), "'"), v,
                     sep = ":", collapse = ", "),
               "}")
      }
    } else {
      as.character(v)
    }
  }

  # Resolve the output connection explicitly instead of diverting R's
  # output with sink(): sink(NULL)/sink() would pop a sink installed by the
  # caller, and an error in the loop would leave the output diverted.
  if (is.null(file)) {
    con <- stdout()
  } else if (is.character(file)) {
    # `file` (the argument) shadows base::file, hence the explicit namespace.
    con <- base::file(file, open = "w")
    on.exit(close(con), add = TRUE)
  } else if (inherits(file, "connection")) {
    con <- file
    if (!isOpen(con)) {
      # An unopened connection would be re-opened (and truncated) by every
      # writeLines() call; open it once and close it when done.
      open(con, open = "w")
      on.exit(close(con), add = TRUE)
    }
  } else {
    stop("`file` must be NULL, a file name or a writable connection",
         call. = FALSE)
  }

  n_seq <- nrow(data)
  columns <- colnames(data)

  if (!"id" %in% columns) {
    ids <- new_robiuniqueid(rep("seq", n_seq),
                            make_unique = TRUE)
  } else if (make_unique) {
    ids <- new_robiuniqueid(data$id,
                            make_unique = TRUE)
  } else {
    ids <- data$id
  }

  # `features` and `definition` are optional columns.
  features <- if ("features" %in% columns) data[["features"]] else NULL
  has_features <- is.data.frame(features) && ncol(features) > 0
  feature_names <- if (has_features) colnames(features) else character(0)

  definitions <- if ("definition" %in% columns) {
    as.character(data$definition)
  } else {
    rep(NA_character_, n_seq)
  }

  for (i in seq_len(n_seq)) {
    # --- title line ------------------------------------------------------
    attributes <- ""
    if (has_features) {
      f <- features[i, , drop = FALSE]
      valid <- !vapply(f,
                       function(col) isTRUE(is.na(col)[1L]),
                       logical(1))
      if (any(valid)) {
        values <- vapply(f[valid], fmt_data, character(1))
        # Every key=value pair, including the last one, is followed by ";".
        attributes <- paste0(paste(feature_names[valid], values,
                                   sep = "=", collapse = "; "),
                             "; ")
      }
    }

    definition <- if (is.na(definitions[i])) "" else definitions[i]
    title <- paste0(">", ids[i], " ", attributes, definition)

    # --- sequence lines, wrapped at 60 characters -------------------------
    sequence <- as.character(data$sequence[i])
    seq_length <- nchar(sequence)

    if (is.na(sequence) || seq_length == 0) {
      # seq(1, 0, by = 60) would raise an error; write an empty line instead.
      if (isTRUE(verbose)) {
        warning("sequence ", i, " is empty or NA: written as an empty line",
                call. = FALSE)
      }
      chunks <- ""
    } else {
      starts <- seq(from = 1, to = seq_length, by = 60)
      chunks <- str_sub(sequence, start = starts, end = starts + 59)
    }

    writeLines(c(title, chunks), con = con)
  }

  invisible(NULL)
}
>HELIUM_000100422_612GNAAXX:8:1:1115:5149#0/1_CONS_SUB seqAInsertion=0; tag_length=9; tail_quality=31.3; reverse_match=ccattgagtctctgcacctatc; direct_tag=cgctgtatc; seqADeletion=0; reverse_primer=ccattgagtctctgcacctatc; seqBMismatch=1; alignment=left; merged_sample={'ZP96': 1}; cut=[28, 80, 1]; direct_match=gggcaatcctgagccaa; direct_primer=gggcaatcctgagccaa; experiment=Bison-gh_R8; mid_quality=35.3333333333; expected=0.20313; avg_quality=35.0707964602; reverse_score=88.0; seqBInsertion=0; seqAMismatch=27; seqBDeletion=0; user=Alice_Christian; reverse_tag=cgctgtatc; direct_score=68.0; count=1; region=8; head_quality=36.4; seqABMatch=75; seqASingle=5; plaque=H12; seqBSingle=5;
atcctanntnatgagaacaaaaa
caaacaaggggtcagaacgggagaaagag
>HELIUM_000100422_612GNAAXX:8:1:1116:6282#0/1_CONS_SUB seqAInsertion=0; reverse_score=88.0; cut=[28, 73, 1]; tag_length=9; count=9; experiment=Bison-gh_R8; reverse_match=ccattgagtctctgcacctatc; region=8; seqBInsertion=0; direct_primer=gggcaatcctgagccaa; seqBDeletion=2; expected=0.20313; merged_sample={'ZP38': 1, 'B35': 1, 'ZP31': 1, 'ZP59': 1, 'ZP14': 1, 'ZP19': 4}; user=Alice_Christian; seqADeletion=2; reverse_primer=ccattgagtctctgcacctatc; seqBSingle=0; direct_score=68.0; seqASingle=0; alignment=right; direct_match=gggcaatcctgagccaa;
atccggnncntgaagacaatgtttcttctcctaagataggaaggg
>HELIUM_000100422_612GNAAXX:8:1:1117:12793#0/1_CONS_SUB seqAInsertion=0; tag_length=9; tail_quality=42.4; reverse_match=ccattgagtctctgcacctatc; direct_tag=gacagcatg; seqADeletion=0; reverse_primer=ccattgagtctctgcacctatc; seqBMismatch=0; alignment=left; merged_sample={'B90': 1}; cut=[28, 79, 1]; direct_match=gggcaatcctgagccaa; direct_primer=gggcaatcctgagccaa; experiment=Bison-gh_R8; mid_quality=49.152173913; expected=0.20313; avg_quality=47.4464285714; reverse_score=88.0; seqBInsertion=0; seqAMismatch=23; seqBDeletion=0; user=Alice_Christian; reverse_tag=gacagcatg; direct_score=68.0; count=1; region=8; head_quality=36.8; seqABMatch=81; seqASingle=4; plaque=H9; seqBSingle=4;
atcacgnntnccgaaaacaaacaaaggttcagaaagcgaaaagaaaaaaaa
>HELIUM_000100422_612GNAAXX:8:1:1118:3070#0/1_CONS_SUB_CMP seqAInsertion=0; tag_length=9; tail_quality=11.9; reverse_match=ccattgagtctctgcacctatc; direct_tag=agacgacga; seqADeletion=0; reverse_primer=ccattgagtctctgcacctatc; seqBMismatch=1; alignment=left; merged_sample={'BB79': 1}; cut=[33, 85, 1]; direct_match=gggcaatcctgagccaa; direct_primer=gggcaatcctgagccaa; experiment=Bison-gh_R8; mid_quality=28.9139784946; expected=0.20313; avg_quality=27.1238938053; reverse_score=88.0; complemented=True; seqBInsertion=0; seqAMismatch=30; seqBDeletion=0; user=Alice_Christian; reverse_tag=agacgacga; direct_score=68.0; count=1; region=8; head_quality=25.7; seqABMatch=72; seqASingle=5; plaque=G10; seqBSingle=5;
aacctgttttattaaaacaaacaagggtttcagaaagcgagaataaananng
>HELIUM_000100422_612GNAAXX:8:1:1118:18602#0/1_CONS_SUB_CMP seqAInsertion=0; tag_length=9; reverse_match=ccattgagtctctgcacctatc; seqADeletion=0; reverse_primer=ccattgagtctctgcacctatc; seqBMismatch=0; alignment=left; merged_sample={'B62': 1, 'B92': 1}; cut=[33, 83, 1]; direct_match=gggcaatcctgagccaa; direct_primer=gggcaatcctgagccaa; experiment=Bison-gh_R8; expected=0.20313; reverse_score=88.0; complemented=True; seqBInsertion=0; seqBDeletion=0; user=Alice_Christian; direct_score=68.0; count=2; region=8; seqASingle=3; seqBSingle=3;
atcacgttttccgaaaacaaacaaaggttcagaaagcgaaaataaaaang
This diff is collapsed.
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_ecopcr.R
\name{read_ecopcr}
\alias{read_ecopcr}
\title{Reads an ecoPCR file}
\usage{
read_ecopcr(file, verbose = is_robi_verbose())
}
\arguments{
\item{verbose}{}
}
\value{
}
\description{
Reads an ecoPCR file
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/write_obifasta.R
\name{write_obifasta}
\alias{write_obifasta}
\title{Writes a tibble containing sequences in a fasta file.}
\usage{
write_obifasta(
data,
file = NULL,
make_unique = TRUE,
verbose = is_robi_verbose()
)
}
\arguments{
\item{data}{the \code{tibble} to format}
\item{file}{a writable \code{connection} or a character string naming the
file to write to, or NULL to print on stdout.}
\item{make_unique}{ensure that the ids of the sequences are unique}
\item{verbose}{if \code{TRUE} warnings and messages are emitted when the
function takes some decision.}
}
\description{
To be writable as a fasta file, a \code{tibble} must have
a \code{sequence} column containing \code{character} strings. The
other columns that can be used during the formatting are:
}
\details{
\itemize{
\item \code{id} : used as sequence id located just after the \code{>}
sign,
\item \code{definition} : which is added at the end of the title line,
\item \code{features} : the features must be a \code{tibble} or a \code{data.frame},
each column of the table will be translated to
a \emph{key}, \emph{value} feature following the extended
obifasta format used by \emph{OBITools}
}
}
\examples{
filename <- system.file("extdata", "B5.fasta.gz",
package = "ROBITools2")
seqs <- read_obifasta(filename)
write_obifasta(seqs)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment