Commit 381b48a1 by Eric Coissac

Initial commit

parents
^.*\.Rproj$
^\.Rproj\.user$
.Rproj.user
.Rhistory
.RData
.Ruserdata
SNP.key
Package: genotype
Type: Package
Title: What the Package Does (Title Case)
Version: 0.1.0
Author: Master BEE Grenoble
Maintainer: The package maintainer <eric@metabarocding.org>
Description: More about what it does (maybe more than one line)
Use four spaces when indenting paragraphs within the Description.
License: What license is it under?
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.0
Imports: tidyverse,
Rdpack
RdMacros: Rdpack
Suggests: vegan,
roxygen2,
knitr,
rmarkdown
# Generated by roxygen2: do not edit by hand
export(dpolynomialACGT)
export(error_jc)
export(log_add_exp)
export(log_diff_exp)
export(log_sum_exp)
export(sequencing_error)
export(simulate_sites)
import(dplyr)
import(tibble)
#' Logarithm of the sum of two exponentials
#'
#' `log_add_exp` computes the logarithm of the sum
#' of the exponential of two value.
#'
#' \deqn{log\_add\_exp(a,b) = \log(e^a + e^b)}{%
#' log_add_exp(a,b) = log(exp(a) + exp(b))}
#'
#' @param a first logarithmic values to add
#' @param b second logarithmic values to add
#'
#' @return a numeric vector containing the logarithm of the sum
#' of the exponential of `a` and `b`.
#'
#' @author Eric Coissac
#' @export
log_add_exp = function(a, b) {
a + log1p(exp(b - a))
}
#' Logarithm of the difference of two exponentials
#'
#' `log_add_exp` computes the logarithm of the difference
#' of the exponential of two value.
#'
#' \deqn{log\_add\_exp(a,b) = \log(e^a - e^b)}{%
#' log_add_exp(a,b) = log(exp(a) - exp(b))}
#'
#' @param a first logarithmic values to add
#' @param b second logarithmic values to add
#'
#' @return a numeric vector containing the logarithm of the
#' difference of the exponential of `a` and `b`.
#'
#' @author Eric Coissac
#' @export
log_diff_exp = function(a, b) {
a + log(1 - exp(b - a))
}
#' Logarithm of the sum of exponentials
#'
#' `log_sum_exp` computes the logarithm of the sum
#' of exponential of values.
#'
#' \deqn{log\_add\_exp(x_1,x_2,\ldots) = \log(e^{x_1} + e^{x_2} + \ldots)}{%
#' log_add_exp(x_1,x_2,\ldots) = log(exp(x_1) + exp(x_2) + \ldots)}
#'
#' @param x logarithmic values to sum
#'
#' @return a numeric value containing the logarithm of the
#' sum of the exponential of the values of `x`.
#'
#' @author Eric Coissac
#' @export
log_sum_exp = function(x) Reduce(logaddexp, x)
#' Density probability function of a polynomial distribution for nucleotides
#'
#' Computes the probalility to observe a given number of `A`, `C`, `G`, `T`
#' when probalilities of each nucleotide are `pA`, `pC`, `pG`, `pT`.
#'
#' @param A numeric; number of observed *A* at a given site.
#' @param C numeric; number of observed *C* at a given site.
#' @param G numeric; number of observed *G* at a given site.
#' @param T numeric; number of observed *T* at a given site.
#' @param pA numeric; probability to observed a *A* at a given site.
#' @param pC numeric; probability to observed a *C* at a given site.
#' @param pG numeric; probability to observed a *G* at a given site.
#' @param pT numeric; probability to observed a *T* at a given site.
#' @param log logical; if TRUE, probabilities p are given as log(p).
#'
#' @return numeric; the density of probability of the log that probability
#' of the observations given the probabilities of observation
#'
#' @examples
#' dpolynomialACGT(c(3,4),c(3,6),c(3,2),c(3,5),
#' 0.25,0.25,0.25,0.25)
#' dpolynomialACGT(c(3,4),c(3,6),c(3,2),c(3,5),
#' pA = 0.25, pC = 0.25, pG = 0.25, pT = 0.25,
#' log=TRUE)
#'
#' @author Eric Coissac
#' @export
dpolynomialACGT = function(A,C,G,T,pA,pC,pG,pT,log=FALSE) {
lpcomb =( log(pA) * A
+ log(pC) * C
+ log(pG) * G
+ log(pT) * T)
N = A + C + G + T
tetranome = (lgamma(N+1) - lgamma(A+1)
- lgamma(C+1)
- lgamma(G+1)
- lgamma(T+1))
lp = lpcomb + tetranome
if (log) lp else exp(lp)
}
#' @import tibble
#' @import dplyr
NULL
#' Build error matrix.
#'
#' Error matrix is a 4 x 4 numeric matrix providing sequencing
#' error probalibiltie for each nucleotides. Each row correspond
#' to one of the four nucleotides `A`,`C`,`G`,`T`. Each column indicates
#' the probability of reading a given nucleotide considering that the TRUE
#' one is indicated by the row.
#' `error.jc` build an error matrix following the Juke et Cantor model
#' where all errors are equiprobable.
#
#' @param rate is the global error probability per site expressed as a numeric
#' value ranging between 0 and 1.
#'
#' @return a 4 x 4 numeric matrix providing sequencing
#' error probalibiltie for each nucleotides.
#'
#' @examples
#' # Build an error matrix corresponding
#' # to 5% of sequencing error
#'
#' err = error_jc(0.05)
#' err
#'
#' # A C G T
#' # A 0.95000000 0.01666667 0.01666667 0.01666667
#' # C 0.01666667 0.95000000 0.01666667 0.01666667
#' # G 0.01666667 0.01666667 0.95000000 0.01666667
#' # T 0.01666667 0.01666667 0.01666667 0.95000000
#'
#' @author Eric Coissac
#' @export
error_jc = function(rate) {
e = rate/3
err = matrix(e,nrow = 4,ncol=4)
diag(err)=1-rate
rownames(err) <- c("A","C","G","T")
colnames(err) <- rownames(err)
return(err)
}
#' Generate substitution sequencing error.
#'
#'
#' @export
sequencing_error = function(nuc,error) {
apply(error[nuc,],
MARGIN = 1,
function(p) sample(colnames(error),
size = 1,
prob = p))
}
#' @export
simulate_sites = function(haplotype.1,haplotype.2,coverage,error) {
n = length(haplotype.1)
stopifnot(n==length(haplotype.2))
genotypes = tibble(haplotype.1,haplotype.2)
genotypes = as.tibble(t(apply(genotypes,
MARGIN = 1,
sort)))
cov = rpois(n,lambda = coverage)
obs1= vapply(cov,function(x) rbinom(1,x,0.5),0)
obs2 = cov - obs1
rep = matrix(0,nrow = n,ncol = 4)
colnames(rep) = c("A","C","G","T")
mapply(function(i,a1,a2,o1,o2) { rep[i,a1]<<-rep[i,a1] + o1;
rep[i,a2]<<-rep[i,a2] + o2},
1:n,
genotypes$haplotype.1,
genotypes$haplotype.2,
obs1,
obs2)
rep = apply(rep,
MARGIN = 1,
function(ns) table(factor(sequencing_error(rep(c("A","C","G","T"),
ns),
error),
levels=c("A","C","G","T"))))
rep = as.tibble(t(rep))
rep = bind_cols(haplotype.1=haplotype.1,
haplotype.2=haplotype.2,
coverage=cov,
rep)
return(rep)
}
File added
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: ISO-8859-1
RnwWeave: knitr
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/proba.R
\name{dpolynomialACGT}
\alias{dpolynomialACGT}
\title{Density probability function of a polynomial distribution for nucleotides}
\usage{
dpolynomialACGT(A, C, G, T, pA, pC, pG, pT, log = FALSE)
}
\arguments{
\item{A}{numeric; number of observed *A* at a given site.}
\item{C}{numeric; number of observed *C* at a given site.}
\item{G}{numeric; number of observed *G* at a given site.}
\item{T}{numeric; number of observed *T* at a given site.}
\item{pA}{numeric; probability to observed a *A* at a given site.}
\item{pC}{numeric; probability to observed a *C* at a given site.}
\item{pG}{numeric; probability to observed a *G* at a given site.}
\item{pT}{numeric; probability to observed a *T* at a given site.}
\item{log}{logical; if TRUE, probabilities p are given as log(p).}
}
\value{
numeric; the density of probability of the log that probability
of the observations given the probabilities of observation
}
\description{
Computes the probalility to observe a given number of `A`, `C`, `G`, `T`
when probalilities of each nucleotide are `pA`, `pC`, `pG`, `pT`.
}
\examples{
dpolynomialACGT(c(3,4),c(3,6),c(3,2),c(3,5),
0.25,0.25,0.25,0.25)
dpolynomialACGT(c(3,4),c(3,6),c(3,2),c(3,5),
pA = 0.25, pC = 0.25, pG = 0.25, pT = 0.25,
log=TRUE)
}
\author{
Eric Coissac
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulate.R
\name{error_jc}
\alias{error_jc}
\title{Build error matrix.}
\usage{
error_jc(rate)
}
\arguments{
\item{rate}{is the global error probability per site expressed as a numeric
value ranging between 0 and 1.}
}
\value{
a 4 x 4 numeric matrix providing sequencing
error probalibiltie for each nucleotides.
}
\description{
Error matrix is a 4 x 4 numeric matrix providing sequencing
error probalibiltie for each nucleotides. Each row correspond
to one of the four nucleotides `A`,`C`,`G`,`T`. Each column indicates
the probability of reading a given nucleotide considering that the TRUE
one is indicated by the row.
`error.jc` build an error matrix following the Juke et Cantor model
where all errors are equiprobable.
}
\examples{
# Build an error matrix corresponding
# to 5\% of sequencing error
err = error_jc(0.05)
err
# A C G T
# A 0.95000000 0.01666667 0.01666667 0.01666667
# C 0.01666667 0.95000000 0.01666667 0.01666667
# G 0.01666667 0.01666667 0.95000000 0.01666667
# T 0.01666667 0.01666667 0.01666667 0.95000000
}
\author{
Eric Coissac
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/proba.R
\name{log_add_exp}
\alias{log_add_exp}
\title{Logarithm of the sum of two exponentials}
\usage{
log_add_exp(a, b)
}
\arguments{
\item{a}{first logarithmic values to add}
\item{b}{second logarithmic values to add}
}
\value{
a numeric vector containing the logarithm of the sum
of the exponential of `a` and `b`.
}
\description{
`log_add_exp` computes the logarithm of the sum
of the exponential of two value.
}
\details{
\deqn{log\_add\_exp(a,b) = \log(e^a + e^b)}{%
log_add_exp(a,b) = log(exp(a) + exp(b))}
}
\author{
Eric Coissac
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/proba.R
\name{log_diff_exp}
\alias{log_diff_exp}
\title{Logarithm of the difference of two exponentials}
\usage{
log_diff_exp(a, b)
}
\arguments{
\item{a}{first logarithmic values to add}
\item{b}{second logarithmic values to add}
}
\value{
a numeric vector containing the logarithm of the
difference of the exponential of `a` and `b`.
}
\description{
`log_add_exp` computes the logarithm of the difference
of the exponential of two value.
}
\details{
\deqn{log\_add\_exp(a,b) = \log(e^a - e^b)}{%
log_add_exp(a,b) = log(exp(a) - exp(b))}
}
\author{
Eric Coissac
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/proba.R
\name{log_sum_exp}
\alias{log_sum_exp}
\title{Logarithm of the sum of exponentials}
\usage{
log_sum_exp(x)
}
\arguments{
\item{x}{logarithmic values to sum}
}
\value{
a numeric value containing the logarithm of the
sum of the exponential of the values of `x`.
}
\description{
`log_sum_exp` computes the logarithm of the sum
of exponential of values.
}
\details{
\deqn{log\_add\_exp(x_1,x_2,\ldots) = \log(e^{x_1} + e^{x_2} + \ldots)}{%
log_add_exp(x_1,x_2,\ldots) = log(exp(x_1) + exp(x_2) + \ldots)}
}
\author{
Eric Coissac
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulate.R
\name{sequencing_error}
\alias{sequencing_error}
\title{Generate substitution sequencing error.}
\usage{
sequencing_error(nuc, error)
}
\description{
Generate substitution sequencing error.
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment