diff --git a/DESCRIPTION b/DESCRIPTION index 11647fc..b45b8f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Description: More about what it does (maybe more than one line) License: What license is it under? Encoding: UTF-8 LazyData: true -RoxygenNote: 6.1.0 +RoxygenNote: 6.1.1 Imports: tidyverse, Rdpack RdMacros: Rdpack diff --git a/NAMESPACE b/NAMESPACE index 774ac07..48bb9d1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ export(dmultinomialACGT) export(error_jc) +export(genotyping) +export(good.genotyping) export(log_add_exp) export(log_diff_exp) export(log_sum_exp) @@ -9,6 +11,7 @@ export(pdata_genotype) export(pnuc_genotype) export(pnuc_hetero_genotype) export(pnuc_homo_genotype) +export(prior.genotype) export(sequencing_error) export(simulate_sites) import(dplyr) diff --git a/R/eval.R b/R/eval.R new file mode 100644 index 0000000..0ea54ba --- /dev/null +++ b/R/eval.R @@ -0,0 +1,9 @@ +#' @export +good.genotyping = function(haplotype.1,haplotype.2,nsim,coverage,error,pHe) { + sequenceing = simulate_sites(rep(haplotype.1,nsim), + rep(haplotype.2,nsim), + coverage = coverage, + error = error) + + sequenceing +} diff --git a/R/genotype.R b/R/genotype.R index 09fac6d..f940670 100644 --- a/R/genotype.R +++ b/R/genotype.R @@ -105,3 +105,119 @@ pnuc_homo_genotype = function(haplotype, return(pnuc) } + + +#' Computes pior probability for a genotype. +#' +#' This function estimates the prior probabilty of a genotype +#' from the prior probability of heterozygosis by supposing +#' equiprobability of genotypes in a class +#' (heterozygote vs homozygote). +#' +#' @param haplotype.1 a character indicating the first haplotype +#' (A, C, G, ou T) +#' @param haplotype.2 a character indicating the second haplotype +#' (A, C, G, ou T) +#' @param pHe A numiric value ranging between 0 and 1, +#' the heterozygote pior probability +#' @param log logical; if TRUE, probabilities p are given as log(p). +#' +#' @return A numiric value ranging between 0 and 1, the prior +#' probability of the genotype specified by +#' haplotype.1 and haplotype.2 parameters. +#' +#' @examples +#' prior.genotype("A","C",1e-3) +#' +#' @export +prior.genotype = function(haplotype.1,haplotype.2,pHe, log=FALSE) { + if (haplotype.1 == haplotype.2) + p = (1 - pHe) / 4 + else + p = pHe / 6 + + if (log) + p = log(p) + + return(p) +} + + +all.genotypes = function() { + g = matrix(0,nrow=10,ncol = 2) + i=0 + for (g1 in c("A","C","G","T")) + for (g2 in c("A","C","G","T")) + if (g1 <= g2) { + i = i+1 + g[i,1]=g1 + g[i,2]=g2 + } + + return(g) +} + +#' @export +genotyping = function(sequencing,sequencing.error,phetero) { + genotypes = all.genotypes() + + pnucs = t(mapply(pnuc_genotype,genotypes[,1], + genotypes[,2],sequencing.error)) + colnames(pnucs)=c("pA","pC","pG","pT") + rownames(pnucs)=apply(genotype:::all.genotypes(), + MARGIN = 1, + paste0,collapse="") + + genotype.priors = as.numeric(mapply(prior.genotype, + genotypes[,1], + genotypes[,2], + phetero, + TRUE)) + + + probas = matrix(0.0,nrow = nrow(sequencing), + ncol = nrow(pnucs)) + + for (i in seq_len(nrow(sequencing))) { + obs = as.numeric(sequencing[i,4:7]) + for (j in seq_len(nrow(pnucs))) { + theo= as.numeric(pnucs[j,1:4]) + probas[i,j] = dmultinomialACGT(obs[1],obs[2],obs[3],obs[4], + theo[1],theo[2],theo[3],theo[4], + log = TRUE + ) + } + } + colnames(probas) = rownames(pnucs) + + probas = sweep(probas, + MARGIN = 2, + STATS = genotype.priors, + FUN = "+") + + obs.prior = apply(probas,MARGIN = 1,log_sum_exp) + + probas = sweep(probas, + MARGIN = 1, + STATS = obs.prior, + FUN = "-") + + second = function(x) max(x[-which.max(x)]) + + best.genotype = apply(probas, MARGIN = 1, which.max) + + + second.genotype = apply(probas, MARGIN = 1, function(x) which(x==second(x))[1]) + + + g.good = rownames(pnucs)[best.genotype] + g.alt = rownames(pnucs)[second.genotype] + + score = apply(probas, MARGIN = 1, max) - apply(probas, MARGIN = 1, second) + + true = apply(sequencing[,1:2],MARGIN = 1,function(x) paste0(sort(x),collapse = "")) + + sequencing = cbind(sequencing,probas,g.good,g.alt,score = exp(score),ok=true==g.good) + + return(sequencing) +} diff --git a/R/proba.R b/R/proba.R index 70dc6df..140b89d 100644 --- a/R/proba.R +++ b/R/proba.R @@ -53,7 +53,7 @@ log_diff_exp = function(a, b) { #' #' @author Eric Coissac #' @export -log_sum_exp = function(x) Reduce(logaddexp, x) +log_sum_exp = function(x) Reduce(log_add_exp, x) #' Density probability function of a multinomial distribution for nucleotides #' diff --git a/R/simulate.R b/R/simulate.R index 7c94996..7e472d5 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -61,7 +61,7 @@ error_jc = function(rate) { #' @author Eric Coissac #' @export sequencing_error = function(nuc,error) { - apply(error[nuc,], + apply(error[nuc,,drop=FALSE], MARGIN = 1, function(p) sample(colnames(error), size = 1, @@ -107,7 +107,7 @@ simulate_sites = function(haplotype.1,haplotype.2,coverage,error) { colnames(genotypes) = c("haplotype.1","haplotype.2") - cov = rpois(n,lambda = coverage) + cov = rpois(n,lambda = coverage-1)+1 obs1= vapply(cov,function(x) rbinom(1,x,0.5),0) obs2 = cov - obs1 @@ -122,13 +122,15 @@ simulate_sites = function(haplotype.1,haplotype.2,coverage,error) { obs1, obs2) + rep = apply(rep, MARGIN = 1, function(ns) table(factor(sequencing_error(rep(c("A","C","G","T"), ns), error), levels=c("A","C","G","T")))) - + + rep = as.tibble(t(rep)) rep = bind_cols(haplotype.1=haplotype.1, diff --git a/man/prior.genotype.Rd b/man/prior.genotype.Rd new file mode 100644 index 0000000..c83e612 --- /dev/null +++ b/man/prior.genotype.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/genotype.R +\name{prior.genotype} +\alias{prior.genotype} +\title{Computes pior probability for a genotype.} +\usage{ +prior.genotype(haplotype.1, haplotype.2, pHe, log = FALSE) +} +\arguments{ +\item{haplotype.1}{a character indicating the first haplotype +(A, C, G, ou T)} + +\item{haplotype.2}{a character indicating the second haplotype +(A, C, G, ou T)} + +\item{pHe}{A numiric value ranging between 0 and 1, +the heterozygote pior probability} + +\item{log}{logical; if TRUE, probabilities p are given as log(p).} +} +\value{ +A numiric value ranging between 0 and 1, the prior + probability of the genotype specified by + haplotype.1 and haplotype.2 parameters. +} +\description{ +This function estimates the prior probabilty of a genotype +from the prior probability of heterozygosis by supposing +equiprobability of genotypes in a class +(heterozygote vs homozygote). +} +\examples{ + prior.genotype("A","C",1e-3) + +} diff --git a/test.Rmd b/test.Rmd new file mode 100644 index 0000000..d92118f --- /dev/null +++ b/test.Rmd @@ -0,0 +1,13 @@ +--- +title: "R Notebook" +output: html_notebook +--- + +```{r} +library(genotype) +``` + +```{r} + +``` + diff --git a/test.nb.html b/test.nb.html new file mode 100644 index 0000000..a0cd9f5 --- /dev/null +++ b/test.nb.html @@ -0,0 +1,1756 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + +
library(genotype)
+ + + + + + + + + +
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkoZ2Vub3R5cGUpCmBgYAoKYGBge3J9CgpgYGAKCg==
+ + + +
+ + + + + + + +