Commit fbd29402 by Eric Coissac

Patch small bug and add a genotyping function

parent 5fefb79a
......@@ -9,7 +9,7 @@ Description: More about what it does (maybe more than one line)
License: What license is it under?
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.0
RoxygenNote: 6.1.1
Imports: tidyverse,
Rdpack
RdMacros: Rdpack
......
......@@ -2,6 +2,8 @@
export(dmultinomialACGT)
export(error_jc)
export(genotyping)
export(good.genotyping)
export(log_add_exp)
export(log_diff_exp)
export(log_sum_exp)
......@@ -9,6 +11,7 @@ export(pdata_genotype)
export(pnuc_genotype)
export(pnuc_hetero_genotype)
export(pnuc_homo_genotype)
export(prior.genotype)
export(sequencing_error)
export(simulate_sites)
import(dplyr)
......
#' @export
good.genotyping = function(haplotype.1,haplotype.2,nsim,coverage,error,pHe) {
sequenceing = simulate_sites(rep(haplotype.1,nsim),
rep(haplotype.2,nsim),
coverage = coverage,
error = error)
sequenceing
}
......@@ -105,3 +105,119 @@ pnuc_homo_genotype = function(haplotype,
return(pnuc)
}
#' Computes pior probability for a genotype.
#'
#' This function estimates the prior probabilty of a genotype
#' from the prior probability of heterozygosis by supposing
#' equiprobability of genotypes in a class
#' (heterozygote vs homozygote).
#'
#' @param haplotype.1 a character indicating the first haplotype
#' (A, C, G, ou T)
#' @param haplotype.2 a character indicating the second haplotype
#' (A, C, G, ou T)
#' @param pHe A numiric value ranging between 0 and 1,
#' the heterozygote pior probability
#' @param log logical; if TRUE, probabilities p are given as log(p).
#'
#' @return A numiric value ranging between 0 and 1, the prior
#' probability of the genotype specified by
#' haplotype.1 and haplotype.2 parameters.
#'
#' @examples
#' prior.genotype("A","C",1e-3)
#'
#' @export
prior.genotype = function(haplotype.1,haplotype.2,pHe, log=FALSE) {
if (haplotype.1 == haplotype.2)
p = (1 - pHe) / 4
else
p = pHe / 6
if (log)
p = log(p)
return(p)
}
all.genotypes = function() {
g = matrix(0,nrow=10,ncol = 2)
i=0
for (g1 in c("A","C","G","T"))
for (g2 in c("A","C","G","T"))
if (g1 <= g2) {
i = i+1
g[i,1]=g1
g[i,2]=g2
}
return(g)
}
#' @export
genotyping = function(sequencing,sequencing.error,phetero) {
genotypes = all.genotypes()
pnucs = t(mapply(pnuc_genotype,genotypes[,1],
genotypes[,2],sequencing.error))
colnames(pnucs)=c("pA","pC","pG","pT")
rownames(pnucs)=apply(genotype:::all.genotypes(),
MARGIN = 1,
paste0,collapse="")
genotype.priors = as.numeric(mapply(prior.genotype,
genotypes[,1],
genotypes[,2],
phetero,
TRUE))
probas = matrix(0.0,nrow = nrow(sequencing),
ncol = nrow(pnucs))
for (i in seq_len(nrow(sequencing))) {
obs = as.numeric(sequencing[i,4:7])
for (j in seq_len(nrow(pnucs))) {
theo= as.numeric(pnucs[j,1:4])
probas[i,j] = dmultinomialACGT(obs[1],obs[2],obs[3],obs[4],
theo[1],theo[2],theo[3],theo[4],
log = TRUE
)
}
}
colnames(probas) = rownames(pnucs)
probas = sweep(probas,
MARGIN = 2,
STATS = genotype.priors,
FUN = "+")
obs.prior = apply(probas,MARGIN = 1,log_sum_exp)
probas = sweep(probas,
MARGIN = 1,
STATS = obs.prior,
FUN = "-")
second = function(x) max(x[-which.max(x)])
best.genotype = apply(probas, MARGIN = 1, which.max)
second.genotype = apply(probas, MARGIN = 1, function(x) which(x==second(x))[1])
g.good = rownames(pnucs)[best.genotype]
g.alt = rownames(pnucs)[second.genotype]
score = apply(probas, MARGIN = 1, max) - apply(probas, MARGIN = 1, second)
true = apply(sequencing[,1:2],MARGIN = 1,function(x) paste0(sort(x),collapse = ""))
sequencing = cbind(sequencing,probas,g.good,g.alt,score = exp(score),ok=true==g.good)
return(sequencing)
}
......@@ -53,7 +53,7 @@ log_diff_exp = function(a, b) {
#'
#' @author Eric Coissac
#' @export
log_sum_exp = function(x) Reduce(logaddexp, x)
log_sum_exp = function(x) Reduce(log_add_exp, x)
#' Density probability function of a multinomial distribution for nucleotides
#'
......
......@@ -61,7 +61,7 @@ error_jc = function(rate) {
#' @author Eric Coissac
#' @export
sequencing_error = function(nuc,error) {
apply(error[nuc,],
apply(error[nuc,,drop=FALSE],
MARGIN = 1,
function(p) sample(colnames(error),
size = 1,
......@@ -107,7 +107,7 @@ simulate_sites = function(haplotype.1,haplotype.2,coverage,error) {
colnames(genotypes) = c("haplotype.1","haplotype.2")
cov = rpois(n,lambda = coverage)
cov = rpois(n,lambda = coverage-1)+1
obs1= vapply(cov,function(x) rbinom(1,x,0.5),0)
obs2 = cov - obs1
......@@ -122,13 +122,15 @@ simulate_sites = function(haplotype.1,haplotype.2,coverage,error) {
obs1,
obs2)
rep = apply(rep,
MARGIN = 1,
function(ns) table(factor(sequencing_error(rep(c("A","C","G","T"),
ns),
error),
levels=c("A","C","G","T"))))
rep = as.tibble(t(rep))
rep = bind_cols(haplotype.1=haplotype.1,
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/genotype.R
\name{prior.genotype}
\alias{prior.genotype}
\title{Computes pior probability for a genotype.}
\usage{
prior.genotype(haplotype.1, haplotype.2, pHe, log = FALSE)
}
\arguments{
\item{haplotype.1}{a character indicating the first haplotype
(A, C, G, ou T)}
\item{haplotype.2}{a character indicating the second haplotype
(A, C, G, ou T)}
\item{pHe}{A numiric value ranging between 0 and 1,
the heterozygote pior probability}
\item{log}{logical; if TRUE, probabilities p are given as log(p).}
}
\value{
A numiric value ranging between 0 and 1, the prior
probability of the genotype specified by
haplotype.1 and haplotype.2 parameters.
}
\description{
This function estimates the prior probabilty of a genotype
from the prior probability of heterozygosis by supposing
equiprobability of genotypes in a class
(heterozygote vs homozygote).
}
\examples{
prior.genotype("A","C",1e-3)
}
---
title: "R Notebook"
output: html_notebook
---
```{r}
library(genotype)
```
```{r}
```
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment