Commit e79738e1 by Eric Coissac

commit -m "Cleaned code for thermodynamics properties and added Melting…

commit -m "Cleaned code for thermodynamics properties and added Melting Temperature for approximate version of strict repeats. Also cleaned printing code.


git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@223 60f365c0-8329-0410-b2a4-ec073aeeaa1d
parent 91753ace
......@@ -6,7 +6,6 @@
*/
#include "libecoprimer/ecoprimer.h"
#include "libthermo/libthermo.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
......@@ -15,6 +14,9 @@
#include <time.h>
#include <sys/time.h>
#include <dlfcn.h>
#include"libthermo/nnparams.h"
#include"libthermo/thermostats.h"
#define VERSION "0.3"
/* TR: by default, statistics are made on species level*/
......@@ -153,37 +155,10 @@ void initoptions(poptions_t options)
options->r=0;
options->g=0;
options->no_multi_match=FALSE;
options->pnparm = NULL;
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
}
void printcurrenttime ()
{
time_t now;
struct tm *ts;
char buf[80];
/* Get the current time */
now = time(NULL);
/* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */
ts = localtime(&now);
strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts);
fprintf(stderr,"#%d#, %s\n",(int)now, buf);
}
void printcurrenttimeinmilli()
{
struct timeval tv;
struct timezone tz;
struct tm *tm;
gettimeofday(&tv, &tz);
tm=localtime(&tv.tv_sec);
fprintf(stderr, " %d:%02d:%02d %d %d \n", tm->tm_hour, tm->tm_min,
tm->tm_sec, tv.tv_usec, tv.tv_usec/1000);
}
/*TR: Added*/
void printapair(int32_t index,ppair_t pair, poptions_t options)
{
bool_t asdirect1=pair->asdirect1;
......@@ -198,6 +173,9 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
bool_t strand;
uint32_t i;
float temp;
CNNParams nnparams;
//nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
char *c;
char p1[32];
......@@ -225,6 +203,7 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
good2=goodtmp;
}
//print serial number
printf("%6d\t",index);
c = ecoUnhashWord(w1,options->primer_length);
......@@ -232,57 +211,67 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
c = ecoUnhashWord(w2,options->primer_length);
strcpy (p2, c);
//print primer1
printf("%s\t", p1);
//print primer2
printf("%s", p2);
/*For thermo: first word should be 5'-3'*/
/*if (!asdirect1)
{
wtmp = ecoComplementWord(w1,options->primer_length);
c = ecoUnhashWord(wtmp,options->primer_length);
strcpy (p1, c);
}*/
/*For thermo: first word should be 5'-3'*/
/*if (asdirect2)
{
wtmp = ecoComplementWord(w2,options->primer_length);
c = ecoUnhashWord(wtmp,options->primer_length);
strcpy (p2, c);
}*/
/*temp = calculateMeltingTemperature (p1, p2);*/
//float temp = calculateMeltingTemperature ("CTGTTTACCAAAAACATC", "GGTCTGAACTCAGATCAC");
//print primer1 melting temperature
printf ("\t%4.3f", pair->p1temp);
temp = calculateMeltingTemperatureBasic(p1);
printf ("\t%4.3f", temp);
//print minimum melting temperature of approximate versions of primer1
printf ("\t%4.3f", pair->p1mintemp);
temp = calculateMeltingTemperatureBasic(p2);
printf ("\t%4.3f", temp);
//print primer2 melting temperature
printf ("\t%4.3f", pair->p2temp);
printf ("\t%d",countGCContent(p1));
//print minimum melting temperature of approximate versions of primer2
printf ("\t%4.3f", pair->p2mintemp);
printf ("\t%d",countGCContent(p2));
//print gc contents of primer1
printf ("\t%d",nparam_CountGCContent(p1));
//print gc contents of primer2
printf ("\t%d",nparam_CountGCContent(p2));
//print good/bad pair indicator
printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]);
//print inexample count
printf("\t%d", pair->inexample);
//print out example count
printf("\t%d", pair->outexample);
//print yule
printf("\t%4.3f", pair->yule);
//print in taxa count
printf("\t%d", pair->intaxa);
//print out taxa count
printf("\t%d", pair->outtaxa);
//print coverage
printf("\t%4.3f", (float)pair->bc);
//print well identified taxa count
printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa);
//print specificity
printf("\t%4.3f", pair->bs);
//print min amplifia lenght
printf("\t%d", pair->mind);
//print max amplifia lenght
printf("\t%d", pair->maxd);
//print average amplifia lenght
printf("\t%3.2f", (float)pair->sumd/pair->inexample);
//print amplifia information about reference sequence if specified
if (options->refseq && pair->refsequence >=0)
{
printf("\t%s:",options->reference);
......@@ -351,31 +340,21 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
qfp = (float)sortedpairs[i]->outexample/options->outsamples;
else qfp=0.0;
sortedpairs[i]->quorumin = q;
sortedpairs[i]->quorumout = qfp;
sortedpairs[i]->yule = q - qfp;
sortedpairs[j]=sortedpairs[i];
if (q > options->sensitivity_quorum &&
qfp < options->false_positive_quorum)
{
(void)taxonomycoverage(sortedpairs[j],options);
taxonomyspecificity(sortedpairs[j]);
// fprintf(stderr,"%4.3f %4.3f %4.3f\n",sortedpairs[j]->yule , sortedpairs[j]->bs,sortedpairs[j]->bc);
j++;
}
}
qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs);
return j;
}
......@@ -410,7 +389,8 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
sortedpairs[j]=pl->pairs+i;
count=filterandsortpairs(sortedpairs,pairs->count,options);
getThermoProperties(sortedpairs, count, options);
fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count);
printf("#\n");
......@@ -477,7 +457,6 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
}
/*updateseqparams: This function counts the insample and outsample sequences
* and with each sequences adds a tag of the taxon to which the sequence beongs*/
......@@ -556,6 +535,12 @@ int main(int argc, char **argv)
//float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC");
//printf ("temp = %f\n", temp);
//return 0;
//char *t = "ACGT";
//printf ("\nGETNUMCODE: A=%d, C=%d, G=%d, T=%d\n", GETNUMCODE(t[0]), GETNUMCODE('C'), GETNUMCODE('G'), GETNUMCODE('T'));
//printf ("\nGETREVCODE: A=%d, C=%d, G=%d, T=%d\n", GETREVCODE(t[0]), GETREVCODE('C'), GETREVCODE('G'), GETREVCODE('T'));
//return 0;
CNNParams nnparams;
nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
initoptions(&options);
......@@ -701,7 +686,7 @@ int main(int argc, char **argv)
}
}
options.pnparm = &nnparams;
fprintf(stderr,"Reading taxonomy database ...");
taxonomy = read_taxonomy(options.prefix,0);
fprintf(stderr,"Ok\n");
......@@ -741,7 +726,7 @@ int main(int argc, char **argv)
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
// options.filtering=FALSE;
// words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
// fprintf(stderr,"\n Strict primer count : %d\n",words2->size);
......@@ -789,17 +774,10 @@ int main(int argc, char **argv)
fprintf(stderr,"\n");
/*TR: Added*/
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
//fprintf(stderr,"buildPrimerPairs finished\n");
// setoktaxforspecificity (&pairs);
printpairs (pairs, &options,taxonomy);
// closlibman ();
//ECOFREE(pairs.pairs,"Free pairs table");
return 0;
}
......@@ -13,6 +13,7 @@
#include <stdio.h>
#include "ecotype.h"
#include "../libecoPCR/ecoPCR.h"
#include "../libthermo/nnparams.h"
#include "apat.h"
#define DEBUG
......@@ -190,7 +191,10 @@ typedef struct {
//
// uint32_t oktaxoncount;
uint32_t curseqid;
float p1temp; //strict primer1 melting temperature
float p1mintemp; //approx primer1 minimum melting temperature
float p2temp; //strict primer2 melting temperature
float p2mintemp; //approx primer2 minimum melting temperature
} pair_t, *ppair_t;
/*TR: Added*/
......@@ -274,6 +278,7 @@ typedef struct {
int32_t outsamples;
int32_t intaxa;
int32_t outtaxa;
PNNParams pnparm;
} options_t, *poptions_t;
typedef ecoseq_t **pecodnadb_t;
......@@ -338,5 +343,6 @@ void taxonomyspecificity (ppair_t pair);
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum);
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize);
#endif /* EPSORT_H_ */
......@@ -156,16 +156,14 @@ ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t
{
uint32_t i;
ppairtree_t primerpairs;
primerpairs = initpairtree(NULL);
for (i=0; i < seqdbsize; i++)
{
buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options);
}
return primerpairs;
}
#define DMAX (2000000000)
......@@ -187,7 +185,11 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
bool_t bswp;
size_t distance;
bool_t strand;
//char prmr[50];
//float mtemp;
//prmr[options->primer_length] = '\0';
for (i=0;i < primers->size; i++)
{
matchcount+=primers->primers[i].directCount[seqid];
......@@ -272,10 +274,12 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
current.outexample=0;
current.curseqid = 0;
current.refsequence=-1;
//current.p1temp = 100;
//current.p1mintemp = 100;
//current.p2temp = 100;
//current.p2mintemp = 100;
// Standardize the pair
strand = current.p2->word > current.p1->word;
if (!strand)
{
......@@ -309,6 +313,10 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
//else
// pcurrent->outexample++;
//for each pair we save current sequence id in the pair
//when we see this pair for the first time in currnet sequence
//because we want to increment inexample & outexample count
//only once for one sequence
if (pcurrent->curseqid != (seqid+1))
{
if (seqdb[seqid]->isexample)
......@@ -359,6 +367,23 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
else
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;
/*strncpy (prmr, seqdb[seqid]->SQ + matches[i].position, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pcurrent->p1mintemp)
pcurrent->p1mintemp = mtemp;
//fprintf (stderr, "prmr1: %s\n", seqdb[seqid]->SQ);
strncpy (prmr, seqdb[seqid]->SQ + matches[j].position, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pcurrent->p2mintemp)
pcurrent->p2mintemp = mtemp;
//fprintf (stderr, "prmr2: %s\n", prmr);
if (pcurrent->p1temp == 100)
pcurrent->p1temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p1->word, options->primer_length), 0) - 273.0;
if (pcurrent->p2temp == 100)
pcurrent->p2temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p2->word, options->primer_length), 0) - 273.0;
*/
pcurrent->pcr.ampcount++;
// fprintf(stderr,"%c%c W1 : %s direct : %c",
// "bG"[(int)pcurrent->p1->good],
......
......@@ -33,3 +33,18 @@ pecodnadb_t readdnadb(const char *name, uint32_t *size)
return db;
}
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize)
{
uint32_t i;
char ch[11];
ch [10] = '\0';
for (i=0; i < seqdbsize; i++)
{
strncpy (ch, seqdb[i]->SQ, 10);
fprintf (stderr, "seq %d = %s\n", i, ch);
}
exit (0);
}
\ No newline at end of file
SOURCES = dinkelbach.c \
galign.c \
libthermo.c \
nnparams.c \
thermalign.c
SOURCES = nnparams.c \
thermostats.c
SRCS=$(SOURCES)
......
......@@ -2,72 +2,49 @@
* nnparams.h
* PHunterLib
*
* Created by Tiayyba Riaz on 7/2/09.
* Copyright 2009 __MyCompanyName__. All rights reserved.
* Nearest Neighbor Model Parameters
*
* Created by Tiayyba Riaz on 02/07/09.
*
*/
//=============================================================================
// Module: nnparams.h
// Project: Diploma Thesis - Probe Selection for DNA Microarrays
// Type: header file - Nearest Neighbor Parameters / Model.
// Language: c++
// Compiler: microsoft visual c++ 6.0, unix/linux gcc
// System/OS: Windows 32, Sun solaris, Linux, other unix systems (untested)
// Database: none
// Description: class CNNParams - Nearest Neighbor Model Parameters
// Author: kaderali
// Date: 9-12/2000
// Copyright: (c) L. Kaderali, 9/2000 - 12/2000
//
// Revision History
// $ 00sep07 LK : created
// 00dec29 LK : changed to include dangling end data
// 01jan09 LK : included CalcSelfTM function
// 01feb07 LK : optimized
// #$
//=============================================================================
#if !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_)
#define AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#ifndef NNPARAMS_H_
#define NNPARAMS_H_
#include <math.h>
#include <string.h>
#ifdef _pack
#pragma pack(1)
#endif
//#include "../libecoprimer/ecoprimer.h"
// following defines to simplify coding...
#define ndH(a,b,c,d) nparm->dH[a][b][c][d]
#define ndS(a,b,c,d) nparm->dS[a][b][c][d]
#define forbidden_enthalpy 1000000000000000000.0f
//#define forbidden_enthalpy_div1000 1000000000000000.0f
// forbidden entropy=-rlogc
// #define forbidden_entropy 30.205986374220235304486574573422f
// Boltzmann factor (cal/degrees C*mol)
#define R 1.987f
#define SALT_METHOD_SANTALUCIA 1
#define SALT_METHOD_OWCZARZY 2
// Strand concentration (assumption!) (M)
// #define Ct 0.000001f
// r*ln(ct/4) as required by many formulas
//#define rlogc -30.205986374220235304486574573422f
#define DEF_CONC_PRIMERS 0.0000008
#define DEF_CONC_SEQUENCES 0
#define DEF_SALT 0.05
#define GETNUMCODE(a) bpencoder[a - 'A']
#define GETREVCODE(a) 5-bpencoder[a - 'A']
extern float forbidden_entropy;
//-----------------------------------------------------------------------------
// class CNNParams
//typedef class CNNParams* PNNParams;
static char bpencoder[] = { 1, // A
0, // b
2, // C
0,0,0, // d, e, f
3, // G
0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
4,0, // T,U
0,0,0,0,0}; // v,w,x,y,z
//#pragma GCC visibility push(hidden)
typedef struct CNNParams_st
{
//public:
float Ct1;
float Ct2;
float rlogc;
......@@ -75,30 +52,19 @@ typedef struct CNNParams_st
float kfac;
int saltMethod;
float gcContent;
float new_TM; // ge‰ndert von ML!!!
//CNNParams();
//virtual ~CNNParams();
//private:
float new_TM;
float dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
float dS[6][6][6][6];
}CNNParams, * PNNParams;
//void nparam_InitParams(PNNParams nparm, float c1=0.000001f, float c2=0.000001f, float kp=1, int sm = SALT_METHOD_SANTALUCIA );
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm);
void nparam_UpdateParams(PNNParams nparm, char * s1, char * s2);
float nparam_CountGCContent(char * seq );
char nparam_convertNum(char c);
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_CalcTM(float entropy,float enthalpy);
void nparam_AlterTM(PNNParams nparm, float tm_new);
float nparam_CalcG(PNNParams nparm, float entropy, float enthalpy);
float nparam_CalcSelfTM(PNNParams nparm, char* seq);
float nparam_GetInitialEntropy(PNNParams nparm) ;
char nparam_getComplement(char mychar, int asnum);
int nparam_isMismatch(char,char);
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm);
int nparam_CountGCContent(char * seq );
float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_CalcTM(float entropy,float enthalpy);
float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
float nparam_GetInitialEntropy(PNNParams nparm) ;
float calculateMeltingTemperatureBasic (char * seq);
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
//#pragma GCC visibility pop
#endif // !defined(AFX_NNPARAMS_H__05604705_84E8_11D4_A001_000000000000__INCLUDED_)
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment