Commit 419bda96 by Eric Coissac

Patch minimum tm computation

Add -A option to list all id present in the DB for helping in -R usage

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@228 60f365c0-8329-0410-b2a4-ec073aeeaa1d
parent 455bf639
......@@ -28,36 +28,6 @@ static int cmpprintedpairs(const void* p1,const void* p2);
void* lib_handle = NULL;
float (*calcMelTemp)(char*, char*);
void openlibman ()
{
// Open the library.
char* lib_name = "./libPHunterLib.dylib";
lib_handle = dlopen(lib_name, RTLD_NOW);
if (lib_handle) {
fprintf(stderr, "[%s] dlopen(\"%s\", RTLD_NOW): Successful\n", __FILE__, lib_name);
}
else {
fprintf(stderr, "[%s] Unable to open library: %s\n",
__FILE__, dlerror());
exit(EXIT_FAILURE);
}
// Get the symbol addresses.
calcMelTemp = dlsym(lib_handle, "_Z27calculateMeltingTemperaturePcS_");
if (calcMelTemp) {
fprintf(stderr, "[%s] dlsym(lib_handle, \"addRating\"): Successful\n", __FILE__);
}
else {
fprintf(stderr, "[%s] Unable to get symbol: %s\n",
__FILE__, dlerror());
exit(EXIT_FAILURE);
}
}
void closlibman ()
{
if (lib_handle) dlclose(lib_handle);
}
/* ----------------------------------------------- */
/* printout help */
/* ----------------------------------------------- */
......@@ -157,8 +127,9 @@ void initoptions(poptions_t options)
options->no_multi_match=FALSE;
options->pnparm = NULL;
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
options->saltmethod = SALT_METHOD_SANTALUCIA;
options->saltmethod = SALT_METHOD_SANTALUCIA;
options->salt = DEF_SALT;
options->printAC=FALSE;
}
void printapair(int32_t index,ppair_t pair, poptions_t options)
......@@ -176,7 +147,7 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
uint32_t i;
float temp;
CNNParams nnparams;
//nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
char *c;
......@@ -215,25 +186,25 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
//print primer1
printf("%s\t", p1);
//print primer2
printf("%s", p2);
//print primer1 melting temperature
printf ("\t%4.3f", pair->p1temp);
printf ("\t%3.1f", pair->p1temp);
//print minimum melting temperature of approximate versions of primer1
printf ("\t%4.3f", pair->p1mintemp);
printf ("\t%3.1f", pair->p1mintemp);
//print primer2 melting temperature
printf ("\t%4.3f", pair->p2temp);
printf ("\t%3.1f", pair->p2temp);
//print minimum melting temperature of approximate versions of primer2
printf ("\t%4.3f", pair->p2mintemp);
printf ("\t%3.1f", pair->p2mintemp);
//print gc contents of primer1
printf ("\t%d",nparam_CountGCContent(p1));
//print gc contents of primer2
printf ("\t%d",nparam_CountGCContent(p2));
......@@ -242,19 +213,19 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
//print inexample count
printf("\t%d", pair->inexample);
//print out example count
printf("\t%d", pair->outexample);
//print yule
printf("\t%4.3f", pair->yule);
//print in taxa count
printf("\t%d", pair->intaxa);
//print out taxa count
printf("\t%d", pair->outtaxa);
//print coverage
printf("\t%4.3f", (float)pair->bc);
......@@ -266,10 +237,10 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
//print min amplifia lenght
printf("\t%d", pair->mind);
//print max amplifia lenght
printf("\t%d", pair->maxd);
//print average amplifia lenght
printf("\t%3.2f", (float)pair->sumd/pair->inexample);
......@@ -392,7 +363,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
count=filterandsortpairs(sortedpairs,pairs->count,options);
getThermoProperties(sortedpairs, count, options);
fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count);
printf("#\n");
......@@ -454,7 +425,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
for (i=0;i < count;i++)
printapair(i,sortedpairs[i],options);
}
......@@ -508,6 +479,13 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
}
/* to get db stats, totals of species, genus etc....*/
static void printAC(pecodnadb_t seqdb,uint32_t seqdbsize)
{
uint32_t i;
for (i=0; i< seqdbsize;i++)
printf("%15s : %s\n",seqdb[i]->AC,seqdb[i]->DE);
}
int main(int argc, char **argv)
{
......@@ -530,22 +508,11 @@ int main(int argc, char **argv)
int32_t rankdbstats = 0;
//printcurrenttime();
//return 0;
//openlibman ();
//float temp = calculateMeltingTemperature ("GGTCTGAACTCAGATCAC", "CTGTTTACCAAAAACATC");
//float temp = calculateMeltingTemperatureBasic ("CTGTTTACCAAAAACATC");
//printf ("temp = %f\n", temp);
//return 0;
//char *t = "ACGT";
//printf ("\nGETNUMCODE: A=%d, C=%d, G=%d, T=%d\n", GETNUMCODE(t[0]), GETNUMCODE('C'), GETNUMCODE('G'), GETNUMCODE('T'));
//printf ("\nGETREVCODE: A=%d, C=%d, G=%d, T=%d\n", GETREVCODE(t[0]), GETREVCODE('C'), GETREVCODE('G'), GETREVCODE('T'));
//return 0;
CNNParams nnparams;
initoptions(&options);
while ((carg = getopt(argc, argv, "hfvcUDSd:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) {
while ((carg = getopt(argc, argv, "hAfvcUDSd:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) {
switch (carg) {
/* ---------------------------- */
......@@ -560,6 +527,12 @@ int main(int argc, char **argv)
options.filtering=FALSE;
break;
/* ---------------------------- */
case 'A': /* set in single strand mode */
/* ---------------------------- */
options.printAC=TRUE;
break;
/* -------------------- */
case 'd': /* database name */
/* -------------------- */
......@@ -702,11 +675,11 @@ int main(int argc, char **argv)
options.pnparm = &nnparams;
if (options.saltmethod != 2) //if not SALT_METHOD_OWCZARZY
options.saltmethod = SALT_METHOD_SANTALUCIA; //then force SALT_METHOD_SANTALUCIA
if (options.salt < 0.01 || options.salt > 0.3) //if salt value out of literature values
options.salt = DEF_SALT; //set to default
nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,options.salt,options.saltmethod);
fprintf(stderr,"Reading taxonomy database ...");
......@@ -719,6 +692,11 @@ int main(int argc, char **argv)
seqdb = readdnadb(options.prefix,&seqdbsize);
if (options.printAC)
{
printAC(seqdb,seqdbsize);
exit(0);
}
if (options.reference)
for (i=0; i < seqdbsize;i++)
if (strcmp(seqdb[i]->AC,options.reference)==0)
......@@ -748,7 +726,7 @@ int main(int argc, char **argv)
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
// options.filtering=FALSE;
// words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
// fprintf(stderr,"\n Strict primer count : %d\n",words2->size);
......
......@@ -97,7 +97,8 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3
params.circular = options->circular;
params.maxerr = options->error_max;
params.omask = (1 << options->strict_three_prime) -1;
// params.omask = (1 << options->strict_three_prime) -1;
params.omask = 0;
params.patlen = options->primer_length;
positions.val=NULL;
......
......@@ -86,7 +86,8 @@ typedef struct {
typedef union {
uint32_t *pointer;
uint32_t value;
} poslist_t, *ppostlist_t;
} poslist_t, *pposlist_t;
/**
* primer_t structure store fuzzy match positions for a primer
......@@ -96,10 +97,10 @@ typedef union {
typedef struct {
word_t word; //< code for the primer
uint32_t *directCount; //< Occurrence count on direct strand
ppostlist_t directPos; //< list of position list on direct strand
pposlist_t directPos; //< list of position list on direct strand
uint32_t *reverseCount; //< Occurrence count on reverse strand
ppostlist_t reversePos; //< list of position list on reverse strand
pposlist_t reversePos; //< list of position list on reverse strand
bool_t good; //< primer match more than quorum example and no
// more counterexample quorum.
......@@ -191,7 +192,7 @@ typedef struct {
//
// uint32_t oktaxoncount;
uint32_t curseqid;
float p1temp; //strict primer1 melting temperature
float p1temp; //strict primer1 melting temperature
float p1mintemp; //approx primer1 minimum melting temperature
float p2temp; //strict primer2 melting temperature
float p2mintemp; //approx primer2 minimum melting temperature
......@@ -246,6 +247,7 @@ typedef struct {
}taxontoamp_t, *ptaxontoamp_t;
typedef struct {
bool_t printAC;
bool_t statistics;
bool_t filtering;
uint32_t lmin; //**< Amplifia minimal length
......
SOURCES = nnparams.c \
thermostats.c
thermostats.c
SRCS=$(SOURCES)
......
......@@ -3,7 +3,7 @@
* PHunterLib
*
* Nearest Neighbor Model / Parameters
*
*
* Created by Tiayyba Riaz on 7/2/09.
*
*/
......@@ -18,7 +18,7 @@
float forbidden_entropy;
float nparam_GetInitialEntropy(PNNParams nparm)
float nparam_GetInitialEntropy(PNNParams nparm)
{
return -5.9f+nparm->rlogc;
}
......@@ -53,9 +53,9 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
float logk = log(nparm->kplus);
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
}
}
return answer;
}
}
/* PURPOSE: Return melting temperature TM for given entropy and enthalpy
* Assuming a one-state transition and using the formula
......@@ -70,7 +70,7 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
*
* PARAMETERS:
* entrypy and enthalpy
*
*
* RETURN VALUE:
* temperature
*/
......@@ -78,7 +78,7 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
float nparam_CalcTM(float entropy,float enthalpy)
{
float tm = 0; // absolute zero - return if model fails!
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
return 0;
if (entropy<0) // avoid division by zero and model errors!
{
......@@ -93,7 +93,7 @@ float nparam_CalcTM(float entropy,float enthalpy)
void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
{
nparm->Ct1 = c1;
nparm->Ct2 = c2;
nparm->Ct2 = c2;
nparm->kplus = kp;
int maxCT = 1;
if(nparm->Ct2 > nparm->Ct1)
......@@ -101,11 +101,11 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
maxCT = 2;
}
float ctFactor;
if(nparm->Ct1 == nparm->Ct2)
if(nparm->Ct1 == nparm->Ct2)
{
ctFactor = nparm->Ct1/2;
}
else if (maxCT == 1)
else if (maxCT == 1)
{
ctFactor = nparm->Ct1-nparm->Ct2/2;
}
......@@ -123,7 +123,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
memset(nparm->dH,0,sizeof(nparm->dH));
memset(nparm->dS,0,sizeof(nparm->dS));
// Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
// Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
for (x=1;x<=4;x++)
{
for (y=1;y<=4;y++)
......@@ -152,7 +152,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
ndS(5,x,y,0)=forbidden_entropy;
ndH(0,x,y,5)=forbidden_enthalpy;
ndS(0,x,y,5)=forbidden_entropy;
}
// also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
ndH(x,0,0,0)=forbidden_enthalpy;
......@@ -212,7 +212,7 @@ void nparam_InitParams(PNNParams nparm, float c1, float c2, float kp, int sm)
// x-/ya abd xa/y- as well as -x/ay and ax/-y
// bulge opening and closing parameters with
// adjacent matches / mismatches
// obulge_mism and cbulge_mism chosen so high to avoid
// obulge_mism and cbulge_mism chosen so high to avoid
// AAAAAAAAA
// T--G----T
// being better than
......@@ -474,10 +474,10 @@ void nparam_CleanSeq (char* inseq, char* outseq, int len)
{
int seqlen = strlen (inseq);
int i, j;
if (len != 0)
seqlen = len;
for (i = 0, j = 0; i < seqlen; i++)
{
switch (inseq[i])
......@@ -517,20 +517,61 @@ float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
unsigned int i;
char nseq[50];
char *useq = seq;
nparam_CleanSeq (seq, nseq, len);
useq = nseq;
int slen = strlen(useq);
//fprintf (stderr,"Primer : %s\n",useq);
for ( i=1;i<slen;i++)
for ( i=1;i<len;i++)
{
c1 = GETREVCODE(useq[i-1]); //nparam_getComplement(seq[i-1],1);
c2 = GETREVCODE(useq[i]); //nparam_getComplement(seq[i],1);
c3 = GETNUMCODE(useq[i-1]);
c4 = GETNUMCODE(useq[i]);
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
// printf("------------------\n");
mtemp = nparam_CalcTM(thedS,thedH);
//if (mtemp == 0)
//{
// fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq);
//exit (0);
//}
return mtemp;
}
float nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
{
float thedH = 0;
//float thedS = nparam_GetInitialEntropy(nparm);
float thedS = -5.9f+nparm->rlogc;
float mtemp;
char c1;
char c2;
char c3;
char c4;
unsigned int i;
char nseq1[50];
char nseq2[50];
char *useq1;
char *useq2;
nparam_CleanSeq (seq1, nseq1, len);
useq1 = nseq1;
nparam_CleanSeq (seq2, nseq2, len);
useq2 = nseq2;
//fprintf (stderr,"Primer : %s\n",useq);
for ( i=1;i<len;i++)
{
c1 = GETREVCODE(useq2[i-1]); //nparam_getComplement(seq[i-1],1);
c2 = GETREVCODE(useq2[i]); //nparam_getComplement(seq[i],1);
c3 = GETNUMCODE(useq1[i-1]);
c4 = GETNUMCODE(useq1[i]);
//fprintf (stderr,"Primer : %s %f %f %d %d, %d %d %f\n",useq,thedH,thedS,(int)c3,(int)c4,(int)c1,(int)c2,nparam_GetEnthalpy(nparm, c3,c4,c1,c2));
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
......@@ -550,7 +591,7 @@ float calculateMeltingTemperatureBasic (char * seq) {
int gccount;
float temp;
int seqlen;
seqlen = strlen (seq);
gccount = nparam_CountGCContent (seq);
temp = 64.9 + 41*(gccount - 16.4)/seqlen;
......
......@@ -3,7 +3,7 @@
* PHunterLib
*
* Nearest Neighbor Model Parameters
*
*
* Created by Tiayyba Riaz on 02/07/09.
*
*/
......@@ -27,8 +27,8 @@
#define DEF_CONC_SEQUENCES 0
#define DEF_SALT 0.05
#define GETNUMCODE(a) bpencoder[a - 'A']
#define GETREVCODE(a) 5-bpencoder[a - 'A']
#define GETNUMCODE(a) bpencoder[a - 'A']
#define GETREVCODE(a) 5-bpencoder[a - 'A']
extern float forbidden_entropy;
......@@ -43,7 +43,7 @@ static char bpencoder[] = { 1, // A
0,0,0,0,0}; // v,w,x,y,z
typedef struct CNNParams_st
typedef struct CNNParams_st
{
float Ct1;
float Ct2;
......@@ -63,8 +63,10 @@ float nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
float nparam_CalcTM(float entropy,float enthalpy);
float nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
float nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);
float nparam_GetInitialEntropy(PNNParams nparm) ;
float calculateMeltingTemperatureBasic (char * seq);
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
#endif
#endif
......@@ -4,18 +4,44 @@
#include <stdlib.h>
#include "thermostats.h"
static word_t extractSite(char* sequence,
size_t begin, size_t length, bool_t strand)
{
char *c;
char *start;
uint32_t l;
word_t site = 0;
start=sequence+begin;
if (!strand)
start+=length-1;
for (c=start,
l=0;
l<length;
l++,
c+=(strand)? 1:-1)
site = (site << 2) | ((strand)? (*c):(~*c)&3);
return site;
}
void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options)
{
size_t i, j;
uint32_t begin;
uint32_t end;
size_t i, j,k,l;
uint32_t bp1,bp2;
uint32_t ep1,ep2;
word_t w1;
word_t w2;
bool_t strand;
char *sq;
char prmr[50];
char *sq,*sq1,*sq2,*c;
char prmrd[50];
char prmrr[50];
char sqsite[50];
float mtemp;
for (i = 0; i < count; i++)
{
w1 = pairs[i]->p1->word;
......@@ -27,38 +53,62 @@ void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options)
if (!pairs[i]->asdirect2)
w2=ecoComplementWord(w2,options->primer_length);
pairs[i]->p1temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(w1, options->primer_length), 0) - 273.0;
pairs[i]->p2temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(w2, options->primer_length), 0) - 273.0;
strncpy(prmrd,ecoUnhashWord(w1, options->primer_length),options->primer_length);
strncpy(prmrr,ecoUnhashWord(w2, options->primer_length),options->primer_length);
prmrd[options->primer_length]=0;
prmrr[options->primer_length]=0;
pairs[i]->p1temp = nparam_CalcSelfTM (options->pnparm, prmrd, options->primer_length) - 273.0;
pairs[i]->p2temp = nparam_CalcSelfTM (options->pnparm, prmrr, options->primer_length) - 273.0;
pairs[i]->p1mintemp = 100;
pairs[i]->p2mintemp = 100;
for (j = 0; j < pairs[i]->pcr.ampcount; j++)
{
sq = pairs[i]->pcr.amplifias[j].sequence->SQ;
begin = pairs[i]->pcr.amplifias[j].begin - options->primer_length;
end = pairs[i]->pcr.amplifias[j].end + 1;
memcpy (prmr, sq + begin, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pairs[i]->p1mintemp)
pairs[i]->p1mintemp = mtemp;
//fprintf (stderr, "prmr1: %s\n", seqdb[seqid]->SQ);
memcpy (prmr, sq + end, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
strand = pairs[i]->pcr.amplifias[j].strand;
bp1 = pairs[i]->pcr.amplifias[j].begin - options->primer_length;
bp2 = pairs[i]->pcr.amplifias[j].end + 1;
if (!strand)
{
uint32_t tmp;
tmp=bp1;
bp1=bp2;
bp2=tmp;
}
// printf("%s : %s, %c",prmrd,
// ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length),
// "rd"[strand]);
mtemp = nparam_CalcTwoTM(options->pnparm,
prmrd,
ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length),
options->primer_length) - 273.0;
// printf(" %4.2f %4.2f\n",pairs[i]->p1temp,mtemp);
if (mtemp < pairs[i]->p1mintemp)
pairs[i]->p1mintemp = mtemp;
// printf("%s : %s, %c\n",prmrr,ecoUnhashWord(extractSite(sq,bp2,options->primer_length,!strand),options->primer_length),
// "rd"[strand]);
//
mtemp = nparam_CalcTwoTM(options->pnparm,
prmrr,
ecoUnhashWord(extractSite(sq,bp2,options->primer_length,!strand),options->primer_length),
options->primer_length) - 273.0;
if (mtemp < pairs[i]->p2mintemp)
pairs[i]->p2mintemp = mtemp;
}
if (w2 < w1)
{
mtemp = pairs[i]->p1temp;
pairs[i]->p1temp = pairs[i]->p2temp;
pairs[i]->p2temp = mtemp;
mtemp = pairs[i]->p1mintemp;
pairs[i]->p1mintemp = pairs[i]->p2mintemp;
pairs[i]->p2mintemp = mtemp;
}
}
}
\ No newline at end of file
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment