Commit 313dd59f by Eric Coissac

First run of patch

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@203 60f365c0-8329-0410-b2a4-ec073aeeaa1d
parent e869d6da
......@@ -16,7 +16,10 @@
#define VERSION "0.1"
/* TR: by default, statistics are made on species level*/
#define DEFULTTAXONRANK "species"
#define DEFAULTTAXONRANK "species"
static int cmpprintedpairs(const void* p1,const void* p2);
/* ----------------------------------------------- */
/* printout help */
......@@ -40,7 +43,7 @@ static void PrintHelp()
PP " .tdx : contains information concerning the taxonomy\n");
PP " .rdx : contains the taxonomy rank\n\n");
PP " ecoPrimer needs all the file type. As a result, you have to write the\n");
PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n");
PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n");
PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n");
PP "-h : [H]elp - print <this> help\n\n");
PP "-i : [I]gnore the given taxonomy id.\n\n");
......@@ -69,13 +72,14 @@ static void PrintHelp()
PP "column 8 : in taxa count\n");
PP "column 9 : out taxa count\n");
PP "column 10 : coverage\n");
PP "column 11 : specificity\n");
PP "column 12 : minimum amplified length\n");
PP "column 13 : maximum amplified length\n");
PP "column 14 : average amplified length\n");
PP "column 11 : unambiguously identified taxa\n");
PP "column 12 : specificity\n");
PP "column 13 : minimum amplified length\n");
PP "column 14 : maximum amplified length\n");
PP "column 15 : average amplified length\n");
PP "------------------------------------------\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n");
PP "------------------------------------------\n\n");
PP "------------------------------------------\n\n");
PP "\n");
}
......@@ -110,7 +114,7 @@ void initoptions(poptions_t options)
options->r=0;
options->g=0;
options->no_multi_match=FALSE;
strcpy(options->taxonrank, DEFULTTAXONRANK); /*taxon level for results, species by default*/
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
}
void printcurrenttime ()
......@@ -143,8 +147,7 @@ void printcurrenttimeinmilli()
void printapair(int32_t index,ppair_t pair, poptions_t options)
{
uint32_t wellidentifiedtaxa;
printf("%6d\t",index);
if (pair->asdirect1)
printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length));
......@@ -167,17 +170,11 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
printf("\t%d", pair->intaxa);
printf("\t%d", pair->outtaxa);
printf("\t%4.3f", (float)pair->intaxa/options->intaxa);
wellidentifiedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa;
//printf("\t%d", pair->notwellidentifiedtaxa);
//printf("\t%d", (pair->intaxa + pair->outtaxa));
printf("\t%4.3f", (float)pair->bc);
printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa);
printf("\t%4.3f", (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa));
printf("\t%4.3f", pair->bs);
printf("\t%d", pair->mind);
......@@ -186,6 +183,25 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
}
static int cmpprintedpairs(const void* p1,const void* p2)
{
float s1,s2;
ppair_t pair1,pair2;
pair1=*((ppair_t*)p1);
pair2=*((ppair_t*)p2);
s1 = pair1->yule * pair1->bs;
s2 = pair2->yule * pair2->bs;
// fprintf(stderr,"s1 : %4.3f %4.3f %4.3f\n",pair1->yule , pair1->bs,s1);
// fprintf(stderr,"s2 : %4.3f %4.3f %4.3f\n\n",pair2->yule , pair2->bs,s2);
if (s1 > s2) return -1;
if (s1 < s2) return 1;
return 0;
}
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
{
uint32_t i,j;
......@@ -201,9 +217,10 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
qfp = (float)sortedpairs[i]->outexample/options->outsamples;
else qfp=0.0;
sortedpairs[i]->quorumin = q;
sortedpairs[i]->quorumout = qfp;
sortedpairs[i]->yule = q -qfp;
sortedpairs[i]->yule = q - qfp;
sortedpairs[j]=sortedpairs[i];
......@@ -215,23 +232,30 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
{
(void)taxonomycoverage(sortedpairs[j],options);
taxonomyspecificity(sortedpairs[j]);
// fprintf(stderr,"%4.3f %4.3f %4.3f\n",sortedpairs[j]->yule , sortedpairs[j]->bs,sortedpairs[j]->bc);
j++;
}
}
qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs);
return j;
}
void printpairs (ppairtree_t pairs, poptions_t options)
{
ppair_t* sortedpairs;
ppair_t* index;
ppairlist_t pl;
size_t i,j;
int32_t count;
size_t count;
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
fprintf(stderr,"Total pair count : %d\n",pairs->count);
sortedpairs = ECOMALLOC(pairs->count*sizeof(ppair_t),"Cannot Allocate ordered pairs");
......@@ -256,40 +280,6 @@ void printpairs (ppairtree_t pairs, poptions_t options)
}
#ifdef MASKEDCODE
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
{
uint32_t i;
uint32_t wordsize = options->primer_length;
uint32_t quorumseqs;
double sens;
double speci;
float avg;
quorumseqs = seqdbsize * 70 / 100;
printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);
for (i=0; i < pairs.paircount; i++)
{
if (quorumseqs > pairs.pairs[i].inexample) continue;
sens = (pairs.pairs[i].taxsetindex*1.0)/rankdbstats*100;
speci = (pairs.pairs[i].oktaxoncount*1.0)/pairs.pairs[i].taxsetindex*100;
avg = (pairs.pairs[i].mind+pairs.pairs[i].maxd)*1.0/2;
printf("P1\t%s", ecoUnhashWord(pairs.pairs[i].w1, wordsize));
printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[i].w2, wordsize));
printf("\t%d", pairs.pairs[i].inexample);
printf("\t%3.2f", sens);
printf("\t%3.2f", speci);
printf("\t%d", pairs.pairs[i].mind);
printf("\t%d", pairs.pairs[i].maxd);
printf("\t%3.2f\n", avg);
}
}
#endif /* MASKEDCODE */
/*updateseqparams: This function counts the insample and outsample sequences
* and with each sequences adds a tag of the taxon to which the sequence beongs*/
......@@ -340,52 +330,6 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
}
/* to get db stats, totals of species, genus etc....*/
#ifdef MASKEDCODE
void setoktaxforspecificity (ppairtree_t pairs)
{
uint32_t i;
uint32_t j;
uint32_t k;
uint32_t l;
int taxcount;
int32_t taxid;
for (i = 0; i < pairs->paircount; i++)
{
for (j = 0; j < pairs->pairs[i].taxsetindex; j++)
{
for (k = 0; k < pairs->pairs[i].taxset[j].amplifiaindex; k++)
{
taxid = 0;
taxcount = 0;
for (l = 0; l < pairs->pairs[i].ampsetindex; l++)
{
/*compare only char pointers because for equal strings we have same pointer in both sets*/
if (pairs->pairs[i].taxset[j].amplifia[k] == pairs->pairs[i].ampset[l].amplifia)
{
if (pairs->pairs[i].ampset[l].seqidindex > 1)
{
taxcount += pairs->pairs[i].ampset[l].seqidindex;
break;
}
if (taxid != pairs->pairs[i].ampset[l].taxonids[0])
{
if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];
taxcount++;
}
if (taxcount > 1) break;
}
}
if (taxcount == 1) pairs->pairs[i].oktaxoncount++;
}
}
}
}
#endif
int main(int argc, char **argv)
{
......@@ -412,7 +356,7 @@ int main(int argc, char **argv)
initoptions(&options);
while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) {
while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:O:")) != -1) {
switch (carg) {
/* -------------------- */
......@@ -516,6 +460,12 @@ int main(int argc, char **argv)
options.g++;
break;
/* --------------------------------- */
case 'O': /* set primer size */
/* --------------------------------- */
sscanf(optarg,"%d",&(options.primer_length));
break;
/* -------------------- */
case 'c': /* sequences are circular */
/* --------------------------------- */
......@@ -601,7 +551,7 @@ int main(int argc, char **argv)
/*TR: Added*/
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
// setoktaxforspecificity (&pairs);
......
......@@ -104,7 +104,7 @@ typedef struct {
* on all sequences as a list of primer_t
*/
typedef struct {
pprimer_t primers;
pprimer_t primers;
uint32_t size;
} primercount_t, *pprimercount_t;
......@@ -170,6 +170,8 @@ typedef struct {
float yule;
float quorumin;
float quorumout;
float bs;
float bc;
//
// uint32_t taxsetcount;
// uint32_t taxsetindex;
......@@ -224,7 +226,7 @@ typedef struct {
typedef struct {
int32_t taxid;
void *amptree;
void *amptree;
}taxontoamp_t, *ptaxontoamp_t;
typedef struct {
......
......@@ -40,7 +40,7 @@ int32_t counttaxon(int32_t taxid)
taxoncount=0;
return 0;
}
if ((taxid > 0) && ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon))))
{
......@@ -108,18 +108,21 @@ float taxonomycoverage(ppair_t pair, poptions_t options)
counttaxon(-1);
for (i=0; i < seqcount; i++)
if (pair->pcr.amplifias[i].sequence->isexample)
if (pair->pcr.amplifias[i].sequence->isexample
&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
counttaxon(-1);
for (i=0; i < seqcount; i++)
if (!pair->pcr.amplifias[i].sequence->isexample)
if (!pair->pcr.amplifias[i].sequence->isexample
&& pair->pcr.amplifias[i].sequence->ranktaxonid)
outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
pair->intaxa=incount;
pair->outtaxa=outcount;
return (float)incount/options->intaxa;
pair->bc=(float)incount/options->intaxa;
return pair->bc;
}
......@@ -132,11 +135,11 @@ static int cmpamp(const void *ampf1, const void* ampf2)
char cd2;
int chd = 0;
int len = 0;
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
if (pampf1->strand != pampf2->strand)
{
incr = -1;
......@@ -148,9 +151,9 @@ static int cmpamp(const void *ampf1, const void* ampf2)
chd = 1;
}
}
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
for (i = 0; i < len; i++, j += incr)
{
cd1 = pampf1->amplifia[i];
......@@ -158,14 +161,14 @@ static int cmpamp(const void *ampf1, const void* ampf2)
cd2 = ecoComplementChar(pampf2->amplifia[j]);
else
cd2 = pampf2->amplifia[j];
if (cd1 < cd2) return chd ? 1: -1;
if (cd2 < cd1) return chd ? -1: 1;
}
if (pampf1->length > pampf2->length) return chd ? -1: 1;
if (pampf2->length > pampf1->length) return chd ? 1: -1;
return 0;
}
......@@ -183,42 +186,48 @@ void taxonomyspecificity (ppair_t pair)
void *ampftree = NULL;
pamptotaxon_t pcurrentampf;
pamptotaxon_t *ptmp;
pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree");
for (i = 0; i < pair->pcr.ampcount; i++)
{
/*populate taxon ids tree against each unique amplifia
i.e set of taxon ids for each amplifia*/
ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
pcurrentampf = &ampfwithtaxtree[ampfindex];
taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;
ptmp = tfind((const void*)pcurrentampf, &ampftree, cmpamp);
if (ptmp == NULL)
if (pair->pcr.amplifias[i].sequence->isexample)
{
ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
pcurrentampf = &ampfwithtaxtree[ampfindex];
tsearch((void*)pcurrentampf,&ampftree,cmpamp);
ampfindex++;
}
else
pcurrentampf = *ptmp;
if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
{
pcurrentampf->taxoncount++;
tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;
ptmp = tfind((const void*)pcurrentampf, &ampftree, cmpamp);
if (ptmp == NULL)
{
pcurrentampf = &ampfwithtaxtree[ampfindex];
tsearch((void*)pcurrentampf,&ampftree,cmpamp);
ampfindex++;
}
else
pcurrentampf = *ptmp;
if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
{
pcurrentampf->taxoncount++;
tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
}
}
}
counttaxon(-1);
for (i = 0; i < ampfindex; i++)
{
if (ampfwithtaxtree[i].taxoncount > 1)
twalk(ampfwithtaxtree[i].taxontree, twalkaction);
}
pair->notwellidentifiedtaxa = counttaxon(-2);
pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
ECOFREE (ampfwithtaxtree, "Free amplifia table");
}
\ No newline at end of file
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment