Commit dd08d73d by Eric Coissac

Added code for building sets of primers and -p command line option

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@279 60f365c0-8329-0410-b2a4-ec073aeeaa1d
parent e483b17e
......@@ -6,6 +6,7 @@
*/
#include "libecoprimer/ecoprimer.h"
#include "libecoprimer/PrimerSets.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
......@@ -74,6 +75,7 @@ static void PrintHelp()
PP "-A : Print the list of all identifier of sequences present in the database\n\n");
PP "-f : Remove data mining step during strict primer identification\n\n");
PP "-v : Store statistic file about memory usage during strict primer identification\n\n");
PP "-p : Print sets of primers\n\n");
PP "\n");
PP "------------------------------------------\n");
PP "Table result description : \n");
......@@ -132,9 +134,9 @@ void initoptions(poptions_t options)
options->refseq=NULL;
options->circular=0;
options->doublestrand=1;
options->strict_quorum=0.7;
options->strict_quorum=0.3;
options->strict_exclude_quorum=0.1;
options->sensitivity_quorum=0.9;
options->sensitivity_quorum=0.3;
options->false_positive_quorum=0.1;
options->strict_three_prime=0;
options->r=0;
......@@ -146,6 +148,7 @@ void initoptions(poptions_t options)
options->saltmethod = SALT_METHOD_SANTALUCIA;
options->salt = DEF_SALT;
options->printAC=FALSE;
options->print_sets_of_primers = FALSE;
}
void printapair(int32_t index,ppair_t pair, poptions_t options)
......@@ -314,7 +317,7 @@ static int cmpprintedpairs(const void* p1,const void* p2)
return 0;
}
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options, pecodnadb_t seqdb)
{
uint32_t i,j;
float q,qfp;
......@@ -329,6 +332,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
qfp = (float)sortedpairs[i]->outexample/options->outsamples;
else qfp=0.0;
sortedpairs[i]->wellIdentifiedSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
sortedpairs[i]->quorumin = q;
sortedpairs[i]->quorumout = qfp;
sortedpairs[i]->yule = q - qfp;
......@@ -337,8 +341,10 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
if (q > options->sensitivity_quorum &&
qfp < options->false_positive_quorum)
{
//TR 05/09/10 - wellIdentified needed for primer sets
sortedpairs[j]->wellIdentifiedSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array");
(void)taxonomycoverage(sortedpairs[j],options);
taxonomyspecificity(sortedpairs[j]);
taxonomyspecificity(sortedpairs[j], seqdb, options->dbsize);
j++;
}
......@@ -348,7 +354,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
}
void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy, pecodnadb_t seqdb)
{
ppair_t* sortedpairs;
ppair_t* index;
......@@ -357,7 +363,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
size_t count;
char *taxon[]={"taxon","taxa"};
ecotx_t *current_taxon;
pairset pair_sets;
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
......@@ -377,7 +383,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
for (i=0;i<pl->paircount;i++,j++)
sortedpairs[j]=pl->pairs+i;
count=filterandsortpairs(sortedpairs,pairs->count,options);
count=filterandsortpairs(sortedpairs,pairs->count,options, seqdb);
getThermoProperties(sortedpairs, count, options);
fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count);
......@@ -441,8 +447,12 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy)
for (i=0;i < count;i++)
printapair(i,sortedpairs[i],options);
if (options->print_sets_of_primers == TRUE)
{
pair_sets = build_primers_set (sortedpairs, count, seqdb, options);
some_other_set_possibilities (&pair_sets, sortedpairs, count, seqdb, options);
}
}
......@@ -528,7 +538,7 @@ int main(int argc, char **argv)
initoptions(&options);
while ((carg = getopt(argc, argv, "hAfvcUDSE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) {
while ((carg = getopt(argc, argv, "hAfvcUDSpE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:")) != -1) {
switch (carg) {
/* ---------------------------- */
......@@ -691,6 +701,12 @@ int main(int argc, char **argv)
options.circular = 1;
break;
/* -------------------- */
case 'p': /* print sets of primers */
/* --------------------------------- */
options.print_sets_of_primers = TRUE;
break;
case '?': /* bad option */
/* -------------------- */
errflag++;
......@@ -799,9 +815,9 @@ int main(int argc, char **argv)
fprintf(stderr,"\n");
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
printpairs (pairs, &options,taxonomy);
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
printpairs (pairs, &options,taxonomy, seqdb);
return 0;
}
......@@ -14,7 +14,8 @@ SOURCES = goodtaxon.c \
pairs.c \
taxstats.c \
apat_search.c \
filtering.c
filtering.c \
PrimerSets.c
SRCS=$(SOURCES)
......
// Declarations used to assemble sets of primer pairs.
// build_primers_set() and some_other_set_possibilities() are the two entry
// points invoked by the main program when the -p option is given; the other
// prototypes take a SetParams pointer and look like internal helpers.
#ifndef PRIMERSETS_H_
#define PRIMERSETS_H_
#include "ecoprimer.h"
// Number of slots in pairset.set_pairs.
#define PRIMERS_IN_SET_COUNT 10
// One candidate set of primer pairs together with its aggregate statistics.
// NOTE(review): the field meanings below are inferred from their names only;
// confirm against PrimerSets.c.
typedef struct {
// presumably a 0/1 flag array in the spirit of ppair_t's wellIdentifiedSeqs - TODO confirm size and owner
int *set_wellIdentifiedTaxa;
// PRIMERS_IN_SET_COUNT slots; presumably indices into the sorted pair array - TODO confirm
int32_t set_pairs[PRIMERS_IN_SET_COUNT];
// presumably specificity of the whole set - TODO confirm
float set_specificity;
// presumably coverage of the whole set - TODO confirm
float set_coverage;
// presumably a mean computed by get_set_mean_cov_stats() - TODO confirm
float set_lmean;
// presumably a coverage value computed by get_set_mean_cov_stats() - TODO confirm
float set_lcov;
// presumably the overall score of the set - TODO confirm
float set_score;
}pairset;
// Bundle of the four values that build_primers_set() and
// some_other_set_possibilities() receive as separate arguments; the helper
// prototypes below take a pointer to this struct instead.
typedef struct{
ppair_t* sortedpairs;
int32_t sorted_count;
pecodnadb_t seqdb;
poptions_t options;
}SetParams;
// Score components for a single primer pair.
// Not referenced by any prototype in this header.
typedef struct{
float t_spc; //specificity contribution
float t_cov; //coverage contribution
float t_lmd; //link spread difference
float len; //length
float score; //score
}primerscore;
// NOTE(review): presumably stores pair prb_idx into slot pset_idx of pair_set - confirm in PrimerSets.c
void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams);
// NOTE(review): presumably fills pair_wi_count_sorted_ids with candidate pair ids for pair_set - confirm
void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams);
// NOTE(review): returns a float computed for pair prb_idx relative to prob_set; exact meaning not visible here
float get_links_distribution (int prb_idx, pairset *prob_set, SetParams *pparams);
// Builds and returns (by value) a pairset from the sorted pair array.
// The main program passes the filtered/sorted pairs, their count, the
// sequence database and the options structure.
pairset build_primers_set (ppair_t* sortedpairs, int32_t sorted_count, pecodnadb_t seqdb,
poptions_t options);
// NOTE(review): presumably updates the statistics fields of prob_set (set_lmean / set_lcov) - confirm
void get_set_mean_cov_stats (pairset *prob_set, SetParams *pparams);
// Called by the main program right after build_primers_set(), with the
// address of the set it returned plus the same four arguments.
// NOTE(review): what it does with pair_set is not visible from this header.
void some_other_set_possibilities (pairset *pair_set,
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
#endif
......@@ -173,6 +173,9 @@ typedef struct {
uint32_t outtaxa; //< counterexample taxa count
uint32_t notwellidentifiedtaxa;
int *wellIdentifiedSeqs; //< array with one entry per sequence in the database;
// each entry is either 0 or 1: 1 if the sequence is well identified
// by this pair, 0 otherwise
// these statistics are relative to inexample sequences
......@@ -286,6 +289,7 @@ typedef struct {
int saltmethod;
float salt;
PNNParams pnparm;
bool_t print_sets_of_primers;
} options_t, *poptions_t;
typedef ecoseq_t **pecodnadb_t;
......@@ -347,7 +351,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb,
poptions_t options);
float taxonomycoverage(ppair_t pair, poptions_t options);
char ecoComplementChar(char base);
void taxonomyspecificity (ppair_t pair);
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize);
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum);
......
......@@ -179,9 +179,16 @@ void twalkaction (const void *node, VISIT order, int level)
counttaxon(taxid);
}
void taxonomyspecificity (ppair_t pair)
/* Taxon id captured by the most recent twalkaction2() call.
 * taxonomyspecificity() sets it to -1 before walking a tree and treats -1
 * afterwards as "nothing was visited". */
int32_t gtxid;

/*
 * twalk() callback: stores the int32_t read through `node` into gtxid.
 *
 * node  - address handed over by twalk(); read as an int32_t
 * order - visit kind supplied by twalk(); ignored
 * level - depth supplied by twalk(); ignored
 *
 * The assignment happens on every invocation, so after a walk gtxid holds
 * the value seen by the last callback.
 *
 * NOTE(review): the value is read directly through `node` rather than
 * through a pointer-to-key stored in the node; presumably this matches how
 * keys are inserted with tsearch() elsewhere - confirm.
 */
void twalkaction2 (const void *node, VISIT order, int level)
{
    const int32_t *pt = (const int32_t *) node;

    /* The signature is imposed by twalk(); these arguments are not needed. */
    (void) order;
    (void) level;

    gtxid = *pt;
}
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
{
uint32_t i, j;
uint32_t ampfindex = 0;
int32_t taxid;
void *ampftree = NULL;
......@@ -219,11 +226,31 @@ void taxonomyspecificity (ppair_t pair)
}
}
memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int));
counttaxon(-1);
for (i = 0; i < ampfindex; i++)
{
if (ampfwithtaxtree[i].taxoncount > 1)
twalk(ampfwithtaxtree[i].taxontree, twalkaction);
//TR 5/9/10 - added code for well identified seqs
else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/
{
gtxid = -1;
twalk(ampfwithtaxtree[i].taxontree, twalkaction2);
if (gtxid != -1)
{
for (j = 0; j < seqdbsize; j++)
if (seqdb[j]->ranktaxonid == gtxid
&&(pair->p1->directCount[j] > 0
|| pair->p1->reverseCount[j] > 0)
&& (pair->p2->directCount[j] > 0
|| pair->p2->reverseCount[j] > 0))
{
pair->wellIdentifiedSeqs[j] = 1;
}
}
}
}
pair->notwellidentifiedtaxa = counttaxon(-2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment