/*************************************************************************************************
 * Header file for functions for the taxonomic assignment of sequences                           *
 *************************************************************************************************/

/**
 * @file obi_ecotag.h
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date November 15th 2018
 * @brief Header file for the functions for the taxonomic assignment of sequences.
 */


#ifndef OBI_ECOTAG_H_
#define OBI_ECOTAG_H_


#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>



/*
 * Column names used by the ecotag code.
 * NOTE(review): judging by the macro names, these are presumably the names of
 * the automatically created result columns mentioned in the obi_ecotag()
 * documentation -- confirm against the implementation.
 */
#define ECOTAG_TAXID_COLUMN_NAME            "TAXID"            /* presumably: taxid of the assigned taxon */
#define ECOTAG_NAME_COLUMN_NAME             "SCIENTIFIC_NAME"  /* presumably: scientific name of the assigned taxon */
#define ECOTAG_STATUS_COLUMN_NAME           "ID_STATUS"        /* presumably: identification status -- TODO confirm */
#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME   "BEST_MATCH"       /* presumably: ids of the best matches */
#define ECOTAG_SCORE_COLUMN_NAME            "BEST_IDENTITY"    /* presumably: best (max) score */


/**
 * @brief Assigns a taxon to each sequence of a query view.
 *
 * Note: the columns receiving the results are named and created automatically.
 *
 * Outline of the algorithm, for each query sequence:
 *   1. Align the sequence with the reference database.
 *   2. Keep the indices of all the best matches.
 *   3. For each kept index, fetch the LCA stored in the reference database
 *      at that threshold, then compute the LCA of those LCAs.
 *   4. Write the result: max score, threshold, taxid and scientific name of
 *      the assigned LCA, and the list of the ids of the best matches.
 *
 * @param dms_name             Path to the DMS where the views are.
 * @param query_view_name      Name of the view holding the query sequences.
 * @param ref_dms_name         Name of the DMS holding the reference database.
 * @param ref_view_name        Name of the view corresponding to the reference
 *                             database, as built by build_reference_db().
 * @param taxo_dms_name        Name of the DMS holding the taxonomy associated
 *                             with the reference database.
 * @param taxonomy_name        Name of the taxonomy associated with the
 *                             reference database.
 * @param output_view_name     Name to give to the output view.
 * @param output_view_comments Comments to associate with the output view.
 * @param ecotag_threshold     Threshold at which to assign.
 *
 * @returns A value indicating whether the operation succeeded.
 * @retval 0  if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since November 2018
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_ecotag(
	const char *dms_name,
	const char *query_view_name,
	const char *ref_dms_name,
	const char *ref_view_name,
	const char *taxo_dms_name,
	const char *taxonomy_name,
	const char *output_view_name,
	const char *output_view_comments,
	double      ecotag_threshold
);


#endif /* OBI_ECOTAG_H_ */