/********************************************************************
 * OBIDMS taxonomy header file                                      *
 ********************************************************************/

/**
 * @file obidms_taxonomy.h
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date March 2nd 2016
 * @brief Header file for the functions handling the reading of binary taxonomy files.
 */


#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "obidms.h"


/**
 * Taxid threshold related to local taxa.
 * NOTE(review): presumably taxids at or above this value denote locally
 * added taxa -- confirm against the implementation.
 */
#define MIN_LOCAL_TAXID (10000000)

/** Size, in bytes, of the tax_name buffer of OBIDMS_taxonomy_t. */
#define TAX_NAME_LEN (1024)


/**
 * Four 32-bit integer fields followed by a variable-length name.
 * NOTE(review): presumably the record layout of one taxon in a binary
 * taxonomy file -- confirm against the reading/writing code.
 */
typedef struct {
	int32_t taxid;
	int32_t rank;
	int32_t parent;
	int32_t name_length;	// presumably the length of name[] -- TODO confirm
	char    name[];		// flexible array member
} ecotxformat_t;


/**
 * In-memory taxon node. `parent` points to another node of the same type.
 */
typedef struct ecotxnode {
	int32_t           taxid;	// TODO discuss: this will be the current taxid even if the struct was accessed through a deprecated one
	int32_t           rank;
	int32_t           farest;
	int32_t           idx;
	struct ecotxnode *parent;
	char             *name;			// scientific name
	char             *preferred_name;	// preferred name
	bool              local;
} ecotx_t;


typedef struct {
	int32_t count;
47 48
	int32_t ncbi_count;
	int32_t local_count;
49 50
	int32_t max_taxid;
	int32_t buffer_size;
51
	ecotx_t taxon[];
52 53 54 55 56
} ecotxidx_t;


/**
 * A count followed by a flexible array of string pointers.
 * NOTE(review): presumably the rank labels of a taxonomy, with `count`
 * entries in label[] -- confirm against the code that fills it.
 */
typedef struct {
	int32_t count;
	char   *label[];	// flexible array member
} ecorankidx_t;


/**
 * Four 32-bit integer fields followed by variable-length character data.
 * NOTE(review): presumably the record layout of one name entry in a binary
 * taxonomy file, with names[] holding the name and its class back to back
 * (lengths given by name_length and class_length) -- confirm against the
 * reading/writing code.
 */
typedef struct {
	int32_t is_scientific_name;
	int32_t name_length;
	int32_t class_length;
	int32_t taxid;		// taxid idx
	char    names[];	// flexible array member
} econameformat_t;


/**
 * In-memory name entry: two strings, a flag and a pointer to a taxon node.
 * NOTE(review): `is_scientific_name` is presumably used as a boolean --
 * confirm against the code that sets it.
 */
typedef struct {
	char             *name;
	char             *class_name;
	int32_t           is_scientific_name;
	struct ecotxnode *taxon;
} econame_t;


typedef struct {
79
	int32_t   count;
80
	econame_t names[];
81 82 83
} econameidx_t;


/**
 * Pair of a taxid and an index.
 * NOTE(review): presumably maps a (possibly merged/deprecated) taxid to the
 * index of its taxon in the taxa array -- confirm against the implementation.
 */
typedef struct {
	int32_t taxid;
	int32_t idx;
} ecomerged_t;


/**
 * A count followed by a flexible array of ecomerged_t entries.
 * NOTE(review): `count` is presumably the number of entries in merged[] --
 * confirm against the code that fills it.
 */
typedef struct {
	int32_t      count;
	ecomerged_t  merged[];	// flexible array member
} ecomergedidx_t;


96
typedef struct OBIDMS_taxonomy_t {
97 98 99 100 101
	char            tax_name[TAX_NAME_LEN];
	OBIDMS_p        dms;
	ecomergedidx_t* merged_idx;
	ecorankidx_t*   ranks;
	econameidx_t*   names;
102
	econameidx_t*   preferred_names;
103
	ecotxidx_t*     taxa;
104 105 106 107 108 109 110 111 112
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;


/*
 * NOTE(review): the implementations are not visible from this header; the
 * descriptions below are read off the signatures only -- confirm before
 * relying on them.
 */

/** Takes a DMS handle, a taxonomy name and a flag; returns a taxonomy handle. */
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p    dms,
                                    const char *taxonomy_name,
                                    bool        read_alternative_names);

/** Takes a taxonomy handle; returns an int (presumably a status code -- TODO confirm). */
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);

/** Takes a taxon and a rank index; returns a taxon pointer. */
ecotx_t *obi_taxo_get_parent_at_rank(ecotx_t *taxon, int32_t rankidx);

113
ecotx_t* obi_taxo_get_taxon_with_current_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
114 115 116 117 118 119 120 121 122 123 124 125 126 127
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);

/*
 * NOTE(review): the implementations are not visible from this header; the
 * descriptions below are read off the signatures only -- confirm before
 * relying on them.
 */

/** Takes a taxon and another taxid; returns a boolean. */
bool obi_taxo_is_taxon_under_taxid(ecotx_t *taxon, int32_t other_taxid);

/*
 * The five functions below share one signature: a taxon and a taxonomy
 * handle in, a taxon pointer out. Presumably each returns the ancestor of
 * `taxon` at the rank named in the function -- TODO confirm.
 */
ecotx_t *obi_taxo_get_species(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy);

ecotx_t *obi_taxo_get_genus(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy);

ecotx_t *obi_taxo_get_family(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy);

ecotx_t *obi_taxo_get_kingdom(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy);

ecotx_t *obi_taxo_get_superkingdom(ecotx_t *taxon, OBIDMS_taxonomy_p taxonomy);

128 129 130
int obi_write_taxonomy(OBIDMS_p dms, OBIDMS_taxonomy_p tax, const char* tax_name);

OBIDMS_taxonomy_p obi_read_taxdump(const char* taxdump);
131

132 133 134 135 136 137 138 139
int obi_taxo_add_local_taxon(OBIDMS_taxonomy_p tax, const char* name, const char* rank_name, int32_t parent_taxid, int32_t min_taxid);

/*
 * NOTE(review): the implementations are not visible from this header; the
 * descriptions below are read off the signatures only -- confirm before
 * relying on them.
 */

/** Takes a taxonomy handle, a taxid and a preferred name; returns an int (presumably a status code -- TODO confirm). */
int obi_taxo_add_preferred_name_with_taxid(OBIDMS_taxonomy_p tax,
                                           int32_t           taxid,
                                           const char       *preferred_name);

/** Same as above, but the taxon is passed as a pointer instead of a taxid. */
int obi_taxo_add_preferred_name_with_taxon(OBIDMS_taxonomy_p tax,
                                           ecotx_t          *taxon,
                                           const char       *preferred_name);