obiavl.h 17.9 KB
Newer Older
1 2 3 4 5 6 7 8
/****************************************************************************
 * OBIDMS AVL tree header file	                                            *
 ****************************************************************************/

/**
 * @file obiavl.h
 * @author Celine Mercier
 * @date December 3rd 2015
9
 * @brief Header file for handling AVL trees for storing and retrieving blobs.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 */


#ifndef OBIAVL_H_
#define OBIAVL_H_


#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdbool.h>

#include "obidms.h"
26
#include "obiblob.h"
27
#include "obitypes.h"
28
#include "bloom.h"
29 30
#include "utils.h"
#include "encode.h"
31

32

33
#define MAX_NB_OF_AVLS_IN_GROUP (1000)						/**< The maximum number of AVL trees in a group.	// TODO discuss
34
                              	  	 	 	 	 	 	 	 */
35
#define MAX_NODE_COUNT_PER_AVL (5000000)					/**< The maximum number of nodes in an AVL tree.
36
															 *   Only used to decide when to create a new AVL in a group, and to initialize the bloom filter // TODO discuss. Try 30M?
37 38
                              	  	 	 	 	 	 	 	 */
#define MAX_DATA_SIZE_PER_AVL (1073741824)		            /**< The maximum size of the data referred to by an AVL tree in a group.
39
 	 	 	 	 	 	 	         	 	 	 	 	 	 *   Only used to decide when to create a new AVL in a group.
40 41 42 43
 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 *   Should not be greater than int32_t max (2,147,483,647), as indexes will have to be stored on 32 bits.
 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
#define AVL_MAX_DEPTH (1024)								/**< The maximum depth of an AVL tree. Used to save paths through the tree.
                              	  	 	 	 	 	 	 	 */
44
#define AVL_MAX_NAME (250) 				     				/**< The maximum length of an AVL tree name.
45 46 47 48 49 50 51
                              	  	 	 	 	 	 	 	 */
#define AVL_GROWTH_FACTOR (2)								/**< The growth factor when an AVL tree is enlarged.
                                	 	 	 	 	 	 	 */
#define LEFT_CHILD(node)	(avl->tree)+(node->left_child)  /**< Pointer to the left child of a node in an AVL tree.
															 */
#define RIGHT_CHILD(node)	(avl->tree)+(node->right_child) /**< Pointer to the right child of a node in an AVL tree.
															 */
52

53 54 55 56 57

/**
 * @brief AVL tree node structure.
 */
typedef struct AVL_node {
58 59 60 61 62 63 64 65 66 67
	index_t  left_child;	  /**< Index of left less child node.
	  	 	 	 	 	 	   */
	index_t  right_child; 	  /**< Index of right greater child node.
	 	 	 	 	 	 	   */
	int8_t   balance_factor;  /**< Balance factor of the node.
							   */
	index_t  value;			  /**< Index of the value associated with the node in the data array.
							   */
	uint64_t crc64;			  /**< Cyclic Redundancy Check code on 64 bits associated with the value.
	 	 	 	 	 	 	   */
68 69 70 71 72 73 74
} AVL_node_t, *AVL_node_p;


/**
 * @brief OBIDMS AVL tree data header structure.
 */
typedef struct OBIDMS_avl_data_header {
75
	size_t		header_size;						/**< Size of the header in bytes.
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		data_size_used;						/**< Size of the data used in bytes.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		data_size_max;						/**< Max size of the data in bytes.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		nb_items;							/**< Number of items.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	char    	avl_name[AVL_MAX_NAME+1];			/**< The AVL tree name as a NULL terminated string.
													 */
	time_t		creation_date;			    	    /**< Date of creation of the file.
												     */
} OBIDMS_avl_data_header_t, *OBIDMS_avl_data_header_p;


/**
 * @brief OBIDMS AVL tree data structure.
 */
typedef struct OBIDMS_avl_data {
	OBIDMS_avl_data_header_p	header; 		 	/**< A pointer to the header of the AVL tree data.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
96
	byte_t*                 	data;   		 	/**< A pointer to the beginning of the data.
97
													 */
98 99
	int							data_fd;	        /**< File descriptor of the file containing the data.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
100 101 102 103 104 105 106
} OBIDMS_avl_data_t, *OBIDMS_avl_data_p;


/**
 * @brief OBIDMS AVL tree header structure.
 */
typedef struct OBIDMS_avl_header {
107
	size_t		header_size;						/**< Size of the header in bytes.
108 109 110 111 112 113 114 115 116 117 118 119 120
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	size_t		avl_size;							/**< Size of the AVL tree in bytes.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		nb_items;							/**< Number of items in the AVL tree.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		nb_items_max;						/**< Maximum number of items in the AVL tree before it has to be enlarged.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		root_idx;							/**< Index of the root of the AVL tree.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	char    	avl_name[AVL_MAX_NAME+1];			/**< The AVL tree name as a NULL terminated string.
													 */
	time_t		creation_date;			    	    /**< Date of creation of the file.
												     */
121
	bloom_t 	bloom_filter;			    	    /**< Bloom filter associated with the AVL tree, enabling to know if a value
122
													 *   might already be stored in the data referred to by the tree.
123
     	 	 	 	 	 	 	 	 	 	 	 	 */
124 125 126 127 128 129 130
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;


/**
 * @brief OBIDMS AVL tree structure.
 */
typedef struct OBIDMS_avl {
131 132 133 134 135 136
	OBIDMS_p                	dms;			 			/**< A pointer to the OBIDMS structure to which the AVL tree belongs.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	OBIDMS_avl_header_p			header; 	 				/**< A pointer to the header of the AVL tree.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	struct AVL_node*           	tree;   		 			/**< A pointer to the root of the AVL tree.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
137 138 139
	index_t                     path_idx[AVL_MAX_DEPTH];	/**< The path taken to a node from the root as an array of node indices.
													 	     */
	int8_t                      path_dir[AVL_MAX_DEPTH];	/**< The path taken to a node from the root as an array of directions
140
															 *   (0 for left, -1 for right).
141
															 */
142 143 144 145 146
	OBIDMS_avl_data_p			data;						/**< A pointer to the structure containing the data
													 	 	 *   that the AVL tree references.
													 	 	 */
	int							avl_fd;						/**< The file descriptor of the file containing the AVL tree.
													 	 	 */
147 148 149
} OBIDMS_avl_t, *OBIDMS_avl_p;


150 151 152 153
/**
 * @brief OBIDMS AVL tree group structure.
 */
typedef struct OBIDMS_avl_group {
154 155
	OBIDMS_avl_p 	sub_avls[MAX_NB_OF_AVLS_IN_GROUP];		/**< Array containing the pointers to the AVL trees of the group.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
156
	int 			last_avl_idx;							/**< Index in the sub_avls array of the AVL tree currently being filled.
157
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
158
	char			name[AVL_MAX_NAME+1];				 	/**< Base name of the AVL group. The AVL trees in it have names of the form basename_idx.
159 160 161
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	OBIDMS_p        dms;									/**< Pointer to the OBIDMS structure to which the AVL group belongs.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
162 163
	bool			writable;								/**< Indicates whether the AVL group is read-only or not.
															 */
164 165
	size_t			counter;								/**< Indicates by how many threads/programs (TODO) the AVL group is used.
													 	 	 */
166 167 168
} OBIDMS_avl_group_t, *OBIDMS_avl_group_p;


169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222

/**
 * @brief Function building the complete AVL name for an AVL with an associated index (for AVL groups).
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param avl_name The base name of the AVL tree.
 * @param avl_idx The index associated with that AVL.
 *
 * @returns A pointer to the complete name of the AVL.
 * @retval NULL if an error occurred.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_build_avl_name_with_idx(const char* avl_name, int avl_idx);


/**
 * @brief Function building the full path of an AVL tree file.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer to the OBIDMS to which the AVL tree belongs.
 * @param avl_name The name of the AVL tree.
 * @param avl_idx The index of the AVL if it's part of an AVL group, or -1 if not.
 *
 * @returns A pointer to the full path of the file where the AVL tree is stored.
 * @retval NULL if an error occurred.
 *
 * @since May 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_get_full_path_of_avl_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx);


/**
 * @brief Function building the file name for an AVL data file.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer to the OBIDMS to which the AVL tree belongs.
 * @param avl_name The name of the AVL tree.
 * @param avl_idx The index of the AVL if it's part of an AVL group, or -1 if not.
 *
 * @returns A pointer to the full path of the file where the data referred to by the AVL tree is stored.
 * @retval NULL if an error occurred.
 *
 * @since May 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_get_full_path_of_avl_data_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx);


223
/**
224
 * @brief Checks if an AVL tree or AVL tree group already exists or not.
225
 *
226
 * @param dms The OBIDMS to which the AVL tree or AVL tree group belongs.
227
 * @param avl_name The name of the AVL tree or the base name of the AVL tree group.
228
 *
229 230 231
 * @returns A value indicating whether the AVL tree or AVL tree group exists or not.
 * @retval 1 if the AVL tree or AVL tree group exists.
 * @retval 0 if the AVL tree or AVL tree group does not exist.
232 233 234 235 236 237 238 239 240
 * @retval -1 if an error occurred.
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_avl_exists(OBIDMS_p dms, const char* avl_name);


/**
241
 * @brief Creates an AVL tree. Fails if it already exists.
242 243
 *
 * Note: An AVL tree is made of two files (referred to by two structures).
244
 * 		 One file contains the tree referring to the data, and the other
245
 * 		 file contains the data itself. The AVL tree as a whole is referred
246 247 248
 * 		 to via the OBIDMS_avl structure. An AVL tree is stored in a directory
 * 		 with the same name, or with the base name of the AVL group if it is
 * 		 part of an AVL group.
249 250 251
 *
 * @param dms The OBIDMS to which the AVL tree belongs.
 * @param avl_name The name of the AVL tree.
252 253
 * @param avl_idx The index of the AVL tree if it is part of an AVL group,
 *        or -1 if it is not part of an AVL group.
254
 *
255
 * @returns A pointer to the newly created AVL tree structure.
256 257 258 259 260
 * @retval NULL if an error occurred.
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
261
OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
262 263 264


/**
265
 * @brief Opens an AVL tree in read-only mode. Fails if it does not already exist.
266 267
 *
 * Note: An AVL tree is made of two files (referred to by two structures).
268
 * 		 One file contains the tree referring to the data, and the other
269 270 271 272 273
 * 		 file contains the data itself. The AVL tree as a whole is referred
 * 		 to via the OBIDMS_avl structure.
 *
 * @param dms The OBIDMS to which the AVL tree belongs.
 * @param avl_name The name of the AVL tree.
274 275
 * @param avl_idx The index of the AVL tree if it is part of an AVL group,
 *        or -1 if it is not part of an AVL group.
276
 *
277
 * @returns A pointer to the AVL tree structure.
278 279 280 281 282
 * @retval NULL if an error occurred.
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
283
OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
284 285 286


/**
287
 * @brief Opens an AVL tree group and creates it if it does not already exist.
288
 *
289 290
 * Note: An AVL tree group is composed of multiple AVL trees that all have the
 *       same base name, and an index differentiating them.
291 292
 *
 * @param dms The OBIDMS to which the AVL tree belongs.
293
 * @param avl_name The base name of the AVL tree group.
294
 *
295
 * @returns A pointer to the AVL tree group structure.
296 297
 * @retval NULL if an error occurred.
 *
298
 * @since April 2016
299 300
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
301
OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name);
302 303 304


/**
305
 * @brief Creates an AVL tree group.
306
 *
307 308
 * Note: An AVL tree group is composed of multiple AVL trees that all have the
 *       same base name, and an index differentiating them.
309
 *
310 311
 * @param dms The OBIDMS to which the AVL tree belongs.
 * @param avl_name The base name of the AVL tree group.
312
 *
313 314
 * @returns A pointer to the AVL tree group structure.
 * @retval NULL if an error occurred.
315
 *
316
 * @since April 2016
317 318
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
319
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
320 321 322


/**
323
 * @brief Opens an AVL tree group in read-only mode.
324
 *
325 326
 * Note: An AVL tree group is composed of multiple AVL trees that all have the
 *       same base name, and an index differentiating them.
327
 *
328 329 330 331 332 333 334 335 336 337 338 339
 * @param dms The OBIDMS to which the AVL tree belongs.
 * @param avl_name The base name of the AVL tree group.
 *
 * @returns A pointer to the AVL tree group structure.
 * @retval NULL if an error occurred.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
OBIDMS_avl_group_p obi_open_avl_group(OBIDMS_p dms, const char* avl_name);


340 341 342 343 344 345 346 347
/**
 * @brief Clones an AVL.
 *
 * The tree and the data are both cloned into the new AVL.
 *
 * @param avl A pointer on the AVL to clone.
 * @param new_avl A pointer on the new AVL to fill.
 *
348 349 350
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
351 352 353
 * @since May 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
354
int obi_clone_avl(OBIDMS_avl_p avl, OBIDMS_avl_p new_avl);
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373


/**
 * @brief Clones an AVL group.
 *
 * @warning The AVL group that has be cloned is closed by the function.
 *
 * @param avl_group A pointer on the AVL group to clone.
 * @param new_avl_name The name of the new AVL group.
 *
 * @returns A pointer on the new AVL group structure.
 * @retval NULL if an error occurred.
 *
 * @since May 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
OBIDMS_avl_group_p obi_clone_avl_group(OBIDMS_avl_group_p avl_group, const char* new_avl_name);


374 375
/**
 * @brief Closes an AVL tree.
376
 *
377
 * @param avl A pointer to the AVL tree structure to close and free.
378
 * @param writable Whether the AVL is writable or not (and therefore if the files can and should be truncated to the used size).
379 380
 *
 * @retval 0 if the operation was successfully completed.
381 382 383 384 385
 * @retval -1 if an error occurred.
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
386
int obi_close_avl(OBIDMS_avl_p avl, bool writable);
387 388 389


/**
390
 * @brief Closes an AVL tree group.
391
 *
392
 * @param avl_group A pointer to the AVL tree group structure to close and free.
393
 *
394 395
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
396
 *
397
 * @since April 2016
398 399
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
400
int obi_close_avl_group(OBIDMS_avl_group_p avl_group);
401 402 403


/**
404
 * @brief Recovers a value (blob) in an AVL tree.
405
 *
406
 * @warning The blob recovered must be decoded to get the original value.
407
 * @warning The blob recovered is mapped in memory.
408 409 410 411
 *
 * @param avl A pointer to the AVL tree.
 * @param index The index of the value in the data array.
 *
412
 * @returns A pointer to the blob recovered.
413 414 415 416
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
417
Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
418 419 420


/**
421
 * @brief Adds a value (blob) in an AVL tree.
422
 *
423
 * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
424
 * @warning If the value is already in the AVL tree, an error will be triggered.		// TODO to discuss
425
 *
426
 * @param avl A pointer to the AVL tree.
427
 * @param value The blob to add in the AVL tree.
428
 *
429 430
 * @returns The index of the value newly added in the AVL tree.
 * @retval -1 if an error occurred.
431
 *
432
 * @since December 2015
433 434
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
435
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value);
436 437 438


/**
439
 * @brief Finds a value (blob) in an AVL tree.
440
 *
441
 * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
442
 *
443
 * @param avl A pointer to the AVL tree.
444
 * @param value The blob to add in the AVL tree.
445
 *
446 447 448 449
 * @returns The data index of the value.
 * @retval -1 if the value is not in the tree.
 *
 * @since December 2015
450 451
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
452
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value);
453 454 455


/**
456
 * @brief Recovers a value (blob) in an AVL tree.
457
 *
458
 * @warning The blob recovered must be decoded to get the original value.
459
 * @warning The blob recovered is mapped in memory.
460
 *
461
 * @param avl_group A pointer to the AVL tree.
462 463 464 465
 * @param index The index of the value in the form of a 64-bit integer
 *              with the 32 left-most bits coding for the index of the tree of
 *              the group in which the value is stored, and the 32 right-most bits
 *              coding for the index at which the value is stored in that AVL tree.
466
 *
467
 * @returns A pointer to the blob recovered.
468
 *
469
 * @since April 2016
470 471
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
472
Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
473 474 475


/**
476
 * @brief Adds a value (blob) in an AVL tree group, checking if it is already in it.
477
 *
478
 * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
479
 *
480
 * @param avl_group A pointer to the AVL tree group.
481
 * @param value The blob to add in the AVL tree group.
482
 *
483 484 485 486
 * @returns The index of the value newly added in the AVL tree group, in the form of a
 *          64-bit integer with the 32 left-most bits coding for the index of the tree
 *          of the group in which the value is stored, and the 32 right-most bits
 *          coding for the index at which the value is stored in that AVL tree.
487
 * @retval -1 if an error occurred.
488
 *
489
 * @since April 2016
490 491
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
492
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value);
493 494


495 496 497 498 499 500 501 502 503 504 505 506 507
/**
 * @brief Recovers the name of an AVL group.
 *
 * @param avl_group A pointer on the AVL group structure.
 *
 * @returns A pointer on the name of the AVL group.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
const char* obi_avl_group_get_name(OBIDMS_avl_group_p avl_group);


508 509
#endif /* OBIAVL_H_ */