obiavl.h 18 KB
Newer Older
1 2 3 4 5 6 7 8
/****************************************************************************
 * OBIDMS AVL tree header file	                                            *
 ****************************************************************************/

/**
 * @file obiavl.h
 * @author Celine Mercier
 * @date December 3rd 2015
9
 * @brief Header file for handling AVL trees for storing and retrieving blobs.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 */


#ifndef OBIAVL_H_
#define OBIAVL_H_


#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdbool.h>

#include "obidms.h"
26
#include "obiblob.h"
27
#include "obitypes.h"
28
#include "bloom.h"
29 30
#include "utils.h"
#include "encode.h"
31

32

33
#define MAX_NB_OF_AVLS_IN_GROUP (1000)						/**< The maximum number of AVL trees in a group.	// TODO discuss
34
                              	  	 	 	 	 	 	 	 */
35
#define MAX_NODE_COUNT_PER_AVL (5000000)					/**< The maximum number of nodes in an AVL tree.
36
															 *   Only used to decide when to create a new AVL in a group, and to initialize the bloom filter // TODO discuss. Try 30M?
37 38
                              	  	 	 	 	 	 	 	 */
#define MAX_DATA_SIZE_PER_AVL (1073741824)		            /**< The maximum size of the data referred to by an AVL tree in a group.
39
 	 	 	 	 	 	 	         	 	 	 	 	 	 *   Only used to decide when to create a new AVL in a group.
40 41 42 43
 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 *   Should not be greater than int32_t max (2,147,483,647), as indexes will have to be stored on 32 bits.
 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
#define AVL_MAX_DEPTH (1024)								/**< The maximum depth of an AVL tree. Used to save paths through the tree.
                              	  	 	 	 	 	 	 	 */
44
#define AVL_MAX_NAME (250) 				     				/**< The maximum length of an AVL tree name.
45 46 47 48 49 50 51
                              	  	 	 	 	 	 	 	 */
#define AVL_GROWTH_FACTOR (2)								/**< The growth factor when an AVL tree is enlarged.
                                	 	 	 	 	 	 	 */
#define LEFT_CHILD(node)	(avl->tree)+(node->left_child)  /**< Pointer to the left child of a node in an AVL tree.
															 */
#define RIGHT_CHILD(node)	(avl->tree)+(node->right_child) /**< Pointer to the right child of a node in an AVL tree.
															 */
52

53 54 55 56 57

/**
 * @brief AVL tree node structure.
 */
typedef struct AVL_node {
58 59 60 61 62 63 64 65 66 67
	index_t  left_child;	  /**< Index of left less child node.
	  	 	 	 	 	 	   */
	index_t  right_child; 	  /**< Index of right greater child node.
	 	 	 	 	 	 	   */
	int8_t   balance_factor;  /**< Balance factor of the node.
							   */
	index_t  value;			  /**< Index of the value associated with the node in the data array.
							   */
	uint64_t crc64;			  /**< Cyclic Redundancy Check code on 64 bits associated with the value.
	 	 	 	 	 	 	   */
68 69 70 71 72 73 74
} AVL_node_t, *AVL_node_p;


/**
 * @brief OBIDMS AVL tree data header structure.
 */
typedef struct OBIDMS_avl_data_header {
75
	size_t		header_size;						/**< Size of the header in bytes.
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		data_size_used;						/**< Size of the data used in bytes.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		data_size_max;						/**< Max size of the data in bytes.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		nb_items;							/**< Number of items.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	char    	avl_name[AVL_MAX_NAME+1];			/**< The AVL tree name as a NULL terminated string.
													 */
	time_t		creation_date;			    	    /**< Date of creation of the file.
												     */
} OBIDMS_avl_data_header_t, *OBIDMS_avl_data_header_p;


/**
 * @brief OBIDMS AVL tree data structure.
 */
typedef struct OBIDMS_avl_data {
	OBIDMS_avl_data_header_p	header; 		 	/**< A pointer to the header of the AVL tree data.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
96
	byte_t*                 	data;   		 	/**< A pointer to the beginning of the data.
97
													 */
98 99
	int							data_fd;	        /**< File descriptor of the file containing the data.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
100 101 102 103 104 105 106
} OBIDMS_avl_data_t, *OBIDMS_avl_data_p;


/**
 * @brief OBIDMS AVL tree header structure.
 */
typedef struct OBIDMS_avl_header {
107
	size_t		header_size;						/**< Size of the header in bytes.
108 109 110 111 112 113 114 115 116 117 118 119 120
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	size_t		avl_size;							/**< Size of the AVL tree in bytes.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		nb_items;							/**< Number of items in the AVL tree.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		nb_items_max;						/**< Maximum number of items in the AVL tree before it has to be enlarged.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t		root_idx;							/**< Index of the root of the AVL tree.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	char    	avl_name[AVL_MAX_NAME+1];			/**< The AVL tree name as a NULL terminated string.
													 */
	time_t		creation_date;			    	    /**< Date of creation of the file.
												     */
121
	bloom_t 	bloom_filter;			    	    /**< Bloom filter associated with the AVL tree, enabling to know if a value
122
													 *   might already be stored in the data referred to by the tree.
123
     	 	 	 	 	 	 	 	 	 	 	 	 */
124 125 126 127 128 129 130
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;


/**
 * @brief OBIDMS AVL tree structure.
 */
typedef struct OBIDMS_avl {
131 132 133 134 135 136
	OBIDMS_p                	dms;			 			/**< A pointer to the OBIDMS structure to which the AVL tree belongs.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	OBIDMS_avl_header_p			header; 	 				/**< A pointer to the header of the AVL tree.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	struct AVL_node*           	tree;   		 			/**< A pointer to the root of the AVL tree.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
137 138 139
	index_t                     path_idx[AVL_MAX_DEPTH];	/**< The path taken to a node from the root as an array of node indices.
													 	     */
	int8_t                      path_dir[AVL_MAX_DEPTH];	/**< The path taken to a node from the root as an array of directions
140
															 *   (0 for left, -1 for right).
141
															 */
142 143 144 145 146
	OBIDMS_avl_data_p			data;						/**< A pointer to the structure containing the data
													 	 	 *   that the AVL tree references.
													 	 	 */
	int							avl_fd;						/**< The file descriptor of the file containing the AVL tree.
													 	 	 */
147 148 149
} OBIDMS_avl_t, *OBIDMS_avl_p;


150 151 152 153
/**
 * @brief OBIDMS AVL tree group structure.
 */
typedef struct OBIDMS_avl_group {
154 155
	OBIDMS_avl_p 	sub_avls[MAX_NB_OF_AVLS_IN_GROUP];		/**< Array containing the pointers to the AVL trees of the group.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
156
	int 			last_avl_idx;							/**< Index in the sub_avls array of the AVL tree currently being filled.
157
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
158
	char			name[AVL_MAX_NAME+1];				 	/**< Base name of the AVL group. The AVL trees in it have names of the form basename_idx.
159 160 161
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	OBIDMS_p        dms;									/**< Pointer to the OBIDMS structure to which the AVL group belongs.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
162 163
	bool			writable;								/**< Indicates whether the AVL group is read-only or not.
															 */
164 165
	DIR*			directory;								/**< A directory entry usable to refer and scan the AVL group directory.
										 	 	 	 	 	 */
166 167
	size_t			counter;								/**< Indicates by how many threads/programs (TODO) the AVL group is used.
													 	 	 */
168 169 170
} OBIDMS_avl_group_t, *OBIDMS_avl_group_p;


171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224

/**
 * @brief Function building the complete AVL name for an AVL with an associated index (for AVL groups).
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param avl_name The base name of the AVL tree.
 * @param avl_idx The index associated with that AVL.
 *
 * @returns A pointer to the complete name of the AVL.
 * @retval NULL if an error occurred.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_build_avl_name_with_idx(const char* avl_name, int avl_idx);


/**
 * @brief Function building the full path of an AVL tree file.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer to the OBIDMS to which the AVL tree belongs.
 * @param avl_name The name of the AVL tree.
 * @param avl_idx The index of the AVL if it's part of an AVL group, or -1 if not.
 *
 * @returns A pointer to the full path of the file where the AVL tree is stored.
 * @retval NULL if an error occurred.
 *
 * @since May 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_get_full_path_of_avl_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx);


/**
 * @brief Function building the file name for an AVL data file.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer to the OBIDMS to which the AVL tree belongs.
 * @param avl_name The name of the AVL tree.
 * @param avl_idx The index of the AVL if it's part of an AVL group, or -1 if not.
 *
 * @returns A pointer to the full path of the file where the data referred to by the AVL tree is stored.
 * @retval NULL if an error occurred.
 *
 * @since May 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_get_full_path_of_avl_data_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx);


225
/**
226
 * @brief Checks if an AVL tree or AVL tree group already exists or not.
227
 *
228
 * @param dms The OBIDMS to which the AVL tree or AVL tree group belongs.
229
 * @param avl_name The name of the AVL tree or the base name of the AVL tree group.
230
 *
231 232 233
 * @returns A value indicating whether the AVL tree or AVL tree group exists or not.
 * @retval 1 if the AVL tree or AVL tree group exists.
 * @retval 0 if the AVL tree or AVL tree group does not exist.
234 235 236 237 238 239 240 241 242
 * @retval -1 if an error occurred.
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_avl_exists(OBIDMS_p dms, const char* avl_name);


/**
243
 * @brief Creates an AVL tree. Fails if it already exists.
244 245
 *
 * Note: An AVL tree is made of two files (referred to by two structures).
246
 * 		 One file contains the tree referring to the data, and the other
247
 * 		 file contains the data itself. The AVL tree as a whole is referred
248 249 250
 * 		 to via the OBIDMS_avl structure. An AVL tree is stored in a directory
 * 		 with the same name, or with the base name of the AVL group if it is
 * 		 part of an AVL group.
251 252 253
 *
 * @param dms The OBIDMS to which the AVL tree belongs.
 * @param avl_name The name of the AVL tree.
254 255
 * @param avl_idx The index of the AVL tree if it is part of an AVL group,
 *        or -1 if it is not part of an AVL group.
256
 *
257
 * @returns A pointer to the newly created AVL tree structure.
258 259 260 261 262
 * @retval NULL if an error occurred.
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
263
OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
264 265 266


/**
267
 * @brief Opens an AVL tree in read-only mode. Fails if it does not already exist.
268 269
 *
 * Note: An AVL tree is made of two files (referred to by two structures).
270
 * 		 One file contains the tree referring to the data, and the other
271 272 273 274 275
 * 		 file contains the data itself. The AVL tree as a whole is referred
 * 		 to via the OBIDMS_avl structure.
 *
 * @param dms The OBIDMS to which the AVL tree belongs.
 * @param avl_name The name of the AVL tree.
276 277
 * @param avl_idx The index of the AVL tree if it is part of an AVL group,
 *        or -1 if it is not part of an AVL group.
278
 *
279
 * @returns A pointer to the AVL tree structure.
280 281 282 283 284
 * @retval NULL if an error occurred.
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
285
OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
286 287 288


/**
289
 * @brief Opens an AVL tree group and creates it if it does not already exist.
290
 *
291 292
 * Note: An AVL tree group is composed of multiple AVL trees that all have the
 *       same base name, and an index differentiating them.
293 294
 *
 * @param dms The OBIDMS to which the AVL tree belongs.
295
 * @param avl_name The base name of the AVL tree group.
296
 *
297
 * @returns A pointer to the AVL tree group structure.
298 299
 * @retval NULL if an error occurred.
 *
300
 * @since April 2016
301 302
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
303
OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name);
304 305 306


/**
307
 * @brief Creates an AVL tree group.
308
 *
309 310
 * Note: An AVL tree group is composed of multiple AVL trees that all have the
 *       same base name, and an index differentiating them.
311
 *
312 313
 * @param dms The OBIDMS to which the AVL tree belongs.
 * @param avl_name The base name of the AVL tree group.
314
 *
315 316
 * @returns A pointer to the AVL tree group structure.
 * @retval NULL if an error occurred.
317
 *
318
 * @since April 2016
319 320
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
321
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
322 323 324


/**
325
 * @brief Opens an AVL tree group in read-only mode.
326
 *
327 328
 * Note: An AVL tree group is composed of multiple AVL trees that all have the
 *       same base name, and an index differentiating them.
329
 *
330 331 332 333 334 335 336 337 338 339 340 341
 * @param dms The OBIDMS to which the AVL tree belongs.
 * @param avl_name The base name of the AVL tree group.
 *
 * @returns A pointer to the AVL tree group structure.
 * @retval NULL if an error occurred.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
OBIDMS_avl_group_p obi_open_avl_group(OBIDMS_p dms, const char* avl_name);


342 343 344 345 346 347 348 349
/**
 * @brief Clones an AVL.
 *
 * The tree and the data are both cloned into the new AVL.
 *
 * @param avl A pointer on the AVL to clone.
 * @param new_avl A pointer on the new AVL to fill.
 *
350 351 352
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
353 354 355
 * @since May 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
356
int obi_clone_avl(OBIDMS_avl_p avl, OBIDMS_avl_p new_avl);
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375


/**
 * @brief Clones an AVL group.
 *
 * @warning The AVL group that has be cloned is closed by the function.
 *
 * @param avl_group A pointer on the AVL group to clone.
 * @param new_avl_name The name of the new AVL group.
 *
 * @returns A pointer on the new AVL group structure.
 * @retval NULL if an error occurred.
 *
 * @since May 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
OBIDMS_avl_group_p obi_clone_avl_group(OBIDMS_avl_group_p avl_group, const char* new_avl_name);


376 377
/**
 * @brief Closes an AVL tree.
378
 *
379
 * @param avl A pointer to the AVL tree structure to close and free.
380
 * @param writable Whether the AVL is writable or not (and therefore if the files can and should be truncated to the used size).
381 382
 *
 * @retval 0 if the operation was successfully completed.
383 384 385 386 387
 * @retval -1 if an error occurred.
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
388
int obi_close_avl(OBIDMS_avl_p avl, bool writable);
389 390 391


/**
392
 * @brief Closes an AVL tree group.
393
 *
394
 * @param avl_group A pointer to the AVL tree group structure to close and free.
395
 *
396 397
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
398
 *
399
 * @since April 2016
400 401
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
402
int obi_close_avl_group(OBIDMS_avl_group_p avl_group);
403 404 405


/**
406
 * @brief Recovers a value (blob) in an AVL tree.
407
 *
408
 * @warning The blob recovered must be decoded to get the original value.
409
 * @warning The blob recovered is mapped in memory.
410 411 412 413
 *
 * @param avl A pointer to the AVL tree.
 * @param index The index of the value in the data array.
 *
414
 * @returns A pointer to the blob recovered.
415 416 417 418
 *
 * @since December 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
419
Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
420 421 422


/**
423
 * @brief Adds a value (blob) in an AVL tree.
424
 *
425
 * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
426
 * @warning If the value is already in the AVL tree, an error will be triggered.		// TODO to discuss
427
 *
428
 * @param avl A pointer to the AVL tree.
429
 * @param value The blob to add in the AVL tree.
430
 *
431 432
 * @returns The index of the value newly added in the AVL tree.
 * @retval -1 if an error occurred.
433
 *
434
 * @since December 2015
435 436
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
437
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value);
438 439 440


/**
441
 * @brief Finds a value (blob) in an AVL tree.
442
 *
443
 * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
444
 *
445
 * @param avl A pointer to the AVL tree.
446
 * @param value The blob to add in the AVL tree.
447
 *
448 449 450 451
 * @returns The data index of the value.
 * @retval -1 if the value is not in the tree.
 *
 * @since December 2015
452 453
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
454
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value);
455 456 457


/**
458
 * @brief Recovers a value (blob) in an AVL tree.
459
 *
460
 * @warning The blob recovered must be decoded to get the original value.
461
 * @warning The blob recovered is mapped in memory.
462
 *
463
 * @param avl_group A pointer to the AVL tree.
464 465 466 467
 * @param index The index of the value in the form of a 64-bit integer
 *              with the 32 left-most bits coding for the index of the tree of
 *              the group in which the value is stored, and the 32 right-most bits
 *              coding for the index at which the value is stored in that AVL tree.
468
 *
469
 * @returns A pointer to the blob recovered.
470
 *
471
 * @since April 2016
472 473
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
474
Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
475 476 477


/**
478
 * @brief Adds a value (blob) in an AVL tree group, checking if it is already in it.
479
 *
480
 * @warning The value given must be already be encoded into a blob structure (Obi_blob_t).
481
 *
482
 * @param avl_group A pointer to the AVL tree group.
483
 * @param value The blob to add in the AVL tree group.
484
 *
485 486 487 488
 * @returns The index of the value newly added in the AVL tree group, in the form of a
 *          64-bit integer with the 32 left-most bits coding for the index of the tree
 *          of the group in which the value is stored, and the 32 right-most bits
 *          coding for the index at which the value is stored in that AVL tree.
489
 * @retval -1 if an error occurred.
490
 *
491
 * @since April 2016
492 493
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
494
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value);
495 496


497 498 499 500 501 502 503 504 505 506 507 508 509
/**
 * @brief Recovers the name of an AVL group.
 *
 * @param avl_group A pointer on the AVL group structure.
 *
 * @returns A pointer on the name of the AVL group.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
const char* obi_avl_group_get_name(OBIDMS_avl_group_p avl_group);


510 511
#endif /* OBIAVL_H_ */