/****************************************************************************
 * OBIDMS columns header file                                               *
 ****************************************************************************/

/**
 * @file obidmscolumn.h
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date 12 May 2015
 * @brief Header file for the functions and structures shared by all the OBIDMS columns.
 */


#ifndef OBIDMSCOLUMN_H_
#define OBIDMSCOLUMN_H_

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include "obidms.h"
#include "obitypes.h"
#include "obierrno.h"
#include "obilittlebigman.h"
#include "obidmscolumndir.h"
#include "obiblob_indexer.h"


// TODO delete useless ones, add default nb?
#define ELEMENTS_NAMES_MAX (1000000)            /**< The maximum length of the list of elements names.	// TODO Discuss
                                                 */
#define NB_ELTS_MAX_IF_DEFAULT_NAME (1000000)   /**< The maximum number of elements per line if the default element names
                                                 *   are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX.  // TODO not up to date
                                                 */
#define COLUMN_GROWTH_FACTOR (2)                /**< The growth factor when a column is enlarged.
                                                 */
#define MAXIMUM_LINE_COUNT (1000000000)         /**< The maximum line count for the data of a column (1E9). //TODO
                                                 */
#define COMMENTS_MAX_LENGTH (4096)              /**< The maximum length for comments.
                                                 */
#define FORMATTED_ELT_NAMES_SEPARATOR '\0'      /**< The separator between elements names once formatted to be stored in columns.
                                                 */
#define NOT_FORMATTED_ELT_NAMES_SEPARATOR ';'   /**< The separator between elements names before being formatted to be stored
                                                 *   in columns (e.g. as sent by the upper layer).
                                                 */


/**
 * @brief Structure referencing a column by its name and its version.
 *
 * This structure is embedded in the column header (member `associated_column`
 * of OBIDMS_column_header) to refer to another column.
 */
typedef struct Column_reference {
	char 	   	 column_name[OBIDMS_COLUMN_MAX_NAME+1];    /**< Name of the column (OBIDMS_COLUMN_MAX_NAME characters
	 	 	 	 	 	 	 	 	 	 	 	 	 	    *   plus one extra byte, presumably for the terminating '\0').
	 	 	 	 	 	 	 	 	 	 	 	 	 	    */
	obiversion_t version;				   		    	   /**< Version of the column.
	 	 	 	 	 	 	 	 	 	 	 	 	 	    */
} Column_reference_t, *Column_reference_p;


Celine Mercier committed
60
/**
61
 * @brief OBIDMS column header structure.
Celine Mercier committed
62
 */
63
typedef struct OBIDMS_column_header {
64 65 66 67 68 69
	size_t				header_size;		   				    			/**< Size of the header in bytes.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	size_t				data_size;			   				    			/**< Size of the data in bytes.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t				line_count;							    			/**< Number of lines of data allocated.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
70
	index_t				lines_used;							    			/**< Number of lines of data used (the highest index where data has been entered + 1).
71 72 73 74 75 76 77 78 79
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	index_t				nb_elements_per_line;   			   				/**< Number of elements per line.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	OBIType_t			returned_data_type;		    						/**< Type of the data that is returned when getting an
															 	 	 	 	 *   element from the column.
															 	 	 	 	 */
	OBIType_t			stored_data_type;		    						/**< Type of the data that is actually stored in the data
															 	 	 	 	 *   part of the column.
															 	 	 	 	 */
80 81
	bool				tuples;												/**< A boolean indicating whether the column contains indices referring to indexed tuples.
																			 */
82 83 84
	bool				to_eval;											/**< A boolean indicating whether the column contains expressions that should be evaluated
																			 *   (typically OBI_STR columns containing character strings to be evaluated by Python).
																			 */
85 86 87 88 89 90 91 92 93 94 95 96 97 98
	time_t				creation_date;			    						/**< Date of creation of the file.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	obiversion_t		version;				   							/**< Version of the column.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
	obiversion_t		cloned_from;			    						/**< Version of the column from which this column
															 	 	 	 	 *   was cloned from (-1 if it was not created by cloning
															 	 	 	 	 *   another column).
															 	 	 	 	 */
	char            	name[OBIDMS_COLUMN_MAX_NAME+1]; 	    			/**< The column name as a NULL terminated string.
	                                             	 	 	 	 	 	 	 */
	char            	indexer_name[INDEXER_MAX_NAME+1]; 					/**< If there is one, the indexer name as a NULL terminated string.
	                                             	 	 	 	 	 	 	 */
	Column_reference_t 	associated_column;									/**< If there is one, the reference to the associated column.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
99
	bool                finished;											/**< A boolean indicating whether the column was properly closed by the view that created it.
100
																			 */
101 102
	char 				comments[COMMENTS_MAX_LENGTH+1];					/**< Comments stored as a classical zero end C string.
												 	 	 	 	 	 	 	 */
103 104 105 106 107 108 109 110 111 112 113 114
	int64_t				elements_names_length;								/**< Length of the character array where the elements names are stored.
																			 */
	char*				elements_names;										/**< Pointer in mem_arnea on the names of the line elements with '\0' as separator
																 	 	 	 *   and '\0\0' as terminal flag.
																 	 	 	 *	 (default are the indices: "0\01\02\0...\0n\0\0").
																 	 	 	 */
	int64_t* 		    elements_names_idx;									/**< Pointer in mem_arnea on the index for the start of each element name in elements_names.
																			 */
	int64_t*			sorted_elements_idx;								/**< Index for the sorted element names in elements_names_idx.
																			 */
	byte_t              mem_arena[];										/**< Memory array where the elements names, the elements names index and the sorted elements index are stored.
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 */
115
} OBIDMS_column_header_t, *OBIDMS_column_header_p;
Celine Mercier committed
116

117

Eric Coissac committed
118
/**
119
 * @brief OBIDMS column structure.
Eric Coissac committed
120 121
 *
 * A data structure of this type is returned by the functions
122
 * creating, opening or cloning an OBIDMS column.
Eric Coissac committed
123
 */
124
typedef struct OBIDMS_column {
Eric Coissac committed
125 126
	OBIDMS_p                	dms;			 	/**< A pointer to the OBIDMS structure to
	                                                 *   which the column belongs.
127
	 	 	 	 	 	 	 	 	 	 	 	 	 */
Eric Coissac committed
128 129
	OBIDMS_column_directory_p   column_directory;	/**< A pointer to the OBIDMS column directory
	                                                 *   structure to which the column belongs.
130 131 132
	 	 	 	 	 	 	 	 	 	 	 	 	 */
	OBIDMS_column_header_p		header; 		 	/**< A pointer to the header of the column.
	 	 	 	 	 	 	 	 	 	 	 	 	 */
Eric Coissac committed
133 134
	Obi_indexer_p    			indexer;		    /**< A pointer to the blob indexer associated
	                                                 *   with the column if there is one.
135
	 	 	 	 	 	 	 	 	 	 	 	 	 */
136
	void*                   	data;   		 	/**< A `void` pointer to the beginning of the data.
137
	                                 	 	         *
Eric Coissac committed
138 139 140
													 *   @warning Never use this member directly outside
													 *            of the code of the low level functions
													 *            of the OBIDMS.
141
													 */
Celine Mercier committed
142
	bool						writable;	     	/**< Indicates if the column is writable or not.
143 144 145 146 147 148
													 *       - `true` the column is writable
													 *       - `false` the column is read-only
													 *
													 * A column is writable only by its creator
													 * until it closes it.
													 */
Eric Coissac committed
149 150
	size_t						counter;			/**< Indicates by how many threads/programs
	                                                 *   (TODO) the column is used.
151
													 */
152 153
} OBIDMS_column_t, *OBIDMS_column_p;

Eric Coissac committed
154

155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190

/**
 * @brief Function building the full path to the version file of a column in an OBIDMS.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer on the OBIDMS.
 * @param column_name The name of the OBIDMS column file.
 *
 * @returns A pointer to the version file name.
 * @retval NULL if an error occurred.
 *
 * @since October 2017
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_version_file_full_path(OBIDMS_p dms, const char* column_name);


/**
 * @brief Function building the full path to the file of a given version of a column in an OBIDMS.
 *
 * NOTE(review): the previous comment was a copy of the one for obi_version_file_full_path()
 * and described the version file; the wording here follows the function name and the
 * version_number parameter. Confirm against the implementation.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer on the OBIDMS.
 * @param column_name The name of the OBIDMS column file.
 * @param version_number The version number of the OBIDMS column file.
 *
 * @returns A pointer to the column file name.
 * @retval NULL if an error occurred.
 *
 * @since October 2017
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_column_full_path(OBIDMS_p dms, const char* column_name, obiversion_t version_number);


191
/**
192 193 194
 * @brief Returns the latest version number of a column in a column directory using the column directory structure.
 *
 * @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory().
195
 *
196 197
 * @returns The latest version number kept in the version file.
 * @retval -1 if an error occurred.
198
 *
199 200
 * @since May 2015
 * @author Eric Coissac (eric.coissac@metabarcoding.org)
201 202 203 204 205
 */
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory);


/**
 * @brief Returns the latest version of a column in a column directory using the column name.
 *
 * @param dms A pointer on an OBIDMS.
 * @param column_name The column name.
 *
 * @returns The latest version number kept in the version file.
 * @retval -1 if an error occurred.
 *
 * @since May 2015
 * @author Eric Coissac (eric.coissac@metabarcoding.org)
 */
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name);


Eric Coissac committed
220
/**
221
 * @brief Returns the header size in bytes of a column.
Eric Coissac committed
222
 *
223 224 225 226
 * The header size is rounded to a multiple of the memory page size.
 *
 * @param nb_elements_per_line The number of elements per line.
 * @param elts_names_length The length of elements_names including the two terminal '\0's.
Eric Coissac committed
227
 *
228
 * @returns The header size in bytes.
Eric Coissac committed
229 230
 *
 * @since May 2015
231
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
Eric Coissac committed
232
 */
233
size_t obi_calculate_header_size(index_t nb_elements_per_line, int64_t elts_names_length);
Eric Coissac committed
234 235 236


/**
237
 * @brief Creates a column.
Eric Coissac committed
238
 *
239
 * The minimum data size allocated is one memory page, and the data is initialized to the NA value of the OBIType.
Celine Mercier committed
240
 * If there is an indexer associated with the column, it is opened or created if it does not already exist.
241 242 243 244 245 246
 *
 * @warning If there is one element per line, elements_names should be equal to column_name.	// TODO change this condition?
 *
 * @param dms A pointer on an OBIDMS.
 * @param column_name The name of the new column.
 * @param data_type The OBIType code of the data.
247 248
 * @param nb_lines The number of lines to be stored (can be 0 if not known).
 * @param nb_elements_per_line The number of elements per line.
249
 * @param elements_names The names of the elements with ';' as separator (no terminal ';'),
250
 *                       NULL or "" if the default names are to be used ("0\01\02\0...\0n").
251
 * @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()).
252
 * @param tuples A boolean indicating whether the column should contain indices referring to indexed tuples.
253 254
 * @param to_eval A boolean indicating whether the column contains expressions that should be evaluated
 *                (typically OBI_STR columns containing character strings to be evaluated by Python).
Celine Mercier committed
255
 * @param indexer_name The name of the indexer if there is one associated with the column.
256
 *                     If NULL or "", the indexer name is set as the column name.
257 258
 * @param associated_column_name The name of the associated column if there is one (otherwise NULL or "").
 * @param associated_column_version The version of the associated column if there is one (otherwise -1).
259
 * @param comments Optional comments associated with the column in JSON format (NULL, "" or "{}" if no comments associated).
260 261 262
 *
 * @returns A pointer on the newly created column structure.
 * @retval NULL if an error occurred.
Eric Coissac committed
263 264
 *
 * @since May 2015
265
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
Eric Coissac committed
266
 */
267 268 269 270 271 272
OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
		                          const char*  column_name,
								  OBIType_t    data_type,
								  index_t      nb_lines,
								  index_t      nb_elements_per_line,
								  char*        elements_names,
273
								  bool		   elt_names_formatted,
274
								  bool         tuples,
275
								  bool         to_eval,
276
								  const char*  indexer_name,
277 278
								  const char*  associated_column_name,
								  obiversion_t associated_column_version,
279
								  const char*  comments
Celine Mercier committed
280
								 );
Eric Coissac committed
281 282 283


/**
284
 * @brief Opens a column in read-only mode.
Eric Coissac committed
285
 *
286 287 288
 * @param dms A pointer on an OBIDMS.
 * @param column_name The name of the column.
 * @param version_number The version of the column that should be opened (if -1, the latest version is retrieved).
289
 *
290 291
 * @returns A pointer on the opened column structure.
 * @retval NULL if an error occurred.
Eric Coissac committed
292
 *
293 294
 * @since July 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
Eric Coissac committed
295
 */
296 297 298
OBIDMS_column_p obi_open_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number);


299 300 301
/**
 * @brief Clones a column, and returns a pointer to the writable new column.
 *
302 303 304 305
 * @param dms A pointer on an OBIDMS.
 * @param column_name The name of the column to clone.
 * @param version_number The version of the column that should be cloned (if -1, the latest version is retrieved).
 * @param clone_data Whether the data should be copied or not.
306
 *
307 308
 * @returns A pointer to the created column.
 * @retval NULL if an error occurred.
309 310 311 312
 *
 * @since August 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
Celine Mercier committed
313
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, OBIDMS_column_p line_selection, const char* column_name, obiversion_t version_number, bool clone_data);
314 315


Celine Mercier committed
316 317 318 319 320 321 322 323 324 325 326 327 328 329
/**
 * @brief Clones a column indexer to have it writable.
 *
 * @param column A pointer on an OBIDMS column.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since November 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_clone_column_indexer(OBIDMS_column_p column);


330
/**
331
 * @brief Truncates a column to the number of lines used if it is not read-only and closes it.
332
 *
333 334
 * @warning This function does not flag the column as finished, only finish_view() in the obiview source file does that.
 *
335
 * @param column A pointer on an OBIDMS column.
336
 *
337 338
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
339 340 341 342 343 344 345
 *
 * @since July 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_close_column(OBIDMS_column_p column);


346
/**
347 348
 * @brief Truncates a column file to the number of lines used rounded to the nearest
 * 		  greater multiple of the page size.
349
 *
350
 * @param column A pointer on an OBIDMS column.
351
 *
352 353
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
354 355 356 357
 *
 * @since August 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
358
int obi_truncate_column(OBIDMS_column_p column);
359 360


361 362 363
/**
 * @brief Enlarges a column file.
 *
364
 * @param column A pointer on an OBIDMS column.
365
 *
366 367
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
368 369 370 371 372 373 374
 *
 * @since August 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_enlarge_column(OBIDMS_column_p column);


375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
/**
 * @brief Writes comments to a column file.
 *
 * @warning This overwrites any other previous comments.
 *
 * @param column A pointer on an OBIDMS column.
 * @param comments A character string containing the comments.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since August 2018
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_column_write_comments(OBIDMS_column_p column, const char* comments);


/**
 * @brief Adds a comment (a key/value pair) to the comments of a column file.
 *
 * This reads the existing comments, which are in the JSON format, and adds the key/value pair.
 * If the key already exists, the value is turned into an array and the new value is appended
 * to it if it is not already in the array.
 *
 * @param column A pointer on an OBIDMS column.
 * @param key The key.
 * @param value The value associated with the key.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since August 2018
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_column_add_comment(OBIDMS_column_p column, const char* key, const char* value);


412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
/*
 * @brief Sets the data in a column to the specified value.
 *
 * @warning The specified value should be the atomic value effectively stored in the column (i.e. it can not be a character string for example).
 *
 * @param column A pointer on an OBIDMS column.
 * @param start The first line number of the block that should be set.
 * @param nb_lines The number of lines that should be set.
 * @param value_p A pointer on the value to which the column should be set.
 *
 * @since May 2018
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
void obi_set_column_to_value(OBIDMS_column_p column,
						  	 index_t first_line_nb,
							 index_t nb_lines,
							 void* value_p);


431
/*
432
 * @brief Sets the data in a column to the NA value of the data OBIType.
433
 *
434 435 436
 * @param column A pointer on an OBIDMS column.
 * @param start The first line number of the block that should be set.
 * @param nb_lines The number of lines that should be set.
437 438 439 440
 *
 * @since August 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
441
void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t nb_lines);	// TODO make private?
442 443


444
/**
445 446 447
 * @brief Recovers the header of an OBIDMS column from the column name.
 *
 * @warning The header structure has to be munmapped by the caller.
448
 *
449 450
 * @param dms A pointer on an OBIDMS.
 * @param column_name The name of an OBIDMS column.
451 452
 * @param version_number The version of the column from which the header should be
 *        retrieved (-1: latest version).
453
 *
454 455
 * @returns A pointer on the mmapped header of the column.
 * @retval NULL if an error occurred.
456
 *
457
 * @since October 2015
458 459
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
460
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number);
461 462


463
/**
464
 * @brief Munmap a mmapped header as returned by obi_column_get_header_from_name().
465
 *
466
 * @param header A pointer on the mmapped header structure.
467
 *
468 469
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
470
 *
471
 * @since October 2015
472 473
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
474
int obi_close_header(OBIDMS_column_header_p header);
475 476


477
/**
478
 * @brief Recovers the index of an element in an OBIDMS column from the element's name.
479
 *
480 481
 * @param column A pointer on an OBIDMS column.
 * @param element_name The name of the element.
482
 *
483
 * @returns The index of the element in a line of the column.
484
 * @retval OBIIdx_NA if an error occurred.
485 486 487 488
 *
 * @since July 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
489
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
490 491


492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
/**
 * @brief Recovers the elements names of the lines of a column, with ';' as separator (i.e. "0;1;2;...;n\0").
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param column A pointer on an OBIDMS column.
 *
 * @returns A pointer on a character array where the elements names are stored.
 * @retval NULL if an error occurred.
 *
 * @since January 2017
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_get_elements_names(OBIDMS_column_p column);


508 509 510 511 512 513 514 515 516 517 518 519
/**
 * @brief Prepares a column to set a value.
 *
 * @param column A pointer on an OBIDMS column.
 * @param line_nb The number of the line at which the value will be set.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
520
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, index_t elt_idx);
521 522


523 524
/**
 * @brief Prepares a column to recover a value.
525
 *
526 527
 * @param column A pointer on an OBIDMS column.
 * @param line_nb The number of the line at which the value will be recovered.
528
 *
529 530
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
531
 *
532
 * @since April 2016
533 534
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
535
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb);
536 537


538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
/**
 * @brief Goes through all the column files of a DMS and deletes columns that have
 *        not been flagged as finished (done by the finish_view() function in the
 *        obiview source file).
 *
 * @param dms A pointer on an OBIDMS.
 *
 * @returns A value indicating the success of the operation.
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since October 2017
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_clean_unfinished_columns(OBIDMS_p dms);


#endif /* OBIDMSCOLUMN_H_ */