/****************************************************************************
 * OBIDMS columns header file                                               *
 ****************************************************************************/

/**
 * @file obidmscolumn.h
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date 12 May 2015
 * @brief Header file for the functions and structures shared by all the OBIDMS columns.
 */


#ifndef OBIDMSCOLUMN_H_
#define OBIDMSCOLUMN_H_

#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdbool.h>
#include <time.h>
#include <stdbool.h>

#include "obidms.h"
#include "obitypes.h"
#include "obierrno.h"
#include "obilittlebigman.h"
#include "obidmscolumndir.h"
#include "obiblob_indexer.h"


// TODO delete useless ones, add default nb?
#define ELEMENTS_NAMES_MAX (1000000)     	  	/**< The maximum length of the list of elements names.
                                             	 *   TODO: this value is still to be discussed.
                                             	 */
#define NB_ELTS_MAX_IF_DEFAULT_NAME (1000000) 	/**< The maximum number of elements per line if the default element names
                                             	 *   are used ("0\01\02\0...\0n").
                                             	 *   TODO: not up to date with ELEMENTS_NAMES_MAX.
                                             	 *   NOTE(review): the default names of 1,000,000 elements (decimal indices
                                             	 *   plus one separator each) need roughly 6.9 million characters, which is
                                             	 *   more than ELEMENTS_NAMES_MAX - confirm which limit is the intended one.
                                             	 */
#define COLUMN_GROWTH_FACTOR (2)	 	  		/**< The growth factor when a column is enlarged.
                                             	 */
#define MAXIMUM_LINE_COUNT (1000000000)   		/**< The maximum line count for the data of a column (1E9).
                                             	 *   TODO: value to be reviewed.
                                             	 */
#define COMMENTS_MAX_LENGTH (4096)        		/**< The maximum length for comments, not counting the terminating '\0'
                                             	 *   (the comments buffer of the column header is declared with
                                             	 *   COMMENTS_MAX_LENGTH+1 characters).
                                             	 */
#define FORMATTED_ELT_NAMES_SEPARATOR '\0'		/**< The separator between elements names once formatted to be stored in columns.
                                             	 */
#define NOT_FORMATTED_ELT_NAMES_SEPARATOR ';'   /**< The separator between elements names before they are formatted to be stored
                                             	 *   in columns (e.g. as sent by the upper layer).
                                             	 */


/**
 * @brief Structure referencing a column by its name and its version.
 *
 * This structure is embedded in the column header (see the associated_column
 * member of OBIDMS_column_header) to refer to another column.
 */
typedef struct Column_reference {
	char 	   	 column_name[OBIDMS_COLUMN_MAX_NAME+1];    /**< Name of the referenced column
	                                                        *   (the buffer has room for OBIDMS_COLUMN_MAX_NAME
	                                                        *   characters plus the terminating '\0').
	                                                        */
	obiversion_t version;				   		    	   /**< Version of the referenced column.
	                                                        */
} Column_reference_t, *Column_reference_p;


/**
 * @brief OBIDMS column header structure.
 *
 * The fixed-size members are followed by a flexible array member (mem_arena)
 * in which the elements names and their two indices are stored.
 *
 * NOTE(review): elements_names, elements_names_idx and sorted_elements_idx are raw
 * pointers kept inside a structure that is handed out as a mmapped region
 * (see obi_column_get_header_from_name()). Presumably they are recomputed every
 * time the header is mapped - confirm against the implementation before relying
 * on their values.
 */
typedef struct OBIDMS_column_header {
	size_t				header_size;		/**< Size of the header in bytes.
											 */
	size_t				data_size;			/**< Size of the data in bytes.
											 */
	index_t				line_count;			/**< Number of lines of data allocated.
											 */
	index_t				lines_used;			/**< Number of lines of data used
											 *   (the highest index where data has been entered + 1).
											 */
	index_t				nb_elements_per_line;	/**< Number of elements per line.
											 */
	OBIType_t			returned_data_type;	/**< Type of the data that is returned when getting an
											 *   element from the column.
											 */
	OBIType_t			stored_data_type;	/**< Type of the data that is actually stored in the data
											 *   part of the column.
											 */
	bool				tuples;				/**< A boolean indicating whether the column contains indices
											 *   referring to indexed tuples.
											 */
	bool				to_eval;			/**< A boolean indicating whether the column contains expressions
											 *   that should be evaluated (typically OBI_STR columns containing
											 *   character strings to be evaluated by Python).
											 */
	time_t				creation_date;		/**< Date of creation of the file.
											 */
	obiversion_t		version;			/**< Version of the column.
											 */
	obiversion_t		cloned_from;		/**< Version of the column from which this column was cloned
											 *   (-1 if it was not created by cloning another column).
											 */
	char            	name[OBIDMS_COLUMN_MAX_NAME+1];	/**< The column name as a '\0'-terminated string.
											 */
	char            	indexer_name[INDEXER_MAX_NAME+1];	/**< If there is one, the indexer name as a
											 *   '\0'-terminated string.
											 */
	Column_reference_t 	associated_column;	/**< If there is one, the reference to the associated column.
											 */
	bool                finished;			/**< A boolean indicating whether the column was properly closed
											 *   by the view that created it.
											 */
	char 				comments[COMMENTS_MAX_LENGTH+1];	/**< Comments stored as a classical
											 *   '\0'-terminated C string.
											 */
	int64_t				elements_names_length;	/**< Length of the character array where the elements names
											 *   are stored.
											 */
	char*				elements_names;		/**< Pointer into mem_arena to the names of the line elements,
											 *   with '\0' as separator and '\0\0' as terminal flag
											 *   (the default names are the indices: "0\01\02\0...\0n\0\0").
											 */
	int64_t* 		    elements_names_idx;	/**< Pointer into mem_arena to the index giving the start of each
											 *   element name in elements_names.
											 */
	int64_t*			sorted_elements_idx;	/**< Index for the sorted element names in elements_names_idx.
											 */
	byte_t              mem_arena[];		/**< Memory array where the elements names, the elements names
											 *   index and the sorted elements index are stored.
											 */
} OBIDMS_column_header_t, *OBIDMS_column_header_p;


/**
 * @brief OBIDMS column structure.
 *
 * A data structure of this type is returned by the functions
 * creating, opening or cloning an OBIDMS column
 * (obi_create_column(), obi_open_column() and obi_clone_column()).
 */
typedef struct OBIDMS_column {
	OBIDMS_p                	dms;			 	/**< A pointer to the OBIDMS structure to
	                                                 *   which the column belongs.
	                                                 */
	OBIDMS_column_directory_p   column_directory;	/**< A pointer to the OBIDMS column directory
	                                                 *   structure to which the column belongs.
	                                                 */
	OBIDMS_column_header_p		header; 		 	/**< A pointer to the header of the column.
	                                                 */
	Obi_indexer_p    			indexer;		    /**< A pointer to the blob indexer associated
	                                                 *   with the column if there is one.
	                                                 */
	void*                   	data;   		 	/**< A `void` pointer to the beginning of the data.
	                                                 *
	                                                 *   @warning Never use this member directly outside
	                                                 *            of the code of the low level functions
	                                                 *            of the OBIDMS.
	                                                 */
	bool						writable;	     	/**< Indicates whether the column is writable:
	                                                 *       - `true`: the column is writable
	                                                 *       - `false`: the column is read-only
	                                                 *
	                                                 *   A column is writable only by its creator,
	                                                 *   until the creator closes it.
	                                                 */
	size_t						counter;			/**< Indicates by how many threads/programs
	                                                 *   the column is used.
	                                                 *   TODO: the threads/programs semantics is
	                                                 *   flagged as unfinished in the original comment.
	                                                 */
} OBIDMS_column_t, *OBIDMS_column_p;



/**
 * @brief Builds the full path to the version file of a column in an OBIDMS.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer to the OBIDMS.
 * @param column_name The name of the OBIDMS column.
 *
 * @returns A pointer to a character string containing the full path to the version file.
 * @retval NULL if an error occurred.
 *
 * @since October 2017
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_version_file_full_path(OBIDMS_p dms, const char* column_name);


/**
 * @brief Builds the full path to the file of a given version of a column in an OBIDMS.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer to the OBIDMS.
 * @param column_name The name of the OBIDMS column.
 * @param version_number The version number of the OBIDMS column file.
 *
 * @returns A pointer to a character string containing the full path to the column file.
 * @retval NULL if an error occurred.
 *
 * @since October 2017
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_column_full_path(OBIDMS_p dms, const char* column_name, obiversion_t version_number);


/**
 * @brief Returns the latest version number of a column, given its column directory structure.
 *
 * @param column_directory A pointer to a column directory structure, as returned by
 *                         obi_create_column_directory() or obi_open_column_directory().
 *
 * @returns The latest version number kept in the version file.
 * @retval -1 if an error occurred.
 *
 * @since May 2015
 * @author Eric Coissac (eric.coissac@metabarcoding.org)
 */
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory);


/**
 * @brief Returns the latest version number of a column, given the column name.
 *
 * @param dms A pointer to an OBIDMS.
 * @param column_name The column name.
 *
 * @returns The latest version number kept in the version file.
 * @retval -1 if an error occurred.
 *
 * @since May 2015
 * @author Eric Coissac (eric.coissac@metabarcoding.org)
 */
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name);


/**
 * @brief Returns the size in bytes of the header of a column.
 *
 * The header size is rounded to a multiple of the memory page size.
 *
 * @param nb_elements_per_line The number of elements per line.
 * @param elts_names_length The length of the elements names character array,
 *                          including the two terminal '\0's.
 *
 * @returns The header size in bytes.
 *
 * @since May 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
size_t obi_calculate_header_size(index_t nb_elements_per_line, int64_t elts_names_length);


/**
 * @brief Creates a column.
 *
 * The minimum data size allocated is one memory page, and the data is initialized to the NA value of the OBIType.
 * If there is an indexer associated with the column, it is opened or created if it does not already exist.
 *
 * @warning If there is one element per line, elements_names should be equal to column_name.
 *          TODO: change this condition?
 *
 * @param dms A pointer to an OBIDMS.
 * @param column_name The name of the new column.
 * @param data_type The OBIType code of the data.
 * @param nb_lines The number of lines to be stored (can be 0 if not known).
 * @param nb_elements_per_line The number of elements per line.
 * @param elements_names The names of the elements, separated either by ';' with no terminal ';'
 *                       (when elt_names_formatted is false) or by '\0' (when elt_names_formatted is true).
 *                       NULL or "" if the default names are to be used ("0\01\02\0...\0n").
 * @param elt_names_formatted Whether the separator for the elements names is ';' (false),
 *                            or '\0' (true, as formatted by format_elements_names()).
 * @param tuples A boolean indicating whether the column should contain indices referring to indexed tuples.
 * @param to_eval A boolean indicating whether the column contains expressions that should be evaluated
 *                (typically OBI_STR columns containing character strings to be evaluated by Python).
 * @param indexer_name The name of the indexer if there is one associated with the column.
 *                     If NULL or "", the indexer name is set as the column name.
 * @param associated_column_name The name of the associated column if there is one (otherwise NULL or "").
 * @param associated_column_version The version of the associated column if there is one (otherwise -1).
 * @param comments Optional comments associated with the column in JSON format (NULL, "" or "{}" if no comments associated).
 *
 * @returns A pointer to the newly created column structure.
 * @retval NULL if an error occurred.
 *
 * @since May 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
		                          const char*  column_name,
								  OBIType_t    data_type,
								  index_t      nb_lines,
								  index_t      nb_elements_per_line,
								  char*        elements_names,
								  bool		   elt_names_formatted,
								  bool         tuples,
								  bool         to_eval,
								  const char*  indexer_name,
								  const char*  associated_column_name,
								  obiversion_t associated_column_version,
								  const char*  comments
								 );


/**
 * @brief Opens a column in read-only mode.
 *
 * @param dms A pointer to an OBIDMS.
 * @param column_name The name of the column.
 * @param version_number The version of the column that should be opened
 *                       (if -1, the latest version is retrieved).
 *
 * @returns A pointer to the opened column structure.
 * @retval NULL if an error occurred.
 *
 * @since July 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
OBIDMS_column_p obi_open_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number);


/**
 * @brief Clones a column, and returns a pointer to the writable new column.
 *
 * @param dms A pointer to an OBIDMS.
 * @param line_selection NOTE(review): this parameter was not documented in the original header.
 *                       Presumably a column holding the indices of the lines to clone,
 *                       or NULL to clone all the lines - confirm against the implementation.
 * @param column_name The name of the column to clone.
 * @param version_number The version of the column that should be cloned
 *                       (if -1, the latest version is retrieved).
 * @param clone_data Whether the data should be copied or not.
 *
 * @returns A pointer to the created column.
 * @retval NULL if an error occurred.
 *
 * @since August 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, OBIDMS_column_p line_selection, const char* column_name, obiversion_t version_number, bool clone_data);


/**
 * @brief Clones the indexer of a column to have it writable.
 *
 * @param column A pointer to an OBIDMS column.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since November 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_clone_column_indexer(OBIDMS_column_p column);


/**
 * @brief Closes a column, after truncating it to the number of lines used if it is not read-only.
 *
 * @warning This function does not flag the column as finished; only finish_view()
 *          in the obiview source file does that.
 *
 * @param column A pointer to an OBIDMS column.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since July 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_close_column(OBIDMS_column_p column);


/**
 * @brief Truncates a column file to the number of lines used, rounded to the nearest
 *        greater multiple of the page size.
 *
 * @param column A pointer to an OBIDMS column.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since August 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_truncate_column(OBIDMS_column_p column);


/**
 * @brief Enlarges a column file.
 *
 * NOTE(review): the new size is presumably derived from COLUMN_GROWTH_FACTOR
 * ("the growth factor when a column is enlarged") - confirm against the implementation.
 *
 * @param column A pointer to an OBIDMS column.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since August 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_enlarge_column(OBIDMS_column_p column);


/**
 * @brief Writes comments to a column file.
 *
 * @warning This overwrites any previous comments.
 *
 * @param column A pointer to an OBIDMS column.
 * @param comments A character string containing the comments.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since August 2018
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_column_write_comments(OBIDMS_column_p column, const char* comments);


/**
 * @brief Adds a comment to a column file.
 *
 * This reads the existing comments in the JSON format and adds the key/value pair.
 * If the key already exists, the value is turned into an array and the new value
 * is appended to it, unless it is already in the array.
 *
 * @param column A pointer to an OBIDMS column.
 * @param key The key.
 * @param value The value associated with the key.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since August 2018
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_column_add_comment(OBIDMS_column_p column, const char* key, const char* value);


/**
 * @brief Sets the data in a column to the specified value.
 *
 * @warning The specified value should be the atomic value effectively stored in the column
 *          (i.e. it can not be a character string for example).
 *
 * @param column A pointer to an OBIDMS column.
 * @param first_line_nb The first line number of the block that should be set.
 * @param nb_lines The number of lines that should be set.
 * @param value_p A pointer to the value to which the column should be set.
 *
 * @since May 2018
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
void obi_set_column_to_value(OBIDMS_column_p column,
						  	 index_t first_line_nb,
							 index_t nb_lines,
							 void* value_p);


/**
 * @brief Sets the data in a column to the NA value of the data OBIType.
 *
 * @param column A pointer to an OBIDMS column.
 * @param first_line_nb The first line number of the block that should be set.
 * @param nb_lines The number of lines that should be set.
 *
 * @since August 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t nb_lines);	// TODO make private?


/**
 * @brief Recovers the header of an OBIDMS column from the column name.
 *
 * @warning The header structure has to be munmapped by the caller
 *          (obi_close_header() is declared in this header for that purpose).
 *
 * @param dms A pointer to an OBIDMS.
 * @param column_name The name of an OBIDMS column.
 * @param version_number The version of the column from which the header should be
 *        retrieved (-1: latest version).
 *
 * @returns A pointer to the mmapped header of the column.
 * @retval NULL if an error occurred.
 *
 * @since October 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number);


/**
 * @brief Munmaps a mmapped header as returned by obi_column_get_header_from_name().
 *
 * @param header A pointer to the mmapped header structure.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since October 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_close_header(OBIDMS_column_header_p header);


/**
 * @brief Recovers the index of an element in an OBIDMS column from the name of the element.
 *
 * @param column A pointer to an OBIDMS column.
 * @param element_name The name of the element.
 *
 * @returns The index of the element in a line of the column.
 * @retval OBIIdx_NA if an error occurred.
 *
 * @since July 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);


/**
 * @brief Recovers the names of the elements of the lines of a column,
 *        with ';' as separator (i.e. "0;1;2;...;n\0").
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param column A pointer to an OBIDMS column.
 *
 * @returns A pointer to a character array where the elements names are stored.
 * @retval NULL if an error occurred.
 *
 * @since January 2017
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_get_elements_names(OBIDMS_column_p column);


/**
 * @brief Prepares a column to set a value.
 *
 * @param column A pointer to an OBIDMS column.
 * @param line_nb The number of the line at which the value will be set.
 * @param elt_idx NOTE(review): this parameter was not documented in the original header.
 *                Presumably the index, within the line, of the element at which the value
 *                will be set - confirm against the implementation.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, index_t elt_idx);


/**
 * @brief Prepares a column to recover a value.
 *
 * @param column A pointer to an OBIDMS column.
 * @param line_nb The number of the line from which the value will be recovered.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since April 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb);


/**
 * @brief Goes through all the column files of a DMS and deletes the columns that have
 *        not been flagged as finished (the flagging is done by the finish_view()
 *        function in the obiview source file).
 *
 * @param dms A pointer to an OBIDMS.
 *
 * @returns A value indicating the success of the operation.
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 *
 * @since October 2017
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_clean_unfinished_columns(OBIDMS_p dms);


#endif /* OBIDMSCOLUMN_H_ */