Commit b45b496b authored by Celine Mercier's avatar Celine Mercier

Major update: new type of columns containing indices referring to lines

in other columns
parent 2cf10cb6
......@@ -19,6 +19,7 @@ cdef class OBIDMS:
str column_name,
bint create=*,
bint clone=*, bint clone_data=*,
bint referring=*,
obiversion_t version_number=*,
str type=*,
index_t nb_lines=*,
......@@ -30,16 +31,19 @@ cdef class OBIDMS:
cdef class OBIDMS_column:
cdef OBIDMS_column_p pointer
cdef OBIDMS dms
cdef str data_type
cdef str dms_name
cdef str column_name
cdef index_t nb_elements_per_line
cdef list elements_names
cdef OBIDMS_column_p pointer
cdef OBIDMS dms
cdef str data_type
cdef str dms_name
cdef str column_name
cdef index_t nb_elements_per_line
cdef list elements_names
cpdef OBIDMS_column_p referred_column_pointer
# cpdef object get_item(self, index_t line_nb, str element_name) TODO can't declare because not the same in all subclasses
# cpdef set_item(self, index_t line_nb, str element_name, object value) TODO can't declare because object value
cpdef grep_line(self, index_t line_nb)
cpdef list get_elements_names(self)
cpdef str get_data_type(self)
cpdef index_t get_nb_lines_used(self)
......
......@@ -14,6 +14,7 @@ from .capi.obidmscolumn cimport obi_column_get_header_from_name, \
obi_open_column, \
obi_close_column, \
obi_column_format_date, \
obi_grep_line, \
OBIDMS_column_header_p
from .capi.obitypes cimport const_char_p, \
OBIType_t, \
......@@ -109,7 +110,7 @@ cdef class OBIDMS :
column_name_b = str2bytes(column_name)
dms[column_name] = {}
header = obi_column_get_header_from_name(self.pointer, column_name_b, -1)
data_type = bytes2str(name_data_type(header.data_type))
data_type = bytes2str(name_data_type(header.returned_data_type))
line_count = header.line_count
creation_date = bytes2str(obi_column_format_date(header.creation_date))
obi_unmap_header(header)
......@@ -127,6 +128,7 @@ cdef class OBIDMS :
str column_name,
bint create=False,
bint clone=False, bint clone_data=True,
bint referring=False,
obiversion_t version_number=-1,
str type='',
index_t nb_lines=0,
......@@ -157,7 +159,7 @@ cdef class OBIDMS :
if create :
raise Exception("A data type must be specified")
else :
data_type = header.data_type
data_type = header.returned_data_type
else :
if type == 'OBI_INT' :
data_type = OBI_INT
......@@ -179,7 +181,7 @@ cdef class OBIDMS :
if create : # Set to one if not provided (default value)
nb_elements_per_line = 1
else :
nb_elements_per_line = header.nb_elements_per_line
nb_elements_per_line = header.returned_nb_elements_per_line
if nb_elements_per_line > 1 :
elements_names = bytes2str(header.elements_names).split(';')
......@@ -258,7 +260,8 @@ cdef class OBIDMS :
raise Exception("Problem with the data type")
column = subclass(self, column_name,
create, clone, clone_data,
create, clone, clone_data,
referring,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names, array_name,
......@@ -276,6 +279,7 @@ cdef class OBIDMS_column :
str column_name,
bint create,
bint clone, bint clone_data,
bint referring,
obiversion_t version_number,
OBIType_t type,
index_t nb_lines,
......@@ -312,10 +316,14 @@ cdef class OBIDMS_column :
elements_names_b = str2bytes(";".join(elements_names))
self.pointer = obi_create_column(self.dms.pointer, column_name_b, type,
nb_lines, nb_elements_per_line,
elements_names_b, array_name_b, comments_b)
elements_names_b, array_name_b, comments_b,
referring)
else :
if clone :
self.pointer = obi_clone_column(self.dms.pointer, column_name_b, version_number, clone_data)
self.pointer = obi_clone_column(self.dms.pointer, column_name_b, version_number, referring, clone_data)
elif referring :
self.pointer = obi_clone_column(self.dms.pointer, column_name_b, version_number, referring, False)
referred_column_pointer = self.pointer.referred_column
else :
self.pointer = obi_open_column(self.dms.pointer, column_name_b, version_number)
......@@ -355,6 +363,9 @@ cdef class OBIDMS_column :
# cpdef set_item(self, index_t line_nb, str element_name, object value): TODO
# raise NotImplementedError
cpdef grep_line(self, index_t line_nb):
    """
    Ask the C layer to grep line `line_nb` into this column.

    Calls `obi_grep_line` with this column's C pointer and `line_nb`.

    Raises:
        Exception: if `obi_grep_line` returns a negative value.
    """
    cdef int status = obi_grep_line(self.pointer, line_nb)
    if status < 0 :
        raise Exception("Error grepping line")
# Accessor: returns the `elements_names` list attribute of this column object as-is
# (the same list object, not a copy).
cpdef list get_elements_names(self):
return self.elements_names
......
......@@ -19,12 +19,16 @@ cdef extern from "obidmscolumn.h" nogil:
size_t data_size
index_t line_count
index_t lines_used
index_t nb_elements_per_line
index_t returned_nb_elements_per_line
index_t stored_nb_elements_per_line
const_char_p elements_names
OBIType_t data_type
OBIType_t returned_data_type
OBIType_t stored_data_type
time_t creation_date
obiversion_t version
obiversion_t cloned_from
bint referring
obiversion_t referred_column_version
const_char_p name
const_char_p array_name
const_char_p comments
......@@ -33,6 +37,7 @@ cdef extern from "obidmscolumn.h" nogil:
struct OBIDMS_column_t:
OBIDMS_column_header_p header
OBIDMS_column_t* referred_column
ctypedef OBIDMS_column_t* OBIDMS_column_p
......@@ -43,7 +48,8 @@ cdef extern from "obidmscolumn.h" nogil:
index_t nb_elements_per_line,
const_char_p elements_names,
const_char_p array_name,
const_char_p comments)
const_char_p comments,
bint referring)
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
const_char_p column_name,
......@@ -54,6 +60,7 @@ cdef extern from "obidmscolumn.h" nogil:
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
const_char_p column_name,
obiversion_t version_number,
bint referring,
bint clone_data)
int obi_truncate_and_close_column(OBIDMS_column_p column)
......@@ -69,6 +76,8 @@ cdef extern from "obidmscolumn.h" nogil:
char* obi_column_format_date(time_t date)
int obi_grep_line(OBIDMS_column_p referring_column, index_t line_to_grep)
cdef extern from "obidmscolumn_int.h" nogil:
int obi_column_set_obiint_with_elt_name(OBIDMS_column_p column,
......
......@@ -23,7 +23,8 @@ cdef extern from "obitypes.h" nogil:
OBI_BOOL,
OBI_CHAR,
OBI_STR,
OBI_SEQ
OBI_SEQ,
OBI_IDX
ctypedef OBIType OBIType_t
......
......@@ -320,9 +320,9 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
}
////////// FOR DEBUGGING ///////////
///////////////////// FOR DEBUGGING ///////////////////////////
// NOTE: The first byte is printed first (left-most).
// little endian
void print_bits(void* ptr, int32_t size)
{
uint8_t* b = (uint8_t*) ptr;
......
......@@ -515,7 +515,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
index_t nb_elements_per_line,
const char* elements_names,
const char* array_name,
const char* comments)
const char* comments,
bool referring)
{
OBIDMS_column_p new_column;
OBIDMS_column_directory_p column_directory;
......@@ -528,6 +529,10 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
size_t header_size;
size_t data_size;
index_t minimum_line_count;
OBIType_t returned_data_type;
OBIType_t stored_data_type;
index_t returned_nb_elements_per_line;
index_t stored_nb_elements_per_line;
new_column = NULL;
......@@ -547,15 +552,28 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
obidebug(1, "\nCan't create column because of invalid data type");
return NULL;
}
if (((data_type == 5) || (data_type == 6)) && (array_name == NULL))
if (((data_type == OBI_STR) || (data_type == OBI_SEQ)) && (array_name == NULL))
{
obidebug(1, "\nCan't create column because of empty array name");
return NULL;
}
returned_data_type = data_type;
if ((data_type == OBI_STR) || (data_type == OBI_SEQ) || referring)
// stored data is indices referring to data stored elsewhere
stored_data_type = OBI_IDX;
else
stored_data_type = returned_data_type;
returned_nb_elements_per_line = nb_elements_per_line;
if (referring)
// stored data is indices referring to lines in another column
stored_nb_elements_per_line = 1;
else
stored_nb_elements_per_line = returned_nb_elements_per_line;
// The initial line count should be between the minimum (corresponding to the page size) and the maximum allowed
minimum_line_count = get_line_count_per_page(data_type, nb_elements_per_line);
minimum_line_count = get_line_count_per_page(stored_data_type, stored_nb_elements_per_line);
if (nb_lines > MAXIMUM_LINE_COUNT)
{
obidebug(1, "\nCan't create column because of line count greater than the maximum allowed (%d)", MAXIMUM_LINE_COUNT);
......@@ -565,12 +583,12 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
nb_lines = minimum_line_count;
// The number of elements names should be equal to the number of elements per line
if ((elements_names == NULL) && (nb_elements_per_line > 1))
if ((elements_names == NULL) && (returned_nb_elements_per_line > 1))
{
obidebug(1, "\nCan't create column because no elements names were given for a number of elements per line greater than 1");
return NULL;
}
else if ((elements_names != NULL) && (nb_elements_per_line > 1))
else if ((elements_names != NULL) && (returned_nb_elements_per_line > 1))
{
char* token;
index_t n = 0;
......@@ -587,7 +605,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
return NULL;
}
}
else if ((nb_elements_per_line == 1) && (strcmp(elements_names, column_name) != 0))
else if ((returned_nb_elements_per_line == 1) && (strcmp(elements_names, column_name) != 0))
{
obidebug(1, "\nCan't create column because the element name does not match the column name");
return NULL;
......@@ -604,7 +622,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
// Calculate the size needed
header_size = obi_get_platform_header_size();
data_size = obi_array_sizeof(data_type, nb_lines, nb_elements_per_line);
data_size = obi_array_sizeof(stored_data_type, nb_lines, stored_nb_elements_per_line);
file_size = header_size + data_size;
// Get the latest version number
......@@ -692,16 +710,20 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
new_column->writable = true;
header = new_column->header;
header->header_size = header_size;
header->data_size = data_size;
header->line_count = nb_lines;
header->lines_used = 0;
header->nb_elements_per_line = nb_elements_per_line;
header->data_type = data_type;
header->creation_date = time(NULL);
header->version = version_number;
header->cloned_from = -1;
header = new_column->header;
header->header_size = header_size;
header->data_size = data_size;
header->line_count = nb_lines;
header->lines_used = 0;
header->stored_nb_elements_per_line = stored_nb_elements_per_line;
header->returned_nb_elements_per_line = returned_nb_elements_per_line;
header->stored_data_type = stored_data_type;
header->returned_data_type = returned_data_type;
header->creation_date = time(NULL);
header->version = version_number;
header->cloned_from = -1;
header->referring = referring;
header->referred_column_version = -1;
obi_column_set_elements_names(new_column, elements_names);
......@@ -710,8 +732,9 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
if (comments != NULL)
strncpy(header->comments, comments, COMMENTS_MAX_LENGTH);
// If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created
if ((data_type == 5) || (data_type == 6))
// If the data type is OBI_STR or OBI_SEQ, and the column is not referring another,
// the associated obi_array is opened or created
if ((stored_data_type == OBI_STR) || (stored_data_type == OBI_SEQ))
{
array = obi_array(dms, array_name);
if (array == NULL)
......@@ -847,8 +870,9 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
column->writable = false;
// If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created
if (((column->header)->data_type == 5) || ((column->header)->data_type == 6))
// If the data type is OBI_STR or OBI_SEQ, and the column is not referring,
// the associated obi_array is opened
if (((column->header)->stored_data_type == OBI_STR) || ((column->header)->stored_data_type == OBI_SEQ))
{
array = obi_array(dms, (column->header)->array_name);
if (array == NULL)
......@@ -862,16 +886,29 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
column->array = array;
}
if ((column->header)->referring)
{
column->referred_column = obi_open_column(dms, column_name, (column->header)->referred_column_version);
if (column->referred_column == NULL)
{
obidebug(1, "\nError opening a referred column");
obi_close_column(column);
close(column_file_descriptor);
return NULL;
}
}
close(column_file_descriptor);
return column;
}
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
const char* column_name,
obiversion_t version_number,
bool clone_data)
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
const char* column_name,
obiversion_t version_number,
bool referring,
bool clone_data)
{
OBIDMS_column_p column_to_clone;
OBIDMS_column_p new_column;
......@@ -887,11 +924,17 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
return NULL;
}
data_type = (column_to_clone->header)->data_type;
data_type = (column_to_clone->header)->returned_data_type;
nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line;
nb_elements_per_line = (column_to_clone->header)->returned_nb_elements_per_line;
if (clone_data)
if (clone_data && referring)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError: can't clone the data when creating a referring column");
return NULL;
}
else if (clone_data)
nb_lines = (column_to_clone->header)->line_count;
else
nb_lines = get_line_count_per_page(data_type, nb_elements_per_line); // minimum line count corresponding to one memory page
......@@ -903,7 +946,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
nb_elements_per_line,
(column_to_clone->header)->elements_names,
(column_to_clone->header)->array_name,
(column_to_clone->header)->comments);
(column_to_clone->header)->comments,
referring);
if (new_column == NULL)
{
......@@ -915,7 +959,15 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
obidebug(1, "\nError deleting a bad cloned file");
}
(new_column->header)->cloned_from = version_number;
(new_column->header)->cloned_from = (column_to_clone->header)->version;
if (referring)
{
if ((column_to_clone->header)->referring)
(new_column->header)->referred_column_version = (column_to_clone->header)->referred_column_version;
else
(new_column->header)->referred_column_version = (column_to_clone->header)->version;
}
if (clone_data)
{
......@@ -923,8 +975,10 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
(new_column->header)->lines_used = (column_to_clone->header)->lines_used;
}
// close column_to_clone
if (obi_close_column(column_to_clone) < 0)
// close column_to_clone or store the pointer if it's referred
if (referring)
new_column->referred_column = column_to_clone;
else if (obi_close_column(column_to_clone) < 0)
{
obidebug(1, "\nError closing a column that has been cloned");
// TODO return NULL or not?
......@@ -936,6 +990,9 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
int obi_close_column(OBIDMS_column_p column)
{
if ((column->header)->referring)
obi_close_column(column->referred_column);
// Munmap data
if (munmap(column->data, (column->header)->data_size) < 0)
{
......@@ -952,7 +1009,7 @@ int obi_close_column(OBIDMS_column_p column)
return -1;
}
obi_close_column_directory(column->column_directory); // TODO or not
//obi_close_column_directory(column->column_directory); // TODO or not
free(column);
......@@ -970,8 +1027,8 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column) // TODO is it nece
char* column_file_name;
// Compute the new line count = the number of lines used rounded to the nearest greater multiple of page size greater than 0
multiple = ceil((double) (ONE_IF_ZERO((column->header)->lines_used) * (column->header)->nb_elements_per_line * obi_sizeof((column->header)->data_type)) / (double) getpagesize());
new_line_count = floor((((int) multiple) * getpagesize()) / ((column->header)->nb_elements_per_line * obi_sizeof((column->header)->data_type)));
multiple = ceil((double) (ONE_IF_ZERO((column->header)->lines_used) * (column->header)->stored_nb_elements_per_line * obi_sizeof((column->header)->stored_data_type)) / (double) getpagesize());
new_line_count = floor((((int) multiple) * getpagesize()) / ((column->header)->stored_nb_elements_per_line * obi_sizeof((column->header)->stored_data_type)));
// Check that it is actually greater than the current number of lines allocated in the file, otherwise no need to truncate
if ((column->header)->line_count == new_line_count)
......@@ -1006,7 +1063,7 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column) // TODO is it nece
}
// Truncate the column file
data_size = obi_array_sizeof((column->header)->data_type, new_line_count, (column->header)->nb_elements_per_line);
data_size = obi_array_sizeof((column->header)->stored_data_type, new_line_count, (column->header)->stored_nb_elements_per_line);
file_size = (column->header)->header_size + data_size;
if (ftruncate(column_file_descriptor, file_size) < 0)
{
......@@ -1152,11 +1209,11 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
{
index_t i, start, end, nb_elements;
nb_elements = nb_lines*((column->header)->nb_elements_per_line);
start = first_line_nb*((column->header)->nb_elements_per_line);
nb_elements = nb_lines*((column->header)->stored_nb_elements_per_line);
start = first_line_nb*((column->header)->stored_nb_elements_per_line);
end = start + nb_elements;
switch ((column->header)->data_type) {
switch ((column->header)->stored_data_type) {
case OBI_VOID: // TODO;
break;
......@@ -1184,8 +1241,9 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
}
break;
case OBI_STR:
case OBI_SEQ: for (i=start;i<end;i++)
case OBI_STR: // fallthrough
case OBI_SEQ: // fallthrough
case OBI_IDX: for (i=start;i<end;i++)
{
*(((index_t*) (column->data)) + i) = OBIIdx_NA;
}
......@@ -1350,3 +1408,35 @@ char* obi_column_format_date(time_t date)
return formatted_time;
}
// TODO put in separate file and needs to lock the dependency with the referred column but...
// warning for the dependency and for the fact that it's always added at the next line (or not cuz might not be a good idea?)
/*
 * Appends the index `line_to_grep` to a referring column.
 *
 * The index is written at position `lines_used` of the column's data, seen as
 * an array of index_t, and `lines_used` is then incremented. If the column is
 * full (`lines_used == line_count`), it is enlarged first with
 * obi_enlarge_column().
 *
 * Returns 0 on success, -1 if the column is not a referring column (the obi
 * errno is then set to OBICOL_UNKNOWN_ERROR) or if enlarging the column fails.
 */
int obi_grep_line(OBIDMS_column_p referring_column, index_t line_to_grep)
{
    index_t* stored_indices;
    index_t  write_position;

    // Only a referring column can store indices of lines of another column
    if (!((referring_column->header)->referring))
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError: Grepping a line can only be done with a referring column");
        return -1;
    }

    // No free line left: the column has to grow before writing
    if ((referring_column->header)->line_count == (referring_column->header)->lines_used)
    {
        if (obi_enlarge_column(referring_column) < 0)
            return -1;
    }

    // The header and data are read only now, after the possible enlargement.
    // NOTE(review): presumably obi_enlarge_column() can remap them - confirm.
    write_position = (referring_column->header)->lines_used;
    stored_indices = (index_t*) (referring_column->data);

    // Store the index at the first unused line
    stored_indices[write_position] = line_to_grep;

    // One more line is now used
    (referring_column->header)->lines_used = write_position + 1;

    return 0;
}
......@@ -57,12 +57,20 @@ typedef struct OBIDMS_column_header {
*/
index_t lines_used; /**< Number of lines of data used.
*/
index_t nb_elements_per_line; /**< Number of elements per line (default: 1).
index_t returned_nb_elements_per_line; /**< Number of elements per line returned when getting a
* line from the column.
*/
index_t stored_nb_elements_per_line; /**< Number of elements per line that is actually stored
* in the data part of the column.
*/
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with ';' as separator
* (should be the column name if one element per line).
*/
OBIType_t data_type; /**< Type of the data.
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
* element from the column.
*/
OBIType_t stored_data_type; /**< Type of the data that is actually stored in the data
* part of the column.
*/
time_t creation_date; /**< Date of creation of the file.
*/
......@@ -72,6 +80,10 @@ typedef struct OBIDMS_column_header {
* was cloned from (-1 if it was not created by cloning
* another column).
*/
bool referring; /**< Whether the column contains indices referring to another column.
*/
obiversion_t referred_column_version; /**< Version of the column to which this column is referring.
*/
char name[OBIDMS_COLUMN_MAX_NAME+1]; /**< The column name as a NULL terminated string.
*/
char array_name[ARRAY_MAX_NAME+1]; /**< If there is one, the obi_array name as a NULL terminated string.
......@@ -96,6 +108,8 @@ typedef struct OBIDMS_column {
*/
OBIDMS_array_p array; /**< A pointer to the array associated with the column if there is one.
*/
struct OBIDMS_column* referred_column; /**< A pointer to the referred column if the column is referring.
*/
void* data; /**< A `void` pointer to the beginning of the data.
*
* @warning Never use this member directly outside of the code of the
......@@ -170,6 +184,7 @@ size_t obi_get_platform_header_size();
* @param elements_names The names of the elements with ';' as separator.
* @param array_name The name of the array if there is one associated with the column.
* @param comments Optional comments associated with the column.
* @param referring Whether the column contains indices referring to another column.
*
* @returns A pointer on the newly created column structure.
* @retval NULL if an error occurred.
......@@ -184,7 +199,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
index_t nb_elements_per_line,
const char* elements_names,
const char* array_name,
const char* comments);
const char* comments,
bool referring);
/**
......@@ -217,7 +233,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, const char* column_name, obiversio
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number, bool clone_data);
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number, bool referring, bool clone_data);
/**
......@@ -354,4 +370,8 @@ index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const cha
char* obi_column_format_date(time_t date);
int obi_grep_line(OBIDMS_column_p referring_column, index_t line_to_grep);
#endif /* OBIDMSCOLUMN_H_ */
......@@ -30,6 +30,14 @@
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obibool_t value)
{
// Check that the column is not referring another
if ((column->header)->referring)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError: Setting a value from a referring column is not allowed. The referred column must be cloned to be modified");
return -1;
}
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
......@@ -51,7 +59,7 @@ int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
(column->header)->lines_used = line_nb+1;
// Set the value
*(((obibool_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
*(((obibool_t*) (column->data)) + (line_nb * ((column->header)->stored_nb_elements_per_line)) + element_idx) = value;