Commit 0eaa5aa7 authored by Celine Mercier

Major changes: new Cython subclasses to handle columns with multiple
elements per line in a more efficient way; elements_names are now
passed as a list; new function to retrieve only the header of a column
parent 21923e21
......@@ -23,22 +23,24 @@ cdef class OBIDMS:
OBIType_t data_type=*,
size_t nb_lines=*,
size_t nb_elements_per_line=*,
str elements_names=*)
list elements_names=*)
cdef class OBIDMS_column:
cdef OBIDMS_column_p pointer
cdef OBIDMS dms
cdef str data_type # TODO keep as OBIType_t? both?
cdef str dms_name
cdef str column_name
cdef OBIDMS_column_p pointer
cdef OBIDMS dms
cdef str data_type # TODO keep as OBIType_t? both?
cdef str dms_name
cdef str column_name
cdef size_t nb_elements_per_line
cdef list elements_names
cpdef object get_item(self, size_t line_nb, str element_name)
# cpdef object get_item(self, size_t line_nb, str element_name) TODO can't declare because not the same in all subclasses
# cpdef set_item(self, size_t line_nb, str element_name, object value) TODO can't declare because object value
cpdef list get_elements_names(self)
cpdef str get_data_type(self)
cpdef size_t get_nb_lines_used(self)
cpdef str get_creation_date(self)
# cpdef str get_creation_date(self)
cpdef close(self)
......@@ -6,37 +6,48 @@ from obitools3.utils cimport bytes2str, str2bytes
from .capi.obidms cimport obi_dms, \
obi_close_dms
from .capi.obidmscolumn cimport obi_column_get_data_type_from_name, \
obi_column_get_latest_version_from_name, \
obi_column_get_line_count_from_name, \
obi_column_get_nb_lines_used, \
from .capi.obidmscolumn cimport obi_column_get_nb_lines_used, \
obi_column_get_elements_names, \
obi_column_get_formatted_creation_date, \
obi_column_get_formatted_creation_date_from_name, \
obi_column_get_header_from_name, \
obi_unmap_header, \
obi_column_get_latest_version_from_name, \
obi_create_column, \
obi_clone_column, \
obi_open_column, \
obi_close_column
from .capi.obitypes cimport const_char_p, name_data_type
obi_close_column, \
OBIDMS_column_header_p
#obi_column_get_formatted_creation_date
from .capi.obitypes cimport const_char_p, \
name_data_type
from ._obidms cimport OBIDMS
from ._obidms cimport OBIDMS_column
from ._obidmscolumn_int cimport OBIDMS_column_int, \
OBIDMS_column_int_writable
OBIDMS_column_int_writable, \
OBIDMS_column_int_multi_elts, \
OBIDMS_column_int_multi_elts_writable
from ._obidmscolumn_float cimport OBIDMS_column_float, \
OBIDMS_column_float_writable
OBIDMS_column_float_writable, \
OBIDMS_column_float_multi_elts, \
OBIDMS_column_float_multi_elts_writable
from ._obidmscolumn_bool cimport OBIDMS_column_bool, \
OBIDMS_column_bool_writable
OBIDMS_column_bool_writable, \
OBIDMS_column_bool_multi_elts, \
OBIDMS_column_bool_multi_elts_writable
from ._obidmscolumn_char cimport OBIDMS_column_char, \
OBIDMS_column_char_writable
OBIDMS_column_char_writable, \
OBIDMS_column_char_multi_elts, \
OBIDMS_column_char_multi_elts_writable
from ._obidmscolumn_idx cimport OBIDMS_column_idx, \
OBIDMS_column_idx_writable
# from ._obidmscolumn_idx cimport OBIDMS_column_idx, \
# OBIDMS_column_idx_writable, \
# OBIDMS_column_idx_multi_elts, \
# OBIDMS_column_idx_multi_elts_writable
cdef class OBIDMS :
......@@ -72,6 +83,7 @@ cdef class OBIDMS :
cdef str creation_date
cdef obiversion_t latest_version
cdef size_t line_count
cdef OBIDMS_column_header_p header
p = Path(self.dms_name+'.obidms')
......@@ -85,11 +97,12 @@ cdef class OBIDMS :
column_name = entry.stem
column_name_b = str2bytes(column_name)
dms[column_name] = {}
data_type = bytes2str(name_data_type(obi_column_get_data_type_from_name(self.pointer, column_name_b)))
header = obi_column_get_header_from_name(self.pointer, column_name_b)
data_type = bytes2str(name_data_type(header.data_type))
line_count = header.line_count
latest_version = obi_column_get_latest_version_from_name(self.pointer, column_name_b)
line_count = obi_column_get_line_count_from_name(self.pointer, column_name_b)
# creation_date = bytes2str(obi_column_get_formatted_creation_date_from_name(self.pointer, column_name_b)) #TODO
# creation_date = bytes2str(obi_column_get_formatted_creation_date_from_name(self.pointer, column_name_b)) #TODO (deprecated, use header)
# print(creation_date)
dms[column_name]['data_type'] = data_type
......@@ -98,6 +111,7 @@ cdef class OBIDMS :
# dms[column_name]['creation_date'] = creation_date
print("{:<25} {:<25} {:<25} {:<25}".format(column_name, data_type, latest_version, line_count))
obi_unmap_header(header) # TODO check if error? but C will already warn and there's nothing to do
return dms
......@@ -108,97 +122,110 @@ cdef class OBIDMS :
obiversion_t version_number=-1,
OBIType_t data_type= <OBIType_t> 0,
size_t nb_lines=0,
size_t nb_elements_per_line=1,
str elements_names=None):
size_t nb_elements_per_line=0,
list elements_names=None):
# Declarations
cdef OBIDMS_column column
cdef object subclass # TODO object?
cdef bytes column_name_b
cdef OBIDMS_column_header_p header
header = NULL
# Format the character string to send to C function
column_name_b = str2bytes(column_name)
# Get the header of the latest version of the column if
# some required information is not provided
if ((not data_type or not nb_elements_per_line) and not create) :
header = obi_column_get_header_from_name(self.pointer, column_name_b)
# Get the data type if not provided
if not data_type :
if create :
raise Exception("A data type must be specified")
else :
data_type = obi_column_get_data_type_from_name(self.pointer, column_name_b)
data_type = header.data_type
# Open the column with the right subclass depending on the data type and the mode (read-only or writable)
# Get the number of elements per line if not provided and needed
if not nb_elements_per_line :
if create : # Set to one if not provided (default value)
nb_elements_per_line = 1
else :
nb_elements_per_line = header.nb_elements_per_line
if nb_elements_per_line > 1 :
elements_names = bytes2str(header.elements_names).split(';')
if header != NULL :
obi_unmap_header(header) # TODO check if error? but C will already warn and there's nothing to do
# Open the column with the right subclass depending on the data type, the mode
# (read-only or writable) and whether there are multiple elements per line or not
if data_type == 1 :
if (create or clone) :
column = OBIDMS_column_int_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_int_writable
else :
subclass = OBIDMS_column_int_multi_elts_writable
else :
column = OBIDMS_column_int(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_int
else :
subclass = OBIDMS_column_int_multi_elts
elif data_type == 2 :
if (create or clone) :
column = OBIDMS_column_float_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_float_writable
else :
subclass = OBIDMS_column_float_multi_elts_writable
else :
column = OBIDMS_column_float(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_float
else :
subclass = OBIDMS_column_float_multi_elts
elif data_type == 3 :
if (create or clone) :
column = OBIDMS_column_bool_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_bool_writable
else :
subclass = OBIDMS_column_bool_multi_elts_writable
else :
column = OBIDMS_column_bool(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_bool
else :
subclass = OBIDMS_column_bool_multi_elts
elif data_type == 4 :
if (create or clone) :
column = OBIDMS_column_char_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_char_writable
else :
subclass = OBIDMS_column_char_multi_elts_writable
else :
column = OBIDMS_column_char(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
if nb_elements_per_line == 1 :
subclass = OBIDMS_column_char
else :
subclass = OBIDMS_column_char_multi_elts
# elif data_type == 5 :
# if (create or clone) :
# column = OBIDMS_column_idx_writable(self, column_name,
# create, clone, clone_data,
# version_number, data_type,
# nb_lines, nb_elements_per_line,
# elements_names)
# if nb_elements_per_line == 1 :
# subclass = OBIDMS_column_idx_writable
# else :
# subclass = OBIDMS_column_idx_multi_elts_writable
# else :
# column = OBIDMS_column_idx(self, column_name,
# create, clone, clone_data,
# version_number, data_type,
# nb_lines, nb_elements_per_line,
# elements_names)
# if nb_elements_per_line == 1 :
# subclass = OBIDMS_column_idx
# else :
# subclass = OBIDMS_column_idx_multi_elts
else :
raise Exception("Problem with the data type")
column = subclass(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
return column
......@@ -215,7 +242,7 @@ cdef class OBIDMS_column :
OBIType_t type,
size_t nb_lines,
size_t nb_elements_per_line,
str elements_names):
list elements_names):
# Declarations
cdef bytes column_name_b
......@@ -226,6 +253,8 @@ cdef class OBIDMS_column :
self.dms = dms
self.data_type = bytes2str(name_data_type(type))
self.column_name = column_name
self.nb_elements_per_line = nb_elements_per_line
self.elements_names = elements_names
# Format the character strings to send them to C functions
column_name_b = str2bytes(column_name)
......@@ -236,7 +265,7 @@ cdef class OBIDMS_column :
if elements_names == None :
elements_names_b = column_name_b
else :
elements_names_b = str2bytes(elements_names)
elements_names_b = str2bytes(";".join(elements_names))
self.pointer = obi_create_column(self.dms.pointer, column_name_b, type, nb_lines, nb_elements_per_line, elements_names_b)
else :
if clone :
......@@ -248,49 +277,28 @@ cdef class OBIDMS_column :
def __iter__(self):
# Declarations
cdef list elements_names
cdef str element_name
cdef bint multiple_elements
cdef object line # TODO
cdef size_t lines_used
cdef size_t line_nb
# Check if there are multiple elements per line and, if so, get their names
elements_names = self.get_elements_names()
if len(elements_names) > 1 :
multiple_elements = True
else :
element_name = elements_names[0]
# Yield each line
lines_used = obi_column_get_nb_lines_used(self.pointer)
for line_nb in xrange(lines_used):
if multiple_elements :
line = []
for element_name in elements_names :
line.append(self.get_item(line_nb, element_name))
else :
line = self.get_item(line_nb, element_name)
yield line
for line_nb in range(lines_used):
yield self.get_line(line_nb)
def __setitem__(self, size_t line_nb, object value):
self.set_item(line_nb, "", value)
self.set_line(line_nb, value)
def __getitem__(self, size_t line_nb):
return self.get_item(line_nb, "")
return self.get_line(line_nb)
cpdef object get_item(self, size_t line_nb, str element_name):
raise NotImplementedError
# cpdef object get_item(self, size_t line_nb, str element_name): TODO
# raise NotImplementedError
# cpdef set_item(self, size_t line_nb, str element_name, object value): TODO
# raise NotImplementedError
cpdef list get_elements_names(self):
cdef bytes elements_names
elements_names = obi_column_get_elements_names(self.pointer)
return (bytes2str(elements_names)).split(';')
return self.elements_names
cpdef str get_data_type(self):
return self.data_type
......@@ -298,8 +306,8 @@ cdef class OBIDMS_column :
cpdef size_t get_nb_lines_used(self):
return obi_column_get_nb_lines_used(self.pointer)
cpdef str get_creation_date(self):
return bytes2str(obi_column_get_formatted_creation_date(self.pointer))
# cpdef str get_creation_date(self):
# return bytes2str(obi_column_get_formatted_creation_date(self.pointer))
cpdef close(self):
raise NotImplementedError
......
#cython: language_level=3
from .capi.obitypes cimport obibool_t
from .capi.obidmscolumn cimport OBIDMS_column_p
from ._obidms cimport OBIDMS_column
from ._obidms cimport OBIDMS_column
cdef class OBIDMS_column_bool(OBIDMS_column):
cpdef object get_line(self, size_t line_nb)
cpdef set_line(self, size_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
cpdef set_line(self, size_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_bool_multi_elts(OBIDMS_column_bool):
cpdef object get_item(self, size_t line_nb, str element_name)
cpdef object get_line(self, size_t line_nb)
cpdef set_item(self, size_t line_nb, str element_name, obibool_t value)
cpdef close(self)
cpdef set_line(self, size_t line_nb, object values)
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
cdef class OBIDMS_column_bool_multi_elts_writable(OBIDMS_column_bool_multi_elts):
cpdef set_item(self, size_t line_nb, str element_name, obibool_t value)
cpdef set_line(self, size_t line_nb, object values)
cpdef close(self)
......@@ -3,7 +3,9 @@
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obibool_with_elt_name, \
obi_column_set_obibool_with_elt_name
obi_column_get_obibool_with_elt_idx, \
obi_column_set_obibool_with_elt_name, \
obi_column_set_obibool_with_elt_idx
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIBool_NA
......@@ -13,34 +15,90 @@ from cpython.bool cimport PyBool_FromLong
cdef class OBIDMS_column_bool(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name):
cpdef object get_line(self, size_t line_nb):
cdef obibool_t value
cdef object result
value = obi_column_get_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obibool_with_elt_idx(self.pointer, line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
raise IndexError(line_nb)
if value == OBIBool_NA :
result = None
else :
result = PyBool_FromLong(value)
return result
cpdef set_item(self, size_t line_nb, str element_name, obibool_t value):
cpdef set_line(self, size_t line_nb, object value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
    """Writable boolean column holding a single element per line."""

    cpdef set_line(self, size_t line_nb, object value):
        """Store `value` in line `line_nb` (element index 0).

        `None` is written as OBIBool_NA, mirroring the read path of the
        parent class, whose get_line() returns None for OBIBool_NA.
        Any other value is cast to obibool_t.

        Raises:
            Exception: if the C setter returns a negative value.
        """
        cdef obibool_t c_value
        # A plain <obibool_t> cast cannot represent None, so map it explicitly.
        if value is None :
            c_value = OBIBool_NA
        else :
            c_value = <obibool_t> value
        if obi_column_set_obibool_with_elt_idx(self.pointer, line_nb, 0, c_value) < 0:
            raise Exception("Problem setting a value in a column")

    cpdef close(self):
        """Close the column through obi_truncate_and_close_column().

        Raises:
            Exception: if the C call returns a negative value.
        """
        if obi_truncate_and_close_column(self.pointer) < 0 :
            raise Exception("Problem closing a column")
cdef class OBIDMS_column_bool_multi_elts(OBIDMS_column_bool):
    """Read-only boolean column with several named elements per line."""

    cpdef object get_item(self, size_t line_nb, str element_name):
        """Return the element named `element_name` of line `line_nb`.

        Returns:
            None when the stored value is OBIBool_NA, otherwise a bool.

        Raises:
            IndexError: (line_nb, element_name) when obi_errno is set after
                the C getter was called.
        """
        cdef obibool_t value
        value = obi_column_get_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
        if obi_errno > 0 :
            raise IndexError(line_nb, element_name)
        if value == OBIBool_NA :
            return None
        return PyBool_FromLong(value)

    cpdef object get_line(self, size_t line_nb) :
        """Return line `line_nb` as a dict mapping element names to values.

        Each element is read by index; an element equal to OBIBool_NA is
        reported as None (consistent with get_item), any other value as a
        bool.

        Returns:
            None when every element of the line is OBIBool_NA, otherwise
            the dict described above.

        Raises:
            IndexError: (line_nb) when obi_errno is set after a C getter call.
        """
        cdef obibool_t value
        cdef object result
        cdef size_t i
        cdef bint all_NA
        result = {}
        all_NA = True
        for i in range(self.nb_elements_per_line) :
            value = obi_column_get_obibool_with_elt_idx(self.pointer, line_nb, i)
            if obi_errno > 0 :
                raise IndexError(line_nb)
            if value == OBIBool_NA :
                # Do not feed the NA marker to PyBool_FromLong: report it as None.
                result[self.elements_names[i]] = None
            else :
                result[self.elements_names[i]] = PyBool_FromLong(value)
                all_NA = False
        if all_NA :
            result = None
        return result

    cpdef set_item(self, size_t line_nb, str element_name, obibool_t value):
        """Always refuse: this class is the read-only variant.

        The C setter is deliberately not called, so the column is never
        modified through this class.
        """
        raise Exception("Column is read-only")

    cpdef set_line(self, size_t line_nb, object values):
        """Always refuse: this class is the read-only variant."""
        raise Exception("Column is read-only")
cdef class OBIDMS_column_bool_multi_elts_writable(OBIDMS_column_bool_multi_elts):
    """Writable boolean column with several named elements per line."""

    cpdef set_item(self, size_t line_nb, str element_name, obibool_t value):
        """Store `value` in the element `element_name` of line `line_nb`.

        Raises:
            Exception: if the C setter returns a negative value.
        """
        if obi_column_set_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
            raise Exception("Problem setting a value in a column")

    cpdef set_line(self, size_t line_nb, object values):
        """Store several elements of line `line_nb` at once.

        `values` is iterated for element names and indexed by each name
        (e.g. a dict {element_name: value}). A None value is written as
        OBIBool_NA, matching the None that get_item() returns for
        OBIBool_NA; any other value is cast to obibool_t.

        Raises:
            Exception: propagated from set_item() when the C setter fails.
        """
        cdef obibool_t value
        cdef object element_value
        for element_name in values :
            element_value = values[element_name]
            # A plain <obibool_t> cast cannot represent None, so map it explicitly.
            if element_value is None :
                value = OBIBool_NA
            else :
                value = <obibool_t> element_value
            self.set_item(line_nb, element_name, value)

    cpdef close(self):
        """Close the column through obi_truncate_and_close_column().

        Raises:
            Exception: if the C call returns a negative value.
        """
        if obi_truncate_and_close_column(self.pointer) < 0 :
            raise Exception("Problem closing a column")
\ No newline at end of file
#cython: language_level=3
from .capi.obitypes cimport obichar_t
from .capi.obidmscolumn cimport OBIDMS_column_p
from ._obidms cimport OBIDMS_column
from ._obidms cimport OBIDMS_column
cdef class OBIDMS_column_char(OBIDMS_column):
cpdef object get_line(self, size_t line_nb)
cpdef set_line(self, size_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_char_writable(OBIDMS_column_char):
cpdef set_line(self, size_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_char_multi_elts(OBIDMS_column_char):
cpdef object get_item(self, size_t line_nb, str element_name)
cpdef object get_line(self, size_t line_nb)
cpdef set_item(self, size_t line_nb, str element_name, bytes value)
cpdef close(self)
cpdef set_line(self, size_t line_nb, object values)
cdef class OBIDMS_column_char_writable(OBIDMS_column_char):
cdef class OBIDMS_column_char_multi_elts_writable(OBIDMS_column_char_multi_elts):
cpdef set_item(self, size_t line_nb, str element_name, bytes value)
cpdef set_line(self, size_t line_nb, object values)
cpdef close(self)
......@@ -3,17 +3,52 @@
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obichar_with_elt_name, \
obi_column_set_obichar_with_elt_name
obi_column_get_obichar_with_elt_idx, \
obi_column_set_obichar_with_elt_name, \
obi_column_set_obichar_with_elt_idx
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIChar_NA
from obitools3.utils cimport str2bytes
cdef class OBIDMS_column_char(OBIDMS_column) :
cdef class OBIDMS_column_char(OBIDMS_column):
cpdef object get_line(self, size_t line_nb):
    """Return the value stored at line `line_nb` (element index 0).

    Returns:
        None when the stored value is OBIChar_NA, otherwise the value
        cast to bytes.

    Raises:
        IndexError: (line_nb) when obi_errno is set after the C getter call.
    """
    cdef obichar_t value
    value = obi_column_get_obichar_with_elt_idx(self.pointer, line_nb, 0)
    # The error check comes first, before the value is interpreted.
    if obi_errno > 0 :
        raise IndexError(line_nb)
    if value == OBIChar_NA :
        return None
    return <bytes> value
cpdef set_line(self, size_t line_nb, object value):