Commit 9a50803c authored by Celine Mercier

Added tuple columns containing immutable indexed data arrays of any type

parent 1684f96b
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # TODO I absolutely don't understand why it doesn't work without that line
from obitools3.dms.view.view import View, Line_selection
from obitools3.dms.view import View, Line_selection
from obitools3.dms.view.typed_view.view_NUC_SEQS import View_NUC_SEQS
from obitools3.dms.dms import DMS
from obitools3.dms import DMS
from obitools3.dms.column import Column
from obitools3.dms.taxo.taxo import Taxonomy
from obitools3.dms.taxo import Taxonomy
from obitools3.utils cimport str2bytes
from obitools3.dms.capi.obitypes cimport OBI_INT, \
OBI_FLOAT, \
......@@ -69,26 +69,68 @@ def random_bool(config):
return random.choice([True, False])
def random_bool_tuples(config):
    """Return a tuple of random length whose items are each None or a random bool.

    The length is drawn with random.randint between 1 and
    config['test']['tuplemaxlen'] (both included).
    """
    length = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_bool(config)]) for _ in range(length))
def random_char(config):
    """Return one random lowercase ASCII letter, converted with str2bytes."""
    letter = random.choice(string.ascii_lowercase)
    return str2bytes(letter)
def random_char_tuples(config):
    """Return a tuple of random length whose items are each None or a random char.

    The length is drawn with random.randint between 1 and
    config['test']['tuplemaxlen'] (both included).
    """
    length = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_char(config)]) for _ in range(length))
def random_float(config):
    """Return a random float: a random integer in [0, MAX_INT] plus random.random()."""
    # Draw the integer part first, then the fractional part, so the sequence
    # of calls to the random generator is unchanged.
    integer_part = random.randint(0, MAX_INT)
    fractional_part = random.random()
    return integer_part + fractional_part
def random_float_tuples(config):
    """Return a tuple of random length whose items are each None or a random float.

    The length is drawn with random.randint between 1 and
    config['test']['tuplemaxlen'] (both included).
    """
    length = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_float(config)]) for _ in range(length))
def random_int(config):
    """Return a random integer between 0 and config['test']['maxlinenb'] (both included)."""
    upper_bound = config['test']['maxlinenb']
    return random.randint(0, upper_bound)
def random_int_tuples(config):
    """Return a tuple of random length whose items are each None or a random int.

    The length is drawn with random.randint between 1 and
    config['test']['tuplemaxlen'] (both included).
    """
    length = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_int(config)]) for _ in range(length))
def random_seq(config):
    """Return a random sequence over a/t/g/c, converted with str2bytes.

    The sequence length comes from random_length(config['test']['seqmaxlen']).
    """
    nucleotides = ['a', 't', 'g', 'c']
    # The length is drawn before any nucleotide, as in a single generator expression.
    seq_len = random_length(config['test']['seqmaxlen'])
    bases = [random.choice(nucleotides) for _ in range(seq_len)]
    return str2bytes(''.join(bases))
def random_seq_tuples(config):
    """Return a tuple of random length whose items are each None or a random sequence.

    The length is drawn with random.randint between 1 and
    config['test']['tuplemaxlen'] (both included).
    """
    length = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_seq(config)]) for _ in range(length))
def random_bytes(config):
    """Return the result of random_bytes_with_max_len for config['test']['strmaxlen']."""
    max_len = config['test']['strmaxlen']
    return random_bytes_with_max_len(max_len)
def random_bytes_tuples(config):
    """Return a tuple of random length whose items are each None or random bytes.

    The length is drawn with random.randint between 1 and
    config['test']['tuplemaxlen'] (both included).
    """
    length = random.randint(1, config['test']['tuplemaxlen'])
    return tuple(random.choice([None, random_bytes(config)]) for _ in range(length))
def random_str_with_max_len(max_len):
    """Return a random string of lowercase ASCII letters.

    The string length comes from random_length(max_len).
    """
    # The length is drawn before any letter, as in a single generator expression.
    str_len = random_length(max_len)
    letters = [random.choice(string.ascii_lowercase) for _ in range(str_len)]
    return ''.join(letters)
......@@ -132,11 +174,28 @@ def test_set_and_get(config, infos):
print_test(config, "-")
return
idx = random_int(config)
value = random.choice([None, infos['random_generator'][data_type](config)])
value = random.choice([None, infos['random_generator'][(data_type, col.tuples)](config)])
if col.nb_elements_per_line > 1 :
elt = random.choice(element_names)
col[idx][elt] = value
assert col[idx][elt] == value, "Column: "+repr(col)+"\nSet value != gotten value "+str(value)+" != "+str(col[idx][elt])
elif col.tuples:
col[idx] = value
if value is None:
totest = None
else:
totest = []
for e in value:
if e is not None and e != '':
totest.append(e)
if len(totest) == 0:
totest = None
else:
totest = tuple(totest)
assert col[idx] == totest, "Column: "+repr(col)+"\nSet value != gotten value "+str(totest)+" != "+str(col[idx])
if totest is not None:
for i in range(len(totest)) :
assert col[idx][i] == totest[i], "Column: "+repr(col)+"\nSet value[i] != gotten value[i] "+str(totest[i])+" != "+str(col[idx][i])
else:
col[idx] = value
assert col[idx] == value, "Column: "+repr(col)+"\nSet value != gotten value "+str(value)+" != "+str(col[idx])
......@@ -210,19 +269,25 @@ def fill_column(config, infos, col) :
if len(element_names) > 1 :
for i in range(random_int(config)) :
for j in range(len(element_names)) :
col[i][element_names[j]] = random.choice([None, infos['random_generator'][data_type](config)])
col[i][element_names[j]] = random.choice([None, infos['random_generator'][(data_type, col.tuples)](config)])
else :
for i in range(random_int(config)) :
col[i] = random.choice([None, infos['random_generator'][data_type](config)])
r = random.choice([None, infos['random_generator'][(data_type, col.tuples)](config)])
col[i] = r
def create_random_column(config, infos) :
alias = random.choice([b'', random_unique_name(infos)])
nb_elements_per_line=random.randint(1, config['test']['maxelts'])
elements_names = []
for i in range(nb_elements_per_line) :
elements_names.append(random_unique_element_name(config, infos))
elements_names = random.choice([None, elements_names])
tuples = random.choice([True, False])
if not tuples :
nb_elements_per_line=random.randint(1, config['test']['maxelts'])
elements_names = []
for i in range(nb_elements_per_line) :
elements_names.append(random_unique_element_name(config, infos))
elements_names = random.choice([None, elements_names])
else :
nb_elements_per_line = 1
elements_names = None
name = random_unique_name(infos)
data_type = random_col_type()
......@@ -231,6 +296,7 @@ def create_random_column(config, infos) :
data_type,
nb_elements_per_line=nb_elements_per_line,
elements_names=elements_names,
tuples=tuples,
comments=random_str_with_max_len(COL_COMMENTS_MAX_LEN),
alias=alias
)
......@@ -347,6 +413,14 @@ def addOptions(parser):
type=int,
help="Maximum length of character strings. "
"Default: 200")
# Maximum length of the random tuples generated for tuple columns
# (presumably surfaced to the tests as config['test']['tuplemaxlen'] -- confirm
# against the option/config loader).
group.add_argument('--tuple_max_len','-u',
                   action="store", dest="test:tuplemaxlen",
                   metavar='<TUPLE_MAX_LEN>',
                   default=20,
                   type=int,
                   # The help text must state the actual default (20), not the
                   # 200 copied from the string-length option.
                   help="Maximum length of tuples. "
                        "Default: 20")
group.add_argument('--comments_max_len','-c',
action="store", dest="test:commentsmaxlen",
......@@ -402,7 +476,14 @@ def run(config):
'view': None,
'view_names': None,
'unique_names': [],
'random_generator': {b"OBI_BOOL": random_bool, b"OBI_CHAR": random_char, b"OBI_FLOAT": random_float, b"OBI_INT": random_int, b"OBI_SEQ": random_seq, b"OBI_STR": random_bytes},
'random_generator': {
(b"OBI_BOOL", False): random_bool, (b"OBI_BOOL", True): random_bool_tuples,
(b"OBI_CHAR", False): random_char, (b"OBI_CHAR", True): random_char_tuples,
(b"OBI_FLOAT", False): random_float, (b"OBI_FLOAT", True): random_float_tuples,
(b"OBI_INT", False): random_int, (b"OBI_INT", True): random_int_tuples,
(b"OBI_SEQ", False): random_seq, (b"OBI_SEQ", True): random_seq_tuples,
(b"OBI_STR", False): random_bytes, (b"OBI_STR", True): random_bytes_tuples
},
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view]
}
......
......@@ -31,6 +31,7 @@ cdef extern from "obidmscolumn.h" nogil:
const_char_p elements_names
OBIType_t returned_data_type
OBIType_t stored_data_type
bint tuples
time_t creation_date
obiversion_t version
obiversion_t cloned_from
......@@ -60,3 +61,6 @@ cdef extern from "obidmscolumn.h" nogil:
int obi_close_header(OBIDMS_column_header_p header)
char* obi_get_elements_names(OBIDMS_column_p column)
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
......@@ -52,6 +52,7 @@ cdef extern from "obitypes.h" nogil:
extern const_char_p OBIStr_NA
extern const_char_p OBIQual_char_NA
extern uint8_t* OBIQual_int_NA
extern void* OBITuple_NA
const_char_p name_data_type(int data_type)
......
......@@ -14,7 +14,7 @@ from ..capi.obidmscolumn cimport OBIDMS_column_p, \
Column_reference_t, \
Column_reference_p
from libc.stdint cimport uint8_t
from libc.stdint cimport uint8_t, int32_t
cdef extern from "obiview.h" nogil:
......@@ -86,6 +86,7 @@ cdef extern from "obiview.h" nogil:
index_t nb_lines,
index_t nb_elements_per_line,
char* elements_names,
bint tuples,
const_char_p indexer_name,
const_char_p associated_column_name,
obiversion_t associated_column_version,
......@@ -327,3 +328,29 @@ cdef extern from "obiview.h" nogil:
OBIDMS_column_p column_p,
index_t line_nb,
index_t element_idx)
# ARRAY
int obi_set_array_with_col_p_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const void* value,
uint8_t elt_size,
int32_t value_length)
const void* obi_get_array_with_col_p_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
int32_t* value_length_p)
int obi_set_array_with_col_name_in_view(Obiview_p view,
const char* column_name,
index_t line_nb,
const void* value,
uint8_t elt_size,
int32_t value_length)
const void* obi_get_array_with_col_name_in_view(Obiview_p view,
const char* column_name,
index_t line_nb,
int32_t* value_length_p)
......@@ -22,7 +22,7 @@ cdef class Column(OBIWrapper) :
cdef inline OBIDMS_column_p pointer(self)
@staticmethod
cdef type get_column_class(obitype_t obitype, bint multi_elts)
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)
@staticmethod
cdef type get_python_type(obitype_t obitype, bint multi_elts)
......@@ -44,6 +44,7 @@ cdef class Column_line:
cdef register_column_class(obitype_t obitype,
bint multi_elts,
bint tuples,
type obiclass,
type python)
......
#cython: language_level=3
from obitools3.dms.column import typed_column
__OBIDMS_COLUMN_CLASS__ = {}
......@@ -44,12 +42,12 @@ cdef class Column(OBIWrapper) :
@staticmethod
cdef type get_column_class(obitype_t obitype, bint multi_elts):
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples):
'''
Internal function returning the python class representing
a column for a given obitype.
'''
return __OBIDMS_COLUMN_CLASS__[(obitype, multi_elts)][0]
return __OBIDMS_COLUMN_CLASS__[(obitype, multi_elts, tuples)][0]
@staticmethod
......@@ -76,6 +74,7 @@ cdef class Column(OBIWrapper) :
obitype_t data_type,
index_t nb_elements_per_line=1,
list elements_names=None,
bint tuples=False,
object comments=b"",
object alias=b""):
# TODO indexer_name?
......@@ -115,6 +114,7 @@ cdef class Column(OBIWrapper) :
nb_lines = len(view),
nb_elements_per_line = nb_elements_per_line,
elements_names = elements_names_p,
tuples = tuples,
indexer_name = NULL,
associated_column_name = NULL,
associated_column_version = -1,
......@@ -150,7 +150,7 @@ cdef class Column(OBIWrapper) :
column_p = column_pp[0]
column_type = column_p.header.returned_data_type
column_class = Column.get_column_class(column_type, (column_p.header.nb_elements_per_line > 1))
column_class = Column.get_column_class(column_type, (column_p.header.nb_elements_per_line > 1), column_p.header.tuples)
column = OBIWrapper.new_wrapper(column_class, column_pp)
column._view = view
......@@ -184,6 +184,7 @@ cdef class Column(OBIWrapper) :
nb_lines = -1,
nb_elements_per_line = -1,
elements_names = NULL,
tuples = False,
indexer_name = NULL,
associated_column_name = NULL,
associated_column_version = -1,
......@@ -326,6 +327,13 @@ cdef class Column(OBIWrapper) :
raise OBIDeactivatedInstanceError()
return self.pointer().header.lines_used
# tuples property getter
@property
def tuples(self):
    '''
    Return the `tuples` flag stored in the column header.

    Raises OBIDeactivatedInstanceError if the column is not active.
    '''
    if self.active() :
        return self.pointer().header.tuples
    raise OBIDeactivatedInstanceError()
# comments property getter
@property
def comments(self):
......@@ -436,6 +444,7 @@ cdef class Column_line :
cdef register_column_class(obitype_t obitype,
bint multi_elts,
bint tuples,
type obiclass,
type python_type):
'''
......@@ -446,7 +455,7 @@ cdef register_column_class(obitype_t obitype,
assert issubclass(obiclass, Column)
__OBIDMS_COLUMN_CLASS__[(obitype, multi_elts)] = (obiclass, python_type)
__OBIDMS_COLUMN_CLASS__[(obitype, multi_elts, tuples)] = (obiclass, python_type)
cdef register_all_column_classes() :
......
......@@ -20,7 +20,10 @@ cdef class Column_multi_elts_bool(Column_multi_elts) :
cpdef set_item(self, index_t line_nb, object elt_id, object value)
# Declaration of the column class used for OBI_BOOL columns whose lines each
# hold a tuple of values (the implementation lives in the matching .pyx).
cdef class Column_tuples_bool(Column):
# Returns the content of line `line_nb` as a tuple, or None for an NA line.
cpdef object get_line(self, index_t line_nb)
# Stores `value` (an iterable of booleans, or None) at line `line_nb`.
cpdef set_line(self, index_t line_nb, object value)
......
......@@ -13,14 +13,23 @@ from ...capi.obiview cimport obi_get_bool_with_elt_name_and_col_p_in_view, \
obi_get_bool_with_elt_idx_and_col_p_in_view, \
obi_set_bool_with_elt_name_and_col_p_in_view, \
obi_set_bool_with_elt_idx_and_col_p_in_view, \
obi_get_array_with_col_p_in_view, \
obi_set_array_with_col_p_in_view, \
Obiview_p
from ...capi.obidmscolumn cimport OBIDMS_column_p
from ...capi.obitypes cimport OBI_BOOL, OBIBool_NA, obibool_t
from ...capi.obitypes cimport OBI_BOOL, \
OBIBool_NA, \
OBITuple_NA, \
obibool_t
from cpython.bool cimport PyBool_FromLong
from libc.stdint cimport int32_t
from libc.stdlib cimport malloc, free
cdef class Column_bool(Column):
......@@ -29,10 +38,12 @@ cdef class Column_bool(Column):
object column_name,
index_t nb_elements_per_line=1,
object elements_names=None,
bint tuples=False,
object comments=b""):
return Column.new_column(view, column_name, OBI_BOOL,
nb_elements_per_line=nb_elements_per_line,
elements_names=elements_names,
tuples=tuples,
comments=comments)
cpdef object get_line(self, index_t line_nb):
......@@ -115,210 +126,67 @@ cdef class Column_multi_elts_bool(Column_multi_elts):
obi_errno_to_exception(obi_errno, line_nb=line_nb, elt_id=elt_id, error_message="Problem setting a value in a column")
def register_class() :
register_column_class(OBI_BOOL, False, Column_bool, bool)
register_column_class(OBI_BOOL, True, Column_multi_elts_bool, bool)
cdef class Column_tuples_bool(Column):
    '''
    Column of type OBI_BOOL in which each line stores a tuple of booleans.

    A line is read back as a Python tuple of bool, or as None when it holds
    the NA tuple (OBITuple_NA).
    '''

    cpdef object get_line(self, index_t line_nb) :
        '''
        Return the tuple stored at line `line_nb`.

        @param line_nb: the line number in the column
        @return: a tuple of `bool`, or None if the line holds OBITuple_NA
        @raise: whatever obi_errno_to_exception raises for the current obi_errno
        '''
        global obi_errno
        cdef obibool_t* array
        cdef int32_t value_length
        cdef int32_t i
        cdef list result
        array = <obibool_t*>obi_get_array_with_col_p_in_view(self._view.pointer(), self.pointer(), line_nb, &value_length)
        obi_errno_to_exception(obi_errno, line_nb=line_nb, elt_id=None, error_message="Problem getting a value from a column")
        if array == OBITuple_NA :
            return None
        result = []
        for i in range(value_length) :
            result.append(PyBool_FromLong(array[i]))
        return tuple(result)

    cpdef set_line(self, index_t line_nb, object value) :
        '''
        Store `value` at line `line_nb`.

        None elements are dropped before storing. If `value` is None, or
        contains no element other than None, OBITuple_NA is stored instead.

        @param line_nb: the line number in the column
        @param value: an iterable of booleans (None elements allowed), or None
        @raise Exception: if the temporary array can not be allocated
        @raise: whatever obi_errno_to_exception raises if the value can not be set
        '''
        global obi_errno
        cdef obibool_t* array
        cdef int32_t value_length
        cdef int32_t i
        cdef list kept
        cdef object e
        # Filter the None elements in a single pass, so that any iterable
        # (not only an indexable sequence) is accepted.
        kept = []
        if value is not None :
            for e in value :
                if e is not None :
                    kept.append(e)
        value_length = len(kept)
        if value_length == 0 :
            array = <obibool_t*>OBITuple_NA
        else :
            array = <obibool_t*>malloc(value_length * sizeof(obibool_t))
            if array == NULL :
                raise Exception("Problem allocating memory for an array to store a tuple")
                #raise RollbackException("Problem allocating memory for an array to store a tuple", self._view) # TODO can't import
        try :
            # Nothing is written when value_length == 0, so OBITuple_NA is never modified
            for i in range(value_length) :
                array[i] = <obibool_t>(kept[i])
            # Element size is passed in bits
            if obi_set_array_with_col_p_in_view(self._view.pointer(), self.pointer(), line_nb, <obibool_t*> array, sizeof(obibool_t)*8, value_length) < 0 :
                obi_errno_to_exception(obi_errno, line_nb=line_nb, elt_id=None, error_message="Problem setting a value in a column")
        finally :
            # Release the temporary buffer even when an exception is raised above
            if array != <obibool_t*>OBITuple_NA :
                free(array)
def register_class() :
    '''
    Register the three OBI_BOOL column classes (single element, multiple
    elements, tuples) with `bool` as their python type.
    '''
    # Each entry: (multi_elts, tuples, column class)
    for multi_elts, tuples, column_class in ((False, False, Column_bool),
                                             (True, False, Column_multi_elts_bool),
                                             (False, True, Column_tuples_bool)) :
        register_column_class(OBI_BOOL, multi_elts, tuples, column_class, bool)
# cdef class Column_line_bool(Column_line) :
#
# cdef update_pointer(self):
# """
# Checks if the obicolumn address changed since the last call and update
# if need the `_column_p` and `_data_view` data structure fields.
# """
# cdef OBIDMS_column_p* column_pp
# column_pp = <OBIDMS_column_p*>self._pointer
# cdef OBIDMS_column_p column_p = column_pp[0]
#
# if column_p != self._column_p:
# self._column_p = column_p
# self._data_view = (<obibool_t*> (column_p.data)) + \
# self._index * column_p.header.nb_elements_per_line
#
# @staticmethod
# cdef bool obibool_t2bool(obibool_t value):
# cdef bool result
#
# if value == OBIBool_NA :
# result = None
# else :
# result = PyBool_FromLong(value)
#
# return result
#
# @staticmethod
# cdef bool2obibool_t(bool value):
# cdef obibool_t result
#
# if value is None:
# result=OBIBool_NA
# else:
# result= <obibool_t> <int> value
#
# return result
#
#
# def __init__(self, Column column, index_t line_nb) :
# """
# Creates a new `OBIDMS_column_line_bool`
#
# @param column: an OBIDMS_column instance
# @param line_nb: the line in the column
# """
#
# Column_line.__init__(self, column, line_nb)
# self.update_pointer()
#
#
#
# cpdef bool get_bool_item_by_name(self, bytes element_name) :
# """
# Returns the value associated to the name `element_name` of the current line
#
# @param element_name: a `bytes` instance containing the name of the element
#
# @return: the `bool` value corresponding to the name
# """
# cdef char* cname = element_name
# cdef obibool_t value
# global obi_errno
#
# self.update_pointer()
#
# cdef OBIDMS_column_p* column_pp
# column_pp = <OBIDMS_column_p*>self._pointer
# cdef OBIDMS_column_p column_p = column_pp[0]
#
# value = obi_column_get_obibool_with_elt_name(column_p,
# self._index,
# cname)
#
# if obi_errno > 0 :
# obi_errno = 0
# raise KeyError("Cannot access to key %s" % bytes2str(element_name))
#
# return Column_line_bool.obibool_t2bool(value)
#
#
# cpdef bool get_bool_item_by_idx(self,index_t index):
# """
# Returns the value associated to the name `element_name` of the current line
#
# @param index: a `int` instance containing the index of the element
#
# @return: the `bool` value corresponding to the name
# """
# cdef obibool_t value # @DuplicatedSignature
# global obi_errno
#
# cdef OBIDMS_column_p* column_pp
# column_pp = <OBIDMS_column_p*>self._pointer
# cdef OBIDMS_column_p column_p = column_pp[0]
#
# self.update_pointer()
#
# value = obi_column_get_obibool_with_elt_idx(column_p,
# self._index,
# index)
#
# if obi_errno > 0 :
# obi_errno = 0
# raise IndexError("Cannot access to element %d" % index)
#
# return Column_line_bool.obibool_t2bool(value)
#
#
# def __getitem__(self, object element_name) :
# cdef bytes name
# cdef int cindex
# cdef obibool_t value
# cdef type typearg = type(element_name)
# cdef bool result
#
#
# if typearg == int: