Commit 312f50ff by Celine Mercier

Major update: Column aliases. Columns are now identified in the context

of a view by an alias that can be modified.
parent 3843485a
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -18,7 +18,6 @@ cdef class OBIDMS_column:
cdef index_t nb_elements_per_line
cdef list elements_names
cpdef update_pointer(self)
cpdef list get_elements_names(self)
cpdef str get_data_type(self)
cpdef index_t get_nb_lines_used(self)
......@@ -52,6 +51,7 @@ cdef class OBIView:
cpdef add_column(self,
str column_name,
obiversion_t version_number=*,
str alias=*,
str type=*,
index_t nb_lines=*,
index_t nb_elements_per_line=*,
......@@ -62,6 +62,8 @@ cdef class OBIView:
str comments=*,
bint create=*
)
cpdef change_column_alias(self, str current_alias, str new_alias)
cpdef update_column_pointers(self)
cpdef select_line(self, index_t line_nb)
cpdef select_lines(self, list line_selection)
cpdef save_and_close(self)
......
......@@ -55,7 +55,7 @@ from ._obidmscolumn_seq cimport OBIDMS_column_seq, \
from .capi.obiview cimport Obiview_p, \
Obiview_infos_p, \
Column_reference_p, \
Alias_column_pair_p, \
obi_new_view_nuc_seqs, \
obi_new_view, \
obi_new_view_cloned_from_name, \
......@@ -65,7 +65,7 @@ from .capi.obiview cimport Obiview_p, \
obi_open_view, \
obi_view_delete_column, \
obi_view_add_column, \
obi_view_get_column, \
obi_view_create_column_alias, \
obi_view_get_column, \
obi_view_get_pointer_on_column_in_view, \
obi_select_line, \
......@@ -124,9 +124,6 @@ cdef class OBIDMS_column :
for line_nb in range(lines_used):
yield self.get_line(line_nb)
cpdef update_pointer(self):
self.pointer = <OBIDMS_column_p*> obi_view_get_pointer_on_column_in_view(self.view.pointer, str2bytes(self.column_name))
cpdef list get_elements_names(self):
return self.elements_names
......@@ -297,16 +294,16 @@ cdef class OBIView :
for i in range(view.infos.column_count) :
column_p = <OBIDMS_column_p> (view.columns)[i]
header = (column_p).header
col_name = bytes2str(header.name)
col_name = bytes2str(view.infos.column_references[i].alias)
subclass = OBIDMS_column.get_subclass_type(column_p)
self.columns[col_name] = subclass(self, col_name)
def __repr__(self) :
cdef str s
s = str(self.name) + "\n" + str(self.comments) + "\n" + str(self.pointer.infos.line_count) + " lines\n"
for column_name in self.columns :
s = s + self.columns[column_name].__repr__() + '\n'
s = s + column_name + ": " + self.columns[column_name].__repr__() + '\n'
return s
......@@ -317,15 +314,15 @@ cdef class OBIView :
if obi_view_delete_column(self.pointer, str2bytes(column_name)) < 0 :
raise Exception("Problem deleting a column from a view")
# Update the dictionaries of column pointers and column objects, and update pointers in column objects (make function?):
# Update the dictionary of column objects:
(self.columns).pop(column_name)
for column_n in self.columns :
(self.columns[column_n]).update_pointer()
self.update_column_pointers()
cpdef add_column(self,
str column_name,
obiversion_t version_number=-1,
str alias='',
str type='',
index_t nb_lines=0,
index_t nb_elements_per_line=1,
......@@ -343,6 +340,11 @@ cdef class OBIView :
cdef OBIDMS_column_p column_p
column_name_b = str2bytes(column_name)
if alias == '' :
alias = column_name
alias_b = column_name_b
else :
alias_b = str2bytes(alias)
if nb_elements_per_line > 1 :
elements_names_b = str2bytes(';'.join(elements_names))
......@@ -366,8 +368,8 @@ cdef class OBIView :
data_type = OBI_SEQ
else :
raise Exception("Invalid provided data type")
if (obi_view_add_column(self.pointer, column_name_b, version_number, # TODO should return pointer on column?
if (obi_view_add_column(self.pointer, column_name_b, version_number, alias_b, # TODO should return pointer on column?
data_type, nb_lines, nb_elements_per_line,
elements_names_b, str2bytes(indexer_name),
str2bytes(associated_column_name), associated_column_version,
......@@ -375,12 +377,28 @@ cdef class OBIView :
raise Exception("Problem adding a column in a view")
# Get the column pointer
column_p = obi_view_get_column(self.pointer, column_name_b)
column_p = obi_view_get_column(self.pointer, alias_b)
# Open and store the subclass
subclass = OBIDMS_column.get_subclass_type(column_p)
(self.columns)[column_name] = subclass(self, column_name)
(self.columns)[alias] = subclass(self, alias)
cpdef change_column_alias(self, str current_alias, str new_alias):
if (obi_view_create_column_alias(self.pointer, str2bytes(current_alias), str2bytes(new_alias)) < 0) :
raise Exception("Problem changing a column alias")
# Update the dictionary of column objects
self.columns[new_alias] = self.columns[current_alias]
(self.columns).pop(current_alias)
cpdef update_column_pointers(self):
cdef str column_n
cdef OBIDMS_column column
for column_n in self.columns :
column = self.columns[column_n]
column.pointer = <OBIDMS_column_p*> obi_view_get_pointer_on_column_in_view(self.pointer, str2bytes(column_n))
cpdef save_and_close(self) :
if (obi_save_and_close_view(self.pointer) < 0) :
......@@ -488,7 +506,7 @@ cdef class OBIView_NUC_SEQS(OBIView):
for i in range(view.infos.column_count) :
column_p = <OBIDMS_column_p> (view.columns)[i]
header = (column_p).header
col_name = bytes2str(header.name)
col_name = bytes2str(view.infos.column_references[i].alias)
subclass = OBIDMS_column.get_subclass_type(column_p)
self.columns[col_name] = subclass(self, col_name)
......@@ -548,7 +566,7 @@ cdef class OBIView_line :
(((self.view).columns)[column_name]).set_line(self.index, value)
def __contains__(self, str column_name):
return (column_name in self.view)
return (column_name in self.view.columns)
def __repr__(self):
cdef dict line
......@@ -618,7 +636,7 @@ cdef class OBIDMS :
cdef Obiview_infos_p view_infos_p
cdef dict view_infos_d
cdef Column_reference_p column_refs
cdef Alias_column_pair_p column_refs
cdef int i, j
cdef str column_name
......@@ -638,11 +656,12 @@ cdef class OBIDMS :
view_infos_d["line_selection"]["column_name"] = bytes2str((view_infos_p.line_selection).column_name)
view_infos_d["line_selection"]["version"] = <int> (view_infos_p.line_selection).version
view_infos_d["column_references"] = {}
column_refs = view_infos_p.column_references
column_references = view_infos_p.column_references
for j in range(view_infos_d["column_count"]) :
column_name = bytes2str((column_refs[j]).column_name)
column_name = bytes2str((column_references[j]).alias)
view_infos_d["column_references"][column_name] = {}
view_infos_d["column_references"][column_name]["version"] = column_refs[j].version
view_infos_d["column_references"][column_name]["original_name"] = bytes2str((column_references[j]).column_refs.column_name)
view_infos_d["column_references"][column_name]["version"] = (column_references[j]).column_refs.version
obi_view_unmap_file(self.pointer, view_infos_p)
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -8,6 +8,8 @@
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/hashtable.h
../../../src/hashtable.c
../../../src/murmurhash2.h
../../../src/murmurhash2.c
../../../src/obi_align.h
......
......@@ -26,6 +26,13 @@ cdef extern from "obiview.h" nogil:
extern const_char_p QUALITY_COLUMN
struct Alias_column_pair_t :
Column_reference_t column_refs
const_char_p alias
ctypedef Alias_column_pair_t* Alias_column_pair_p
struct Obiview_infos_t :
time_t creation_date
const_char_p name
......@@ -35,7 +42,7 @@ cdef extern from "obiview.h" nogil:
Column_reference_t line_selection
index_t line_count
int column_count
Column_reference_p column_references
Alias_column_pair_p column_references
const_char_p comments
ctypedef Obiview_infos_t* Obiview_infos_p
......@@ -48,7 +55,9 @@ cdef extern from "obiview.h" nogil:
OBIDMS_column_p line_selection
OBIDMS_column_p new_line_selection
OBIDMS_column_p columns
int nb_predicates
# TODO declarations for column dictionary and predicate function array?
ctypedef Obiview_t* Obiview_p
......@@ -69,6 +78,7 @@ cdef extern from "obiview.h" nogil:
int obi_view_add_column(Obiview_p view,
const_char_p column_name,
obiversion_t version_number,
const_char_p alias,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
......@@ -89,6 +99,8 @@ cdef extern from "obiview.h" nogil:
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
int obi_save_view(Obiview_p view)
int obi_close_view(Obiview_p view)
......
/****************************************************************************
* Hash table source file *
****************************************************************************/
/**
* @file hashtable.c
* @author Celine Mercier
* @date July 26th 2016
* @brief Source file for hash table functions.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include "string.h"
#include "murmurhash2.h"
#include "hashtable.h"
// Create a new hashtable with `size` bins, all of them empty.
// Returns a pointer to the new hash table, or NULL if size is 0, if the size
// of the bin array would overflow, or if a memory allocation fails.
hashtable_p ht_create(size_t size)
{
    hashtable_p hashtable = NULL;
    size_t      i;

    // A table without bins is unusable: the other functions select a bin with
    // a modulo on the table size. Also refuse sizes for which the byte count
    // of the bin array can not be represented.
    if ((size == 0) || (size > (SIZE_MAX / sizeof(entry_p))))
        return NULL;

    // Allocate the table
    hashtable = malloc(sizeof(hashtable_t));
    if (hashtable == NULL)
        return NULL;

    // Allocate the head nodes
    hashtable->table = malloc(size * sizeof(entry_p));
    if (hashtable->table == NULL)
    {
        // Do not leak the table structure when the bin array can not be allocated
        free(hashtable);
        return NULL;
    }

    // Initialize the head nodes
    for (i=0; i<size; i++)
        hashtable->table[i] = NULL;

    hashtable->size = size;

    return hashtable;
}
// Create an entry that is not linked to any bin yet.
// The key is copied with strdup(); the value pointer is stored as given.
// Returns a pointer to the new entry, or NULL if key is NULL or if a memory
// allocation fails.
entry_p ht_new_entry(const char* key, void* value)
{
    entry_p new_entry;

    // strdup() must not be called on a NULL pointer
    if (key == NULL)
        return NULL;

    new_entry = malloc(sizeof(entry_t));
    if (new_entry == NULL)
        return NULL;

    new_entry->key = strdup(key);
    if (new_entry->key == NULL)
    {
        // Do not leak the entry when the key can not be copied
        free(new_entry);
        return NULL;
    }

    new_entry->value = value;
    new_entry->next = NULL;

    return new_entry;
}
// Delete the entry associated with a key.
// Returns 0 if the entry was deleted, -1 if the table or the key is NULL or
// if the key is not in the table.
int ht_delete_entry(hashtable_p hashtable, const char* key)
{
    entry_p last = NULL;
    entry_p entry = NULL;
    size_t  bin = 0;

    // Same argument checking as ht_set(): a NULL key can not be hashed
    if ((hashtable == NULL) || (key == NULL))
        return -1;

    bin = murmurhash2(key, strlen(key), SEED);
    bin = bin % hashtable->size;

    // Step through the bin looking for the key, remembering the previous entry
    entry = hashtable->table[bin];
    while ((entry != NULL) && (strcmp(key, entry->key) != 0))
    {
        last = entry;
        entry = entry->next;
    }

    if (entry == NULL)  // key not found
        return -1;

    // Link the entries before and after the entry
    if (last != NULL)   // If not head node
        last->next = entry->next;
    else                // If head node
        hashtable->table[bin] = entry->next;

    // Free the entry
    free(entry->key);
    // NOTE(review): the stored value is freed here, whereas ht_free() leaves
    // the values untouched. This is only valid if the value was allocated with
    // malloc() and is owned by the table -- confirm against the callers.
    free(entry->value);
    free(entry);

    return 0;
}
// Set a new entry in the hash table. If the key is already in the table, the value is replaced by the new one.
// Returns 0 if the entry was set, -1 if the table, the key or the value is
// NULL or if the new entry could not be allocated.
int ht_set(hashtable_p hashtable, const char* key, void* value)
{
    size_t  bin = 0;
    entry_p new_entry = NULL;
    entry_p entry = NULL;
    entry_p last = NULL;

    if ((hashtable == NULL) || (key == NULL) || (value == NULL))
        return -1;

    bin = murmurhash2(key, strlen(key), SEED);
    bin = bin % hashtable->size;

    // Step through the bin looking for the key, remembering the previous entry
    entry = hashtable->table[bin];
    while ((entry != NULL) && (strcmp(key, entry->key) != 0))
    {
        last = entry;
        entry = entry->next;
    }

    // If the loop stopped on an entry, its key matches: replace the value of
    // that existing entry. The previous value pointer is only overwritten,
    // it is not freed here.
    if (entry != NULL)
    {
        entry->value = value;
        return 0;
    }

    // Else, create the new entry and link it at the end of the list
    new_entry = ht_new_entry(key, value);
    if (new_entry == NULL)
        return -1;

    if (last == NULL)   // Empty bin: the new entry becomes the head node
        hashtable->table[bin] = new_entry;
    else                // Else link the new entry after the last one
        last->next = new_entry;

    return 0;
}
// Retrieve a value from a hash table.
// Returns the value pointer stored for the key, or NULL if the table or the
// key is NULL or if the key is not in the table.
void* ht_get(hashtable_p hashtable, const char* key)
{
    size_t  bin = 0;
    entry_p entry;

    // Same argument checking as ht_set(): a NULL key can not be hashed
    if ((hashtable == NULL) || (key == NULL))
        return NULL;

    bin = murmurhash2(key, strlen(key), SEED);
    bin = bin % hashtable->size;

    // Step through the bin looking for the key
    entry = hashtable->table[bin];
    while ((entry != NULL) && (strcmp(key, entry->key) != 0))
        entry = entry->next;

    if (entry == NULL)
        return NULL;

    return entry->value;
}
// Free the hash table: every entry and its key, the bin array and the table
// structure. The stored values are not freed. Does nothing if the table is NULL.
void ht_free(hashtable_p hashtable)
{
    size_t  i;
    entry_p entry;
    entry_p next;

    // Like free(), accept NULL so that cleanup code can call this unconditionally
    if (hashtable == NULL)
        return;

    for (i=0; i < hashtable->size; i++)
    {
        next = hashtable->table[i];
        while (next != NULL)
        {
            entry = next;
            // Read the link before the entry is released
            next = entry->next;
            free(entry->key);
            free(entry);
        }
    }

    free(hashtable->table);
    free(hashtable);
}
/****************************************************************************
* Hash table header file *
****************************************************************************/
/**
* @file hashtable.h
* @author Celine Mercier
* @date July 26th 2016
* @brief Header file for hash table functions.
*/
#ifndef HASHTABLE_H_
#define HASHTABLE_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#define SEED (0x9747b28c) /**< The seed used by the hash function.
*/
/**
* @brief Structure for an entry.
*
* Entries that fall in the same bin are chained in a singly linked list.
*/
typedef struct entry_s {
char* key; /**< Key used to refer to the entry.
* The entry holds its own copy of the key, made with strdup() in ht_new_entry().
*/
void* value; /**< Pointer to the stored value.
* The pointer is stored as given, the value itself is not copied.
*/
struct entry_s* next; /**< Pointer to the next entry in the bin (NULL for the last entry).
*/
} entry_t, *entry_p;
/**
* @brief Structure for a hash table.
*
* The bin of a key is its murmurhash2 hash (seeded with SEED) modulo the number of bins.
*/
typedef struct hashtable {
size_t size; /**< Number of bins in the table.
*/
entry_p* table; /**< Table of bins. Each bin is the head of a linked list of entries,
* or NULL if the bin is empty.
*/
} hashtable_t, *hashtable_p;
/**
* @brief Creates a new hashtable.
*
* All the bins of the new table are empty.
*
* @warning The number of bins should be greater than 0, as the bin of a key is
* computed with a modulo on this number.
*
* @param size The number of bins in the hash table.
*
* @returns A pointer to the newly created hash table.
* @retval NULL if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
hashtable_p ht_create(size_t size);
/**
* @brief Inserts a new entry in the hash table.
* If the key is already in the table, the value is replaced by the new one.
*
* The key is copied in the new entry, whereas the value pointer is stored as given.
*
* @param hashtable A pointer to the hash table structure.
* @param key The key (must not be NULL).
* @param value A pointer to the value associated with the key (must not be NULL).
*
* @retval 0 if the entry was correctly set.
* @retval -1 if the key or the value is NULL, or if an error occurred.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int ht_set(hashtable_p hashtable, const char* key, void* value);
/**
* @brief Retrieves a value from a hash table.
*
* The returned pointer is the one stored in the entry, not a copy of the value.
*
* @param hashtable A pointer to the hash table structure.
* @param key The key.
*
* @returns A pointer to the value associated with the key.
* @retval NULL if the key was not found.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void* ht_get(hashtable_p hashtable, const char* key);
/**
* @brief Deletes an entry.
*
* The entry is unlinked from its bin, then its key, its value and the entry itself are freed.
*
* @warning The stored value pointer is passed to free() when the entry is deleted,
* whereas ht_free() does not free the values.
*
* @param hashtable A pointer to the hash table structure.
* @param key The key.
*
* @retval 0 if the entry was correctly deleted.
* @retval -1 if the key was not found.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int ht_delete_entry(hashtable_p hashtable, const char* key);
/**
* @brief Frees a hash table.
*
* Frees every entry with its key, the table of bins and the hash table structure.
* The values stored in the entries are not freed.
*
* @param hashtable A pointer to the hash table structure.
*
* @since July 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void ht_free(hashtable_p hashtable);
#endif /* HASHTABLE_H_ */
......@@ -25,6 +25,7 @@
#include "obidms.h"
#include "obidmscolumn.h"
#include "obierrno.h"
#include "hashtable.h"
#define OBIVIEW_NAME_MAX_LENGTH (1000) /**< The maximum length of an OBIDMS view name.
......@@ -55,6 +56,18 @@
/**
* @brief Structure for column aliases.
* Column aliases are alternative names used to identify a column in the context of a view.
*/
typedef struct Alias_column_pair {
Column_reference_t column_refs; /**< References (name and version) of the column.
*/
char alias[OBIDMS_COLUMN_MAX_NAME+1]; /**< Alias of the column in the context of a view.
*/
} Alias_column_pair_t, *Alias_column_pair_p;
/**
* @brief Structure for a closed view stored in the view file.
* Views are identified by their name.
* Once a view has been written in the view file, it can not be modified and can only be read.
......@@ -77,8 +90,8 @@ typedef struct Obiview_infos {
*/
int column_count; /**< The number of columns in the view.
*/
Column_reference_t column_references[MAX_NB_OPENED_COLUMNS]; /**< References (name and version) for all the columns in the view.
*/
Alias_column_pair_t column_references[MAX_NB_OPENED_COLUMNS]; /**< References (name, version and alias) for all the columns in the view.
*/
char comments[OBIVIEW_COMMENTS_MAX_LENGTH+1]; /**< Comments, additional informations on the view.
*/
} Obiview_infos_t, *Obiview_infos_p;
......@@ -88,27 +101,30 @@ typedef struct Obiview_infos {
* @brief Structure for an opened view.
*/
typedef struct Obiview {
Obiview_infos_p infos; /**< A pointer on the mapped view informations.
*/
OBIDMS_p dms; /**< A pointer on the DMS to which the view belongs.
*/
bool read_only; /**< Whether the view is read-only or can be modified.
Obiview_infos_p infos; /**< A pointer on the mapped view informations.
*/
OBIDMS_p dms; /**< A pointer on the DMS to which the view belongs.
*/
bool read_only; /**< Whether the view is read-only or can be modified.
*/
OBIDMS_column_p line_selection; /**< A pointer on the column containing the line selection
OBIDMS_column_p line_selection; /**< A pointer on the column containing the line selection
* associated with the view if there is one.
* This line selection is read-only, and when a line from the view is read,
* it is this line selection that is used.
*/
OBIDMS_column_p new_line_selection; /**< A pointer on the column containing the new line selection being built
OBIDMS_column_p new_line_selection; /**< A pointer on the column containing the new line selection being built
* to associate with the view, if there is one.
* When a line is selected with obi_select_line() or obi_select_lines(),
* it is recorded in this line selection.
*/
OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS]; /**< Array of pointers on all the columns of the view.
OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS]; /**< Array of pointers on all the columns of the view.
*/
int nb_predicates; /**< Number of predicates to test when closing the view.
hashtable_p column_dict; /**< Hash table storing the pairs of column names or aliases with the associated
* column pointers.
*/
int nb_predicates; /**< Number of predicates to test when closing the view.
*/
char* (**predicate_functions)(struct Obiview* view); /**< Array of pointers on all predicate functions to test when closing the view.
char* (**predicate_functions)(struct Obiview* view); /**< Array of pointers on all predicate functions to test when closing the view.
*/
} Obiview_t, *Obiview_p;
......@@ -275,6 +291,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
* @param view A pointer on the view.
* @param column_name The name of the column.
* @param version_number The version of the column if it should be opened and not created (if -1, the latest version is retrieved).
* @param alias The unique name used to identify the column in the context of this view.
* @param data_type The OBIType code of the data.
* @param nb_lines The number of lines to be stored.
* @param nb_elements_per_line The number of elements per line.
......@@ -293,6 +310,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
int obi_view_add_column(Obiview_p view,
const char* column_name,
obiversion_t version_number,
const char* alias,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
......@@ -355,6 +373,27 @@ OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const ch
/**
* @brief Changes the name that identifies a column in the context of a view.
*
* In the context of a view, each column is identified by a name that is unique in this view.
*
* @warning The view must be writable.
*
* @param view A pointer on the view.
* @param current_name The current name that identifies the column in this view.
* @param alias The new name that should be used to identify the column in this view.
*