Commit 69856f18 by Celine Mercier

Untested use (with no retrieval possible yet) of CRC to represent data in AVL trees
parent 58ac860c
......@@ -25,4 +25,6 @@
../../../src/bloom.c
../../../src/bloom.h
../../../src/MurmurHash2.c
../../../src/murmurhash2.h
\ No newline at end of file
../../../src/murmurhash2.h
../../../src/crc64.c
../../../src/crc64.h
This diff is collapsed. Click to expand it.
/**
* @file crc64.h
* @date March 24th 2016
* @brief Header file for CRC64 function.
*/
#ifndef CRC64_H_
#define CRC64_H_

#include <stdint.h>

/**
* @brief Computes a 64-bit CRC over a byte buffer.
*
* @param s Pointer to the first byte of the buffer to checksum.
* @param l Length of the buffer; presumably a byte count, as the AVL code
*          passes the header size plus the payload length (confirm against
*          crc64.c).
*
* @returns The 64-bit CRC value.
*/
uint64_t crc64(const unsigned char* s, uint64_t l);

#endif /* CRC64_H_ */
......@@ -19,9 +19,8 @@
#include <fcntl.h>
#include <math.h>
//#include <libbloom.h>
#include "bloom.h"
#include "crc64.h"
#include "obiavl.h"
#include "obierrno.h"
#include "obitypes.h"
......@@ -692,6 +691,7 @@ AVL_node_p avl_create_node(OBIDMS_avl_p avl, index_t node_idx)
node->right_child = -1;
node->balance_factor = 0;
node->value = -1;
node->crc64 = 0; // TODO
return node;
}
......@@ -1018,7 +1018,7 @@ int remap_an_avl(OBIDMS_avl_p avl)
(avl->data)->data = mmap(NULL,
((avl->data)->header)->data_size_max,
PROT_READ,
MAP_SHARED,
MAP_SHARED, // TODO test MAP_PRIVATE?
avl->data_fd,
((avl->data)->header)->header_size);
if ((avl->data)->data == NULL)
......@@ -1027,7 +1027,7 @@ int remap_an_avl(OBIDMS_avl_p avl)
avl->tree = mmap(NULL,
((avl->header)->nb_items_max) * sizeof(AVL_node_t),
PROT_READ,
MAP_SHARED,
MAP_SHARED, // TODO test MAP_PRIVATE?
avl->avl_fd,
(avl->header)->header_size);
if (avl->tree == NULL)
......@@ -1054,8 +1054,6 @@ int obi_add_new_avl_in_group(OBIDMS_avl_group_p avl_group) // TODO check for err
return -1;
}
//fprintf(stderr, "\nindex length = %d, file name = %s\n", avl_idx_length, avl_name_with_idx);
(avl_group->sub_avls)[avl_group->current_avl_idx] = obi_create_avl(avl_group->dms, avl_name_with_idx);
if ((avl_group->sub_avls)[avl_group->current_avl_idx] == NULL)
{
......@@ -1578,7 +1576,8 @@ index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value) // TODO
if ((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->nb_items == NODE_COUNT_PER_AVL) // TODO add condition with data size
obi_add_new_avl_in_group(avl_group);
bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1))));
bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)));
return obi_avl_add((avl_group->sub_avls)[avl_group->current_avl_idx], value);
}
......@@ -1596,6 +1595,10 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
int n = 0;
int depth = 0;
uint64_t crc;
crc = crc64(value, BYTE_ARRAY_HEADER_SIZE + ((uint64_t) (*((int32_t*)(value+1))))); // TODO warning
// Check if first node
if (!((avl->header)->nb_items))
{
......@@ -1604,6 +1607,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
// Add the value in the data array and store its index
value_data_idx = avl_add_value_in_data_array(avl, value);
node_to_add->value = value_data_idx;
node_to_add->crc64 = crc;
// Update the number of items
((avl->header)->nb_items)++;
......@@ -1635,8 +1639,15 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
parent = next;
// Compare value with value of current node
to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value);
//to_compare = obi_avl_get(avl, current_node->value);
//comp = byte_array_compare(to_compare, value);
comp = (current_node->crc64) - crc;
if (comp == 0)
{ // check if really same value
to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value);
}
if (comp > 0)
// Go to left child
......@@ -1647,7 +1658,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
else if (comp == 0)
// Value already stored
{
//fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
return current_node->value; // TODO should trigger error if using bloom filters
}
......@@ -1676,6 +1687,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
// Add the value in the data array and store its index
value_data_idx = avl_add_value_in_data_array(avl, value);
node_to_add->value = value_data_idx;
node_to_add->crc64 = crc;
// Update the number of items
((avl->header)->nb_items)++;
......@@ -1706,7 +1718,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
}
// Find if a value is already in an AVL tree TODO use bloom
// Find if a value is already in an AVL tree
index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
{
int comp;
......@@ -1714,14 +1726,25 @@ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
byte_t* to_compare;
AVL_node_p current_node;
uint64_t crc;
crc = crc64(value, BYTE_ARRAY_HEADER_SIZE + ((uint64_t) (*((int32_t*)(value+1))))); // TODO warning
next = (avl->header)->root_idx;
while (next != -1)
{
current_node = (avl->tree)+next;
// Compare value with value of current node
to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value);
//to_compare = obi_avl_get(avl, current_node->value);
//comp = byte_array_compare(to_compare, value);
comp = (current_node->crc64) - crc;
if (comp == 0)
{ // check if really same value
to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value);
}
if (comp > 0)
// Go to left child
......@@ -1730,8 +1753,10 @@ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
// Go to right child
next = current_node->right_child;
else if (comp == 0)
// Value found
{ // Value found
fprintf(stderr, "\n>>>ALREADY IN in find, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
return current_node->value;
}
}
// Value not found
return -1;
......
......@@ -24,10 +24,10 @@
#include "obidms.h"
#include "obitypes.h"
#include "bloom.h"
#define NODE_COUNT_PER_AVL (2000000)
#define NODE_COUNT_PER_AVL (10000000)
#define BLOOM_FILTER_ERROR_RATE (0.001)
......@@ -59,6 +59,7 @@ typedef struct AVL_node {
*/
index_t value; /**< Index of the value associated with the node in the data array.
*/
uint64_t crc64; // TODO
} AVL_node_t, *AVL_node_p;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment