Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools3
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
23
Issues
23
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools3
Commits
be05c889
Commit
be05c889
authored
Apr 12, 2016
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
DNA_seq_indexer API
parent
04e3a7b5
Changes
16
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
200 additions
and
97 deletions
+200
-97
python/obitools3/obidms/_obidms.cfiles
python/obitools3/obidms/_obidms.cfiles
+2
-0
python/obitools3/obidms/_obidmscolumn_bool.cfiles
python/obitools3/obidms/_obidmscolumn_bool.cfiles
+2
-0
python/obitools3/obidms/_obidmscolumn_char.cfiles
python/obitools3/obidms/_obidmscolumn_char.cfiles
+2
-0
python/obitools3/obidms/_obidmscolumn_float.cfiles
python/obitools3/obidms/_obidmscolumn_float.cfiles
+2
-0
python/obitools3/obidms/_obidmscolumn_int.cfiles
python/obitools3/obidms/_obidmscolumn_int.cfiles
+2
-0
python/obitools3/obidms/_obidmscolumn_seq.cfiles
python/obitools3/obidms/_obidmscolumn_seq.cfiles
+2
-0
python/obitools3/obidms/_obidmscolumn_str.cfiles
python/obitools3/obidms/_obidmscolumn_str.cfiles
+2
-0
python/obitools3/obidms/_obiseq.cfiles
python/obitools3/obidms/_obiseq.cfiles
+2
-0
python/obitools3/obidms/_obitaxo.cfiles
python/obitools3/obidms/_obitaxo.cfiles
+2
-0
src/dna_seq_indexer.c
src/dna_seq_indexer.c
+106
-0
src/dna_seq_indexer.h
src/dna_seq_indexer.h
+63
-0
src/encode.c
src/encode.c
+2
-2
src/encode.h
src/encode.h
+2
-2
src/obiblob.c
src/obiblob.c
+0
-47
src/obiblob.h
src/obiblob.h
+5
-30
src/obidmscolumn_seq.c
src/obidmscolumn_seq.c
+4
-16
No files found.
python/obitools3/obidms/_obidms.cfiles
View file @
be05c889
...
...
@@ -2,6 +2,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
python/obitools3/obidms/_obidmscolumn_bool.cfiles
View file @
be05c889
...
...
@@ -4,6 +4,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
python/obitools3/obidms/_obidmscolumn_char.cfiles
View file @
be05c889
...
...
@@ -4,6 +4,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
python/obitools3/obidms/_obidmscolumn_float.cfiles
View file @
be05c889
...
...
@@ -4,6 +4,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
python/obitools3/obidms/_obidmscolumn_int.cfiles
View file @
be05c889
...
...
@@ -4,6 +4,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
python/obitools3/obidms/_obidmscolumn_seq.cfiles
View file @
be05c889
...
...
@@ -4,6 +4,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
python/obitools3/obidms/_obidmscolumn_str.cfiles
View file @
be05c889
...
...
@@ -4,6 +4,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
python/obitools3/obidms/_obiseq.cfiles
View file @
be05c889
...
...
@@ -2,6 +2,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
python/obitools3/obidms/_obitaxo.cfiles
View file @
be05c889
...
...
@@ -2,6 +2,8 @@
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
...
...
src/dna_seq_indexer.c
0 → 100644
View file @
be05c889
/****************************************************************************
* DNA sequence indexing functions *
****************************************************************************/
/**
* @file dna_seq_indexer.c
* @author Celine Mercier
* @date April 12th 2016
* @brief Functions handling the indexing and retrieval of DNA sequences.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <stdint.h>
#include <string.h>
#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidms.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**
 * Converts a DNA sequence to a blob.
 *
 * The sequence is encoded on 2 bits per nucleotide when only_ATGC() accepts
 * it, and on 4 bits per nucleotide otherwise. The encoded bytes are then
 * handed to obi_blob() together with the matching element size.
 *
 * @warning The blob must be freed by the caller.
 *
 * @param seq The DNA sequence to convert (NUL-terminated string).
 *
 * @returns The blob returned by obi_blob().
 * @retval NULL if seq is NULL, if its length does not fit in an int32_t,
 *              or if the encoding function returned NULL.
 */
Obi_blob_p obi_seq_to_blob(const char* seq)
{
	Obi_blob_p value_b;
	byte_t*    encoded_seq;
	size_t     raw_length;
	int32_t    seq_length;
	int32_t    length_encoded_seq;   // length of the encoded sequence in bytes
	int32_t    nuc_per_byte;         // number of nucleotides packed in one byte
	uint8_t    element_size;

	if (seq == NULL)
		return NULL;

	// The encoders and obi_blob() take int32_t lengths: refuse anything
	// that would be silently truncated by the conversion.
	raw_length = strlen(seq);
	if (raw_length > (size_t) INT32_MAX)
		return NULL;
	seq_length = (int32_t) raw_length;

	// Check if just ATGC and encode accordingly
	if (only_ATGC(seq))
	{
		element_size = ELEMENT_SIZE_SEQ_2;
		nuc_per_byte = 4;
		encoded_seq  = encode_seq_on_2_bits(seq, seq_length);
	}
	else
	{
		element_size = ELEMENT_SIZE_SEQ_4;
		nuc_per_byte = 2;
		encoded_seq  = encode_seq_on_4_bits(seq, seq_length);
	}

	if (encoded_seq == NULL)
		return NULL;

	// Ceiling division in integer arithmetic; written so that it cannot
	// overflow even when seq_length is close to INT32_MAX.
	length_encoded_seq = seq_length / nuc_per_byte;
	if ((seq_length % nuc_per_byte) != 0)
		length_encoded_seq++;

	value_b = obi_blob(encoded_seq, element_size, length_encoded_seq, seq_length);

	// The temporary encoding buffer is released whether or not obi_blob() succeeded
	free(encoded_seq);

	return value_b;
}
/**
 * Converts a blob to a DNA sequence.
 *
 * A blob whose element size is 2 is decoded with decode_seq_on_2_bits();
 * any other blob is decoded with decode_seq_on_4_bits().
 *
 * @param value_b The blob to convert.
 *
 * @returns The sequence returned by the decoding function.
 * @retval NULL if value_b is NULL (or if the decoding function returns NULL).
 */
char* obi_blob_to_seq(Obi_blob_p value_b)
{
	// Nothing to decode: report an error instead of dereferencing NULL
	if (value_b == NULL)
		return NULL;

	// Decode
	if (value_b->element_size == 2)
		return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);

	return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);
}
/**
 * Encodes a DNA sequence as a blob and adds it to an indexer.
 *
 * @param indexer The indexer in which the sequence is added.
 * @param value   The DNA sequence to index.
 *
 * @returns The value returned by obi_indexer_add() for the encoded sequence.
 * @retval -1 if the sequence could not be converted to a blob.
 */
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value)
{
	index_t    position;
	Obi_blob_p encoded = obi_seq_to_blob(value);

	if (encoded != NULL)
	{
		// Store the encoded sequence, then drop the temporary blob
		position = obi_indexer_add(indexer, encoded);
		free(encoded);
	}
	else
	{
		// Encoding failed
		position = -1;
	}

	return position;
}
/**
 * Retrieves the DNA sequence stored at a given index of an indexer.
 *
 * The blob is fetched with obi_indexer_get() and decoded with
 * obi_blob_to_seq().
 *
 * @param indexer The indexer queried.
 * @param idx     The index of the sequence in the indexer.
 *
 * @returns The decoded sequence, as returned by obi_blob_to_seq().
 * @retval NULL if obi_indexer_get() returned NULL (or if decoding failed).
 */
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
{
	Obi_blob_p value_b;
	char*      seq;

	// Get encoded value
	value_b = obi_indexer_get(indexer, idx);
	if (value_b == NULL)     // obi_blob_to_seq() would dereference it
		return NULL;

	// Decode sequence
	seq = obi_blob_to_seq(value_b);

	// NOTE(review): this free() assumes obi_indexer_get() returns a heap block
	// owned by the caller. Confirm against the indexer implementation: if the
	// pointer refers to storage owned by the indexer, this call is invalid.
	free(value_b);

	return seq;
}
src/dna_seq_indexer.h
0 → 100644
View file @
be05c889
/****************************************************************************
* DNA sequence indexer header file *
****************************************************************************/
/**
* @file dna_seq_indexer.h
* @author Celine Mercier
* @date April 12th 2016
* @brief Header file for the functions handling the indexing of DNA sequences.
*/
#ifndef DNA_SEQ_INDEXER_H_
#define DNA_SEQ_INDEXER_H_
#include <stdlib.h>
#include <stdio.h>
#include "obidms.h"
#include "obitypes.h"
#include "obiblob.h"
#include "obiblob_indexer.h"
/**
* @brief Converts a DNA sequence to a blob.
*
* @warning The blob must be freed by the caller.
*
* @param seq The DNA sequence to convert.
*
* @returns A pointer to the blob created.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p
obi_seq_to_blob
(
const
char
*
seq
);
/**
* @brief Converts a blob to a DNA sequence.
*
* NOTE(review): the return type is a non-const char*, which suggests that the
* caller owns the returned string; confirm against the decoding functions
* before documenting it as a requirement.
*
* @param value_b The blob to convert.
*
* @returns A pointer to the DNA sequence contained in the blob.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char
*
obi_blob_to_seq
(
Obi_blob_p
value_b
);
/**
* @brief Encodes a DNA sequence as a blob and adds it to an indexer.
*
* @param indexer The indexer in which the sequence is added.
* @param value The DNA sequence to index.
*
* @returns The index returned by the indexer for the sequence.
* @retval -1 if the sequence could not be converted to a blob.
*/
index_t
obi_index_dna_seq
(
Obi_indexer_p
indexer
,
const
char
*
value
);
/**
* @brief Retrieves the DNA sequence stored at a given index of an indexer.
*
* The blob found at the index is decoded with obi_blob_to_seq().
*
* @param indexer The indexer queried.
* @param idx The index of the sequence in the indexer.
*
* @returns The decoded DNA sequence, as returned by obi_blob_to_seq().
*/
char
*
obi_retrieve_dna_seq
(
Obi_indexer_p
indexer
,
index_t
idx
);
#endif
/* DNA_SEQ_INDEXER_H_ */
src/encode.c
View file @
be05c889
...
...
@@ -54,7 +54,7 @@ bool only_ATGC(const char* seq)
}
byte_t
*
encode_seq_on_2_bits
(
char
*
seq
,
int32_t
length
)
byte_t
*
encode_seq_on_2_bits
(
c
onst
c
har
*
seq
,
int32_t
length
)
{
byte_t
*
seq_b
;
uint8_t
modulo
;
...
...
@@ -163,7 +163,7 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
}
byte_t
*
encode_seq_on_4_bits
(
char
*
seq
,
int32_t
length
)
byte_t
*
encode_seq_on_4_bits
(
c
onst
c
har
*
seq
,
int32_t
length
)
{
byte_t
*
seq_b
;
uint8_t
modulo
;
...
...
src/encode.h
View file @
be05c889
...
...
@@ -96,7 +96,7 @@ bool only_ATGC(const char* seq);
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
byte_t
*
encode_seq_on_2_bits
(
char
*
seq
,
int32_t
length
);
byte_t
*
encode_seq_on_2_bits
(
c
onst
c
har
*
seq
,
int32_t
length
);
/**
...
...
@@ -147,7 +147,7 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
byte_t
*
encode_seq_on_4_bits
(
char
*
seq
,
int32_t
length
);
byte_t
*
encode_seq_on_4_bits
(
c
onst
c
har
*
seq
,
int32_t
length
);
/**
...
...
src/obiblob.c
View file @
be05c889
...
...
@@ -82,52 +82,5 @@ const char* obi_blob_to_str(Obi_blob_p value_b)
}
Obi_blob_p
obi_seq_to_blob
(
char
*
seq
)
{
Obi_blob_p
value_b
;
int32_t
length_encoded_seq
;
// length of the encoded sequence in bytes
int32_t
seq_length
;
byte_t
*
encoded_seq
;
seq_length
=
strlen
(
seq
);
// Check if just ATGC and encode accordingly
if
(
only_ATGC
(
seq
))
{
// Compute the length (in bytes) of the encoded sequence
length_encoded_seq
=
ceil
((
double
)
seq_length
/
(
double
)
4
.
0
);
// Encode
encoded_seq
=
encode_seq_on_2_bits
(
seq
,
seq_length
);
if
(
encoded_seq
==
NULL
)
return
NULL
;
value_b
=
obi_blob
(
encoded_seq
,
ELEMENT_SIZE_SEQ_2
,
length_encoded_seq
,
seq_length
);
}
else
{
// Compute the length (in bytes) of the encoded sequence
length_encoded_seq
=
ceil
((
double
)
seq_length
/
(
double
)
2
.
0
);
// Encode
encoded_seq
=
encode_seq_on_4_bits
(
seq
,
seq_length
);
if
(
encoded_seq
==
NULL
)
return
NULL
;
value_b
=
obi_blob
(
encoded_seq
,
ELEMENT_SIZE_SEQ_4
,
length_encoded_seq
,
seq_length
);
}
free
(
encoded_seq
);
return
value_b
;
}
const
char
*
obi_blob_to_seq
(
Obi_blob_p
value_b
)
{
// Decode
if
(
value_b
->
element_size
==
2
)
return
decode_seq_on_2_bits
(
value_b
->
value
,
value_b
->
length_decoded_value
);
else
return
decode_seq_on_4_bits
(
value_b
->
value
,
value_b
->
length_decoded_value
);
}
// TODO same for int
src/obiblob.h
View file @
be05c889
...
...
@@ -45,6 +45,11 @@ typedef struct Obi_blob {
}
Obi_blob_t
,
*
Obi_blob_p
;
// TODO doc
Obi_blob_p
obi_blob
(
byte_t
*
encoded_value
,
uint8_t
element_size
,
int32_t
length_encoded_value
,
int32_t
length_decoded_value
);
/**
* @brief Converts a character string to a blob.
*
...
...
@@ -74,35 +79,5 @@ Obi_blob_p obi_str_to_blob(char* value);
const
char
*
obi_blob_to_str
(
Obi_blob_p
value_b
);
/**
* @brief Converts a DNA sequence to a blob with a header.
*
* @warning The blob must be freed by the caller.
*
* @param value The DNA sequence to convert.
*
* @returns A pointer to the blob created.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p
obi_seq_to_blob
(
char
*
seq
);
/**
* @brief Converts a blob to a DNA sequence.
*
* @param value_b The blob to convert.
*
* @returns A pointer to the DNA sequence contained in the blob.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const
char
*
obi_blob_to_seq
(
Obi_blob_p
value_b
);
// TODO move to encode source files
#endif
/* OBIBLOB_H_ */
src/obidmscolumn_seq.c
View file @
be05c889
...
...
@@ -18,7 +18,7 @@
#include "obitypes.h"
#include "obierrno.h"
#include "obidebug.h"
#include "
obiblob
_indexer.h"
#include "
dna_seq
_indexer.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
...
...
@@ -33,7 +33,6 @@
int
obi_column_set_obiseq_with_elt_idx
(
OBIDMS_column_p
column
,
index_t
line_nb
,
index_t
element_idx
,
char
*
value
)
{
Obi_blob_p
value_b
;
index_t
idx
;
// Check that the line number is not greater than the maximum allowed
...
...
@@ -56,21 +55,13 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
if
((
line_nb
+
1
)
>
(
column
->
header
)
->
lines_used
)
(
column
->
header
)
->
lines_used
=
line_nb
+
1
;
// Encode the value on a byte array with a header // TODO make function
value_b
=
obi_seq_to_blob
(
value
);
if
(
value_b
==
NULL
)
return
-
1
;
// Add in the indexer
idx
=
obi_indexer_add
(
column
->
indexer
,
value_b
);
idx
=
obi_index_dna_seq
(
column
->
indexer
,
value
);
if
(
idx
==
-
1
)
return
-
1
;
// Add the value's index in the column
*
(((
index_t
*
)
(
column
->
data
))
+
(
line_nb
*
((
column
->
header
)
->
nb_elements_per_line
))
+
element_idx
)
=
idx
;
free
(
value_b
);
return
0
;
}
...
...
@@ -114,8 +105,7 @@ int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c
const
char
*
obi_column_get_obiseq_with_elt_idx
(
OBIDMS_column_p
column
,
index_t
line_nb
,
index_t
element_idx
)
{
index_t
idx
;
Obi_blob_p
value_b
;
index_t
idx
;
if
((
line_nb
+
1
)
>
((
column
->
header
)
->
line_count
))
{
...
...
@@ -130,9 +120,7 @@ const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t l
if
(
idx
==
OBIIdx_NA
)
return
OBISeq_NA
;
value_b
=
obi_indexer_get
(
column
->
indexer
,
idx
);
return
obi_blob_to_seq
(
value_b
);
return
obi_retrieve_dna_seq
(
column
->
indexer
,
idx
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment