Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools3
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
21
Issues
21
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools3
Commits
019dfc01
Commit
019dfc01
authored
Apr 08, 2016
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Branch to refactor and debug (AVLs bugged)
parent
edc4fd7b
Changes
32
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
32 changed files
with
1511 additions
and
770 deletions
+1511
-770
python/obitools3/obidms/_obidms.cfiles
python/obitools3/obidms/_obidms.cfiles
+2
-2
python/obitools3/obidms/_obidmscolumn_bool.cfiles
python/obitools3/obidms/_obidmscolumn_bool.cfiles
+2
-2
python/obitools3/obidms/_obidmscolumn_char.cfiles
python/obitools3/obidms/_obidmscolumn_char.cfiles
+2
-2
python/obitools3/obidms/_obidmscolumn_float.cfiles
python/obitools3/obidms/_obidmscolumn_float.cfiles
+2
-2
python/obitools3/obidms/_obidmscolumn_int.cfiles
python/obitools3/obidms/_obidmscolumn_int.cfiles
+2
-2
python/obitools3/obidms/_obidmscolumn_seq.cfiles
python/obitools3/obidms/_obidmscolumn_seq.cfiles
+2
-2
python/obitools3/obidms/_obidmscolumn_seq.pyx
python/obitools3/obidms/_obidmscolumn_seq.pyx
+19
-13
python/obitools3/obidms/_obidmscolumn_str.cfiles
python/obitools3/obidms/_obidmscolumn_str.cfiles
+2
-2
python/obitools3/obidms/_obidmscolumn_str.pyx
python/obitools3/obidms/_obidmscolumn_str.pyx
+18
-13
python/obitools3/obidms/_obiseq.cfiles
python/obitools3/obidms/_obiseq.cfiles
+2
-2
python/obitools3/obidms/_obiseq.pyx
python/obitools3/obidms/_obiseq.pyx
+5
-8
python/obitools3/obidms/_obitaxo.cfiles
python/obitools3/obidms/_obitaxo.cfiles
+2
-2
python/obitools3/obiimport.py
python/obitools3/obiimport.py
+25
-23
requirements.txt
requirements.txt
+1
-1
src/bloom.c
src/bloom.c
+42
-6
src/bloom.h
src/bloom.h
+13
-2
src/encode.c
src/encode.c
+133
-0
src/encode.h
src/encode.h
+91
-2
src/obiavl.c
src/obiavl.c
+890
-489
src/obiavl.h
src/obiavl.h
+153
-129
src/obidms.c
src/obidms.c
+1
-1
src/obidms.h
src/obidms.h
+1
-1
src/obidms_taxonomy.c
src/obidms_taxonomy.c
+1
-1
src/obidmscolumn.c
src/obidmscolumn.c
+21
-27
src/obidmscolumn.h
src/obidmscolumn.h
+0
-2
src/obidmscolumn_seq.c
src/obidmscolumn_seq.c
+5
-10
src/obidmscolumn_str.c
src/obidmscolumn_str.c
+5
-5
src/obidmscolumndir.c
src/obidmscolumndir.c
+1
-1
src/obierrno.h
src/obierrno.h
+8
-0
src/obiview.c
src/obiview.c
+1
-1
src/utils.c
src/utils.c
+38
-7
src/utils.h
src/utils.h
+21
-10
No files found.
python/obitools3/obidms/_obidms.cfiles
View file @
019dfc01
...
...
@@ -12,8 +12,6 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/encode.h
...
...
@@ -28,3 +26,5 @@
../../../src/murmurhash2.h
../../../src/crc64.c
../../../src/crc64.h
../../../src/utils.c
../../../src/utils.h
python/obitools3/obidms/_obidmscolumn_bool.cfiles
View file @
019dfc01
...
...
@@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/
private_at_functions.h
../../../src/
private_at_functions.c
../../../src/
utils.c
../../../src/
utils.h
../../../src/obiavl.h
../../../src/obiavl.c
python/obitools3/obidms/_obidmscolumn_char.cfiles
View file @
019dfc01
...
...
@@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/
private_at_functions.h
../../../src/
private_at_functions.c
../../../src/
utils.c
../../../src/
utils.h
../../../src/obiavl.h
../../../src/obiavl.c
python/obitools3/obidms/_obidmscolumn_float.cfiles
View file @
019dfc01
...
...
@@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/
private_at_functions.h
../../../src/
private_at_functions.c
../../../src/
utils.c
../../../src/
utils.h
../../../src/obiavl.h
../../../src/obiavl.c
python/obitools3/obidms/_obidmscolumn_int.cfiles
View file @
019dfc01
...
...
@@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/
private_at_functions.h
../../../src/
private_at_functions.c
../../../src/
utils.c
../../../src/
utils.h
../../../src/obiavl.h
../../../src/obiavl.c
python/obitools3/obidms/_obidmscolumn_seq.cfiles
View file @
019dfc01
...
...
@@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/
private_at_functions.h
../../../src/
private_at_functions.c
../../../src/
utils.c
../../../src/
utils.h
../../../src/obiavl.h
../../../src/obiavl.c
python/obitools3/obidms/_obidmscolumn_seq.pyx
View file @
019dfc01
...
...
@@ -10,19 +10,23 @@ from .capi.obitypes cimport OBISeq_NA, const_char_p
from
obitools3.utils
cimport
str2bytes
,
bytes2str
from
libc.stdlib
cimport
free
from
libc.string
cimport
strcmp
cdef
class
OBIDMS_column_seq
(
OBIDMS_column
):
cpdef
object
get_line
(
self
,
index_t
line_nb
):
cdef
bytes
value
cdef
char
*
value
cdef
object
result
value
=
<
bytes
>
obi_column_get_obiseq_with_elt_idx_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
0
)
value
=
obi_column_get_obiseq_with_elt_idx_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
0
)
if
obi_errno
>
0
:
raise
IndexError
(
line_nb
)
if
value
==
OBISeq_NA
:
# TODO
if
strcmp
(
value
,
OBISeq_NA
)
==
0
:
result
=
None
else
:
result
=
bytes2str
(
value
)
free
(
value
)
return
result
cpdef
set_line
(
self
,
index_t
line_nb
,
object
value
):
...
...
@@ -38,33 +42,35 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
cdef
class
OBIDMS_column_multi_elts_seq
(
OBIDMS_column_multi_elts
):
cpdef
object
get_item
(
self
,
index_t
line_nb
,
str
element_name
):
cdef
bytes
value
cdef
char
*
value
cdef
object
result
value
=
<
bytes
>
obi_column_get_obiseq_with_elt_name_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
str2bytes
(
element_name
))
value
=
obi_column_get_obiseq_with_elt_name_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
str2bytes
(
element_name
))
if
obi_errno
>
0
:
raise
IndexError
(
line_nb
,
element_name
)
if
value
==
OBISeq_NA
:
if
strcmp
(
value
,
OBISeq_NA
)
==
0
:
result
=
None
else
:
result
=
bytes2str
(
value
)
free
(
value
)
return
result
cpdef
object
get_line
(
self
,
index_t
line_nb
)
:
cdef
bytes
value
cdef
object
value_in_result
cdef
dict
result
cdef
char
*
value
cdef
object
value_in_result
cdef
dict
result
cdef
index_t
i
cdef
bint
all_NA
cdef
bint
all_NA
result
=
{}
all_NA
=
True
for
i
in
range
(
self
.
nb_elements_per_line
)
:
value
=
<
bytes
>
obi_column_get_obiseq_with_elt_idx_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
i
)
value
=
obi_column_get_obiseq_with_elt_idx_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
i
)
if
obi_errno
>
0
:
raise
IndexError
(
line_nb
)
if
value
==
OBISeq_NA
:
if
strcmp
(
value
,
OBISeq_NA
)
==
0
:
value_in_result
=
None
else
:
value_in_result
=
bytes2str
(
value
)
value_in_result
=
bytes2str
(
value
)
free
(
value
)
result
[
self
.
elements_names
[
i
]]
=
value_in_result
if
all_NA
and
(
value_in_result
is
not
None
)
:
all_NA
=
False
...
...
python/obitools3/obidms/_obidmscolumn_str.cfiles
View file @
019dfc01
...
...
@@ -14,7 +14,7 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/
private_at_functions.h
../../../src/
private_at_functions.c
../../../src/
utils.c
../../../src/
utils.h
../../../src/obiavl.h
../../../src/obiavl.c
python/obitools3/obidms/_obidmscolumn_str.pyx
View file @
019dfc01
...
...
@@ -10,19 +10,22 @@ from .capi.obitypes cimport OBIStr_NA, const_char_p
from
obitools3.utils
cimport
str2bytes
,
bytes2str
from
libc.string
cimport
strcmp
cdef
class
OBIDMS_column_str
(
OBIDMS_column
):
cpdef
object
get_line
(
self
,
index_t
line_nb
):
cdef
bytes
value
cdef
char
*
value
cdef
object
result
value
=
<
bytes
>
obi_column_get_obistr_with_elt_idx_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
0
)
value
=
obi_column_get_obistr_with_elt_idx_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
0
)
if
obi_errno
>
0
:
raise
IndexError
(
line_nb
)
if
value
==
OBIStr_NA
:
# TODO
if
strcmp
(
value
,
OBIStr_NA
)
==
0
:
result
=
None
else
:
result
=
bytes2str
(
value
)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
return
result
cpdef
set_line
(
self
,
index_t
line_nb
,
object
value
):
...
...
@@ -38,33 +41,35 @@ cdef class OBIDMS_column_str(OBIDMS_column):
cdef
class
OBIDMS_column_multi_elts_str
(
OBIDMS_column_multi_elts
):
cpdef
object
get_item
(
self
,
index_t
line_nb
,
str
element_name
):
cdef
bytes
value
cdef
char
*
value
cdef
object
result
value
=
<
bytes
>
obi_column_get_obistr_with_elt_name_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
str2bytes
(
element_name
))
value
=
obi_column_get_obistr_with_elt_name_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
str2bytes
(
element_name
))
if
obi_errno
>
0
:
raise
IndexError
(
line_nb
,
element_name
)
if
value
==
OBIStr_NA
:
if
strcmp
(
value
,
OBIStr_NA
)
==
0
:
result
=
None
else
:
result
=
bytes2str
(
value
)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
return
result
cpdef
object
get_line
(
self
,
index_t
line_nb
)
:
cdef
bytes
value
cdef
object
value_in_result
cdef
dict
result
cdef
char
*
value
cdef
object
value_in_result
cdef
dict
result
cdef
index_t
i
cdef
bint
all_NA
cdef
bint
all_NA
result
=
{}
all_NA
=
True
for
i
in
range
(
self
.
nb_elements_per_line
)
:
value
=
<
bytes
>
obi_column_get_obistr_with_elt_idx_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
i
)
value
=
obi_column_get_obistr_with_elt_idx_in_view
(
self
.
view
,
(
self
.
pointer
)[
0
],
line_nb
,
i
)
if
obi_errno
>
0
:
raise
IndexError
(
line_nb
)
if
value
==
OBIStr_NA
:
if
strcmp
(
value
,
OBIStr_NA
)
==
0
:
value_in_result
=
None
else
:
value_in_result
=
bytes2str
(
value
)
value_in_result
=
bytes2str
(
value
)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
result
[
self
.
elements_names
[
i
]]
=
value_in_result
if
all_NA
and
(
value_in_result
is
not
None
)
:
all_NA
=
False
...
...
python/obitools3/obidms/_obiseq.cfiles
View file @
019dfc01
...
...
@@ -12,8 +12,8 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/
private_at_functions.h
../../../src/
private_at_functions.c
../../../src/
utils.c
../../../src/
utils.h
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/encode.h
...
...
python/obitools3/obidms/_obiseq.pyx
View file @
019dfc01
...
...
@@ -26,7 +26,7 @@ cdef class OBI_Seq(dict) :
self
[
bytes2str
(
DESCRIPTION_COLUMN
)]
=
description
cpdef
get_description
(
self
)
:
return
self
.
description
return
self
.
description
# TODO no
cpdef
get_sequence
(
self
)
:
return
self
.
sequence
...
...
@@ -48,28 +48,25 @@ cdef class OBI_Nuc_Seq(OBI_Seq) :
cdef
class
OBI_Nuc_Seq_Stored
(
OBIView_line
)
:
cpdef
set_id
(
self
,
str
id
)
:
self
.
id
=
id
self
[
bytes2str
(
ID_COLUMN
)]
=
id
cpdef
get_id
(
self
)
:
return
self
.
id
return
self
[
bytes2str
(
ID_COLUMN
)]
cpdef
set_description
(
self
,
str
description
)
:
self
.
description
=
description
self
[
bytes2str
(
DESCRIPTION_COLUMN
)]
=
description
cpdef
get_description
(
self
)
:
return
self
.
description
return
self
[
bytes2str
(
DESCRIPTION_COLUMN
)]
cpdef
set_sequence
(
self
,
str
sequence
)
:
self
.
sequence
=
sequence
self
[
bytes2str
(
NUC_SEQUENCE_COLUMN
)]
=
sequence
cpdef
get_sequence
(
self
)
:
return
self
.
sequence
return
self
[
bytes2str
(
NUC_SEQUENCE_COLUMN
)]
def
__str__
(
self
)
:
return
self
.
sequence
# or not
return
self
[
bytes2str
(
NUC_SEQUENCE_COLUMN
)]
# or not
# cpdef str reverse_complement(self) : TODO in C ?
# pass
...
...
python/obitools3/obidms/_obitaxo.cfiles
View file @
019dfc01
...
...
@@ -12,8 +12,8 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/
private_at_functions.h
../../../src/
private_at_functions.c
../../../src/
utils.c
../../../src/
utils.h
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/encode.h
...
...
python/obitools3/obiimport.py
View file @
019dfc01
...
...
@@ -5,7 +5,7 @@ import time
from
obitools3.obidms._obidms
import
OBIDMS
def
bufferedRead
(
fileobj
,
size
=
100000000
):
def
bufferedRead
(
fileobj
,
size
=
209715200
):
## 200 MB
buffer
=
fileobj
.
readlines
(
size
)
while
buffer
:
for
l
in
buffer
:
...
...
@@ -26,14 +26,16 @@ if __name__ == '__main__':
view
=
d
.
new_view
(
'uniq view'
,
view_type
=
"NUC_SEQS_VIEW"
)
# for i in range(35000000) :
# if (not (i%500000)) :
# print(str(time.time())+'\t'+str(i))
# id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i)
# view[i].set_id(id)
for
i
in
range
(
35000000
)
:
if
(
not
(
i
%
500000
))
:
print
(
str
(
time
.
time
())
+
'
\t
'
+
str
(
i
))
id
=
"@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"
+
str
(
i
)
view
[
i
].
set_id
(
id
)
if
id
!=
view
[
i
][
"ID"
]
:
print
(
"nope"
,
id
,
view
[
i
][
"ID"
])
input_file
=
open
(
args
.
input_file
,
'r'
)
input_file_buffered
=
bufferedRead
(
input_file
)
#
input_file = open(args.input_file, 'r')
#
input_file_buffered = bufferedRead(input_file)
#
# if args.input_file[-1:] == "a" :
...
...
@@ -111,37 +113,37 @@ if __name__ == '__main__':
# l = 0
# next = False
#
l
=
0
i
=
0
#
l=0
#
i=0
# while (True):
# l+=1
# line = input_file.readline()
# if line=="":
# break
for
line
in
input_file_buffered
:
#
for line in input_file_buffered :
#
# #if i > 1E7 :
# # print('hmm?')
#
#
#if i == 10
000000 :
#
#
break
#
if i == 6
000000 :
# break
#
if
l
%
4
==
0
:
#
if l%4 == 0 :
#
if
(
not
(
i
%
500000
))
:
print
(
str
(
time
.
time
())
+
'
\t
'
+
str
(
i
))
#
if (not (i%500000)) :
#
print(str(time.time())+'\t'+str(i))
# #
# # #print("header", line)
# #
id
=
line
.
split
(
" "
,
1
)[
0
][
1
:]
print
(
id
)
#
id = line.split(" ", 1)[0][1:]
#
print(id)
# # #rest = (line[:-1].split(" ", 1)[1]).split(";")
view
[
i
].
set_id
(
id
)
#
print(view[i]["ID"])
#
view[i].set_id(id)
#
print(view[i]["ID"])
#
i
+=
1
#
i+=1
l
+=
1
#
l+=1
#
# # description = ""
# # for j in range(len(rest)) :
...
...
@@ -186,7 +188,7 @@ if __name__ == '__main__':
# l+=1
#
#
input_file
.
close
()
#
input_file.close()
#print(view)
print
(
view
.
__repr__
())
...
...
requirements.txt
View file @
019dfc01
--extra-index-url https://pypi.python.org/simple/
Cython
>=0.21
Cython
==0.23.5
Sphinx
>=1.2.0
ipython
>=3.0.0
breathe
>=4.0.0
src/bloom.c
View file @
019dfc01
...
...
@@ -122,6 +122,40 @@ static void setup_buckets(struct bloom * bloom, unsigned int cache_size)
}
// TODO
int
bloom_filter_size
(
int
entries
,
double
error
)
{
int
bytes
;
double
num
;
double
denom
;
double
bpe
;
int
bits
;
unsigned
bucket_bytes
;
int
not_even_by
;
num
=
log
(
error
);
denom
=
0
.
480453013918201
;
// ln(2)^2
bpe
=
-
(
num
/
denom
);
bits
=
(
int
)(((
double
)
entries
)
*
bpe
);
if
(
bits
%
8
)
{
bytes
=
(
bits
/
8
)
+
1
;
}
else
{
bytes
=
bits
/
8
;
}
bucket_bytes
=
BLOOM_BUCKET_SIZE_FALLBACK
;
not_even_by
=
bytes
%
bucket_bytes
;
if
(
not_even_by
)
{
// adjust bytes
bytes
+=
(
bucket_bytes
-
not_even_by
);
}
return
bytes
;
}
int
bloom_init_size
(
struct
bloom
*
bloom
,
int
entries
,
double
error
,
unsigned
int
cache_size
)
{
...
...
@@ -151,19 +185,21 @@ int bloom_init_size(struct bloom * bloom, int entries, double error,
setup_buckets
(
bloom
,
cache_size
);
bloom
->
bf
=
(
unsigned
char
*
)
calloc
(
bloom
->
bytes
,
sizeof
(
unsigned
char
));
if
(
bloom
->
bf
==
NULL
)
{
return
1
;
}
// TODO comment
memset
(
bloom
->
bf
,
0
,
bloom
->
bytes
);
//bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
//if (bloom->bf == NULL) {
// return 1;
//}
bloom
->
ready
=
1
;
return
0
;
}
int
bloom_init
(
struct
bloom
*
bloom
,
int
entries
,
double
error
)
int
bloom_init
(
struct
bloom
*
bloom
,
int
entries
)
//
, double error)
{
return
bloom_init_size
(
bloom
,
entries
,
error
,
0
);
return
bloom_init_size
(
bloom
,
entries
,
BLOOM_FILTER_ERROR_RATE
,
0
);
}
...
...
src/bloom.h
View file @
019dfc01
...
...
@@ -9,6 +9,10 @@
#define _BLOOM_H
// TODO
#define BLOOM_FILTER_ERROR_RATE (0.001)
/** ***************************************************************************
* On Linux, the code attempts to compute a bucket size based on CPU cache
* size info, if available. If that fails for any reason, this fallback size
...
...
@@ -60,10 +64,17 @@ struct bloom
unsigned
bucket_bits_fast_mod_operand
;
double
bpe
;
unsigned
char
*
bf
;
int
ready
;
unsigned
char
bf
[];
};
typedef
struct
bloom
bloom_t
;
// TODO
int
bloom_filter_size
(
int
entries
,
double
error
);
/** ***************************************************************************
* Initialize the bloom filter for use.
...
...
@@ -91,7 +102,7 @@ struct bloom
* 1 - on failure
*
*/
int
bloom_init
(
struct
bloom
*
bloom
,
int
entries
,
double
error
);
int
bloom_init
(
struct
bloom
*
bloom
,
int
entries
);
//
, double error);
/** ***************************************************************************
...
...
src/encode.c
View file @
019dfc01
...
...
@@ -64,6 +64,12 @@ byte_t* encode_seq_on_2_bits(char* seq, int32_t length)
length_b
=
ceil
((
double
)
length
/
(
double
)
4
.
0
);
seq_b
=
(
byte_t
*
)
malloc
(
length_b
*
sizeof
(
byte_t
));
if
(
seq_b
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Error allocating memory for an encoded DNA sequence"
);
return
NULL
;
}
// Initialize all the bits to 0
memset
(
seq_b
,
0
,
length_b
);
...
...
@@ -93,6 +99,7 @@ byte_t* encode_seq_on_2_bits(char* seq, int32_t length)
seq_b
[
i
/
4
]
|=
NUC_T_2b
;
break
;
default:
obi_set_errno
(
OBI_ENCODE_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Invalid nucleotide base when encoding (not [atgcATGC])"
);
return
NULL
;
}
...
...
@@ -116,6 +123,12 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
uint8_t
nuc
;
seq
=
(
char
*
)
malloc
((
length_seq
+
1
)
*
sizeof
(
char
));
if
(
seq
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Error allocating memory for a decoded DNA sequence"
);
return
NULL
;
}
for
(
i
=
0
;
i
<
length_seq
;
i
++
)
{
...
...
@@ -138,6 +151,7 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
seq
[
i
]
=
't'
;
break
;
default:
obi_set_errno
(
OBI_DECODE_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Invalid nucleotide base when decoding"
);
return
NULL
;
}
...
...
@@ -159,6 +173,12 @@ byte_t* encode_seq_on_4_bits(char* seq, int32_t length)
length_b
=
ceil
((
double
)
length
/
(
double
)
2
.
0
);
seq_b
=
(
byte_t
*
)
malloc
(
length_b
*
sizeof
(
byte_t
));
if
(
seq_b
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Error allocating memory for an encoded DNA sequence"
);
return
NULL
;
}
// Initialize all the bits to 0
memset
(
seq_b
,
0
,
length_b
);
...
...
@@ -232,6 +252,7 @@ byte_t* encode_seq_on_4_bits(char* seq, int32_t length)
seq_b
[
i
/
2
]
|=
NUC_N_4b
;
break
;
default:
obi_set_errno
(
OBI_ENCODE_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Invalid nucleotide base when encoding (not IUPAC)"
);
return
NULL
;
}
...
...
@@ -255,6 +276,12 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
uint8_t
nuc
;
seq
=
(
char
*
)
malloc
((
length_seq
+
1
)
*
sizeof
(
char
));
if
(
seq
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Error allocating memory for a decoded DNA sequence"
);
return
NULL
;
}
for
(
i
=
0
;
i
<
length_seq
;
i
++
)
{
...
...
@@ -310,6 +337,7 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
seq
[
i
]
=
'n'
;
break
;
default:
obi_set_errno
(
OBI_DECODE_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Invalid nucleotide base when decoding"
);
return
NULL
;
}
...
...
@@ -321,6 +349,111 @@ char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
}
Obi_byte_array_p
obi_byte_array
(
byte_t
*
encoded_value
,
uint8_t
element_size
,
int32_t
length_encoded_value
,
int32_t
length_decoded_value
)
{
Obi_byte_array_p
byte_array
;
// Allocate the memory for the byte array structure
byte_array
=
(
Obi_byte_array_p
)
malloc
(
sizeof
(
Obi_byte_array_t
)
+
length_encoded_value
);
if
(
byte_array
==
NULL
)
{
obi_set_errno
(
OBI_MALLOC_ERROR
);
// TODO
obidebug
(
1
,
"
\n
Error allocating memory for a byte array"
);
return
NULL
;
}
// Store the number of bits on which each element is encoded
byte_array
->
element_size
=
element_size
;
// Store the length (in bytes) of the encoded value
byte_array
->
length_encoded_value
=
length_encoded_value
;
// Store the initial length (in bytes) of the decoded value
byte_array
->
length_decoded_value
=
length_decoded_value
;
// Store the encoded value
memcpy
(
byte_array
->
value
,
encoded_value
,
length_encoded_value
);
return
byte_array
;
}
Obi_byte_array_p
obi_str_to_obibytes
(
char
*
value
)
{