Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools3
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
23
Issues
23
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools3
Commits
156fb04e
Commit
156fb04e
authored
Jan 05, 2018
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented functions to build reverse complement sequences
parent
428c4eb5
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
303 additions
and
25 deletions
+303
-25
python/obitools3/dms/capi/obiutils.pxd
python/obitools3/dms/capi/obiutils.pxd
+1
-1
python/obitools3/dms/obiseq.pxd
python/obitools3/dms/obiseq.pxd
+7
-5
python/obitools3/dms/obiseq.pyx
python/obitools3/dms/obiseq.pyx
+105
-15
src/utils.c
src/utils.c
+146
-0
src/utils.h
src/utils.h
+44
-4
No files found.
python/obitools3/dms/capi/obiutils.pxd
View file @
156fb04e
...
...
@@ -9,4 +9,4 @@ from ..capi.obitypes cimport const_char_p
cdef
extern
from
"utils.h"
nogil
:
const_char_p
obi_format_date
(
time_t
date
)
char
*
reverse_complement_sequence
(
char
*
nucAcSeq
)
python/obitools3/dms/obiseq.pxd
View file @
156fb04e
...
...
@@ -8,9 +8,10 @@ cdef class Seq(dict) :
cdef
class
Nuc_Seq
(
Seq
)
:
cdef
object
_quality
#cpdef object reverse_complement(self)
cdef
char
*
_reverse_complement
cdef
object
_quality_array
cpdef
bytes
build_reverse_complement
(
self
)
cdef
class
Seq_Stored
(
Line
)
:
...
...
@@ -18,7 +19,8 @@ cdef class Seq_Stored(Line) :
cdef
class
Nuc_Seq_Stored
(
Seq_Stored
)
:
cdef
char
*
_reverse_complement
cpdef
set_quality_int
(
self
,
list
new_qual
)
cpdef
set_quality_char
(
self
,
bytes
new_qual
,
int
offset
=*
)
#cpdef object reverse_complement(self)
\ No newline at end of file
cpdef
bytes
build_reverse_complement
(
self
)
\ No newline at end of file
python/obitools3/dms/obiseq.pyx
View file @
156fb04e
...
...
@@ -2,12 +2,23 @@
from
obitools3.utils
cimport
bytes2str
,
str2bytes
,
tostr
,
tobytes
from
obitools3.dms.view.view
cimport
View
from
.capi.obitypes
cimport
index_t
from
.capi.obiview
cimport
NUC_SEQUENCE_COLUMN
,
\
ID_COLUMN
,
\
DEFINITION_COLUMN
,
\
QUALITY_COLUMN
,
\
COUNT_COLUMN
from
.capi.obiutils
cimport
reverse_complement_sequence
from
cpython
cimport
array
import
array
from
copy
import
deepcopy
cdef
class
Seq
(
dict
)
:
def
__init__
(
self
,
object
id
,
object
seq
,
object
definition
=
None
,
object
quality
=
None
,
int
offset
=-
1
,
object
tags
=
None
)
:
...
...
@@ -23,11 +34,18 @@ cdef class Seq(dict) :
k_b
=
tobytes
(
k
)
self
[
k_b
]
=
tags
[
k
]
@
staticmethod
def
new_from_stored
(
Seq_Stored
seq_to_clone
)
:
cdef
Seq
new_seq
new_seq
=
Seq
(
seq_to_clone
.
id
,
seq_to_clone
.
seq
,
definition
=
seq_to_clone
.
definition
,
quality
=
seq_to_clone
.
quality
,
tags
=
seq_to_clone
)
return
new_seq
def
__contains__
(
self
,
object
key
):
return
dict
.
__contains__
(
self
,
tobytes
(
key
))
def
__str__
(
self
):
return
tostr
(
self
[
NUC_SEQUENCE_COLUMN
])
# sequence id property getter and setter
@
property
def
id
(
self
):
# @ReservedAssignment
...
...
@@ -66,7 +84,31 @@ cdef class Seq(dict) :
cdef
class
Nuc_Seq
(
Seq
)
:
def
__init__
(
self
,
object
id
,
object
seq
,
object
definition
=
None
,
object
quality
=
None
,
int
offset
=-
1
,
object
tags
=
None
)
:
cdef
object
k
cdef
bytes
k_b
self
[
ID_COLUMN
]
=
tobytes
(
id
)
self
.
seq
=
tobytes
(
seq
)
if
definition
is
not
None
:
self
.
definition
=
tobytes
(
definition
)
if
quality
is
not
None
:
self
.
quality
=
quality
if
type
(
quality
)
==
list
:
# TODO discuss
self
.
_quality_array
=
array
.
array
(
'd'
,
quality
)
self
.
_annotations
=
{}
self
.
_reverse_complement
=
NULL
# TODO unnecessary?
if
tags
is
not
None
:
for
k
in
tags
:
k_b
=
tobytes
(
k
)
self
[
k_b
]
=
tags
[
k
]
@
staticmethod
def
new_from_stored
(
Nuc_Seq_Stored
seq_to_clone
)
:
cdef
Nuc_Seq
new_seq
new_seq
=
Nuc_Seq
(
seq_to_clone
.
id
,
seq_to_clone
.
seq
,
definition
=
seq_to_clone
.
definition
,
quality
=
seq_to_clone
.
quality
,
tags
=
seq_to_clone
)
return
new_seq
# nuc sequence property getter and setter
@
property
def
seq
(
self
):
...
...
@@ -83,10 +125,41 @@ cdef class Nuc_Seq(Seq) :
@
quality
.
setter
def
quality
(
self
,
object
new_quality
):
# @DuplicatedSignature
self
[
QUALITY_COLUMN
]
=
tobytes
(
new_quality
)
self
[
QUALITY_COLUMN
]
=
new_quality
# sequence quality array property getter and setter
@
property
def
quality_array
(
self
):
return
self
.
_quality_array
# reverse complement property getter
@
property
def
reverse_complement
(
self
):
cdef
bytes
rev_comp
if
self
.
_reverse_complement
==
NULL
:
rev_comp
=
self
.
build_reverse_complement
()
self
.
_reverse_complement
=
rev_comp
return
self
.
_reverse_complement
def
__str__
(
self
):
return
tostr
(
self
[
NUC_SEQUENCE_COLUMN
])
cpdef
bytes
build_reverse_complement
(
self
)
:
cdef
bytes
rev_comp
rev_comp
=
deepcopy
(
self
[
NUC_SEQUENCE_COLUMN
]).
lower
()
return
<
bytes
>
reverse_complement_sequence
(
rev_comp
)
# TODO DISCUSS
# sequence quality (character string format) property getter and setter
# @property
# def quality_str(self):
# return self[QUALITY_COLUMN]
#
# @quality.setter
# def quality_str(self, list new_quality): # @DuplicatedSignature
# self[QUALITY_COLUMN] = new_quality
# cpdef str reverse_complement(self) : TODO in C ?
# pass
cdef
class
Seq_Stored
(
Line
)
:
...
...
@@ -112,19 +185,23 @@ cdef class Seq_Stored(Line) :
cdef
class
Nuc_Seq_Stored
(
Seq_Stored
)
:
def
__init__
(
self
,
View
view
,
index_t
line_nb
)
:
self
=
Seq_Stored
.
__init__
(
self
,
view
,
line_nb
)
self
.
_reverse_complement
=
NULL
# TODO unnecessary?
cpdef
set_quality_int
(
self
,
list
new_qual
):
self
.
_view
.
get_column
(
QUALITY_COLUMN
).
set_line
(
self
.
index
,
new_qual
)
cpdef
set_quality_char
(
self
,
bytes
new_qual
,
int
offset
=-
1
):
self
.
_view
.
get_column
(
QUALITY_COLUMN
).
set_str_line
(
self
.
index
,
new_qual
,
offset
=
offset
)
#
nuc_
seq property getter and setter
# seq property getter and setter
@
property
def
nuc_
seq
(
self
):
def
seq
(
self
):
return
self
.
_view
.
get_column
(
NUC_SEQUENCE_COLUMN
).
get_line
(
self
.
index
)
@
nuc_
seq
.
setter
def
nuc_
seq
(
self
,
object
new_seq
):
# @DuplicatedSignature
@
seq
.
setter
def
seq
(
self
,
object
new_seq
):
# @DuplicatedSignature
self
.
_view
.
get_column
(
NUC_SEQUENCE_COLUMN
).
set_line
(
self
.
index
,
tobytes
(
new_seq
))
# quality property getter and setter
...
...
@@ -144,16 +221,29 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
# quality character string property getter
@
property
def
quality_str
(
self
):
return
self
.
_view
.
get_column
(
QUALITY_COLUMN
).
get_str_line
(
self
.
_index
)
return
self
.
_view
.
get_column
(
QUALITY_COLUMN
).
get_str_line
(
self
.
_index
)
# reverse complement property getter
@
property
def
reverse_complement
(
self
):
cdef
bytes
rev_comp
if
self
.
_reverse_complement
==
NULL
:
rev_comp
=
self
.
build_reverse_complement
()
self
.
_reverse_complement
=
rev_comp
return
self
.
_reverse_complement
def
__str__
(
self
):
return
bytes2str
(
self
.
nuc_seq
)
return
tostr
(
self
.
_view
.
get_column
(
NUC_SEQUENCE_COLUMN
).
get_line
(
self
.
index
)
)
# cpdef str reverse_complement(self) : TODO in C ?
# pass
def
__len__
(
self
):
return
len
(
self
.
_view
.
get_column
(
NUC_SEQUENCE_COLUMN
).
get_line
(
self
.
index
))
cpdef
bytes
build_reverse_complement
(
self
)
:
cdef
bytes
rev_comp
rev_comp
=
deepcopy
(
self
[
NUC_SEQUENCE_COLUMN
]).
lower
()
return
<
bytes
>
reverse_complement_sequence
(
rev_comp
)
# TODO static method to import OBI_Nuc_Seq to OBI_Nuc_Seq_Stored ?
\ No newline at end of file
src/utils.c
View file @
156fb04e
...
...
@@ -28,6 +28,140 @@
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**************************************************************************
*
* D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
*
**************************************************************************/
/**
* Internal function returning the complement of a nucleotide base.
*
* @warning The base must be in lower case.
*
* @param nucAc The nucleotide base.
*
* @returns The complement of the nucleotide base.
* @retval The nucleotide base itself if no complement was found.
*
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @note Copied from ecoPCR source code
*/
static
char
nuc_base_complement
(
char
nucAc
);
/**
* Internal function returning the complement of a nucleotide sequence.
*
* @warning The sequence must be in lower case.
* @warning The sequence will be replaced by its complement without being copied.
*
* @param nucAcSeq The nucleotide sequence.
*
* @returns The complemented sequence.
*
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @note Copied from ecoPCR source code
*/
static
char
*
nuc_seq_complement
(
char
*
nucAcSeq
);
/**
* Internal function returning the reverse of a nucleotide sequence.
*
* @warning The sequence must be in lower case.
* @warning The sequence will be replaced by its reverse without being copied.
*
* @param str The nucleotide sequence.
* @param isPattern Whether the sequence is a pattern. TODO
*
* @returns The reversed sequence.
*
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @note Copied from ecoPCR source code
*/
static
char
*
reverse_sequence
(
char
*
str
,
char
isPattern
);
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
*
************************************************************************/
static
char
nuc_base_complement
(
char
nucAc
)
{
char
*
c
;
if
((
c
=
strchr
(
DNA_ALPHA
,
nucAc
)))
return
CDNA_ALPHA
[(
c
-
DNA_ALPHA
)];
else
return
nucAc
;
}
static
char
*
nuc_seq_complement
(
char
*
nucAcSeq
)
{
char
*
s
;
for
(
s
=
nucAcSeq
;
*
s
;
s
++
)
*
s
=
nuc_base_complement
(
*
s
);
return
nucAcSeq
;
}
static
char
*
reverse_sequence
(
char
*
str
,
char
isPattern
)
{
char
*
sb
,
*
se
,
c
;
if
(
!
str
)
return
str
;
sb
=
str
;
se
=
str
+
strlen
(
str
)
-
1
;
while
(
sb
<=
se
)
{
c
=
*
sb
;
*
sb
++
=
*
se
;
*
se
--
=
c
;
}
sb
=
str
;
se
=
str
+
strlen
(
str
)
-
1
;
if
(
isPattern
)
for
(;
sb
<
se
;
sb
++
)
{
if
(
*
sb
==
'#'
)
{
if
(((
se
-
sb
)
>
2
)
&&
(
*
(
sb
+
2
)
==
'!'
))
{
*
sb
=
'!'
;
sb
+=
2
;
*
sb
=
'#'
;
}
else
{
*
sb
=*
(
sb
+
1
);
sb
++
;
*
sb
=
'#'
;
}
}
else
if
(
*
sb
==
'!'
)
{
*
sb
=*
(
sb
-
1
);
*
(
sb
-
1
)
=
'!'
;
}
}
return
str
;
}
/**********************************************************************
*
...
...
@@ -404,3 +538,15 @@ loop: SWAPINIT(a, es);
/* qsort(pn - r, r / es, es, cmp);*/
}
char
*
reverse_complement_pattern
(
char
*
nucAcSeq
)
{
return
reverse_sequence
(
nuc_seq_complement
(
nucAcSeq
),
1
);
}
char
*
reverse_complement_sequence
(
char
*
nucAcSeq
)
{
return
reverse_sequence
(
nuc_seq_complement
(
nucAcSeq
),
0
);
}
src/utils.h
View file @
156fb04e
...
...
@@ -20,10 +20,16 @@
#include "obitypes.h"
#define FORMATTED_TIME_LENGTH (1024)
/**< The length allocated for the character string containing a formatted date.
*/
#define ONE_IF_ZERO(x) (((x)==0)?1:(x))
/**< If x is equal to 0, x takes the value 1.
*/
#define FORMATTED_TIME_LENGTH (1024)
/**< The length allocated for the character string containing a formatted date.
*/
#define ONE_IF_ZERO(x) (((x)==0)?1:(x))
/**< If x is equal to 0, x takes the value 1.
*/
#define DNA_ALPHA "acgtbdefhijklmnopqrsuvwxyz#![]"
/**< DNA alphabet (IUPAC).
//"ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" */
#define CDNA_ALPHA "tgcavhefdijmlknopqysabwxrz#!]["
/**< Complementary DNA alphabet (IUPAC).
//"TVGHEFCDIJMLKNOPQYSAABWXRZ#!][" */
/**
* @brief Copy the content of a file into another file.
...
...
@@ -155,4 +161,38 @@ void* bsearch_user_data(const void* key, const void* base, size_t num, size_t si
void
qsort_user_data
(
void
*
aa
,
size_t
n
,
size_t
es
,
const
void
*
user_data
,
int
(
*
cmp
)(
const
void
*
,
const
void
*
,
const
void
*
));
/**
* Function returning the reverse complement of a nucleotide sequence.
*
* @warning The sequence must be in lower case.
* @warning The sequence will be replaced by its reverse complement without being copied.
*
* @param nucAcSeq The nucleotide sequence.
*
* @returns The reverse complemented sequence.
*
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @note Copied from ecoPCR source code
*/
char
*
reverse_complement_sequence
(
char
*
nucAcSeq
);
/**
* Function returning the reverse complement of a pattern.
*
* @warning The pattern must be in lower case.
* @warning The pattern will be replaced by its reverse complement without being copied.
*
* @param nucAcSeq The pattern.
*
* @returns The reverse complemented pattern.
*
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @note Copied from ecoPCR source code
*/
char
*
reverse_complement_pattern
(
char
*
nucAcSeq
);
#endif
/* UTILS_H_ */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment