Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools3
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
21
Issues
21
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools3
Commits
c7f5b8d9
Commit
c7f5b8d9
authored
Jan 21, 2019
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Alignpairedend: added alignment using shifting with best kmer similarity
(low level layer in C and Cython API)
parent
59017c0d
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
934 additions
and
89 deletions
+934
-89
python/obitools3/commands/alignpairedend.pxd
python/obitools3/commands/alignpairedend.pxd
+4
-0
python/obitools3/commands/alignpairedend.pyx
python/obitools3/commands/alignpairedend.pyx
+61
-49
python/obitools3/dms/capi/kmer_similarity.pxd
python/obitools3/dms/capi/kmer_similarity.pxd
+42
-0
python/obitools3/dms/dms.cfiles
python/obitools3/dms/dms.cfiles
+1
-0
python/obitools3/libalign/_solexapairend.pyx
python/obitools3/libalign/_solexapairend.pyx
+73
-40
python/obitools3/libalign/shifted_ali.pxd
python/obitools3/libalign/shifted_ali.pxd
+30
-0
python/obitools3/libalign/shifted_ali.pyx
python/obitools3/libalign/shifted_ali.pyx
+119
-0
python/obitools3/utils.cfiles
python/obitools3/utils.cfiles
+1
-0
src/kmer_similarity.c
src/kmer_similarity.c
+483
-0
src/kmer_similarity.h
src/kmer_similarity.h
+120
-0
No files found.
python/obitools3/commands/alignpairedend.pxd
0 → 100755
View file @
c7f5b8d9
#cython: language_level=3
cdef
object
buildAlignment
(
object
direct
,
object
reverse
)
\ No newline at end of file
python/obitools3/commands/alignpairedend.pyx
View file @
c7f5b8d9
...
...
@@ -2,19 +2,20 @@
from
obitools3.apps.progress
cimport
ProgressBar
# @UnresolvedImport
from
obitools3.dms
import
DMS
from
obitools3.dms.view.view
cimport
View
from
obitools3.dms.view
import
RollbackException
from
obitools3.dms.view.typed_view.view_NUC_SEQS
cimport
View_NUC_SEQS
from
obitools3.dms.column.column
cimport
Column
,
Column_line
from
obitools3.dms.capi.obiview
cimport
QUALITY_COLUMN
,
COUNT_COLUMN
,
NUC_SEQUENCE_COLUMN
,
ID_COLUMN
from
obitools3.dms.capi.obitypes
cimport
OBI_
INT
,
OBI_STR
,
index_t
,
OBI_
QUAL
from
obitools3.dms.column.column
cimport
Column
from
obitools3.dms.capi.obiview
cimport
QUALITY_COLUMN
from
obitools3.dms.capi.obitypes
cimport
OBI_QUAL
from
obitools3.apps.optiongroups
import
addMinimalInputOption
,
addMinimalOutputOption
from
obitools3.uri.decode
import
open_uri
from
obitools3.apps.config
import
logger
from
obitools3.libalign._qsassemble
import
QSolexaReverseAssemble
from
obitools3.libalign._qsrassemble
import
QSolexaRightReverseAssemble
from
obitools3.libalign._solexapairend
import
buildConsensus
,
buildJoinedSequence
from
obitools3.dms.obiseq
cimport
Nuc_Seq_Stored
,
Nuc_Seq
from
obitools3.dms.obiseq
cimport
Nuc_Seq
from
obitools3.libalign.shifted_ali
cimport
Kmer_similarity
,
Ali_shifted
import
sys
import
os
...
...
@@ -27,6 +28,7 @@ REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY"
__title__
=
"Aligns paired-ended reads"
def
addOptions
(
parser
):
addMinimalInputOption
(
parser
)
...
...
@@ -41,14 +43,6 @@ def addOptions(parser):
type
=
str
,
help
=
"URI to the reverse reads if they are in a different view than the forward reads"
)
# TODO
# group.add_argument('--index-file',
# action="store", dest="alignpairedend:index",
# metavar="<FILENAME>",
# default=None,
# type=str,
# help="URI to the index reads")
group
.
add_argument
(
'--score-min'
,
action
=
"store"
,
dest
=
"alignpairedend:smin"
,
metavar
=
"#.###"
,
...
...
@@ -56,12 +50,22 @@ def addOptions(parser):
type
=
float
,
help
=
"Minimum score for keeping alignments"
)
group
.
add_argument
(
'-A'
,
'--true-ali'
,
action
=
"store_true"
,
dest
=
"alignpairedend:trueali"
,
default
=
False
,
help
=
"Performs gap free end alignment of sequences instead of using kmers to compute alignments (slower)."
)
group
.
add_argument
(
'-k'
,
'--kmer-size'
,
action
=
"store"
,
dest
=
"alignpairedend:kmersize"
,
metavar
=
"#"
,
default
=
3
,
type
=
int
,
help
=
"K-mer size for kmer comparisons, between 1 and 4 (not when using -A option; default: 3)"
)
# TODO declarations, cdef, imports
la
=
QSolexaReverseAssemble
()
ra
=
QSolexaRightReverseAssemble
()
def
buildAlignment
(
object
direct
,
object
reverse
):
cdef
object
buildAlignment
(
object
direct
,
object
reverse
):
if
len
(
direct
)
==
0
or
len
(
reverse
)
==
0
:
return
None
...
...
@@ -82,9 +86,9 @@ def buildAlignment(object direct, object reverse):
ali
=
rali
return
ali
def
alignmentIterator
(
entries
):
def
alignmentIterator
(
entries
,
aligner
):
if
type
(
entries
)
==
list
:
two_views
=
True
...
...
@@ -94,7 +98,7 @@ def alignmentIterator(entries):
else
:
two_views
=
False
entries_len
=
len
(
entries
)
for
i
in
range
(
entries_len
):
if
two_views
:
seqF
=
forward
[
i
]
...
...
@@ -104,9 +108,12 @@ def alignmentIterator(entries):
seqR
=
Nuc_Seq
(
seqF
.
id
,
seqF
[
REVERSE_SEQ_COLUMN_NAME
],
quality
=
seqF
[
REVERSE_QUALITY_COLUMN_NAME
])
seqF
.
pop
(
REVERSE_SEQ_COLUMN_NAME
)
seqF
.
pop
(
REVERSE_QUALITY_COLUMN_NAME
)
ali
=
buildAlignment
(
seqF
,
seqR
)
ali
=
aligner
(
seqF
,
seqR
)
if
ali
is
None
:
continue
yield
ali
...
...
@@ -160,11 +167,6 @@ def run(config):
else
:
entries_len
=
len
(
entries
)
# if "index" in config["alignpairedend"]: # TODO
# index = open_uri(config["alignpairedend"]["index"])
# else:
# index = None
# Open the output
output
=
open_uri
(
config
[
'obi'
][
'outputURI'
],
input
=
False
,
...
...
@@ -174,22 +176,30 @@ def run(config):
view
=
output
[
1
]
Column
.
new_column
(
view
,
b
"QUALITY"
,
OBI_QUAL
)
#TODO output URI quality option
Column
.
new_column
(
view
,
QUALITY_COLUMN
,
OBI_QUAL
)
#TODO output URI quality option?
if
'smin'
in
config
[
'alignpairedend'
]:
config
[
'alignpairedend'
][
'sminL'
]
=
config
[
'alignpairedend'
][
'smin'
]
config
[
'alignpairedend'
][
'sminR'
]
=
config
[
'alignpairedend'
][
'smin'
]
sminL
=
config
[
'alignpairedend'
][
'sminL'
]
sminR
=
config
[
'alignpairedend'
][
'sminR'
]
smin
=
config
[
'alignpairedend'
][
'smin'
]
else
:
sminL
=
0
sminR
=
0
smin
=
0
# Initialize the progress bar
pb
=
ProgressBar
(
entries_len
,
config
,
seconde
=
5
)
ba
=
alignmentIterator
(
entries
)
if
config
[
'alignpairedend'
][
'trueali'
]:
kmer_ali
=
False
aligner
=
buildAlignment
else
:
kmer_ali
=
True
if
type
(
entries
)
==
list
:
forward
=
entries
[
0
]
reverse
=
entries
[
1
]
aligner
=
Kmer_similarity
(
forward
,
view2
=
reverse
,
kmer_size
=
config
[
'alignpairedend'
][
'kmersize'
])
else
:
aligner
=
Kmer_similarity
(
entries
,
kmer_size
=
config
[
'alignpairedend'
][
'kmersize'
])
ba
=
alignmentIterator
(
entries
,
aligner
)
i
=
0
for
ali
in
ba
:
...
...
@@ -197,31 +207,33 @@ def run(config):
consensus
=
view
[
i
]
if
sminL
>
0
:
if
((
ali
.
direction
==
'left'
and
ali
.
score
>
sminL
)
or
(
ali
.
score
>
sminR
)):
buildConsensus
(
ali
,
consensus
)
if
not
two_views
:
seqF
=
entries
[
i
]
else
:
seqF
=
forward
[
i
]
if
smin
>
0
:
if
(
ali
.
score
>
smin
)
:
buildConsensus
(
ali
,
consensus
,
seqF
)
else
:
seqF
=
ali
[
0
].
wrapped
if
not
two_views
:
seq
=
entries
[
i
]
seqR
=
Nuc_Seq
(
seq
.
id
,
seq
[
REVERSE_SEQ_COLUMN_NAME
],
quality
=
seq
[
REVERSE_QUALITY_COLUMN_NAME
])
seqR
=
Nuc_Seq
(
seqF
.
id
,
seqF
[
REVERSE_SEQ_COLUMN_NAME
],
quality
=
seqF
[
REVERSE_QUALITY_COLUMN_NAME
])
else
:
seqR
=
reverse
[
i
]
buildJoinedSequence
(
ali
,
seqR
,
consensus
)
buildJoinedSequence
(
ali
,
seqR
,
consensus
,
forward
=
seqF
)
consensus
[
b
"sminL"
]
=
sminL
consensus
[
b
"sminR"
]
=
sminR
consensus
[
b
"smin"
]
=
smin
else
:
buildConsensus
(
ali
,
consensus
)
# TODO
# if "index" in config['alignpairedend'] and config['alignpairedend']['index'] is not None:
# idx = str(config['alignpairedend']['index'].next())
# consensus[b"illumina_index"] = idx
buildConsensus
(
ali
,
consensus
,
seqF
)
if
kmer_ali
:
ali
.
free
()
i
+=
1
if
kmer_ali
:
aligner
.
free
()
# Save command config in View and DMS comments
command_line
=
" "
.
join
(
sys
.
argv
[
1
:])
view
.
write_config
(
config
,
"alignpairedend"
,
command_line
,
input_dms_name
=
input_dms_name
,
input_view_name
=
input_view_name
)
...
...
python/obitools3/dms/capi/kmer_similarity.pxd
0 → 100755
View file @
c7f5b8d9
#cython: language_level=3
from
obitools3.dms.capi.obiview
cimport
Obiview_p
from
obitools3.dms.capi.obidmscolumn
cimport
OBIDMS_column_p
from
obitools3.dms.capi.obitypes
cimport
index_t
from
libc.stdint
cimport
int32_t
,
uint8_t
cdef
extern
from
"kmer_similarity.h"
nogil
:
struct
Obi_ali_t
:
double
score
int
consensus_length
int
overlap_length
char
*
consensus_seq
uint8_t
*
consensus_qual
int
shift
char
*
direction
ctypedef
Obi_ali_t
*
Obi_ali_p
void
obi_free_shifted_ali
(
Obi_ali_p
ali
)
Obi_ali_p
kmer_similarity
(
Obiview_p
view1
,
OBIDMS_column_p
column1
,
index_t
idx1
,
index_t
elt_idx1
,
Obiview_p
view2
,
OBIDMS_column_p
column2
,
index_t
idx2
,
index_t
elt_idx2
,
uint8_t
kmer_size
,
int32_t
*
kmer_pos_array
,
int32_t
*
kmer_pos_array_height_p
,
int32_t
*
shift_array
,
int32_t
*
shift_array_height_p
,
int32_t
*
shift_count_array
,
int32_t
*
shift_count_array_height_p
,
bint
build_consensus
)
python/obitools3/dms/dms.cfiles
View file @
c7f5b8d9
...
...
@@ -6,6 +6,7 @@
../../../src/dna_seq_indexer.c
../../../src/encode.c
../../../src/hashtable.c
../../../src/kmer_similarity.c
../../../src/linked_list.c
../../../src/murmurhash2.c
../../../src/obi_lcs.c
...
...
python/obitools3/libalign/_solexapairend.pyx
View file @
c7f5b8d9
...
...
@@ -3,6 +3,18 @@
from
cpython
cimport
array
from
.solexapairend
import
iterOnAligment
from
.shifted_ali
cimport
Ali_shifted
from
obitools3.dms.capi.obiview
cimport
Obiview_p
,
QUALITY_COLUMN
,
\
obi_set_qual_int_with_elt_idx_and_col_p_in_view
,
\
obi_set_str_with_elt_idx_and_col_p_in_view
from
obitools3.dms.capi.obidmscolumn
cimport
OBIDMS_column_p
from
obitools3.dms.view.view
cimport
View
from
obitools3.dms.column.column
cimport
Column
from
math
import
log10
...
...
@@ -146,7 +158,7 @@ cdef class IterOnConsensus:
seqBDeletion
=
property
(
get_seqBDeletion
,
None
,
None
,
"deletereverse's docstring"
)
def
buildConsensus
(
ali
,
seq
):
def
buildConsensus
(
ali
,
seq
,
ref_tags
=
None
):
cdef
list
quality
cdef
char
aseq
[
1000
]
cdef
int
i
=
0
...
...
@@ -156,61 +168,82 @@ def buildConsensus(ali, seq):
cdef
double
score
cdef
bytes
sseq
cdef
double
p
cdef
OBIDMS_column_p
col_p
cdef
Obiview_p
view_p
cdef
View
view
cdef
Column
column
quality
=
[]
if
type
(
ali
)
==
Ali_shifted
:
view
=
seq
.
view
view_p
=
view
.
pointer
()
column
=
view
[
QUALITY_COLUMN
]
col_p
=
column
.
pointer
()
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
seq
.
set
(
ref_tags
.
id
+
b
"_CONS"
,
ali
.
consensus_seq
,
quality
=
ali
.
consensus_qual
,
tags
=
ref_tags
)
seq
[
b
'ali_length'
]
=
ali
.
consensus_len
seq
[
b
'score_norm'
]
=
float
(
ali
.
score
)
/
ali
.
consensus_len
seq
[
b
'shift'
]
=
ali
.
shift
else
:
if
len
(
ali
[
0
])
>
999
:
# TODO why?
raise
AssertionError
,
"Too long alignemnt"
if
len
(
ali
[
0
])
>
999
:
# TODO why?
raise
AssertionError
,
"Too long alignemnt"
ic
=
IterOnConsensus
(
ali
)
for
nuc
,
score
in
ic
:
cnuc
=
nuc
quality
.
append
(
score
)
aseq
[
i
]
=
cnuc
[
0
]
i
+=
1
aseq
[
i
]
=
0
sseq
=
aseq
# Reconvert quality from array of probabilities to array of raw quality values
i
=
0
for
p
in
quality
:
quality
[
i
]
=
min
(
42
,
round
(
-
10
*
log10
(
p
)))
i
+=
1
ic
=
IterOnConsensus
(
ali
)
for
nuc
,
score
in
ic
:
cnuc
=
nuc
quality
.
append
(
score
)
aseq
[
i
]
=
cnuc
[
0
]
i
+=
1
seq
.
set
(
ali
[
0
].
wrapped
.
id
+
b
"_CONS"
,
sseq
,
quality
=
quality
,
tags
=
ali
[
0
].
wrapped
)
aseq
[
i
]
=
0
sseq
=
aseq
# Reconvert quality from array of probabilities to array of raw quality values
i
=
0
for
p
in
quality
:
quality
[
i
]
=
min
(
42
,
round
(
-
10
*
log10
(
p
)))
i
+=
1
seq
.
set
(
ali
[
0
].
wrapped
.
id
+
b
"_CONS"
,
sseq
,
quality
=
quality
,
tags
=
ali
[
0
].
wrapped
)
if
hasattr
(
ali
,
"counter"
):
seq
[
b
'alignement_id'
]
=
ali
.
counter
seq
[
b
'seq_a_single'
]
=
ic
.
seqASingle
seq
[
b
'seq_b_single'
]
=
ic
.
seqBSingle
seq
[
b
'seq_ab_match'
]
=
ic
.
seqABMatch
seq
[
b
'seq_a_mismatch'
]
=
ic
.
seqAMismatch
seq
[
b
'seq_b_mismatch'
]
=
ic
.
seqBMismatch
seq
[
b
'seq_a_insertion'
]
=
ic
.
seqAInsertion
seq
[
b
'seq_b_insertion'
]
=
ic
.
seqBInsertion
-
ic
.
seqBSingle
seq
[
b
'seq_a_deletion'
]
=
ic
.
seqADeletion
seq
[
b
'seq_b_deletion'
]
=
ic
.
seqBDeletion
seq
[
b
'ali_length'
]
=
len
(
seq
)
-
ic
.
seqASingle
-
ic
.
seqBSingle
if
seq
[
b
'ali_length'
]
>
0
:
seq
[
b
'score_norm'
]
=
float
(
ali
.
score
)
/
seq
[
b
'ali_length'
]
if
hasattr
(
ali
,
"direction"
):
seq
[
b
'direction'
]
=
ali
.
direction
if
hasattr
(
ali
,
"counter"
):
seq
[
b
'alignement_id'
]
=
ali
.
counter
seq
[
b
'seq_a_single'
]
=
ic
.
seqASingle
seq
[
b
'seq_b_single'
]
=
ic
.
seqBSingle
seq
[
b
'seq_ab_match'
]
=
ic
.
seqABMatch
seq
[
b
'seq_a_mismatch'
]
=
ic
.
seqAMismatch
seq
[
b
'seq_b_mismatch'
]
=
ic
.
seqBMismatch
seq
[
b
'seq_a_insertion'
]
=
ic
.
seqAInsertion
seq
[
b
'seq_b_insertion'
]
=
ic
.
seqBInsertion
-
ic
.
seqBSingle
seq
[
b
'seq_a_deletion'
]
=
ic
.
seqADeletion
seq
[
b
'seq_b_deletion'
]
=
ic
.
seqBDeletion
seq
[
b
'ali_direction'
]
=
ali
.
direction
seq
[
b
'score'
]
=
ali
.
score
seq
[
b
'ali_length'
]
=
len
(
seq
)
-
ic
.
seqASingle
-
ic
.
seqBSingle
if
seq
[
b
'ali_length'
]
>
0
:
seq
[
b
'score_norm'
]
=
float
(
ali
.
score
)
/
seq
[
b
'ali_length'
]
seq
[
b
'mode'
]
=
b
'alignment'
return
seq
def
buildJoinedSequence
(
ali
,
reverse
,
seq
):
forward
=
ali
[
0
].
wrapped
def
buildJoinedSequence
(
ali
,
reverse
,
seq
,
forward
=
None
):
if
forward
is
not
None
:
forward
=
forward
else
:
forward
=
ali
[
0
].
wrapped
s
=
forward
.
seq
+
reverse
.
seq
quality
=
forward
.
quality
quality
.
extend
(
reverse
.
quality
)
seq
.
set
(
forward
.
id
+
b
"_PairedEnd"
,
s
,
definition
=
forward
.
definition
,
quality
=
quality
,
tags
=
forward
)
seq
[
b
"score"
]
=
ali
.
score
seq
[
b
"ali_dir"
]
=
ali
.
direction
seq
[
b
"ali_dir
ection
"
]
=
ali
.
direction
seq
[
b
"mode"
]
=
b
"joined"
seq
[
b
"pairedend_limit"
]
=
len
(
forward
)
return
seq
...
...
python/obitools3/libalign/shifted_ali.pxd
0 → 100755
View file @
c7f5b8d9
#cython: language_level=3
from
obitools3.dms.capi.kmer_similarity
cimport
kmer_similarity
,
Obi_ali_p
from
libc.stdint
cimport
int32_t
,
uint8_t
from
obitools3.dms.capi.obiview
cimport
Obiview_p
from
obitools3.dms.capi.obidmscolumn
cimport
OBIDMS_column_p
cdef
class
Ali_shifted
:
cdef
Obi_ali_p
_pointer
cdef
inline
Obi_ali_p
pointer
(
self
)
cpdef
free
(
self
)
@
staticmethod
cdef
Ali_shifted
new_ali
(
Obi_ali_p
ali_p
)
cdef
class
Kmer_similarity
:
cdef
uint8_t
kmer_size
cdef
int32_t
*
kmer_pos_array_p
cdef
int32_t
kmer_pos_array_height_a
[
1
]
cdef
int32_t
*
shift_array_p
cdef
int32_t
shift_array_height_a
[
1
]
cdef
int32_t
*
shift_count_array_p
cdef
int32_t
shift_count_array_height_a
[
1
]
cdef
Obiview_p
view1_p
cdef
OBIDMS_column_p
column1_p
cdef
Obiview_p
view2_p
cdef
OBIDMS_column_p
column2_p
cdef
bint
build_consensus
cpdef
free
(
self
)
\ No newline at end of file
python/obitools3/libalign/shifted_ali.pyx
0 → 100755
View file @
c7f5b8d9
#cython: language_level=3
from
obitools3.dms.capi.obiview
cimport
NUC_SEQUENCE_COLUMN
from
obitools3.dms.obiseq
cimport
Nuc_Seq_Stored
from
obitools3.dms.capi.kmer_similarity
cimport
kmer_similarity
,
Obi_ali_p
,
obi_free_shifted_ali
from
obitools3.dms.view.typed_view.view_NUC_SEQS
cimport
View_NUC_SEQS
from
obitools3.dms.column.column
cimport
Column
from
libc.stdlib
cimport
free
cdef
class
Ali_shifted
:
cdef
inline
Obi_ali_p
pointer
(
self
)
:
return
<
Obi_ali_p
>
(
self
.
_pointer
)
@
staticmethod
cdef
Ali_shifted
new_ali
(
Obi_ali_p
ali_p
)
:
# not __init__ method because need to pass pointer
ali
=
Ali_shifted
()
ali
.
_pointer
=
ali_p
return
ali
@
property
def
score
(
self
):
return
self
.
pointer
().
score
@
property
def
consensus_len
(
self
):
return
self
.
pointer
().
consensus_length
@
property
def
overlap_len
(
self
):
return
self
.
pointer
().
overlap_length
@
property
def
consensus_seq
(
self
):
return
self
.
pointer
().
consensus_seq
@
property
def
shift
(
self
):
return
self
.
pointer
().
shift
@
property
def
consensus_qual
(
self
):
# must return list because uint8_t* are forced into bytes by cython which won't keep beyond the first 0 value
qual_list
=
[]
for
i
in
range
(
self
.
consensus_len
):
qual_list
.
append
((
self
.
pointer
().
consensus_qual
)[
i
])
return
qual_list
@
property
def
direction
(
self
):
return
self
.
pointer
().
direction
cpdef
free
(
self
):
obi_free_shifted_ali
(
self
.
pointer
())
cdef
class
Kmer_similarity
:
def
__init__
(
self
,
View_NUC_SEQS
view1
,
Column
column1
=
None
,
View_NUC_SEQS
view2
=
None
,
Column
column2
=
None
,
uint8_t
kmer_size
=
3
,
build_consensus
=
True
)
:
cdef
Column
default_seq_col
if
kmer_size
<
1
or
kmer_size
>
3
:
raise
Exception
(
"Kmer size to compute kmer similarity must be >=1 or <=4"
)
self
.
kmer_pos_array_p
=
NULL
self
.
shift_array_p
=
NULL
self
.
shift_count_array_p
=
NULL
self
.
kmer_pos_array_height_a
[
0
]
=
0
self
.
shift_array_height_a
[
0
]
=
0
self
.
shift_count_array_height_a
[
0
]
=
0
self
.
kmer_size
=
kmer_size
self
.
build_consensus
=
build_consensus
self
.
view1_p
=
view1
.
pointer
()
if
column1
is
not
None
:
self
.
column1_p
=
column1
.
pointer
()
else
:
if
type
(
view1
)
!=
View_NUC_SEQS
:
raise
Exception
(
"Kmer similarity with no sequence column given must be given a NUC_SEQS view"
)
default_seq_col
=
view1
[
NUC_SEQUENCE_COLUMN
]
self
.
column1_p
=
default_seq_col
.
pointer
()
if
view2
is
not
None
:
self
.
view2_p
=
view2
.
pointer
()
else
:
view2
=
view1
self
.
view2_p
=
self
.
view1_p
if
column2
is
not
None
:
self
.
column2_p
=
column2
.
pointer
()
else
:
if
type
(
view2
)
!=
View_NUC_SEQS
:
raise
Exception
(
"Kmer similarity with no sequence column given must be given a NUC_SEQS view"
)
default_seq_col
=
view2
[
NUC_SEQUENCE_COLUMN
]
self
.
column2_p
=
default_seq_col
.
pointer
()
def
__call__
(
self
,
Nuc_Seq_Stored
seq1
,
Nuc_Seq_Stored
seq2
):
cdef
Obi_ali_p
ali_p
cdef
Ali_shifted
ali
ali_p
=
kmer_similarity
(
self
.
view1_p
,
self
.
column1_p
,
seq1
.
index
,
0
,
\
self
.
view2_p
,
self
.
column2_p
,
seq2
.
index
,
0
,
\
self
.
kmer_size
,
\
self
.
kmer_pos_array_p
,
self
.
kmer_pos_array_height_a
,
\
self
.
shift_array_p
,
self
.
shift_array_height_a
,
\
self
.
shift_count_array_p
,
self
.
shift_count_array_height_a
,
self
.
build_consensus
)
ali
=
Ali_shifted
.
new_ali
(
ali_p
)
return
ali
cpdef
free
(
self
):
if
self
.
kmer_pos_array_p
is
not
NULL
:
free
(
self
.
kmer_pos_array_p
)
self
.
kmer_pos_array_height_a
[
0
]
=
0
if
(
self
.
shift_array_p
is
not
NULL
)
:
free
(
self
.
shift_array_p
)
self
.
shift_array_height_a
[
0
]
=
0
if
(
self
.
shift_count_array_p
is
not
NULL
)
:
free
(
self
.
shift_count_array_p
)
self
.
shift_count_array_height_a
[
0
]
=
0
python/obitools3/utils.cfiles
View file @
c7f5b8d9
...
...
@@ -6,6 +6,7 @@
../../src/dna_seq_indexer.c
../../src/encode.c
../../src/hashtable.c
../../src/kmer_similarity.c
../../src/linked_list.c
../../src/murmurhash2.c
../../src/obi_lcs.c
...
...
src/kmer_similarity.c
0 → 100755
View file @
c7f5b8d9
/****************************************************************************
* Kmer similarity computation functions *
****************************************************************************/
/**
* @file kmer_similarity.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date January 7th 2019
* @brief Kmer similarity computation functions.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include "utils.h"
#include "obidebug.h"
#include "obierrno.h"
#include "obitypes.h"
#include "kmer_similarity.h"
#include "obidmscolumn.h"
#include "obiview.h"
#include "encode.h"
#include "dna_seq_indexer.h"