Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
O
OBITools3
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
23
Issues
23
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
OBITools
OBITools3
Commits
d076ea99
Commit
d076ea99
authored
Nov 07, 2018
by
Celine Mercier
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Alignment: updated functions to align columns (LCS)
parent
6b1c41f3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
160 additions
and
74 deletions
+160
-74
src/obi_lcs.c
src/obi_lcs.c
+145
-63
src/obi_lcs.h
src/obi_lcs.h
+15
-11
No files found.
src/obi_
align
.c
→
src/obi_
lcs
.c
View file @
d076ea99
...
...
@@ -3,7 +3,7 @@
****************************************************************************/
/**
* @file obi_
align
.c
* @file obi_
lcs
.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date May 4th 2016
* @brief Functions handling LCS sequence alignments.
...
...
@@ -18,7 +18,7 @@
#include <stdio.h>
#include <stdbool.h>
#include "obi_
align
.h"
#include "obi_
lcs
.h"
#include "obidebug.h"
#include "obierrno.h"
#include "obitypes.h"
...
...
@@ -121,11 +121,11 @@ static int print_alignment_result(Obiview_p output_view,
OBIDMS_column_p
seq2_column
,
index_t
seq1_idx
,
index_t
seq2_idx
,
//
bool print_count,
//
OBIDMS_column_p count1_column,
//
OBIDMS_column_p count2_column,
//
int count1,
//
int count2,
bool
print_count
,
OBIDMS_column_p
count1_column
,
OBIDMS_column_p
count2_column
,
int
count1
,
int
count2
,
OBIDMS_column_p
ali_length_column
,
int
ali_length
,
OBIDMS_column_p
lcs_length_column
,
...
...
@@ -231,22 +231,22 @@ static int create_alignment_output_columns(Obiview_p output_view,
return
-
1
;
}
}
// if (print_count) // TODO count columns not implemented yet
//
{
//
// Create the column for the count of the first sequences aligned
//
if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0)
//
{
//
obidebug(1, "\nError creating the first column for the sequence counts when aligning");
//
return -1;
//
}
//
//
// Create the column for the count of the second sequences aligned
//
if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0)
//
{
//
obidebug(1, "\nError creating the second column for the sequence counts when aligning");
//
return -1;
//
}
//
}
if
(
print_count
)
{
// Create the column for the count of the first sequences aligned
if
(
obi_view_add_column
(
output_view
,
COUNT1_COLUMN_NAME
,
-
1
,
NULL
,
OBI_INT
,
0
,
1
,
NULL
,
false
,
false
,
false
,
NULL
,
NULL
,
-
1
,
COUNT1_COLUMN_COMMENTS
,
true
)
<
0
)
{
obidebug
(
1
,
"
\n
Error creating the first column for the sequence counts when aligning"
);
return
-
1
;
}
// Create the column for the count of the second sequences aligned
if
(
obi_view_add_column
(
output_view
,
COUNT2_COLUMN_NAME
,
-
1
,
NULL
,
OBI_INT
,
0
,
1
,
NULL
,
false
,
false
,
false
,
NULL
,
NULL
,
-
1
,
COUNT2_COLUMN_COMMENTS
,
true
)
<
0
)
{
obidebug
(
1
,
"
\n
Error creating the second column for the sequence counts when aligning"
);
return
-
1
;
}
}
return
0
;
}
...
...
@@ -267,11 +267,11 @@ static int print_alignment_result(Obiview_p output_view,
OBIDMS_column_p
seq2_column
,
index_t
seq1_idx
,
index_t
seq2_idx
,
//
bool print_count,
//
OBIDMS_column_p count1_column,
//
OBIDMS_column_p count2_column,
//
int count1,
//
int count2,
bool
print_count
,
OBIDMS_column_p
count1_column
,
OBIDMS_column_p
count2_column
,
int
count1
,
int
count2
,
OBIDMS_column_p
ali_length_column
,
int
ali_length
,
OBIDMS_column_p
lcs_length_column
,
...
...
@@ -322,21 +322,21 @@ static int print_alignment_result(Obiview_p output_view,
}
}
// // Write the counts if needed // TODO count columns not implemented yet
//
if (print_count)
//
{
// if (obi_set_index
_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0)
//
{
//
obidebug(1, "\nError writing count1 in a column");
//
return -1;
//
}
//
// if (obi_set_index
_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0)
//
{
//
obidebug(1, "\nError writing count2 in a column");
//
return -1;
//
}
//
}
// Write the counts if needed
if
(
print_count
)
{
if
(
obi_set_int
_with_elt_idx_and_col_p_in_view
(
output_view
,
count1_column
,
line
,
0
,
count1
)
<
0
)
{
obidebug
(
1
,
"
\n
Error writing count1 in a column"
);
return
-
1
;
}
if
(
obi_set_int
_with_elt_idx_and_col_p_in_view
(
output_view
,
count2_column
,
line
,
0
,
count2
)
<
0
)
{
obidebug
(
1
,
"
\n
Error writing count2 in a column"
);
return
-
1
;
}
}
// Write the alignment length if it was computed
if
((
reference
==
ALILEN
)
&&
(
normalize
||
!
similarity_mode
))
...
...
@@ -385,9 +385,13 @@ static int print_alignment_result(Obiview_p output_view,
**********************************************************************/
int
obi_lcs_align_one_column
(
OBIDMS_p
dms
,
const
char
*
seq_view_name
,
const
char
*
seq_column_name
,
const
char
*
seq_elt_name
,
int
obi_lcs_align_one_column
(
const
char
*
dms_name
,
const
char
*
seq_view_name
,
const
char
*
seq_column_name
,
const
char
*
seq_elt_name
,
const
char
*
id_column_name
,
const
char
*
output_view_name
,
const
char
*
output_view_comments
,
const
char
*
output_view_name
,
const
char
*
output_view_comments
,
bool
print_seq
,
bool
print_count
,
double
threshold
,
bool
normalize
,
int
reference
,
bool
similarity_mode
,
int
thread_count
)
...
...
@@ -396,6 +400,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
index_t
seq_count
;
index_t
id1_idx
,
id2_idx
;
index_t
seq1_idx
,
seq2_idx
;
int
count1
,
count2
;
double
score
;
int
lcs_length
;
int
ali_length
;
...
...
@@ -405,16 +410,18 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
int
lcs_min
;
index_t
seq_elt_idx
;
OBIDMS_p
dms
=
NULL
;
Obiview_p
seq_view
=
NULL
;
Obiview_p
output_view
=
NULL
;
OBIDMS_column_p
iseq_column
=
NULL
;
OBIDMS_column_p
i_count_column
=
NULL
;
OBIDMS_column_p
id_column
=
NULL
;
OBIDMS_column_p
id1_column
=
NULL
;
OBIDMS_column_p
id2_column
=
NULL
;
OBIDMS_column_p
seq1_column
=
NULL
;
OBIDMS_column_p
seq2_column
=
NULL
;
//
OBIDMS_column_p count1_column = NULL;
//
OBIDMS_column_p count2_column = NULL;
OBIDMS_column_p
count1_column
=
NULL
;
OBIDMS_column_p
count2_column
=
NULL
;
OBIDMS_column_p
idx1_column
=
NULL
;
OBIDMS_column_p
idx2_column
=
NULL
;
OBIDMS_column_p
lcs_length_column
=
NULL
;
...
...
@@ -423,6 +430,14 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
k
=
0
;
// Open DMS
dms
=
obi_open_dms
(
dms_name
);
if
(
dms
==
NULL
)
{
obidebug
(
1
,
"
\n
Error opening the DMS"
);
return
-
1
;
}
// Open input view
seq_view
=
obi_open_view
(
dms
,
seq_view_name
);
if
(
seq_view
==
NULL
)
...
...
@@ -494,6 +509,17 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
return
-
1
;
}
// Open the input count column
if
(
print_count
)
{
i_count_column
=
obi_view_get_column
(
seq_view
,
COUNT_COLUMN
);
if
(
i_count_column
==
NULL
)
{
obidebug
(
1
,
"
\n
Error getting the input COUNT column"
);
return
-
1
;
}
}
// Create the output view
output_view
=
obi_new_view
(
dms
,
output_view_name
,
NULL
,
NULL
,
output_view_comments
);
if
(
output_view
==
NULL
)
...
...
@@ -521,11 +547,11 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
seq1_column
=
obi_view_get_column
(
output_view
,
SEQ1_COLUMN_NAME
);
seq2_column
=
obi_view_get_column
(
output_view
,
SEQ2_COLUMN_NAME
);
}
// if (print_count) // TODO count columns not implemented yet
//
{
// count1_column = obi_view_get_column(seq
_view, COUNT1_COLUMN_NAME);
// count2_column = obi_view_get_column(seq
_view, COUNT2_COLUMN_NAME);
//
}
if
(
print_count
)
{
count1_column
=
obi_view_get_column
(
output
_view
,
COUNT1_COLUMN_NAME
);
count2_column
=
obi_view_get_column
(
output
_view
,
COUNT2_COLUMN_NAME
);
}
// Build kmer tables
ktable
=
hash_seq_column
(
seq_view
,
iseq_column
,
seq_elt_idx
);
...
...
@@ -597,11 +623,18 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
// Get second id idx
id2_idx
=
obi_get_index_with_elt_idx_and_col_p_in_view
(
seq_view
,
id_column
,
j
,
0
);
// Get counts // TODO use array for efficiency?
if
(
print_count
)
{
count1
=
obi_get_int_with_elt_idx_and_col_p_in_view
(
seq_view
,
i_count_column
,
i
,
0
);
count2
=
obi_get_int_with_elt_idx_and_col_p_in_view
(
seq_view
,
i_count_column
,
j
,
0
);
}
if
(
print_alignment_result
(
output_view
,
k
,
idx1_column
,
idx2_column
,
i
,
j
,
id1_column
,
id2_column
,
id1_idx
,
id2_idx
,
print_seq
,
seq1_column
,
seq2_column
,
seq1_idx
,
seq2_idx
,
//
print_count, count1_column, count2_column, count1, count2,
print_count
,
count1_column
,
count2_column
,
count1
,
count2
,
ali_length_column
,
ali_length
,
lcs_length_column
,
lcs_length
,
score_column
,
score
,
...
...
@@ -625,13 +658,19 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
return
-
1
;
}
if
(
obi_close_dms
(
dms
,
false
)
<
0
)
{
obidebug
(
1
,
"
\n
Error closing the DMS after aligning"
);
return
-
1
;
}
free_kmer_tables
(
ktable
,
seq_count
);
return
0
;
}
int
obi_lcs_align_two_columns
(
OBIDMS_p
dms
,
int
obi_lcs_align_two_columns
(
const
char
*
dms_name
,
const
char
*
seq1_view_name
,
const
char
*
seq2_view_name
,
const
char
*
seq1_column_name
,
...
...
@@ -640,7 +679,8 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
const
char
*
seq2_elt_name
,
const
char
*
id1_column_name
,
const
char
*
id2_column_name
,
const
char
*
output_view_name
,
const
char
*
output_view_comments
,
const
char
*
output_view_name
,
const
char
*
output_view_comments
,
bool
print_seq
,
bool
print_count
,
double
threshold
,
bool
normalize
,
int
reference
,
bool
similarity_mode
)
{
...
...
@@ -649,6 +689,7 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
index_t
seq2_count
;
index_t
id1_idx
,
id2_idx
;
index_t
seq1_idx
,
seq2_idx
;
int
count1
,
count2
;
double
score
;
int
lcs_length
;
int
ali_length
;
...
...
@@ -660,6 +701,7 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
index_t
seq2_elt_idx
;
bool
same_indexer
;
OBIDMS_p
dms
=
NULL
;
Obiview_p
seq1_view
=
NULL
;
Obiview_p
seq2_view
=
NULL
;
Obiview_p
output_view
=
NULL
;
...
...
@@ -667,12 +709,14 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
OBIDMS_column_p
i_seq2_column
=
NULL
;
OBIDMS_column_p
i_id1_column
=
NULL
;
OBIDMS_column_p
i_id2_column
=
NULL
;
OBIDMS_column_p
i_count1_column
=
NULL
;
OBIDMS_column_p
i_count2_column
=
NULL
;
OBIDMS_column_p
id1_column
=
NULL
;
OBIDMS_column_p
id2_column
=
NULL
;
OBIDMS_column_p
seq1_column
=
NULL
;
OBIDMS_column_p
seq2_column
=
NULL
;
//
OBIDMS_column_p count1_column = NULL;
//
OBIDMS_column_p count2_column = NULL;
OBIDMS_column_p
count1_column
=
NULL
;
OBIDMS_column_p
count2_column
=
NULL
;
OBIDMS_column_p
idx1_column
=
NULL
;
OBIDMS_column_p
idx2_column
=
NULL
;
OBIDMS_column_p
lcs_length_column
=
NULL
;
...
...
@@ -681,6 +725,14 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
k
=
0
;
// Open DMS
dms
=
obi_open_dms
(
dms_name
);
if
(
dms
==
NULL
)
{
obidebug
(
1
,
"
\n
Error opening the DMS to align"
);
return
-
1
;
}
// Open the first input view
seq1_view
=
obi_open_view
(
dms
,
seq1_view_name
);
if
(
seq1_view
==
NULL
)
...
...
@@ -835,6 +887,23 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
return
-
1
;
}
// Open the input count columns
if
(
print_count
)
{
i_count1_column
=
obi_view_get_column
(
seq1_view
,
COUNT_COLUMN
);
if
(
i_count1_column
==
NULL
)
{
obidebug
(
1
,
"
\n
Error getting the first input COUNT column"
);
return
-
1
;
}
i_count2_column
=
obi_view_get_column
(
seq2_view
,
COUNT_COLUMN
);
if
(
i_count2_column
==
NULL
)
{
obidebug
(
1
,
"
\n
Error getting the second input COUNT column"
);
return
-
1
;
}
}
// Create the output view
output_view
=
obi_new_view
(
dms
,
output_view_name
,
NULL
,
NULL
,
output_view_comments
);
if
(
output_view
==
NULL
)
...
...
@@ -862,11 +931,11 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
seq1_column
=
obi_view_get_column
(
output_view
,
SEQ1_COLUMN_NAME
);
seq2_column
=
obi_view_get_column
(
output_view
,
SEQ2_COLUMN_NAME
);
}
// if (print_count) // TODO count columns not implemented yet
//
{
// count1_column = obi_view_get_column(seq
_view, COUNT1_COLUMN_NAME);
// count2_column = obi_view_get_column(seq
_view, COUNT2_COLUMN_NAME);
//
}
if
(
print_count
)
{
count1_column
=
obi_view_get_column
(
output
_view
,
COUNT1_COLUMN_NAME
);
count2_column
=
obi_view_get_column
(
output
_view
,
COUNT2_COLUMN_NAME
);
}
// Check if the sequence columns share the same indexer (allows for quick checking of sequence equality)
if
(
strcmp
((
i_seq1_column
->
header
)
->
indexer_name
,
(
i_seq2_column
->
header
)
->
indexer_name
)
==
0
)
...
...
@@ -949,11 +1018,18 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
// Get second id idx
id2_idx
=
obi_get_index_with_elt_idx_and_col_p_in_view
(
seq2_view
,
i_id2_column
,
j
,
0
);
// Get counts // TODO use array for efficiency?
if
(
print_count
)
{
count1
=
obi_get_int_with_elt_idx_and_col_p_in_view
(
seq1_view
,
i_count1_column
,
i
,
0
);
count2
=
obi_get_int_with_elt_idx_and_col_p_in_view
(
seq2_view
,
i_count2_column
,
j
,
0
);
}
if
(
print_alignment_result
(
output_view
,
k
,
idx1_column
,
idx2_column
,
i
,
j
,
id1_column
,
id2_column
,
id1_idx
,
id2_idx
,
print_seq
,
seq1_column
,
seq2_column
,
seq1_idx
,
seq2_idx
,
//
print_count, count1_column, count2_column, count1, count2,
print_count
,
count1_column
,
count2_column
,
count1
,
count2
,
ali_length_column
,
ali_length
,
lcs_length_column
,
lcs_length
,
score_column
,
score
,
...
...
@@ -986,6 +1062,12 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
return
-
1
;
}
if
(
obi_close_dms
(
dms
,
false
)
<
0
)
{
obidebug
(
1
,
"
\n
Error closing the DMS after aligning"
);
return
-
1
;
}
free_kmer_tables
(
ktable
,
seq1_count
+
seq2_count
);
return
0
;
...
...
src/obi_
align
.h
→
src/obi_
lcs
.h
View file @
d076ea99
...
...
@@ -3,15 +3,15 @@
****************************************************************************/
/**
* @file obi_
align
.h
* @file obi_
lcs
.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date May 11th 2016
* @brief Header file for the functions handling the LCS alignment of DNA sequences.
*/
#ifndef OBI_
ALIGN
_H_
#define OBI_
ALIGN
_H_
#ifndef OBI_
LCS
_H_
#define OBI_
LCS
_H_
#include <stdlib.h>
...
...
@@ -59,7 +59,7 @@
*
* Note: The columns where the results are written are automatically named and created.
*
* @param dms
A pointer on an OBI
DMS.
* @param dms
_name The path of the
DMS.
* @param seq_view_name The name of the view where the column to align is.
* @param seq_column_name The name of the OBI_SEQ column in the input view to align.
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
...
...
@@ -87,10 +87,13 @@
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int
obi_lcs_align_one_column
(
OBIDMS_p
dms
,
const
char
*
seq_view_name
,
const
char
*
seq_column_name
,
const
char
*
seq_elt_name
,
int
obi_lcs_align_one_column
(
const
char
*
dms_name
,
const
char
*
seq_view_name
,
const
char
*
seq_column_name
,
const
char
*
seq_elt_name
,
const
char
*
id_column_name
,
const
char
*
output_view_name
,
const
char
*
output_view_comments
,
const
char
*
output_view_name
,
const
char
*
output_view_comments
,
bool
print_seq
,
bool
print_count
,
double
threshold
,
bool
normalize
,
int
reference
,
bool
similarity_mode
,
int
thread_count
);
...
...
@@ -103,7 +106,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
*
* Note: The columns where the results are written are automatically named and created.
*
* @param dms
A pointer on an OBI
DMS.
* @param dms
_name The path of the
DMS.
* @param seq1_view_name The name of the view where the first column to align is.
* @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one).
* @param seq1_column_name The name of the first OBI_SEQ column in the input view to align.
...
...
@@ -138,7 +141,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int
obi_lcs_align_two_columns
(
OBIDMS_p
dms
,
int
obi_lcs_align_two_columns
(
const
char
*
dms_name
,
const
char
*
seq1_view_name
,
const
char
*
seq2_view_name
,
const
char
*
seq1_column_name
,
...
...
@@ -147,10 +150,11 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
const
char
*
seq2_elt_name
,
const
char
*
id1_column_name
,
const
char
*
id2_column_name
,
const
char
*
output_view_name
,
const
char
*
output_view_comments
,
const
char
*
output_view_name
,
const
char
*
output_view_comments
,
bool
print_seq
,
bool
print_count
,
double
threshold
,
bool
normalize
,
int
reference
,
bool
similarity_mode
);
#endif
/* OBI_
ALIGN
_H_ */
#endif
/* OBI_
LCS
_H_ */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment