Commit d076ea99 authored by Celine Mercier

Alignment: updated functions to align columns (LCS)

parent 6b1c41f3
......@@ -3,7 +3,7 @@
****************************************************************************/
/**
* @file obi_align.c
* @file obi_lcs.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date May 4th 2016
* @brief Functions handling LCS sequence alignments.
......@@ -18,7 +18,7 @@
#include <stdio.h>
#include <stdbool.h>
#include "obi_align.h"
#include "obi_lcs.h"
#include "obidebug.h"
#include "obierrno.h"
#include "obitypes.h"
......@@ -121,11 +121,11 @@ static int print_alignment_result(Obiview_p output_view,
OBIDMS_column_p seq2_column,
index_t seq1_idx,
index_t seq2_idx,
// bool print_count,
// OBIDMS_column_p count1_column,
// OBIDMS_column_p count2_column,
// int count1,
// int count2,
bool print_count,
OBIDMS_column_p count1_column,
OBIDMS_column_p count2_column,
int count1,
int count2,
OBIDMS_column_p ali_length_column,
int ali_length,
OBIDMS_column_p lcs_length_column,
......@@ -231,22 +231,22 @@ static int create_alignment_output_columns(Obiview_p output_view,
return -1;
}
}
// if (print_count) // TODO count columns not implemented yet
// {
// // Create the column for the count of the first sequences aligned
// if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0)
// {
// obidebug(1, "\nError creating the first column for the sequence counts when aligning");
// return -1;
// }
//
// // Create the column for the count of the second sequences aligned
// if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0)
// {
// obidebug(1, "\nError creating the second column for the sequence counts when aligning");
// return -1;
// }
// }
if (print_count)
{
// Create the column for the count of the first sequences aligned
if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0)
{
obidebug(1, "\nError creating the first column for the sequence counts when aligning");
return -1;
}
// Create the column for the count of the second sequences aligned
if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0)
{
obidebug(1, "\nError creating the second column for the sequence counts when aligning");
return -1;
}
}
return 0;
}
......@@ -267,11 +267,11 @@ static int print_alignment_result(Obiview_p output_view,
OBIDMS_column_p seq2_column,
index_t seq1_idx,
index_t seq2_idx,
// bool print_count,
// OBIDMS_column_p count1_column,
// OBIDMS_column_p count2_column,
// int count1,
// int count2,
bool print_count,
OBIDMS_column_p count1_column,
OBIDMS_column_p count2_column,
int count1,
int count2,
OBIDMS_column_p ali_length_column,
int ali_length,
OBIDMS_column_p lcs_length_column,
......@@ -322,21 +322,21 @@ static int print_alignment_result(Obiview_p output_view,
}
}
// // Write the counts if needed // TODO count columns not implemented yet
// if (print_count)
// {
// if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0)
// {
// obidebug(1, "\nError writing count1 in a column");
// return -1;
// }
//
// if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0)
// {
// obidebug(1, "\nError writing count2 in a column");
// return -1;
// }
// }
// Write the counts if needed
if (print_count)
{
if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0)
{
obidebug(1, "\nError writing count1 in a column");
return -1;
}
if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0)
{
obidebug(1, "\nError writing count2 in a column");
return -1;
}
}
// Write the alignment length if it was computed
if ((reference == ALILEN) && (normalize || !similarity_mode))
......@@ -385,9 +385,13 @@ static int print_alignment_result(Obiview_p output_view,
**********************************************************************/
int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char* seq_column_name, const char* seq_elt_name,
int obi_lcs_align_one_column(const char* dms_name,
const char* seq_view_name,
const char* seq_column_name,
const char* seq_elt_name,
const char* id_column_name,
const char* output_view_name, const char* output_view_comments,
const char* output_view_name,
const char* output_view_comments,
bool print_seq, bool print_count,
double threshold, bool normalize, int reference, bool similarity_mode,
int thread_count)
......@@ -396,6 +400,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
index_t seq_count;
index_t id1_idx, id2_idx;
index_t seq1_idx, seq2_idx;
int count1, count2;
double score;
int lcs_length;
int ali_length;
......@@ -405,16 +410,18 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
int lcs_min;
index_t seq_elt_idx;
OBIDMS_p dms = NULL;
Obiview_p seq_view = NULL;
Obiview_p output_view = NULL;
OBIDMS_column_p iseq_column = NULL;
OBIDMS_column_p i_count_column = NULL;
OBIDMS_column_p id_column = NULL;
OBIDMS_column_p id1_column = NULL;
OBIDMS_column_p id2_column = NULL;
OBIDMS_column_p seq1_column = NULL;
OBIDMS_column_p seq2_column = NULL;
//OBIDMS_column_p count1_column = NULL;
//OBIDMS_column_p count2_column = NULL;
OBIDMS_column_p count1_column = NULL;
OBIDMS_column_p count2_column = NULL;
OBIDMS_column_p idx1_column = NULL;
OBIDMS_column_p idx2_column = NULL;
OBIDMS_column_p lcs_length_column = NULL;
......@@ -423,6 +430,14 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
k = 0;
// Open DMS
dms = obi_open_dms(dms_name);
if (dms == NULL)
{
obidebug(1, "\nError opening the DMS");
return -1;
}
// Open input view
seq_view = obi_open_view(dms, seq_view_name);
if (seq_view == NULL)
......@@ -494,6 +509,17 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
return -1;
}
// Open the input count column
if (print_count)
{
i_count_column = obi_view_get_column(seq_view, COUNT_COLUMN);
if (i_count_column == NULL)
{
obidebug(1, "\nError getting the input COUNT column");
return -1;
}
}
// Create the output view
output_view = obi_new_view(dms, output_view_name, NULL, NULL, output_view_comments);
if (output_view == NULL)
......@@ -521,11 +547,11 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME);
seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME);
}
// if (print_count) // TODO count columns not implemented yet
// {
// count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME);
// count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME);
// }
if (print_count)
{
count1_column = obi_view_get_column(output_view, COUNT1_COLUMN_NAME);
count2_column = obi_view_get_column(output_view, COUNT2_COLUMN_NAME);
}
// Build kmer tables
ktable = hash_seq_column(seq_view, iseq_column, seq_elt_idx);
......@@ -597,11 +623,18 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
// Get second id idx
id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
// Get counts // TODO use array for efficiency?
if (print_count)
{
count1 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, i, 0);
count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0);
}
if (print_alignment_result(output_view, k,
idx1_column, idx2_column, i, j,
id1_column, id2_column, id1_idx, id2_idx,
print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx,
//print_count, count1_column, count2_column, count1, count2,
print_count, count1_column, count2_column, count1, count2,
ali_length_column, ali_length,
lcs_length_column, lcs_length,
score_column, score,
......@@ -625,13 +658,19 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
return -1;
}
if (obi_close_dms(dms, false) < 0)
{
obidebug(1, "\nError closing the DMS after aligning");
return -1;
}
free_kmer_tables(ktable, seq_count);
return 0;
}
int obi_lcs_align_two_columns(OBIDMS_p dms,
int obi_lcs_align_two_columns(const char* dms_name,
const char* seq1_view_name,
const char* seq2_view_name,
const char* seq1_column_name,
......@@ -640,7 +679,8 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
const char* seq2_elt_name,
const char* id1_column_name,
const char* id2_column_name,
const char* output_view_name, const char* output_view_comments,
const char* output_view_name,
const char* output_view_comments,
bool print_seq, bool print_count,
double threshold, bool normalize, int reference, bool similarity_mode)
{
......@@ -649,6 +689,7 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
index_t seq2_count;
index_t id1_idx, id2_idx;
index_t seq1_idx, seq2_idx;
int count1, count2;
double score;
int lcs_length;
int ali_length;
......@@ -660,6 +701,7 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
index_t seq2_elt_idx;
bool same_indexer;
OBIDMS_p dms = NULL;
Obiview_p seq1_view = NULL;
Obiview_p seq2_view = NULL;
Obiview_p output_view = NULL;
......@@ -667,12 +709,14 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
OBIDMS_column_p i_seq2_column = NULL;
OBIDMS_column_p i_id1_column = NULL;
OBIDMS_column_p i_id2_column = NULL;
OBIDMS_column_p i_count1_column = NULL;
OBIDMS_column_p i_count2_column = NULL;
OBIDMS_column_p id1_column = NULL;
OBIDMS_column_p id2_column = NULL;
OBIDMS_column_p seq1_column = NULL;
OBIDMS_column_p seq2_column = NULL;
//OBIDMS_column_p count1_column = NULL;
//OBIDMS_column_p count2_column = NULL;
OBIDMS_column_p count1_column = NULL;
OBIDMS_column_p count2_column = NULL;
OBIDMS_column_p idx1_column = NULL;
OBIDMS_column_p idx2_column = NULL;
OBIDMS_column_p lcs_length_column = NULL;
......@@ -681,6 +725,14 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
k = 0;
// Open DMS
dms = obi_open_dms(dms_name);
if (dms == NULL)
{
obidebug(1, "\nError opening the DMS to align");
return -1;
}
// Open the first input view
seq1_view = obi_open_view(dms, seq1_view_name);
if (seq1_view == NULL)
......@@ -835,6 +887,23 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
return -1;
}
// Open the input count columns
if (print_count)
{
i_count1_column = obi_view_get_column(seq1_view, COUNT_COLUMN);
if (i_count1_column == NULL)
{
obidebug(1, "\nError getting the first input COUNT column");
return -1;
}
i_count2_column = obi_view_get_column(seq2_view, COUNT_COLUMN);
if (i_count2_column == NULL)
{
obidebug(1, "\nError getting the second input COUNT column");
return -1;
}
}
// Create the output view
output_view = obi_new_view(dms, output_view_name, NULL, NULL, output_view_comments);
if (output_view == NULL)
......@@ -862,11 +931,11 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME);
seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME);
}
// if (print_count) // TODO count columns not implemented yet
// {
// count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME);
// count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME);
// }
if (print_count)
{
count1_column = obi_view_get_column(output_view, COUNT1_COLUMN_NAME);
count2_column = obi_view_get_column(output_view, COUNT2_COLUMN_NAME);
}
// Check if the sequence columns share the same indexer (allows for quick checking of sequence equality)
if (strcmp((i_seq1_column->header)->indexer_name, (i_seq2_column->header)->indexer_name) == 0)
......@@ -949,11 +1018,18 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
// Get second id idx
id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq2_view, i_id2_column, j, 0);
// Get counts // TODO use array for efficiency?
if (print_count)
{
count1 = obi_get_int_with_elt_idx_and_col_p_in_view(seq1_view, i_count1_column, i, 0);
count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq2_view, i_count2_column, j, 0);
}
if (print_alignment_result(output_view, k,
idx1_column, idx2_column, i, j,
id1_column, id2_column, id1_idx, id2_idx,
print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx,
//print_count, count1_column, count2_column, count1, count2,
print_count, count1_column, count2_column, count1, count2,
ali_length_column, ali_length,
lcs_length_column, lcs_length,
score_column, score,
......@@ -986,6 +1062,12 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
return -1;
}
if (obi_close_dms(dms, false) < 0)
{
obidebug(1, "\nError closing the DMS after aligning");
return -1;
}
free_kmer_tables(ktable, seq1_count + seq2_count);
return 0;
......
......@@ -3,15 +3,15 @@
****************************************************************************/
/**
* @file obi_align.h
* @file obi_lcs.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date May 11th 2016
* @brief Header file for the functions handling the LCS alignment of DNA sequences.
*/
#ifndef OBI_ALIGN_H_
#define OBI_ALIGN_H_
#ifndef OBI_LCS_H_
#define OBI_LCS_H_
#include <stdlib.h>
......@@ -59,7 +59,7 @@
*
* Note: The columns where the results are written are automatically named and created.
*
* @param dms A pointer on an OBIDMS.
* @param dms_name The path of the DMS.
* @param seq_view_name The name of the view where the column to align is.
* @param seq_column_name The name of the OBI_SEQ column in the input view to align.
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
......@@ -87,10 +87,13 @@
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_lcs_align_one_column(OBIDMS_p dms,
const char* seq_view_name, const char* seq_column_name, const char* seq_elt_name,
int obi_lcs_align_one_column(const char* dms_name,
const char* seq_view_name,
const char* seq_column_name,
const char* seq_elt_name,
const char* id_column_name,
const char* output_view_name, const char* output_view_comments,
const char* output_view_name,
const char* output_view_comments,
bool print_seq, bool print_count,
double threshold, bool normalize, int reference, bool similarity_mode,
int thread_count);
......@@ -103,7 +106,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
*
* Note: The columns where the results are written are automatically named and created.
*
* @param dms A pointer on an OBIDMS.
* @param dms_name The path of the DMS.
* @param seq1_view_name The name of the view where the first column to align is.
* @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one).
* @param seq1_column_name The name of the first OBI_SEQ column in the input view to align.
......@@ -138,7 +141,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
* @since December 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_lcs_align_two_columns(OBIDMS_p dms,
int obi_lcs_align_two_columns(const char* dms_name,
const char* seq1_view_name,
const char* seq2_view_name,
const char* seq1_column_name,
......@@ -147,10 +150,11 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
const char* seq2_elt_name,
const char* id1_column_name,
const char* id2_column_name,
const char* output_view_name, const char* output_view_comments,
const char* output_view_name,
const char* output_view_comments,
bool print_seq, bool print_count,
double threshold, bool normalize, int reference, bool similarity_mode);
#endif /* OBI_ALIGN_H_ */
#endif /* OBI_LCS_H_ */
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment