/* uint8_indexer.c */
/****************************************************************************
 * Sequence quality scores indexing functions                               *
 ****************************************************************************/

/**
 * @file uint8_indexer.c
 * @author Celine Mercier
 * @date May 4th 2016
 * @brief Functions handling the indexing and retrieval of sequence quality scores.
 */


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidebug.h"
#include "obitypes.h"


#define DEBUG_LEVEL 0	// TODO: has to be defined somewhere else (Cython compilation flag?)


/**
 * @brief Encodes a character string and wraps the result in a blob.
 *
 * The string is packed with encode_seq_on_2_bits() when only_ATGC() accepts
 * it, and with encode_seq_on_4_bits() otherwise. The packed buffer is then
 * handed to obi_blob() together with the matching element size, the packed
 * length in bytes and the original string length.
 *
 * NOTE(review): the body still relies on the nucleotide helpers
 * (only_ATGC / encode_seq_on_*_bits) although the parameter is named
 * `quality` - confirm that this encoding is what is intended for quality
 * strings.
 *
 * @param quality The null-terminated string to encode.
 *
 * @returns The blob built by obi_blob().
 * @retval NULL if `quality` is NULL or if the encoding function returned NULL.
 *         (obi_blob()'s own return value is passed through unchanged.)
 */
Obi_blob_p obi_uint8_to_blob(const char* quality)
{
	Obi_blob_p 		 value_b;
	int32_t          length_encoded_seq;	// length of the encoded value in bytes
	int32_t          seq_length;			// length of the input string in characters
	byte_t*          encoded_seq;

	// strlen() on a NULL pointer is undefined behaviour
	if (quality == NULL)
		return NULL;

	seq_length = strlen(quality);

	// Check if just ATGC and encode accordingly
	if (only_ATGC(quality))
	{
		// 2 bits per character: 4 characters per byte, rounded up
		length_encoded_seq = ceil((double) seq_length / (double) 4.0);
		// Encode
		encoded_seq = encode_seq_on_2_bits(quality, seq_length);
		if (encoded_seq == NULL)
			return NULL;
		value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length);
	}
	else
	{
		// 4 bits per character: 2 characters per byte, rounded up
		length_encoded_seq = ceil((double) seq_length / (double) 2.0);
		// Encode
		encoded_seq = encode_seq_on_4_bits(quality, seq_length);
		if (encoded_seq == NULL)
			return NULL;
		value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length);
	}

	// The temporary packed buffer is released here, as in the original code;
	// presumably obi_blob() keeps its own copy - TODO confirm.
	free(encoded_seq);

	return value_b;
}


/**
 * @brief Decodes the value stored in a blob back into a character string.
 *
 * The decoder is chosen from the blob's element size: an element size of 2
 * goes through decode_seq_on_2_bits(), any other value through
 * decode_seq_on_4_bits(). Both receive the blob's encoded value and its
 * decoded length.
 *
 * @param value_b The blob to decode.
 *
 * @returns The pointer returned by the decoding function.
 *          NOTE(review): presumably heap-allocated and owned by the caller -
 *          confirm against the decoder's documentation.
 * @retval NULL if `value_b` is NULL.
 */
char* obi_blob_to_quality_char(Obi_blob_p value_b)
{
	// Nothing to decode; avoids dereferencing a NULL blob
	if (value_b == NULL)
		return NULL;

	// Decode
	if (value_b->element_size == 2)
		return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
	else
		return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);
}


/**
 * @brief Encodes a string into a blob and adds that blob to an indexer.
 *
 * NOTE(review): the encoding goes through obi_seq_to_blob(), which is not
 * defined in the visible part of this file, whereas the encoder defined here
 * is obi_uint8_to_blob() - confirm which one is intended.
 *
 * @param indexer The indexer the encoded value is added to.
 * @param value   The string to encode and index.
 *
 * @returns The value returned by obi_indexer_add() for the encoded blob.
 * @retval -1 if the encoding step returned NULL.
 */
index_t obi_index_quality_char(Obi_indexer_p indexer, const char* value)
{
	index_t     position;
	Obi_blob_p  encoded;

	// Build the blob; without it there is nothing to index
	encoded = obi_seq_to_blob(value);

	if (encoded != NULL)
	{
		// Hand the blob to the indexer, then drop the temporary blob
		position = obi_indexer_add(indexer, encoded);
		free(encoded);
	}
	else
	{
		position = -1;
	}

	return position;
}


/**
 * @brief Fetches the blob stored at a given index of an indexer and decodes it.
 *
 * NOTE(review): the decoding goes through obi_blob_to_seq(), which is not
 * defined in the visible part of this file, whereas the decoder defined here
 * is obi_blob_to_quality_char() - confirm which one is intended.
 *
 * @param indexer The indexer to read from.
 * @param idx     The index of the value to retrieve.
 *
 * @returns The pointer returned by obi_blob_to_seq() for the retrieved blob.
 * @retval NULL if obi_indexer_get() returned NULL.
 */
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
{
	Obi_blob_p  value_b;

	// Get encoded value
	value_b = obi_indexer_get(indexer, idx);

	// Do not hand a missing blob to the decoder
	if (value_b == NULL)
		return NULL;

	// Return decoded sequence
	return obi_blob_to_seq(value_b);
}