decode.c 4.33 KB
Newer Older
Eric Coissac committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
/*
 * decode.c
 *
 *  Created on: 24 sept. 2012
 *      Author: coissac
 */



#include "orgasm.h"

/**
 * decode a sequence encoded in the two bits format.
 *
 * buffer   : pointer to the sequence buffer containing sequences
 * recordid : position of the sequence in the buffer
 * begin    : first nucleotide to decode
 * length   : length of the sequence to decode
 * dest     : a pointer to a memory area where the function saves
 *            the decodes sequence. if dest is NULL, then an internal
 *            buffer is used. If dest is specified it must point to
 *            a memory area at least large enough to store a full read
 *
 * Returns  : a pointer to the decoded sequence. The sequence is returned
 *            as a NULL terminated string. The returned pointer is not
 *            always equal to dest and can point to an internal part of
 *            the destination buffer
 */
char * decode(buffer_t *buffer, uint32_t recordid, uint32_t begin, int32_t length, char *dest)
{
	static char internal[MAXREADLEN+9];
	uint64_t*   idest;
	uint32_t    from;
	uint32_t    shift;
	uint32_t    i;
	uint32_t    lcompact;
Eric Coissac committed
37
//	uint32_t    mask;
Eric Coissac committed
38 39 40 41 42
	uint16_t    *compactseq;

	if (dest==NULL) dest = internal;

	dest = (char*)PTR8(dest);
Eric Coissac committed
43
	idest = (uint64_t*)dest;
Eric Coissac committed
44

45 46 47
	from = recordid * buffer->recordSize + ((begin >> 2) & 0xFFFFFFFE);
	shift = begin & 7;
	lcompact = (length+shift) / 8 + (((length+shift) & 7) ? 1:0);
Eric Coissac committed
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62

	for (compactseq = (uint16_t*)(buffer->records + from), i=0;
		 i < lcompact;
		 compactseq++,i++, idest++)
		*idest=decode16bitsnuc[*compactseq];

	dest[length + shift]=0;

	return dest + shift;
}


char *decodeComp(buffer_t *buffer, uint32_t recordid, uint32_t begin, int32_t length, char *dest)
{
	static char internal[MAXREADLEN+9];
63
	       char internal2[MAXREADLEN+9];
Eric Coissac committed
64 65 66
	uint64_t*   idest;
	uint32_t    from;
	uint32_t    shift;
67
	int32_t     i;
Eric Coissac committed
68
	uint32_t    lcompact;
Eric Coissac committed
69
//	uint32_t    mask;
Eric Coissac committed
70 71 72 73 74 75 76 77 78
	uint16_t    *compactseq;
	size_t      lkey;
	char*       key;
	char*       dest2;

	if (dest==NULL) dest = internal;

	dest = (char*)PTR8(dest);
	dest2 = (char*)PTR8(internal2);
Eric Coissac committed
79
	idest = (uint64_t*)dest;
Eric Coissac committed
80

81
	from = buffer->readSize - begin;
Eric Coissac committed
82

83 84 85 86 87 88 89 90 91 92 93 94 95
	// lkey -> length of the compacted sequence to unpack
	// old  version potentially with a bug
	// lkey = CODELENGTH(buffer->readSize) - (begin >> 2) + 1;
	lkey = CODELENGTH(from);

	// key -> pointer to the byte of the compacted sequence containing
	//        the position begin on the complementary sequence
	// old  version potentially with a bug
	// key  = READEND(buffer,buffer->order1[recordid]) - (begin >> 2);
	key  = READSTART(buffer,buffer->order1[recordid])+lkey-1;


	// reverse complement the read in the dest2 buffer in a compact form
Eric Coissac committed
96 97 98
	for (i=0; i<lkey;i++)
		dest2[i]=complement4nuc[*(((uint8_t*)(key)-i))];

99 100 101 102 103 104
	// (buffer->readSize & 3) : base count encoded by the last byte

	// old  version potentially with a bug
	// shift = (begin & 3) + (buffer->readSize & 3);
	shift = (4 - (from & 3)) & 3;

Eric Coissac committed
105

Eric Coissac committed
106
    ASSERT (shift <4,"There is a bug readsize = %zu begin = %d shift=%d",buffer->readSize,begin,shift)
Eric Coissac committed
107

108
	lcompact = (length+shift) / 8 + (((length+shift) & 7) ? 1:0);
Eric Coissac committed
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135

	for (compactseq = (uint16_t*)(dest2), i=0;
		 i < lcompact;
		 compactseq++,i++, idest++)
		*idest=decode16bitsnuc[*compactseq];

	dest[length + shift]=0;

	return dest + shift;
}

char *getRead(buffer_t *buffer, int32_t recordid, uint32_t begin, int32_t length, char *dest)
{
	static char internal[MAXREADLEN+9];

	if (dest==NULL)
		dest = internal;

	if (recordid > 0)
	{
		recordid--;
		return decode(buffer, recordid, begin, length, dest);
	}
	else if(recordid < 0)
	{
		recordid=-recordid;
		recordid--;
Eric Coissac committed
136 137
		recordid=buffer->complement[recordid];

Eric Coissac committed
138 139 140 141 142
		return decodeComp(buffer, recordid, begin, length, dest);
	}

	return NULL;
}
143 144 145 146 147 148 149 150 151

char * decodeSequence(pnuc buffer, uint32_t begin, int32_t length, char *dest)
{
	static char internal[MAXREADLEN+20];
	uint64_t*   idest;
	uint32_t    from;
	uint32_t    shift;
	uint32_t    i;
	uint32_t    lcompact;
Eric Coissac committed
152
//	uint32_t    mask;
153 154 155 156 157
	uint16_t    *compactseq;

	if (dest==NULL) dest = internal;

	dest = (char*)PTR8(dest);
Eric Coissac committed
158
	idest = (uint64_t*)dest;
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173

	from = ((begin >> 2) & 0xFFFFFFFE);
	shift = begin & 7;
	lcompact = (length+shift) / 8 + (((length+shift) & 7) ? 1:0);

	for (compactseq =(uint16_t*)(buffer + from), i=0;
		 i < lcompact;
		 compactseq++,i++, idest++)
		*idest=decode16bitsnuc[*compactseq];

	dest[length + shift]=0;

	return dest + shift;
}