Commit 8fb5e673 authored by Eric Coissac's avatar Eric Coissac

Add length constraint

parent e0b7eb6b
......@@ -10,7 +10,8 @@ buffer_t *buildIndex(const char *forwardFileName, const char* reverseFileName,
const char *indexname,
uint32_t minword,
size_t maxread,
size_t maxbuffersize
size_t maxbuffersize,
uint32_t readLength
)
{
FILE* forward;
......@@ -33,7 +34,7 @@ buffer_t *buildIndex(const char *forwardFileName, const char* reverseFileName,
if (reverse == NULL)
FATALERROR("Cannot open file %s",reverseFileName)
reads = loadPairedFastq(forward,reverse,maxread,maxbuffersize);
reads = loadPairedFastq(forward,reverse,maxread,maxbuffersize,readLength);
fclose(forward);
fclose(reverse);
......
......@@ -86,42 +86,86 @@ uint32_t readFastq(char* dest, FILE* in, int32_t maxsize)
* an error.
*/
uint32_t readPairedFastq(char* dest, FILE* forward, FILE* reverse, int32_t maxsize)
uint32_t readPairedFastq(char* dest,
FILE* forward,
FILE* reverse,
int32_t maxsize,
uint32_t readLength)
{
int32_t forwardLength;
int32_t forwardLength16;
int32_t reverseLength;
int32_t reverseLength=0;
char* rdest;
char* c;
int32_t rsize;
bzero(dest,maxsize);
if (readLength > 0)
{
forwardLength16 = round16(readLength);
if (forwardLength16*2 > maxsize)
return 0;
rdest = dest + forwardLength16;
rsize = maxsize - forwardLength16;
}
else
forwardLength16=0;
// we read the forward sequence
forwardLength = readFastq(dest,forward,maxsize);
do {
for (forwardLength = readFastq(dest,forward,maxsize);
forwardLength>0 && forwardLength<readLength;
forwardLength = readFastq(dest,forward,maxsize))
// skip the reverse read and take the next forward
reverseLength = readFastq(dest,reverse,maxsize);
if (forwardLength==0)
return 0;
if (forwardLength16==0)
{
forwardLength16 = round16(forwardLength);
if (forwardLength16*2 > maxsize)
return 0;
rdest = dest + forwardLength16;
rsize = maxsize - forwardLength16;
}
// them the reverse one
forwardLength16 = round16(forwardLength);
if (forwardLength16*2 > maxsize)
return 0;
// founded a good forward get the corresponding reverse
reverseLength = readFastq(rdest,reverse,rsize);
maxsize-=forwardLength16;
dest+=forwardLength16;
} while ( forwardLength > 0 &&
reverseLength > 0 &&
reverseLength < readLength );
reverseLength = readFastq(dest,reverse,maxsize);
// We check that both lengths are equal
if (forwardLength==0 || reverseLength==0)
return 0;
if (readLength > 0 && forwardLength > readLength)
for (c=dest+readLength;
c < rdest;
c++) *c=0;
if (forwardLength!=reverseLength)
if (readLength > 0 && reverseLength > readLength)
for (c=rdest+readLength;
c < (dest+maxsize);
c++) *c=0;
if (readLength==0 && forwardLength!=reverseLength)
return 0;
// we return the length of one read
return forwardLength;
// we return the length of one read
if (readLength > 0)
return readLength;
else
return forwardLength;
}
......@@ -141,7 +185,8 @@ uint8_t checkACGT(const char* src, size_t size)
buffer_t * loadPairedFastq(FILE* forward,
FILE* reverse,
size_t maxread,
size_t maxbuffersize
size_t maxbuffersize,
uint32_t readLength
)
{
......@@ -157,7 +202,7 @@ buffer_t * loadPairedFastq(FILE* forward,
char* reads;
char* index;
char* bufferend;
uint32_t readLength;
// uint32_t readLength;
uint32_t recordLength;
uint32_t nextprint;
uint32_t* encodedseqs;
......@@ -177,7 +222,7 @@ buffer_t * loadPairedFastq(FILE* forward,
buffersize= 2048 - (reads - (char*)(&readbuffer));
readLength = readPairedFastq(reads,forward,reverse,buffersize);
readLength = readPairedFastq(reads,forward,reverse,buffersize,readLength);
seqbuffer = newBuffer(maxbuffersize,readLength,maxread);
encodedseqs= (uint32_t*)(seqbuffer->records);
......@@ -203,7 +248,7 @@ buffer_t * loadPairedFastq(FILE* forward,
seqbuffer->readCount+=2;
}
readLength = readPairedFastq(reads,forward,reverse,buffersize);
readLength = readPairedFastq(reads,forward,reverse,buffersize,readLength);
//DEBUG("Filled %d/%d",seqbuffer->readCount,seqbuffer->maxrecord);
}
......
......@@ -87,7 +87,9 @@ void util_free(void *chunk, const char *filename, int32_t line);
void *util_realloc(void *chunk, int32_t newsize, const char *filename, int32_t line);
void *util_malloc(int32_t chunksize, const char *filename, int32_t line);
uint32_t readPairedFastq(char* dest, FILE* forward, FILE* reverse, int32_t maxsize);
uint32_t readPairedFastq(char* dest,
FILE* forward, FILE* reverse,
int32_t maxsize, uint32_t readLength);
uint32_t round2(uint32_t x);
uint32_t round4(uint32_t x);
......@@ -106,14 +108,16 @@ char *decodeSequence(pnuc buffer, uint32_t begin, int32_t length, char *dest);
buffer_t *loadPairedFastq(FILE* forward,
FILE* reverse,
size_t maxread,
size_t maxbuffersize
size_t maxbuffersize,
uint32_t readLength
);
buffer_t *buildIndex(const char *forwardFileName, const char* reverseFileName,
const char *indexname,
uint32_t minword,
size_t maxread,
size_t maxbuffersize
size_t maxbuffersize,
uint32_t readLength
);
buffer_t *newBuffer(size_t maxsize,size_t readSize,size_t maxread);
......
......@@ -41,6 +41,7 @@ static void PrintHelp()
PP " <index>.opx : contains read pairing data\n\n");
PP " The assembler will need all these file to process assembling\n\n");
PP "-M : If specified the count in million of reads to index");
PP "-l : read length to consider");
PP "-h : [H]elp - print <this> help\n\n");
PP "\n");
PP "------------------------------------------\n");
......@@ -76,6 +77,7 @@ int main(int argc, char *argv[])
buffer_t* reads;
char* indexfiles=NULL;
size_t toread=0;
uint32_t readLength=0;
int carg;
int errflag=0;
......@@ -88,7 +90,7 @@ int main(int argc, char *argv[])
while ((carg = getopt(argc, argv, "hM:o:")) != -1) {
while ((carg = getopt(argc, argv, "hM:o:l:")) != -1) {
switch (carg) {
/* -------------------- */
......@@ -106,9 +108,16 @@ while ((carg = getopt(argc, argv, "hM:o:")) != -1) {
break;
/* -------------------- */
case 'M': /* index name */
case 'M': /* sequence to index */
/* -------------------- */
toread = atoll((const char*)optarg) * 1000000;
break;
/* -------------------- */
case 'l': /* index name */
/* -------------------- */
readLength = atoll((const char*)optarg);
break;
case '?': /* bad option */
......@@ -145,7 +154,7 @@ if (errflag)
#define MAXREAD 2000000000
reads = buildIndex(forwardFileName,reverseFileName,indexfiles,95,toread,MAXREAD);
reads = buildIndex(forwardFileName,reverseFileName,indexfiles,95,toread,MAXREAD,readLength);
freeBuffer(reads);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment