Commit a00e22f6 authored by Eric Coissac's avatar Eric Coissac

--no commit message

--no commit message
parent eea2b00f
......@@ -34,7 +34,7 @@ cdef class NodeIterator:
cpdef add(self,int node):
cdef char[50] buffer
cdef bytes key
cdef bytes key # @DuplicatedSignature
snprintf(buffer,50,"%d",node)
key = buffer
......@@ -42,8 +42,8 @@ cdef class NodeIterator:
self._exclude.add(key)
cpdef remove(self, int node):
cdef char[50] buffer
cdef bytes key
cdef char[50] buffer # @DuplicatedSignature
cdef bytes key # @DuplicatedSignature
snprintf(buffer,50,"%d",node)
key = buffer
......
......@@ -24,6 +24,7 @@ cdef extern from "orgasm.h":
buffer_t *loadIndexedReads(char *indexname)
int32_t lookForString(buffer_t *buffer, char *key, size_t length, int32_t* endoflist)
int32_t fastLookForString(buffer_t *buffer, char *key, size_t length, int32_t* fcount, int32_t* rcount, int32_t* fpos, int32_t* rpos)
int32_t nextRead(buffer_t *buffer, int32_t current, size_t length, int32_t* endoflist)
int32_t lookForReadsIds(buffer_t *buffer, int32_t key, int32_t* endoflist)
int32_t nextReadIds(buffer_t *buffer, int32_t current, int32_t* endoflist)
......@@ -364,15 +365,36 @@ cdef class Index:
return self.contains(word)
def __getitem__(self, bytes word):
cdef int32_t endoflist # @DuplicatedSignature
cdef int32_t fpos # @DuplicatedSignature
cdef int32_t rpos # @DuplicatedSignature
cdef int32_t fcount # @DuplicatedSignature
cdef int32_t rcount # @DuplicatedSignature
cdef int32_t count # @DuplicatedSignature
cdef int32_t readid # @DuplicatedSignature
cdef size_t lword = len(word) # @DuplicatedSignature
readid = lookForString(self._index,word,lword,&endoflist)
while(endoflist==0):
yield readid
readid = nextRead(self._index,readid,lword,&endoflist)
cdef list readids
count = fastLookForString(self._index,word,lword,&fcount,&rcount,&fpos,&rpos)
# print fcount,rcount,fpos,rpos
# print count
readids=range(fpos+1,fpos+count+1)
for readid in range(fcount,count):
readids[readid]=-(<int32_t>self._index.order1[readid-fcount+rpos])-1
# print readids
return readids
# def __getitem__(self, bytes word):
# cdef int32_t endoflist # @DuplicatedSignature
# cdef int32_t readid # @DuplicatedSignature
# cdef size_t lword = len(word) # @DuplicatedSignature
#
# readid = lookForString(self._index,word,lword,&endoflist)
#
# while(endoflist==0):
# yield readid
# readid = nextRead(self._index,readid,lword,&endoflist)
def __len__(self):
return self.len()
......
......@@ -251,17 +251,19 @@ int8_t cmpCompPrefix(buffer_t *buffer,uint32_t r1,pnuc key, uint32_t length)
}
int32_t lookForForward(buffer_t *buffer, pnuc key, size_t length, int32_t* endoflist)
int32_t lookForForward(buffer_t *buffer, pnuc key, size_t length, int32_t* count)
{
int32_t initcode;
int32_t start;
int32_t end;
int32_t l_start;
int32_t l_end;
int32_t middle;
int32_t comp;
// int32_t comp1;
// I set the end of list flag to 0
*endoflist=0;
// I set read count to 0
*count=0;
// and extract the 8 first nucleotides
initcode = *((uint16_t*)key);
......@@ -282,9 +284,9 @@ int32_t lookForForward(buffer_t *buffer, pnuc key, size_t length, int32_t* endof
// in this case no reads begin with the 8mer
if (end-start==0)
if (start==end)
{
*endoflist=1;
*count=0;
return 0;
}
......@@ -294,6 +296,7 @@ int32_t lookForForward(buffer_t *buffer, pnuc key, size_t length, int32_t* endof
// comp = cmpPrefix(buffer,middle,key,length);
end--;
l_end=end;
while (start!=end)
{
......@@ -301,7 +304,12 @@ int32_t lookForForward(buffer_t *buffer, pnuc key, size_t length, int32_t* endof
comp = cmpPrefix(buffer,middle,key,length);
if (comp < 0) start=middle+1;
if (comp >= 0) end=middle;
if (comp >= 0)
{
end=middle;
if (comp > 0)
l_end=end;
}
}
......@@ -311,10 +319,23 @@ int32_t lookForForward(buffer_t *buffer, pnuc key, size_t length, int32_t* endof
if (comp!=0)
{
*endoflist=1;
*count=0;
return 0;
}
l_start=start;
while (l_start!=l_end)
{
middle = (l_start + l_end) / 2 + 1;
comp = cmpPrefix(buffer,middle,key,length);
if (comp <= 0) l_start=middle;
if (comp > 0) l_end=middle-1;
}
*count = l_end-start+1;
return start;
}
......@@ -355,7 +376,7 @@ static char* bin8(int val){
* endoflist : set to 0 by the function if it finds a solution to 1 otherwise
*
*/
int32_t lookForReverse(buffer_t *buffer, pnuc key, size_t length, int32_t* endoflist)
int32_t lookForReverse(buffer_t *buffer, pnuc key, size_t length, int32_t* count)
{
static uint8_t bmask[]={255,192,240,252};
char internalbuffer[512] __attribute__ ((aligned (16))); // Force the alignment of the buffer;
......@@ -367,12 +388,14 @@ int32_t lookForReverse(buffer_t *buffer, pnuc key, size_t length, int32_t* endof
int32_t initcode;
int32_t start;
int32_t end;
int32_t l_start;
int32_t l_end;
int32_t middle;
int32_t comp;
uint32_t lkey;
// I set the end of list flag to 0
*endoflist=0;
*count=0;
dest = (pnuc) internalbuffer;
dest2 = (pnuc) internalbuffer2;
......@@ -440,22 +463,29 @@ int32_t lookForReverse(buffer_t *buffer, pnuc key, size_t length, int32_t* endof
end = buffer->readCount;
// in this case no reads begin with the 8mer
if (end-start==0)
if (start==end)
{
*endoflist=1;
*count=0;
return 0;
}
// I start a bsearch between start and end
end--;
l_end=end;
while (start!=end)
{
middle = (start + end) / 2;
comp = cmpCompPrefix(buffer,buffer->order1[middle],dest,length);
if (comp < 0) start=middle+1;
if (comp >= 0) end=middle;
if (comp >= 0)
{
end=middle;
if (comp > 0)
l_end=end;
}
}
......@@ -465,10 +495,23 @@ int32_t lookForReverse(buffer_t *buffer, pnuc key, size_t length, int32_t* endof
if (comp!=0)
{
*endoflist=1;
*count=0;
return 0;
}
l_start=start;
while (l_start!=l_end)
{
middle = (l_start + l_end) / 2 + 1;
comp = cmpCompPrefix(buffer,buffer->order1[middle],dest,length);
if (comp <= 0) l_start=middle;
if (comp > 0) l_end=middle-1;
}
*count = l_end-start+1;
// printf("%d-%d ->%d\n",start,l_end,*count);
return start;
......@@ -496,25 +539,50 @@ int32_t nextReverse(buffer_t *buffer, int32_t current, size_t length, int32_t* e
int32_t lookForReads(buffer_t *buffer, pnuc key, size_t length, int32_t* endoflist)
{
int32_t pos = lookForForward(buffer, key, length, endoflist);
int32_t count;
int32_t pos = lookForForward(buffer, key, length, &count);
if (*endoflist==0)
if (count>0)
{
pos++;
*endoflist=0;
}
else
{
pos = lookForReverse(buffer, key, length, endoflist);
if (*endoflist==0)
pos = lookForReverse(buffer, key, length, &count);
if (count>0)
{
pos = - buffer->order1[pos] - 1;
*endoflist=0;
}
else
*endoflist=1;
}
return pos;
}
int32_t fastLookForReads(buffer_t *buffer,
pnuc key,
size_t length,
int32_t* fcount,
int32_t* rcount,
int32_t* fpos,
int32_t* rpos
)
{
*fpos = lookForForward(buffer, key, length, fcount);
*rpos = lookForReverse(buffer, key, length, rcount);
return *fcount + *rcount;
}
int32_t nextRead(buffer_t *buffer, int32_t current, size_t length, int32_t* endoflist)
{
int32_t next=0;
int32_t count;
pnuc from;
if (current == 0)
......@@ -530,10 +598,15 @@ int32_t nextRead(buffer_t *buffer, int32_t current, size_t length, int32_t* endo
else
{
from = (pnuc)((uint8_t*)(buffer->records) + buffer->recordSize * current);
next = lookForReverse(buffer, from, length, endoflist);
next = lookForReverse(buffer, from, length, &count);
if (*endoflist==0)
if (count>0)
{
next = - buffer->order1[next] -1;
*endoflist=0;
}
else
*endoflist=1;
}
}
......@@ -565,6 +638,29 @@ int32_t lookForString(buffer_t *buffer, char *key, size_t length, int32_t* endof
return lookForReads(buffer, dest, length, endoflist);
}
int32_t fastLookForString(buffer_t *buffer,
char *key,
size_t length,
int32_t* fcount,
int32_t* rcount,
int32_t* fpos,
int32_t* rpos
)
{
char internalbuffer[512];
pnuc dest;
ASSERT(length>=8,"length have to be greater or equal to %d",8)
// I encode the key string
dest = (pnuc) PTR16(internalbuffer);
dest = encodeSequence(dest, key, length);
return fastLookForReads(buffer,dest,length,fcount,rcount,fpos,rpos);
}
int32_t lookForReadsIds(buffer_t *buffer, int32_t key, int32_t* endoflist)
{
pnuc dest;
......
......@@ -152,11 +152,11 @@ buffer_t *loadIndexedReads(const char *indexname);
int32_t lookForReads(buffer_t *buffer, pnuc key, size_t length, int32_t* endoflist);
int32_t lookForString(buffer_t *buffer, char *key, size_t length, int32_t* endoflist);
int32_t fastLookForString(buffer_t *buffer, char *key, size_t length, int32_t* fcount, int32_t* rcount, int32_t* fpos, int32_t* rpos);
int32_t nextRead(buffer_t *buffer, int32_t current, size_t length, int32_t* endoflist);
int32_t lookForReadsIds(buffer_t *buffer, int32_t key, int32_t* endoflist);
int32_t nextReadIds(buffer_t *buffer, int32_t current, int32_t* endoflist);
extern uint64_t decode16bitsnuc[];
extern uint8_t complement4nuc[];
extern uint32_t expanded8bitsnuc[];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment