# fakereads.pyx
from .fakereads cimport *
Eric Coissac committed
2 3 4 5 6 7 8 9 10 11 12 13 14
import traceback

cdef class FakeReads(dict):
    """Registry of synthetic ("fake") reads, indexed by sequence.

    The instance is itself a dict mapping an upper-cased sequence to a signed
    integer id: a sequence registered through ``_getid`` receives a positive
    id and its reverse complement the opposite, negative id.  ``_reverse``
    holds the inverse mapping, keyed by the decimal text of the signed id
    (for example ``b"12"`` and ``b"-12"``).  Ids are handed out sequentially,
    starting at the ``initid`` given to the constructor.
    """

    def __init__(self, size_t initid, size_t readsize=0):
        """Create an empty registry.

        initid   -- first id that will be handed out.
        readsize -- length every newly registered sequence must have
                    (checked by an assertion in ``_getid``).
        """
        self._firstid = initid
        self._nextid = initid
        self._readsize = readsize

        self._reverse = {}
        self._keys = {}

    cpdef bint isFake(self, int32_t id):
        """Return True when ``abs(id)`` is an id already handed out here.

        That is, when ``firstid <= abs(id) < nextid``; the sign of ``id``
        (the orientation) is ignored.
        """
        cdef bint rep = abs(id) >= self._firstid and abs(id) < self._nextid
        return rep

    cdef int _getid(self, bytes seq):
        """Return the signed id of ``seq``, registering it when unknown.

        ``seq`` is upper-cased before the lookup.  A new sequence gets the
        next free id; its reverse complement is registered under the
        negated id, and both orientations are recorded in ``_reverse``.

        NOTE(review): this method is declared without an ``except`` clause
        here; depending on the Cython version and on the matching .pxd
        declaration, an exception raised inside it (for instance the
        assertion below) may be reported and swallowed instead of
        propagated -- confirm against the .pxd.
        """
        cdef bytes cseq
        cdef object oid
        cdef char[50] buffer
        cdef char* pbuffer = buffer + 1   # text of the positive id
        cdef char* nbuffer = buffer       # same digits with a leading '-'
        cdef int newid

        seq = seq.upper()
        oid = self.get(seq, None)

        if oid is not None:
            # Already registered: Cython converts the stored Python int back
            # to a C int on return.
            return oid

        assert len(seq) == self._readsize, \
            "fake read length differs from the configured read size"

        newid = self._nextid
        self._nextid += 1

        self[seq] = newid

        cseq = reverseComplement(seq)
        if cseq != seq:
            # A sequence equal to its own reverse complement shares a single
            # dict key; overwriting it with -newid would make later lookups
            # return a different id than the one returned at registration.
            self[cseq] = -newid

        # Write the digits once at buffer+1, then put '-' in front of them:
        # pbuffer reads "<id>" and nbuffer reads "-<id>" from the same array.
        snprintf(pbuffer, 49, b"%d", newid)
        nbuffer[0] = b'-'
        self._reverse[pbuffer] = seq
        self._reverse[nbuffer] = cseq

        return newid

    cpdef tuple getReadIds(self, bytes seq):
        """Return ``(id, 0, {id})`` for ``seq``, registering it if needed."""
        cdef int readid = self._getid(seq)

        return (readid, 0, {readid})

    cpdef tuple getIds(self, int32_t id):
        """Return ``(abs(id), 0, {abs(id)})`` for an id known to this registry.

        Asserts that ``id`` satisfies ``isFake``.
        """
        cdef int32_t absid

        assert self.isFake(id)

        absid = abs(id)
        return (absid, 0, {absid})

    cpdef bytes getRead(self, int32_t readid, int32_t begin, int32_t length):
        """Return ``length`` bytes of read ``readid`` starting at ``begin``.

        A negative ``readid`` selects the reverse-complement orientation
        stored for that id.  Asserts that ``readid`` satisfies ``isFake``.

        Raises IndexError when ``begin``/``length`` do not describe a window
        lying entirely inside the stored sequence.
        """
        cdef char[50] buffer
        cdef bytes sequence
        cdef Py_ssize_t seqlen

        assert self.isFake(readid)

        # _reverse is keyed by the decimal text of the signed id.
        snprintf(buffer, 50, b"%d", readid)
        sequence = self._reverse[buffer]
        seqlen = len(sequence)

        # Validate the window explicitly: the previous raw pointer arithmetic
        # would read outside the bytes object for out-of-range arguments.
        if begin < 0 or length < 0 or begin > seqlen or length > seqlen - begin:
            raise IndexError("read window out of range")

        return sequence[begin:begin + length]

    cpdef int len(self):
        """Return the number of sequence keys stored in the dict."""
        return PyDict_Size(self)

    property firstid:

        """First id of the range handed out by this registry."""

        def __get__(self):
            return self._firstid

    property lastid:

        """Next id to be assigned (one past the highest id handed out)."""

        def __get__(self):
            return self._nextid

Eric Coissac committed
95 96