dna.pyx 2.36 KB
Newer Older
Eric Coissac committed
1 2
# cython: language_level=3

Eric Coissac committed
3
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_GET_SIZE
4
from collections import Counter
Eric Coissac committed
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48

#                   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
cdef char* __COMP = "T-G---C------------A------"

cdef int _reverseComplement(char* seq, size_t length):
    """
    Internal function computing the reverse complement sequence
    of a DNA sequence in place.
    """
    cdef char   tmp
    cdef char*  end = seq + length -1
    
    while seq <= end:
        tmp = end[0]
        end[0] = __COMP[(seq[0] & (~32)) - 65]
        seq[0] = __COMP[(tmp & (~32)) - 65]
        seq+=1
        end-=1 
        
    return 0
    
cpdef bytes reverseComplement(bytes seq):
    """
    Computes the reverse complement sequence of a DNA sequence
    composed only of acgt. No check is done for efficiency 
    purpose so be careful to the input sequence.
    
    :param seq: the sequence to revert-compelent expressed as
                a serie of acgt in upper or lower case
    :type  seq: a bytes instance
    
    :return: the reverse-complemented sequence in upper case
    :rtype:  a bytes instance
    """
    cdef bytes  comp 
    cdef char*  ccomp= seq
    cdef size_t lcomp= PyBytes_GET_SIZE(seq)
    
    comp = PyBytes_FromStringAndSize(ccomp,lcomp)
    ccomp= comp
    
    _reverseComplement(ccomp,lcomp)
    
    return comp
49 50

cpdef bint isDNA(bytes seq):
Eric Coissac committed
51
    cdef set c = set([x for x in seq.upper()])
52
    return len(c)<=4
Eric Coissac committed
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71


cdef dict codon = {}
cdef int i

ncbieaa="FFLLSSSSYYQQCCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
Base1  ="TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG"
Base2  ="TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG"
Base3  ="TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"

for i in range(len(ncbieaa)):
    codon[Base1[i]+Base2[i]+Base3[i]]=ncbieaa[i]
    
cpdef tuple translate3(bytes seq): 
    cdef int i                               # @DuplicatedSignature
    cdef list prot0=[]
    cdef list prot1=[]
    cdef list prot2=[]
    
72
    
Eric Coissac committed
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
    for i in range(len(seq)-3):
        aa = codon[seq[i:(i+3)].upper()]
        p = i % 3
        if p==0:
            prot0.append(aa)
        elif p==1:
            prot1.append(aa)
        else:
            prot2.append(aa)
            
    return (b''.join(prot0),
            b''.join(prot1),
            b''.join(prot2)
            )