Package obitools :: Package dnahash
[hide private]
[frames | no frames]

Source Code for Package obitools.dnahash

 1  _A=[0] 
 2  _C=[1] 
 3  _G=[2] 
 4  _T=[3] 
 5  _R= _A + _G 
 6  _Y= _C + _T 
 7  _M= _C + _A 
 8  _K= _T + _G 
 9  _W= _T + _A 
10  _S= _C + _G 
11  _B= _C + _G + _T 
12  _D= _A + _G + _T 
13  _H= _A + _C + _T 
14  _V= _A + _C + _G 
15  _N= _A + _C + _G + _T 
16   
17  _dnahash={'a':_A, 
18            'c':_C, 
19            'g':_G, 
20            't':_T, 
21            'r':_R, 
22            'y':_Y, 
23            'm':_M, 
24            'k':_K, 
25            'w':_W, 
26            's':_S, 
27            'b':_B, 
28            'd':_D, 
29            'h':_H, 
30            'v':_V, 
31            'n':_N, 
32            } 
33   
def hashCodeIterator(sequence,wsize,degeneratemax=0,offset=0):
    """Iterate over the hash codes of every word of length wsize in sequence.

    Each symbol is looked up (lower-cased) in the module-level ``_dnahash``
    table and contributes two bits to the code of the current word.  A
    symbol that maps to several codes (an ambiguous symbol) multiplies the
    set of candidate codes for every word that contains it.

    @param sequence: iterable of one-character nucleotide symbols
    @param wsize: word (window) length, in symbols
    @param degeneratemax: maximum number of ambiguous symbols tolerated in
                          a word; words holding more are not yielded
    @param offset: value of the position reported for the first word

    @return: a generator of (position, hashs, errors) tuples where position
             is offset plus the number of complete words seen before this
             one, hashs is the set of integer codes of the word and errors
             is the count of ambiguous symbols it contains

    @raise KeyError: on a symbol that is not a key of ``_dnahash``
    """
    errors = 0              # ambiguous symbols currently inside the window
    emask = [0] * wsize     # circular buffer: 1 where the symbol was ambiguous
    epointer = 0            # slot of the symbol about to leave the window
    size = 0                # symbols consumed so far, capped at wsize
    position = offset
    hashs = set([0])

    # Mask keeping the 2 * wsize low bits, i.e. the last wsize symbols.
    # range() rather than xrange() so the code runs on Python 2 and 3.
    hashmask = 0
    for _ in range(wsize):
        hashmask = (hashmask << 2) | 3

    for symbol in sequence:
        codes = _dnahash[symbol.lower()]

        # The slot being overwritten belongs to the symbol that drops out of
        # the window: forget its ambiguity before recording the new one.
        if emask[epointer]:
            errors -= 1
            emask[epointer] = 0

        if len(codes) > 1:
            errors += 1
            emask[epointer] = 1

        epointer = (epointer + 1) % wsize

        # Too many ambiguous symbols: this word will not be reported, so
        # keep a single code to stop the candidate set from growing.
        if errors > degeneratemax:
            codes = [codes[0]]

        hashs = set(((previous << 2) | code) & hashmask
                    for previous in hashs
                    for code in codes)

        if size < wsize:
            size += 1

        if size == wsize:
            if errors <= degeneratemax:
                yield (position, hashs, errors)
            # Advance for every complete window, reported or not, so that
            # positions stay aligned with the sequence.
            position += 1
75
def hashSequence(sequence,wsize,degeneratemax=0,offset=0,hashs=None):
    """Index the positions of every word of length wsize found in sequence.

    @param sequence: iterable of nucleotide symbols, as accepted by
                     hashCodeIterator
    @param wsize: word length, in symbols
    @param degeneratemax: forwarded to hashCodeIterator
    @param offset: forwarded to hashCodeIterator; position reported for the
                   first word
    @param hashs: an existing table to complete in place; when None a new
                  table of 4**wsize empty lists is created

    @return: the table, a list indexed by word code whose entries are the
             lists of positions at which that code was produced
    """
    if hashs is None:
        # One bucket per possible word code (two bits per symbol).
        # range() rather than xrange() so the code runs on Python 2 and 3.
        hashs = [[] for _ in range(4 ** wsize)]

    words = hashCodeIterator(sequence, wsize, degeneratemax, offset)
    for pos, keys, _errors in words:
        for key in keys:
            hashs[key].append(pos)

    return hashs
def hashSequences(sequences,wsize,maxpos,degeneratemax=0):
    """Index the words of several sequences into one shared table.

    The sequences are laid end to end: the words of each sequence are
    recorded with positions shifted by the summed lengths of the sequences
    that precede it.

    @param sequences: iterable of sequences; each must support len() and be
                      accepted by hashSequence
    @param wsize: word length, in symbols
    @param maxpos: not used by this function; kept so that existing callers
                   passing it positionally keep working
    @param degeneratemax: forwarded to hashSequence

    @return: a (hashs, offsets) tuple where hashs is the table filled by
             hashSequence and offsets lists the starting offset given to
             each sequence, in input order
    """
    hashs = None
    offsets = []
    offset = 0
    for seq in sequences:
        offsets.append(offset)
        # Keep the table returned by the call: the first call creates it and
        # the following ones complete it.  Discarding the result left hashs
        # at None and threw each per-sequence table away.
        hashs = hashSequence(seq, wsize,
                             degeneratemax=degeneratemax,
                             offset=offset,
                             hashs=hashs)
        offset += len(seq)

    if hashs is None:
        # No sequence at all: still hand back an (empty) table.
        hashs = hashSequence('', wsize, degeneratemax=degeneratemax)

    return hashs, offsets