# Each nucleotide symbol is represented by the list of 2-bit codes it may
# stand for.  The four plain bases map to a single code each.
_A = [0]
_C = [1]
_G = [2]
_T = [3]

# Ambiguity symbols (the letter/base combinations below coincide with the
# IUPAC nucleotide ambiguity codes).  The element order is significant and
# is kept exactly as originally defined: code in this file that looks a
# symbol up in _dnahash may keep only element 0 of the list.
_R = [0, 2]        # A, G
_Y = [1, 3]        # C, T
_M = [1, 0]        # C, A
_K = [3, 2]        # T, G
_W = [3, 0]        # T, A
_S = [1, 2]        # C, G
_B = [1, 2, 3]     # C, G, T
_D = [0, 2, 3]     # A, G, T
_H = [0, 1, 3]     # A, C, T
_V = [0, 1, 2]     # A, C, G
_N = [0, 1, 2, 3]  # A, C, G, T

# Lookup table: lower-case symbol -> list of candidate 2-bit codes.
_dnahash = dict(zip(
    'acgtrymkwsbdhvn',
    (_A, _C, _G, _T, _R, _Y, _M, _K, _W, _S, _B, _D, _H, _V, _N),
))
33
35 errors = 0
36 emask = [0] * wsize
37 epointer = 0
38 size = 0
39 position = offset
40 hashs = set([0])
41 hashmask = 0
42 for i in xrange(wsize):
43 hashmask <<= 2
44 hashmask +=3
45
46 for l in sequence:
47 l = l.lower()
48 hl = _dnahash[l]
49
50 if emask[epointer]:
51 errors-=1
52 emask[epointer]=0
53
54 if len(hl) > 1:
55 errors +=1
56 emask[epointer]=1
57
58 epointer+=1
59 epointer%=wsize
60
61 if errors > degeneratemax:
62 hl=set([hl[0]])
63
64 hashs=set((((hc<<2) | cl) & hashmask)
65 for hc in hashs
66 for cl in hl)
67
68 if size < wsize:
69 size+=1
70
71 if size==wsize:
72 if errors <= degeneratemax:
73 yield (position,hashs,errors)
74 position+=1
75
def hashSequence(sequence, wsize, degeneratemax=0, offset=0, hashs=None):
    """Fill a table mapping each word code to the positions where it occurs.

    The function walks ``hashCodeIterator(sequence, wsize, degeneratemax,
    offset)``; every item is unpacked as a triple whose first element is a
    position and whose second element is an iterable of codes.  The position
    is appended to the table entry of each of those codes.  The third element
    of the triple is not used here.

    sequence, wsize, degeneratemax, offset -- forwarded unchanged to
        hashCodeIterator.
    hashs -- table to update in place, indexed by code; when None, a new
        list of ``4 ** wsize`` empty lists is created.

    Returns the table (the very object passed as ``hashs`` when one was
    supplied).
    """
    table = hashs
    if table is None:
        # One empty bucket per possible code of a word of length wsize.
        table = [[] for _ in xrange(4 ** wsize)]

    words = hashCodeIterator(sequence, wsize, degeneratemax, offset)
    for position, codes, _unused in words:
        for code in codes:
            table[code].append(position)

    return table
85
87 hashs=None
88 offsets=[]
89 offset=0
90 for s in sequences:
91 offsets.append(offset)
92 hashSequence(s,wsize,degeneratemax=degeneratemax,offset=offset,hashs=hashs)
93 offset+=len(s)
94
95 return hashs,offsets
96