1 from itertools import imap
2
3
# Alphabet used to enumerate DNA words: the four nucleotides, lowercase,
# in the order in which words are generated.
_dna = 'acgt'
5
13
14
def wordIterator(sequence, lword, step=1, endIncluded=False, circular=False):
    '''
    Iterate over the words of size `lword` found in a sequence.

    Words are slices of C{str(sequence)} starting at positions
    0, `step`, 2 * `step`, ... The last start position depends on the
    selected mode:

      - default: only the positions where a full word fits, so every
        word has exactly `lword` characters;
      - `endIncluded`: every position of the sequence, so the last
        words are shorter than `lword`;
      - `circular`: every position of the sequence; the sequence is
        extended with its own first `lword` characters so that words
        starting near the end wrap around to the beginning. The prefix
        is appended only once, so words longer than the sequence itself
        are still truncated.

    @param sequence: the sequence to cut into words
    @type sequence: any object supporting C{len()} and C{str()}
    @param lword: size of the words
    @type lword: int
    @param step: distance between the start of two consecutive words
    @type step: int
    @param endIncluded: also yield the truncated words located at the
                        end of the sequence
    @type endIncluded: bool
    @param circular: consider the sequence as circular
    @type circular: bool

    @return: an iterator on the words (str)
    @rtype: iterator

    @raise AssertionError: if `endIncluded` and `circular` are both set
    @raise ValueError: if `step` is lower than 1
    '''
    # NOTE(review): the `def` line was missing from the reviewed text; the
    # function name, parameter order and defaults were reconstructed from
    # the body - confirm against callers.
    assert not (endIncluded and circular), \
        "endIncluded and circular cannot be set to True at the same time"

    # xrange() used to reject a null step; negative steps either yielded
    # nothing or sliced from the end of the sequence, so reject them too.
    if step < 1:
        raise ValueError("step must be a positive integer")

    # The length is taken before the conversion to str, as it always was.
    L = len(sequence)
    sequence = str(sequence)
    if circular:
        # Append the beginning of the sequence so that the last words wrap.
        sequence += sequence[0:lword]
        pmax = L
    elif endIncluded:
        pmax = L
    else:
        pmax = L - lword + 1

    # Plain loop instead of xrange(): lazy on both Python 2 and Python 3.
    x = 0
    while x < pmax:
        yield sequence[x:x + lword]
        x += step
33
34
def allWordIterator(size, _prefix=''):
    '''
    Iterate through the list of all DNA words of size `size`.

    Words are built from the letters of the module-level alphabet
    `_dna` and are produced in the order of that alphabet, the leftmost
    letter varying the slowest. A null `size` yields a single word:
    `_prefix` itself.

    @param size: size of the DNA word
    @type size: int
    @param _prefix: internal parameter kept for backward compatibility;
                    it is prepended to every generated word
    @type _prefix: string

    @return: an iterator on DNA word (str)
    @rtype: iterator

    @raise ValueError: if `size` is negative
    '''
    # NOTE(review): the `def` line was missing from the reviewed text; the
    # name comes from the former recursive call, the empty default of
    # `_prefix` is assumed - confirm against callers.
    from itertools import product

    if not size:
        yield _prefix
        return
    # The former recursive implementation never terminated on a negative
    # size; fail explicitly instead.
    if size < 0:
        raise ValueError("size must be a non-negative integer")
    # product() enumerates words in the same order as the nested
    # recursion did, without one generator level per letter.
    for letters in product(_dna, repeat=size):
        yield _prefix + ''.join(letters)
54
def wordSelector(words, accept=None, reject=None):
    '''
    Filter over a DNA word iterator.

    A word is yielded when every predicate of `accept` returns a true
    value for it and no predicate of `reject` does. Without any
    predicate, every word is yielded.

    @param words: an iterable object over a list of DNA words
    @type words: an iterator
    @param accept: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value.
    @type accept: list
    @param reject: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value.
    @type reject: list

    @return: an iterator on DNA word (str)
    @rtype: iterator
    '''
    # NOTE(review): the `def` line was missing from the reviewed text; the
    # function name and the None defaults were reconstructed from the
    # body - confirm against callers.
    if accept is None:
        accept = []
    if reject is None:
        reject = []
    for w in words:
        # all()/any() stop at the first decisive predicate and do not
        # depend on the Python 2 only builtin reduce().
        if all(p(w) for p in accept) and not any(p(w) for p in reject):
            yield w
86
def wordDist(w1, w2):
    '''
    Compute the Hamming distance between two words of the same size.

    Letters are compared position by position. If the words differ in
    length, only the positions of the shorter one are compared. Two
    empty words are at distance 0.

    @param w1: the first word
    @type w1: str
    @param w2: the second word
    @type w2: str

    @return: the count of differences between the two words
    @rtype: int
    '''
    # NOTE(review): the `def` line was missing from the reviewed text; the
    # function name was reconstructed - confirm against callers.
    # sum() returns 0 on empty input, where the former reduce() call
    # without an initial value raised a TypeError.
    return sum(1 for a, b in zip(w1, w2) if a != b)
103