Package obitools :: Package word
[hide private]
[frames | no frames]

Source Code for Package obitools.word

  1  from itertools import imap 
  2   
  3   
  4  _dna='acgt' 
  5   
def wordCount(liste):
    '''
    Count how many times each element occurs in an iterable.

    @param liste: the elements to count; each one is used as a
                  dictionary key
    @type liste: iterable

    @return: a dictionary mapping each distinct element to its
             number of occurrences
    @rtype: dict
    '''
    tally = {}

    for item in liste:
        if item in tally:
            tally[item] += 1
        else:
            tally[item] = 1

    return tally
13 14
def wordIterator(sequence, lword, step=1, endIncluded=False, circular=False):
    '''
    Iterate over the words of size `lword` read along a sequence.

    This is a generator: nothing, including the argument check, is
    executed before the first word is requested.

    @param sequence: the sequence to scan; its length is taken with
                     C{len} and its text with C{str}
    @type sequence: any object supporting C{len} and C{str}
    @param lword: size of the words to extract
    @type lword: int
    @param step: offset between the starts of two consecutive words
    @type step: int
    @param endIncluded: if True, a word is started at every stepped
                        position up to the end of the sequence, so
                        the last words are shorter than `lword`
    @type endIncluded: bool
    @param circular: if True, the first `lword` letters are appended
                     to the sequence so that words started near the
                     end wrap around to the beginning
    @type circular: bool

    @return: an iterator on words (str)
    @rtype: iterator

    @raise AssertionError: if `endIncluded` and `circular` are both
                           True
    '''
    assert not (endIncluded and circular), \
        "endIncluded and circular cannot both be set to True at the same time"

    # xrange only exists on Python 2, where range would build the whole
    # list of positions; on Python 3 range is already lazy.
    try:
        lazy_range = xrange
    except NameError:
        lazy_range = range

    # The length is measured before the conversion to str, so it is the
    # length reported by the original object that bounds the scan.
    L = len(sequence)
    sequence = str(sequence)

    if circular:
        # Extend the text with its own beginning to allow wrap-around.
        sequence += sequence[0:lword]
        pmax = L
    elif endIncluded:
        pmax = L
    else:
        # Only positions where a full-size word still fits.
        pmax = L - lword + 1

    for x in lazy_range(0, pmax, step):
        yield sequence[x:x + lword]
33 34
def allWordIterator(size, _prefix=''):
    '''
    Iterate through the list of all DNA words of size `size`.

    Words are built by recursion, one letter of the module alphabet
    `_dna` at a time, and are produced in the order of that alphabet.

    @param size: size of the DNA word
    @type size: int
    @param _prefix: internal parameter used for recursion purpose
    @type _prefix: string

    @return: an iterator on DNA word (str)
    @rtype: iterator

    @raise ValueError: if `size` is negative
    '''
    if not size:
        # Nothing left to add: the prefix is a complete word.
        yield _prefix
        return

    # A negative size never reaches zero; fail right away instead of
    # recursing until the interpreter's recursion limit is hit.
    if size < 0:
        raise ValueError("size must not be negative, got %r" % (size,))

    for letter in _dna:
        for word in allWordIterator(size - 1, _prefix + letter):
            yield word
54
def wordSelector(words, accept=None, reject=None):
    '''
    Filter over a DNA word iterator.

    A word is kept when every `accept` predicate is true for it and
    no `reject` predicate is. Predicates are evaluated in order and
    evaluation stops as soon as the outcome is known, so they should
    not rely on side effects.

    @param words: an iterable object over a list of DNA words
    @type words: an iterator
    @param accept: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value.
    @type accept: list
    @param reject: a list of predicates. Each predicate is a function
                   accepting one str parameter and returning a boolean
                   value.
    @type reject: list

    @return: an iterator on DNA word (str)
    @rtype: iterator
    '''
    if accept is None:
        accept = []
    if reject is None:
        reject = []

    for w in words:
        # all() of no predicate is True and any() of no predicate is
        # False, so a word passes when both lists are empty.
        if all(p(w) for p in accept) and not any(p(w) for p in reject):
            yield w
86
def wordDist(w1, w2):
    '''
    Compute the Hamming distance between two words of the same size.

    The words are compared position by position. If their sizes
    differ, only the positions they have in common are compared.

    @param w1: the first word
    @type w1: str
    @param w2: the second word
    @type w2: str

    @return: the count of differences between the two words; 0 when
             both words are empty
    @rtype: int
    '''
    # sum() starts from 0, so two empty words give 0 instead of failing.
    return sum(1 for a, b in zip(w1, w2) if a != b)
103