Package obitools :: Module fast
[hide private]
[frames] | no frames]

Source Code for Module obitools.fast

 1  """ 
 2      implement fastn/fastp sililarity search algorithm for BioSequence. 
 3  """ 
 4   
5 -class Fast(object):
6
7 - def __init__(self,seq,kup=2):
8 ''' 9 @param seq: sequence to hash 10 @type seq: BioSequence 11 @param kup: word size used for hashing process 12 @type kup: int 13 ''' 14 hash={} 15 seq = str(seq) 16 for word,pos in ((seq[i:i+kup].upper(),i) for i in xrange(len(seq)-kup)): 17 if word in hash: 18 hash[word].append(pos) 19 else: 20 hash[word]=[pos] 21 22 self._kup = kup 23 self._hash= hast
24
25 - def __call__(self,seq):
26 ''' 27 Align one sequence with the fast hash table. 28 29 @param seq: the sequence to align 30 @type seq: BioSequence 31 32 @return: where smax is the 33 score of the largest diagonal and pmax the 34 associated shift 35 @rtype: a int tuple (smax,pmax) 36 ''' 37 histo={} 38 seq = str(seq) 39 hash= self._hash 40 kup = self._kup 41 42 for word,pos in ((seq[i:i+kup],i) for i in xrange(len(seq)-kup)): 43 matchedpos = hash.get(word,[]) 44 for p in matchedpos: 45 delta = pos - p 46 histo[delta]=histo.get(delta,0) + 1 47 smax = max(histo.values()) 48 pmax = [x for x in histo if histo[x]==smax] 49 return smax,pmax
50