Package obitools :: Module fast
[hide private]
[frames | no frames]

Source Code for Module obitools.fast

 1  """ 
 2      implement fastn/fastp similarity search algorithm for BioSequence. 
 3  """ 
 4   
class Fast(object):
    """
    Word index of a sequence used for fastn/fastp-like diagonal scoring.

    The constructor records every position of each word of size C{kup}
    found in the reference sequence. Calling the instance with another
    sequence counts, for each shift (diagonal) between the two sequences,
    how many words they share on that diagonal.
    """

    def __init__(self, seq, kup=2):
        """
        Build the word index of a sequence.

        @param seq: sequence to hash
        @type seq: BioSequence
        @param kup: word size used for hashing process
        @type kup: int
        """
        seq = str(seq)
        index = {}

        # A sequence of length n holds n - kup + 1 words of size kup;
        # stopping at n - kup would silently drop the last word.
        for pos in range(len(seq) - kup + 1):
            word = seq[pos:pos + kup].upper()
            index.setdefault(word, []).append(pos)

        self._kup = kup
        self._hash = index
        self._seq = seq

    def __call__(self, seq):
        """
        Align one sequence with the fast hash table.

        @param seq: the sequence to align
        @type seq: BioSequence

        @return: C{(smax, pmax)} where smax is the score of the largest
                 diagonal and pmax the list of shifts
                 (query position - indexed position) reaching that score.
                 C{(0, [])} is returned when no word is shared.
        @rtype: a tuple (int, list of int)
        """
        seq = str(seq).upper()
        index = self._hash
        kup = self._kup
        histo = {}

        # Same word count as in __init__: the last word must be included.
        for pos in range(len(seq) - kup + 1):
            for p in index.get(seq[pos:pos + kup], ()):
                delta = pos - p
                histo[delta] = histo.get(delta, 0) + 1

        # max() fails on an empty sequence: no shared word means score 0.
        if not histo:
            return 0, []

        smax = max(histo.values())
        pmax = [delta for delta, count in histo.items() if count == smax]
        return smax, pmax

    def __len__(self):
        """
        @return: the length of the indexed sequence
        @rtype: int
        """
        return len(self._seq)