Package obitools
[hide private]
[frames | no frames]

Source Code for Package obitools

  1  ''' 
  2   
  3  ''' 
  4   
class BioSequence(object):
    '''
    BioSequence class is the base class for biological
    sequence representation.

    It provides storage of the sequence itself, of an identifier,
    a definition and manages a set of complementary information on
    a key / value principle.

    BioSequence is an abstract class and must be instantiated
    from its subclasses (L{isNucleotide} is not implemented here).
    '''

    def __init__(self, id, seq, definition=None, **info):
        '''
        BioSequence constructor.

        @param id sequence identifier
        @type id str

        @param seq the sequence
        @type seq str

        @param definition sequence definition (optional)
        @type definition str

        @param info extra named parameters can be added to associate
                    complementary data to the sequence
        '''
        self.seq = seq
        self.definition = definition
        self.id = id
        # Copy so the instance owns its own annotation dictionary.
        self._info = dict(info)

    def __str__(self):
        '''
        Return the stored sequence.
        '''
        return self.seq

    def __getitem__(self, key):
        '''
        Give access to annotations, symbols or sub-sequences
        depending on the type of the key.

        @param key a str returns the complementary information stored
                   under that name, an int returns the symbol at that
                   position of the sequence, a slice returns a new
                   sequence object built by bioSeqGenerator from the
                   sliced sequence
        @type key str, int or slice

        @raise KeyError if key is a str with no associated information
        @raise TypeError if key is neither a str, an int nor a slice
        '''
        if isinstance(key, str):
            return self._info[key]

        if isinstance(key, int):
            return self.seq[key]

        if isinstance(key, slice):
            subseq = self.seq[key]
            # The sub-sequence inherits a copy of the annotations plus
            # a 'cut' entry describing the slice as '[start,stop,step]'.
            info = dict(self._info)

            # NOTE(review): an explicit stop is reported as stop+1 while
            # an omitted stop is reported as len(seq), and negative
            # bounds are not normalised -- confirm the intended
            # convention before relying on the 'cut' values.
            if key.start is not None:
                start = key.start + 1
            else:
                start = 1

            if key.stop is not None:
                stop = key.stop + 1
            else:
                stop = len(self.seq)

            if key.step is not None:
                step = key.step
            else:
                step = 1

            info['cut'] = '[%d,%d,%s]' % (start, stop, step)
            return bioSeqGenerator(self.id, subseq, self.definition, **info)

        # Call form of raise: valid on both Python 2 and Python 3.
        raise TypeError('key must be an integer, a str or a slice')

    def __setitem__(self, key, value):
        '''
        Store value as complementary information under key.
        '''
        self._info[key] = value

    def __iter__(self):
        '''
        Iterate over the symbols of the sequence.
        '''
        return iter(self.seq)

    def __len__(self):
        '''
        Return the length of the sequence.
        '''
        return len(self.seq)

    def isNucleotide(self):
        '''
        Must be overridden by subclasses.

        @raise NotImplementedError always, in this base class
        '''
        raise NotImplementedError
80
class NucSequence(BioSequence):
    '''
    Nucleic acid sequence.
    '''

    # Complement of each upper case nucleotide symbol, including
    # ambiguity codes (R/Y, K/M, ...), 'U' and the '-' gap symbol.
    _comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A',
             'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K',
             'S': 'S', 'W': 'W', 'B': 'V', 'D': 'H',
             'H': 'D', 'V': 'B', 'N': 'N', 'U': 'A',
             '-': '-'}

    def complement(self):
        '''
        Build the reverse complement of the sequence.

        Each symbol is replaced by its complement and the order of
        the symbols is reversed. Symbols without a known complement
        become 'N'. The case of each symbol is preserved, so a lower
        case base gives a lower case complement instead of 'N'.

        @return a new NucSequence with the same id, definition and
                complementary information, whose 'complemented'
                entry is the negation of the original one
                (a missing entry counts as False)
        @rtype NucSequence
        '''
        table = NucSequence._comp
        cseq = []
        for x in reversed(self.seq):
            # The table only holds upper case keys: look up the
            # upper case symbol, then restore the original case.
            c = table.get(x.upper(), 'N')
            if x.islower():
                c = c.lower()
            cseq.append(c)

        rep = NucSequence(self.id, ''.join(cseq), self.definition,
                          **self._info)
        rep._info['complemented'] = not rep._info.get('complemented', False)
        return rep

    def isNucleotide(self):
        '''
        @return always True
        '''
        return True
97
class AASequence(BioSequence):
    '''
    Sequence that is not a nucleic acid sequence.
    '''

    def isNucleotide(self):
        '''
        @return always False
        '''
        return False
102 103 104
105 -def _isNucSeq(text):
106 acgt = 0 107 notnuc = 0 108 ltot = len(text) 109 for c in text.upper(): 110 if c in 'ACGT-': 111 acgt+=1 112 if c not in NucSequence._comp: 113 notnuc+=1 114 return notnuc==0 and float(acgt)/ltot > 0.8
115 116
def bioSeqGenerator(id, seq, definition=None, **info):
    '''
    Build a sequence object of the class matching the sequence content.

    @param id sequence identifier
    @param seq the sequence
    @param definition sequence definition (optional)
    @param info extra named parameters forwarded to the constructor

    @return a NucSequence when _isNucSeq accepts seq,
            an AASequence otherwise
    '''
    seqClass = NucSequence if _isNucSeq(seq) else AASequence
    return seqClass(id, seq, definition, **info)
122