Package obitools
[hide private]
[frames | no frames]

Source Code for Package obitools

  1  ''' 
  2   
  3  ''' 
  4   
  5  from logging import debug 
  6  from weakref import ref 
  7   
  8  from obitools.utils.iterator import uniqueChain 
  9   
 10   
 11  try: 
 12      from functools import partial 
 13  except: 
14 # 15 # Add for compatibility purpose with Python < 2.5 16 # 17 - def partial(func, *args, **keywords):
18 def newfunc(*fargs, **fkeywords): 19 newkeywords = keywords.copy() 20 newkeywords.update(fkeywords) 21 return func(*(args + fargs), **newkeywords)
22 newfunc.func = func 23 newfunc.args = args 24 newfunc.keywords = keywords 25 return newfunc 26 27 28 from obitools.sequenceencoder import DNAComplementEncoder 29 from obitools.location import Location
class WrapperSetIterator(object):
    '''
    Iterator over a set of weak references that yields the result of
    calling each reference instead of the reference object itself.
    '''

    def __init__(self, s):
        '''
        @param s: the set of callables (weak references) to iterate over
        @type s: C{set}
        '''
        # set.__iter__ is called explicitly to get the raw set iterator
        # even when the class of s overrides __iter__.
        self._i = set.__iter__(s)

    def next(self):
        '''
        Return the result of calling the next element of the set.

        @raise StopIteration: when the set is exhausted
        '''
        # Take a single element from the underlying iterator; this form
        # does not depend on the name of the iterator's advance method
        # (next / __next__).
        for weak in self._i:
            return weak()
        raise StopIteration

    # Same method under the name used by the Python 3 iterator protocol.
    __next__ = next

    def __iter__(self):
        return self
38
class WrapperSet(set):
    '''
    Set whose iteration is delegated to L{WrapperSetIterator}.
    '''

    def __iter__(self):
        '''
        @return: a L{WrapperSetIterator} built on this set
        '''
        iterator = WrapperSetIterator(self)
        return iterator
42
class BioSequence(object):
    '''
    BioSequence class is the base class for biological
    sequence representation.

    It provides storage of:

        - the sequence itself,
        - an identifier,
        - a definition, and
        - a set of complementary information managed on a
          key / value principle.

    BioSequence is an abstract class and must be instantiated
    from its subclasses.
    '''

    def __init__(self, id, seq, definition=None, **info):
        '''
        BioSequence constructor.

        @param id: sequence identifier
        @type id: str

        @param seq: the sequence; it is converted with C{str} and
                    stored in lower case
        @type seq: str

        @param definition: sequence definition (optional)
        @type definition: str

        @param info: extra named parameters can be added to associate
                     complementary data to the sequence
        '''
        self._seq = str(seq).lower()
        self._info = dict(info)
        self.definition = definition
        self.id = id

    def getDefinition(self):
        '''
        Sequence definition getter

        @return: the sequence definition
        @rtype: str
        '''
        return self._definition

    def setDefinition(self, value):
        '''
        Sequence definition setter
        '''
        self._definition = value

    def getId(self):
        '''
        Sequence identifier getter
        '''
        return self._id

    def setId(self, value):
        '''
        Sequence identifier setter
        '''
        self._id = value

    def getStr(self):
        '''
        Return the sequence as a string

        @return: the string representation of the sequence
        @rtype: str
        '''
        return self._seq

    def getSymbolAt(self, position):
        '''
        Return the symbol at position in the sequence

        @param position: the desired position. Positions start from 0;
                         if position is < 0 it is considered to
                         reference the end of the sequence.
        @type position: C{int}

        @return: a one letter string
        @rtype: C{str}
        '''
        return str(self)[position]

    def getSubSeq(self, location):
        '''
        Extract a part of the sequence.

        @param location: a C{Location} (its C{extractSequence} method
                         is called with this sequence), an integer
                         (single symbol) or a slice (a L{SubSequence}
                         wrapping this sequence is returned)

        @raise TypeError: if location is of any other type
        '''
        if isinstance(location, Location):
            # The original code referenced an undefined name here
            # instead of the location parameter.
            return location.extractSequence(self)
        if isinstance(location, int):
            return self.getSymbolAt(location)
        if isinstance(location, slice):
            return SubSequence(self, location)

        raise TypeError('key must be a C{Location}, an integer or a slice')

    def extractTaxon(self):
        return None

    def __str__(self):
        return self.getStr()

    def __getitem__(self, key):
        '''
        String keys address the complementary information; any other
        key is forwarded to L{getSubSeq}.
        '''
        if isinstance(key, str):
            return self._info[key]
        return self.getSubSeq(key)

    def __setitem__(self, key, value):
        self._info[key] = value

    def __delitem__(self, key):
        '''
        Delete a complementary information.

        @raise TypeError: if key is not a string
        '''
        if not isinstance(key, str):
            raise TypeError(key)
        del self._info[key]

    def __iter__(self):
        '''
        Iterate through the sequence symbols
        '''
        return iter(str(self))

    def __len__(self):
        return len(str(self))

    def __contains__(self, key):
        # Membership is tested against the complementary information,
        # not against the sequence symbols.
        return key in self._info

    def iteritems(self):
        return self._info.iteritems()

    def items(self):
        return list(self.iteritems())

    def iterkeys(self):
        return self._info.iterkeys()

    def keys(self):
        return list(self.iterkeys())

    def getTags(self):
        '''
        @return: the dictionary holding the complementary information
                 (the internal object, not a copy)
        '''
        return self._info

    def getRoot(self):
        return self

    def getWrappers(self):
        '''
        @return: the set of registered wrappers, created on first
                 access
        '''
        if not hasattr(self, '_wrappers'):
            self._wrappers = WrapperSet()
        return self._wrappers

    def register(self, wrapper):
        '''
        Record a weak reference to wrapper; L{_unregister} is given as
        the weak reference callback.
        '''
        self.wrappers.add(ref(wrapper, self._unregister))

    def _unregister(self, ref):
        self.wrappers.remove(ref)

    wrappers = property(getWrappers, None, None, '')

    definition = property(getDefinition, setDefinition, None,
                          "Sequence Definition")

    id = property(getId, setId, None, 'Sequence identifier')
202
class NucSequence(BioSequence):
    '''
    Biological sequence reporting itself as a nucleotide sequence.
    '''

    def isNucleotide(self):
        '''
        @return: always C{True}
        '''
        return True

    def complement(self):
        '''
        @return: a L{DNAComplementSequence} wrapping this sequence
        '''
        complemented = DNAComplementSequence(self)
        return complemented
210
class AASequence(BioSequence):
    '''
    Biological sequence reporting itself as not being a nucleotide
    sequence.
    '''

    def isNucleotide(self):
        '''
        @return: always C{False}
        '''
        return False
216
class WrappedBioSequence(BioSequence):
    '''
    Sequence built as a view on another sequence (the wrapped one).

    Symbols are read from the wrapped sequence, and complementary
    information missing on the wrapper is looked up on the wrapped
    sequence.
    '''

    def __init__(self, reference, id=None, definition=None, **info):
        '''
        @param reference: the sequence to wrap; the new wrapper is
                          registered on it
        @param id: identifier of the wrapper (optional)
        @param definition: definition of the wrapper (optional)
        @param info: complementary information owned by the wrapper
        '''
        self._wrapped = reference
        reference.register(self)
        self._id = id
        self.definition = definition
        self._info = info

    def getWrapped(self):
        return self._wrapped

    def getDefinition(self):
        '''
        @return: the wrapper's own definition or, when it is empty,
                 the wrapped sequence definition followed by
                 C{' Wrapped'}
        '''
        d = self._definition or ("%s Wrapped" % self.wrapped.definition)
        return d

    def getId(self):
        '''
        @return: the wrapper's own identifier or, when it is empty,
                 the wrapped sequence identifier followed by C{'_WBS'}
        '''
        d = self._id or ("%s_WBS" % self.wrapped.id)
        return d

    def isNucleotide(self):
        return self.wrapped.isNucleotide()

    def iterkeys(self):
        # Keys of the wrapper first, then those of the wrapped sequence.
        return uniqueChain(self._info.iterkeys(),
                           self.wrapped.iterkeys())

    def iteritems(self):
        for x in self.iterkeys():
            yield (x, self[x])

    def __getitem__(self, key):
        '''
        String keys are searched in the wrapper's own information then
        in the wrapped sequence; any other key is forwarded to
        C{getSubSeq}.
        '''
        # The debug trace formerly emitted here was dropped: the module
        # level logging.debug function configures the root logger as a
        # side effect when it has no handler.
        if isinstance(key, str):
            if key in self._info:
                return self._info[key]
            return self.wrapped[key]
        return self.getSubSeq(key)

    def getSymbolAt(self, position):
        return self.wrapped.getSymbolAt(self.posInWrapped(position))

    def posInWrapped(self, position, reference=None):
        '''
        Translate a position on this sequence into a position on a
        wrapped sequence.

        @param position: position on this sequence
        @param reference: the target sequence; when C{None} or when it
                          is the directly wrapped sequence, a single
                          translation step is done, otherwise the
                          request is forwarded to the wrapped sequence
        '''
        if reference is None or reference is self.wrapped:
            return self._posInWrapped(position)
        return self.wrapped.posInWrapped(self._posInWrapped(position),
                                         reference)

    def getStr(self):
        return str(self.wrapped)

    def getRoot(self):
        return self.wrapped.getRoot()

    def _posInWrapped(self, position):
        # Identity mapping; subclasses redefine it.
        return position

    # Properties are declared again so that they use the getters
    # defined in this class.
    definition = property(getDefinition, BioSequence.setDefinition, None,
                          "Sequence Definition")
    id = property(getId, BioSequence.setId, None, "Sequence Identifier")

    wrapped = property(getWrapped, None, None, "Wrapped's Docstring")
284
class SubSequence(WrappedBioSequence):
    '''
    View on a slice of another sequence.
    '''

    @staticmethod
    def _sign(x):
        '''
        @return: 0, -1 or 1 according to the sign of x
        '''
        if x == 0:
            return 0
        elif x < 0:
            return -1
        return 1

    def __init__(self, reference,
                 location=None,
                 start=None, stop=None,
                 id=None, definition=None, **info):
        '''
        @param reference: the sequence to wrap
        @param location: the slice to extract; when it is not a slice,
                         C{start} and C{stop} are used with a step of 1
        @param start: first position of the subsequence (optional)
        @param stop: end position of the subsequence (optional)
        @param id: identifier of the subsequence (optional)
        @param definition: definition of the subsequence (optional)
        @param info: complementary information owned by the subsequence
        '''
        # id and definition are forwarded to the parent constructor; they
        # were previously replaced by None and therefore ignored.
        WrappedBioSequence.__init__(self, reference, id=id,
                                    definition=definition, **info)

        if isinstance(location, slice):
            self._location = location
        else:
            # Bounds left to None are resolved by slice.indices below.
            self._location = slice(start, stop, 1)

        self._indices = self._location.indices(len(self.wrapped))
        self._xrange = xrange(*self._indices)

        self._info['cut'] = '[%d,%d,%s]' % self._indices

    def __len__(self):
        return len(self._xrange)

    def getStr(self):
        return ''.join(self)

    def __iter__(self):
        return (self.wrapped.getSymbolAt(x) for x in self._xrange)

    def _posInWrapped(self, position):
        return self._xrange[position]

    def complement(self):
        '''
        @return: a L{DNAComplementSequence} wrapping this subsequence

        @raise AttributeError: if the wrapped sequence is not a
                               nucleotide sequence
        '''
        if self.wrapped.isNucleotide():
            return DNAComplementSequence(self)
        raise AttributeError
333
class DNAComplementSequence(WrappedBioSequence):
    '''
    View on the reverse complement of a nucleotide sequence: positions
    are read from the end of the wrapped sequence and each symbol is
    translated through L{_comp}.
    '''

    # Complement of each supported (lower case) symbol.
    _comp = {'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
             'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
             's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
             'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
             '-': '-'}

    def __init__(self, reference,
                 id=None, definition=None, **info):
        '''
        @param reference: the nucleotide sequence to complement
        @param id: identifier of the complemented sequence (optional)
        @param definition: definition of the complemented sequence
                           (optional)
        @param info: complementary information owned by this sequence
        '''
        # id and definition are forwarded to the parent constructor; they
        # were previously replaced by None and therefore ignored.
        WrappedBioSequence.__init__(self, reference, id=id,
                                    definition=definition, **info)
        # NOTE: being an assert, this check is skipped when Python runs
        # with the -O option.
        assert reference.isNucleotide()
        self._info['complemented'] = True

    def getId(self):
        '''
        @return: the own identifier of this sequence or, when it is
                 empty, the wrapped sequence identifier followed by
                 C{'_CMP'}
        '''
        d = self._id or ("%s_CMP" % self.wrapped.id)
        return d

    def __len__(self):
        return len(self._wrapped)

    def getStr(self):
        return ''.join(self)

    def __iter__(self):
        return (self.getSymbolAt(x) for x in xrange(len(self)))

    def _posInWrapped(self, position):
        # Position 0 maps to -1 (last symbol of the wrapped sequence),
        # position 1 to -2, and so on.
        return -(position + 1)

    def getSymbolAt(self, position):
        return DNAComplementSequence._comp[
            self.wrapped.getSymbolAt(self.posInWrapped(position))]

    def complement(self):
        '''
        @return: the wrapped sequence
        '''
        return self.wrapped

    id = property(getId, BioSequence.setId, None, "Sequence Identifier")
375
def _isNucSeq(text):
    '''
    Guess whether text looks like a nucleotide sequence.

    @param text: the sequence to test
    @type text: str

    @return: C{True} if every symbol (compared in lower case) is a key
             of C{DNAComplementEncoder._comp} and more than 80% of the
             symbols are one of C{a}, C{c}, C{g}, C{t} or C{-};
             C{False} otherwise, including for an empty text
    @rtype: bool
    '''
    ltot = len(text)
    if ltot == 0:
        # No symbol to judge from; this also avoids dividing by zero.
        return False

    known = DNAComplementEncoder._comp
    acgt = 0
    for c in text.lower():
        if c not in known:
            # A single unknown symbol settles the answer.
            return False
        if c in 'acgt-':
            acgt += 1
    return float(acgt) / ltot > 0.8
387
def bioSeqGenerator(id, seq, definition=None, **info):
    '''
    Build a sequence object of the class matching the content of seq.

    @param id: sequence identifier
    @param seq: the sequence
    @param definition: sequence definition (optional)
    @param info: complementary information attached to the sequence

    @return: a L{NucSequence} when C{_isNucSeq(seq)} is true, an
             L{AASequence} otherwise
    '''
    seqClass = AASequence
    if _isNucSeq(seq):
        seqClass = NucSequence
    return seqClass(id, seq, definition, **info)
394