1 '''
2
3 '''
4
5 from logging import debug
6 from weakref import ref
7
8 from obitools.utils.iterator import uniqueChain
9
10
try:
    from functools import partial
except ImportError:
    # Only a missing functools.partial should trigger the fallback; a bare
    # ``except:`` would also hide unrelated failures (KeyboardInterrupt,
    # SystemExit, ...) behind a silent switch to the pure-Python version.
    def partial(func, *args, **keywords):
        '''
        Pure-Python stand-in for C{functools.partial}.

        @param func: the callable to wrap
        @param args: positional arguments frozen in front of the ones
                     given at call time
        @param keywords: keyword arguments frozen at creation time; a
                         keyword given at call time overrides them

        @return: a callable exposing C{func}, C{args} and C{keywords}
                 attributes
        '''
        def newfunc(*fargs, **fkeywords):
            # Copy so that call-time keywords never leak into the frozen set.
            newkeywords = keywords.copy()
            newkeywords.update(fkeywords)
            return func(*(args + fargs), **newkeywords)
        newfunc.func = func
        newfunc.args = args
        newfunc.keywords = keywords
        return newfunc
26
27
28 from obitools.sequenceencoder import DNAComplementEncoder
29 from obitools.location import Location
35 return self._i.next()()
38
42
45 '''
46 BioSequence class is the base class for biological
47 sequence representation.
48
49 It provides storage of :
50
51 - the sequence itself,
52 - an identifier,
53 - a definition, and
54 - a set of complementary information managed on a key / value principle.
55
56 BioSequence is an abstract class and must be instantiated
57 from its subclasses
58 '''
59
def __init__(self, id, seq, definition=None, **info):
    '''
    Build a new sequence object.

    @param id: identifier of the sequence
    @type id: str

    @param seq: the sequence itself; it is converted with C{str}
                and stored in lower case
    @type seq: str

    @param definition: optional definition of the sequence
    @type definition: str

    @param info: any extra named parameter is kept as complementary
                 data attached to the sequence (key / value pairs)
    '''
    as_text = str(seq)
    self._seq = as_text.lower()

    # Keep a private copy of the extra annotations.
    annotations = {}
    annotations.update(info)
    self._info = annotations

    self.definition = definition
    self.id = id
82
84 '''
85 Sequence definition getter
86
87 @return: the sequence definition
88 @rtype: str
89
90 '''
91 return self._definition
92
94 self._definition = value
95
98
101
103 '''
104 Return the sequence as a string
105
106 @return: the string representation of the sequence
107 @rtype: str
108 '''
109 return self._seq
110
112 '''
113 Return the symbol at the given position in the sequence
114
115 @param position: the desired position. Position start from 0
116 if position is < 0 then they are considered
117 to reference the end of the sequence.
118 @type position: C{int}
119
120 @return: a one letter string
121 @rtype: C{str}
122 '''
123 return str(self)[position]
124
134
137
140
142 if isinstance(key, str):
143 return self._info[key]
144 else:
145 return self.getSubSeq(key)
146
149
151 if isinstance(key, str):
152 del self._info[key]
153 else:
154 raise TypeError,key
155
157 '''
158 Iterate through the sequence symbols
159 '''
160 return iter(str(self))
161
163 return len(str(self))
164
166 return key in self._info
167
170
173
176
179
182
185
187 if not hasattr(self, '_wrappers'):
188 self._wrappers=WrapperSet()
189 return self._wrappers
190
193
196
197 wrappers = property(getWrappers,None,None,'')
198
199 definition = property(getDefinition, setDefinition, None, "Sequence Definition")
200
201 id = property(getId, setId, None, 'Sequence identifier')
202
210
216
219
220 - def __init__(self,reference,id=None,definition=None,**info):
226
229
233
235 d = self._id or ("%s_WBS" % self.wrapped.id)
236 return d
237
240
241
245
247 for x in self.iterkeys():
248 yield (x,self[x])
249
251 debug("coucou from WrappedBioSequence.__getitem__")
252 if isinstance(key, str):
253 if key in self._info:
254 return self._info[key]
255 else:
256 return self.wrapped[key]
257 else:
258 return self.getSubSeq(key)
259
262
268
269
272
275
278
279
280 definition = property(getDefinition,BioSequence.setDefinition, None, "Sequence Definition")
281 id = property(getId,BioSequence.setId, None, "Sequence Identifier")
282
283 wrapped = property(getWrapped, None, None, "Wrapped's Docstring")
284
287
288 @staticmethod
290 if x == 0:
291 return 0
292 elif x < 0:
293 return -1
294 return 1
295
def __init__(self, reference,
             location=None,
             start=None, stop=None,
             id=None, definition=None, **info):
    '''
    Build a view on a part of a reference sequence.

    @param reference: the sequence being wrapped
    @param location: a C{slice} describing the region; when given it
                     is used as is and C{start} / C{stop} are ignored
    @type location: slice

    @param start: first position of the region, used when C{location}
                  is not a slice; defaults to 0 when not an C{int}
    @type start: int
    @param stop: end position of the region, used when C{location}
                 is not a slice; defaults to C{len(reference)} when
                 not an C{int}
    @type stop: int

    @param id: identifier forwarded to the wrapper constructor
    @param definition: definition forwarded to the wrapper constructor
    @param info: extra named parameters forwarded to the wrapper
                 constructor
    '''
    # Forward the caller's id / definition: the previous code passed
    # None for both, silently discarding the values given here.
    WrappedBioSequence.__init__(self, reference,
                                id=id, definition=definition, **info)

    if isinstance(location, slice):
        self._location = location
    else:
        # Replace missing / non-integer bounds by the full extent of the
        # reference (the old code stored these defaults in unused
        # variables and built the slice from the raw arguments).
        if not isinstance(start, int):
            start = 0
        if not isinstance(stop, int):
            stop = len(reference)
        self._location = slice(start, stop, 1)

    # Resolve the slice against the wrapped sequence length once, and
    # keep the matching index range for later lookups.
    self._indices = self._location.indices(len(self.wrapped))
    self._xrange = xrange(*self._indices)

    self._info['cut'] = '[%d,%d,%s]' % self._indices
316
318 return len(self._xrange)
319
321 return ''.join([x for x in self])
322
325
327 return self._xrange[position]
328
333
338
339 _comp={'a': 't', 'c': 'g', 'g': 'c', 't': 'a',
340 'r': 'y', 'y': 'r', 'k': 'm', 'm': 'k',
341 's': 's', 'w': 'w', 'b': 'v', 'd': 'h',
342 'h': 'd', 'v': 'b', 'n': 'n', 'u': 'a',
343 '-': '-'}
344
345
346 - def __init__(self,reference,
347 id=None,definition=None,**info):
351
353 d = self._id or ("%s_CMP" % self.wrapped.id)
354 return d
355
357 return len(self._wrapped)
358
360 return ''.join([x for x in self])
361
363 return (self.getSymbolAt(x) for x in xrange(len(self)))
364
367
370
373
374 id = property(getId,BioSequence.setId, None, "Sequence Identifier")
375
378 acgt = 0
379 notnuc = 0
380 ltot = len(text)
381 for c in text.lower():
382 if c in 'acgt-':
383 acgt+=1
384 if c not in DNAComplementEncoder._comp:
385 notnuc+=1
386 return notnuc==0 and float(acgt)/ltot > 0.8
387
394