1 '''
2
3 '''
4
6 '''
7 BioSequence class is the base class for biological
8 sequence representation.
9
10 It provides storage of the sequence itself, of an identifier and of
11 a definition, and manages a set of complementary information on
12 a key / value principle.
13
14 BioSequence is an abstract class and must be instantiated
15 through its subclasses.
16 '''
17
def __init__(self, id, seq, definition=None, **info):
    '''
    BioSequence constructor.

    Stores the identifier, the sequence and the optional definition on
    the instance, and keeps the extra keyword arguments in a private
    dictionary.

    @param id: sequence identifier
    @type id: str

    @param seq: the sequence
    @type seq: str

    @param definition: sequence definition (optional)
    @type definition: str

    @param info: extra named parameters can be added to associate
                 complementary data to the sequence
    '''
    # `id` shadows the builtin, but it is part of the signature callers
    # use, so the name is kept.
    self.seq = seq
    self.definition = definition
    self.id = id
    # Complementary key / value data, stored as a copy of the keyword
    # arguments.
    self._info = dict(info)
40
43
def __getitem__(self, key):
    '''
    Give access to an annotation, to a single symbol or to a
    sub-sequence, depending on the type of C{key}.

    NOTE(review): the C{def} line of this method is absent from the
    listing; the header was rebuilt from the body, which only uses
    C{self} and C{key} - confirm against the original file.

    @param key: an annotation name (str), a position in the sequence
                (int) or a slice of the sequence

    @return: the annotation stored under C{key} for a str, the symbol
             at that position for an int, or, for a slice, whatever
             C{bioSeqGenerator} builds from the sub-sequence; the
             annotations passed to it are a copy extended with a
             'cut' entry formatted as '[start,stop,step]'

    @raise TypeError: if C{key} is not a str, an int or a slice
    '''
    if isinstance(key, str):
        return self._info[key]

    if isinstance(key, int):
        return self.seq[key]

    if isinstance(key, slice):
        subseq = self.seq[key]
        # Work on a copy so the 'cut' annotation does not leak into
        # the annotations of the sequence being sliced.
        info = dict(self._info)

        # NOTE(review): an explicit stop is reported as stop + 1 while
        # an open-ended slice reports len(self.seq), so s[0:len(s)] and
        # s[0:] give different 'cut' values; negative bounds are
        # reported unnormalised. Left unchanged because the consumers
        # of 'cut' are not visible here.
        start = 1 if key.start is None else key.start + 1
        stop = len(self.seq) if key.stop is None else key.stop + 1
        step = 1 if key.step is None else key.step

        info['cut'] = '[%d,%d,%s]' % (start, stop, step)
        return bioSeqGenerator(self.id, subseq, self.definition, **info)

    # Call form of raise: the original "raise TypeError, '...'" form is
    # a syntax error on Python 3.
    raise TypeError('key must be an integer, a str or a slice')
68
71
74
77
79 raise NotImplementedError
80
# Complement lookup table, referenced elsewhere in this listing as
# NucSequence._comp. Each upper-case symbol maps to its complement:
# A/T and C/G, the pairs R/Y, K/M, B/V and D/H, the self-complementary
# S, W and N (these look like the IUPAC ambiguity codes), 'U' (mapped
# to 'A'; nothing maps back to 'U') and the gap character '-', which is
# left unchanged. Keys are upper-case only.
_comp = {
    'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A',
    'R': 'Y', 'Y': 'R', 'K': 'M', 'M': 'K',
    'S': 'S', 'W': 'W', 'B': 'V', 'D': 'H',
    'H': 'D', 'V': 'B', 'N': 'N', 'U': 'A',
    '-': '-',
}
87
# NOTE(review): the `def` line owning these statements is not part of this
# listing, so only the visible body is documented.
# Builds the reverse complement of self.seq: each symbol is translated
# through NucSequence._comp (symbols missing from the table become 'N'), the
# list is reversed, and a new NucSequence with the same id, definition and
# annotations is returned with its 'complemented' annotation inverted
# (a missing annotation counts as False, so it becomes True).
# NOTE(review): the table lookup is case-sensitive and the table only has
# upper-case keys, so lower-case symbols all become 'N' - confirm that
# sequences are stored upper-case.
89 cseq = [NucSequence._comp.get(x,'N') for x in self.seq]
90 cseq.reverse()
91 rep = NucSequence(self.id,''.join(cseq),self.definition,**self._info)
92 rep._info['complemented']=not rep._info.get('complemented',False)
93 return rep
94
97
102
103
104
# NOTE(review): the `def` line owning these statements is not part of this
# listing, so only the visible body is documented; `text` is presumably its
# parameter.
# Scans the upper-cased characters of `text`, counting those found in
# 'ACGT-' (acgt) and those that are not keys of NucSequence._comp (notnuc).
# The result is true only when no character is outside the table and the
# 'ACGT-' characters make up more than 80% of len(text).
# NOTE(review): for an empty `text`, ltot is 0 and notnuc==0 holds, so the
# division is evaluated and raises ZeroDivisionError - confirm callers never
# pass an empty string.
106 acgt = 0
107 notnuc = 0
108 ltot = len(text)
109 for c in text.upper():
110 if c in 'ACGT-':
111 acgt+=1
112 if c not in NucSequence._comp:
113 notnuc+=1
114 return notnuc==0 and float(acgt)/ltot > 0.8
115
116
122