1 import sys
2
3 import time
4 import re
5 import shelve
6
7 from threading import Lock
8 from logging import warning
9 import urllib2
10
11 from obitools.gzip import GzipFile
12 from obitools.zipfile import ZipFile
13
14
15
def universalOpen(file, *options):
    '''
    Open a file, transparently handling URLs and compressed files.

    If C{file} is a C{str} instance, it is considered as a file name
    or a URL: a name without URL scheme is opened locally with C{open},
    anything else is fetched with C{urllib2.urlopen} (15 second timeout).
    A name ending with C{.gz} is then wrapped in a C{GzipFile}; a name
    ending with C{.zip} must be an archive holding exactly one member,
    and that member is opened.

    If C{file} is another kind of object, it is assumed that this
    object follows the C{file} interface and it is returned as is.

    @param file: the file to open
    @type file: C{str} or a file like object
    @param options: extra positional arguments forwarded to C{open}
                    for local files (they are not used for URLs)

    @return: the opened file like object

    @raise AssertionError: if a zip archive does not contain exactly
                           one member
    '''
    if not isinstance(file, str):
        return file

    # An empty URL scheme means a plain local path.
    if urllib2.urlparse.urlparse(file)[0] == '':
        rep = open(file, *options)
    else:
        rep = urllib2.urlopen(file, timeout=15)

    if file.endswith('.gz'):
        rep = GzipFile(fileobj=rep)
    elif file.endswith('.zip'):
        archive = ZipFile(file=rep)
        members = archive.infolist()
        # Raised explicitly instead of using an assert statement so
        # that the check is still enforced when Python runs with -O.
        if len(members) != 1:
            raise AssertionError('Only zipped file containning a single file can be open')
        rep = archive.open(members[0].filename)

    return rep
51
# NOTE(review): the 'def' line of this function is missing from the
# listing that was reviewed; its name and signature are reconstructed
# from the body - confirm against the module.
def universalTell(file):
    '''
    Give the current position in a file, gzipped or not.

    For a C{GzipFile} instance the position is asked to its
    C{myfileobj} attribute (presumably the underlying compressed file
    - confirm against C{obitools.gzip}) instead of the C{GzipFile}
    itself.

    @param file: the file to check
    @type file: a C{file} like instance

    @return: position in the file
    @rtype: C{int}
    '''
    target = file.myfileobj if isinstance(file, GzipFile) else file
    return target.tell()
66
# NOTE(review): the 'def' line of this function is missing from the
# listing that was reviewed; its name and signature are reconstructed
# from the body - confirm against the module.
def fileSize(file):
    '''
    Give the size of a file, gzipped or not.

    The size is measured by seeking to the end of the file; the
    position the file had on entry is restored before returning.
    For a C{GzipFile} instance the measure is done on its
    C{myfileobj} attribute.

    @param file: the file to check
    @type file: a seekable C{file} like instance

    @return: the size of the file
    @rtype: C{int}
    '''
    raw = file.myfileobj if isinstance(file, GzipFile) else file
    saved = raw.tell()
    raw.seek(0, 2)        # whence 2: relative to the end of the file
    size = raw.tell()
    raw.seek(saved, 0)    # whence 0: absolute, back where we started
    return size
85
# NOTE(review): the 'def' line of this function is missing from the
# listing that was reviewed; the name, the parameter order and the
# defaults are reconstructed from the body - confirm against the module.
def progressBar(pos, max, reset=False, delta=[]):
    '''
    Draw a one line text progress bar on C{sys.stderr}.

    The line starts with a carriage return so that successive calls
    redraw it in place. It shows the percentage done, a 50 character
    bar ended by a spinner, and the remaining time extrapolated from
    the time elapsed since the first call.

    @param pos: the current position
    @type pos: C{int}
    @param max: the position corresponding to 100 %; when it is 0 the
                bar stays at 0 %
    @type max: C{int} or C{float}
    @param reset: if true, the clock used for the estimate is restarted
    @type reset: C{bool}
    @param delta: two item list C{[start time, time of last call]}.
                  The shared default list is what carries the start
                  time from one call to the next, so it is mutable on
                  purpose.
    @type delta: C{list}
    '''
    if reset:
        del delta[:]
    if not delta:
        now = time.time()
        delta.extend((now, now))

    delta[1] = time.time()
    elapsed = delta[1] - delta[0]

    percent = float(pos) / max * 100 if max else 0.0

    if percent > 0:
        remain = time.strftime('%H:%M:%S',
                               time.gmtime(elapsed / percent * (100 - percent)))
    else:
        # Nothing done yet, so there is no rate to extrapolate from
        # (dividing by the percentage here raised ZeroDivisionError).
        remain = '--:--:--'

    done = int(percent / 2)
    bar = '#' * done + '|/-\\-'[pos % 5] + ' ' * (50 - done)
    sys.stderr.write('\r%5.1f %% |%s] remain : %s' % (percent, bar, remain))
101
def endLessIterator(endedlist):
    '''
    Iterate over a sequence, then repeat its last item for ever.

    @param endedlist: the items to iterate over
    @type endedlist: a sequence supporting iteration and C{[-1]} indexing

    @return: an iterator that never stops by itself
    '''
    for item in endedlist:
        yield item
    # Past the end: keep serving the last element.
    while True:
        yield endedlist[-1]
107
108
# NOTE(review): the 'def' line of this function is missing from the
# listing that was reviewed; its name and signature are reconstructed
# from the body - confirm against the module.
def multiLineWrapper(lineiterator):
    '''
    Join the lines ended by a backslash with the line following them.

    A line whose last but one character is a backslash (i.e. a
    backslash followed by the end of line character) is a continued
    line: its two last characters are removed and the next line is
    appended to it. This is repeated as long as the appended line is
    itself a continued line.

    @param lineiterator: a stream of text lines
    @type lineiterator: an iterable on C{str}

    @return: the joined lines
    @rtype: an iterator on C{str}

    @raise FileFormatError: if the stream ends just after a continued
                            line
    '''
    # The for loop and the explicit next() below must consume one and
    # the same iterator. Going through iter() also allows plain lists,
    # which have no next() method.
    lineiterator = iter(lineiterator)

    for line in lineiterator:
        parts = [line]
        while len(line) >= 2 and line[-2] == '\\':
            parts[-1] = parts[-1][:-2]
            try:
                line = next(lineiterator)
            except StopIteration:
                # FileFormatError is not defined in the visible part of
                # the file - presumably declared elsewhere; confirm.
                raise FileFormatError
            parts.append(line)
        yield ''.join(parts)
133
134
# NOTE(review): the 'def' line of this function is missing from the
# listing that was reviewed; its name and signature are reconstructed
# from the body - confirm against the module.
def skipWhiteLineIterator(lineiterator):
    '''
    Filter the blank lines out of a stream of lines.

    A line is blank when nothing is left of it once stripped of its
    white space. The other lines are passed through unchanged.

    @param lineiterator: a stream of text lines
    @type lineiterator: an iterable on C{str}

    @return: the lines that are not blank
    @rtype: an iterator on C{str}
    '''
    # Blank lines are dropped silently: the previous code printed
    # 'skipped' on stdout for each of them, which looked like a
    # leftover debugging trace.
    for line in lineiterator:
        if line.strip():
            yield line
154
155
157
158 - def __init__(self,stream,sep=None,strip=True,types=None,skip=None,head=None):
159 self._stream = universalOpen(stream)
160 self._delimiter=sep
161 self._strip=strip
162 if types:
163 self._types=[x for x in types]
164 for i in xrange(len(self._types)):
165 if self._types[i] is bool:
166 self._types[i]=ColumnFile.str2bool
167 else:
168 self._types=None
169
170 self._skip = skip
171 if skip is not None:
172 self._lskip= len(skip)
173 else:
174 self._lskip= 0
175 self._head=head
176
178 return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
179
180 str2bool = staticmethod(str2bool)
181
182
185
187 ligne = self._stream.next()
188 if self._skip is not None:
189 while ligne[0:self._lskip]==self._skip:
190 ligne = self._stream.next()
191 data = ligne.split(self._delimiter)
192 if self._strip or self._types:
193 data = [x.strip() for x in data]
194 if self._types:
195 it = endLessIterator(self._types)
196 data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
197 if self._head is not None:
198 data=dict(map(None, self._head,data))
199 return data
200
203
204
206
class CachedDB(object):
    '''
    Wrapper adding a persistent cache in front of a sequence database.

    The entries got from the master database are stored in a C{shelve}
    file under their C{id} attribute; the accesses to this file are
    serialized by a lock.
    '''
    # NOTE(review): the class header and the 'def' lines of the three
    # methods are missing from the listing that was reviewed; the names
    # and signatures are reconstructed from the bodies (only _cacheSeq
    # is named by a visible call) - confirm against the module.

    def __init__(self, cachefile, masterdb):
        '''
        @param cachefile: name of the shelve file, created if needed
        @type cachefile: C{str}
        @param masterdb: the database asked for the entries missing
                         from the cache; it is indexed with one
                         accession or with a list of accessions
        '''
        self._cache = shelve.open(cachefile, 'c')
        self._db = masterdb
        self._lock = Lock()

    def _cacheSeq(self, seq):
        '''
        Store an entry in the cache under its C{id}.

        @param seq: the entry to store
        @return: C{seq} itself
        '''
        # 'with' releases the lock even if the shelve write raises;
        # a bare acquire()/release() pair would leave it held for ever.
        with self._lock:
            self._cache[seq.id] = seq
        return seq

    def __getitem__(self, ac):
        '''
        Get one or several entries, from the cache when possible.

        @param ac: an accession, or an iterable of accessions
        @type ac: C{str} or an iterable on C{str}

        @return: the entry for a single accession, otherwise an
                 iterator on the entries in the order of C{ac}
        '''
        if isinstance(ac, str):
            with self._lock:
                cached = ac in self._cache
                if cached:
                    data = self._cache[ac]
            if not cached:
                # The master database is queried outside of the lock.
                data = self._db[ac]
                self._cacheSeq(data)
            return data

        with self._lock:
            acs = [[x, self._cache.get(x, None)] for x in ac]

        missing = [x for x, hit in acs if hit is None]
        # iter() accepts an iterator as well as a list from the master
        # database.
        # NOTE(review): assumes the entries come back in the order of
        # the accessions asked for - confirm against the master db.
        newseqs = iter(self._db[missing]) if missing else iter([])

        for entry in acs:
            if entry[1] is None:
                entry[1] = self._cacheSeq(next(newseqs))

        return (entry[1] for entry in acs)
246
247
# NOTE(review): the 'def' line of this function is missing from the
# listing that was reviewed; its name and signature are reconstructed
# from the body - confirm against the module.
def moduleInDevelopment(name):
    '''
    Log a warning telling that a module is still under development.

    @param name: the name of the module
    @type name: C{str}
    '''
    # The previous code called the built-in Warning exception class,
    # which only builds an exception instance and throws it away;
    # logging.warning, imported at the top of the file, does emit
    # the message.
    warning('This module %s is under development : use it with caution', name)
250