Package obitools :: Package utils
[hide private]
[frames | no frames]

Source Code for Package obitools.utils

  1  import sys 
  2   
  3  import time 
  4  import re 
  5  import shelve 
  6   
  7  from threading import Lock 
  8  from logging  import warning 
  9  import urllib2 
 10   
 11  from obitools.gzip import GzipFile 
 12  from obitools.zipfile import ZipFile 
 13   
 14   
 15   
def universalOpen(file,*options):
    '''
    Open a file, compressed or not.

    If file is a C{str} instance, it is considered to be a file name
    or an URL: a name without URL scheme is opened with the builtin
    C{open} (receiving C{options}), anything else is fetched with
    C{urllib2.urlopen} using a 15 seconds timeout. A C{.gz} suffix
    makes the stream be wrapped in a C{GzipFile}; a C{.zip} suffix
    makes the single member of the archive be opened.

    If file is another kind of object, it is assumed that this object
    follows the C{file} interface and it is returned as is.

    @param file: the file to open
    @type file: C{str} or a file like object
    @param options: extra positional arguments forwarded to C{open}
                    (only used for local file names)

    @return: the opened (possibly decompressing) stream, or C{file}
             itself when it is not a C{str}

    @raise AssertionError: when a C{.zip} archive does not contain
                           exactly one member
    '''
    if not isinstance(file, str):
        # Already a file-like object: hand it back untouched.
        return file

    # An empty URL scheme means a plain local path.
    if urllib2.urlparse.urlparse(file)[0] == '':
        rep = open(file, *options)
    else:
        rep = urllib2.urlopen(file, timeout=15)

    if file.endswith('.gz'):
        rep = GzipFile(fileobj=rep)
    if file.endswith('.zip'):
        archive = ZipFile(file=rep)
        members = archive.infolist()
        # Raised explicitly (instead of using an assert statement) so the
        # check is still enforced when Python runs with -O; the exception
        # type and message are the ones callers already see.
        if len(members) != 1:
            raise AssertionError('Only zipped file containning a single file can be open')
        rep = archive.open(members[0].filename)
    return rep
51
def universalTell(file):
    '''
    Return the position in the file even if it is a gzipped one.

    For a C{GzipFile} the position reported is the one of the
    underlying (compressed) stream, not the offset in the
    uncompressed data.

    @param file: the file to check
    @type file: a C{file} like instance

    @return: position in the file
    @rtype: C{int}
    '''
    if isinstance(file, GzipFile):
        raw = file.myfileobj
        if raw is None:
            # NOTE(review): assumes obitools.gzip.GzipFile follows the
            # standard gzip module, where myfileobj stays None when the
            # stream was supplied through fileobj= (which is how
            # universalOpen builds it) and the stream is kept in
            # fileobj -- confirm against obitools.gzip.
            raw = file.fileobj
        file = raw
    return file.tell()
66
def fileSize(file):
    '''
    Return the file size even if it is a gzipped one.

    The size is measured by seeking to the end of the stream; the
    original position is restored afterwards. For a C{GzipFile} the
    size is the one of the underlying (compressed) stream.

    @param file: the file to check
    @type file: a seekable C{file} like instance

    @return: the size of the file
    @rtype: C{int}
    '''
    if isinstance(file, GzipFile):
        raw = file.myfileobj
        if raw is None:
            # NOTE(review): assumes obitools.gzip.GzipFile follows the
            # standard gzip module, where myfileobj stays None when the
            # stream was supplied through fileobj= and the stream is
            # kept in fileobj -- confirm against obitools.gzip.
            raw = file.fileobj
        file = raw

    pos = file.tell()
    try:
        file.seek(0, 2)       # whence=2: relative to the end of the stream
        length = file.tell()
    finally:
        # Put the cursor back even if measuring failed half way.
        file.seek(pos, 0)
    return length
85
def progressBar(pos,max,reset=False,delta=[]):
    '''
    Draw a one line text progress bar on C{sys.stderr}.

    The line starts with a carriage return so that successive calls
    overwrite each other. It shows the percentage done, a 50 columns
    bar with a spinner and an estimate of the remaining time.

    @param pos: current position
    @type pos: C{int}
    @param max: position corresponding to 100 %; must not be zero
    @type max: C{int}
    @param reset: if C{True} the timer is restarted by this call
    @type reset: C{bool}
    @param delta: C{[start time, last call time]}. The mutable default
                  is intentional: it keeps the timer between calls.
    @type delta: C{list}
    '''
    now = time.time()
    if reset:
        del delta[:]
    if not delta:
        delta.append(now)
        delta.append(now)

    delta[1] = now
    elapsed = delta[1] - delta[0]
    percent = float(pos) / max * 100

    if percent:
        remain = time.strftime('%H:%M:%S',
                               time.gmtime(elapsed / percent * (100 - percent)))
    else:
        # Nothing done yet: no speed to extrapolate from (the division
        # by percent used to raise ZeroDivisionError here).
        remain = '--:--:--'

    filled = int(percent / 2)
    bar = '#' * filled + '|/-\\-'[pos % 5] + ' ' * (50 - filled)
    sys.stderr.write('\r%5.1f %% |%s] remain : %s' % (percent, bar, remain))
101
def endLessIterator(endedlist):
    '''
    Iterate over C{endedlist}, then repeat its last element for ever.

    @param endedlist: the values to iterate over; any iterable is
                      accepted, not only indexable sequences
    @type endedlist: an iterable

    @return: an endless iterator
    @rtype: a generator

    @raise IndexError: once iteration starts, if C{endedlist} is empty
                       (there is no last element to repeat)
    '''
    seen = False
    last = None
    for last in endedlist:
        seen = True
        yield last
    if not seen:
        raise IndexError('endLessIterator needs at least one element')
    while True:
        yield last
107 108
def multiLineWrapper(lineiterator):
    '''
    Join continued lines.

    A line whose last but one character is a backslash (i.e. a
    backslash right before the end of line character) is continued on
    the following line: the two trailing characters are removed and
    the next line is appended, repeatedly if needed.

    @param lineiterator: a stream of strings from an opened OBO file.
    @type lineiterator: a stream of strings (any iterable).

    @return: the stream of logical lines.
    @rtype: an iterator on str

    @raise FileFormatError: when the stream ends on a continued line.
    '''
    # iter() makes plain lists work too and lets the inner loop pull
    # continuation lines from the very same iterator as the outer loop.
    lines = iter(lineiterator)
    for line in lines:
        rep = [line]
        while len(line) >= 2 and line[-2] == '\\':
            # Drop the backslash and the end of line character.
            rep[-1] = rep[-1][0:-2]
            try:
                line = next(lines)
            except StopIteration:
                # NOTE(review): FileFormatError is neither defined nor
                # imported in the visible part of this module -- confirm
                # where it comes from, otherwise this is a NameError.
                raise FileFormatError
            rep.append(line)
        yield ''.join(rep)
133 134
def skipWhiteLineIterator(lineiterator):
    '''
    Filter out blank lines.

    @param lineiterator: a stream of strings from an opened OBO file.
    @type lineiterator: a stream of strings.

    @return: a stream of strings without blank strings; the kept
             lines are yielded unmodified (not stripped).
    @rtype: a stream of strings

    @note: C{skipped} is printed on standard output for each line
           made only of white spaces.
    '''
    for line in lineiterator:
        if line.strip():
            yield line
        else:
            # Parenthesised form: prints the same text on Python 2 and
            # is also valid on Python 3.
            # NOTE(review): looks like a debugging left-over -- confirm
            # whether writing on stdout is really wanted here.
            print('skipped')
154 155
class ColumnFile(object):
    '''
    Iterator over the rows of a column formatted text stream.

    Each line read from the stream is split on a delimiter; the
    resulting fields can be stripped, converted and, when column
    names are given, returned as a C{dict}.
    '''

    def __init__(self,stream,sep=None,strip=True,types=None,skip=None,head=None):
        '''
        @param stream: the data source, handed to C{universalOpen}
        @type stream: C{str} or a file like object
        @param sep: field delimiter given to C{str.split}
                    (C{None}: any run of white spaces)
        @param strip: if true, white spaces around fields are removed
        @param types: converters applied to the fields in order; the
                      last one is reused for the extra fields and
                      C{bool} is replaced by L{str2bool}. Giving types
                      implies stripping.
        @param skip: lines starting with this prefix are ignored
        @type skip: C{str} or C{None}
        @param head: column names; when given rows are C{dict}
        '''
        self._stream = universalOpen(stream)
        self._delimiter = sep
        self._strip = strip
        if types:
            # bool('False') is True, so the builtin is swapped for a
            # converter understanding textual booleans.
            self._types = [ColumnFile.str2bool if t is bool else t
                           for t in types]
        else:
            self._types = None

        self._skip = skip
        self._lskip = len(skip) if skip is not None else 0
        self._head = head

    @staticmethod
    def str2bool(x):
        '''
        Convert a textual boolean to C{bool} from its first non blank
        character: C{T} or C{V} give C{True}, C{F} gives C{False}
        (case insensitive) and a digit is true unless it is C{0}.

        @param x: the text to convert
        @type x: C{str}

        @rtype: C{bool}
        '''
        # NOTE(review): eval() is applied to data read from the file.
        # Only one single character is ever evaluated, which bounds
        # what can be executed, but an explicit lookup table would be
        # safer; any other character raises NameError or SyntaxError.
        return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))

    def __iter__(self):
        return self

    def next(self):
        '''
        Read and decode the next row.

        @return: the list of fields, or a C{dict} indexed by column
                 name when C{head} was given. In that case the shorter
                 of names and fields is padded with C{None}.

        @raise StopIteration: at the end of the stream
        '''
        line = next(self._stream)
        if self._skip is not None:
            while line[0:self._lskip] == self._skip:
                line = next(self._stream)

        data = line.split(self._delimiter)
        if self._strip or self._types:
            data = [x.strip() for x in data]

        if self._types:
            # Fields beyond the declared types reuse the last converter.
            last = len(self._types) - 1
            data = [self._types[min(i, last)](x) for i, x in enumerate(data)]

        if self._head is not None:
            # Same pairing as the Python 2 only map(None, head, data):
            # the shorter side is completed with None.
            head = list(self._head)
            width = max(len(head), len(data))
            head.extend([None] * (width - len(head)))
            data.extend([None] * (width - len(data)))
            data = dict(zip(head, data))
        return data

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def tell(self):
        '''
        @return: the position in the underlying stream, as reported
                 by C{universalTell}
        '''
        return universalTell(self._stream)
203 204
class CachedDB(object):
    '''
    Read through cache in front of a sequence database.

    Entries fetched from the master database are stored in a
    C{shelve} file and served from there afterwards. Accesses to the
    shelf are serialised with a lock.
    '''

    def __init__(self,cachefile,masterdb):
        '''
        @param cachefile: name of the shelve file, created if missing
        @type cachefile: C{str}
        @param masterdb: the database queried on cache misses; it is
                         indexed with one accession or with a list of
                         accessions
        '''
        self._cache = shelve.open(cachefile,'c')
        self._db = masterdb
        self._lock = Lock()

    def _cacheSeq(self,seq):
        '''
        Store C{seq} in the cache under the key C{seq.id}.

        @return: C{seq} itself
        '''
        # 'with' releases the lock even when storing fails; a bare
        # acquire()/release() pair left it held for ever in that case.
        with self._lock:
            self._cache[seq.id] = seq
        return seq

    def __getitem__(self,ac):
        '''
        @param ac: one accession or an iterable of accessions
        @type ac: C{str} or an iterable of C{str}

        @return: the entry for a single accession, otherwise an
                 iterator on the entries in the order of C{ac}
        '''
        if isinstance(ac, str):
            with self._lock:
                cached = ac in self._cache
                if cached:
                    data = self._cache[ac]
            if not cached:
                # The master database is queried outside of the lock.
                data = self._db[ac]
                # NOTE(review): the entry is stored under data.id, not
                # under ac; the cache only hits later if both are
                # equal -- confirm against the master database.
                self._cacheSeq(data)
            return data

        with self._lock:
            entries = [[key, self._cache.get(key, None)] for key in ac]

        missing = [key for key, seq in entries if seq is None]
        if missing:
            # All the misses are fetched in one query; the answers are
            # presumably returned in the order of the request.
            fetched = iter(self._db[missing])
        else:
            fetched = iter([])

        for entry in entries:
            if entry[1] is None:
                entry[1] = self._cacheSeq(next(fetched))
        return (entry[1] for entry in entries)
246 247
def moduleInDevelopment(name):
    '''
    Log a warning telling that a module is still under development.

    @param name: the name of the module
    @type name: C{str}
    '''
    # The former code called Warning(...), which only builds an exception
    # instance and throws it away: nothing was ever reported. The
    # logging.warning function imported at the top of the module is used.
    warning('This module %s is under development : use it with caution', name)
250