Package obitools :: Module zipfile
[hide private]
[frames] | no frames]

Source Code for Module obitools.zipfile

   1  """ 
   2  Read and write ZIP files. 
   3  """ 
   4  import struct, os, time, sys, shutil 
   5  import binascii, cStringIO 
   6   
   7  try: 
   8      import zlib # We may need its compression method 
   9      crc32 = zlib.crc32 
  10  except ImportError: 
  11      zlib = None 
  12      crc32 = binascii.crc32 
  13   
  14  __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", 
  15             "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] 
  16   
class BadZipfile(Exception):
    """Error raised by this module when ZIP data cannot be interpreted."""
19 20
class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the archive would need ZIP64
    extensions but those extensions are disabled.
    """
error = BadZipfile      # The exception raised by this module

# Sizes and offsets above this value (2**31 - 1) are handled through the
# ZIP64 code paths elsewhere in this module.
ZIP64_LIMIT= (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.
# All formats are little-endian ("<"); the item counts below are the number
# of values struct.unpack() returns for the format.
structEndArchive = "<4s4H2LH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HLLL5HLL"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HLLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
structEndArchive64Locator = "<4sLQL" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sQHHLLQQQQ" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header


# indexes of entries in the central directory structure
# (positions in the tuple unpacked with structCentralDir)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple unpacked with structFileHeader)
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record is found, False otherwise; a file
    that cannot be opened or read (IOError) also yields False.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Close the handle even when the probe itself raises; the
            # IOError is still reported as "not a zip file" below.
            fpin.close()
    except IOError:
        return False
    # file has correct magic number when an end record was found
    return bool(endrec)
95
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use them to update endrec.

    offset is the position of the classic end record relative to the end
    of the file (it is used with whence=2).  endrec is returned in every
    case; it is left untouched when the locator or the ZIP64 record
    signature does not match.  BadZipfile is raised for archives that
    span several disks.
    """
    locator_size = struct.calcsize(structEndArchive64Locator)
    locator_pos = offset - locator_size
    fpin.seek(locator_pos, 2)
    locator = struct.unpack(structEndArchive64Locator,
                            fpin.read(locator_size))
    sig, diskno, reloff, disks = locator
    if sig != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    record_size = struct.calcsize(structEndArchive64)
    fpin.seek(locator_pos - record_size, 2)
    record = struct.unpack(structEndArchive64, fpin.read(record_size))
    if record[0] != stringEndArchive64:
        return endrec

    # record is (sig, sz, create_version, read_version, disk_num, disk_dir,
    # dircount, dircount2, dirsize, diroffset); the last six values replace
    # entries 1..6 of the original end record.
    endrec[1:7] = record[4:10]
    return endrec
128 129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable file object opened for binary reading.  The result
    is a list holding the eight fields unpacked from the record, followed
    by the archive comment (index 8) and the file offset of the record
    (index 9).  When the central directory offset field is 0xffffffff the
    list is passed through _EndRecData64() first.

    None is returned when no valid record is found, including when the
    file is too short to contain one.
    """
    try:
        fpin.seek(-22, 2)               # Assume no archive comment.
    except IOError:
        # Seeking before the start of the file: it is shorter than an end
        # record, so it cannot be a ZIP archive.
        return None
    filesize = fpin.tell() + 22     # Get file size
    data = fpin.read()
    # The length check protects struct.unpack() against file objects that
    # clamp the seek above instead of raising.
    if (len(data) == 22 and data[0:4] == stringEndArchive
            and data[-2:] == "\000\000"):
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:                  # Correct signature string was found
        record = data[start:start+22]
        if len(record) != 22:
            # Signature too close to the end of the file for a full record.
            return None
        endrec = list(struct.unpack(structEndArchive, record))
        comment = data[start+22:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None     # Error, no usable record
167 168
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    # Instances carry exactly these attributes (no per-instance __dict__).
    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename, stamped with date_time.

        date_time is a (year, month, day, hour, min, sec) sequence.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        filename = filename.partition(chr(0))[0]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/":
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        # System which created ZIP archive: 0 on win32, otherwise assume
        # everything else is unix-y (3).
        self.create_system = 0 if sys.platform == 'win32' else 3
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra field is
        appended, the 32-bit size fields are set to 0xffffffff and the
        instance's extract_version/create_version are raised to at least 45.
        """
        stamp = self.date_time
        dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
        dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC, compress_size, file_size = (
                self.CRC, self.compress_size, self.file_size)

        extra = self.extra

        if max(file_size, compress_size) > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            zip64_field = struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            extra = extra + zip64_field
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 (type 1) records.

        For a type 1 record the 64-bit values replace, in order, file_size,
        compress_size and header_offset, but only for those attributes that
        currently hold a sentinel value.  RuntimeError is raised for a
        type 1 record whose length is not 0, 8, 16 or at least 24.
        """
        remaining = self.extra
        while remaining:
            tp, ln = struct.unpack('<HH', remaining[:4])
            if tp == 1:
                if ln >= 24:
                    field_count = 3
                elif ln in (0, 8, 16):
                    field_count = ln // 8
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))
                counts = struct.unpack('<' + 'Q' * field_count,
                                       remaining[4:4 + 8 * field_count])

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                # XXX Is this correct? won't this exclude 2**32-1 byte files?
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size in (-1, 0xFFFFFFFF):
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset in (-1, 0xffffffff):
                    self.header_offset = counts[idx]
                    idx += 1

            remaining = remaining[ln+4:]
301 302
class _ZipDecrypter:
    """
    Callable that decrypts a password-protected archive member one
    character at a time.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage ::
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    # Built once, when the class body is executed, and shared by instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every character of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext character c."""
        key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (key0 & 0xff)) & 0xffffffff
        key1 = (key1 * 134775813 + 1) & 0xffffffff
        self.key0 = key0
        self.key1 = key1
        self.key2 = self._crc32(chr((key1 >> 24) & 0xff), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = chr(ord(c) ^ keystream)
        # The keys are updated with the decrypted character.
        self._UpdateKeys(plain)
        return plain
362
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

       Data flows through three buffers: rawbuffer (bytes taken from the
       underlying file that still have to be processed), readbuffer
       (decrypted/decompressed data not yet handed out by read()) and
       linebuffer (data held back by readline()).
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, which must already be positioned at the member data.

        zipinfo supplies compress_type, compress_size and filename.
        decrypt, when given, is a callable applied to every character read.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0L            # bytes taken from fileobj so far
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        # NOTE(review): nothing in this class sets eof to True.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''           # last line separator consumed

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            # Negative window size: zlib treats the data as a raw deflate
            # stream without a zlib header.
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        """Return self; iteration yields lines through next()."""
        return self

    def next(self):
        """Return the next line, raising StopIteration when none is left."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed; the underlying file is not touched."""
        self.closed = True

    def _checkfornewline(self):
        """Look for a line separator in linebuffer.

        Returns (position, separator length), or (-1, -1) when the buffer
        holds no separator.  Separators are tried in the order of nlSeps.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

           A complete line is returned terminated by "\n" whatever
           separator was found; a final line without separator is returned
           as is.  An empty string means no data is left.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                # pull data in chunks of at most 100 bytes
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        # remember the separator so a split "\r\n" can be detected later
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size bytes of member data, or all remaining data.

        With size None (or negative) everything left in the member is read
        and returned together with any buffered data.  With a non-negative
        size at most size bytes are returned; surplus data stays in
        readbuffer for the next call.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            bytes = self.fileobj.read(bytesToRead)
            self.bytes_read += len(bytes)
            self.rawbuffer += bytes

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    # NOTE(review): eof is never set in this class, so the
                    # flush below looks unreachable as written - confirm.
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            bytes = self.readbuffer
            self.readbuffer = ''
        else:
            bytes = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return bytes
555 556
557 -class ZipFile:
558 """ Class with methods to open, read, write, close, list zip files. 559 560 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True) 561 562 @var file: Either the path to the file, or a file-like object. 563 If it is a path, the file will be opened and closed by ZipFile. 564 @var mode: The mode can be either read "r", write "w" or append "a". 565 @var compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). 566 @var allowZip64: if True ZipFile will create files with ZIP64 extensions when 567 needed, otherwise it will raise an exception when this would 568 be necessary. 569 570 """ 571 572 fp = None # Set here since __del__ checks it 573
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (opened and later closed by this object) or a
    file-like object (used as given).  compression is ZIP_STORED or
    ZIP_DEFLATED.  allowZip64 controls whether ZIP64 extensions may be
    written.

    RuntimeError is raised for an unknown mode, an unsupported
    compression method, or ZIP_DEFLATED without the zlib module.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    # mode was validated above, so it is exactly one of "r", "w", "a".
    self.mode = key = mode
    self.pwd = None

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Nothing to append to: create the file instead.  Note
                # that self.mode keeps the value "a".
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    # key == 'w': nothing to read from a new archive.
631
def _GetContents(self):
    """Read the directory, making sure we close the file if the format
    is bad.

    BadZipfile is re-raised after a file opened by this object has been
    closed; a file object supplied by the caller is left open.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
642
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry and sets self.comment and self.start_dir.  BadZipfile
    is raised when no end record is found or an entry has a bad signature.
    """
    archive = self.fp
    endrec = _EndRecData(archive)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[5]             # bytes in central directory
    offset_cd = endrec[6]   # offset of central directory
    self.comment = endrec[8]        # archive comment
    # endrec[9] is the offset of the "End of Central Dir" record
    end_offset = endrec[9]
    inferred = end_offset - size_cd
    if end_offset > ZIP64_LIMIT:
        # also skip the ZIP64 end record (56) and its locator (20)
        inferred = end_offset - size_cd - 56 - 20
    # "concat" is zero, unless zip was concatenated to another file
    concat = inferred - offset_cd
    if self.debug > 2:
        print("given, inferred, offset %s %s %s" %
              (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    directory = cStringIO.StringIO(archive.read(size_cd))
    total = 0
    while total < size_cd:
        raw = directory.read(46)
        total = total + 46
        if raw[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        centdir = struct.unpack(structCentralDir, raw)
        if self.debug > 2:
            print(centdir)
        name_len = centdir[_CD_FILENAME_LENGTH]
        extra_len = centdir[_CD_EXTRA_FIELD_LENGTH]
        comment_len = centdir[_CD_COMMENT_LENGTH]
        # Create ZipInfo instance to store file information
        info = ZipInfo(directory.read(name_len))
        info.extra = directory.read(extra_len)
        info.comment = directory.read(comment_len)
        total = total + name_len + extra_len + comment_len
        info.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (info.create_version, info.create_system, info.extract_version,
         info.reserved, info.flag_bits, info.compress_type, t, d,
         info.CRC, info.compress_size, info.file_size) = centdir[1:12]
        info.volume, info.internal_attr, info.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        info._raw_time = t
        info.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                           t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        # ZIP64 values must be applied before the offset is shifted.
        info._decodeExtra()
        info.header_offset = info.header_offset + concat
        self.filelist.append(info)
        self.NameToInfo[info.filename] = info
        if self.debug > 2:
            print("total %s" % (total,))
701 702
def namelist(self):
    """Return a new list with the file name of every archive member."""
    return [entry.filename for entry in self.filelist]
709
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    The list returned is self.filelist itself, not a copy.
    """
    return self.filelist
714
def printdir(self):
    """Print a table of contents for the zip file."""
    # NOTE(review): whitespace inside the "Modified " literal may have been
    # collapsed in the listing this code was taken from - confirm.
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row)
721
722 - def testzip(self):
723 """Read all the files and check the CRC.""" 724 for zinfo in self.filelist: 725 try: 726 self.read(zinfo.filename) # Check CRC-32 727 except BadZipfile: 728 return zinfo.filename
729 730
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    KeyError is raised when the archive has no member called name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
739
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored in self.pwd and used by open() when no password
    is passed explicitly.
    """
    self.pwd = pwd
743
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    pwd is the password to use for an encrypted member; it is handed to
    open() unchanged.
    """
    member = self.open(name, "r", pwd)
    return member.read()
747
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    mode is "r", "U" or "rU"; a mode containing "U" switches the returned
    object to universal newlines.  pwd is the password for an encrypted
    member and defaults to the one set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password, KeyError for an unknown member and
    BadZipfile when the local file header is invalid.  A file opened
    here is closed again when any of these errors occurs.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    owns_file = not self._filePassed
    if owns_file:
        zef_file = open(self.filename, 'rb')
    else:
        zef_file = self.fp

    handed_over = False
    try:
        # Get info object for name
        zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                      "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = [zd(char) for char in crypt_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build the ZipExtFile (zd is None for unencrypted members)
        zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        handed_over = True
        return zef
    finally:
        # On any failure, release the file this call opened itself.
        if owns_file and not handed_over:
            zef_file.close()
822
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
836
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    selected = self.namelist() if members is None else members
    for member in selected:
        self.extract(member, path, pwd)
848
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    The member name is treated as untrusted input: a drive letter, leading
    separators and "." / ".." components are dropped, so the result always
    lies inside targetpath.  A member whose name ends with "/" is created
    as a directory.  Returns the path that was written.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace("/", os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret an absolute name as relative and remove components that
    # could move the destination outside targetpath
    arcname = os.path.splitdrive(arcname)[1]
    safe_parts = [part for part in arcname.split(os.path.sep)
                  if part not in ("", os.path.curdir, os.path.pardir)]
    arcname = os.path.sep.join(safe_parts)

    # os.path.join copes with a trailing separator on targetpath, and
    # normpath removes it, so no manual stripping is needed (stripping
    # turned a target of "/" into a relative path).
    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1:] == "/":
        # Directory entry: create the directory instead of an empty file.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member.filename, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
878
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError when the archive is not writable, already closed,
    or the compression method cannot be used, and LargeZipFile when the
    member size or header offset exceeds ZIP64_LIMIT while ZIP64 is
    disabled.  A duplicate name only produces a message when debug is on.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name: %s" % (zinfo.filename,))
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to write ZIP archive that was already closed")
    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
              "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
              "That compression method is not supported")
    zip64_allowed = self._allowZip64
    if zinfo.file_size > ZIP64_LIMIT and not zip64_allowed:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not zip64_allowed:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
901
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    arcname defaults to filename; the drive and any leading separators
    are removed from it.  compress_type defaults to the archive's
    compression method.  Raises RuntimeError when the archive is already
    closed, plus whatever _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # Guard against the name becoming empty while separators are removed.
    while arcname and arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Release the source file even when reading or writing fails.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    # offset 14 is where the CRC field starts in the local file header
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
970
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, the entry is stamped with the current
    local time and uses the archive's compression method.  Raises
    RuntimeError when the archive is already closed, plus whatever
    _writecheck() raises.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
             zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC was
        # masked to an unsigned 32-bit value above, so it is packed with
        # "L" like the sizes (a signed "l" cannot hold values >= 2**31).
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1008
1009 - def __del__(self):
1010 """Call the "close()" method in case the user forgot.""" 1011 self.close()
1012
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  When the archive was
    opened for writing or appending and has been modified, the central
    directory (one record per entry of self.filelist) and the
    end-of-archive record are written first; ZIP64 end records are added
    when the central directory starts beyond ZIP64_LIMIT.

    The underlying file is released and self.fp is reset to None even if
    writing the ending records fails, so a later close() or __del__()
    does not repeat the failing write.  A file object that was passed in
    by the caller (self._filePassed) is never closed here.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                # Pack (year, month, day) and (hour, minute, second/2)
                # into the two 16-bit date/time fields.
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    # Real sizes go into the ZIP64 extra field; the
                    # 32-bit slots get the all-ones placeholder.
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Prepend a ZIP64 field (header id 1, 8 bytes per
                    # value) to the entry's own extra data.
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                # Format string followed by the values it packs; kept as
                # one tuple so it can be dumped verbatim on failure.
                centdir_args = (structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(zinfo.filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                try:
                    centdir = struct.pack(*centdir_args)
                except DeprecationWarning:
                    # Only reachable when warnings are turned into
                    # errors; show the offending values, then re-raise.
                    sys.stderr.write("%s\n" % (centdir_args,))
                    raise
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                # The classic record carries the all-ones placeholder as
                # central directory offset; the real one is in the ZIP64
                # record written above.
                endrec = struct.pack(structEndArchive, stringEndArchive,
                            0, 0, count, count, pos2 - pos1, 0xffffffff, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Always drop the handle, whether or not the records were written.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1107 1108
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        "basename" is the archive path prefix under which the modules
        are stored; an empty string stores them at the top level.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                self._write_module(initname[0:-3], basename)
                dirlist = os.listdir(pathname)
                # Already written above; isfile() guarantees it is listed.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        self._write_module(path[0:-3], basename)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        self._write_module(path[0:-3], basename)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")

    def _write_module(self, modpath, basename, label = "Adding"):
        """Store the compiled form of the module at "modpath" (a path
        without the ".py" suffix) under the prefix "basename".

        "label" is the text shown in front of the archive name when
        self.debug is set.
        """
        fname, arcname = self._get_codename(modpath, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        A .pyo file at least as new as the .py source is preferred.
        Otherwise the .pyc is used, after (re)compiling it when it is
        missing or older than the source.  A compile error is printed,
        not raised, and the .pyc path is returned regardless.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1206 1207
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    "args" is the argument list without the program name; it defaults to
    sys.argv[1:].  The first argument selects the action (-l, -t, -e or
    -c).  On a missing or malformed command line the usage text is
    printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # NOTE(review): relies on testzip() returning the name of the
            # first corrupt member, or None, as the stdlib zipfile does.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        # Member names come from the archive and are untrusted: anything
        # that would land outside the target directory is skipped.
        out_root = os.path.abspath(out)
        if out_root.endswith(os.sep):
            out_prefix = out_root
        else:
            out_prefix = out_root + os.sep
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgt_abs = os.path.abspath(tgt)
                if tgt_abs != out_root and not tgt_abs.startswith(out_prefix):
                    sys.stderr.write(
                        "Skipping %r: would be extracted outside %r\n"
                        % (path, out))
                    continue

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting into the current directory.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories are walked
            # recursively; anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()