Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

zipstream.py 9.5KB

1 year ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. # -*- test-case-name: twisted.python.test.test_zipstream -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. An incremental approach to unzipping files. This allows you to unzip a little
  6. bit of a file at a time, which means you can report progress as a file unzips.
  7. """
  8. import os.path
  9. import struct
  10. import zipfile
  11. import zlib
  12. class ChunkingZipFile(zipfile.ZipFile):
  13. """
  14. A L{zipfile.ZipFile} object which, with L{readfile}, also gives you access
  15. to a file-like object for each entry.
  16. """
  17. def readfile(self, name):
  18. """
  19. Return file-like object for name.
  20. """
  21. if self.mode not in ("r", "a"):
  22. raise RuntimeError('read() requires mode "r" or "a"')
  23. if not self.fp:
  24. raise RuntimeError("Attempt to read ZIP archive that was already closed")
  25. zinfo = self.getinfo(name)
  26. self.fp.seek(zinfo.header_offset, 0)
  27. fheader = self.fp.read(zipfile.sizeFileHeader)
  28. if fheader[0:4] != zipfile.stringFileHeader:
  29. raise zipfile.BadZipfile("Bad magic number for file header")
  30. fheader = struct.unpack(zipfile.structFileHeader, fheader)
  31. fname = self.fp.read(fheader[zipfile._FH_FILENAME_LENGTH])
  32. if fheader[zipfile._FH_EXTRA_FIELD_LENGTH]:
  33. self.fp.read(fheader[zipfile._FH_EXTRA_FIELD_LENGTH])
  34. if zinfo.flag_bits & 0x800:
  35. # UTF-8 filename
  36. fname_str = fname.decode("utf-8")
  37. else:
  38. fname_str = fname.decode("cp437")
  39. if fname_str != zinfo.orig_filename:
  40. raise zipfile.BadZipfile(
  41. 'File name in directory "%s" and header "%s" differ.'
  42. % (zinfo.orig_filename, fname_str)
  43. )
  44. if zinfo.compress_type == zipfile.ZIP_STORED:
  45. return ZipFileEntry(self, zinfo.compress_size)
  46. elif zinfo.compress_type == zipfile.ZIP_DEFLATED:
  47. return DeflatedZipFileEntry(self, zinfo.compress_size)
  48. else:
  49. raise zipfile.BadZipfile(
  50. "Unsupported compression method %d for file %s"
  51. % (zinfo.compress_type, name)
  52. )
  53. class _FileEntry:
  54. """
  55. Abstract superclass of both compressed and uncompressed variants of
  56. file-like objects within a zip archive.
  57. @ivar chunkingZipFile: a chunking zip file.
  58. @type chunkingZipFile: L{ChunkingZipFile}
  59. @ivar length: The number of bytes within the zip file that represent this
  60. file. (This is the size on disk, not the number of decompressed bytes
  61. which will result from reading it.)
  62. @ivar fp: the underlying file object (that contains pkzip data). Do not
  63. touch this, please. It will quite likely move or go away.
  64. @ivar closed: File-like 'closed' attribute; True before this file has been
  65. closed, False after.
  66. @type closed: L{bool}
  67. @ivar finished: An older, broken synonym for 'closed'. Do not touch this,
  68. please.
  69. @type finished: L{int}
  70. """
  71. def __init__(self, chunkingZipFile, length):
  72. """
  73. Create a L{_FileEntry} from a L{ChunkingZipFile}.
  74. """
  75. self.chunkingZipFile = chunkingZipFile
  76. self.fp = self.chunkingZipFile.fp
  77. self.length = length
  78. self.finished = 0
  79. self.closed = False
  80. def isatty(self):
  81. """
  82. Returns false because zip files should not be ttys
  83. """
  84. return False
  85. def close(self):
  86. """
  87. Close self (file-like object)
  88. """
  89. self.closed = True
  90. self.finished = 1
  91. del self.fp
  92. def readline(self):
  93. """
  94. Read a line.
  95. """
  96. line = b""
  97. for byte in iter(lambda: self.read(1), b""):
  98. line += byte
  99. if byte == b"\n":
  100. break
  101. return line
  102. def __next__(self):
  103. """
  104. Implement next as file does (like readline, except raises StopIteration
  105. at EOF)
  106. """
  107. nextline = self.readline()
  108. if nextline:
  109. return nextline
  110. raise StopIteration()
  111. # Iterators on Python 2 use next(), not __next__()
  112. next = __next__
  113. def readlines(self):
  114. """
  115. Returns a list of all the lines
  116. """
  117. return list(self)
  118. def xreadlines(self):
  119. """
  120. Returns an iterator (so self)
  121. """
  122. return self
  123. def __iter__(self):
  124. """
  125. Returns an iterator (so self)
  126. """
  127. return self
  128. def __enter__(self):
  129. return self
  130. def __exit__(self, exc_type, exc_value, traceback):
  131. self.close()
  132. class ZipFileEntry(_FileEntry):
  133. """
  134. File-like object used to read an uncompressed entry in a ZipFile
  135. """
  136. def __init__(self, chunkingZipFile, length):
  137. _FileEntry.__init__(self, chunkingZipFile, length)
  138. self.readBytes = 0
  139. def tell(self):
  140. return self.readBytes
  141. def read(self, n=None):
  142. if n is None:
  143. n = self.length - self.readBytes
  144. if n == 0 or self.finished:
  145. return b""
  146. data = self.chunkingZipFile.fp.read(min(n, self.length - self.readBytes))
  147. self.readBytes += len(data)
  148. if self.readBytes == self.length or len(data) < n:
  149. self.finished = 1
  150. return data
  151. class DeflatedZipFileEntry(_FileEntry):
  152. """
  153. File-like object used to read a deflated entry in a ZipFile
  154. """
  155. def __init__(self, chunkingZipFile, length):
  156. _FileEntry.__init__(self, chunkingZipFile, length)
  157. self.returnedBytes = 0
  158. self.readBytes = 0
  159. self.decomp = zlib.decompressobj(-15)
  160. self.buffer = b""
  161. def tell(self):
  162. return self.returnedBytes
  163. def read(self, n=None):
  164. if self.finished:
  165. return b""
  166. if n is None:
  167. result = [
  168. self.buffer,
  169. ]
  170. result.append(
  171. self.decomp.decompress(
  172. self.chunkingZipFile.fp.read(self.length - self.readBytes)
  173. )
  174. )
  175. result.append(self.decomp.decompress(b"Z"))
  176. result.append(self.decomp.flush())
  177. self.buffer = b""
  178. self.finished = 1
  179. result = b"".join(result)
  180. self.returnedBytes += len(result)
  181. return result
  182. else:
  183. while len(self.buffer) < n:
  184. data = self.chunkingZipFile.fp.read(
  185. min(n, 1024, self.length - self.readBytes)
  186. )
  187. self.readBytes += len(data)
  188. if not data:
  189. result = (
  190. self.buffer + self.decomp.decompress(b"Z") + self.decomp.flush()
  191. )
  192. self.finished = 1
  193. self.buffer = b""
  194. self.returnedBytes += len(result)
  195. return result
  196. else:
  197. self.buffer += self.decomp.decompress(data)
  198. result = self.buffer[:n]
  199. self.buffer = self.buffer[n:]
  200. self.returnedBytes += len(result)
  201. return result
  202. DIR_BIT = 16
  203. def countZipFileChunks(filename, chunksize):
  204. """
  205. Predict the number of chunks that will be extracted from the entire
  206. zipfile, given chunksize blocks.
  207. """
  208. totalchunks = 0
  209. zf = ChunkingZipFile(filename)
  210. for info in zf.infolist():
  211. totalchunks += countFileChunks(info, chunksize)
  212. return totalchunks
  213. def countFileChunks(zipinfo, chunksize):
  214. """
  215. Count the number of chunks that will result from the given C{ZipInfo}.
  216. @param zipinfo: a C{zipfile.ZipInfo} instance describing an entry in a zip
  217. archive to be counted.
  218. @return: the number of chunks present in the zip file. (Even an empty file
  219. counts as one chunk.)
  220. @rtype: L{int}
  221. """
  222. count, extra = divmod(zipinfo.file_size, chunksize)
  223. if extra > 0:
  224. count += 1
  225. return count or 1
  226. def unzipIterChunky(filename, directory=".", overwrite=0, chunksize=4096):
  227. """
  228. Return a generator for the zipfile. This implementation will yield after
  229. every chunksize uncompressed bytes, or at the end of a file, whichever
  230. comes first.
  231. The value it yields is the number of chunks left to unzip.
  232. """
  233. czf = ChunkingZipFile(filename, "r")
  234. if not os.path.exists(directory):
  235. os.makedirs(directory)
  236. remaining = countZipFileChunks(filename, chunksize)
  237. names = czf.namelist()
  238. infos = czf.infolist()
  239. for entry, info in zip(names, infos):
  240. isdir = info.external_attr & DIR_BIT
  241. f = os.path.join(directory, entry)
  242. if isdir:
  243. # overwrite flag only applies to files
  244. if not os.path.exists(f):
  245. os.makedirs(f)
  246. remaining -= 1
  247. yield remaining
  248. else:
  249. # create the directory the file will be in first,
  250. # since we can't guarantee it exists
  251. fdir = os.path.split(f)[0]
  252. if not os.path.exists(fdir):
  253. os.makedirs(fdir)
  254. if overwrite or not os.path.exists(f):
  255. fp = czf.readfile(entry)
  256. if info.file_size == 0:
  257. remaining -= 1
  258. yield remaining
  259. with open(f, "wb") as outfile:
  260. while fp.tell() < info.file_size:
  261. hunk = fp.read(chunksize)
  262. outfile.write(hunk)
  263. remaining -= 1
  264. yield remaining
  265. else:
  266. remaining -= countFileChunks(info, chunksize)
  267. yield remaining