You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

unpacking.py 9.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. """Utilities related to archives.
  2. """
  3. # The following comment should be removed at some point in the future.
  4. # mypy: strict-optional=False
  5. # mypy: disallow-untyped-defs=False
  6. from __future__ import absolute_import
  7. import logging
  8. import os
  9. import shutil
  10. import stat
  11. import tarfile
  12. import zipfile
  13. from pip._internal.exceptions import InstallationError
  14. from pip._internal.utils.filetypes import (
  15. BZ2_EXTENSIONS,
  16. TAR_EXTENSIONS,
  17. XZ_EXTENSIONS,
  18. ZIP_EXTENSIONS,
  19. )
  20. from pip._internal.utils.misc import ensure_dir
  21. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  22. if MYPY_CHECK_RUNNING:
  23. from typing import Iterable, List, Optional, Text, Union
  24. logger = logging.getLogger(__name__)
  25. SUPPORTED_EXTENSIONS = ZIP_EXTENSIONS + TAR_EXTENSIONS
  26. try:
  27. import bz2 # noqa
  28. SUPPORTED_EXTENSIONS += BZ2_EXTENSIONS
  29. except ImportError:
  30. logger.debug('bz2 module is not available')
  31. try:
  32. # Only for Python 3.3+
  33. import lzma # noqa
  34. SUPPORTED_EXTENSIONS += XZ_EXTENSIONS
  35. except ImportError:
  36. logger.debug('lzma module is not available')
  37. def current_umask():
  38. """Get the current umask which involves having to set it temporarily."""
  39. mask = os.umask(0)
  40. os.umask(mask)
  41. return mask
  42. def split_leading_dir(path):
  43. # type: (Union[str, Text]) -> List[Union[str, Text]]
  44. path = path.lstrip('/').lstrip('\\')
  45. if (
  46. '/' in path and (
  47. ('\\' in path and path.find('/') < path.find('\\')) or
  48. '\\' not in path
  49. )
  50. ):
  51. return path.split('/', 1)
  52. elif '\\' in path:
  53. return path.split('\\', 1)
  54. else:
  55. return [path, '']
  56. def has_leading_dir(paths):
  57. # type: (Iterable[Union[str, Text]]) -> bool
  58. """Returns true if all the paths have the same leading path name
  59. (i.e., everything is in one subdirectory in an archive)"""
  60. common_prefix = None
  61. for path in paths:
  62. prefix, rest = split_leading_dir(path)
  63. if not prefix:
  64. return False
  65. elif common_prefix is None:
  66. common_prefix = prefix
  67. elif prefix != common_prefix:
  68. return False
  69. return True
  70. def is_within_directory(directory, target):
  71. # type: ((Union[str, Text]), (Union[str, Text])) -> bool
  72. """
  73. Return true if the absolute path of target is within the directory
  74. """
  75. abs_directory = os.path.abspath(directory)
  76. abs_target = os.path.abspath(target)
  77. prefix = os.path.commonprefix([abs_directory, abs_target])
  78. return prefix == abs_directory
  79. def unzip_file(filename, location, flatten=True):
  80. # type: (str, str, bool) -> None
  81. """
  82. Unzip the file (with path `filename`) to the destination `location`. All
  83. files are written based on system defaults and umask (i.e. permissions are
  84. not preserved), except that regular file members with any execute
  85. permissions (user, group, or world) have "chmod +x" applied after being
  86. written. Note that for windows, any execute changes using os.chmod are
  87. no-ops per the python docs.
  88. """
  89. ensure_dir(location)
  90. zipfp = open(filename, 'rb')
  91. try:
  92. zip = zipfile.ZipFile(zipfp, allowZip64=True)
  93. leading = has_leading_dir(zip.namelist()) and flatten
  94. for info in zip.infolist():
  95. name = info.filename
  96. fn = name
  97. if leading:
  98. fn = split_leading_dir(name)[1]
  99. fn = os.path.join(location, fn)
  100. dir = os.path.dirname(fn)
  101. if not is_within_directory(location, fn):
  102. message = (
  103. 'The zip file ({}) has a file ({}) trying to install '
  104. 'outside target directory ({})'
  105. )
  106. raise InstallationError(message.format(filename, fn, location))
  107. if fn.endswith('/') or fn.endswith('\\'):
  108. # A directory
  109. ensure_dir(fn)
  110. else:
  111. ensure_dir(dir)
  112. # Don't use read() to avoid allocating an arbitrarily large
  113. # chunk of memory for the file's content
  114. fp = zip.open(name)
  115. try:
  116. with open(fn, 'wb') as destfp:
  117. shutil.copyfileobj(fp, destfp)
  118. finally:
  119. fp.close()
  120. mode = info.external_attr >> 16
  121. # if mode and regular file and any execute permissions for
  122. # user/group/world?
  123. if mode and stat.S_ISREG(mode) and mode & 0o111:
  124. # make dest file have execute for user/group/world
  125. # (chmod +x) no-op on windows per python docs
  126. os.chmod(fn, (0o777 - current_umask() | 0o111))
  127. finally:
  128. zipfp.close()
  129. def untar_file(filename, location):
  130. # type: (str, str) -> None
  131. """
  132. Untar the file (with path `filename`) to the destination `location`.
  133. All files are written based on system defaults and umask (i.e. permissions
  134. are not preserved), except that regular file members with any execute
  135. permissions (user, group, or world) have "chmod +x" applied after being
  136. written. Note that for windows, any execute changes using os.chmod are
  137. no-ops per the python docs.
  138. """
  139. ensure_dir(location)
  140. if filename.lower().endswith('.gz') or filename.lower().endswith('.tgz'):
  141. mode = 'r:gz'
  142. elif filename.lower().endswith(BZ2_EXTENSIONS):
  143. mode = 'r:bz2'
  144. elif filename.lower().endswith(XZ_EXTENSIONS):
  145. mode = 'r:xz'
  146. elif filename.lower().endswith('.tar'):
  147. mode = 'r'
  148. else:
  149. logger.warning(
  150. 'Cannot determine compression type for file %s', filename,
  151. )
  152. mode = 'r:*'
  153. tar = tarfile.open(filename, mode)
  154. try:
  155. leading = has_leading_dir([
  156. member.name for member in tar.getmembers()
  157. ])
  158. for member in tar.getmembers():
  159. fn = member.name
  160. if leading:
  161. # https://github.com/python/mypy/issues/1174
  162. fn = split_leading_dir(fn)[1] # type: ignore
  163. path = os.path.join(location, fn)
  164. if not is_within_directory(location, path):
  165. message = (
  166. 'The tar file ({}) has a file ({}) trying to install '
  167. 'outside target directory ({})'
  168. )
  169. raise InstallationError(
  170. message.format(filename, path, location)
  171. )
  172. if member.isdir():
  173. ensure_dir(path)
  174. elif member.issym():
  175. try:
  176. # https://github.com/python/typeshed/issues/2673
  177. tar._extract_member(member, path) # type: ignore
  178. except Exception as exc:
  179. # Some corrupt tar files seem to produce this
  180. # (specifically bad symlinks)
  181. logger.warning(
  182. 'In the tar file %s the member %s is invalid: %s',
  183. filename, member.name, exc,
  184. )
  185. continue
  186. else:
  187. try:
  188. fp = tar.extractfile(member)
  189. except (KeyError, AttributeError) as exc:
  190. # Some corrupt tar files seem to produce this
  191. # (specifically bad symlinks)
  192. logger.warning(
  193. 'In the tar file %s the member %s is invalid: %s',
  194. filename, member.name, exc,
  195. )
  196. continue
  197. ensure_dir(os.path.dirname(path))
  198. with open(path, 'wb') as destfp:
  199. shutil.copyfileobj(fp, destfp)
  200. fp.close()
  201. # Update the timestamp (useful for cython compiled files)
  202. # https://github.com/python/typeshed/issues/2673
  203. tar.utime(member, path) # type: ignore
  204. # member have any execute permissions for user/group/world?
  205. if member.mode & 0o111:
  206. # make dest file have execute for user/group/world
  207. # no-op on windows per python docs
  208. os.chmod(path, (0o777 - current_umask() | 0o111))
  209. finally:
  210. tar.close()
  211. def unpack_file(
  212. filename, # type: str
  213. location, # type: str
  214. content_type=None, # type: Optional[str]
  215. ):
  216. # type: (...) -> None
  217. filename = os.path.realpath(filename)
  218. if (
  219. content_type == 'application/zip' or
  220. filename.lower().endswith(ZIP_EXTENSIONS) or
  221. zipfile.is_zipfile(filename)
  222. ):
  223. unzip_file(
  224. filename,
  225. location,
  226. flatten=not filename.endswith('.whl')
  227. )
  228. elif (
  229. content_type == 'application/x-gzip' or
  230. tarfile.is_tarfile(filename) or
  231. filename.lower().endswith(
  232. TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS
  233. )
  234. ):
  235. untar_file(filename, location)
  236. else:
  237. # FIXME: handle?
  238. # FIXME: magic signatures?
  239. logger.critical(
  240. 'Cannot unpack file %s (downloaded from %s, content-type: %s); '
  241. 'cannot detect archive format',
  242. filename, location, content_type,
  243. )
  244. raise InstallationError(
  245. 'Cannot determine archive format of {}'.format(location)
  246. )