from __future__ import absolute_import import contextlib import errno import io import locale # we have a submodule named 'logging' which would shadow this if we used the # regular name: import logging as std_logging import os import posixpath import re import shutil import stat import subprocess import sys import tarfile import zipfile from collections import deque from pip._vendor import pkg_resources # NOTE: retrying is not annotated in typeshed as on 2017-07-17, which is # why we ignore the type on this import. from pip._vendor.retrying import retry # type: ignore from pip._vendor.six import PY2 from pip._vendor.six.moves import input from pip._vendor.six.moves.urllib import parse as urllib_parse from pip._internal.exceptions import CommandError, InstallationError from pip._internal.locations import ( running_under_virtualenv, site_packages, user_site, virtualenv_no_global, write_delete_marker_file, ) from pip._internal.utils.compat import ( WINDOWS, console_to_str, expanduser, stdlib_pkgs, ) if PY2: from io import BytesIO as StringIO else: from io import StringIO __all__ = ['rmtree', 'display_path', 'backup_dir', 'ask', 'splitext', 'format_size', 'is_installable_dir', 'is_svn_page', 'file_contents', 'split_leading_dir', 'has_leading_dir', 'normalize_path', 'renames', 'get_prog', 'unzip_file', 'untar_file', 'unpack_file', 'call_subprocess', 'captured_stdout', 'ensure_dir', 'ARCHIVE_EXTENSIONS', 'SUPPORTED_EXTENSIONS', 'get_installed_version', 'remove_auth_from_url'] logger = std_logging.getLogger(__name__) BZ2_EXTENSIONS = ('.tar.bz2', '.tbz') XZ_EXTENSIONS = ('.tar.xz', '.txz', '.tlz', '.tar.lz', '.tar.lzma') ZIP_EXTENSIONS = ('.zip', '.whl') TAR_EXTENSIONS = ('.tar.gz', '.tgz', '.tar') ARCHIVE_EXTENSIONS = ( ZIP_EXTENSIONS + BZ2_EXTENSIONS + TAR_EXTENSIONS + XZ_EXTENSIONS) SUPPORTED_EXTENSIONS = ZIP_EXTENSIONS + TAR_EXTENSIONS try: import bz2 # noqa SUPPORTED_EXTENSIONS += BZ2_EXTENSIONS except ImportError: logger.debug('bz2 module is not available') try: # Only for Python 3.3+ import lzma # noqa SUPPORTED_EXTENSIONS += XZ_EXTENSIONS except ImportError: logger.debug('lzma module is not available') def import_or_raise(pkg_or_module_string, ExceptionType, *args, **kwargs): try: return __import__(pkg_or_module_string) except ImportError: raise ExceptionType(*args, **kwargs) def ensure_dir(path): """os.path.makedirs without EEXIST.""" try: os.makedirs(path) except OSError as e: if e.errno != errno.EEXIST: raise def get_prog(): try: prog = os.path.basename(sys.argv[0]) if prog in ('__main__.py', '-c'): return "%s -m pip" % sys.executable else: return prog except (AttributeError, TypeError, IndexError): pass return 'pip' # Retry every half second for up to 3 seconds @retry(stop_max_delay=3000, wait_fixed=500) def rmtree(dir, ignore_errors=False): shutil.rmtree(dir, ignore_errors=ignore_errors, onerror=rmtree_errorhandler) def rmtree_errorhandler(func, path, exc_info): """On Windows, the files in .svn are read-only, so when rmtree() tries to remove them, an exception is thrown. We catch that here, remove the read-only attribute, and hopefully continue without problems.""" # if file type currently read only if os.stat(path).st_mode & stat.S_IREAD: # convert to read/write os.chmod(path, stat.S_IWRITE) # use the original function to repeat the operation func(path) return else: raise def display_path(path): """Gives the display value for a given path, making it relative to cwd if possible.""" path = os.path.normcase(os.path.abspath(path)) if sys.version_info[0] == 2: path = path.decode(sys.getfilesystemencoding(), 'replace') path = path.encode(sys.getdefaultencoding(), 'replace') if path.startswith(os.getcwd() + os.path.sep): path = '.' + path[len(os.getcwd()):] return path def backup_dir(dir, ext='.bak'): """Figure out the name of a directory to back up the given dir to (adding .bak, .bak2, etc)""" n = 1 extension = ext while os.path.exists(dir + extension): n += 1 extension = ext + str(n) return dir + extension def ask_path_exists(message, options): for action in os.environ.get('PIP_EXISTS_ACTION', '').split(): if action in options: return action return ask(message, options) def ask(message, options): """Ask the message interactively, with the given possible responses""" while 1: if os.environ.get('PIP_NO_INPUT'): raise Exception( 'No input was expected ($PIP_NO_INPUT set); question: %s' % message ) response = input(message) response = response.strip().lower() if response not in options: print( 'Your response (%r) was not one of the expected responses: ' '%s' % (response, ', '.join(options)) ) else: return response def format_size(bytes): if bytes > 1000 * 1000: return '%.1fMB' % (bytes / 1000.0 / 1000) elif bytes > 10 * 1000: return '%ikB' % (bytes / 1000) elif bytes > 1000: return '%.1fkB' % (bytes / 1000.0) else: return '%ibytes' % bytes def is_installable_dir(path): """Is path is a directory containing setup.py or pyproject.toml? """ if not os.path.isdir(path): return False setup_py = os.path.join(path, 'setup.py') if os.path.isfile(setup_py): return True pyproject_toml = os.path.join(path, 'pyproject.toml') if os.path.isfile(pyproject_toml): return True return False def is_svn_page(html): """ Returns true if the page appears to be the index page of an svn repository """ return (re.search(r'[^<]*Revision \d+:', html) and re.search(r'Powered by (?:<a[^>]*?>)?Subversion', html, re.I)) def file_contents(filename): with open(filename, 'rb') as fp: return fp.read().decode('utf-8') def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): """Yield pieces of data from a file-like object until EOF.""" while True: chunk = file.read(size) if not chunk: break yield chunk def split_leading_dir(path): path = path.lstrip('/').lstrip('\\') if '/' in path and (('\\' in path and path.find('/') < path.find('\\')) or '\\' not in path): return path.split('/', 1) elif '\\' in path: return path.split('\\', 1) else: return path, '' def has_leading_dir(paths): """Returns true if all the paths have the same leading path name (i.e., everything is in one subdirectory in an archive)""" common_prefix = None for path in paths: prefix, rest = split_leading_dir(path) if not prefix: return False elif common_prefix is None: common_prefix = prefix elif prefix != common_prefix: return False return True def normalize_path(path, resolve_symlinks=True): """ Convert a path to its canonical, case-normalized, absolute version. """ path = expanduser(path) if resolve_symlinks: path = os.path.realpath(path) else: path = os.path.abspath(path) return os.path.normcase(path) def splitext(path): """Like os.path.splitext, but take off .tar too""" base, ext = posixpath.splitext(path) if base.lower().endswith('.tar'): ext = base[-4:] + ext base = base[:-4] return base, ext def renames(old, new): """Like os.renames(), but handles renaming across devices.""" # Implementation borrowed from os.renames(). head, tail = os.path.split(new) if head and tail and not os.path.exists(head): os.makedirs(head) shutil.move(old, new) head, tail = os.path.split(old) if head and tail: try: os.removedirs(head) except OSError: pass def is_local(path): """ Return True if path is within sys.prefix, if we're running in a virtualenv. If we're not in a virtualenv, all paths are considered "local." """ if not running_under_virtualenv(): return True return normalize_path(path).startswith(normalize_path(sys.prefix)) def dist_is_local(dist): """ Return True if given Distribution object is installed locally (i.e. within current virtualenv). Always True if we're not in a virtualenv. """ return is_local(dist_location(dist)) def dist_in_usersite(dist): """ Return True if given Distribution is installed in user site. """ norm_path = normalize_path(dist_location(dist)) return norm_path.startswith(normalize_path(user_site)) def dist_in_site_packages(dist): """ Return True if given Distribution is installed in sysconfig.get_python_lib(). """ return normalize_path( dist_location(dist) ).startswith(normalize_path(site_packages)) def dist_is_editable(dist): """Is distribution an editable install?""" for path_item in sys.path: egg_link = os.path.join(path_item, dist.project_name + '.egg-link') if os.path.isfile(egg_link): return True return False def get_installed_distributions(local_only=True, skip=stdlib_pkgs, include_editables=True, editables_only=False, user_only=False): """ Return a list of installed Distribution objects. If ``local_only`` is True (default), only return installations local to the current virtualenv, if in a virtualenv. ``skip`` argument is an iterable of lower-case project names to ignore; defaults to stdlib_pkgs If ``include_editables`` is False, don't report editables. If ``editables_only`` is True , only report editables. If ``user_only`` is True , only report installations in the user site directory. """ if local_only: local_test = dist_is_local else: def local_test(d): return True if include_editables: def editable_test(d): return True else: def editable_test(d): return not dist_is_editable(d) if editables_only: def editables_only_test(d): return dist_is_editable(d) else: def editables_only_test(d): return True if user_only: user_test = dist_in_usersite else: def user_test(d): return True return [d for d in pkg_resources.working_set if local_test(d) and d.key not in skip and editable_test(d) and editables_only_test(d) and user_test(d) ] def egg_link_path(dist): """ Return the path for the .egg-link file if it exists, otherwise, None. There's 3 scenarios: 1) not in a virtualenv try to find in site.USER_SITE, then site_packages 2) in a no-global virtualenv try to find in site_packages 3) in a yes-global virtualenv try to find in site_packages, then site.USER_SITE (don't look in global location) For #1 and #3, there could be odd cases, where there's an egg-link in 2 locations. This method will just return the first one found. """ sites = [] if running_under_virtualenv(): if virtualenv_no_global(): sites.append(site_packages) else: sites.append(site_packages) if user_site: sites.append(user_site) else: if user_site: sites.append(user_site) sites.append(site_packages) for site in sites: egglink = os.path.join(site, dist.project_name) + '.egg-link' if os.path.isfile(egglink): return egglink def dist_location(dist): """ Get the site-packages location of this distribution. Generally this is dist.location, except in the case of develop-installed packages, where dist.location is the source code location, and we want to know where the egg-link file is. """ egg_link = egg_link_path(dist) if egg_link: return egg_link return dist.location def current_umask(): """Get the current umask which involves having to set it temporarily.""" mask = os.umask(0) os.umask(mask) return mask def unzip_file(filename, location, flatten=True): """ Unzip the file (with path `filename`) to the destination `location`. All files are written based on system defaults and umask (i.e. permissions are not preserved), except that regular file members with any execute permissions (user, group, or world) have "chmod +x" applied after being written. Note that for windows, any execute changes using os.chmod are no-ops per the python docs. """ ensure_dir(location) zipfp = open(filename, 'rb') try: zip = zipfile.ZipFile(zipfp, allowZip64=True) leading = has_leading_dir(zip.namelist()) and flatten for info in zip.infolist(): name = info.filename data = zip.read(name) fn = name if leading: fn = split_leading_dir(name)[1] fn = os.path.join(location, fn) dir = os.path.dirname(fn) if fn.endswith('/') or fn.endswith('\\'): # A directory ensure_dir(fn) else: ensure_dir(dir) fp = open(fn, 'wb') try: fp.write(data) finally: fp.close() mode = info.external_attr >> 16 # if mode and regular file and any execute permissions for # user/group/world? if mode and stat.S_ISREG(mode) and mode & 0o111: # make dest file have execute for user/group/world # (chmod +x) no-op on windows per python docs os.chmod(fn, (0o777 - current_umask() | 0o111)) finally: zipfp.close() def untar_file(filename, location): """ Untar the file (with path `filename`) to the destination `location`. All files are written based on system defaults and umask (i.e. permissions are not preserved), except that regular file members with any execute permissions (user, group, or world) have "chmod +x" applied after being written. Note that for windows, any execute changes using os.chmod are no-ops per the python docs. """ ensure_dir(location) if filename.lower().endswith('.gz') or filename.lower().endswith('.tgz'): mode = 'r:gz' elif filename.lower().endswith(BZ2_EXTENSIONS): mode = 'r:bz2' elif filename.lower().endswith(XZ_EXTENSIONS): mode = 'r:xz' elif filename.lower().endswith('.tar'): mode = 'r' else: logger.warning( 'Cannot determine compression type for file %s', filename, ) mode = 'r:*' tar = tarfile.open(filename, mode) try: # note: python<=2.5 doesn't seem to know about pax headers, filter them leading = has_leading_dir([ member.name for member in tar.getmembers() if member.name != 'pax_global_header' ]) for member in tar.getmembers(): fn = member.name if fn == 'pax_global_header': continue if leading: fn = split_leading_dir(fn)[1] path = os.path.join(location, fn) if member.isdir(): ensure_dir(path) elif member.issym(): try: tar._extract_member(member, path) except Exception as exc: # Some corrupt tar files seem to produce this # (specifically bad symlinks) logger.warning( 'In the tar file %s the member %s is invalid: %s', filename, member.name, exc, ) continue else: try: fp = tar.extractfile(member) except (KeyError, AttributeError) as exc: # Some corrupt tar files seem to produce this # (specifically bad symlinks) logger.warning( 'In the tar file %s the member %s is invalid: %s', filename, member.name, exc, ) continue ensure_dir(os.path.dirname(path)) with open(path, 'wb') as destfp: shutil.copyfileobj(fp, destfp) fp.close() # Update the timestamp (useful for cython compiled files) tar.utime(member, path) # member have any execute permissions for user/group/world? if member.mode & 0o111: # make dest file have execute for user/group/world # no-op on windows per python docs os.chmod(path, (0o777 - current_umask() | 0o111)) finally: tar.close() def unpack_file(filename, location, content_type, link): filename = os.path.realpath(filename) if (content_type == 'application/zip' or filename.lower().endswith(ZIP_EXTENSIONS) or zipfile.is_zipfile(filename)): unzip_file( filename, location, flatten=not filename.endswith('.whl') ) elif (content_type == 'application/x-gzip' or tarfile.is_tarfile(filename) or filename.lower().endswith( TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS)): untar_file(filename, location) elif (content_type and content_type.startswith('text/html') and is_svn_page(file_contents(filename))): # We don't really care about this from pip._internal.vcs.subversion import Subversion Subversion('svn+' + link.url).unpack(location) else: # FIXME: handle? # FIXME: magic signatures? logger.critical( 'Cannot unpack file %s (downloaded from %s, content-type: %s); ' 'cannot detect archive format', filename, location, content_type, ) raise InstallationError( 'Cannot determine archive format of %s' % location ) def call_subprocess(cmd, show_stdout=True, cwd=None, on_returncode='raise', command_desc=None, extra_environ=None, unset_environ=None, spinner=None): """ Args: unset_environ: an iterable of environment variable names to unset prior to calling subprocess.Popen(). """ if unset_environ is None: unset_environ = [] # This function's handling of subprocess output is confusing and I # previously broke it terribly, so as penance I will write a long comment # explaining things. # # The obvious thing that affects output is the show_stdout= # kwarg. show_stdout=True means, let the subprocess write directly to our # stdout. Even though it is nominally the default, it is almost never used # inside pip (and should not be used in new code without a very good # reason); as of 2016-02-22 it is only used in a few places inside the VCS # wrapper code. Ideally we should get rid of it entirely, because it # creates a lot of complexity here for a rarely used feature. # # Most places in pip set show_stdout=False. What this means is: # - We connect the child stdout to a pipe, which we read. # - By default, we hide the output but show a spinner -- unless the # subprocess exits with an error, in which case we show the output. # - If the --verbose option was passed (= loglevel is DEBUG), then we show # the output unconditionally. (But in this case we don't want to show # the output a second time if it turns out that there was an error.) # # stderr is always merged with stdout (even if show_stdout=True). if show_stdout: stdout = None else: stdout = subprocess.PIPE if command_desc is None: cmd_parts = [] for part in cmd: if ' ' in part or '\n' in part or '"' in part or "'" in part: part = '"%s"' % part.replace('"', '\\"') cmd_parts.append(part) command_desc = ' '.join(cmd_parts) logger.debug("Running command %s", command_desc) env = os.environ.copy() if extra_environ: env.update(extra_environ) for name in unset_environ: env.pop(name, None) try: proc = subprocess.Popen( cmd, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, stdout=stdout, cwd=cwd, env=env, ) proc.stdin.close() except Exception as exc: logger.critical( "Error %s while executing command %s", exc, command_desc, ) raise all_output = [] if stdout is not None: while True: line = console_to_str(proc.stdout.readline()) if not line: break line = line.rstrip() all_output.append(line + '\n') if logger.getEffectiveLevel() <= std_logging.DEBUG: # Show the line immediately logger.debug(line) else: # Update the spinner if spinner is not None: spinner.spin() try: proc.wait() finally: if proc.stdout: proc.stdout.close() if spinner is not None: if proc.returncode: spinner.finish("error") else: spinner.finish("done") if proc.returncode: if on_returncode == 'raise': if (logger.getEffectiveLevel() > std_logging.DEBUG and not show_stdout): logger.info( 'Complete output from command %s:', command_desc, ) logger.info( ''.join(all_output) + '\n----------------------------------------' ) raise InstallationError( 'Command "%s" failed with error code %s in %s' % (command_desc, proc.returncode, cwd)) elif on_returncode == 'warn': logger.warning( 'Command "%s" had error code %s in %s', command_desc, proc.returncode, cwd, ) elif on_returncode == 'ignore': pass else: raise ValueError('Invalid value: on_returncode=%s' % repr(on_returncode)) if not show_stdout: return ''.join(all_output) def read_text_file(filename): """Return the contents of *filename*. Try to decode the file contents with utf-8, the preferred system encoding (e.g., cp1252 on some Windows machines), and latin1, in that order. Decoding a byte string with latin1 will never raise an error. In the worst case, the returned string will contain some garbage characters. """ with open(filename, 'rb') as fp: data = fp.read() encodings = ['utf-8', locale.getpreferredencoding(False), 'latin1'] for enc in encodings: try: data = data.decode(enc) except UnicodeDecodeError: continue break assert type(data) != bytes # Latin1 should have worked. return data def _make_build_dir(build_dir): os.makedirs(build_dir) write_delete_marker_file(build_dir) class FakeFile(object): """Wrap a list of lines in an object with readline() to make ConfigParser happy.""" def __init__(self, lines): self._gen = (l for l in lines) def readline(self): try: try: return next(self._gen) except NameError: return self._gen.next() except StopIteration: return '' def __iter__(self): return self._gen class StreamWrapper(StringIO): @classmethod def from_stream(cls, orig_stream): cls.orig_stream = orig_stream return cls() # compileall.compile_dir() needs stdout.encoding to print to stdout @property def encoding(self): return self.orig_stream.encoding @contextlib.contextmanager def captured_output(stream_name): """Return a context manager used by captured_stdout/stdin/stderr that temporarily replaces the sys stream *stream_name* with a StringIO. Taken from Lib/support/__init__.py in the CPython repo. """ orig_stdout = getattr(sys, stream_name) setattr(sys, stream_name, StreamWrapper.from_stream(orig_stdout)) try: yield getattr(sys, stream_name) finally: setattr(sys, stream_name, orig_stdout) def captured_stdout(): """Capture the output of sys.stdout: with captured_stdout() as stdout: print('hello') self.assertEqual(stdout.getvalue(), 'hello\n') Taken from Lib/support/__init__.py in the CPython repo. """ return captured_output('stdout') class cached_property(object): """A property that is only computed once per instance and then replaces itself with an ordinary attribute. Deleting the attribute resets the property. Source: https://github.com/bottlepy/bottle/blob/0.11.5/bottle.py#L175 """ def __init__(self, func): self.__doc__ = getattr(func, '__doc__') self.func = func def __get__(self, obj, cls): if obj is None: # We're being accessed from the class itself, not from an object return self value = obj.__dict__[self.func.__name__] = self.func(obj) return value def get_installed_version(dist_name, working_set=None): """Get the installed version of dist_name avoiding pkg_resources cache""" # Create a requirement that we'll look for inside of setuptools. req = pkg_resources.Requirement.parse(dist_name) if working_set is None: # We want to avoid having this cached, so we need to construct a new # working set each time. working_set = pkg_resources.WorkingSet() # Get the installed distribution from our working set dist = working_set.find(req) # Check to see if we got an installed distribution or not, if we did # we want to return it's version. return dist.version if dist else None def consume(iterator): """Consume an iterable at C speed.""" deque(iterator, maxlen=0) # Simulates an enum def enum(*sequential, **named): enums = dict(zip(sequential, range(len(sequential))), **named) reverse = {value: key for key, value in enums.items()} enums['reverse_mapping'] = reverse return type('Enum', (), enums) def make_vcs_requirement_url(repo_url, rev, egg_project_name, subdir=None): """ Return the URL for a VCS requirement. Args: repo_url: the remote VCS url, with any needed VCS prefix (e.g. "git+"). """ req = '{}@{}#egg={}'.format(repo_url, rev, egg_project_name) if subdir: req += '&subdirectory={}'.format(subdir) return req def split_auth_from_netloc(netloc): """ Parse out and remove the auth information from a netloc. Returns: (netloc, (username, password)). """ if '@' not in netloc: return netloc, (None, None) # Split from the right because that's how urllib.parse.urlsplit() # behaves if more than one @ is present (which can be checked using # the password attribute of urlsplit()'s return value). auth, netloc = netloc.rsplit('@', 1) if ':' in auth: # Split from the left because that's how urllib.parse.urlsplit() # behaves if more than one : is present (which again can be checked # using the password attribute of the return value) user_pass = tuple(auth.split(':', 1)) else: user_pass = auth, None return netloc, user_pass def remove_auth_from_url(url): # Return a copy of url with 'username:password@' removed. # username/pass params are passed to subversion through flags # and are not recognized in the url. # parsed url purl = urllib_parse.urlsplit(url) netloc, user_pass = split_auth_from_netloc(purl.netloc) # stripped url url_pieces = ( purl.scheme, netloc, purl.path, purl.query, purl.fragment ) surl = urllib_parse.urlunsplit(url_pieces) return surl def protect_pip_from_modification_on_windows(modifying_pip): """Protection of pip.exe from modification on Windows On Windows, any operation modifying pip should be run as: python -m pip ... """ pip_names = [ "pip.exe", "pip{}.exe".format(sys.version_info[0]), "pip{}.{}.exe".format(*sys.version_info[:2]) ] # See https://github.com/pypa/pip/issues/1299 for more discussion should_show_use_python_msg = ( modifying_pip and WINDOWS and os.path.basename(sys.argv[0]) in pip_names ) if should_show_use_python_msg: new_command = [ sys.executable, "-m", "pip" ] + sys.argv[1:] raise CommandError( 'To modify pip, please run the following command:\n{}' .format(" ".join(new_command)) )