wheels.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. """Find, download and unpack wheels."""
  2. import fnmatch
  3. import hashlib
  4. import itertools
  5. import logging
  6. import glob
  7. import os
  8. import re
  9. import shutil
  10. import yarg
  11. import zipfile
  12. from pathlib import Path
  13. from requests_download import download, HashTracker
  14. from tempfile import mkdtemp
  15. from .util import get_cache_dir, normalize_path
  16. logger = logging.getLogger(__name__)
  17. class NoWheelError(Exception): pass
  18. class CompatibilityScorer:
  19. """Score wheels for a given target platform
  20. 0 for any score means incompatible.
  21. Higher numbers are more platform specific.
  22. """
  23. def __init__(self, py_version, platform):
  24. self.py_version = py_version
  25. py_version_tuple = tuple(map(int, py_version.split('.')[:2]))
  26. self.platform = platform
  27. # {('cp38', 'none', 'any'): N} (higher N for more specific tags)
  28. self.tag_prio = {
  29. tag: i for i, tag in enumerate(reversed(
  30. list(compatible_tags(py_version_tuple, platform))
  31. ), start=1)
  32. }
  33. def score(self, whl_filename: str) -> int:
  34. """Return a number for how suitable a wheel is for the target Python
  35. Higher numbers mean more specific (preferred) tags. 0 -> incompatible.
  36. """
  37. m = re.search(r'-([^-]+)-([^-]+)-([^-]+)\.whl$', whl_filename)
  38. if not m:
  39. raise ValueError("Failed to find wheel tag in %r" % whl_filename)
  40. interpreter, abi, platform = m.group(1, 2, 3)
  41. # Expand compressed tags ('cp38.cp39' indicates compatibility w/ both)
  42. expanded_tags = itertools.product(
  43. interpreter.split('.'), abi.split('.'), platform.split('.')
  44. )
  45. return max(self.tag_prio.get(whl_tag, 0) for whl_tag in expanded_tags)
  46. def is_compatible(self, whl_filename: str) -> bool:
  47. return self.score(whl_filename) > 0
  48. class WheelLocator(object):
  49. def __init__(self, requirement, scorer, extra_sources=None):
  50. self.requirement = requirement
  51. self.scorer = scorer
  52. self.extra_sources = extra_sources or []
  53. if requirement.count('==') != 1:
  54. raise ValueError("Requirement {!r} did not match name==version".format(requirement))
  55. self.name, self.version = requirement.split('==', 1)
  56. def pick_best_wheel(self, release_list):
  57. """Return the most specific compatible wheel
  58. Returns None if none of the supplied
  59. """
  60. best_score = 0
  61. best = None
  62. for release in release_list:
  63. if release.package_type != 'wheel':
  64. continue
  65. score = self.scorer.score(release.filename)
  66. if score == 0:
  67. # Incompatible
  68. continue
  69. if score > best_score:
  70. best = release
  71. best_score = score
  72. return best
  73. def check_extra_sources(self):
  74. """Find a compatible wheel in the specified extra_sources directories.
  75. Returns a Path or None.
  76. """
  77. whl_filename_prefix = '{name}-{version}-'.format(
  78. name=re.sub(r'[^\w\d.]+', '_', self.name),
  79. version=re.sub(r'[^\w\d.]+', '_', self.version),
  80. )
  81. for source in self.extra_sources:
  82. candidates = [CachedRelease(p.name)
  83. for p in source.iterdir()
  84. if p.name.startswith(whl_filename_prefix)]
  85. rel = self.pick_best_wheel(candidates)
  86. if rel:
  87. path = source / rel.filename
  88. return path
  89. def check_cache(self):
  90. """Find a wheel previously downloaded from PyPI in the cache.
  91. Returns a Path or None.
  92. """
  93. release_dir = get_cache_dir() / 'pypi' / self.name / self.version
  94. if not release_dir.is_dir():
  95. return None
  96. rel = self.pick_best_wheel(CachedRelease(p.name)
  97. for p in release_dir.iterdir())
  98. if rel is None:
  99. return None
  100. return release_dir / rel.filename
  101. def get_from_pypi(self):
  102. """Download a compatible wheel from PyPI.
  103. Downloads to the cache directory and returns the destination as a Path.
  104. Raises NoWheelError if no compatible wheel is found.
  105. """
  106. try:
  107. pypi_pkg = yarg.get(self.name)
  108. except yarg.HTTPError as e:
  109. if e.status_code == 404:
  110. raise NoWheelError("No package named {} found on PyPI".format(self.name))
  111. raise
  112. release_list = pypi_pkg.release(self.version)
  113. if release_list is None:
  114. raise NoWheelError("No release {0.version} for package {0.name}".format(self))
  115. preferred_release = self.pick_best_wheel(release_list)
  116. if preferred_release is None:
  117. raise NoWheelError('No compatible wheels found for {0.name} {0.version}'.format(self))
  118. download_to = get_cache_dir() / 'pypi' / self.name / self.version
  119. try:
  120. download_to.mkdir(parents=True)
  121. except OSError:
  122. # Ignore OSError only if the directory exists
  123. if not download_to.is_dir():
  124. raise
  125. target = download_to / preferred_release.filename
  126. from . import __version__
  127. hasher = HashTracker(hashlib.md5())
  128. headers = {'user-agent': 'pynsist/'+__version__}
  129. logger.info('Downloading wheel: %s', preferred_release.url)
  130. download(preferred_release.url, str(target), headers=headers,
  131. trackers=(hasher,))
  132. if hasher.hashobj.hexdigest() != preferred_release.md5_digest:
  133. target.unlink()
  134. raise ValueError('Downloaded wheel corrupted: {}'.format(preferred_release.url))
  135. return target
  136. def fetch(self):
  137. """Find and return a compatible wheel (main interface)"""
  138. p = self.check_extra_sources()
  139. if p is not None:
  140. logger.info('Using wheel from extra directory: %s', p)
  141. return p
  142. p = self.check_cache()
  143. if p is not None:
  144. logger.info('Using cached wheel: %s', p)
  145. return p
  146. return self.get_from_pypi()
  147. class CachedRelease(object):
  148. # Mock enough of the yarg Release object to be compatible with
  149. # pick_best_release above
  150. def __init__(self, filename):
  151. self.filename = filename
  152. self.package_type = 'wheel' if filename.endswith('.whl') else ''
  153. def merge_dir_to(src, dst):
  154. """Merge all files from one directory into another.
  155. Subdirectories will be merged recursively. If filenames are the same, those
  156. from src will overwrite those in dst. If a regular file clashes with a
  157. directory, an error will occur.
  158. """
  159. for p in src.iterdir():
  160. if p.is_dir():
  161. dst_p = dst / p.name
  162. if dst_p.is_dir():
  163. merge_dir_to(p, dst_p)
  164. elif dst_p.is_file():
  165. raise RuntimeError('Directory {} clashes with file {}'
  166. .format(p, dst_p))
  167. else:
  168. shutil.copytree(str(p), str(dst_p))
  169. else:
  170. # Copy regular file
  171. dst_p = dst / p.name
  172. if dst_p.is_dir():
  173. raise RuntimeError('File {} clashes with directory {}'
  174. .format(p, dst_p))
  175. shutil.copy2(str(p), str(dst_p))
  176. def extract_wheel(whl_file, target_dir, exclude=None):
  177. """Extract importable modules from a wheel to the target directory
  178. """
  179. # Extract to temporary directory
  180. td = Path(mkdtemp())
  181. with zipfile.ZipFile(str(whl_file), mode='r') as zf:
  182. if exclude:
  183. exclude_regexen = make_exclude_regexen(exclude)
  184. for zpath in zf.namelist():
  185. if is_excluded('pkgs/' + zpath, exclude_regexen):
  186. continue # Skip excluded paths
  187. zf.extract(zpath, path=str(td))
  188. else:
  189. zf.extractall(str(td))
  190. # Move extra lib files out of the .data subdirectory
  191. for p in td.iterdir():
  192. if p.suffix == '.data':
  193. if (p / 'purelib').is_dir():
  194. merge_dir_to(p / 'purelib', td)
  195. if (p / 'platlib').is_dir():
  196. merge_dir_to(p / 'platlib', td)
  197. # HACK: Some wheels from Christoph Gohlke's page have extra package
  198. # files added in data/Lib/site-packages. This is a trick that relies
  199. # on the default installation layout. It doesn't look like it will
  200. # change, so in the best tradition of packaging, we'll work around
  201. # the workaround.
  202. # https://github.com/takluyver/pynsist/issues/171
  203. # This is especially ugly because we do a case-insensitive match,
  204. # regardless of the filesystem.
  205. if (p / 'data').is_dir():
  206. for sd in (p / 'data').iterdir():
  207. if sd.name.lower() == 'lib' and sd.is_dir():
  208. for sd2 in sd.iterdir():
  209. if sd2.name.lower() == 'site-packages' and sd2.is_dir():
  210. merge_dir_to(sd2, td)
  211. # Copy to target directory
  212. target = Path(target_dir)
  213. copied_something = False
  214. for p in td.iterdir():
  215. if p.suffix not in {'.data'}:
  216. if p.is_dir():
  217. # If the dst directory already exists, this will combine them.
  218. # shutil.copytree will not combine them.
  219. try:
  220. target.joinpath(p.name).mkdir()
  221. except OSError:
  222. if not target.joinpath(p.name).is_dir():
  223. raise
  224. merge_dir_to(p, target / p.name)
  225. else:
  226. shutil.copy2(str(p), str(target))
  227. copied_something = True
  228. if not copied_something:
  229. raise RuntimeError("Did not find any files to extract from wheel {}"
  230. .format(whl_file))
  231. # Clean up temporary directory
  232. shutil.rmtree(str(td))
  233. class WheelGetter:
  234. def __init__(self, requirements, wheel_globs, target_dir,
  235. py_version, bitness, extra_sources=None, exclude=None):
  236. self.requirements = requirements
  237. self.wheel_globs = wheel_globs
  238. self.target_dir = target_dir
  239. target_platform = 'win_amd64' if bitness == 64 else 'win32'
  240. self.scorer = CompatibilityScorer(py_version, target_platform)
  241. self.extra_sources = extra_sources
  242. self.exclude = exclude
  243. self.got_distributions = {}
  244. def get_all(self):
  245. self.get_requirements()
  246. self.get_globs()
  247. def get_requirements(self):
  248. for req in self.requirements:
  249. wl = WheelLocator(req, self.scorer, self.extra_sources)
  250. whl_file = wl.fetch()
  251. extract_wheel(whl_file, self.target_dir, exclude=self.exclude)
  252. self.got_distributions[wl.name] = whl_file
  253. def get_globs(self):
  254. for glob_path in self.wheel_globs:
  255. paths = glob.glob(glob_path)
  256. if not paths:
  257. raise ValueError('Glob path {} does not match any files'
  258. .format(glob_path))
  259. for path in paths:
  260. logger.info('Collecting wheel file: %s (from: %s)',
  261. os.path.basename(path), glob_path)
  262. self.validate_wheel(path)
  263. extract_wheel(path, self.target_dir, exclude=self.exclude)
  264. def validate_wheel(self, whl_path):
  265. """
  266. Verify that the given wheel can safely be included in the current installer.
  267. If so, the given wheel info will be included in the given wheel info array.
  268. If not, an exception will be raised.
  269. """
  270. wheel_name = os.path.basename(whl_path)
  271. distribution = wheel_name.split('-', 1)[0]
  272. # Check that a distribution of same name has not been included before
  273. if distribution in self.got_distributions:
  274. prev_path = self.got_distributions[distribution]
  275. raise ValueError('Multiple wheels specified for {}:\n {}\n {}'.format(
  276. distribution, prev_path, whl_path))
  277. # Check that the wheel is compatible with the installer environment
  278. if not self.scorer.is_compatible(wheel_name):
  279. raise ValueError('Wheel {} is not compatible with Python {}, {}'
  280. .format(wheel_name, self.scorer.py_version, self.scorer.platform))
  281. self.got_distributions[distribution] = whl_path
  282. def make_exclude_regexen(exclude_patterns):
  283. """Translate exclude glob patterns to regex pattern objects.
  284. Handles matching files under a named directory.
  285. """
  286. re_pats = set()
  287. for pattern in exclude_patterns:
  288. re_pats.add(fnmatch.translate(pattern))
  289. if not pattern.endswith('*'):
  290. # Also use the pattern as a directory name and match anything
  291. # under that directory.
  292. suffix = '*' if pattern.endswith('/') else '/*'
  293. re_pats.add(fnmatch.translate(pattern + suffix))
  294. return [re.compile(p) for p in sorted(re_pats)]
  295. def is_excluded(path, exclude_regexen):
  296. """Return True if path matches an exclude pattern"""
  297. path = normalize_path(path)
  298. for re_pattern in exclude_regexen:
  299. if re_pattern.match(path):
  300. return True
  301. return False
  302. # The function below is based on the packaging.tags module, used with
  303. # modification following the BSD 2 clause license:
  304. # Copyright (c) Donald Stufft and individual contributors.
  305. # All rights reserved.
  306. #
  307. # Redistribution and use in source and binary forms, with or without
  308. # modification, are permitted provided that the following conditions are met:
  309. #
  310. # 1. Redistributions of source code must retain the above copyright notice,
  311. # this list of conditions and the following disclaimer.
  312. #
  313. # 2. Redistributions in binary form must reproduce the above copyright
  314. # notice, this list of conditions and the following disclaimer in the
  315. # documentation and/or other materials provided with the distribution.
  316. #
  317. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  318. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  319. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  320. # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  321. # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  322. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  323. # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  324. # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  325. # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  326. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  327. def compatible_tags(python_version : tuple =None, platform : str =None):
  328. """Iterate through compatible tags for our target Python
  329. Tags are yielded in order from the most specific to the most general.
  330. Based on packaging.tags module, but simplified for Pynsist's use case,
  331. and avoiding getting any details from the currently running Python.
  332. """
  333. interpreter = "cp{}{}".format(python_version[0], python_version[1])
  334. cpython_abi = interpreter
  335. # Python is normally built with the pymalloc (m) option, and most wheels
  336. # are published for this ABI. The flag is dropped in Python 3.8.
  337. if python_version < (3, 8):
  338. cpython_abi += 'm'
  339. yield interpreter, cpython_abi, platform
  340. yield interpreter, "abi3", platform
  341. yield interpreter, "none", platform
  342. # cp3x-abi3 down to cp32 (Python 3.2 was the first version to have ABI3)
  343. for minor_version in range(python_version[1] - 1, 1, -1):
  344. interpreter = "cp{}{}".format(python_version[0], minor_version)
  345. yield interpreter, "abi3", platform
  346. py_interps = [
  347. f"py{python_version[0]}{python_version[1]}", # e.g. py38
  348. f"py{python_version[0]}", # py3
  349. ] + [
  350. f"py{python_version[0]}{minor}" # py37 ... py30
  351. for minor in range(python_version[1] - 1, -1, -1)
  352. ]
  353. for version in py_interps:
  354. yield version, "none", platform
  355. for version in py_interps:
  356. yield version, "none", "any"