wheels.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. """Find, download and unpack wheels."""
  2. import fnmatch
  3. import hashlib
  4. import logging
  5. import glob
  6. import os
  7. import re
  8. import shutil
  9. import yarg
  10. import zipfile
  11. from pathlib import Path
  12. from requests_download import download, HashTracker
  13. from tempfile import mkdtemp
  14. from .util import get_cache_dir, normalize_path
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
  16. class NoWheelError(Exception): pass
  17. class CompatibilityScorer:
  18. """Score wheels for a given target platform
  19. 0 for any score means incompatible.
  20. Higher numbers are more platform specific.
  21. """
  22. def __init__(self, py_version, platform):
  23. self.py_version = py_version
  24. self.py_version_tuple = tuple(map(int, py_version.split('.')[:2]))
  25. self.platform = platform
  26. def score_platform(self, platform):
  27. # target = 'win_amd64' if self.bitness == 64 else 'win32'
  28. d = {self.platform: 2, 'any': 1}
  29. return max(d.get(p, 0) for p in platform.split('.'))
  30. def score_abi(self, abi):
  31. py_version_nodot = '%s%s' % (self.py_version_tuple[0], self.py_version_tuple[1])
  32. abi_suffix = 'm' if self.py_version_tuple < (3, 8) else ''
  33. # Are there other valid options here?
  34. d = {'cp%s%s' % (py_version_nodot, abi_suffix): 3,
  35. 'abi3': 2,
  36. 'none': 1}
  37. return max(d.get(a, 0) for a in abi.split('.'))
  38. def score_interpreter(self, interpreter):
  39. py_version_nodot = '%s%s' % (self.py_version_tuple[0], self.py_version_tuple[1])
  40. py_version_major = str(self.py_version_tuple[0])
  41. d = {'cp'+py_version_nodot: 4,
  42. 'cp'+py_version_major: 3,
  43. 'py'+py_version_nodot: 2,
  44. 'py'+py_version_major: 1
  45. }
  46. return max(d.get(i, 0) for i in interpreter.split('.'))
  47. def score(self, whl_filename):
  48. m = re.search(r'-([^-]+)-([^-]+)-([^-]+)\.whl$', whl_filename)
  49. if not m:
  50. raise ValueError("Failed to find wheel tag in %r" % whl_filename)
  51. interpreter, abi, platform = m.group(1, 2, 3)
  52. return (
  53. self.score_platform(platform),
  54. self.score_abi(abi),
  55. self.score_interpreter(interpreter)
  56. )
  57. class WheelLocator(object):
  58. def __init__(self, requirement, scorer, extra_sources=None):
  59. self.requirement = requirement
  60. self.scorer = scorer
  61. self.extra_sources = extra_sources or []
  62. if requirement.count('==') != 1:
  63. raise ValueError("Requirement {!r} did not match name==version".format(requirement))
  64. self.name, self.version = requirement.split('==', 1)
  65. def pick_best_wheel(self, release_list):
  66. best_score = (0, 0, 0)
  67. best = None
  68. for release in release_list:
  69. if release.package_type != 'wheel':
  70. continue
  71. score = self.scorer.score(release.filename)
  72. if any(s==0 for s in score):
  73. # Incompatible
  74. continue
  75. if score > best_score:
  76. best = release
  77. best_score = score
  78. return best
  79. def check_extra_sources(self):
  80. """Find a compatible wheel in the specified extra_sources directories.
  81. Returns a Path or None.
  82. """
  83. whl_filename_prefix = '{name}-{version}-'.format(
  84. name=re.sub(r'[^\w\d.]+', '_', self.name),
  85. version=re.sub(r'[^\w\d.]+', '_', self.version),
  86. )
  87. for source in self.extra_sources:
  88. candidates = [CachedRelease(p.name)
  89. for p in source.iterdir()
  90. if p.name.startswith(whl_filename_prefix)]
  91. rel = self.pick_best_wheel(candidates)
  92. if rel:
  93. path = source / rel.filename
  94. return path
  95. def check_cache(self):
  96. """Find a wheel previously downloaded from PyPI in the cache.
  97. Returns a Path or None.
  98. """
  99. release_dir = get_cache_dir() / 'pypi' / self.name / self.version
  100. if not release_dir.is_dir():
  101. return None
  102. rel = self.pick_best_wheel(CachedRelease(p.name)
  103. for p in release_dir.iterdir())
  104. if rel is None:
  105. return None
  106. return release_dir / rel.filename
  107. def get_from_pypi(self):
  108. """Download a compatible wheel from PyPI.
  109. Downloads to the cache directory and returns the destination as a Path.
  110. Raises NoWheelError if no compatible wheel is found.
  111. """
  112. try:
  113. pypi_pkg = yarg.get(self.name)
  114. except yarg.HTTPError as e:
  115. if e.status_code == 404:
  116. raise NoWheelError("No package named {} found on PyPI".format(self.name))
  117. raise
  118. release_list = pypi_pkg.release(self.version)
  119. if release_list is None:
  120. raise NoWheelError("No release {0.version} for package {0.name}".format(self))
  121. preferred_release = self.pick_best_wheel(release_list)
  122. if preferred_release is None:
  123. raise NoWheelError('No compatible wheels found for {0.name} {0.version}'.format(self))
  124. download_to = get_cache_dir() / 'pypi' / self.name / self.version
  125. try:
  126. download_to.mkdir(parents=True)
  127. except OSError:
  128. # Ignore OSError only if the directory exists
  129. if not download_to.is_dir():
  130. raise
  131. target = download_to / preferred_release.filename
  132. from . import __version__
  133. hasher = HashTracker(hashlib.md5())
  134. headers = {'user-agent': 'pynsist/'+__version__}
  135. logger.info('Downloading wheel: %s', preferred_release.url)
  136. download(preferred_release.url, str(target), headers=headers,
  137. trackers=(hasher,))
  138. if hasher.hashobj.hexdigest() != preferred_release.md5_digest:
  139. target.unlink()
  140. raise ValueError('Downloaded wheel corrupted: {}'.format(preferred_release.url))
  141. return target
  142. def fetch(self):
  143. """Find and return a compatible wheel (main interface)"""
  144. p = self.check_extra_sources()
  145. if p is not None:
  146. logger.info('Using wheel from extra directory: %s', p)
  147. return p
  148. p = self.check_cache()
  149. if p is not None:
  150. logger.info('Using cached wheel: %s', p)
  151. return p
  152. return self.get_from_pypi()
  153. class CachedRelease(object):
  154. # Mock enough of the yarg Release object to be compatible with
  155. # pick_best_release above
  156. def __init__(self, filename):
  157. self.filename = filename
  158. self.package_type = 'wheel' if filename.endswith('.whl') else ''
  159. def merge_dir_to(src, dst):
  160. """Merge all files from one directory into another.
  161. Subdirectories will be merged recursively. If filenames are the same, those
  162. from src will overwrite those in dst. If a regular file clashes with a
  163. directory, an error will occur.
  164. """
  165. for p in src.iterdir():
  166. if p.is_dir():
  167. dst_p = dst / p.name
  168. if dst_p.is_dir():
  169. merge_dir_to(p, dst_p)
  170. elif dst_p.is_file():
  171. raise RuntimeError('Directory {} clashes with file {}'
  172. .format(p, dst_p))
  173. else:
  174. shutil.copytree(str(p), str(dst_p))
  175. else:
  176. # Copy regular file
  177. dst_p = dst / p.name
  178. if dst_p.is_dir():
  179. raise RuntimeError('File {} clashes with directory {}'
  180. .format(p, dst_p))
  181. shutil.copy2(str(p), str(dst_p))
  182. def extract_wheel(whl_file, target_dir, exclude=None):
  183. """Extract importable modules from a wheel to the target directory
  184. """
  185. # Extract to temporary directory
  186. td = Path(mkdtemp())
  187. with zipfile.ZipFile(str(whl_file), mode='r') as zf:
  188. if exclude:
  189. exclude_regexen = make_exclude_regexen(exclude)
  190. for zpath in zf.namelist():
  191. if is_excluded('pkgs/' + zpath, exclude_regexen):
  192. continue # Skip excluded paths
  193. zf.extract(zpath, path=str(td))
  194. else:
  195. zf.extractall(str(td))
  196. # Move extra lib files out of the .data subdirectory
  197. for p in td.iterdir():
  198. if p.suffix == '.data':
  199. if (p / 'purelib').is_dir():
  200. merge_dir_to(p / 'purelib', td)
  201. if (p / 'platlib').is_dir():
  202. merge_dir_to(p / 'platlib', td)
  203. # HACK: Some wheels from Christoph Gohlke's page have extra package
  204. # files added in data/Lib/site-packages. This is a trick that relies
  205. # on the default installation layout. It doesn't look like it will
  206. # change, so in the best tradition of packaging, we'll work around
  207. # the workaround.
  208. # https://github.com/takluyver/pynsist/issues/171
  209. # This is especially ugly because we do a case-insensitive match,
  210. # regardless of the filesystem.
  211. if (p / 'data').is_dir():
  212. for sd in (p / 'data').iterdir():
  213. if sd.name.lower() == 'lib' and sd.is_dir():
  214. for sd2 in sd.iterdir():
  215. if sd2.name.lower() == 'site-packages' and sd2.is_dir():
  216. merge_dir_to(sd2, td)
  217. # Copy to target directory
  218. target = Path(target_dir)
  219. copied_something = False
  220. for p in td.iterdir():
  221. if p.suffix not in {'.data'}:
  222. if p.is_dir():
  223. # If the dst directory already exists, this will combine them.
  224. # shutil.copytree will not combine them.
  225. try:
  226. target.joinpath(p.name).mkdir()
  227. except OSError:
  228. if not target.joinpath(p.name).is_dir():
  229. raise
  230. merge_dir_to(p, target / p.name)
  231. else:
  232. shutil.copy2(str(p), str(target))
  233. copied_something = True
  234. if not copied_something:
  235. raise RuntimeError("Did not find any files to extract from wheel {}"
  236. .format(whl_file))
  237. # Clean up temporary directory
  238. shutil.rmtree(str(td))
  239. class WheelGetter:
  240. def __init__(self, requirements, wheel_globs, target_dir,
  241. py_version, bitness, extra_sources=None, exclude=None):
  242. self.requirements = requirements
  243. self.wheel_globs = wheel_globs
  244. self.target_dir = target_dir
  245. target_platform = 'win_amd64' if bitness == 64 else 'win32'
  246. self.scorer = CompatibilityScorer(py_version, target_platform)
  247. self.extra_sources = extra_sources
  248. self.exclude = exclude
  249. self.got_distributions = {}
  250. def get_all(self):
  251. self.get_requirements()
  252. self.get_globs()
  253. def get_requirements(self):
  254. for req in self.requirements:
  255. wl = WheelLocator(req, self.scorer, self.extra_sources)
  256. whl_file = wl.fetch()
  257. extract_wheel(whl_file, self.target_dir, exclude=self.exclude)
  258. self.got_distributions[wl.name] = whl_file
  259. def get_globs(self):
  260. for glob_path in self.wheel_globs:
  261. paths = glob.glob(glob_path)
  262. if not paths:
  263. raise ValueError('Glob path {} does not match any files'
  264. .format(glob_path))
  265. for path in paths:
  266. logger.info('Collecting wheel file: %s (from: %s)',
  267. os.path.basename(path), glob_path)
  268. self.validate_wheel(path)
  269. extract_wheel(path, self.target_dir, exclude=self.exclude)
  270. def validate_wheel(self, whl_path):
  271. """
  272. Verify that the given wheel can safely be included in the current installer.
  273. If so, the given wheel info will be included in the given wheel info array.
  274. If not, an exception will be raised.
  275. """
  276. wheel_name = os.path.basename(whl_path)
  277. distribution = wheel_name.split('-', 1)[0]
  278. # Check that a distribution of same name has not been included before
  279. if distribution in self.got_distributions:
  280. prev_path = self.got_distributions[distribution]
  281. raise ValueError('Multiple wheels specified for {}:\n {}\n {}'.format(
  282. distribution, prev_path, whl_path))
  283. # Check that the wheel is compatible with the installer environment
  284. scores = self.scorer.score(wheel_name)
  285. if any(s == 0 for s in scores):
  286. raise ValueError('Wheel {} is not compatible with Python {}, {}'
  287. .format(wheel_name, self.scorer.py_version, self.scorer.platform))
  288. self.got_distributions[distribution] = whl_path
  289. def make_exclude_regexen(exclude_patterns):
  290. """Translate exclude glob patterns to regex pattern objects.
  291. Handles matching files under a named directory.
  292. """
  293. re_pats = set()
  294. for pattern in exclude_patterns:
  295. re_pats.add(fnmatch.translate(pattern))
  296. if not pattern.endswith('*'):
  297. # Also use the pattern as a directory name and match anything
  298. # under that directory.
  299. suffix = '*' if pattern.endswith('/') else '/*'
  300. re_pats.add(fnmatch.translate(pattern + suffix))
  301. return [re.compile(p) for p in sorted(re_pats)]
  302. def is_excluded(path, exclude_regexen):
  303. """Return True if path matches an exclude pattern"""
  304. path = normalize_path(path)
  305. for re_pattern in exclude_regexen:
  306. if re_pattern.match(path):
  307. return True
  308. return False