path_deploy.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. import os.path
  2. from functools import partial
  3. from contextlib import contextmanager
  4. import ast
  5. import tornado
  6. from tornado import template
  7. from tornado.web import HTTPError, Finish
  8. from tornado.web import StaticFileHandler
  9. from . import page
  10. from .httpbased import HttpHandler
  11. from .tornado import webio_handler, set_ioloop
  12. from .tornado_http import TornadoHttpContext
  13. from .utils import cdn_validation, print_listen_address
  14. from .page import make_applications
  15. from ..session import register_session_implement, CoroutineBasedSession, ThreadBasedSession, Session
  16. from ..utils import get_free_port, STATIC_PATH, parse_file_size
# URL prefix under which this server exposes the files in ``STATIC_PATH``.
# The request handlers also return it from ``get_cdn()`` when no CDN URL is available.
LOCAL_STATIC_URL = '/_pywebio_static'
  18. def filename_ok(f):
  19. return not f.startswith(('.', '_'))
  20. def identifiers_info(code):
  21. """Get the identifiers and theirs docstring from python source code.
  22. :return dict:
  23. """
  24. try:
  25. tree = ast.parse(code)
  26. except Exception:
  27. return {}
  28. if not isinstance(tree, ast.Module):
  29. return {}
  30. identifier2doc = {}
  31. for node in tree.body:
  32. if isinstance(node, ast.Assign):
  33. for name in node.targets:
  34. if hasattr(name, 'id'):
  35. identifier2doc[name.id] = ''
  36. elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
  37. doc_string = ast.get_docstring(node) or ''
  38. title = doc_string.split('\n\n')[0]
  39. identifier2doc[node.name] = title
  40. return identifier2doc
  41. def valid_and_norm_path(base, subpath):
  42. """Join the sub-path to base path. This function always ensure the result path is a subpath of base path.
  43. :param str base: MUST a absolute path
  44. :param str subpath: sub-path under the `base` path
  45. :return: normalized result path. None returned if the sub path is not valid
  46. """
  47. subpath = subpath.lstrip('/')
  48. full_path = os.path.normpath(os.path.join(base, subpath))
  49. if not full_path.startswith(base):
  50. return None
  51. parts = subpath.split('/')
  52. for i in parts:
  53. if not filename_ok(i):
  54. return None
  55. return full_path
  56. _cached_modules = {}
  57. def _get_module(path, reload=False):
  58. # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
  59. # https://stackoverflow.com/questions/41861427/python-3-5-how-to-dynamically-import-a-module-given-the-full-file-path-in-the
  60. global _cached_modules
  61. import importlib.util
  62. @contextmanager
  63. def add_to_path(p):
  64. import sys
  65. sys.path.append(p)
  66. try:
  67. yield
  68. finally:
  69. sys.path.remove(p)
  70. if not reload and path in _cached_modules:
  71. return _cached_modules[path]
  72. # import_name will be the `__name__` of the imported module
  73. import_name = "__pywebio__"
  74. with add_to_path(os.path.dirname(path)):
  75. spec = importlib.util.spec_from_file_location(import_name, path, submodule_search_locations=None)
  76. module = importlib.util.module_from_spec(spec)
  77. spec.loader.exec_module(module)
  78. _cached_modules[path] = module
  79. return module
# Tornado template of the directory index page. It is rendered with:
#   files          - iterable of (name, doc) pairs, one link per entry
#   title          - text used for both <title> and the <h1> heading
#   max_name_width - min-width (in `ch` units) of every link, so the docs line up
_app_list_tpl = template.Template("""
<!DOCTYPE html>
<html lang="">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>{{ title }}</title>
<meta name="description" content="PyWebIO applications index">
<style>a{text-decoration:none;display:inline-block;min-width:{{ max_name_width }}ch}span{color:grey}</style>
</head>
<body>
<h1>{{ title }}</h1>
<hr>
<pre style="line-height: 1.6em; font-size: 16px;">
{% for name,doc in files %} <a href="{{ name }}">{{ name }}</a> <span>{{ doc }}</span>
{% end %}</pre>
<hr>
</body>
</html>
""".strip())
  100. def default_index_page(path, base):
  101. urlpath = path[len(base):] or '/'
  102. title = "Index of %s" % urlpath
  103. dirs = [] if path == base else [('../', '')] # (name, doc)
  104. files = [] # (name, doc)
  105. for f in os.listdir(path):
  106. if not filename_ok(f):
  107. continue
  108. full_path = os.path.join(path, f)
  109. if os.path.isfile(full_path):
  110. if f.endswith('.py'):
  111. code = open(full_path, encoding='utf8').read()
  112. identifiers = identifiers_info(code)
  113. if 'main' in identifiers:
  114. files.append([f[:-3], identifiers['main']])
  115. else:
  116. dirs.append([(f + '/'), ''])
  117. items = dirs + files
  118. max_name_width = max([len(n) for n, _ in items]+[0])
  119. return _app_list_tpl.generate(files=items, title=title, max_name_width=max_name_width)
  120. def get_app_from_path(request_path, base, index, reload=False):
  121. """Get PyWebIO app
  122. :param str request_path: request path
  123. :param str base: dir base path, MUST a absolute path
  124. :param callable index:
  125. :return: ('error', http error code in int) / ('app', pywebio task function) / ('html', Html content in bytes)
  126. """
  127. path = valid_and_norm_path(base, request_path)
  128. if path is None:
  129. return 'error', 403
  130. if os.path.isdir(path):
  131. if not request_path.endswith('/'):
  132. return 'error', 404
  133. if os.path.isfile(os.path.join(path, 'index.py')):
  134. path = os.path.join(path, 'index.py')
  135. elif index:
  136. content = index(path)
  137. return 'html', content
  138. else:
  139. return 'error', 404
  140. else:
  141. path += '.py'
  142. if not os.path.isfile(path):
  143. return 'error', 404
  144. module = _get_module(path, reload=reload)
  145. if hasattr(module, 'main'):
  146. return 'app', make_applications(module.main)
  147. return 'error', 404
  148. def _path_deploy(base, port=0, host='', static_dir=None, max_payload_size=2 ** 20 * 200,
  149. **tornado_app_settings):
  150. if not host:
  151. host = '0.0.0.0'
  152. if port == 0:
  153. port = get_free_port()
  154. tornado_app_settings = {k: v for k, v in tornado_app_settings.items() if v is not None}
  155. abs_base = os.path.normpath(os.path.abspath(base))
  156. register_session_implement(CoroutineBasedSession)
  157. register_session_implement(ThreadBasedSession)
  158. RequestHandler = yield abs_base
  159. handlers = []
  160. if static_dir is not None:
  161. handlers.append((r"/static/(.*)", StaticFileHandler, {"path": static_dir}))
  162. handlers.append((LOCAL_STATIC_URL+r"/(.*)", StaticFileHandler, {"path": STATIC_PATH}))
  163. handlers.append((r"/.*", RequestHandler))
  164. print_listen_address(host, port)
  165. set_ioloop(tornado.ioloop.IOLoop.current()) # to enable bokeh app
  166. app = tornado.web.Application(handlers=handlers, **tornado_app_settings)
  167. app.listen(port, address=host, max_buffer_size=max_payload_size)
  168. tornado.ioloop.IOLoop.current().start()
  169. def path_deploy(base, port=0, host='',
  170. index=True, static_dir=None,
  171. reconnect_timeout=0,
  172. cdn=True, debug=False,
  173. allowed_origins=None, check_origin=None,
  174. max_payload_size='200M',
  175. **tornado_app_settings):
  176. """Deploy the PyWebIO applications from a directory.
  177. The server communicates with the browser using WebSocket protocol.
  178. :param str base: Base directory to load PyWebIO application.
  179. :param int port: The port the server listens on.
  180. :param str host: The host the server listens on.
  181. :param bool/callable index: Whether to provide a default index page when request a directory, default is ``True``.
  182. ``index`` also accepts a function to custom index page, which receives the requested directory path as parameter
  183. and return HTML content in string.
  184. You can override the index page by add a `index.py` PyWebIO app file to the directory.
  185. :param str static_dir: Directory to store the application static files.
  186. The files in this directory can be accessed via ``http://<host>:<port>/static/files``.
  187. For example, if there is a ``A/B.jpg`` file in ``static_dir`` path,
  188. it can be accessed via ``http://<host>:<port>/static/A/B.jpg``.
  189. :param int reconnect_timeout: The client can reconnect to server within ``reconnect_timeout`` seconds after an unexpected disconnection.
  190. If set to 0 (default), once the client disconnects, the server session will be closed.
  191. The rest arguments of ``path_deploy()`` have the same meaning as for :func:`pywebio.platform.tornado.start_server`
  192. """
  193. debug = Session.debug = os.environ.get('PYWEBIO_DEBUG', debug)
  194. page.MAX_PAYLOAD_SIZE = max_payload_size = parse_file_size(max_payload_size)
  195. # Since some cloud server may close idle connections (such as heroku),
  196. # use `websocket_ping_interval` to keep the connection alive
  197. tornado_app_settings.setdefault('websocket_ping_interval', 30)
  198. tornado_app_settings.setdefault('websocket_max_message_size', max_payload_size) # Backward compatible
  199. tornado_app_settings['websocket_max_message_size'] = parse_file_size(tornado_app_settings['websocket_max_message_size'])
  200. gen = _path_deploy(base, port=port, host=host,
  201. static_dir=static_dir, debug=debug,
  202. max_payload_size=max_payload_size,
  203. **tornado_app_settings)
  204. cdn = cdn_validation(cdn, 'warn', stacklevel=3) # if CDN is not available, warn user and disable CDN
  205. abs_base = next(gen)
  206. index_func = {True: partial(default_index_page, base=abs_base), False: lambda p: '403 Forbidden'}.get(index, index)
  207. Handler = webio_handler(lambda: None, cdn=cdn, allowed_origins=allowed_origins,
  208. check_origin=check_origin, reconnect_timeout=reconnect_timeout)
  209. class WSHandler(Handler):
  210. def get_cdn(self):
  211. _cdn = super().get_cdn()
  212. if not _cdn:
  213. return LOCAL_STATIC_URL
  214. return _cdn
  215. def get_app(self):
  216. reload = self.get_query_argument('reload', None) is not None
  217. type, res = get_app_from_path(self.request.path, abs_base, index=index_func, reload=reload)
  218. if type == 'error':
  219. raise HTTPError(status_code=res)
  220. elif type == 'html':
  221. raise Finish(res)
  222. app_name = self.get_query_argument('app', 'index')
  223. app = res.get(app_name) or res['index']
  224. return app
  225. gen.send(WSHandler)
  226. gen.close()
  227. def path_deploy_http(base, port=0, host='',
  228. index=True, static_dir=None,
  229. cdn=True, debug=False,
  230. allowed_origins=None, check_origin=None,
  231. session_expire_seconds=None,
  232. session_cleanup_interval=None,
  233. max_payload_size='200M',
  234. **tornado_app_settings):
  235. """Deploy the PyWebIO applications from a directory.
  236. The server communicates with the browser using HTTP protocol.
  237. The ``base``, ``port``, ``host``, ``index``, ``static_dir`` arguments of ``path_deploy_http()``
  238. have the same meaning as for :func:`pywebio.platform.path_deploy`
  239. The rest arguments of ``path_deploy_http()`` have the same meaning as for :func:`pywebio.platform.tornado_http.start_server`
  240. """
  241. debug = Session.debug = os.environ.get('PYWEBIO_DEBUG', debug)
  242. page.MAX_PAYLOAD_SIZE = max_payload_size = parse_file_size(max_payload_size)
  243. gen = _path_deploy(base, port=port, host=host,
  244. static_dir=static_dir, debug=debug,
  245. max_payload_size=max_payload_size,
  246. **tornado_app_settings)
  247. cdn = cdn_validation(cdn, 'warn', stacklevel=3) # if CDN is not available, warn user and disable CDN
  248. abs_base = next(gen)
  249. index_func = {True: partial(default_index_page, base=abs_base), False: lambda p: '403 Forbidden'}.get(index, index)
  250. def get_app(context: TornadoHttpContext):
  251. reload = context.request_url_parameter('reload', None) is not None
  252. type, res = get_app_from_path(context.get_path(), abs_base, index=index_func, reload=reload)
  253. if type == 'error':
  254. raise HTTPError(status_code=res)
  255. elif type == 'html':
  256. raise Finish(res)
  257. app_name = context.request_url_parameter('app', 'index')
  258. return res.get(app_name) or res['index']
  259. class HttpPathDeployHandler(HttpHandler):
  260. def get_cdn(self, context):
  261. _cdn = super().get_cdn(context)
  262. if not _cdn:
  263. return LOCAL_STATIC_URL
  264. return _cdn
  265. handler = HttpPathDeployHandler(app_loader=get_app, cdn=cdn,
  266. session_expire_seconds=session_expire_seconds,
  267. session_cleanup_interval=session_cleanup_interval,
  268. allowed_origins=allowed_origins,
  269. check_origin=check_origin)
  270. class ReqHandler(tornado.web.RequestHandler):
  271. def options(self):
  272. return self.get()
  273. def post(self):
  274. return self.get()
  275. def get(self):
  276. context = TornadoHttpContext(self)
  277. self.write(handler.handle_request(context))
  278. gen.send(ReqHandler)
  279. gen.close()