path_deploy.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. import ast
  2. import os.path
  3. from contextlib import contextmanager
  4. from functools import partial
  5. import tornado.template
  6. import tornado.web
  7. import tornado.ioloop
  8. from . import page
  9. from .adaptor.http import HttpHandler
  10. from .page import make_applications
  11. from .tornado import webio_handler, set_ioloop
  12. from .tornado_http import TornadoHttpContext
  13. from .utils import cdn_validation, print_listen_address
  14. from ..session import register_session_implement, CoroutineBasedSession, ThreadBasedSession, Session
  15. from ..utils import get_free_port, STATIC_PATH, parse_file_size
# URL prefix under which the bundled static files (``STATIC_PATH``) are served by this module;
# the request handlers below return it from ``get_cdn()`` when no CDN is configured.
LOCAL_STATIC_URL = '/_pywebio_static'
  17. def filename_ok(f):
  18. return not f.startswith(('.', '_'))
  19. def identifiers_info(code):
  20. """Get the identifiers and theirs docstring from python source code.
  21. :return dict:
  22. """
  23. try:
  24. tree = ast.parse(code)
  25. except Exception:
  26. return {}
  27. if not isinstance(tree, ast.Module):
  28. return {}
  29. identifier2doc = {}
  30. for node in tree.body:
  31. if isinstance(node, ast.Assign):
  32. for name in node.targets:
  33. if hasattr(name, 'id'):
  34. identifier2doc[name.id] = ''
  35. elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
  36. doc_string = ast.get_docstring(node) or ''
  37. title = doc_string.split('\n\n')[0]
  38. identifier2doc[node.name] = title
  39. return identifier2doc
  40. def valid_and_norm_path(base, subpath):
  41. """Join the sub-path to base path. This function always ensure the result path is a subpath of base path.
  42. :param str base: MUST a absolute path
  43. :param str subpath: sub-path under the `base` path
  44. :return: normalized result path. None returned if the sub path is not valid
  45. """
  46. subpath = subpath.lstrip('/')
  47. full_path = os.path.normpath(os.path.join(base, subpath))
  48. if not full_path.startswith(base):
  49. return None
  50. parts = subpath.split('/')
  51. for i in parts:
  52. if not filename_ok(i):
  53. return None
  54. return full_path
  55. _cached_modules = {}
  56. def _get_module(path, reload=False):
  57. # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
  58. # https://stackoverflow.com/questions/41861427/python-3-5-how-to-dynamically-import-a-module-given-the-full-file-path-in-the
  59. global _cached_modules
  60. import importlib.util
  61. @contextmanager
  62. def add_to_path(p):
  63. import sys
  64. sys.path.append(p)
  65. try:
  66. yield
  67. finally:
  68. sys.path.remove(p)
  69. if not reload and path in _cached_modules:
  70. return _cached_modules[path]
  71. # import_name will be the `__name__` of the imported module
  72. import_name = "__pywebio__"
  73. with add_to_path(os.path.dirname(path)):
  74. spec = importlib.util.spec_from_file_location(import_name, path, submodule_search_locations=None)
  75. module = importlib.util.module_from_spec(spec)
  76. spec.loader.exec_module(module)
  77. _cached_modules[path] = module
  78. return module
# Tornado template of the default directory index page.
# It is rendered with: `title` (page title and heading), `files` (iterable of
# ``(name, doc)`` pairs, one link per entry) and `max_name_width` (minimum link width, in `ch`).
_app_list_tpl = tornado.template.Template("""
<!DOCTYPE html>
<html lang="">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>{{ title }}</title>
<meta name="description" content="PyWebIO applications index">
<style>a{text-decoration:none;display:inline-block;min-width:{{ max_name_width }}ch}span{color:grey}</style>
</head>
<body>
<h1>{{ title }}</h1>
<hr>
<pre style="line-height: 1.6em; font-size: 16px;">
{% for name,doc in files %} <a href="{{ name }}">{{ name }}</a> <span>{{ doc }}</span>
{% end %}</pre>
<hr>
</body>
</html>
""".strip())
  99. def default_index_page(path, base):
  100. urlpath = path[len(base):] or '/'
  101. title = "Index of %s" % urlpath
  102. dirs = [] if path == base else [('../', '')] # (name, doc)
  103. files = [] # (name, doc)
  104. for f in os.listdir(path):
  105. if not filename_ok(f):
  106. continue
  107. full_path = os.path.join(path, f)
  108. if os.path.isfile(full_path):
  109. if f.endswith('.py'):
  110. code = open(full_path, encoding='utf8').read()
  111. identifiers = identifiers_info(code)
  112. if 'main' in identifiers:
  113. files.append([f[:-3], identifiers['main']])
  114. else:
  115. dirs.append([(f + '/'), ''])
  116. items = dirs + files
  117. max_name_width = max([len(n) for n, _ in items] + [0])
  118. return _app_list_tpl.generate(files=items, title=title, max_name_width=max_name_width)
  119. def get_app_from_path(request_path, base, index, reload=False):
  120. """Get PyWebIO app
  121. :param str request_path: request path
  122. :param str base: dir base path, MUST a absolute path
  123. :param callable index:
  124. :return: ('error', http error code in int) / ('app', pywebio task function) / ('html', Html content in bytes)
  125. """
  126. path = valid_and_norm_path(base, request_path)
  127. if path is None:
  128. return 'error', 403
  129. if os.path.isdir(path):
  130. if not request_path.endswith('/'):
  131. return 'error', 404
  132. if os.path.isfile(os.path.join(path, 'index.py')):
  133. path = os.path.join(path, 'index.py')
  134. elif index:
  135. content = index(path)
  136. return 'html', content
  137. else:
  138. return 'error', 404
  139. else:
  140. path += '.py'
  141. if not os.path.isfile(path):
  142. return 'error', 404
  143. module = _get_module(path, reload=reload)
  144. if hasattr(module, 'main'):
  145. return 'app', make_applications(module.main)
  146. return 'error', 404
  147. def _path_deploy(base, port=0, host='', static_dir=None, max_payload_size=2 ** 20 * 200,
  148. **tornado_app_settings):
  149. if not host:
  150. host = '0.0.0.0'
  151. if port == 0:
  152. port = get_free_port()
  153. tornado_app_settings = {k: v for k, v in tornado_app_settings.items() if v is not None}
  154. abs_base = os.path.normpath(os.path.abspath(base))
  155. register_session_implement(CoroutineBasedSession)
  156. register_session_implement(ThreadBasedSession)
  157. RequestHandler = yield abs_base
  158. handlers = []
  159. if static_dir is not None:
  160. handlers.append((r"/static/(.*)", tornado.web.StaticFileHandler, {"path": static_dir}))
  161. handlers.append((LOCAL_STATIC_URL + r"/(.*)", tornado.web.StaticFileHandler, {"path": STATIC_PATH}))
  162. handlers.append((r"/.*", RequestHandler))
  163. print_listen_address(host, port)
  164. set_ioloop(tornado.ioloop.IOLoop.current()) # to enable bokeh app
  165. app = tornado.web.Application(handlers=handlers, **tornado_app_settings)
  166. app.listen(port, address=host, max_buffer_size=max_payload_size)
  167. tornado.ioloop.IOLoop.current().start()
  168. def path_deploy(base, port=0, host='',
  169. index=True, static_dir=None,
  170. reconnect_timeout=0,
  171. cdn=True, debug=False,
  172. allowed_origins=None, check_origin=None,
  173. max_payload_size='200M',
  174. **tornado_app_settings):
  175. """Deploy the PyWebIO applications from a directory.
  176. The server communicates with the browser using WebSocket protocol.
  177. :param str base: Base directory to load PyWebIO application.
  178. :param int port: The port the server listens on.
  179. :param str host: The host the server listens on.
  180. :param bool/callable index: Whether to provide a default index page when request a directory, default is ``True``.
  181. ``index`` also accepts a function to custom index page, which receives the requested directory path as parameter
  182. and return HTML content in string.
  183. You can override the index page by add a `index.py` PyWebIO app file to the directory.
  184. :param str static_dir: Directory to store the application static files.
  185. The files in this directory can be accessed via ``http://<host>:<port>/static/files``.
  186. For example, if there is a ``A/B.jpg`` file in ``static_dir`` path,
  187. it can be accessed via ``http://<host>:<port>/static/A/B.jpg``.
  188. :param int reconnect_timeout: The client can reconnect to server within ``reconnect_timeout`` seconds after an unexpected disconnection.
  189. If set to 0 (default), once the client disconnects, the server session will be closed.
  190. The rest arguments of ``path_deploy()`` have the same meaning as for :func:`pywebio.platform.tornado.start_server`
  191. """
  192. debug = Session.debug = os.environ.get('PYWEBIO_DEBUG', debug)
  193. page.MAX_PAYLOAD_SIZE = max_payload_size = parse_file_size(max_payload_size)
  194. # Since some cloud server may close idle connections (such as heroku),
  195. # use `websocket_ping_interval` to keep the connection alive
  196. tornado_app_settings.setdefault('websocket_ping_interval', 30)
  197. tornado_app_settings.setdefault('websocket_max_message_size', max_payload_size) # Backward compatible
  198. tornado_app_settings['websocket_max_message_size'] = parse_file_size(
  199. tornado_app_settings['websocket_max_message_size'])
  200. gen = _path_deploy(base, port=port, host=host,
  201. static_dir=static_dir, debug=debug,
  202. max_payload_size=max_payload_size,
  203. **tornado_app_settings)
  204. cdn = cdn_validation(cdn, 'warn', stacklevel=3) # if CDN is not available, warn user and disable CDN
  205. abs_base = next(gen)
  206. index_func = {True: partial(default_index_page, base=abs_base), False: lambda p: '403 Forbidden'}.get(index, index)
  207. Handler = webio_handler(lambda: None, cdn=cdn, allowed_origins=allowed_origins,
  208. check_origin=check_origin, reconnect_timeout=reconnect_timeout)
  209. class WSHandler(Handler):
  210. def get_cdn(self):
  211. _cdn = super().get_cdn()
  212. if not _cdn:
  213. return LOCAL_STATIC_URL
  214. return _cdn
  215. def get_app(self):
  216. reload = self.get_query_argument('reload', None) is not None
  217. type, res = get_app_from_path(self.request.path, abs_base, index=index_func, reload=reload)
  218. if type == 'error':
  219. raise tornado.web.HTTPError(status_code=res)
  220. elif type == 'html':
  221. raise tornado.web.Finish(res)
  222. app_name = self.get_query_argument('app', 'index')
  223. app = res.get(app_name) or res['index']
  224. return app
  225. gen.send(WSHandler)
  226. gen.close()
  227. def path_deploy_http(base, port=0, host='',
  228. index=True, static_dir=None,
  229. cdn=True, debug=False,
  230. allowed_origins=None, check_origin=None,
  231. session_expire_seconds=None,
  232. session_cleanup_interval=None,
  233. max_payload_size='200M',
  234. **tornado_app_settings):
  235. """Deploy the PyWebIO applications from a directory.
  236. The server communicates with the browser using HTTP protocol.
  237. The ``base``, ``port``, ``host``, ``index``, ``static_dir`` arguments of ``path_deploy_http()``
  238. have the same meaning as for :func:`pywebio.platform.path_deploy`
  239. The rest arguments of ``path_deploy_http()`` have the same meaning as for :func:`pywebio.platform.tornado_http.start_server`
  240. """
  241. debug = Session.debug = os.environ.get('PYWEBIO_DEBUG', debug)
  242. page.MAX_PAYLOAD_SIZE = max_payload_size = parse_file_size(max_payload_size)
  243. gen = _path_deploy(base, port=port, host=host,
  244. static_dir=static_dir, debug=debug,
  245. max_payload_size=max_payload_size,
  246. **tornado_app_settings)
  247. cdn = cdn_validation(cdn, 'warn', stacklevel=3) # if CDN is not available, warn user and disable CDN
  248. abs_base = next(gen)
  249. index_func = {True: partial(default_index_page, base=abs_base), False: lambda p: '403 Forbidden'}.get(index, index)
  250. def get_app(context: TornadoHttpContext):
  251. reload = context.request_url_parameter('reload', None) is not None
  252. type, res = get_app_from_path(context.get_path(), abs_base, index=index_func, reload=reload)
  253. if type == 'error':
  254. raise tornado.web.HTTPError(status_code=res)
  255. elif type == 'html':
  256. raise tornado.web.Finish(res)
  257. app_name = context.request_url_parameter('app', 'index')
  258. return res.get(app_name) or res['index']
  259. class HttpPathDeployHandler(HttpHandler):
  260. def get_cdn(self, context):
  261. _cdn = super().get_cdn(context)
  262. if not _cdn:
  263. return LOCAL_STATIC_URL
  264. return _cdn
  265. handler = HttpPathDeployHandler(app_loader=get_app, cdn=cdn,
  266. session_expire_seconds=session_expire_seconds,
  267. session_cleanup_interval=session_cleanup_interval,
  268. allowed_origins=allowed_origins,
  269. check_origin=check_origin)
  270. class ReqHandler(tornado.web.RequestHandler):
  271. def options(self):
  272. return self.get()
  273. def post(self):
  274. return self.get()
  275. def get(self):
  276. context = TornadoHttpContext(self)
  277. self.write(handler.handle_request(context))
  278. gen.send(ReqHandler)
  279. gen.close()