datanode.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663
  1. # Copyright 2021-2024 Avaiga Private Limited
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  4. # the License. You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  9. # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
  10. # specific language governing permissions and limitations under the License.
  11. from typing import List
  12. import numpy as np
  13. import pandas as pd
  14. from flask import request
  15. from flask_restful import Resource
  16. from taipy.config.config import Config
  17. from taipy.core import DataNode
  18. from taipy.core.data._data_manager_factory import _DataManagerFactory
  19. from taipy.core.data.operator import Operator
  20. from taipy.core.exceptions.exceptions import NonExistingDataNode, NonExistingDataNodeConfig
  21. from ...commons.to_from_model import _to_model
  22. from ..exceptions.exceptions import ConfigIdMissingException
  23. from ..middlewares._middleware import _middleware
  24. from ..schemas import (
  25. CSVDataNodeConfigSchema,
  26. DataNodeFilterSchema,
  27. DataNodeSchema,
  28. ExcelDataNodeConfigSchema,
  29. GenericDataNodeConfigSchema,
  30. InMemoryDataNodeConfigSchema,
  31. JSONDataNodeConfigSchema,
  32. MongoCollectionDataNodeConfigSchema,
  33. PickleDataNodeConfigSchema,
  34. SQLDataNodeConfigSchema,
  35. SQLTableDataNodeConfigSchema,
  36. )
  # Maps the storage type of a data node configuration to the schema class used
  # to serialize that configuration in the response of the creation endpoint.
  ds_schema_map = {
      "csv": CSVDataNodeConfigSchema,
      "pickle": PickleDataNodeConfigSchema,
      "in_memory": InMemoryDataNodeConfigSchema,
      "sql_table": SQLTableDataNodeConfigSchema,
      "sql": SQLDataNodeConfigSchema,
      "mongo_collection": MongoCollectionDataNodeConfigSchema,
      "excel": ExcelDataNodeConfigSchema,
      "generic": GenericDataNodeConfigSchema,
      "json": JSONDataNodeConfigSchema,
  }

  # Repository name handed to `_to_model` when data nodes are converted to models.
  REPOSITORY = "data"
  def _get_or_raise(data_node_id: str) -> DataNode:
      """Return the data node registered under *data_node_id*.

      Args:
          data_node_id: Identifier handed to the data manager lookup.

      Returns:
          The data node returned by the data manager.

      Raises:
          NonExistingDataNode: If the data manager returns a falsy result for
              *data_node_id*.
      """
      found = _DataManagerFactory._build_manager()._get(data_node_id)
      if found:
          return found
      raise NonExistingDataNode(data_node_id)
  55. class DataNodeResource(Resource):
  56. """Single object resource
  57. ---
  58. get:
  59. tags:
  60. - api
  61. description: |
  62. Returns a `DataNodeSchema^` representing the unique `DataNode^` identified by the *datanode_id*
  63. given as parameter. If no data node corresponds to *datanode_id*, a `404` error is returned.
  64. !!! Example
  65. === "Curl"
  66. ```shell
  67. curl -X GET \
  68. http://localhost:5000/api/v1/datanodes/DATANODE_hist_cfg_75750ed8-4e09-4e00-958d-e352ee426cc9
  69. ```
  70. In this example the REST API is served on port 5000 on localhost. We are using curl command line
  71. client.
  72. `DATANODE_hist_cfg_75750ed8-4e09-4e00-958d-e352ee426cc9` is the value of the *datanode_id* parameter. It
  73. represents the identifier of the data node we want to retrieve.
  74. In case of success here is an example of the response:
  75. ``` JSON
  76. {"datanode": {
  77. "id": "DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d",
  78. "config_id": "historical_data_set",
  79. "scope": "<Scope.SCENARIO: 2>",
  80. "storage_type": "csv",
  81. "name": "Name of my historical data node",
  82. "owner_id": "SCENARIO_my_awesome_scenario_97f3fd67-8556-4c62-9b3b-ef189a599a38",
  83. "last_edit_date": "2022-08-10T16:03:40.855082",
  84. "job_ids": [],
  85. "version": "latest",
  86. "cacheable": false,
  87. "validity_days": null,
  88. "validity_seconds": null,
  89. "edit_in_progress": false,
  90. "data_node_properties": {
  91. "path": "daily-min-temperatures.csv",
  92. "has_header": true}
  93. }}
  94. ```
  95. In case of failure here is an example of the response:
  96. ``` JSON
  97. {"message":"DataNode DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d not found"}
  98. ```
  99. === "Python"
  100. This Python example requires the 'requests' package to be installed (`pip install requests`).
  101. ```python
  102. import requests
  103. response = requests.get(
  104. "http://localhost:5000/api/v1/datanodes/DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d")
  105. print(response)
  106. print(response.json())
  107. ```
  108. `DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d` is the value of the *datanode_id* parameter. It
  109. represents the identifier of the data node we want to retrieve.
  110. In case of success here is an output example:
  111. ```
  112. <Response [200]>
  113. {"datanode": {
  114. "id": "DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d",
  115. "config_id": "historical_data_set",
  116. "scope": "<Scope.SCENARIO: 2>",
  117. "storage_type": "csv",
  118. "name": "Name of my historical data node",
  119. "owner_id": "SCENARIO_my_awesome_scenario_97f3fd67-8556-4c62-9b3b-ef189a599a38",
  120. "last_edit_date": "2022-08-10T16:03:40.855082",
  121. "job_ids": [],
  122. "version": "latest",
  123. "cacheable": false,
  124. "validity_days": null,
  125. "validity_seconds": null,
  126. "edit_in_progress": false,
  127. "data_node_properties": {
  128. "path": "daily-min-temperatures.csv",
  129. "has_header": true}
  130. }}
  131. ```
  132. In case of failure here is an output example:
  133. ```
  134. <Response [404]>
  135. {"message":"DataNode DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d not found"}
  136. ```
  137. !!! Note
  138. When the authorization feature is activated (available in Taipy Enterprise edition only), this endpoint
  139. requires the `TAIPY_READER` role.
  140. parameters:
  141. - in: path
  142. name: datanode_id
  143. schema:
  144. type: string
  145. description: The identifier of the data node to retrieve.
  146. responses:
  147. 200:
  148. content:
  149. application/json:
  150. schema:
  151. type: object
  152. properties:
  153. datanode: DataNodeSchema
  154. 404:
  155. description: No data node has the *datanode_id* identifier.
  156. delete:
  157. tags:
  158. - api
  159. summary: Delete a data node.
  160. description: |
  161. Deletes the `DataNode^` identified by the *datanode_id* given as parameter. If the data node does not exist,
  162. a 404 error is returned.
  163. !!! Example
  164. === "Curl"
  165. ```shell
  166. curl -X DELETE \
  167. http://localhost:5000/api/v1/datanodes/DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d
  168. ```
  169. In this example the REST API is served on port 5000 on localhost. We are using curl command line
  170. client.
  171. `DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d` is the value of the
  172. *datanode_id* parameter. It represents the identifier of the data node we want to delete.
  173. In case of success here is an example of the response:
  174. ``` JSON
  175. {"message": "Data node DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d was deleted."}
  176. ```
  177. In case of failure here is an example of the response:
  178. ``` JSON
  179. {"message": "Data node DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d not found."}
  180. ```
  181. === "Python"
  182. This Python example requires the 'requests' package to be installed (`pip install requests`).
  183. ```python
  184. import requests
  185. response = requests.delete(
  186. "http://localhost:5000/api/v1/datanodes/DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d")
  187. print(response)
  188. print(response.json())
  189. ```
  190. `DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d` is the value of the
  191. *datanode_id* parameter. It represents the identifier of the data node we want to delete.
  192. In case of success here is an output example:
  193. ```
  194. <Response [200]>
  195. {"message": "Data node DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d was deleted."}
  196. ```
  197. In case of failure here is an output example:
  198. ```
  199. <Response [404]>
  200. {'message': 'Data node DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d not found.'}
  201. ```
  202. !!! Note
  203. When the authorization feature is activated (available in Taipy Enterprise edition only), this endpoint
  204. requires the `TAIPY_EDITOR` role.
  205. parameters:
  206. - in: path
  207. name: datanode_id
  208. schema:
  209. type: string
  210. description: The identifier of the data node to delete.
  211. responses:
  212. 200:
  213. content:
  214. application/json:
  215. schema:
  216. type: object
  217. properties:
  218. message:
  219. type: string
  220. description: Status message.
  221. 404:
  222. description: No data node has the *datanode_id* identifier.
  223. """
  def __init__(self, **kwargs):
      """Store the optional ``logger`` keyword argument on the instance.

      ``self.logger`` is ``None`` when no ``logger`` keyword is supplied; any
      other keyword argument is ignored.
      """
      logger = kwargs.get("logger", None)
      self.logger = logger
  @_middleware
  def get(self, datanode_id):
      """Return the serialized data node identified by *datanode_id*.

      Args:
          datanode_id: Identifier of the data node to retrieve.

      Returns:
          A dictionary whose "datanode" entry is the `DataNodeSchema` dump of
          the data node model.

      Raises:
          NonExistingDataNode: Propagated from `_get_or_raise` when the data
              node cannot be found.
      """
      serializer = DataNodeSchema()
      model = _to_model(REPOSITORY, _get_or_raise(datanode_id))
      return {"datanode": serializer.dump(model)}
  @_middleware
  def delete(self, datanode_id):
      """Delete the data node identified by *datanode_id*.

      Args:
          datanode_id: Identifier of the data node to delete.

      Returns:
          A dictionary with a "message" entry confirming the deletion.

      Raises:
          NonExistingDataNode: Propagated from `_get_or_raise` when the data
              node cannot be found.
      """
      # The lookup result is not needed: the call only guards against deleting
      # an identifier that does not exist.
      _get_or_raise(datanode_id)
      _DataManagerFactory._build_manager()._delete(datanode_id)
      return {"message": f"Data node {datanode_id} was deleted."}
  237. class DataNodeList(Resource):
  238. """Creation and get_all
  239. ---
  240. get:
  241. tags:
  242. - api
  243. description: |
  244. Returns a `DataNodeSchema^` list representing all existing data nodes.
  245. !!! Example
  246. === "Curl"
  247. ```shell
  248. curl -X GET http://localhost:5000/api/v1/datanodes
  249. ```
  250. In this example the REST API is served on port 5000 on localhost. We are using curl command line
  251. client.
  252. Here is an example of the response:
  253. ``` JSON
  254. [
  255. {"datanode": {
  256. "id": "DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d",
  257. "config_id": "historical_data_set",
  258. "scope": "<Scope.SCENARIO: 2>",
  259. "storage_type": "csv",
  260. "name": "Name of my historical data node",
  261. "owner_id": "SCENARIO_my_awesome_scenario_97f3fd67-8556-4c62-9b3b-ef189a599a38",
  262. "last_edit_date": "2022-08-10T16:03:40.855082",
  263. "job_ids": [],
  264. "version": "latest",
  265. "cacheable": false,
  266. "validity_days": null,
  267. "validity_seconds": null,
  268. "edit_in_progress": false,
  269. "data_node_properties": {
  270. "path": "daily-min-temperatures.csv",
  271. "has_header": true}
  272. }}
  273. ]
  274. ```
  275. If there is no data node, the response is an empty list as follows:
  276. ``` JSON
  277. []
  278. ```
  279. === "Python"
  280. This Python example requires the 'requests' package to be installed (`pip install requests`).
  281. ```python
  282. import requests
  283. response = requests.get("http://localhost:5000/api/v1/datanodes")
  284. print(response)
  285. print(response.json())
  286. ```
  287. In case of success here is an output example:
  288. ```
  289. <Response [200]>
  290. [
  291. {"datanode": {
  292. "id": "DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d",
  293. "config_id": "historical_data_set",
  294. "scope": "<Scope.SCENARIO: 2>",
  295. "storage_type": "csv",
  296. "name": "Name of my historical data node",
  297. "owner_id": "SCENARIO_my_awesome_scenario_97f3fd67-8556-4c62-9b3b-ef189a599a38",
  298. "last_edit_date": "2022-08-10T16:03:40.855082",
  299. "job_ids": [],
  300. "version": "latest",
  301. "cacheable": false,
  302. "validity_days": null,
  303. "validity_seconds": null,
  304. "edit_in_progress": false,
  305. "data_node_properties": {
  306. "path": "daily-min-temperatures.csv",
  307. "has_header": true}
  308. }}
  309. ]
  310. ```
  311. If there is no data node, the response is an empty list as follows:
  312. ```
  313. <Response [200]>
  314. []
  315. ```
  316. !!! Note
  317. When the authorization feature is activated (available in Taipy Enterprise edition only), this endpoint
  318. requires the `TAIPY_READER` role.
  319. responses:
  320. 200:
  321. content:
  322. application/json:
  323. schema:
  324. allOf:
  325. - type: object
  326. properties:
  327. results:
  328. type: array
  329. items:
  330. $ref: '#/components/schemas/DataNodeSchema'
  331. post:
  332. tags:
  333. - api
  334. description: |
  335. Creates a new data node from the *config_id* given as parameter.
  336. !!! Example
  337. === "Curl"
  338. ```shell
  339. curl -X POST http://localhost:5000/api/v1/datanodes?config_id=historical_data_set
  340. ```
  341. In this example the REST API is served on port 5000 on localhost. We are using curl command line
  342. client.
  343. In this example the *config_id* value ("historical_data_set") is given as parameter directly in the
  344. url. A corresponding `DataNodeConfig^` must exist and must have been configured before.
  345. Here is the output message example:
  346. ```
  347. {"message": "Data node was created.",
  348. "datanode": {
  349. "default_path": null,
  350. "path": "daily-min-temperatures.csv",
  351. "name": null,
  352. "storage_type": "csv",
  353. "scope": 2,
  354. "has_header": true}
  355. }
  356. ```
  357. === "Python"
  358. This Python example requires the 'requests' package to be installed (`pip install requests`).
  359. ```python
  360. import requests
  361. response = requests.post("http://localhost:5000/api/v1/datanodes?config_id=historical_data_set")
  362. print(response)
  363. print(response.json())
  364. ```
  365. In this example the *config_id* value ("historical_data_set") is given as parameter directly in the
  366. url. A corresponding `DataNodeConfig^` must exist and must have been configured before.
  367. Here is the output example:
  368. ```
  369. <Response [201]>
  370. {'message': 'Data node was created.',
  371. 'datanode': {
  372. 'name': None,
  373. 'scope': 2,
  374. 'path': 'daily-min-temperatures.csv',
  375. 'storage_type': 'csv',
  376. 'default_path': None,
  377. 'has_header': True}}
  378. ```
  379. !!! Note
  380. When the authorization feature is activated (available in Taipy Enterprise edition only), this endpoint
  381. requires the `TAIPY_EDITOR` role.
  382. parameters:
  383. - in: query
  384. name: config_id
  385. schema:
  386. type: string
  387. description: The identifier of the data node configuration.
  388. responses:
  389. 201:
  390. content:
  391. application/json:
  392. schema:
  393. type: object
  394. properties:
  395. message:
  396. type: string
  397. description: Status message.
  398. datanode: DataNodeSchema
  399. """
  def __init__(self, **kwargs):
      """Store the optional ``logger`` keyword argument on the instance.

      ``self.logger`` is ``None`` when no ``logger`` keyword is supplied; any
      other keyword argument is ignored.
      """
      logger = kwargs.get("logger", None)
      self.logger = logger
  def fetch_config(self, config_id):
      """Look up the data node configuration registered under *config_id*.

      Args:
          config_id: Key looked up in `Config.data_nodes`.

      Returns:
          The matching data node configuration.

      Raises:
          NonExistingDataNodeConfig: If the lookup yields a falsy result.
      """
      found = Config.data_nodes.get(config_id)
      if found:
          return found
      raise NonExistingDataNodeConfig(config_id)
  @_middleware
  def get(self):
      """Return every data node known to the data manager, serialized.

      Returns:
          The `DataNodeSchema(many=True)` dump of all data node models.
      """
      serializer = DataNodeSchema(many=True)
      all_nodes = _DataManagerFactory._build_manager()._get_all()
      models = [_to_model(REPOSITORY, node) for node in all_nodes]
      return serializer.dump(models)
  @_middleware
  def post(self):
      """Create a data node from the configuration named by the ``config_id`` query argument.

      Returns:
          A ``(body, 201)`` tuple. The body holds a status "message" and, under
          "datanode", the configuration dumped with the schema registered for
          its storage type in `ds_schema_map`.

      Raises:
          ConfigIdMissingException: If the ``config_id`` query argument is
              missing or empty.
          NonExistingDataNodeConfig: Propagated from `fetch_config` when no
              configuration matches ``config_id``.
          ValueError: If the configuration's storage type has no schema
              registered in `ds_schema_map`.
      """
      config_id = request.args.get("config_id")
      if not config_id:
          raise ConfigIdMissingException

      config = self.fetch_config(config_id)

      # Resolve the schema before creating anything, so an unsupported storage
      # type fails with an explicit message instead of calling ``None``.
      schema_class = ds_schema_map.get(config.storage_type)
      if schema_class is None:
          raise ValueError(
              f"Unsupported storage type '{config.storage_type}' for data node configuration '{config_id}'."
          )
      schema = schema_class()

      manager = _DataManagerFactory._build_manager()
      manager._bulk_get_or_create({config})

      return {
          "message": "Data node was created.",
          "datanode": schema.dump(config),
      }, 201
  427. class DataNodeReader(Resource):
  428. """Single object resource
  429. ---
  430. get:
  431. tags:
  432. - api
  433. description: |
  434. Returns the data read from the data node identified by *datanode_id*. If the data node does not exist,
  435. a 404 error is returned.
  436. !!! Example
  437. === "Curl"
  438. ```shell
  439. curl -X GET \
  440. http://localhost:5000/api/v1/datanodes/DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d/read
  441. ```
  442. `DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d` is the *datanode_id*
  443. parameter. It represents the identifier of the data node to read.
  444. Here is an output example. In this case, the storage type of the data node to read is `csv`,
  445. and no exposed type is specified. The data is exposed as a list of dictionaries, each dictionary
  446. representing a row of the csv file.
  447. ```
  448. {"data": [
  449. {"Date": "1981-01-01", "Temp": 20.7}, {"Date": "1981-01-02", "Temp": 17.9},
  450. {"Date": "1981-01-03", "Temp": 18.8}, {"Date": "1981-01-04", "Temp": 14.6},
  451. {"Date": "1981-01-05", "Temp": 15.8}, {"Date": "1981-01-06", "Temp": 15.8},
  452. {"Date": "1981-01-07", "Temp": 15.8}
  453. ]}
  454. ```
  455. === "Python"
  456. This Python example requires the 'requests' package to be installed (`pip install requests`).
  457. ```python
  458. import requests
  459. response = requests.get(
  460. "http://localhost:5000/api/v1/datanodes/DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d/read")
  461. print(response)
  462. print(response.json())
  463. ```
  464. `DATANODE_historical_data_set_9db1b542-2e45-44e7-8a85-03ef9ead173d` is the *datanode_id*
  465. parameter. It represents the identifier of the data node to read.
  466. Here is an output example. In this case, the storage type of the data node to read is `csv`,
  467. and no exposed type is specified. The data is exposed as a list of dictionaries, each dictionary
  468. representing a row of the csv file.
  469. ```
  470. {"data": [
  471. {"Date": "1981-01-01", "Temp": 20.7}, {"Date": "1981-01-02", "Temp": 17.9},
  472. {"Date": "1981-01-03", "Temp": 18.8}, {"Date": "1981-01-04", "Temp": 14.6},
  473. {"Date": "1981-01-05", "Temp": 15.8}, {"Date": "1981-01-06", "Temp": 15.8},
  474. {"Date": "1981-01-07", "Temp": 15.8}
  475. ]}
  476. ```
  477. !!! Note
  478. When the authorization feature is activated (available in Taipy Enterprise edition only), this endpoint
  479. requires the `TAIPY_READER` role.
  480. parameters:
  481. - in: path
  482. name: datanode_id
  483. schema:
  484. type: string
  485. description: The id of the data node to read.
  486. requestBody:
  487. content:
  488. application/json:
  489. schema:
  490. DataNodeFilterSchema
  491. responses:
  492. 200:
  493. content:
  494. application/json:
  495. schema:
  496. type: object
  497. properties:
  498. data:
  499. type: Any
  500. description: The data read from the data node.
  501. 404:
  502. description: No data node has the *datanode_id* identifier.
  503. """
  def __init__(self, **kwargs):
      """Store the optional ``logger`` keyword argument on the instance.

      ``self.logger`` is ``None`` when no ``logger`` keyword is supplied; any
      other keyword argument is ignored.
      """
      logger = kwargs.get("logger", None)
      self.logger = logger
  def __make_operators(self, schema: dict) -> List[tuple]:
      """Turn a loaded filter payload into the operator triples used for filtering.

      Args:
          schema: The mapping obtained by loading the request body with
              `DataNodeFilterSchema`. Its optional "operators" entry is a list
              of mappings read through their "key", "value" and "operator"
              entries.

      Returns:
          A list of ``(key, value, Operator)`` tuples, empty when the payload
          has no "operators" entry.

      Raises:
          AttributeError: If an operator name is missing or does not name an
              attribute of `Operator` (names are upper-cased before lookup).
      """
      # A payload without "operators" means "no filter" rather than a crash on
      # iterating ``None``.
      entries = schema.get("operators") or []
      return [
          (
              entry.get("key"),
              entry.get("value"),
              Operator(getattr(Operator, entry.get("operator", "").upper())),
          )
          for entry in entries
      ]
  @_middleware
  def get(self, datanode_id):
      """Read a data node, optionally filtered, and return its data.

      When the request carries a JSON body, it is loaded with
      `DataNodeFilterSchema` and converted into filter operators; otherwise
      the data node is filtered with an empty operator list.

      Args:
          datanode_id: Identifier of the data node to read.

      Returns:
          A dictionary holding the result under "data". A pandas DataFrame is
          converted to a list of record dictionaries and a NumPy array to
          (nested) lists of native Python values; any other result is returned
          unchanged.

      Raises:
          NonExistingDataNode: Propagated from `_get_or_raise` when the data
              node cannot be found.
      """
      filter_schema = DataNodeFilterSchema()
      # silent=True makes a missing or unparsable body come back as None.
      payload = request.get_json(silent=True)
      data_node = _get_or_raise(datanode_id)
      operators = self.__make_operators(filter_schema.load(payload)) if payload else []

      data = data_node.filter(operators)
      if isinstance(data, pd.DataFrame):
          data = data.to_dict(orient="records")
      elif isinstance(data, np.ndarray):
          # tolist() converts NumPy scalars to native Python values and inner
          # arrays to lists; list() keeps both, and they are not JSON
          # serializable.
          data = data.tolist()
      return {"data": data}
  527. class DataNodeWriter(Resource):
  528. """Single object resource
  529. ---
  530. put:
  531. tags:
  532. - api
  533. summary: Write into a data node.
  534. description: |
  535. Write data from request body into a data node by *datanode_id*. If the data node does not exist, a 404 error is
  536. returned.
  537. !!! Note
  538. When the authorization feature is activated (available in the **Enterprise** edition only), this endpoint
  539. requires `TAIPY_EDITOR` role.
  540. Code example:
  541. ```shell
  542. curl -X PUT -d '[{"path": "/abc", "type": 1}, {"path": "/def", "type": 2}]' \\
  543. -H 'Content-Type: application/json' \\
  544. http://localhost:5000/api/v1/datanodes/DATANODE_my_config_75750ed8-4e09-4e00-958d-e352ee426cc9/write
  545. ```
  546. parameters:
  547. - in: path
  548. name: datanode_id
  549. schema:
  550. type: string
  551. requestBody:
  552. content:
  553. application/json:
  554. schema:
  555. Any
  556. responses:
  557. 200:
  558. content:
  559. application/json:
  560. schema:
  561. type: object
  562. properties:
  563. message:
  564. type: string
  565. description: Status message.
  566. 404:
  567. description: No data node has the *datanode_id* identifier.
  568. """
  def __init__(self, **kwargs):
      """Store the optional ``logger`` keyword argument on the instance.

      ``self.logger`` is ``None`` when no ``logger`` keyword is supplied; any
      other keyword argument is ignored.
      """
      logger = kwargs.get("logger", None)
      self.logger = logger
  @_middleware
  def put(self, datanode_id):
      """Write the JSON request body into the data node identified by *datanode_id*.

      Args:
          datanode_id: Identifier of the data node to write into.

      Returns:
          A dictionary with a "message" entry confirming the write.

      Raises:
          NonExistingDataNode: Propagated from `_get_or_raise` when the data
              node cannot be found.
      """
      # The body is read before the lookup, as in the original statement order.
      payload = request.json
      target = _get_or_raise(datanode_id)
      target.write(payload)
      return {"message": f"Data node {datanode_id} was successfully written."}