# test_data_node_config.py
  1. # Copyright 2023 Avaiga Private Limited
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  4. # the License. You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  9. # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
  10. # specific language governing permissions and limitations under the License.
  11. import datetime
  12. import os
  13. from unittest import mock
  14. import pytest
  15. from src.taipy.core import MongoDefaultDocument
  16. from src.taipy.core._orchestrator._orchestrator_factory import _OrchestratorFactory
  17. from src.taipy.core.config import DataNodeConfig
  18. from src.taipy.core.config.job_config import JobConfig
  19. from taipy.config.common.scope import Scope
  20. from taipy.config.config import Config
  21. from taipy.config.exceptions.exceptions import ConfigurationUpdateBlocked
  22. def test_data_node_config_default_parameter():
  23. csv_dn_cfg = Config.configure_data_node("data_node_1", "csv")
  24. assert csv_dn_cfg.scope == Scope.SCENARIO
  25. assert csv_dn_cfg.has_header is True
  26. assert csv_dn_cfg.exposed_type == "pandas"
  27. assert csv_dn_cfg.validity_period is None
  28. json_dn_cfg = Config.configure_data_node("data_node_2", "json")
  29. assert json_dn_cfg.scope == Scope.SCENARIO
  30. assert json_dn_cfg.validity_period is None
  31. parquet_dn_cfg = Config.configure_data_node("data_node_3", "parquet")
  32. assert parquet_dn_cfg.scope == Scope.SCENARIO
  33. assert parquet_dn_cfg.engine == "pyarrow"
  34. assert parquet_dn_cfg.compression == "snappy"
  35. assert parquet_dn_cfg.exposed_type == "pandas"
  36. assert parquet_dn_cfg.validity_period is None
  37. excel_dn_cfg = Config.configure_data_node("data_node_4", "excel")
  38. assert excel_dn_cfg.scope == Scope.SCENARIO
  39. assert excel_dn_cfg.has_header is True
  40. assert excel_dn_cfg.exposed_type == "pandas"
  41. assert excel_dn_cfg.validity_period is None
  42. generic_dn_cfg = Config.configure_data_node("data_node_5", "generic")
  43. assert generic_dn_cfg.scope == Scope.SCENARIO
  44. assert generic_dn_cfg.validity_period is None
  45. in_memory_dn_cfg = Config.configure_data_node("data_node_6", "in_memory")
  46. assert in_memory_dn_cfg.scope == Scope.SCENARIO
  47. assert in_memory_dn_cfg.validity_period is None
  48. pickle_dn_cfg = Config.configure_data_node("data_node_7", "pickle")
  49. assert pickle_dn_cfg.scope == Scope.SCENARIO
  50. assert pickle_dn_cfg.validity_period is None
  51. sql_table_dn_cfg = Config.configure_data_node(
  52. "data_node_8", "sql_table", db_name="test", db_engine="mssql", table_name="test"
  53. )
  54. assert sql_table_dn_cfg.scope == Scope.SCENARIO
  55. assert sql_table_dn_cfg.db_host == "localhost"
  56. assert sql_table_dn_cfg.db_port == 1433
  57. assert sql_table_dn_cfg.db_driver == ""
  58. assert sql_table_dn_cfg.sqlite_file_extension == ".db"
  59. assert sql_table_dn_cfg.exposed_type == "pandas"
  60. assert sql_table_dn_cfg.validity_period is None
  61. sql_dn_cfg = Config.configure_data_node(
  62. "data_node_9", "sql", db_name="test", db_engine="mssql", read_query="test", write_query_builder=print
  63. )
  64. assert sql_dn_cfg.scope == Scope.SCENARIO
  65. assert sql_dn_cfg.db_host == "localhost"
  66. assert sql_dn_cfg.db_port == 1433
  67. assert sql_dn_cfg.db_driver == ""
  68. assert sql_dn_cfg.sqlite_file_extension == ".db"
  69. assert sql_dn_cfg.exposed_type == "pandas"
  70. assert sql_dn_cfg.validity_period is None
  71. mongo_dn_cfg = Config.configure_data_node(
  72. "data_node_10", "mongo_collection", db_name="test", collection_name="test"
  73. )
  74. assert mongo_dn_cfg.scope == Scope.SCENARIO
  75. assert mongo_dn_cfg.db_host == "localhost"
  76. assert mongo_dn_cfg.db_port == 27017
  77. assert mongo_dn_cfg.custom_document == MongoDefaultDocument
  78. assert mongo_dn_cfg.db_username == ""
  79. assert mongo_dn_cfg.db_password == ""
  80. assert mongo_dn_cfg.db_driver == ""
  81. assert mongo_dn_cfg.validity_period is None
  82. def test_data_node_config_check(caplog):
  83. data_node_config = Config.configure_data_node("data_nodes1", "pickle")
  84. assert list(Config.data_nodes) == [DataNodeConfig._DEFAULT_KEY, data_node_config.id]
  85. data_node2_config = Config.configure_data_node("data_nodes2", "pickle")
  86. assert list(Config.data_nodes) == [DataNodeConfig._DEFAULT_KEY, data_node_config.id, data_node2_config.id]
  87. data_node3_config = Config.configure_data_node("data_nodes3", "csv", has_header=True, default_path="")
  88. assert list(Config.data_nodes) == [
  89. "default",
  90. data_node_config.id,
  91. data_node2_config.id,
  92. data_node3_config.id,
  93. ]
  94. with pytest.raises(SystemExit):
  95. Config.configure_data_node("data_nodes", storage_type="bar")
  96. Config.check()
  97. expected_error_message = (
  98. "`storage_type` field of DataNodeConfig `data_nodes` must be either csv, sql_table,"
  99. " sql, mongo_collection, pickle, excel, generic, json, parquet, or in_memory. Current"
  100. ' value of property `storage_type` is "bar".'
  101. )
  102. assert expected_error_message in caplog.text
  103. with pytest.raises(SystemExit):
  104. Config.configure_data_node("data_nodes", scope="bar")
  105. Config.check()
  106. expected_error_message = (
  107. "`scope` field of DataNodeConfig `data_nodes` must be populated with a Scope value."
  108. ' Current value of property `scope` is "bar".'
  109. )
  110. assert expected_error_message in caplog.text
  111. with pytest.raises(TypeError):
  112. Config.configure_data_node("data_nodes", storage_type="sql")
  113. with pytest.raises(SystemExit):
  114. Config.configure_data_node("data_nodes", storage_type="generic")
  115. Config.check()
  116. expected_error_message = (
  117. "`storage_type` field of DataNodeConfig `data_nodes` must be either csv, sql_table,"
  118. " sql, mongo_collection, pickle, excel, generic, json, parquet, or in_memory."
  119. ' Current value of property `storage_type` is "bar".'
  120. )
  121. assert expected_error_message in caplog.text
  122. def test_configure_data_node_from_another_configuration():
  123. d1_cfg = Config.configure_sql_table_data_node(
  124. "d1",
  125. db_username="foo",
  126. db_password="bar",
  127. db_name="db",
  128. db_engine="mssql",
  129. db_port=8080,
  130. db_host="somewhere",
  131. table_name="foo",
  132. scope=Scope.GLOBAL,
  133. foo="bar",
  134. )
  135. d2_cfg = Config.configure_data_node_from(
  136. source_configuration=d1_cfg,
  137. id="d2",
  138. table_name="table_2",
  139. )
  140. assert d2_cfg.id == "d2"
  141. assert d2_cfg.storage_type == "sql_table"
  142. assert d2_cfg.scope == Scope.GLOBAL
  143. assert d2_cfg.validity_period is None
  144. assert d2_cfg.db_username == "foo"
  145. assert d2_cfg.db_password == "bar"
  146. assert d2_cfg.db_name == "db"
  147. assert d2_cfg.db_engine == "mssql"
  148. assert d2_cfg.db_port == 8080
  149. assert d2_cfg.db_host == "somewhere"
  150. assert d2_cfg.table_name == "table_2"
  151. assert d2_cfg.foo == "bar"
  152. d3_cfg = Config.configure_data_node_from(
  153. source_configuration=d1_cfg,
  154. id="d3",
  155. scope=Scope.SCENARIO,
  156. validity_period=datetime.timedelta(days=1),
  157. table_name="table_3",
  158. foo="baz",
  159. )
  160. assert d3_cfg.id == "d3"
  161. assert d3_cfg.storage_type == "sql_table"
  162. assert d3_cfg.scope == Scope.SCENARIO
  163. assert d3_cfg.validity_period == datetime.timedelta(days=1)
  164. assert d3_cfg.db_username == "foo"
  165. assert d3_cfg.db_password == "bar"
  166. assert d3_cfg.db_name == "db"
  167. assert d3_cfg.db_engine == "mssql"
  168. assert d3_cfg.db_port == 8080
  169. assert d3_cfg.db_host == "somewhere"
  170. assert d3_cfg.table_name == "table_3"
  171. assert d3_cfg.foo == "baz"
  172. def test_data_node_count():
  173. Config.configure_data_node("data_nodes1", "pickle")
  174. assert len(Config.data_nodes) == 2
  175. Config.configure_data_node("data_nodes2", "pickle")
  176. assert len(Config.data_nodes) == 3
  177. Config.configure_data_node("data_nodes3", "pickle")
  178. assert len(Config.data_nodes) == 4
  179. def test_data_node_getitem():
  180. data_node_id = "data_nodes1"
  181. data_node_config = Config.configure_data_node(data_node_id, "pickle", default_path="foo.p")
  182. assert Config.data_nodes[data_node_id].id == data_node_config.id
  183. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  184. assert Config.data_nodes[data_node_id].storage_type == data_node_config.storage_type
  185. assert Config.data_nodes[data_node_id].scope == data_node_config.scope
  186. assert Config.data_nodes[data_node_id].properties == data_node_config.properties
  187. assert Config.data_nodes[data_node_id].cacheable == data_node_config.cacheable
  188. def test_data_node_creation_no_duplication():
  189. Config.configure_data_node("data_nodes1", "pickle")
  190. assert len(Config.data_nodes) == 2
  191. Config.configure_data_node("data_nodes1", "pickle")
  192. assert len(Config.data_nodes) == 2
  193. def test_date_node_create_with_datetime():
  194. data_node_config = Config.configure_data_node(
  195. id="datetime_data",
  196. my_property=datetime.datetime(1991, 1, 1),
  197. foo="hello",
  198. test=1,
  199. test_dict={"type": "Datetime", 2: "daw"},
  200. )
  201. assert data_node_config.foo == "hello"
  202. assert data_node_config.my_property == datetime.datetime(1991, 1, 1)
  203. assert data_node_config.test == 1
  204. assert data_node_config.test_dict.get("type") == "Datetime"
  205. def test_data_node_with_env_variable_value():
  206. with mock.patch.dict(os.environ, {"FOO": "pickle", "BAR": "baz"}):
  207. Config.configure_data_node("data_node", storage_type="ENV[FOO]", prop="ENV[BAR]")
  208. assert Config.data_nodes["data_node"].prop == "baz"
  209. assert Config.data_nodes["data_node"].properties["prop"] == "baz"
  210. assert Config.data_nodes["data_node"]._properties["prop"] == "ENV[BAR]"
  211. assert Config.data_nodes["data_node"].storage_type == "pickle"
  212. assert Config.data_nodes["data_node"]._storage_type == "ENV[FOO]"
  213. def test_data_node_with_env_variable_in_write_fct_args():
  214. def read_fct():
  215. ...
  216. def write_fct():
  217. ...
  218. with mock.patch.dict(os.environ, {"FOO": "bar", "BAZ": "qux"}):
  219. Config.configure_data_node(
  220. "data_node",
  221. storage_type="generic",
  222. read_fct=read_fct,
  223. write_fct=write_fct,
  224. write_fct_args=["ENV[FOO]", "my_param", "ENV[BAZ]"],
  225. )
  226. assert Config.data_nodes["data_node"].write_fct_args == ["bar", "my_param", "qux"]
  227. def test_data_node_with_env_variable_in_read_fct_args():
  228. def read_fct():
  229. ...
  230. def write_fct():
  231. ...
  232. with mock.patch.dict(os.environ, {"FOO": "bar", "BAZ": "qux"}):
  233. Config.configure_data_node(
  234. "data_node",
  235. storage_type="generic",
  236. read_fct=read_fct,
  237. write_fct=write_fct,
  238. read_fct_args=["ENV[FOO]", "my_param", "ENV[BAZ]"],
  239. )
  240. assert Config.data_nodes["data_node"].read_fct_args == ["bar", "my_param", "qux"]
  241. def test_block_datanode_config_update_in_development_mode():
  242. data_node_id = "data_node_id"
  243. Config.configure_job_executions(mode=JobConfig._DEVELOPMENT_MODE)
  244. data_node_config = Config.configure_data_node(
  245. id=data_node_id,
  246. storage_type="pickle",
  247. default_path="foo.p",
  248. scope=Scope.SCENARIO,
  249. )
  250. assert Config.data_nodes[data_node_id].id == data_node_id
  251. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  252. assert Config.data_nodes[data_node_id].storage_type == "pickle"
  253. assert Config.data_nodes[data_node_id].scope == Scope.SCENARIO
  254. assert Config.data_nodes[data_node_id].properties == {"default_path": "foo.p"}
  255. _OrchestratorFactory._build_dispatcher()
  256. with pytest.raises(ConfigurationUpdateBlocked):
  257. data_node_config.storage_type = "foo"
  258. with pytest.raises(ConfigurationUpdateBlocked):
  259. data_node_config.scope = Scope.SCENARIO
  260. with pytest.raises(ConfigurationUpdateBlocked):
  261. data_node_config.cacheable = True
  262. with pytest.raises(ConfigurationUpdateBlocked):
  263. data_node_config.properties = {"foo": "bar"}
  264. assert Config.data_nodes[data_node_id].id == data_node_id
  265. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  266. assert Config.data_nodes[data_node_id].storage_type == "pickle"
  267. assert Config.data_nodes[data_node_id].scope == Scope.SCENARIO
  268. assert Config.data_nodes[data_node_id].properties == {"default_path": "foo.p"}
  269. def test_block_datanode_config_update_in_standalone_mode():
  270. data_node_id = "data_node_id"
  271. Config.configure_job_executions(mode=JobConfig._STANDALONE_MODE)
  272. data_node_config = Config.configure_data_node(
  273. id=data_node_id,
  274. storage_type="pickle",
  275. default_path="foo.p",
  276. scope=Scope.SCENARIO,
  277. )
  278. assert Config.data_nodes[data_node_id].id == data_node_id
  279. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  280. assert Config.data_nodes[data_node_id].storage_type == "pickle"
  281. assert Config.data_nodes[data_node_id].scope == Scope.SCENARIO
  282. assert Config.data_nodes[data_node_id].properties == {"default_path": "foo.p"}
  283. _OrchestratorFactory._build_dispatcher()
  284. with pytest.raises(ConfigurationUpdateBlocked):
  285. data_node_config.storage_type = "foo"
  286. with pytest.raises(ConfigurationUpdateBlocked):
  287. data_node_config.scope = Scope.SCENARIO
  288. with pytest.raises(ConfigurationUpdateBlocked):
  289. data_node_config.cacheable = True
  290. with pytest.raises(ConfigurationUpdateBlocked):
  291. data_node_config.properties = {"foo": "bar"}
  292. assert Config.data_nodes[data_node_id].id == data_node_id
  293. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  294. assert Config.data_nodes[data_node_id].storage_type == "pickle"
  295. assert Config.data_nodes[data_node_id].scope == Scope.SCENARIO
  296. assert Config.data_nodes[data_node_id].properties == {"default_path": "foo.p"}
  297. def test_clean_config():
  298. dn1_config = Config.configure_data_node(
  299. id="id1",
  300. storage_type="csv",
  301. default_path="foo.p",
  302. scope=Scope.GLOBAL,
  303. validity_period=datetime.timedelta(2),
  304. )
  305. dn2_config = Config.configure_data_node(
  306. id="id2",
  307. storage_type="json",
  308. default_path="bar.json",
  309. scope=Scope.GLOBAL,
  310. validity_period=datetime.timedelta(2),
  311. )
  312. assert Config.data_nodes["id1"] is dn1_config
  313. assert Config.data_nodes["id2"] is dn2_config
  314. dn1_config._clean()
  315. dn2_config._clean()
  316. # Check if the instance before and after _clean() is the same
  317. assert Config.data_nodes["id1"] is dn1_config
  318. assert Config.data_nodes["id2"] is dn2_config
  319. # Check if the value is similar to the default_config, but with difference instances
  320. assert dn1_config.id == "id1"
  321. assert dn2_config.id == "id2"
  322. assert dn1_config.storage_type == dn2_config.storage_type == "pickle"
  323. assert dn1_config.scope == dn2_config.scope == Scope.SCENARIO
  324. assert dn1_config.validity_period is dn2_config.validity_period is None
  325. assert dn1_config.default_path is dn2_config.default_path is None
  326. assert dn1_config.properties == dn2_config.properties == {}
  327. def test_deprecated_cacheable_attribute_remains_compatible():
  328. dn_1_id = "dn_1_id"
  329. dn_1_config = Config.configure_data_node(
  330. id=dn_1_id,
  331. storage_type="pickle",
  332. cacheable=False,
  333. scope=Scope.SCENARIO,
  334. )
  335. assert Config.data_nodes[dn_1_id].id == dn_1_id
  336. assert Config.data_nodes[dn_1_id].storage_type == "pickle"
  337. assert Config.data_nodes[dn_1_id].scope == Scope.SCENARIO
  338. assert Config.data_nodes[dn_1_id].properties == {"cacheable": False}
  339. assert not Config.data_nodes[dn_1_id].cacheable
  340. dn_1_config.cacheable = True
  341. assert Config.data_nodes[dn_1_id].properties == {"cacheable": True}
  342. assert Config.data_nodes[dn_1_id].cacheable
  343. dn_2_id = "dn_2_id"
  344. dn_2_config = Config.configure_data_node(
  345. id=dn_2_id,
  346. storage_type="pickle",
  347. cacheable=True,
  348. scope=Scope.SCENARIO,
  349. )
  350. assert Config.data_nodes[dn_2_id].id == dn_2_id
  351. assert Config.data_nodes[dn_2_id].storage_type == "pickle"
  352. assert Config.data_nodes[dn_2_id].scope == Scope.SCENARIO
  353. assert Config.data_nodes[dn_2_id].properties == {"cacheable": True}
  354. assert Config.data_nodes[dn_2_id].cacheable
  355. dn_2_config.cacheable = False
  356. assert Config.data_nodes[dn_1_id].properties == {"cacheable": False}
  357. assert not Config.data_nodes[dn_1_id].cacheable
  358. dn_3_id = "dn_3_id"
  359. dn_3_config = Config.configure_data_node(
  360. id=dn_3_id,
  361. storage_type="pickle",
  362. scope=Scope.SCENARIO,
  363. )
  364. assert Config.data_nodes[dn_3_id].id == dn_3_id
  365. assert Config.data_nodes[dn_3_id].storage_type == "pickle"
  366. assert Config.data_nodes[dn_3_id].scope == Scope.SCENARIO
  367. assert Config.data_nodes[dn_3_id].properties == {}
  368. assert not Config.data_nodes[dn_3_id].cacheable
  369. dn_3_config.cacheable = True
  370. assert Config.data_nodes[dn_3_id].properties == {"cacheable": True}
  371. assert Config.data_nodes[dn_3_id].cacheable