# test_data_node_config.py
# Copyright 2023 Avaiga Private Limited
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
import datetime
import os
from unittest import mock

import pytest

from taipy.config.common.scope import Scope
from taipy.config.config import Config
from taipy.config.exceptions.exceptions import ConfigurationUpdateBlocked
from taipy.core import MongoDefaultDocument
from taipy.core._orchestrator._orchestrator_factory import _OrchestratorFactory
from taipy.core.config import DataNodeConfig
from taipy.core.config.job_config import JobConfig
  22. def test_data_node_config_default_parameter():
  23. csv_dn_cfg = Config.configure_data_node("data_node_1", "csv")
  24. assert csv_dn_cfg.scope == Scope.SCENARIO
  25. assert csv_dn_cfg.has_header is True
  26. assert csv_dn_cfg.exposed_type == "pandas"
  27. assert csv_dn_cfg.validity_period is None
  28. json_dn_cfg = Config.configure_data_node("data_node_2", "json")
  29. assert json_dn_cfg.scope == Scope.SCENARIO
  30. assert json_dn_cfg.validity_period is None
  31. parquet_dn_cfg = Config.configure_data_node("data_node_3", "parquet")
  32. assert parquet_dn_cfg.scope == Scope.SCENARIO
  33. assert parquet_dn_cfg.engine == "pyarrow"
  34. assert parquet_dn_cfg.compression == "snappy"
  35. assert parquet_dn_cfg.exposed_type == "pandas"
  36. assert parquet_dn_cfg.validity_period is None
  37. excel_dn_cfg = Config.configure_data_node("data_node_4", "excel")
  38. assert excel_dn_cfg.scope == Scope.SCENARIO
  39. assert excel_dn_cfg.has_header is True
  40. assert excel_dn_cfg.exposed_type == "pandas"
  41. assert excel_dn_cfg.validity_period is None
  42. generic_dn_cfg = Config.configure_data_node("data_node_5", "generic")
  43. assert generic_dn_cfg.scope == Scope.SCENARIO
  44. assert generic_dn_cfg.validity_period is None
  45. in_memory_dn_cfg = Config.configure_data_node("data_node_6", "in_memory")
  46. assert in_memory_dn_cfg.scope == Scope.SCENARIO
  47. assert in_memory_dn_cfg.validity_period is None
  48. pickle_dn_cfg = Config.configure_data_node("data_node_7", "pickle")
  49. assert pickle_dn_cfg.scope == Scope.SCENARIO
  50. assert pickle_dn_cfg.validity_period is None
  51. sql_table_dn_cfg = Config.configure_data_node(
  52. "data_node_8", "sql_table", db_name="test", db_engine="mssql", table_name="test"
  53. )
  54. assert sql_table_dn_cfg.scope == Scope.SCENARIO
  55. assert sql_table_dn_cfg.db_host == "localhost"
  56. assert sql_table_dn_cfg.db_port == 1433
  57. assert sql_table_dn_cfg.db_driver == ""
  58. assert sql_table_dn_cfg.sqlite_file_extension == ".db"
  59. assert sql_table_dn_cfg.exposed_type == "pandas"
  60. assert sql_table_dn_cfg.validity_period is None
  61. sql_dn_cfg = Config.configure_data_node(
  62. "data_node_9", "sql", db_name="test", db_engine="mssql", read_query="test", write_query_builder=print
  63. )
  64. assert sql_dn_cfg.scope == Scope.SCENARIO
  65. assert sql_dn_cfg.db_host == "localhost"
  66. assert sql_dn_cfg.db_port == 1433
  67. assert sql_dn_cfg.db_driver == ""
  68. assert sql_dn_cfg.sqlite_file_extension == ".db"
  69. assert sql_dn_cfg.exposed_type == "pandas"
  70. assert sql_dn_cfg.validity_period is None
  71. mongo_dn_cfg = Config.configure_data_node(
  72. "data_node_10", "mongo_collection", db_name="test", collection_name="test"
  73. )
  74. assert mongo_dn_cfg.scope == Scope.SCENARIO
  75. assert mongo_dn_cfg.db_host == "localhost"
  76. assert mongo_dn_cfg.db_port == 27017
  77. assert mongo_dn_cfg.custom_document == MongoDefaultDocument
  78. assert mongo_dn_cfg.db_username == ""
  79. assert mongo_dn_cfg.db_password == ""
  80. assert mongo_dn_cfg.db_driver == ""
  81. assert mongo_dn_cfg.validity_period is None
  82. aws_s3_object_dn_cfg = Config.configure_data_node(
  83. "data_node_11", "s3_object", aws_access_key="test", aws_secret_access_key="test_secret",
  84. aws_s3_bucket_name="test_bucket", aws_s3_object_key="test_file.txt"
  85. )
  86. assert aws_s3_object_dn_cfg.scope == Scope.SCENARIO
  87. assert aws_s3_object_dn_cfg.aws_access_key == "test"
  88. assert aws_s3_object_dn_cfg.aws_secret_access_key == "test_secret"
  89. assert aws_s3_object_dn_cfg.aws_s3_bucket_name == "test_bucket"
  90. assert aws_s3_object_dn_cfg.aws_s3_object_key == "test_file.txt"
  91. assert aws_s3_object_dn_cfg.aws_region is None
  92. assert aws_s3_object_dn_cfg.aws_s3_object_parameters is None
  93. assert aws_s3_object_dn_cfg.validity_period is None
  94. def test_data_node_config_check(caplog):
  95. data_node_config = Config.configure_data_node("data_nodes1", "pickle")
  96. assert list(Config.data_nodes) == [DataNodeConfig._DEFAULT_KEY, data_node_config.id]
  97. data_node2_config = Config.configure_data_node("data_nodes2", "pickle")
  98. assert list(Config.data_nodes) == [DataNodeConfig._DEFAULT_KEY, data_node_config.id, data_node2_config.id]
  99. data_node3_config = Config.configure_data_node("data_nodes3", "csv", has_header=True, default_path="")
  100. assert list(Config.data_nodes) == [
  101. "default",
  102. data_node_config.id,
  103. data_node2_config.id,
  104. data_node3_config.id,
  105. ]
  106. with pytest.raises(SystemExit):
  107. Config.configure_data_node("data_nodes", storage_type="bar")
  108. Config.check()
  109. expected_error_message = (
  110. "`storage_type` field of DataNodeConfig `data_nodes` must be either csv, sql_table,"
  111. " sql, mongo_collection, pickle, excel, generic, json, parquet, s3_object, or in_memory. Current"
  112. ' value of property `storage_type` is "bar".'
  113. )
  114. assert expected_error_message in caplog.text
  115. with pytest.raises(SystemExit):
  116. Config.configure_data_node("data_nodes", scope="bar")
  117. Config.check()
  118. expected_error_message = (
  119. "`scope` field of DataNodeConfig `data_nodes` must be populated with a Scope value."
  120. ' Current value of property `scope` is "bar".'
  121. )
  122. assert expected_error_message in caplog.text
  123. with pytest.raises(TypeError):
  124. Config.configure_data_node("data_nodes", storage_type="sql")
  125. with pytest.raises(SystemExit):
  126. Config.configure_data_node("data_nodes", storage_type="generic")
  127. Config.check()
  128. expected_error_message = (
  129. "`storage_type` field of DataNodeConfig `data_nodes` must be either csv, sql_table,"
  130. " sql, mongo_collection, pickle, excel, generic, json, parquet, s3_object, or in_memory."
  131. ' Current value of property `storage_type` is "bar".'
  132. )
  133. assert expected_error_message in caplog.text
  134. def test_configure_data_node_from_another_configuration():
  135. d1_cfg = Config.configure_sql_table_data_node(
  136. "d1",
  137. db_username="foo",
  138. db_password="bar",
  139. db_name="db",
  140. db_engine="mssql",
  141. db_port=8080,
  142. db_host="somewhere",
  143. table_name="foo",
  144. scope=Scope.GLOBAL,
  145. foo="bar",
  146. )
  147. d2_cfg = Config.configure_data_node_from(
  148. source_configuration=d1_cfg,
  149. id="d2",
  150. table_name="table_2",
  151. )
  152. assert d2_cfg.id == "d2"
  153. assert d2_cfg.storage_type == "sql_table"
  154. assert d2_cfg.scope == Scope.GLOBAL
  155. assert d2_cfg.validity_period is None
  156. assert d2_cfg.db_username == "foo"
  157. assert d2_cfg.db_password == "bar"
  158. assert d2_cfg.db_name == "db"
  159. assert d2_cfg.db_engine == "mssql"
  160. assert d2_cfg.db_port == 8080
  161. assert d2_cfg.db_host == "somewhere"
  162. assert d2_cfg.table_name == "table_2"
  163. assert d2_cfg.foo == "bar"
  164. d3_cfg = Config.configure_data_node_from(
  165. source_configuration=d1_cfg,
  166. id="d3",
  167. scope=Scope.SCENARIO,
  168. validity_period=datetime.timedelta(days=1),
  169. table_name="table_3",
  170. foo="baz",
  171. )
  172. assert d3_cfg.id == "d3"
  173. assert d3_cfg.storage_type == "sql_table"
  174. assert d3_cfg.scope == Scope.SCENARIO
  175. assert d3_cfg.validity_period == datetime.timedelta(days=1)
  176. assert d3_cfg.db_username == "foo"
  177. assert d3_cfg.db_password == "bar"
  178. assert d3_cfg.db_name == "db"
  179. assert d3_cfg.db_engine == "mssql"
  180. assert d3_cfg.db_port == 8080
  181. assert d3_cfg.db_host == "somewhere"
  182. assert d3_cfg.table_name == "table_3"
  183. assert d3_cfg.foo == "baz"
  184. def test_data_node_count():
  185. Config.configure_data_node("data_nodes1", "pickle")
  186. assert len(Config.data_nodes) == 2
  187. Config.configure_data_node("data_nodes2", "pickle")
  188. assert len(Config.data_nodes) == 3
  189. Config.configure_data_node("data_nodes3", "pickle")
  190. assert len(Config.data_nodes) == 4
  191. def test_data_node_getitem():
  192. data_node_id = "data_nodes1"
  193. data_node_config = Config.configure_data_node(data_node_id, "pickle", default_path="foo.p")
  194. assert Config.data_nodes[data_node_id].id == data_node_config.id
  195. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  196. assert Config.data_nodes[data_node_id].storage_type == data_node_config.storage_type
  197. assert Config.data_nodes[data_node_id].scope == data_node_config.scope
  198. assert Config.data_nodes[data_node_id].properties == data_node_config.properties
  199. assert Config.data_nodes[data_node_id].cacheable == data_node_config.cacheable
  200. def test_data_node_creation_no_duplication():
  201. Config.configure_data_node("data_nodes1", "pickle")
  202. assert len(Config.data_nodes) == 2
  203. Config.configure_data_node("data_nodes1", "pickle")
  204. assert len(Config.data_nodes) == 2
  205. def test_date_node_create_with_datetime():
  206. data_node_config = Config.configure_data_node(
  207. id="datetime_data",
  208. my_property=datetime.datetime(1991, 1, 1),
  209. foo="hello",
  210. test=1,
  211. test_dict={"type": "Datetime", 2: "daw"},
  212. )
  213. assert data_node_config.foo == "hello"
  214. assert data_node_config.my_property == datetime.datetime(1991, 1, 1)
  215. assert data_node_config.test == 1
  216. assert data_node_config.test_dict.get("type") == "Datetime"
  217. def test_data_node_with_env_variable_value():
  218. with mock.patch.dict(os.environ, {"FOO": "pickle", "BAR": "baz"}):
  219. Config.configure_data_node("data_node", storage_type="ENV[FOO]", prop="ENV[BAR]")
  220. assert Config.data_nodes["data_node"].prop == "baz"
  221. assert Config.data_nodes["data_node"].properties["prop"] == "baz"
  222. assert Config.data_nodes["data_node"]._properties["prop"] == "ENV[BAR]"
  223. assert Config.data_nodes["data_node"].storage_type == "pickle"
  224. assert Config.data_nodes["data_node"]._storage_type == "ENV[FOO]"
  225. def test_data_node_with_env_variable_in_write_fct_args():
  226. def read_fct():
  227. ...
  228. def write_fct():
  229. ...
  230. with mock.patch.dict(os.environ, {"FOO": "bar", "BAZ": "qux"}):
  231. Config.configure_data_node(
  232. "data_node",
  233. storage_type="generic",
  234. read_fct=read_fct,
  235. write_fct=write_fct,
  236. write_fct_args=["ENV[FOO]", "my_param", "ENV[BAZ]"],
  237. )
  238. assert Config.data_nodes["data_node"].write_fct_args == ["bar", "my_param", "qux"]
  239. def test_data_node_with_env_variable_in_read_fct_args():
  240. def read_fct():
  241. ...
  242. def write_fct():
  243. ...
  244. with mock.patch.dict(os.environ, {"FOO": "bar", "BAZ": "qux"}):
  245. Config.configure_data_node(
  246. "data_node",
  247. storage_type="generic",
  248. read_fct=read_fct,
  249. write_fct=write_fct,
  250. read_fct_args=["ENV[FOO]", "my_param", "ENV[BAZ]"],
  251. )
  252. assert Config.data_nodes["data_node"].read_fct_args == ["bar", "my_param", "qux"]
  253. def test_block_datanode_config_update_in_development_mode():
  254. data_node_id = "data_node_id"
  255. Config.configure_job_executions(mode=JobConfig._DEVELOPMENT_MODE)
  256. data_node_config = Config.configure_data_node(
  257. id=data_node_id,
  258. storage_type="pickle",
  259. default_path="foo.p",
  260. scope=Scope.SCENARIO,
  261. )
  262. assert Config.data_nodes[data_node_id].id == data_node_id
  263. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  264. assert Config.data_nodes[data_node_id].storage_type == "pickle"
  265. assert Config.data_nodes[data_node_id].scope == Scope.SCENARIO
  266. assert Config.data_nodes[data_node_id].properties == {"default_path": "foo.p"}
  267. _OrchestratorFactory._build_dispatcher()
  268. with pytest.raises(ConfigurationUpdateBlocked):
  269. data_node_config.storage_type = "foo"
  270. with pytest.raises(ConfigurationUpdateBlocked):
  271. data_node_config.scope = Scope.SCENARIO
  272. with pytest.raises(ConfigurationUpdateBlocked):
  273. data_node_config.cacheable = True
  274. with pytest.raises(ConfigurationUpdateBlocked):
  275. data_node_config.properties = {"foo": "bar"}
  276. assert Config.data_nodes[data_node_id].id == data_node_id
  277. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  278. assert Config.data_nodes[data_node_id].storage_type == "pickle"
  279. assert Config.data_nodes[data_node_id].scope == Scope.SCENARIO
  280. assert Config.data_nodes[data_node_id].properties == {"default_path": "foo.p"}
  281. def test_block_datanode_config_update_in_standalone_mode():
  282. data_node_id = "data_node_id"
  283. Config.configure_job_executions(mode=JobConfig._STANDALONE_MODE)
  284. data_node_config = Config.configure_data_node(
  285. id=data_node_id,
  286. storage_type="pickle",
  287. default_path="foo.p",
  288. scope=Scope.SCENARIO,
  289. )
  290. assert Config.data_nodes[data_node_id].id == data_node_id
  291. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  292. assert Config.data_nodes[data_node_id].storage_type == "pickle"
  293. assert Config.data_nodes[data_node_id].scope == Scope.SCENARIO
  294. assert Config.data_nodes[data_node_id].properties == {"default_path": "foo.p"}
  295. _OrchestratorFactory._build_dispatcher()
  296. with pytest.raises(ConfigurationUpdateBlocked):
  297. data_node_config.storage_type = "foo"
  298. with pytest.raises(ConfigurationUpdateBlocked):
  299. data_node_config.scope = Scope.SCENARIO
  300. with pytest.raises(ConfigurationUpdateBlocked):
  301. data_node_config.cacheable = True
  302. with pytest.raises(ConfigurationUpdateBlocked):
  303. data_node_config.properties = {"foo": "bar"}
  304. assert Config.data_nodes[data_node_id].id == data_node_id
  305. assert Config.data_nodes[data_node_id].default_path == "foo.p"
  306. assert Config.data_nodes[data_node_id].storage_type == "pickle"
  307. assert Config.data_nodes[data_node_id].scope == Scope.SCENARIO
  308. assert Config.data_nodes[data_node_id].properties == {"default_path": "foo.p"}
  309. def test_clean_config():
  310. dn1_config = Config.configure_data_node(
  311. id="id1",
  312. storage_type="csv",
  313. default_path="foo.p",
  314. scope=Scope.GLOBAL,
  315. validity_period=datetime.timedelta(2),
  316. )
  317. dn2_config = Config.configure_data_node(
  318. id="id2",
  319. storage_type="json",
  320. default_path="bar.json",
  321. scope=Scope.GLOBAL,
  322. validity_period=datetime.timedelta(2),
  323. )
  324. assert Config.data_nodes["id1"] is dn1_config
  325. assert Config.data_nodes["id2"] is dn2_config
  326. dn1_config._clean()
  327. dn2_config._clean()
  328. # Check if the instance before and after _clean() is the same
  329. assert Config.data_nodes["id1"] is dn1_config
  330. assert Config.data_nodes["id2"] is dn2_config
  331. # Check if the value is similar to the default_config, but with difference instances
  332. assert dn1_config.id == "id1"
  333. assert dn2_config.id == "id2"
  334. assert dn1_config.storage_type == dn2_config.storage_type == "pickle"
  335. assert dn1_config.scope == dn2_config.scope == Scope.SCENARIO
  336. assert dn1_config.validity_period is dn2_config.validity_period is None
  337. assert dn1_config.default_path is dn2_config.default_path is None
  338. assert dn1_config.properties == dn2_config.properties == {}
  339. def test_deprecated_cacheable_attribute_remains_compatible():
  340. dn_1_id = "dn_1_id"
  341. dn_1_config = Config.configure_data_node(
  342. id=dn_1_id,
  343. storage_type="pickle",
  344. cacheable=False,
  345. scope=Scope.SCENARIO,
  346. )
  347. assert Config.data_nodes[dn_1_id].id == dn_1_id
  348. assert Config.data_nodes[dn_1_id].storage_type == "pickle"
  349. assert Config.data_nodes[dn_1_id].scope == Scope.SCENARIO
  350. assert Config.data_nodes[dn_1_id].properties == {"cacheable": False}
  351. assert not Config.data_nodes[dn_1_id].cacheable
  352. dn_1_config.cacheable = True
  353. assert Config.data_nodes[dn_1_id].properties == {"cacheable": True}
  354. assert Config.data_nodes[dn_1_id].cacheable
  355. dn_2_id = "dn_2_id"
  356. dn_2_config = Config.configure_data_node(
  357. id=dn_2_id,
  358. storage_type="pickle",
  359. cacheable=True,
  360. scope=Scope.SCENARIO,
  361. )
  362. assert Config.data_nodes[dn_2_id].id == dn_2_id
  363. assert Config.data_nodes[dn_2_id].storage_type == "pickle"
  364. assert Config.data_nodes[dn_2_id].scope == Scope.SCENARIO
  365. assert Config.data_nodes[dn_2_id].properties == {"cacheable": True}
  366. assert Config.data_nodes[dn_2_id].cacheable
  367. dn_2_config.cacheable = False
  368. assert Config.data_nodes[dn_1_id].properties == {"cacheable": False}
  369. assert not Config.data_nodes[dn_1_id].cacheable
  370. dn_3_id = "dn_3_id"
  371. dn_3_config = Config.configure_data_node(
  372. id=dn_3_id,
  373. storage_type="pickle",
  374. scope=Scope.SCENARIO,
  375. )
  376. assert Config.data_nodes[dn_3_id].id == dn_3_id
  377. assert Config.data_nodes[dn_3_id].storage_type == "pickle"
  378. assert Config.data_nodes[dn_3_id].scope == Scope.SCENARIO
  379. assert Config.data_nodes[dn_3_id].properties == {}
  380. assert not Config.data_nodes[dn_3_id].cacheable
  381. dn_3_config.cacheable = True
  382. assert Config.data_nodes[dn_3_id].properties == {"cacheable": True}
  383. assert Config.data_nodes[dn_3_id].cacheable