test_data_manager.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. # Copyright 2021-2025 Avaiga Private Limited
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  4. # the License. You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  9. # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
  10. # specific language governing permissions and limitations under the License.
  11. import os
  12. import pathlib
  13. import pytest
  14. from taipy import Scope
  15. from taipy.common.config import Config
  16. from taipy.core._version._version_manager import _VersionManager
  17. from taipy.core.config.data_node_config import DataNodeConfig
  18. from taipy.core.data._data_manager import _DataManager
  19. from taipy.core.data.csv import CSVDataNode
  20. from taipy.core.data.data_node_id import DataNodeId
  21. from taipy.core.data.in_memory import InMemoryDataNode
  22. from taipy.core.data.pickle import PickleDataNode
  23. from taipy.core.exceptions.exceptions import InvalidDataNodeType, ModelNotFound
  24. from taipy.core.reason import NotGlobalScope, WrongConfigType
  25. from tests.core.utils.named_temporary_file import NamedTemporaryFile
  def file_exists(file_path: str) -> bool:
      """Tell whether ``file_path`` refers to an existing path, as reported by ``os.path.exists``."""
      found = os.path.exists(file_path)
      return found
  28. class TestDataManager:
  def test_create_data_node_and_modify_properties_does_not_modify_config(self):
      """Check that editing a data node's properties leaves the properties of its config untouched."""
      config = Config.configure_data_node(id="name", foo="bar")
      data_node = _DataManager._create_and_set(config, None, None)

      def assert_config_unchanged():
          assert config.properties.get("foo") == "bar"
          assert config.properties.get("baz") is None

      assert_config_unchanged()

      # Add a property on the entity only, then store the entity.
      data_node.properties["baz"] = "qux"
      _DataManager._set(data_node)

      assert_config_unchanged()
      assert data_node.properties.get("foo") == "bar"
      assert data_node.properties.get("baz") == "qux"
  def test_can_create(self):
      """Check the reasons returned by ``_DataManager._can_create`` for several kinds of input.

      Covered inputs: no argument, a GLOBAL-scoped config, a SCENARIO-scoped config
      and an object that is not a ``DataNodeConfig``.
      """
      # NOTE(review): 10 is passed as the second positional argument here - confirm
      # which parameter of configure_data_node it is meant to bind to.
      scenario_config = Config.configure_data_node("dn", 10, scope=Scope.SCENARIO)
      global_config = Config.configure_data_node(
          id="global_dn", storage_type="in_memory", scope=Scope.GLOBAL, data=10
      )

      # No argument, then a GLOBAL config: truthy result with no reason recorded.
      for call_args in ((), (global_config,)):
          allowed = _DataManager._can_create(*call_args)
          assert bool(allowed) is True
          assert allowed._reasons == {}

      # SCENARIO-scoped config: falsy result carrying a NotGlobalScope reason.
      refused = _DataManager._can_create(scenario_config)
      assert bool(refused) is False
      assert refused._reasons[scenario_config.id] == {NotGlobalScope(scenario_config.id)}
      scope_message = str(next(iter(refused._reasons[scenario_config.id])))
      assert scope_message == f'Data node config "{scenario_config.id}" does not have GLOBAL scope'

      # Not a config at all: falsy result carrying a WrongConfigType reason keyed by "1".
      rejected = _DataManager._can_create(1)
      assert bool(rejected) is False
      assert rejected._reasons["1"] == {WrongConfigType("1", DataNodeConfig.__name__)}
      type_message = str(next(iter(rejected._reasons["1"])))
      assert type_message == 'Object "1" must be a valid DataNodeConfig'
  def test_create_data_node_with_name_provided(self):
      """Check that a ``name`` given to the config is exposed as the created data node's ``name``."""
      named_config = Config.configure_data_node(id="dn", foo="bar", name="acb")
      created = _DataManager._create_and_set(named_config, None, None)
      assert created.name == "acb"
  def test_create_and_get_csv_data_node(self):
      """Create a CSV data node from a config and read it back by id and by entity.

      The config declares a csv storage type and no explicit scope, and the node is
      created without an owner id. The same set of assertions runs for both keys
      given to ``_DataManager._get`` (the data node id and the data node itself).
      """
      csv_dn_config = Config.configure_data_node(id="foo", storage_type="csv", path="bar", has_header=True)
      csv_dn = _DataManager._create_and_set(csv_dn_config, None, None)

      assert isinstance(csv_dn, CSVDataNode)
      assert isinstance(_DataManager._get(csv_dn.id), CSVDataNode)
      assert _DataManager._exists(csv_dn.id)

      # Looping over both keys guarantees that every check really goes through the
      # lookup being tested, instead of partly falling back to the id lookup.
      for key in (csv_dn.id, csv_dn):
          fetched = _DataManager._get(key)
          assert fetched is not None
          assert fetched.id == csv_dn.id
          assert fetched.config_id == "foo"
          assert fetched.config_id == csv_dn.config_id
          assert fetched.scope == Scope.SCENARIO
          assert fetched.scope == csv_dn.scope
          assert fetched.owner_id is None
          assert fetched.owner_id == csv_dn.owner_id
          assert fetched.parent_ids == set()
          assert fetched.parent_ids == csv_dn.parent_ids
          assert fetched.last_edit_date is None
          assert fetched.last_edit_date == csv_dn.last_edit_date
          assert fetched.job_ids == []
          assert fetched.job_ids == csv_dn.job_ids
          assert not fetched.is_ready_for_reading
          assert fetched.is_ready_for_reading == csv_dn.is_ready_for_reading

          # Expected properties: path, encoding, has_header, exposed_type, is_generated.
          assert len(fetched.properties) == 5
          assert fetched.properties.get("path") == "bar"
          assert fetched.properties.get("encoding") == "utf-8"
          assert fetched.properties.get("has_header") is True
          assert fetched.properties.get("exposed_type") == "pandas"
          assert fetched.properties.get("is_generated") is False
          assert fetched.properties == csv_dn.properties

          # No edit is in progress on a freshly created data node.
          assert fetched.edit_in_progress is False
          assert fetched._editor_id is None
          assert fetched._editor_expiration_date is None
  def test_edit_and_get_data_node(self):
      """Lock then unlock a pickle data node and check what the manager returns at each step."""
      pickle_config = Config.configure_pickle_data_node(id="foo")
      node = _DataManager._create_and_set(pickle_config, None, None)

      def stored():
          return _DataManager._get(node.id)

      def assert_edit_date_and_properties_unchanged():
          assert stored().last_edit_date is None
          assert len(stored().properties) == 2  # is_generated and path
          assert isinstance(stored().properties.get("path"), str)
          assert stored().properties.get("is_generated") is True

      # Right after creation: no edit in progress and no editor recorded.
      assert_edit_date_and_properties_unchanged()
      assert not stored().edit_in_progress
      assert stored()._editor_id is None
      assert stored()._editor_expiration_date is None

      # Locked by editor "foo": the editor and an expiration date are recorded.
      node.lock_edit("foo")
      assert_edit_date_and_properties_unchanged()
      assert stored().edit_in_progress
      assert stored().editor_id == "foo"
      assert stored().editor_expiration_date is not None

      # Unlocked by the same editor: the editor information is cleared again.
      node.unlock_edit("foo")
      assert_edit_date_and_properties_unchanged()
      assert not stored().edit_in_progress
      assert stored().editor_id is None
      assert stored().editor_expiration_date is None
  def test_create_and_get_in_memory_data_node(self):
      """Create an in-memory data node from a config and read it back by id and by entity.

      The config declares an in_memory storage type, a SCENARIO scope, default data
      and one extra property; the node is created with an owner id and one parent id.
      """
      in_memory_dn_config = Config.configure_data_node(
          id="baz", storage_type="in_memory", scope=Scope.SCENARIO, default_data="qux", other_data="foo"
      )
      in_mem_dn = _DataManager._create_and_set(in_memory_dn_config, "Scenario_id", {"task_id"})

      assert isinstance(in_mem_dn, InMemoryDataNode)
      assert isinstance(_DataManager._get(in_mem_dn.id), InMemoryDataNode)
      assert _DataManager._exists(in_mem_dn.id)

      # First pass looks the node up by id, second pass by the entity itself.
      for key in (in_mem_dn.id, in_mem_dn):

          def fetch(key=key):
              return _DataManager._get(key)

          assert fetch() is not None
          assert fetch().id == in_mem_dn.id
          assert fetch().config_id == "baz"
          assert fetch().config_id == in_mem_dn.config_id
          assert fetch().scope == Scope.SCENARIO
          assert fetch().scope == in_mem_dn.scope
          assert fetch().owner_id == "Scenario_id"
          assert fetch().owner_id == in_mem_dn.owner_id
          assert fetch().parent_ids == {"task_id"}
          assert fetch().parent_ids == in_mem_dn.parent_ids
          assert fetch().last_edit_date is not None
          assert fetch().last_edit_date == in_mem_dn.last_edit_date
          assert fetch().job_ids == []
          assert fetch().job_ids == in_mem_dn.job_ids
          assert fetch().is_ready_for_reading
          assert fetch().is_ready_for_reading == in_mem_dn.is_ready_for_reading
          # Only the extra "other_data" property is expected.
          assert len(fetch().properties) == 1
          assert fetch().properties.get("other_data") == "foo"
          assert fetch().properties == in_mem_dn.properties
  def test_create_and_get_pickle_data_node(self):
      """Create a pickle data node from a config and read it back by id and by entity.

      The config declares a pickle storage type, a CYCLE scope and no default data;
      the node is created without an owner id and with two parent ids.
      """
      dn_config = Config.configure_data_node(id="plop", storage_type="pickle", scope=Scope.CYCLE)
      pickle_dn = _DataManager._create_and_set(dn_config, None, {"task_id_1", "task_id_2"})

      assert isinstance(pickle_dn, PickleDataNode)
      assert isinstance(_DataManager._get(pickle_dn.id), PickleDataNode)
      assert _DataManager._exists(pickle_dn.id)

      # First pass looks the node up by id, second pass by the entity itself.
      for key in (pickle_dn.id, pickle_dn):

          def fetch(key=key):
              return _DataManager._get(key)

          assert fetch() is not None
          assert fetch().id == pickle_dn.id
          assert fetch().config_id == "plop"
          assert fetch().config_id == pickle_dn.config_id
          assert fetch().scope == Scope.CYCLE
          assert fetch().scope == pickle_dn.scope
          assert fetch().owner_id is None
          assert fetch().owner_id == pickle_dn.owner_id
          assert fetch().parent_ids == {"task_id_1", "task_id_2"}
          assert fetch().parent_ids == pickle_dn.parent_ids
          assert fetch().last_edit_date is None
          assert fetch().last_edit_date == pickle_dn.last_edit_date
          assert fetch().job_ids == []
          assert fetch().job_ids == pickle_dn.job_ids
          assert not fetch().is_ready_for_reading
          assert fetch().is_ready_for_reading == pickle_dn.is_ready_for_reading
          assert len(fetch().properties) == 2  # is_generated and path
          assert fetch().properties == pickle_dn.properties
  def test_create_raises_exception_with_wrong_type(self):
      """Check that creating a data node from a config with storage type "bar" raises InvalidDataNodeType."""
      invalid_config = DataNodeConfig(id="foo", storage_type="bar", scope=DataNodeConfig._DEFAULT_SCOPE)
      with pytest.raises(InvalidDataNodeType):
          _DataManager._create_and_set(invalid_config, None, None)
  def test_create_from_same_config_generates_new_data_node_and_new_id(self):
      """Check that two creations from one config produce data nodes with different ids."""
      shared_config = Config.configure_data_node(id="foo", storage_type="in_memory")
      first, second = (_DataManager._create_and_set(shared_config, None, None) for _ in range(2))
      assert second.id != first.id
  def test_create_uses_overridden_attributes_in_config_file(self):
      """Check which path a CSV data node ends up with once ``data_sample/config.toml`` overrides the config.

      The node configured as "foo" is expected to get "path_from_config_file",
      whereas the node configured as "baz" keeps the path given in Python.
      """
      override_file = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/config.toml")
      Config.override(override_file)

      for config_id, expected_path in (("foo", "path_from_config_file"), ("baz", "bar")):
          csv_config = Config.configure_data_node(id=config_id, storage_type="csv", path="bar", has_header=True)
          csv_node = _DataManager._create_and_set(csv_config, None, None)
          assert csv_node.config_id == config_id
          assert isinstance(csv_node, CSVDataNode)
          assert csv_node._path == expected_path
          assert csv_node.properties["has_header"]
  def test_get_if_not_exists(self):
      """Check that loading the id "test_data_node_2" from the repository raises ModelNotFound."""
      missing_id = "test_data_node_2"
      with pytest.raises(ModelNotFound):
          _DataManager._repository._load(missing_id)
  def test_get_all(self):
      """Check that ``_get_all`` returns every data node created, whichever config it came from."""

      def stored_with(config_id):
          return [dn for dn in _DataManager._get_all() if dn.config_id == config_id]

      assert len(_DataManager._get_all()) == 0

      foo_config = Config.configure_data_node(id="foo", storage_type="in_memory")
      _DataManager._create_and_set(foo_config, None, None)
      assert len(_DataManager._get_all()) == 1

      # Two data nodes are created from the second config.
      baz_config = Config.configure_data_node(id="baz", storage_type="in_memory")
      for _ in range(2):
          _DataManager._create_and_set(baz_config, None, None)
      assert len(_DataManager._get_all()) == 3
      assert len(stored_with("foo")) == 1
      assert len(stored_with("baz")) == 2
  def test_get_all_on_multiple_versions_environment(self):
      """Check ``_get_all`` and ``_get_all_by`` against data nodes stored under two versions.

      Five data nodes are stored for version "1.0" (config_id_1 to config_id_5) and
      five for version "2.0" (config_id_2 to config_id_6), so "config_id_1" only
      exists in version 1.0 and "config_id_6" only in version 2.0.
      """
      for version_number in (1, 2):
          for offset in range(5):
              node = InMemoryDataNode(
                  f"config_id_{offset + version_number}",
                  Scope.SCENARIO,
                  id=DataNodeId(f"id{offset}_v{version_number}"),
                  version=f"{version_number}.0",
              )
              _DataManager._set(node)

      def count_matching(version, config_id):
          return len(_DataManager._get_all_by(filters=[{"version": version, "config_id": config_id}]))

      # (version setter, version, expected matches for config_id_1, expected matches for config_id_6)
      cases = (
          (_VersionManager._set_experiment_version, "1.0", 1, 0),
          (_VersionManager._set_development_version, "1.0", 1, 0),
          (_VersionManager._set_experiment_version, "2.0", 0, 1),
          (_VersionManager._set_development_version, "2.0", 0, 1),
      )
      for activate, version, expected_first, expected_last in cases:
          activate(version)
          assert len(_DataManager._get_all()) == 5
          assert count_matching(version, "config_id_1") == expected_first
          assert count_matching(version, "config_id_6") == expected_last
  def test_set(self):
      """Store a data node with ``_set``, then store it again after a change and check it is not duplicated."""
      node_attributes = {
          "id": DataNodeId("id"),
          "owner_id": None,
          "parent_ids": {"task_id_1"},
          "last_edit_date": None,
          "edits": [],
          "edit_in_progress": False,
          "properties": {"foo": "bar"},
      }
      node = InMemoryDataNode("config_id", Scope.SCENARIO, **node_attributes)

      # Nothing is stored before the first _set.
      assert len(_DataManager._get_all()) == 0
      assert not _DataManager._exists(node.id)

      _DataManager._set(node)
      assert len(_DataManager._get_all()) == 1
      assert _DataManager._exists(node.id)

      # Change an attribute on the entity and store it again: still a single data
      # node, and the stored one carries the new value.
      node._config_id = "foo"
      assert node.config_id == "foo"
      _DataManager._set(node)
      assert len(_DataManager._get_all()) == 1
      assert node.config_id == "foo"
      assert _DataManager._get(node.id).config_id == "foo"
  def test_delete(self):
      """Exercise ``_delete`` on one of three data nodes, then ``_delete_all`` on the remaining ones."""
      first, second, third = (
          InMemoryDataNode("config_id", Scope.SCENARIO, id=f"id_{rank}") for rank in (1, 2, 3)
      )
      assert len(_DataManager._get_all()) == 0

      for node in (first, second, third):
          _DataManager._set(node)
      assert len(_DataManager._get_all()) == 3
      assert all(_DataManager._exists(node.id) for node in (first, second, third))

      # Deleting one id removes only that data node.
      _DataManager._delete(first.id)
      assert len(_DataManager._get_all()) == 2
      assert _DataManager._get(second.id).id == second.id
      assert _DataManager._get(third.id).id == third.id
      assert _DataManager._get(first.id) is None
      assert all(_DataManager._exists(node.id) for node in (second, third))
      assert not _DataManager._exists(first.id)

      _DataManager._delete_all()
      assert len(_DataManager._get_all()) == 0
      assert not any(_DataManager._exists(node.id) for node in (second, third))
  def test_get_or_create(self):
      """Check when ``_bulk_get_or_create`` reuses a data node for GLOBAL, SCENARIO and CYCLE scoped configs."""

      def get_or_create(config, *ids):
          return _DataManager._bulk_get_or_create([config], *ids)[config]

      def stored_count():
          return len(_DataManager._get_all())

      _DataManager._delete_all()

      global_dn_config = Config.configure_data_node(
          id="test_data_node", storage_type="in_memory", scope=Scope.GLOBAL, data="In memory Data Node"
      )
      cycle_dn_config = Config.configure_data_node(
          id="test_data_node1", storage_type="in_memory", scope=Scope.CYCLE, data="In memory Data Node"
      )
      scenario_dn_config = Config.configure_data_node(
          id="test_data_node2", storage_type="in_memory", scope=Scope.SCENARIO, data="In memory scenario"
      )
      assert stored_count() == 0

      # GLOBAL scope: the second call returns the data node created by the first one.
      global_dn = get_or_create(global_dn_config, None, None)
      assert stored_count() == 1
      global_dn_bis = get_or_create(global_dn_config, None)
      assert stored_count() == 1
      assert global_dn.id == global_dn_bis.id

      # SCENARIO scope: same data node for the same scenario id ...
      scenario_dn = get_or_create(scenario_dn_config, None, "scenario_id")
      assert stored_count() == 2
      scenario_dn_bis = get_or_create(scenario_dn_config, None, "scenario_id")
      assert stored_count() == 2
      assert scenario_dn.id == scenario_dn_bis.id
      scenario_dn_ter = get_or_create(scenario_dn_config, None, "scenario_id")
      assert stored_count() == 2
      assert scenario_dn.id == scenario_dn_bis.id == scenario_dn_ter.id

      # ... and a new data node for another scenario id.
      scenario_dn_quater = get_or_create(scenario_dn_config, None, "scenario_id_2")
      assert stored_count() == 3
      assert scenario_dn.id == scenario_dn_bis.id == scenario_dn_ter.id
      assert scenario_dn_ter.id != scenario_dn_quater.id
      assert stored_count() == 3

      # CYCLE scope: same data node for the same cycle id, whatever scenario id is given.
      cycle_dn = get_or_create(cycle_dn_config, "cycle_id", None)
      assert stored_count() == 4
      later_cycle_dns = []
      for scenario_id in (None, "scenario_id", None, "scenario_id", "scenario_id_2"):
          same_cycle_dn = get_or_create(cycle_dn_config, "cycle_id", scenario_id)
          assert stored_count() == 4
          assert cycle_dn.id == same_cycle_dn.id
          later_cycle_dns.append(same_cycle_dn)
      for previous, following in zip(later_cycle_dns, later_cycle_dns[1:]):
          assert previous.id == following.id
  def test_ensure_persistence_of_data_node(self):
      """Check that data nodes created through one ``_DataManager`` instance are visible from a new instance."""
      manager = _DataManager()
      manager._delete_all()

      first_config, second_config = (
          Config.configure_data_node(id=config_id, storage_type="in_memory", data="In memory sequence 2")
          for config_id in ("data_node_1", "data_node_2")
      )
      manager._bulk_get_or_create([first_config, second_config])
      assert len(manager._get_all()) == 2

      # Drop the manager and instantiate a new one, so that the data nodes have to
      # come from the storage system.
      del manager
      manager = _DataManager()
      manager._bulk_get_or_create([first_config])
      assert len(manager._get_all()) == 2

      manager._delete_all()
  @pytest.mark.parametrize(
      "storage_type,path",
      [
          ("pickle", "pickle_file_path"),
          ("csv", "csv_file"),
          ("excel", "excel_file"),
          ("json", "json_file"),
          ("parquet", "parquet_file_path"),
      ],
  )
  def test_clean_generated_files(self, storage_type, path, request):
      """Check which files remain after calling the generated-file cleaners, for each file storage type.

      The file of the data node configured with an explicit path must still exist,
      whereas the files of the two data nodes configured without a path must be gone.
      """
      # ``path`` holds a fixture name; resolve it to the fixture's value.
      user_path = request.getfixturevalue(path)
      user_dn_config = Config.configure_data_node(
          id="d1", storage_type=storage_type, path=user_path, default_data={"a": [1], "b": [2]}
      )
      generated_configs = [
          Config.configure_data_node(id=config_id, storage_type=storage_type, default_data={"a": [1], "b": [2]})
          for config_id in ("d2", "d3")
      ]

      created = _DataManager._bulk_get_or_create([user_dn_config, *generated_configs])
      user_dn = created[user_dn_config]
      generated_dns = [created[config] for config in generated_configs]

      # NOTE(review): an id is passed here while _clean_generated_files below receives
      # data node entities - confirm the singular cleaner accepts an id.
      _DataManager._clean_generated_file(user_dn.id)
      assert file_exists(user_dn.path)

      _DataManager._clean_generated_files(generated_dns)
      for generated_dn in generated_dns:
          assert not file_exists(generated_dn.path)
  @pytest.mark.parametrize(
      "storage_type,path",
      [
          ("pickle", "pickle_file_path"),
          ("csv", "csv_file"),
          ("excel", "excel_file"),
          ("json", "json_file"),
          ("parquet", "parquet_file_path"),
      ],
  )
  def test_delete_does_clean_generated_pickle_files(self, storage_type, path, request):
      """Check which files remain after ``_delete``, ``_delete_many`` and ``_delete_all``, for each storage type.

      The file of the data node configured with an explicit path must survive its
      deletion, whereas the files of the data nodes configured without a path must
      be gone once those data nodes are deleted.
      """
      # ``path`` holds a fixture name; resolve it to the fixture's value.
      user_path = request.getfixturevalue(path)
      user_dn_config = Config.configure_data_node(
          id="d1", storage_type=storage_type, path=user_path, default_data={"a": [1], "b": [2]}
      )
      generated_configs = [
          Config.configure_data_node(id=config_id, storage_type=storage_type, default_data={"a": [1], "b": [2]})
          for config_id in ("d2", "d3", "d4")
      ]

      created = _DataManager._bulk_get_or_create([user_dn_config, *generated_configs])
      user_dn = created[user_dn_config]
      generated_1, generated_2, generated_3 = (created[config] for config in generated_configs)

      # Single deletion of the data node that has a user-provided path.
      _DataManager._delete(user_dn.id)
      assert file_exists(user_dn.path)

      # Bulk deletion of two of the data nodes created without a path.
      _DataManager._delete_many([generated_1.id, generated_2.id])
      assert not file_exists(generated_1.path)
      assert not file_exists(generated_2.path)

      # Deleting everything covers the last remaining one.
      _DataManager._delete_all()
      assert not file_exists(generated_3.path)
  def test_create_dn_from_loaded_config_no_scope(self):
      """Create two scenarios from a TOML config whose data nodes declare no scope and count the data nodes.

      The config declares two data nodes (``a`` and ``b``); four data nodes are
      expected once the scenario has been created twice.
      """
      toml_config = NamedTemporaryFile(
          """
          [TAIPY]
          [DATA_NODE.a]
          default_data = "4:int"
          [DATA_NODE.b]
          [TASK.t]
          function = "math.sqrt:function"
          inputs = [ "a:SECTION",]
          outputs = [ "b:SECTION",]
          skippable = "False:bool"
          [SCENARIO.s]
          tasks = [ "t:SECTION",]
          sequences.s_sequence = [ "t:SECTION",]
          [SCENARIO.s.comparators]
          """
      )
      from taipy import core as tp

      Config.override(toml_config.filename)
      for _ in range(2):
          tp.create_scenario(Config.scenarios["s"])

      assert len(tp.get_data_nodes()) == 4
  def test_create_dn_from_loaded_config_no_storage_type(self):
      """Create a scenario from a TOML config and check the data node classes it ends up with.

      ``input_dn`` declares no storage type and is expected to be a pickle data
      node; ``output_dn`` declares "in_memory" and is expected to be an in-memory one.
      """
      toml_config = NamedTemporaryFile(
          """
          [TAIPY]
          [DATA_NODE.input_dn]
          scope = "SCENARIO:SCOPE"
          default_data = "21:int"
          [DATA_NODE.output_dn]
          storage_type = "in_memory"
          scope = "SCENARIO:SCOPE"
          [TASK.double]
          inputs = [ "input_dn:SECTION",]
          function = "math.sqrt:function"
          outputs = [ "output_dn:SECTION",]
          skippable = "False:bool"
          [SCENARIO.my_scenario]
          tasks = [ "double:SECTION",]
          sequences.my_sequence = [ "double:SECTION",]
          [SCENARIO.my_scenario.comparators]
          """
      )
      from taipy import core as tp

      Config.override(toml_config.filename)
      created_scenario = tp.create_scenario(Config.scenarios["my_scenario"])

      assert isinstance(created_scenario.input_dn, PickleDataNode)
      assert isinstance(created_scenario.output_dn, InMemoryDataNode)
  def test_create_dn_from_loaded_config_modified_default_config(self):
      """Check that a changed default data node configuration applies to loaded configs.

      The default data node configuration is switched to the ``csv`` storage
      type before the TOML file is applied. ``input_dn`` declares no
      ``storage_type`` and must then be a ``CSVDataNode``, while ``output_dn``
      keeps its explicit ``in_memory`` type and must be an ``InMemoryDataNode``.
      """
      toml_text = """
          [TAIPY]
          [DATA_NODE.input_dn]
          scope = "SCENARIO:SCOPE"
          default_path="fake/path.csv"
          [DATA_NODE.output_dn]
          storage_type = "in_memory"
          scope = "SCENARIO:SCOPE"
          [TASK.double]
          inputs = [ "input_dn:SECTION",]
          function = "math.sqrt:function"
          outputs = [ "output_dn:SECTION",]
          skippable = "False:bool"
          [SCENARIO.my_scenario]
          tasks = [ "double:SECTION",]
          sequences.my_sequence = [ "double:SECTION",]
          [SCENARIO.my_scenario.comparators]
          """
      toml_file = NamedTemporaryFile(toml_text)

      from taipy import core as tp

      # The default must be changed before the file-based configuration is applied.
      Config.set_default_data_node_configuration(storage_type="csv")
      Config.override(toml_file.filename)

      created = tp.create_scenario(Config.scenarios["my_scenario"])

      input_node = created.input_dn
      assert isinstance(input_node, CSVDataNode)
      output_node = created.output_dn
      assert isinstance(output_node, InMemoryDataNode)
  def test_get_tasks_by_config_id(self):
      """Check that ``_DataManager._get_by_config_id`` returns the nodes of one config.

      Despite the method name, this test works on data nodes, not tasks: it
      creates three, two and one data nodes from three configurations, checks
      the running total after each batch, then verifies that a lookup by
      config id returns exactly the nodes created from that configuration.
      """
      cfg_1, cfg_2, cfg_3 = (
          Config.configure_data_node(config_id, scope=Scope.SCENARIO)
          for config_id in ("dn_1", "dn_2", "dn_3")
      )

      def spawn(dn_config, count):
          # Create `count` data nodes from the same configuration.
          return [_DataManager._create_and_set(dn_config, None, None) for _ in range(count)]

      def sorted_ids(data_nodes):
          # Order-independent view of a collection of data nodes.
          return sorted([data_node.id for data_node in data_nodes])

      created_1 = spawn(cfg_1, 3)
      assert len(_DataManager._get_all()) == 3

      created_2 = spawn(cfg_2, 2)
      assert len(_DataManager._get_all()) == 5

      created_3 = spawn(cfg_3, 1)
      assert len(_DataManager._get_all()) == 6

      expectations = (
          (cfg_1, created_1, 3),
          (cfg_2, created_2, 2),
          (cfg_3, created_3, 1),
      )
      for dn_config, created, expected_count in expectations:
          fetched = _DataManager._get_by_config_id(dn_config.id)
          assert len(fetched) == expected_count
          assert sorted_ids(created) == sorted_ids(fetched)
  def test_get_data_nodes_by_config_id_in_multiple_versions_environment(self):
      """Check ``_get_by_config_id`` counts across two experiment versions.

      For each of the experiment versions "1.0" and "2.0", three data nodes
      are created from ``dn_1`` and two from ``dn_2``. After each batch the
      lookup by config id must report 3 and 2 nodes respectively, so the
      counts do not accumulate to 6 and 4 after the second version.
      """
      # NOTE(review): the stable counts suggest the lookup is restricted to the
      # currently selected experiment version -- confirm in _DataManager.
      cfg_1 = Config.configure_data_node("dn_1", scope=Scope.SCENARIO)
      cfg_2 = Config.configure_data_node("dn_2", scope=Scope.SCENARIO)

      # (configuration, number of data nodes to create per version)
      creation_plan = ((cfg_1, 3), (cfg_2, 2))

      for version in ("1.0", "2.0"):
          _VersionManager._set_experiment_version(version)

          for dn_config, count in creation_plan:
              for _ in range(count):
                  _DataManager._create_and_set(dn_config, None, None)

          for dn_config, count in creation_plan:
              assert len(_DataManager._get_by_config_id(dn_config.id)) == count