# test_data_manager.py
# Copyright 2023 Avaiga Private Limited
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
import os
import pathlib

import pytest

from src.taipy.core._version._version_manager import _VersionManager
from src.taipy.core.config.data_node_config import DataNodeConfig
from src.taipy.core.data._data_manager import _DataManager
from src.taipy.core.data.csv import CSVDataNode
from src.taipy.core.data.data_node_id import DataNodeId
from src.taipy.core.data.in_memory import InMemoryDataNode
from src.taipy.core.data.pickle import PickleDataNode
from src.taipy.core.exceptions.exceptions import InvalidDataNodeType, ModelNotFound
from taipy.config.common.scope import Scope
from taipy.config.config import Config
from tests.core.utils.named_temporary_file import NamedTemporaryFile
  25. def file_exists(file_path: str) -> bool:
  26. return os.path.exists(file_path)
  27. class TestDataManager:
  28. def test_create_data_node_and_modify_properties_does_not_modify_config(self):
  29. dn_config = Config.configure_data_node(id="name", foo="bar")
  30. dn = _DataManager._create_and_set(dn_config, None, None)
  31. assert dn_config.properties.get("foo") == "bar"
  32. assert dn_config.properties.get("baz") is None
  33. dn.properties["baz"] = "qux"
  34. _DataManager._set(dn)
  35. assert dn_config.properties.get("foo") == "bar"
  36. assert dn_config.properties.get("baz") is None
  37. assert dn.properties.get("foo") == "bar"
  38. assert dn.properties.get("baz") == "qux"
  39. def test_create_data_node_with_name_provided(self):
  40. dn_config = Config.configure_data_node(id="dn", foo="bar", name="acb")
  41. dn = _DataManager._create_and_set(dn_config, None, None)
  42. assert dn.name == "acb"
  43. def test_create_and_get_csv_data_node(self):
  44. # Test we can instantiate a CsvDataNode from DataNodeConfig with :
  45. # - a csv type
  46. # - a default scenario scope
  47. # - No owner_id
  48. csv_dn_config = Config.configure_data_node(id="foo", storage_type="csv", path="bar", has_header=True)
  49. csv_dn = _DataManager._create_and_set(csv_dn_config, None, None)
  50. assert isinstance(csv_dn, CSVDataNode)
  51. assert isinstance(_DataManager._get(csv_dn.id), CSVDataNode)
  52. assert _DataManager._exists(csv_dn.id)
  53. assert _DataManager._get(csv_dn.id) is not None
  54. assert _DataManager._get(csv_dn.id).id == csv_dn.id
  55. assert _DataManager._get(csv_dn.id).config_id == "foo"
  56. assert _DataManager._get(csv_dn.id).config_id == csv_dn.config_id
  57. assert _DataManager._get(csv_dn.id).scope == Scope.SCENARIO
  58. assert _DataManager._get(csv_dn.id).scope == csv_dn.scope
  59. assert _DataManager._get(csv_dn.id).owner_id is None
  60. assert _DataManager._get(csv_dn.id).owner_id == csv_dn.owner_id
  61. assert _DataManager._get(csv_dn.id).parent_ids == set()
  62. assert _DataManager._get(csv_dn.id).parent_ids == csv_dn.parent_ids
  63. assert _DataManager._get(csv_dn.id).last_edit_date is None
  64. assert _DataManager._get(csv_dn.id).last_edit_date == csv_dn.last_edit_date
  65. assert _DataManager._get(csv_dn.id).job_ids == []
  66. assert _DataManager._get(csv_dn.id).job_ids == csv_dn.job_ids
  67. assert not _DataManager._get(csv_dn.id).is_ready_for_reading
  68. assert _DataManager._get(csv_dn.id).is_ready_for_reading == csv_dn.is_ready_for_reading
  69. assert len(_DataManager._get(csv_dn.id).properties) == 4
  70. assert _DataManager._get(csv_dn.id).properties.get("path") == "bar"
  71. assert _DataManager._get(csv_dn.id).properties.get("encoding") == "utf-8"
  72. assert _DataManager._get(csv_dn.id).properties.get("has_header") is True
  73. assert _DataManager._get(csv_dn.id).properties.get("exposed_type") == "pandas"
  74. assert _DataManager._get(csv_dn.id).properties == csv_dn.properties
  75. assert _DataManager._get(csv_dn.id).edit_in_progress is False
  76. assert _DataManager._get(csv_dn.id)._editor_id is None
  77. assert _DataManager._get(csv_dn.id)._editor_expiration_date is None
  78. assert _DataManager._get(csv_dn) is not None
  79. assert _DataManager._get(csv_dn).id == csv_dn.id
  80. assert _DataManager._get(csv_dn).config_id == "foo"
  81. assert _DataManager._get(csv_dn).config_id == csv_dn.config_id
  82. assert _DataManager._get(csv_dn).scope == Scope.SCENARIO
  83. assert _DataManager._get(csv_dn).scope == csv_dn.scope
  84. assert _DataManager._get(csv_dn).owner_id is None
  85. assert _DataManager._get(csv_dn).owner_id == csv_dn.owner_id
  86. assert _DataManager._get(csv_dn).parent_ids == set()
  87. assert _DataManager._get(csv_dn).parent_ids == csv_dn.parent_ids
  88. assert _DataManager._get(csv_dn).last_edit_date is None
  89. assert _DataManager._get(csv_dn).last_edit_date == csv_dn.last_edit_date
  90. assert _DataManager._get(csv_dn).job_ids == []
  91. assert _DataManager._get(csv_dn).job_ids == csv_dn.job_ids
  92. assert not _DataManager._get(csv_dn).is_ready_for_reading
  93. assert _DataManager._get(csv_dn).is_ready_for_reading == csv_dn.is_ready_for_reading
  94. assert len(_DataManager._get(csv_dn).properties) == 4
  95. assert _DataManager._get(csv_dn).properties.get("path") == "bar"
  96. assert _DataManager._get(csv_dn).properties.get("encoding") == "utf-8"
  97. assert _DataManager._get(csv_dn).properties.get("has_header") is True
  98. assert _DataManager._get(csv_dn.id).properties.get("exposed_type") == "pandas"
  99. assert _DataManager._get(csv_dn).properties == csv_dn.properties
  100. assert _DataManager._get(csv_dn.id).edit_in_progress is False
  101. assert _DataManager._get(csv_dn.id)._editor_id is None
  102. assert _DataManager._get(csv_dn.id)._editor_expiration_date is None
  103. def test_edit_and_get_data_node(self):
  104. config = Config.configure_pickle_data_node(id="foo")
  105. dn = _DataManager._create_and_set(config, None, None)
  106. assert _DataManager._get(dn.id).last_edit_date is None
  107. assert len(_DataManager._get(dn.id).properties) == 1
  108. assert _DataManager._get(dn.id).properties.get("is_generated")
  109. assert not _DataManager._get(dn.id).edit_in_progress
  110. assert _DataManager._get(dn.id)._editor_id is None
  111. assert _DataManager._get(dn.id)._editor_expiration_date is None
  112. dn.lock_edit("foo")
  113. assert _DataManager._get(dn.id).last_edit_date is None
  114. assert len(_DataManager._get(dn.id).properties) == 1
  115. assert _DataManager._get(dn.id).properties.get("is_generated")
  116. assert _DataManager._get(dn.id).edit_in_progress
  117. assert _DataManager._get(dn.id).editor_id == "foo"
  118. assert _DataManager._get(dn.id).editor_expiration_date is not None
  119. dn.unlock_edit("foo")
  120. assert _DataManager._get(dn.id).last_edit_date is None
  121. assert len(_DataManager._get(dn.id).properties) == 1
  122. assert _DataManager._get(dn.id).properties.get("is_generated")
  123. assert not _DataManager._get(dn.id).edit_in_progress
  124. assert _DataManager._get(dn.id).editor_id is None
  125. assert _DataManager._get(dn.id).editor_expiration_date is None
  126. def test_create_and_get_in_memory_data_node(self):
  127. # Test we can instantiate an InMemoryDataNode from DataNodeConfig with :
  128. # - an in_memory type
  129. # - a scenario scope
  130. # - an owner id
  131. # - some default data
  132. in_memory_dn_config = Config.configure_data_node(
  133. id="baz", storage_type="in_memory", scope=Scope.SCENARIO, default_data="qux", other_data="foo"
  134. )
  135. in_mem_dn = _DataManager._create_and_set(in_memory_dn_config, "Scenario_id", {"task_id"})
  136. assert isinstance(in_mem_dn, InMemoryDataNode)
  137. assert isinstance(_DataManager._get(in_mem_dn.id), InMemoryDataNode)
  138. assert _DataManager._exists(in_mem_dn.id)
  139. assert _DataManager._get(in_mem_dn.id) is not None
  140. assert _DataManager._get(in_mem_dn.id).id == in_mem_dn.id
  141. assert _DataManager._get(in_mem_dn.id).config_id == "baz"
  142. assert _DataManager._get(in_mem_dn.id).config_id == in_mem_dn.config_id
  143. assert _DataManager._get(in_mem_dn.id).scope == Scope.SCENARIO
  144. assert _DataManager._get(in_mem_dn.id).scope == in_mem_dn.scope
  145. assert _DataManager._get(in_mem_dn.id).owner_id == "Scenario_id"
  146. assert _DataManager._get(in_mem_dn.id).owner_id == in_mem_dn.owner_id
  147. assert _DataManager._get(in_mem_dn.id).parent_ids == {"task_id"}
  148. assert _DataManager._get(in_mem_dn.id).parent_ids == in_mem_dn.parent_ids
  149. assert _DataManager._get(in_mem_dn.id).last_edit_date is not None
  150. assert _DataManager._get(in_mem_dn.id).last_edit_date == in_mem_dn.last_edit_date
  151. assert _DataManager._get(in_mem_dn.id).job_ids == []
  152. assert _DataManager._get(in_mem_dn.id).job_ids == in_mem_dn.job_ids
  153. assert _DataManager._get(in_mem_dn.id).is_ready_for_reading
  154. assert _DataManager._get(in_mem_dn.id).is_ready_for_reading == in_mem_dn.is_ready_for_reading
  155. assert len(_DataManager._get(in_mem_dn.id).properties) == 1
  156. assert _DataManager._get(in_mem_dn.id).properties.get("other_data") == "foo"
  157. assert _DataManager._get(in_mem_dn.id).properties == in_mem_dn.properties
  158. assert _DataManager._get(in_mem_dn) is not None
  159. assert _DataManager._get(in_mem_dn).id == in_mem_dn.id
  160. assert _DataManager._get(in_mem_dn).config_id == "baz"
  161. assert _DataManager._get(in_mem_dn).config_id == in_mem_dn.config_id
  162. assert _DataManager._get(in_mem_dn).scope == Scope.SCENARIO
  163. assert _DataManager._get(in_mem_dn).scope == in_mem_dn.scope
  164. assert _DataManager._get(in_mem_dn).owner_id == "Scenario_id"
  165. assert _DataManager._get(in_mem_dn).owner_id == in_mem_dn.owner_id
  166. assert _DataManager._get(in_mem_dn).parent_ids == {"task_id"}
  167. assert _DataManager._get(in_mem_dn).parent_ids == in_mem_dn.parent_ids
  168. assert _DataManager._get(in_mem_dn).last_edit_date is not None
  169. assert _DataManager._get(in_mem_dn).last_edit_date == in_mem_dn.last_edit_date
  170. assert _DataManager._get(in_mem_dn).job_ids == []
  171. assert _DataManager._get(in_mem_dn).job_ids == in_mem_dn.job_ids
  172. assert _DataManager._get(in_mem_dn).is_ready_for_reading
  173. assert _DataManager._get(in_mem_dn).is_ready_for_reading == in_mem_dn.is_ready_for_reading
  174. assert len(_DataManager._get(in_mem_dn).properties) == 1
  175. assert _DataManager._get(in_mem_dn).properties.get("other_data") == "foo"
  176. assert _DataManager._get(in_mem_dn).properties == in_mem_dn.properties
  177. def test_create_and_get_pickle_data_node(self):
  178. # Test we can instantiate a PickleDataNode from DataNodeConfig with :
  179. # - an in_memory type
  180. # - a business cycle scope
  181. # - No owner id
  182. # - no default data
  183. dn_config = Config.configure_data_node(id="plop", storage_type="pickle", scope=Scope.CYCLE)
  184. pickle_dn = _DataManager._create_and_set(dn_config, None, {"task_id_1", "task_id_2"})
  185. assert isinstance(pickle_dn, PickleDataNode)
  186. assert isinstance(_DataManager._get(pickle_dn.id), PickleDataNode)
  187. assert _DataManager._exists(pickle_dn.id)
  188. assert _DataManager._get(pickle_dn.id) is not None
  189. assert _DataManager._get(pickle_dn.id).id == pickle_dn.id
  190. assert _DataManager._get(pickle_dn.id).config_id == "plop"
  191. assert _DataManager._get(pickle_dn.id).config_id == pickle_dn.config_id
  192. assert _DataManager._get(pickle_dn.id).scope == Scope.CYCLE
  193. assert _DataManager._get(pickle_dn.id).scope == pickle_dn.scope
  194. assert _DataManager._get(pickle_dn.id).owner_id is None
  195. assert _DataManager._get(pickle_dn.id).owner_id == pickle_dn.owner_id
  196. assert _DataManager._get(pickle_dn.id).parent_ids == {"task_id_1", "task_id_2"}
  197. assert _DataManager._get(pickle_dn.id).parent_ids == pickle_dn.parent_ids
  198. assert _DataManager._get(pickle_dn.id).last_edit_date is None
  199. assert _DataManager._get(pickle_dn.id).last_edit_date == pickle_dn.last_edit_date
  200. assert _DataManager._get(pickle_dn.id).job_ids == []
  201. assert _DataManager._get(pickle_dn.id).job_ids == pickle_dn.job_ids
  202. assert not _DataManager._get(pickle_dn.id).is_ready_for_reading
  203. assert _DataManager._get(pickle_dn.id).is_ready_for_reading == pickle_dn.is_ready_for_reading
  204. assert len(_DataManager._get(pickle_dn.id).properties) == 1
  205. assert _DataManager._get(pickle_dn.id).properties == pickle_dn.properties
  206. assert _DataManager._get(pickle_dn) is not None
  207. assert _DataManager._get(pickle_dn).id == pickle_dn.id
  208. assert _DataManager._get(pickle_dn).config_id == "plop"
  209. assert _DataManager._get(pickle_dn).config_id == pickle_dn.config_id
  210. assert _DataManager._get(pickle_dn).scope == Scope.CYCLE
  211. assert _DataManager._get(pickle_dn).scope == pickle_dn.scope
  212. assert _DataManager._get(pickle_dn).owner_id is None
  213. assert _DataManager._get(pickle_dn).owner_id == pickle_dn.owner_id
  214. assert _DataManager._get(pickle_dn).parent_ids == {"task_id_1", "task_id_2"}
  215. assert _DataManager._get(pickle_dn).parent_ids == pickle_dn.parent_ids
  216. assert _DataManager._get(pickle_dn).last_edit_date is None
  217. assert _DataManager._get(pickle_dn).last_edit_date == pickle_dn.last_edit_date
  218. assert _DataManager._get(pickle_dn).job_ids == []
  219. assert _DataManager._get(pickle_dn).job_ids == pickle_dn.job_ids
  220. assert not _DataManager._get(pickle_dn).is_ready_for_reading
  221. assert _DataManager._get(pickle_dn).is_ready_for_reading == pickle_dn.is_ready_for_reading
  222. assert len(_DataManager._get(pickle_dn).properties) == 1
  223. assert _DataManager._get(pickle_dn).properties == pickle_dn.properties
  224. def test_create_raises_exception_with_wrong_type(self):
  225. wrong_type_dn_config = DataNodeConfig(id="foo", storage_type="bar", scope=DataNodeConfig._DEFAULT_SCOPE)
  226. with pytest.raises(InvalidDataNodeType):
  227. _DataManager._create_and_set(wrong_type_dn_config, None, None)
  228. def test_create_from_same_config_generates_new_data_node_and_new_id(self):
  229. dn_config = Config.configure_data_node(id="foo", storage_type="in_memory")
  230. dn = _DataManager._create_and_set(dn_config, None, None)
  231. dn_2 = _DataManager._create_and_set(dn_config, None, None)
  232. assert dn_2.id != dn.id
  233. def test_create_uses_overridden_attributes_in_config_file(self):
  234. Config.override(os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/config.toml"))
  235. csv_dn_cfg = Config.configure_data_node(id="foo", storage_type="csv", path="bar", has_header=True)
  236. csv_dn = _DataManager._create_and_set(csv_dn_cfg, None, None)
  237. assert csv_dn.config_id == "foo"
  238. assert isinstance(csv_dn, CSVDataNode)
  239. assert csv_dn._path == "path_from_config_file"
  240. assert csv_dn.has_header
  241. csv_dn_cfg = Config.configure_data_node(id="baz", storage_type="csv", path="bar", has_header=True)
  242. csv_dn = _DataManager._create_and_set(csv_dn_cfg, None, None)
  243. assert csv_dn.config_id == "baz"
  244. assert isinstance(csv_dn, CSVDataNode)
  245. assert csv_dn._path == "bar"
  246. assert csv_dn.has_header
  247. def test_get_if_not_exists(self):
  248. with pytest.raises(ModelNotFound):
  249. _DataManager._repository._load("test_data_node_2")
  250. def test_get_all(self):
  251. assert len(_DataManager._get_all()) == 0
  252. dn_config_1 = Config.configure_data_node(id="foo", storage_type="in_memory")
  253. _DataManager._create_and_set(dn_config_1, None, None)
  254. assert len(_DataManager._get_all()) == 1
  255. dn_config_2 = Config.configure_data_node(id="baz", storage_type="in_memory")
  256. _DataManager._create_and_set(dn_config_2, None, None)
  257. _DataManager._create_and_set(dn_config_2, None, None)
  258. assert len(_DataManager._get_all()) == 3
  259. assert len([dn for dn in _DataManager._get_all() if dn.config_id == "foo"]) == 1
  260. assert len([dn for dn in _DataManager._get_all() if dn.config_id == "baz"]) == 2
  261. def test_get_all_on_multiple_versions_environment(self):
  262. # Create 5 data nodes with 2 versions each
  263. # Only version 1.0 has the data node with config_id = "config_id_1"
  264. # Only version 2.0 has the data node with config_id = "config_id_6"
  265. for version in range(1, 3):
  266. for i in range(5):
  267. _DataManager._set(
  268. InMemoryDataNode(
  269. f"config_id_{i + version}",
  270. Scope.SCENARIO,
  271. id=DataNodeId(f"id{i}_v{version}"),
  272. version=f"{version}.0",
  273. )
  274. )
  275. _VersionManager._set_experiment_version("1.0")
  276. assert len(_DataManager._get_all()) == 5
  277. assert len(_DataManager._get_all_by(filters=[{"version": "1.0", "config_id": "config_id_1"}])) == 1
  278. assert len(_DataManager._get_all_by(filters=[{"version": "1.0", "config_id": "config_id_6"}])) == 0
  279. _VersionManager._set_development_version("1.0")
  280. assert len(_DataManager._get_all()) == 5
  281. assert len(_DataManager._get_all_by(filters=[{"version": "1.0", "config_id": "config_id_1"}])) == 1
  282. assert len(_DataManager._get_all_by(filters=[{"version": "1.0", "config_id": "config_id_6"}])) == 0
  283. _VersionManager._set_experiment_version("2.0")
  284. assert len(_DataManager._get_all()) == 5
  285. assert len(_DataManager._get_all_by(filters=[{"version": "2.0", "config_id": "config_id_1"}])) == 0
  286. assert len(_DataManager._get_all_by(filters=[{"version": "2.0", "config_id": "config_id_6"}])) == 1
  287. _VersionManager._set_development_version("2.0")
  288. assert len(_DataManager._get_all()) == 5
  289. assert len(_DataManager._get_all_by(filters=[{"version": "2.0", "config_id": "config_id_1"}])) == 0
  290. assert len(_DataManager._get_all_by(filters=[{"version": "2.0", "config_id": "config_id_6"}])) == 1
  291. def test_set(self):
  292. dn = InMemoryDataNode(
  293. "config_id",
  294. Scope.SCENARIO,
  295. id=DataNodeId("id"),
  296. owner_id=None,
  297. parent_ids={"task_id_1"},
  298. last_edit_date=None,
  299. edits=[],
  300. edit_in_progress=False,
  301. properties={"foo": "bar"},
  302. )
  303. assert len(_DataManager._get_all()) == 0
  304. assert not _DataManager._exists(dn.id)
  305. _DataManager._set(dn)
  306. assert len(_DataManager._get_all()) == 1
  307. assert _DataManager._exists(dn.id)
  308. # changing data node attribute
  309. dn.config_id = "foo"
  310. assert dn.config_id == "foo"
  311. _DataManager._set(dn)
  312. assert len(_DataManager._get_all()) == 1
  313. assert dn.config_id == "foo"
  314. assert _DataManager._get(dn.id).config_id == "foo"
  315. def test_delete(self):
  316. dn_1 = InMemoryDataNode("config_id", Scope.SCENARIO, id="id_1")
  317. dn_2 = InMemoryDataNode("config_id", Scope.SCENARIO, id="id_2")
  318. dn_3 = InMemoryDataNode("config_id", Scope.SCENARIO, id="id_3")
  319. assert len(_DataManager._get_all()) == 0
  320. _DataManager._set(dn_1)
  321. _DataManager._set(dn_2)
  322. _DataManager._set(dn_3)
  323. assert len(_DataManager._get_all()) == 3
  324. assert all(_DataManager._exists(dn.id) for dn in [dn_1, dn_2, dn_3])
  325. _DataManager._delete(dn_1.id)
  326. assert len(_DataManager._get_all()) == 2
  327. assert _DataManager._get(dn_2.id).id == dn_2.id
  328. assert _DataManager._get(dn_3.id).id == dn_3.id
  329. assert _DataManager._get(dn_1.id) is None
  330. assert all(_DataManager._exists(dn.id) for dn in [dn_2, dn_3])
  331. assert not _DataManager._exists(dn_1.id)
  332. _DataManager._delete_all()
  333. assert len(_DataManager._get_all()) == 0
  334. assert not any(_DataManager._exists(dn.id) for dn in [dn_2, dn_3])
  335. def test_get_or_create(self):
  336. def _get_or_create_dn(config, *args):
  337. return _DataManager._bulk_get_or_create([config], *args)[config]
  338. _DataManager._delete_all()
  339. global_dn_config = Config.configure_data_node(
  340. id="test_data_node", storage_type="in_memory", scope=Scope.GLOBAL, data="In memory Data Node"
  341. )
  342. cycle_dn_config = Config.configure_data_node(
  343. id="test_data_node1", storage_type="in_memory", scope=Scope.CYCLE, data="In memory Data Node"
  344. )
  345. scenario_dn_config = Config.configure_data_node(
  346. id="test_data_node2", storage_type="in_memory", scope=Scope.SCENARIO, data="In memory scenario"
  347. )
  348. assert len(_DataManager._get_all()) == 0
  349. global_dn = _get_or_create_dn(global_dn_config, None, None)
  350. assert len(_DataManager._get_all()) == 1
  351. global_dn_bis = _get_or_create_dn(global_dn_config, None)
  352. assert len(_DataManager._get_all()) == 1
  353. assert global_dn.id == global_dn_bis.id
  354. scenario_dn = _get_or_create_dn(scenario_dn_config, None, "scenario_id")
  355. assert len(_DataManager._get_all()) == 2
  356. scenario_dn_bis = _get_or_create_dn(scenario_dn_config, None, "scenario_id")
  357. assert len(_DataManager._get_all()) == 2
  358. assert scenario_dn.id == scenario_dn_bis.id
  359. scenario_dn_ter = _get_or_create_dn(scenario_dn_config, None, "scenario_id")
  360. assert len(_DataManager._get_all()) == 2
  361. assert scenario_dn.id == scenario_dn_bis.id
  362. assert scenario_dn_bis.id == scenario_dn_ter.id
  363. scenario_dn_quater = _get_or_create_dn(scenario_dn_config, None, "scenario_id_2")
  364. assert len(_DataManager._get_all()) == 3
  365. assert scenario_dn.id == scenario_dn_bis.id
  366. assert scenario_dn_bis.id == scenario_dn_ter.id
  367. assert scenario_dn_ter.id != scenario_dn_quater.id
  368. assert len(_DataManager._get_all()) == 3
  369. cycle_dn = _get_or_create_dn(cycle_dn_config, "cycle_id", None)
  370. assert len(_DataManager._get_all()) == 4
  371. cycle_dn_1 = _get_or_create_dn(cycle_dn_config, "cycle_id", None)
  372. assert len(_DataManager._get_all()) == 4
  373. assert cycle_dn.id == cycle_dn_1.id
  374. cycle_dn_2 = _get_or_create_dn(cycle_dn_config, "cycle_id", "scenario_id")
  375. assert len(_DataManager._get_all()) == 4
  376. assert cycle_dn.id == cycle_dn_2.id
  377. cycle_dn_3 = _get_or_create_dn(cycle_dn_config, "cycle_id", None)
  378. assert len(_DataManager._get_all()) == 4
  379. assert cycle_dn.id == cycle_dn_3.id
  380. cycle_dn_4 = _get_or_create_dn(cycle_dn_config, "cycle_id", "scenario_id")
  381. assert len(_DataManager._get_all()) == 4
  382. assert cycle_dn.id == cycle_dn_4.id
  383. cycle_dn_5 = _get_or_create_dn(cycle_dn_config, "cycle_id", "scenario_id_2")
  384. assert len(_DataManager._get_all()) == 4
  385. assert cycle_dn.id == cycle_dn_5.id
  386. assert cycle_dn_1.id == cycle_dn_2.id
  387. assert cycle_dn_2.id == cycle_dn_3.id
  388. assert cycle_dn_3.id == cycle_dn_4.id
  389. assert cycle_dn_4.id == cycle_dn_5.id
  390. def test_ensure_persistence_of_data_node(self):
  391. dm = _DataManager()
  392. dm._delete_all()
  393. dn_config_1 = Config.configure_data_node(
  394. id="data_node_1", storage_type="in_memory", data="In memory sequence 2"
  395. )
  396. dn_config_2 = Config.configure_data_node(
  397. id="data_node_2", storage_type="in_memory", data="In memory sequence 2"
  398. )
  399. dm._bulk_get_or_create([dn_config_1, dn_config_2])
  400. assert len(dm._get_all()) == 2
  401. # Delete the DataManager to ensure it's get from the storage system
  402. del dm
  403. dm = _DataManager()
  404. dm._bulk_get_or_create([dn_config_1])
  405. assert len(dm._get_all()) == 2
  406. dm._delete_all()
  407. def test_clean_generated_pickle_files(self, pickle_file_path):
  408. user_pickle_dn_config = Config.configure_data_node(
  409. id="d1", storage_type="pickle", path=pickle_file_path, default_data="d"
  410. )
  411. generated_pickle_dn_1_config = Config.configure_data_node(id="d2", storage_type="pickle", default_data="d")
  412. generated_pickle_dn_2_config = Config.configure_data_node(id="d3", storage_type="pickle", default_data="d")
  413. dns = _DataManager._bulk_get_or_create(
  414. [user_pickle_dn_config, generated_pickle_dn_1_config, generated_pickle_dn_2_config]
  415. )
  416. user_pickle_dn = dns[user_pickle_dn_config]
  417. generated_pickle_dn_1 = dns[generated_pickle_dn_1_config]
  418. generated_pickle_dn_2 = dns[generated_pickle_dn_2_config]
  419. _DataManager._clean_pickle_file(user_pickle_dn.id)
  420. assert file_exists(user_pickle_dn.path)
  421. _DataManager._clean_pickle_files([generated_pickle_dn_1, generated_pickle_dn_2])
  422. assert not file_exists(generated_pickle_dn_1.path)
  423. assert not file_exists(generated_pickle_dn_2.path)
  424. def test_delete_does_clean_generated_pickle_files(self, pickle_file_path):
  425. user_pickle_dn_config = Config.configure_data_node(
  426. id="d1", storage_type="pickle", path=pickle_file_path, default_data="d"
  427. )
  428. generated_pickle_dn_config_1 = Config.configure_data_node(id="d2", storage_type="pickle", default_data="d")
  429. generated_pickle_dn_config_2 = Config.configure_data_node(id="d3", storage_type="pickle", default_data="d")
  430. generated_pickle_dn_config_3 = Config.configure_data_node(id="d4", storage_type="pickle", default_data="d")
  431. dns = _DataManager._bulk_get_or_create(
  432. [
  433. user_pickle_dn_config,
  434. generated_pickle_dn_config_1,
  435. generated_pickle_dn_config_2,
  436. generated_pickle_dn_config_3,
  437. ]
  438. )
  439. user_pickle_dn = dns[user_pickle_dn_config]
  440. generated_pickle_dn_1 = dns[generated_pickle_dn_config_1]
  441. generated_pickle_dn_2 = dns[generated_pickle_dn_config_2]
  442. generated_pickle_dn_3 = dns[generated_pickle_dn_config_3]
  443. _DataManager._delete(user_pickle_dn.id)
  444. assert file_exists(user_pickle_dn.path)
  445. _DataManager._delete_many([generated_pickle_dn_1.id, generated_pickle_dn_2.id])
  446. assert not file_exists(generated_pickle_dn_1.path)
  447. assert not file_exists(generated_pickle_dn_2.path)
  448. _DataManager._delete_all()
  449. assert not file_exists(generated_pickle_dn_3.path)
  450. def test_create_dn_from_loaded_config_no_scope(self):
  451. file_config = NamedTemporaryFile(
  452. """
  453. [TAIPY]
  454. [DATA_NODE.a]
  455. default_data = "4:int"
  456. [DATA_NODE.b]
  457. [TASK.t]
  458. function = "math.sqrt:function"
  459. inputs = [ "a:SECTION",]
  460. outputs = [ "b:SECTION",]
  461. skippable = "False:bool"
  462. [SCENARIO.s]
  463. tasks = [ "t:SECTION",]
  464. sequences.s_sequence = [ "t:SECTION",]
  465. [SCENARIO.s.comparators]
  466. """
  467. )
  468. from src.taipy import core as tp
  469. Config.override(file_config.filename)
  470. tp.create_scenario(Config.scenarios["s"])
  471. tp.create_scenario(Config.scenarios["s"])
  472. assert len(tp.get_data_nodes()) == 4
  473. def test_create_dn_from_loaded_config_no_storage_type(self):
  474. file_config = NamedTemporaryFile(
  475. """
  476. [TAIPY]
  477. [DATA_NODE.input]
  478. scope = "SCENARIO:SCOPE"
  479. default_data = "21:int"
  480. [DATA_NODE.output]
  481. storage_type = "in_memory"
  482. scope = "SCENARIO:SCOPE"
  483. [TASK.double]
  484. inputs = [ "input:SECTION",]
  485. function = "math.sqrt:function"
  486. outputs = [ "output:SECTION",]
  487. skippable = "False:bool"
  488. [SCENARIO.my_scenario]
  489. tasks = [ "double:SECTION",]
  490. sequences.my_sequence = [ "double:SECTION",]
  491. [SCENARIO.my_scenario.comparators]
  492. """
  493. )
  494. from src.taipy import core as tp
  495. Config.override(file_config.filename)
  496. scenario = tp.create_scenario(Config.scenarios["my_scenario"])
  497. assert isinstance(scenario.input, PickleDataNode)
  498. assert isinstance(scenario.output, InMemoryDataNode)
  499. def test_create_dn_from_loaded_config_modified_default_config(self):
  500. file_config = NamedTemporaryFile(
  501. """
  502. [TAIPY]
  503. [DATA_NODE.input]
  504. scope = "SCENARIO:SCOPE"
  505. default_path="fake/path.csv"
  506. [DATA_NODE.output]
  507. storage_type = "in_memory"
  508. scope = "SCENARIO:SCOPE"
  509. [TASK.double]
  510. inputs = [ "input:SECTION",]
  511. function = "math.sqrt:function"
  512. outputs = [ "output:SECTION",]
  513. skippable = "False:bool"
  514. [SCENARIO.my_scenario]
  515. tasks = [ "double:SECTION",]
  516. sequences.my_sequence = [ "double:SECTION",]
  517. [SCENARIO.my_scenario.comparators]
  518. """
  519. )
  520. from src.taipy import core as tp
  521. Config.set_default_data_node_configuration(storage_type="csv")
  522. Config.override(file_config.filename)
  523. scenario = tp.create_scenario(Config.scenarios["my_scenario"])
  524. assert isinstance(scenario.input, CSVDataNode)
  525. assert isinstance(scenario.output, InMemoryDataNode)
  526. def test_get_tasks_by_config_id(self):
  527. dn_config_1 = Config.configure_data_node("dn_1", scope=Scope.SCENARIO)
  528. dn_config_2 = Config.configure_data_node("dn_2", scope=Scope.SCENARIO)
  529. dn_config_3 = Config.configure_data_node("dn_3", scope=Scope.SCENARIO)
  530. dn_1_1 = _DataManager._create_and_set(dn_config_1, None, None)
  531. dn_1_2 = _DataManager._create_and_set(dn_config_1, None, None)
  532. dn_1_3 = _DataManager._create_and_set(dn_config_1, None, None)
  533. assert len(_DataManager._get_all()) == 3
  534. dn_2_1 = _DataManager._create_and_set(dn_config_2, None, None)
  535. dn_2_2 = _DataManager._create_and_set(dn_config_2, None, None)
  536. assert len(_DataManager._get_all()) == 5
  537. dn_3_1 = _DataManager._create_and_set(dn_config_3, None, None)
  538. assert len(_DataManager._get_all()) == 6
  539. dn_1_datanodes = _DataManager._get_by_config_id(dn_config_1.id)
  540. assert len(dn_1_datanodes) == 3
  541. assert sorted([dn_1_1.id, dn_1_2.id, dn_1_3.id]) == sorted([sequence.id for sequence in dn_1_datanodes])
  542. dn_2_datanodes = _DataManager._get_by_config_id(dn_config_2.id)
  543. assert len(dn_2_datanodes) == 2
  544. assert sorted([dn_2_1.id, dn_2_2.id]) == sorted([sequence.id for sequence in dn_2_datanodes])
  545. dn_3_datanodes = _DataManager._get_by_config_id(dn_config_3.id)
  546. assert len(dn_3_datanodes) == 1
  547. assert sorted([dn_3_1.id]) == sorted([sequence.id for sequence in dn_3_datanodes])
  548. def test_get_data_nodes_by_config_id_in_multiple_versions_environment(self):
  549. dn_config_1 = Config.configure_data_node("dn_1", scope=Scope.SCENARIO)
  550. dn_config_2 = Config.configure_data_node("dn_2", scope=Scope.SCENARIO)
  551. _VersionManager._set_experiment_version("1.0")
  552. _DataManager._create_and_set(dn_config_1, None, None)
  553. _DataManager._create_and_set(dn_config_1, None, None)
  554. _DataManager._create_and_set(dn_config_1, None, None)
  555. _DataManager._create_and_set(dn_config_2, None, None)
  556. _DataManager._create_and_set(dn_config_2, None, None)
  557. assert len(_DataManager._get_by_config_id(dn_config_1.id)) == 3
  558. assert len(_DataManager._get_by_config_id(dn_config_2.id)) == 2
  559. _VersionManager._set_experiment_version("2.0")
  560. _DataManager._create_and_set(dn_config_1, None, None)
  561. _DataManager._create_and_set(dn_config_1, None, None)
  562. _DataManager._create_and_set(dn_config_1, None, None)
  563. _DataManager._create_and_set(dn_config_2, None, None)
  564. _DataManager._create_and_set(dn_config_2, None, None)
  565. assert len(_DataManager._get_by_config_id(dn_config_1.id)) == 3
  566. assert len(_DataManager._get_by_config_id(dn_config_2.id)) == 2