# test_data_manager.py
# Copyright 2021-2025 Avaiga Private Limited
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
import os
import pathlib

import pandas as pd
import pytest
from pandas.testing import assert_frame_equal

from taipy import Scope
from taipy.common.config import Config
from taipy.core._version._version_manager import _VersionManager
from taipy.core.config.data_node_config import DataNodeConfig
from taipy.core.data._data_manager import _DataManager
from taipy.core.data.csv import CSVDataNode
from taipy.core.data.data_node_id import DataNodeId
from taipy.core.data.in_memory import InMemoryDataNode
from taipy.core.data.pickle import PickleDataNode
from taipy.core.exceptions.exceptions import InvalidDataNodeType, ModelNotFound, NoData
from taipy.core.reason import EntityDoesNotExist, NotGlobalScope, WrongConfigType
from tests.core.utils.named_temporary_file import NamedTemporaryFile
  28. def file_exists(file_path: str) -> bool:
  29. return os.path.exists(file_path)
  30. class TestDataManager:
  31. def test_create_data_node_and_modify_properties_does_not_modify_config(self):
  32. dn_config = Config.configure_data_node(id="name", foo="bar")
  33. dn = _DataManager._create(dn_config, None, None)
  34. assert dn_config.properties.get("foo") == "bar"
  35. assert dn_config.properties.get("baz") is None
  36. dn.properties["baz"] = "qux"
  37. _DataManager._update(dn)
  38. assert dn_config.properties.get("foo") == "bar"
  39. assert dn_config.properties.get("baz") is None
  40. assert dn.properties.get("foo") == "bar"
  41. assert dn.properties.get("baz") == "qux"
  42. def test_can_create(self):
  43. dn_config = Config.configure_data_node("dn", 10, scope=Scope.SCENARIO)
  44. global_dn_config = Config.configure_data_node(
  45. id="global_dn", storage_type="in_memory", scope=Scope.GLOBAL, data=10
  46. )
  47. reasons = _DataManager._can_create()
  48. assert bool(reasons) is True
  49. assert reasons._reasons == {}
  50. reasons = _DataManager._can_create(global_dn_config)
  51. assert bool(reasons) is True
  52. assert reasons._reasons == {}
  53. reasons = _DataManager._can_create(dn_config)
  54. assert bool(reasons) is False
  55. assert reasons._reasons[dn_config.id] == {NotGlobalScope(dn_config.id)}
  56. assert (
  57. str(list(reasons._reasons[dn_config.id])[0])
  58. == f"Data node config '{dn_config.id}' does not have GLOBAL scope"
  59. )
  60. reasons = _DataManager._can_create(1)
  61. assert bool(reasons) is False
  62. assert reasons._reasons["1"] == {WrongConfigType("1", DataNodeConfig.__name__)}
  63. assert str(list(reasons._reasons["1"])[0]) == "Object '1' must be a valid DataNodeConfig"
  64. def test_create_data_node_with_name_provided(self):
  65. dn_config = Config.configure_data_node(id="dn", foo="bar", name="acb")
  66. dn = _DataManager._create(dn_config, None, None)
  67. assert dn.name == "acb"
  68. def test_create_and_get_csv_data_node(self):
  69. # Test we can instantiate a CsvDataNode from DataNodeConfig with :
  70. # - a csv type
  71. # - a default scenario scope
  72. # - No owner_id
  73. csv_dn_config = Config.configure_data_node(id="foo", storage_type="csv", path="bar", has_header=True)
  74. csv_dn = _DataManager._create(csv_dn_config, None, None)
  75. assert isinstance(csv_dn, CSVDataNode)
  76. assert isinstance(_DataManager._get(csv_dn.id), CSVDataNode)
  77. assert _DataManager._exists(csv_dn.id)
  78. assert _DataManager._get(csv_dn.id) is not None
  79. assert _DataManager._get(csv_dn.id).id == csv_dn.id
  80. assert _DataManager._get(csv_dn.id).config_id == "foo"
  81. assert _DataManager._get(csv_dn.id).config_id == csv_dn.config_id
  82. assert _DataManager._get(csv_dn.id).scope == Scope.SCENARIO
  83. assert _DataManager._get(csv_dn.id).scope == csv_dn.scope
  84. assert _DataManager._get(csv_dn.id).owner_id is None
  85. assert _DataManager._get(csv_dn.id).owner_id == csv_dn.owner_id
  86. assert _DataManager._get(csv_dn.id).parent_ids == set()
  87. assert _DataManager._get(csv_dn.id).parent_ids == csv_dn.parent_ids
  88. assert _DataManager._get(csv_dn.id).last_edit_date is None
  89. assert _DataManager._get(csv_dn.id).last_edit_date == csv_dn.last_edit_date
  90. assert _DataManager._get(csv_dn.id).job_ids == []
  91. assert _DataManager._get(csv_dn.id).job_ids == csv_dn.job_ids
  92. assert not _DataManager._get(csv_dn.id).is_ready_for_reading
  93. assert _DataManager._get(csv_dn.id).is_ready_for_reading == csv_dn.is_ready_for_reading
  94. assert (
  95. len(_DataManager._get(csv_dn.id).properties) == 5
  96. ) # path, encoding, has_header, exposed_type, is_generated
  97. assert _DataManager._get(csv_dn.id).properties.get("path") == "bar"
  98. assert _DataManager._get(csv_dn.id).properties.get("encoding") == "utf-8"
  99. assert _DataManager._get(csv_dn.id).properties.get("has_header") is True
  100. assert _DataManager._get(csv_dn.id).properties.get("exposed_type") == "pandas"
  101. assert _DataManager._get(csv_dn.id).properties.get("is_generated") is False
  102. assert _DataManager._get(csv_dn.id).properties == csv_dn.properties
  103. assert _DataManager._get(csv_dn.id).edit_in_progress is False
  104. assert _DataManager._get(csv_dn.id)._editor_id is None
  105. assert _DataManager._get(csv_dn.id)._editor_expiration_date is None
  106. assert _DataManager._get(csv_dn) is not None
  107. assert _DataManager._get(csv_dn).id == csv_dn.id
  108. assert _DataManager._get(csv_dn).config_id == "foo"
  109. assert _DataManager._get(csv_dn).config_id == csv_dn.config_id
  110. assert _DataManager._get(csv_dn).scope == Scope.SCENARIO
  111. assert _DataManager._get(csv_dn).scope == csv_dn.scope
  112. assert _DataManager._get(csv_dn).owner_id is None
  113. assert _DataManager._get(csv_dn).owner_id == csv_dn.owner_id
  114. assert _DataManager._get(csv_dn).parent_ids == set()
  115. assert _DataManager._get(csv_dn).parent_ids == csv_dn.parent_ids
  116. assert _DataManager._get(csv_dn).last_edit_date is None
  117. assert _DataManager._get(csv_dn).last_edit_date == csv_dn.last_edit_date
  118. assert _DataManager._get(csv_dn).job_ids == []
  119. assert _DataManager._get(csv_dn).job_ids == csv_dn.job_ids
  120. assert not _DataManager._get(csv_dn).is_ready_for_reading
  121. assert _DataManager._get(csv_dn).is_ready_for_reading == csv_dn.is_ready_for_reading
  122. assert len(_DataManager._get(csv_dn).properties) == 5 # path, encoding, has_header, exposed_type, is_generated
  123. assert _DataManager._get(csv_dn).properties.get("path") == "bar"
  124. assert _DataManager._get(csv_dn).properties.get("encoding") == "utf-8"
  125. assert _DataManager._get(csv_dn).properties.get("has_header") is True
  126. assert _DataManager._get(csv_dn.id).properties.get("exposed_type") == "pandas"
  127. assert _DataManager._get(csv_dn.id).properties.get("is_generated") is False
  128. assert _DataManager._get(csv_dn).properties == csv_dn.properties
  129. assert _DataManager._get(csv_dn.id).edit_in_progress is False
  130. assert _DataManager._get(csv_dn.id)._editor_id is None
  131. assert _DataManager._get(csv_dn.id)._editor_expiration_date is None
  132. def test_edit_and_get_data_node(self):
  133. config = Config.configure_pickle_data_node(id="foo")
  134. dn = _DataManager._create(config, None, None)
  135. assert _DataManager._get(dn.id).last_edit_date is None
  136. assert len(_DataManager._get(dn.id).properties) == 2 # is_generated and path
  137. assert isinstance(_DataManager._get(dn.id).properties.get("path"), str)
  138. assert _DataManager._get(dn.id).properties.get("is_generated") is True
  139. assert not _DataManager._get(dn.id).edit_in_progress
  140. assert _DataManager._get(dn.id)._editor_id is None
  141. assert _DataManager._get(dn.id)._editor_expiration_date is None
  142. dn.lock_edit("foo")
  143. assert _DataManager._get(dn.id).last_edit_date is None
  144. assert len(_DataManager._get(dn.id).properties) == 2 # is_generated and path
  145. assert isinstance(_DataManager._get(dn.id).properties.get("path"), str)
  146. assert _DataManager._get(dn.id).properties.get("is_generated") is True
  147. assert _DataManager._get(dn.id).edit_in_progress
  148. assert _DataManager._get(dn.id).editor_id == "foo"
  149. assert _DataManager._get(dn.id).editor_expiration_date is not None
  150. dn.unlock_edit("foo")
  151. assert _DataManager._get(dn.id).last_edit_date is None
  152. assert len(_DataManager._get(dn.id).properties) == 2 # is_generated and path
  153. assert isinstance(_DataManager._get(dn.id).properties.get("path"), str)
  154. assert _DataManager._get(dn.id).properties.get("is_generated") is True
  155. assert not _DataManager._get(dn.id).edit_in_progress
  156. assert _DataManager._get(dn.id).editor_id is None
  157. assert _DataManager._get(dn.id).editor_expiration_date is None
  158. def test_create_and_get_in_memory_data_node(self):
  159. # Test we can instantiate an InMemoryDataNode from DataNodeConfig with :
  160. # - an in_memory type
  161. # - a scenario scope
  162. # - an owner id
  163. # - some default data
  164. in_memory_dn_config = Config.configure_data_node(
  165. id="baz", storage_type="in_memory", scope=Scope.SCENARIO, default_data="qux", other_data="foo"
  166. )
  167. in_mem_dn = _DataManager._create(in_memory_dn_config, "Scenario_id", {"task_id"})
  168. assert isinstance(in_mem_dn, InMemoryDataNode)
  169. assert isinstance(_DataManager._get(in_mem_dn.id), InMemoryDataNode)
  170. assert _DataManager._exists(in_mem_dn.id)
  171. assert _DataManager._get(in_mem_dn.id) is not None
  172. assert _DataManager._get(in_mem_dn.id).id == in_mem_dn.id
  173. assert _DataManager._get(in_mem_dn.id).config_id == "baz"
  174. assert _DataManager._get(in_mem_dn.id).config_id == in_mem_dn.config_id
  175. assert _DataManager._get(in_mem_dn.id).scope == Scope.SCENARIO
  176. assert _DataManager._get(in_mem_dn.id).scope == in_mem_dn.scope
  177. assert _DataManager._get(in_mem_dn.id).owner_id == "Scenario_id"
  178. assert _DataManager._get(in_mem_dn.id).owner_id == in_mem_dn.owner_id
  179. assert _DataManager._get(in_mem_dn.id).parent_ids == {"task_id"}
  180. assert _DataManager._get(in_mem_dn.id).parent_ids == in_mem_dn.parent_ids
  181. assert _DataManager._get(in_mem_dn.id).last_edit_date is not None
  182. assert _DataManager._get(in_mem_dn.id).last_edit_date == in_mem_dn.last_edit_date
  183. assert _DataManager._get(in_mem_dn.id).job_ids == []
  184. assert _DataManager._get(in_mem_dn.id).job_ids == in_mem_dn.job_ids
  185. assert _DataManager._get(in_mem_dn.id).is_ready_for_reading
  186. assert _DataManager._get(in_mem_dn.id).is_ready_for_reading == in_mem_dn.is_ready_for_reading
  187. assert len(_DataManager._get(in_mem_dn.id).properties) == 1
  188. assert _DataManager._get(in_mem_dn.id).properties.get("other_data") == "foo"
  189. assert _DataManager._get(in_mem_dn.id).properties == in_mem_dn.properties
  190. assert _DataManager._get(in_mem_dn) is not None
  191. assert _DataManager._get(in_mem_dn).id == in_mem_dn.id
  192. assert _DataManager._get(in_mem_dn).config_id == "baz"
  193. assert _DataManager._get(in_mem_dn).config_id == in_mem_dn.config_id
  194. assert _DataManager._get(in_mem_dn).scope == Scope.SCENARIO
  195. assert _DataManager._get(in_mem_dn).scope == in_mem_dn.scope
  196. assert _DataManager._get(in_mem_dn).owner_id == "Scenario_id"
  197. assert _DataManager._get(in_mem_dn).owner_id == in_mem_dn.owner_id
  198. assert _DataManager._get(in_mem_dn).parent_ids == {"task_id"}
  199. assert _DataManager._get(in_mem_dn).parent_ids == in_mem_dn.parent_ids
  200. assert _DataManager._get(in_mem_dn).last_edit_date is not None
  201. assert _DataManager._get(in_mem_dn).last_edit_date == in_mem_dn.last_edit_date
  202. assert _DataManager._get(in_mem_dn).job_ids == []
  203. assert _DataManager._get(in_mem_dn).job_ids == in_mem_dn.job_ids
  204. assert _DataManager._get(in_mem_dn).is_ready_for_reading
  205. assert _DataManager._get(in_mem_dn).is_ready_for_reading == in_mem_dn.is_ready_for_reading
  206. assert len(_DataManager._get(in_mem_dn).properties) == 1
  207. assert _DataManager._get(in_mem_dn).properties.get("other_data") == "foo"
  208. assert _DataManager._get(in_mem_dn).properties == in_mem_dn.properties
  209. def test_create_and_get_pickle_data_node(self):
  210. # Test we can instantiate a PickleDataNode from DataNodeConfig with :
  211. # - an in_memory type
  212. # - a business cycle scope
  213. # - No owner id
  214. # - no default data
  215. dn_config = Config.configure_data_node(id="plop", storage_type="pickle", scope=Scope.CYCLE)
  216. pickle_dn = _DataManager._create(dn_config, None, {"task_id_1", "task_id_2"})
  217. assert isinstance(pickle_dn, PickleDataNode)
  218. assert isinstance(_DataManager._get(pickle_dn.id), PickleDataNode)
  219. assert _DataManager._exists(pickle_dn.id)
  220. assert _DataManager._get(pickle_dn.id) is not None
  221. assert _DataManager._get(pickle_dn.id).id == pickle_dn.id
  222. assert _DataManager._get(pickle_dn.id).config_id == "plop"
  223. assert _DataManager._get(pickle_dn.id).config_id == pickle_dn.config_id
  224. assert _DataManager._get(pickle_dn.id).scope == Scope.CYCLE
  225. assert _DataManager._get(pickle_dn.id).scope == pickle_dn.scope
  226. assert _DataManager._get(pickle_dn.id).owner_id is None
  227. assert _DataManager._get(pickle_dn.id).owner_id == pickle_dn.owner_id
  228. assert _DataManager._get(pickle_dn.id).parent_ids == {"task_id_1", "task_id_2"}
  229. assert _DataManager._get(pickle_dn.id).parent_ids == pickle_dn.parent_ids
  230. assert _DataManager._get(pickle_dn.id).last_edit_date is None
  231. assert _DataManager._get(pickle_dn.id).last_edit_date == pickle_dn.last_edit_date
  232. assert _DataManager._get(pickle_dn.id).job_ids == []
  233. assert _DataManager._get(pickle_dn.id).job_ids == pickle_dn.job_ids
  234. assert not _DataManager._get(pickle_dn.id).is_ready_for_reading
  235. assert _DataManager._get(pickle_dn.id).is_ready_for_reading == pickle_dn.is_ready_for_reading
  236. assert len(_DataManager._get(pickle_dn.id).properties) == 2 # is_generated and path
  237. assert _DataManager._get(pickle_dn.id).properties == pickle_dn.properties
  238. assert _DataManager._get(pickle_dn) is not None
  239. assert _DataManager._get(pickle_dn).id == pickle_dn.id
  240. assert _DataManager._get(pickle_dn).config_id == "plop"
  241. assert _DataManager._get(pickle_dn).config_id == pickle_dn.config_id
  242. assert _DataManager._get(pickle_dn).scope == Scope.CYCLE
  243. assert _DataManager._get(pickle_dn).scope == pickle_dn.scope
  244. assert _DataManager._get(pickle_dn).owner_id is None
  245. assert _DataManager._get(pickle_dn).owner_id == pickle_dn.owner_id
  246. assert _DataManager._get(pickle_dn).parent_ids == {"task_id_1", "task_id_2"}
  247. assert _DataManager._get(pickle_dn).parent_ids == pickle_dn.parent_ids
  248. assert _DataManager._get(pickle_dn).last_edit_date is None
  249. assert _DataManager._get(pickle_dn).last_edit_date == pickle_dn.last_edit_date
  250. assert _DataManager._get(pickle_dn).job_ids == []
  251. assert _DataManager._get(pickle_dn).job_ids == pickle_dn.job_ids
  252. assert not _DataManager._get(pickle_dn).is_ready_for_reading
  253. assert _DataManager._get(pickle_dn).is_ready_for_reading == pickle_dn.is_ready_for_reading
  254. assert len(_DataManager._get(pickle_dn).properties) == 2 # is_generated and path
  255. assert _DataManager._get(pickle_dn).properties == pickle_dn.properties
  256. def test_create_raises_exception_with_wrong_type(self):
  257. wrong_type_dn_config = DataNodeConfig(id="foo", storage_type="bar", scope=DataNodeConfig._DEFAULT_SCOPE)
  258. with pytest.raises(InvalidDataNodeType):
  259. _DataManager._create(wrong_type_dn_config, None, None)
  260. def test_create_from_same_config_generates_new_data_node_and_new_id(self):
  261. dn_config = Config.configure_data_node(id="foo", storage_type="in_memory")
  262. dn = _DataManager._create(dn_config, None, None)
  263. dn_2 = _DataManager._create(dn_config, None, None)
  264. assert dn_2.id != dn.id
  265. def test_create_uses_overridden_attributes_in_config_file(self):
  266. Config.override(os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/config.toml"))
  267. csv_dn_cfg = Config.configure_data_node(id="foo", storage_type="csv", path="bar", has_header=True)
  268. csv_dn = _DataManager._create(csv_dn_cfg, None, None)
  269. assert csv_dn.config_id == "foo"
  270. assert isinstance(csv_dn, CSVDataNode)
  271. assert csv_dn._path == "path_from_config_file"
  272. assert csv_dn.properties["has_header"]
  273. csv_dn_cfg = Config.configure_data_node(id="baz", storage_type="csv", path="bar", has_header=True)
  274. csv_dn = _DataManager._create(csv_dn_cfg, None, None)
  275. assert csv_dn.config_id == "baz"
  276. assert isinstance(csv_dn, CSVDataNode)
  277. assert csv_dn._path == "bar"
  278. assert csv_dn.properties["has_header"]
  279. def test_get_if_not_exists(self):
  280. with pytest.raises(ModelNotFound):
  281. _DataManager._repository._load("test_data_node_2")
  282. def test_get_all(self):
  283. assert len(_DataManager._get_all()) == 0
  284. dn_config_1 = Config.configure_data_node(id="foo", storage_type="in_memory")
  285. _DataManager._create(dn_config_1, None, None)
  286. assert len(_DataManager._get_all()) == 1
  287. dn_config_2 = Config.configure_data_node(id="baz", storage_type="in_memory")
  288. _DataManager._create(dn_config_2, None, None)
  289. _DataManager._create(dn_config_2, None, None)
  290. assert len(_DataManager._get_all()) == 3
  291. assert len([dn for dn in _DataManager._get_all() if dn.config_id == "foo"]) == 1
  292. assert len([dn for dn in _DataManager._get_all() if dn.config_id == "baz"]) == 2
  293. def test_get_all_on_multiple_versions_environment(self):
  294. # Create 5 data nodes with 2 versions each
  295. # Only version 1.0 has the data node with config_id = "config_id_1"
  296. # Only version 2.0 has the data node with config_id = "config_id_6"
  297. for version in range(1, 3):
  298. for i in range(5):
  299. _DataManager._repository._save(
  300. InMemoryDataNode(
  301. f"config_id_{i + version}",
  302. Scope.SCENARIO,
  303. id=DataNodeId(f"id{i}_v{version}"),
  304. version=f"{version}.0",
  305. )
  306. )
  307. _VersionManager._set_experiment_version("1.0")
  308. assert len(_DataManager._get_all()) == 5
  309. assert len(_DataManager._get_all_by(filters=[{"version": "1.0", "config_id": "config_id_1"}])) == 1
  310. assert len(_DataManager._get_all_by(filters=[{"version": "1.0", "config_id": "config_id_6"}])) == 0
  311. _VersionManager._set_development_version("1.0")
  312. assert len(_DataManager._get_all()) == 5
  313. assert len(_DataManager._get_all_by(filters=[{"version": "1.0", "config_id": "config_id_1"}])) == 1
  314. assert len(_DataManager._get_all_by(filters=[{"version": "1.0", "config_id": "config_id_6"}])) == 0
  315. _VersionManager._set_experiment_version("2.0")
  316. assert len(_DataManager._get_all()) == 5
  317. assert len(_DataManager._get_all_by(filters=[{"version": "2.0", "config_id": "config_id_1"}])) == 0
  318. assert len(_DataManager._get_all_by(filters=[{"version": "2.0", "config_id": "config_id_6"}])) == 1
  319. _VersionManager._set_development_version("2.0")
  320. assert len(_DataManager._get_all()) == 5
  321. assert len(_DataManager._get_all_by(filters=[{"version": "2.0", "config_id": "config_id_1"}])) == 0
  322. assert len(_DataManager._get_all_by(filters=[{"version": "2.0", "config_id": "config_id_6"}])) == 1
  323. def test_save_and_update(self):
  324. dn = InMemoryDataNode(
  325. "config_id",
  326. Scope.SCENARIO,
  327. id=DataNodeId("id"),
  328. owner_id=None,
  329. parent_ids={"task_id_1"},
  330. last_edit_date=None,
  331. edits=[],
  332. edit_in_progress=False,
  333. properties={"foo": "bar"},
  334. )
  335. assert len(_DataManager._get_all()) == 0
  336. assert not _DataManager._exists(dn.id)
  337. _DataManager._repository._save(dn)
  338. assert len(_DataManager._get_all()) == 1
  339. assert _DataManager._exists(dn.id)
  340. # changing data node attribute
  341. dn._config_id = "foo"
  342. assert dn.config_id == "foo"
  343. _DataManager._update(dn)
  344. assert len(_DataManager._get_all()) == 1
  345. assert dn.config_id == "foo"
  346. assert _DataManager._get(dn.id).config_id == "foo"
  347. def test_delete(self):
  348. dn_1 = InMemoryDataNode("config_id", Scope.SCENARIO, id=DataNodeId("id_1"))
  349. dn_2 = InMemoryDataNode("config_id", Scope.SCENARIO, id=DataNodeId("id_2"))
  350. dn_3 = InMemoryDataNode("config_id", Scope.SCENARIO, id=DataNodeId("id_3"))
  351. assert len(_DataManager._get_all()) == 0
  352. _DataManager._repository._save(dn_1)
  353. _DataManager._repository._save(dn_2)
  354. _DataManager._repository._save(dn_3)
  355. assert len(_DataManager._get_all()) == 3
  356. assert all(_DataManager._exists(dn.id) for dn in [dn_1, dn_2, dn_3])
  357. _DataManager._delete(dn_1.id)
  358. assert len(_DataManager._get_all()) == 2
  359. assert _DataManager._get(dn_2.id).id == dn_2.id
  360. assert _DataManager._get(dn_3.id).id == dn_3.id
  361. assert _DataManager._get(dn_1.id) is None
  362. assert all(_DataManager._exists(dn.id) for dn in [dn_2, dn_3])
  363. assert not _DataManager._exists(dn_1.id)
  364. _DataManager._delete_all()
  365. assert len(_DataManager._get_all()) == 0
  366. assert not any(_DataManager._exists(dn.id) for dn in [dn_2, dn_3])
  367. def test_get_or_create(self):
  368. def _get_or_create_dn(config, *args):
  369. return _DataManager._bulk_get_or_create([config], *args)[config]
  370. _DataManager._delete_all()
  371. global_dn_config = Config.configure_data_node(
  372. id="test_data_node", storage_type="in_memory", scope=Scope.GLOBAL, data="In memory Data Node"
  373. )
  374. cycle_dn_config = Config.configure_data_node(
  375. id="test_data_node1", storage_type="in_memory", scope=Scope.CYCLE, data="In memory Data Node"
  376. )
  377. scenario_dn_config = Config.configure_data_node(
  378. id="test_data_node2", storage_type="in_memory", scope=Scope.SCENARIO, data="In memory scenario"
  379. )
  380. assert len(_DataManager._get_all()) == 0
  381. global_dn = _get_or_create_dn(global_dn_config, None, None)
  382. assert len(_DataManager._get_all()) == 1
  383. global_dn_bis = _get_or_create_dn(global_dn_config, None)
  384. assert len(_DataManager._get_all()) == 1
  385. assert global_dn.id == global_dn_bis.id
  386. scenario_dn = _get_or_create_dn(scenario_dn_config, None, "scenario_id")
  387. assert len(_DataManager._get_all()) == 2
  388. scenario_dn_bis = _get_or_create_dn(scenario_dn_config, None, "scenario_id")
  389. assert len(_DataManager._get_all()) == 2
  390. assert scenario_dn.id == scenario_dn_bis.id
  391. scenario_dn_ter = _get_or_create_dn(scenario_dn_config, None, "scenario_id")
  392. assert len(_DataManager._get_all()) == 2
  393. assert scenario_dn.id == scenario_dn_bis.id
  394. assert scenario_dn_bis.id == scenario_dn_ter.id
  395. scenario_dn_quater = _get_or_create_dn(scenario_dn_config, None, "scenario_id_2")
  396. assert len(_DataManager._get_all()) == 3
  397. assert scenario_dn.id == scenario_dn_bis.id
  398. assert scenario_dn_bis.id == scenario_dn_ter.id
  399. assert scenario_dn_ter.id != scenario_dn_quater.id
  400. assert len(_DataManager._get_all()) == 3
  401. cycle_dn = _get_or_create_dn(cycle_dn_config, "cycle_id", None)
  402. assert len(_DataManager._get_all()) == 4
  403. cycle_dn_1 = _get_or_create_dn(cycle_dn_config, "cycle_id", None)
  404. assert len(_DataManager._get_all()) == 4
  405. assert cycle_dn.id == cycle_dn_1.id
  406. cycle_dn_2 = _get_or_create_dn(cycle_dn_config, "cycle_id", "scenario_id")
  407. assert len(_DataManager._get_all()) == 4
  408. assert cycle_dn.id == cycle_dn_2.id
  409. cycle_dn_3 = _get_or_create_dn(cycle_dn_config, "cycle_id", None)
  410. assert len(_DataManager._get_all()) == 4
  411. assert cycle_dn.id == cycle_dn_3.id
  412. cycle_dn_4 = _get_or_create_dn(cycle_dn_config, "cycle_id", "scenario_id")
  413. assert len(_DataManager._get_all()) == 4
  414. assert cycle_dn.id == cycle_dn_4.id
  415. cycle_dn_5 = _get_or_create_dn(cycle_dn_config, "cycle_id", "scenario_id_2")
  416. assert len(_DataManager._get_all()) == 4
  417. assert cycle_dn.id == cycle_dn_5.id
  418. assert cycle_dn_1.id == cycle_dn_2.id
  419. assert cycle_dn_2.id == cycle_dn_3.id
  420. assert cycle_dn_3.id == cycle_dn_4.id
  421. assert cycle_dn_4.id == cycle_dn_5.id
  422. def test_ensure_persistence_of_data_node(self):
  423. dm = _DataManager()
  424. dm._delete_all()
  425. dn_config_1 = Config.configure_data_node(
  426. id="data_node_1", storage_type="in_memory", data="In memory sequence 2"
  427. )
  428. dn_config_2 = Config.configure_data_node(
  429. id="data_node_2", storage_type="in_memory", data="In memory sequence 2"
  430. )
  431. dm._bulk_get_or_create([dn_config_1, dn_config_2])
  432. assert len(dm._get_all()) == 2
  433. # Delete the DataManager to ensure it's get from the storage system
  434. del dm
  435. dm = _DataManager()
  436. dm._bulk_get_or_create([dn_config_1])
  437. assert len(dm._get_all()) == 2
  438. dm._delete_all()
  439. @pytest.mark.parametrize(
  440. "storage_type,path",
  441. [
  442. ("pickle", "pickle_file_path"),
  443. ("csv", "csv_file"),
  444. ("excel", "excel_file"),
  445. ("json", "json_file"),
  446. ("parquet", "parquet_file_path"),
  447. ],
  448. )
  449. def test_read(self, storage_type, path, request):
  450. path = request.getfixturevalue(path)
  451. non_exist_dn_config = Config.configure_data_node(id="d1", storage_type=storage_type, path="non_exist_path")
  452. dn_config = Config.configure_data_node(id="d2", storage_type=storage_type, path=path)
  453. dn_1 = _DataManager._create(non_exist_dn_config, None, None)
  454. dn_2 = _DataManager._create(dn_config, None, None)
  455. with pytest.raises(NoData):
  456. _DataManager._read(dn_1)
  457. assert dn_2._read() is not None
  458. @pytest.mark.parametrize(
  459. "storage_type,path",
  460. [
  461. ("pickle", "pickle_file_path"),
  462. ("csv", "csv_file"),
  463. ("parquet", "parquet_file_path"),
  464. ],
  465. )
  466. def test_write(self, storage_type, path, request):
  467. path = request.getfixturevalue(path)
  468. dn_config = Config.configure_data_node(id="d2", storage_type=storage_type, path=path)
  469. dn = _DataManager._create(dn_config, None, None)
  470. new_data = pd.DataFrame([{"a": 11, "b": 12, "c": 13}, {"a": 14, "b": 15, "c": 16}])
  471. _DataManager._write(dn, new_data)
  472. assert_frame_equal(dn._read(), new_data)
  473. @pytest.mark.parametrize(
  474. "storage_type,path",
  475. [
  476. ("csv", "csv_file"),
  477. ("parquet", "parquet_file_path"),
  478. ],
  479. )
  480. def test_append(self, storage_type, path, request):
  481. path = request.getfixturevalue(path)
  482. dn_config = Config.configure_data_node(id="d2", storage_type=storage_type, path=path)
  483. dn = _DataManager._create(dn_config, None, None)
  484. old_data = _DataManager._read(dn)
  485. new_data = pd.DataFrame([{"a": 11, "b": 12, "c": 13}, {"a": 14, "b": 15, "c": 16}])
  486. _DataManager._append(dn, new_data)
  487. assert_frame_equal(dn._read(), pd.concat([old_data, new_data], ignore_index=True))
  488. @pytest.mark.parametrize(
  489. "storage_type,path",
  490. [
  491. ("pickle", "pickle_file_path"),
  492. ("csv", "csv_file"),
  493. ("excel", "excel_file"),
  494. ("json", "json_file"),
  495. ("parquet", "parquet_file_path"),
  496. ],
  497. )
  498. def test_clean_generated_files(self, storage_type, path, request):
  499. path = request.getfixturevalue(path)
  500. user_dn_config = Config.configure_data_node(
  501. id="d1", storage_type=storage_type, path=path, default_data={"a": [1], "b": [2]}
  502. )
  503. generated_dn_1_config = Config.configure_data_node(
  504. id="d2", storage_type=storage_type, default_data={"a": [1], "b": [2]}
  505. )
  506. generated_dn_2_config = Config.configure_data_node(
  507. id="d3", storage_type=storage_type, default_data={"a": [1], "b": [2]}
  508. )
  509. dns = _DataManager._bulk_get_or_create([user_dn_config, generated_dn_1_config, generated_dn_2_config])
  510. user_dn = dns[user_dn_config]
  511. generated_dn_1 = dns[generated_dn_1_config]
  512. generated_dn_2 = dns[generated_dn_2_config]
  513. _DataManager._clean_generated_file(user_dn.id)
  514. assert file_exists(user_dn.path)
  515. _DataManager._clean_generated_files([generated_dn_1, generated_dn_2])
  516. assert not file_exists(generated_dn_1.path)
  517. assert not file_exists(generated_dn_2.path)
  518. @pytest.mark.parametrize(
  519. "storage_type,path",
  520. [
  521. ("pickle", "pickle_file_path"),
  522. ("csv", "csv_file"),
  523. ("excel", "excel_file"),
  524. ("json", "json_file"),
  525. ("parquet", "parquet_file_path"),
  526. ],
  527. )
  528. def test_delete_does_clean_generated_pickle_files(self, storage_type, path, request):
  529. path = request.getfixturevalue(path)
  530. user_dn_config = Config.configure_data_node(
  531. id="d1", storage_type=storage_type, path=path, default_data={"a": [1], "b": [2]}
  532. )
  533. generated_dn_config_1 = Config.configure_data_node(
  534. id="d2", storage_type=storage_type, default_data={"a": [1], "b": [2]}
  535. )
  536. generated_dn_config_2 = Config.configure_data_node(
  537. id="d3", storage_type=storage_type, default_data={"a": [1], "b": [2]}
  538. )
  539. generated_dn_config_3 = Config.configure_data_node(
  540. id="d4", storage_type=storage_type, default_data={"a": [1], "b": [2]}
  541. )
  542. dns = _DataManager._bulk_get_or_create(
  543. [
  544. user_dn_config,
  545. generated_dn_config_1,
  546. generated_dn_config_2,
  547. generated_dn_config_3,
  548. ]
  549. )
  550. user_dn = dns[user_dn_config]
  551. generated_dn_1 = dns[generated_dn_config_1]
  552. generated_dn_2 = dns[generated_dn_config_2]
  553. generated_dn_3 = dns[generated_dn_config_3]
  554. _DataManager._delete(user_dn.id)
  555. assert file_exists(user_dn.path)
  556. _DataManager._delete_many([generated_dn_1.id, generated_dn_2.id])
  557. assert not file_exists(generated_dn_1.path)
  558. assert not file_exists(generated_dn_2.path)
  559. _DataManager._delete_all()
  560. assert not file_exists(generated_dn_3.path)
  561. def test_create_dn_from_loaded_config_no_scope(self):
  562. file_config = NamedTemporaryFile(
  563. """
  564. [TAIPY]
  565. [DATA_NODE.a]
  566. default_data = "4:int"
  567. [DATA_NODE.b]
  568. [TASK.t]
  569. function = "math.sqrt:function"
  570. inputs = [ "a:SECTION",]
  571. outputs = [ "b:SECTION",]
  572. skippable = "False:bool"
  573. [SCENARIO.s]
  574. tasks = [ "t:SECTION",]
  575. sequences.s_sequence = [ "t:SECTION",]
  576. [SCENARIO.s.comparators]
  577. """
  578. )
  579. from taipy import core as tp
  580. Config.override(file_config.filename)
  581. tp.create_scenario(Config.scenarios["s"])
  582. tp.create_scenario(Config.scenarios["s"])
  583. assert len(tp.get_data_nodes()) == 4
  584. def test_create_dn_from_loaded_config_no_storage_type(self):
  585. file_config = NamedTemporaryFile(
  586. """
  587. [TAIPY]
  588. [DATA_NODE.input_dn]
  589. scope = "SCENARIO:SCOPE"
  590. default_data = "21:int"
  591. [DATA_NODE.output_dn]
  592. storage_type = "in_memory"
  593. scope = "SCENARIO:SCOPE"
  594. [TASK.double]
  595. inputs = [ "input_dn:SECTION",]
  596. function = "math.sqrt:function"
  597. outputs = [ "output_dn:SECTION",]
  598. skippable = "False:bool"
  599. [SCENARIO.my_scenario]
  600. tasks = [ "double:SECTION",]
  601. sequences.my_sequence = [ "double:SECTION",]
  602. [SCENARIO.my_scenario.comparators]
  603. """
  604. )
  605. from taipy import core as tp
  606. Config.override(file_config.filename)
  607. scenario = tp.create_scenario(Config.scenarios["my_scenario"])
  608. assert isinstance(scenario.input_dn, PickleDataNode)
  609. assert isinstance(scenario.output_dn, InMemoryDataNode)
  610. def test_create_dn_from_loaded_config_modified_default_config(self):
  611. file_config = NamedTemporaryFile(
  612. """
  613. [TAIPY]
  614. [DATA_NODE.input_dn]
  615. scope = "SCENARIO:SCOPE"
  616. default_path="fake/path.csv"
  617. [DATA_NODE.output_dn]
  618. storage_type = "in_memory"
  619. scope = "SCENARIO:SCOPE"
  620. [TASK.double]
  621. inputs = [ "input_dn:SECTION",]
  622. function = "math.sqrt:function"
  623. outputs = [ "output_dn:SECTION",]
  624. skippable = "False:bool"
  625. [SCENARIO.my_scenario]
  626. tasks = [ "double:SECTION",]
  627. sequences.my_sequence = [ "double:SECTION",]
  628. [SCENARIO.my_scenario.comparators]
  629. """
  630. )
  631. from taipy import core as tp
  632. Config.set_default_data_node_configuration(storage_type="csv")
  633. Config.override(file_config.filename)
  634. scenario = tp.create_scenario(Config.scenarios["my_scenario"])
  635. assert isinstance(scenario.input_dn, CSVDataNode)
  636. assert isinstance(scenario.output_dn, InMemoryDataNode)
  637. def test_get_tasks_by_config_id(self):
  638. dn_config_1 = Config.configure_data_node("dn_1", scope=Scope.SCENARIO)
  639. dn_config_2 = Config.configure_data_node("dn_2", scope=Scope.SCENARIO)
  640. dn_config_3 = Config.configure_data_node("dn_3", scope=Scope.SCENARIO)
  641. dn_1_1 = _DataManager._create(dn_config_1, None, None)
  642. dn_1_2 = _DataManager._create(dn_config_1, None, None)
  643. dn_1_3 = _DataManager._create(dn_config_1, None, None)
  644. assert len(_DataManager._get_all()) == 3
  645. dn_2_1 = _DataManager._create(dn_config_2, None, None)
  646. dn_2_2 = _DataManager._create(dn_config_2, None, None)
  647. assert len(_DataManager._get_all()) == 5
  648. dn_3_1 = _DataManager._create(dn_config_3, None, None)
  649. assert len(_DataManager._get_all()) == 6
  650. dn_1_datanodes = _DataManager._get_by_config_id(dn_config_1.id)
  651. assert len(dn_1_datanodes) == 3
  652. assert sorted([dn_1_1.id, dn_1_2.id, dn_1_3.id]) == sorted([sequence.id for sequence in dn_1_datanodes])
  653. dn_2_datanodes = _DataManager._get_by_config_id(dn_config_2.id)
  654. assert len(dn_2_datanodes) == 2
  655. assert sorted([dn_2_1.id, dn_2_2.id]) == sorted([sequence.id for sequence in dn_2_datanodes])
  656. dn_3_datanodes = _DataManager._get_by_config_id(dn_config_3.id)
  657. assert len(dn_3_datanodes) == 1
  658. assert sorted([dn_3_1.id]) == sorted([sequence.id for sequence in dn_3_datanodes])
  659. def test_get_data_nodes_by_config_id_in_multiple_versions_environment(self):
  660. dn_config_1 = Config.configure_data_node("dn_1", scope=Scope.SCENARIO)
  661. dn_config_2 = Config.configure_data_node("dn_2", scope=Scope.SCENARIO)
  662. _VersionManager._set_experiment_version("1.0")
  663. _DataManager._create(dn_config_1, None, None)
  664. _DataManager._create(dn_config_1, None, None)
  665. _DataManager._create(dn_config_1, None, None)
  666. _DataManager._create(dn_config_2, None, None)
  667. _DataManager._create(dn_config_2, None, None)
  668. assert len(_DataManager._get_by_config_id(dn_config_1.id)) == 3
  669. assert len(_DataManager._get_by_config_id(dn_config_2.id)) == 2
  670. _VersionManager._set_experiment_version("2.0")
  671. _DataManager._create(dn_config_1, None, None)
  672. _DataManager._create(dn_config_1, None, None)
  673. _DataManager._create(dn_config_1, None, None)
  674. _DataManager._create(dn_config_2, None, None)
  675. _DataManager._create(dn_config_2, None, None)
  676. assert len(_DataManager._get_by_config_id(dn_config_1.id)) == 3
  677. assert len(_DataManager._get_by_config_id(dn_config_2.id)) == 2
  678. def test_can_duplicate(self):
  679. dn_config = Config.configure_data_node("dn_1")
  680. dn = _DataManager._create(dn_config, None, None)
  681. reasons = _DataManager._can_duplicate(dn.id)
  682. assert bool(reasons)
  683. assert reasons._reasons == {}
  684. reasons = _DataManager._can_duplicate(dn)
  685. assert bool(reasons)
  686. assert reasons._reasons == {}
  687. reasons = _DataManager._can_duplicate("1")
  688. assert not bool(reasons)
  689. assert reasons._reasons["1"] == {EntityDoesNotExist("1")}
  690. assert str(list(reasons._reasons["1"])[0]) == "Entity '1' does not exist in the repository"