test_pickle_data_node.py

# Copyright 2023 Avaiga Private Limited
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

import os
import pathlib
from datetime import datetime
from time import sleep

import modin.pandas as modin_pd
import pandas as pd
import pytest

from taipy.config.common.scope import Scope
from taipy.config.config import Config
from taipy.config.exceptions.exceptions import InvalidConfigurationId
from taipy.core.data._data_manager import _DataManager
from taipy.core.data.pickle import PickleDataNode
from taipy.core.exceptions.exceptions import NoData


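# Remove the temporary pickle file (data_sample/temp.p) left behind by the path-modification test.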
@pytest.fixture(scope="function", autouse=True)
def cleanup():
    yield
    path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/temp.p")
    if os.path.isfile(path):
        os.remove(path)


class TestPickleDataNodeEntity:
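    # Delete any pickle files generated in the current working directory after each test.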
    @pytest.fixture(scope="function", autouse=True)
    def remove_pickle_files(self):
        yield
        import glob

        for f in glob.glob("*.p"):
            print(f"deleting file {f}")
            os.remove(f)
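
    # A data node created with default data is written to the storage folder right away
    # and exposes the expected default attributes.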
    def test_create(self):
        dn = PickleDataNode("foobar_bazxyxea", Scope.SCENARIO, properties={"default_data": "Data"})
        assert os.path.isfile(Config.core.storage_folder + "pickles/" + dn.id + ".p")
        assert isinstance(dn, PickleDataNode)
        assert dn.storage_type() == "pickle"
        assert dn.config_id == "foobar_bazxyxea"
        assert dn.scope == Scope.SCENARIO
        assert dn.id is not None
        assert dn.name is None
        assert dn.owner_id is None
        assert dn.last_edit_date is not None
        assert dn.job_ids == []
        assert dn.is_ready_for_reading
        assert dn.read() == "Data"
        assert dn.last_edit_date is not None
        assert dn.job_ids == []

        with pytest.raises(InvalidConfigurationId):
            PickleDataNode("foobar bazxyxea", Scope.SCENARIO, properties={"default_data": "Data"})
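
    # Only user-defined properties are returned by _get_user_properties().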
    def test_get_user_properties(self, pickle_file_path):
        dn_1 = PickleDataNode("dn_1", Scope.SCENARIO, properties={"path": pickle_file_path})
        assert dn_1._get_user_properties() == {}

        dn_2 = PickleDataNode(
            "dn_2",
            Scope.SCENARIO,
            properties={
                "default_data": "foo",
                "default_path": pickle_file_path,
                "foo": "bar",
            },
        )

        # default_data, default_path, path, is_generated are filtered out
        assert dn_2._get_user_properties() == {"foo": "bar"}
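
    # A data node pointing to an existing pickle file is ready for reading;
    # one pointing to a missing file is not.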
    def test_new_pickle_data_node_with_existing_file_is_ready_for_reading(self):
        not_ready_dn_cfg = Config.configure_data_node("not_ready_data_node_config_id", "pickle", path="NOT_EXISTING.p")
        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.p")
        ready_dn_cfg = Config.configure_data_node("ready_data_node_config_id", "pickle", path=path)

        dns = _DataManager._bulk_get_or_create([not_ready_dn_cfg, ready_dn_cfg])

        assert not dns[not_ready_dn_cfg].is_ready_for_reading
        assert dns[ready_dn_cfg].is_ready_for_reading
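
    # An explicit file name passed through the "path" property is used to store the default data.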
    def test_create_with_file_name(self):
        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": "bar", "path": "foo.FILE.p"})
        assert os.path.isfile("foo.FILE.p")
        assert dn.read() == "bar"
        dn.write("qux")
        assert dn.read() == "qux"
        dn.write(1998)
        assert dn.read() == 1998
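
    # Reading without data raises NoData; strings, integers, and dictionaries
    # round-trip through the pickle file.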
    def test_read_and_write(self):
        no_data_dn = PickleDataNode("foo", Scope.SCENARIO)
        with pytest.raises(NoData):
            assert no_data_dn.read() is None
            no_data_dn.read_or_raise()

        pickle_str = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": "bar"})
        assert isinstance(pickle_str.read(), str)
        assert pickle_str.read() == "bar"
        pickle_str.properties["default_data"] = "baz"  # this modifies the default data value but not the data itself
        assert pickle_str.read() == "bar"
        pickle_str.write("qux")
        assert pickle_str.read() == "qux"
        pickle_str.write(1998)
        assert pickle_str.read() == 1998
        assert isinstance(pickle_str.read(), int)

        pickle_int = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": 197})
        assert isinstance(pickle_int.read(), int)
        assert pickle_int.read() == 197

        pickle_dict = PickleDataNode(
            "foo", Scope.SCENARIO, properties={"default_data": {"bar": 12, "baz": "qux", "quux": [13]}}
        )
        assert isinstance(pickle_dict.read(), dict)
        assert pickle_dict.read() == {"bar": 12, "baz": "qux", "quux": [13]}
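
    # Pandas and Modin DataFrames can both be written and read back; the type returned by
    # read() follows the type of the data most recently written.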
    @pytest.mark.modin
    def test_read_and_write_modin(self):
        default_pandas = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        new_pandas_df = pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})
        default_modin = modin_pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        new_modin_df = modin_pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

        pickle_pandas = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": default_pandas})
        assert isinstance(pickle_pandas.read(), pd.DataFrame)
        assert default_pandas.equals(pickle_pandas.read())
        pickle_pandas.write(new_pandas_df)
        assert new_pandas_df.equals(pickle_pandas.read())
        assert isinstance(pickle_pandas.read(), pd.DataFrame)
        pickle_pandas.write(new_modin_df)
        assert new_modin_df.equals(pickle_pandas.read())
        assert isinstance(pickle_pandas.read(), modin_pd.DataFrame)
        pickle_pandas.write(1998)
        assert pickle_pandas.read() == 1998
        assert isinstance(pickle_pandas.read(), int)

        pickle_modin = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": default_modin})
        assert isinstance(pickle_modin.read(), modin_pd.DataFrame)
        assert default_modin.equals(pickle_modin.read())
        pickle_modin.write(new_modin_df)
        assert new_modin_df.equals(pickle_modin.read())
        assert isinstance(pickle_modin.read(), modin_pd.DataFrame)
        pickle_modin.write(new_pandas_df)
        assert new_pandas_df.equals(pickle_modin.read())
        assert isinstance(pickle_modin.read(), pd.DataFrame)
        pickle_modin.write(1998)
        assert pickle_modin.read() == 1998
        assert isinstance(pickle_modin.read(), int)
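
    # When both "default_path" and "path" are provided, "path" takes precedence.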
    def test_path_overrides_default_path(self):
        dn = PickleDataNode(
            "foo",
            Scope.SCENARIO,
            properties={
                "default_data": "bar",
                "default_path": "foo.FILE.p",
                "path": "bar.FILE.p",
            },
        )
        assert dn.path == "bar.FILE.p"
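
    # The path can be reassigned after creation.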
    def test_set_path(self):
        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"default_path": "foo.p"})
        assert dn.path == "foo.p"
        dn.path = "bar.p"
        assert dn.path == "bar.p"
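
    # A data node with no explicit path gets an auto-generated file; setting a path
    # makes it no longer generated.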
    def test_is_generated(self):
        dn = PickleDataNode("foo", Scope.SCENARIO, properties={})
        assert dn.is_generated
        dn.path = "bar.p"
        assert not dn.is_generated
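
    # Changing the path to a non-existing file makes read() fail until new data is written there.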
    def test_read_write_after_modify_path(self):
        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.p")
        new_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/temp.p")
        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"default_path": path})
        read_data = dn.read()
        assert read_data is not None
        dn.path = new_path
        with pytest.raises(FileNotFoundError):
            dn.read()
        dn.write({"other": "stuff"})
        assert dn.read() == {"other": "stuff"}
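
    # When the pickle file is modified outside of Taipy, last_edit_date falls back to the
    # file system's modification time.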
    def test_get_system_modified_date_instead_of_last_edit_date(self, tmpdir_factory):
        temp_file_path = str(tmpdir_factory.mktemp("data").join("temp.pickle"))
        pd.DataFrame([]).to_pickle(temp_file_path)
        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": temp_file_path, "exposed_type": "pandas"})

        dn.write(pd.DataFrame([1, 2, 3]))
        previous_edit_date = dn.last_edit_date

        sleep(0.1)

        pd.DataFrame([4, 5, 6]).to_pickle(temp_file_path)
        new_edit_date = datetime.fromtimestamp(os.path.getmtime(temp_file_path))
        assert previous_edit_date < dn.last_edit_date
        assert new_edit_date == dn.last_edit_date

        sleep(0.1)

        dn.write(pd.DataFrame([7, 8, 9]))
        assert new_edit_date < dn.last_edit_date

        os.unlink(temp_file_path)