test_pickle_data_node.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. # Copyright 2023 Avaiga Private Limited
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  4. # the License. You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  9. # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
  10. # specific language governing permissions and limitations under the License.
  11. import os
  12. import pathlib
  13. from datetime import datetime
  14. from time import sleep
  15. import modin.pandas as modin_pd
  16. import pandas as pd
  17. import pytest
  18. from taipy.config.common.scope import Scope
  19. from taipy.config.config import Config
  20. from taipy.config.exceptions.exceptions import InvalidConfigurationId
  21. from taipy.core.data._data_manager import _DataManager
  22. from taipy.core.data.pickle import PickleDataNode
  23. from taipy.core.exceptions.exceptions import NoData
  @pytest.fixture(scope="function", autouse=True)
  def cleanup():
      """Remove the ``data_sample/temp.p`` file next to this module after each test, if it exists."""
      yield
      # Teardown: everything after the yield runs once the test has finished.
      temp_pickle = pathlib.Path(__file__).parent.resolve() / "data_sample/temp.p"
      if temp_pickle.is_file():
          temp_pickle.unlink()
  class TestPickleDataNodeEntity:
      """Tests for ``PickleDataNode``: creation, user properties, path handling and read/write round trips."""

      @pytest.fixture(scope="function", autouse=True)
      def remove_pickle_files(self):
          """Delete every ``*.p`` file found in the current working directory after each test."""
          yield
          import glob

          for f in glob.glob("*.p"):
              os.remove(f)

      def test_create(self):
          """Check the attributes of a node created with default data and that an invalid config id is rejected."""
          dn = PickleDataNode("foobar_bazxyxea", Scope.SCENARIO, properties={"default_data": "Data"})
          # Build the expected location with os.path.join so the check does not depend on
          # `storage_folder` ending with a path separator.
          assert os.path.isfile(os.path.join(Config.core.storage_folder, "pickles", dn.id + ".p"))
          assert isinstance(dn, PickleDataNode)
          assert dn.storage_type() == "pickle"
          assert dn.config_id == "foobar_bazxyxea"
          assert dn.scope == Scope.SCENARIO
          assert dn.id is not None
          assert dn.name is None
          assert dn.owner_id is None
          assert dn.last_edit_date is not None
          assert dn.job_ids == []
          assert dn.is_ready_for_reading
          assert dn.read() == "Data"
          # Same checks repeated after the read.
          assert dn.last_edit_date is not None
          assert dn.job_ids == []

          # A config id containing a space must be refused.
          with pytest.raises(InvalidConfigurationId):
              PickleDataNode("foobar bazxyxea", Scope.SCENARIO, properties={"default_data": "Data"})

      def test_get_user_properties(self, pickle_file_path):
          """Check that ``_get_user_properties`` returns only the custom properties of the node."""
          dn_1 = PickleDataNode("dn_1", Scope.SCENARIO, properties={"path": pickle_file_path})
          assert dn_1._get_user_properties() == {}

          dn_2 = PickleDataNode(
              "dn_2",
              Scope.SCENARIO,
              properties={
                  "default_data": "foo",
                  "default_path": pickle_file_path,
                  "foo": "bar",
              },
          )

          # default_data, default_path, path, is_generated are filtered out
          assert dn_2._get_user_properties() == {"foo": "bar"}

      def test_new_pickle_data_node_with_existing_file_is_ready_for_reading(self):
          """Check that a node is ready for reading only when its configured path points to an existing file."""
          not_ready_dn_cfg = Config.configure_data_node("not_ready_data_node_config_id", "pickle", path="NOT_EXISTING.p")
          path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.p")
          ready_dn_cfg = Config.configure_data_node("ready_data_node_config_id", "pickle", path=path)

          dns = _DataManager._bulk_get_or_create([not_ready_dn_cfg, ready_dn_cfg])

          assert not dns[not_ready_dn_cfg].is_ready_for_reading
          assert dns[ready_dn_cfg].is_ready_for_reading

      def test_create_with_file_name(self):
          """Check that default data is written to the given ``path`` and that later writes are read back."""
          dn = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": "bar", "path": "foo.FILE.p"})
          assert os.path.isfile("foo.FILE.p")
          assert dn.read() == "bar"
          dn.write("qux")
          assert dn.read() == "qux"
          dn.write(1998)
          assert dn.read() == 1998

      def test_read_and_write(self):
          """Round-trip strings, ints, dicts, pandas and modin data frames through pickle data nodes."""
          no_data_dn = PickleDataNode("foo", Scope.SCENARIO)
          # Kept outside the `pytest.raises` block: inside it, a NoData raised by read() itself
          # would make the test pass without this assertion ever being checked.
          assert no_data_dn.read() is None
          with pytest.raises(NoData):
              no_data_dn.read_or_raise()

          pickle_str = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": "bar"})
          assert isinstance(pickle_str.read(), str)
          assert pickle_str.read() == "bar"
          pickle_str.properties["default_data"] = "baz"  # this modifies the default data value but not the data itself
          assert pickle_str.read() == "bar"
          pickle_str.write("qux")
          assert pickle_str.read() == "qux"
          pickle_str.write(1998)
          assert pickle_str.read() == 1998
          assert isinstance(pickle_str.read(), int)

          pickle_int = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": 197})
          assert isinstance(pickle_int.read(), int)
          assert pickle_int.read() == 197

          pickle_dict = PickleDataNode(
              "foo", Scope.SCENARIO, properties={"default_data": {"bar": 12, "baz": "qux", "quux": [13]}}
          )
          assert isinstance(pickle_dict.read(), dict)
          assert pickle_dict.read() == {"bar": 12, "baz": "qux", "quux": [13]}

          default_pandas = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
          new_pandas_df = pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})
          default_modin = modin_pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
          new_modin_df = modin_pd.DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

          # The type read back is checked after each write: pandas, then modin, then a plain int.
          pickle_pandas = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": default_pandas})
          assert isinstance(pickle_pandas.read(), pd.DataFrame)
          assert default_pandas.equals(pickle_pandas.read())
          pickle_pandas.write(new_pandas_df)
          assert new_pandas_df.equals(pickle_pandas.read())
          assert isinstance(pickle_pandas.read(), pd.DataFrame)
          pickle_pandas.write(new_modin_df)
          assert new_modin_df.equals(pickle_pandas.read())
          assert isinstance(pickle_pandas.read(), modin_pd.DataFrame)
          pickle_pandas.write(1998)
          assert pickle_pandas.read() == 1998
          assert isinstance(pickle_pandas.read(), int)

          # Same sequence starting from a modin default: modin, then pandas, then a plain int.
          pickle_modin = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": default_modin})
          assert isinstance(pickle_modin.read(), modin_pd.DataFrame)
          assert default_modin.equals(pickle_modin.read())
          pickle_modin.write(new_modin_df)
          assert new_modin_df.equals(pickle_modin.read())
          assert isinstance(pickle_modin.read(), modin_pd.DataFrame)
          pickle_modin.write(new_pandas_df)
          assert new_pandas_df.equals(pickle_modin.read())
          assert isinstance(pickle_modin.read(), pd.DataFrame)
          pickle_modin.write(1998)
          assert pickle_modin.read() == 1998
          assert isinstance(pickle_modin.read(), int)

      def test_path_overrides_default_path(self):
          """Check that ``path`` wins over ``default_path`` when both properties are given."""
          dn = PickleDataNode(
              "foo",
              Scope.SCENARIO,
              properties={
                  "default_data": "bar",
                  "default_path": "foo.FILE.p",
                  "path": "bar.FILE.p",
              },
          )
          assert dn.path == "bar.FILE.p"

      def test_set_path(self):
          """Check that ``default_path`` sets the initial path and that ``path`` can be reassigned."""
          dn = PickleDataNode("foo", Scope.SCENARIO, properties={"default_path": "foo.p"})
          assert dn.path == "foo.p"
          dn.path = "bar.p"
          assert dn.path == "bar.p"

      def test_is_generated(self):
          """Check that ``is_generated`` is true without a path property and false once a path is assigned."""
          dn = PickleDataNode("foo", Scope.SCENARIO, properties={})
          assert dn.is_generated
          dn.path = "bar.p"
          assert not dn.is_generated

      def test_read_write_after_modify_path(self):
          """Check that reads and writes follow the node's path after it is changed to a not-yet-existing file."""
          path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.p")
          new_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/temp.p")
          dn = PickleDataNode("foo", Scope.SCENARIO, properties={"default_path": path})
          read_data = dn.read()
          assert read_data is not None
          dn.path = new_path
          # Nothing has been written to the new location yet.
          with pytest.raises(FileNotFoundError):
              dn.read()
          dn.write({"other": "stuff"})
          assert dn.read() == {"other": "stuff"}

      def test_get_system_modified_date_instead_of_last_edit_date(self, tmpdir_factory):
          """Check that ``last_edit_date`` matches the file's modification time after the file is rewritten directly."""
          temp_file_path = str(tmpdir_factory.mktemp("data").join("temp.pickle"))
          pd.DataFrame([]).to_pickle(temp_file_path)
          dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": temp_file_path, "exposed_type": "pandas"})

          dn.write(pd.DataFrame([1, 2, 3]))
          previous_edit_date = dn.last_edit_date

          sleep(0.1)

          # Rewrite the file without going through the data node.
          pd.DataFrame([4, 5, 6]).to_pickle(temp_file_path)
          new_edit_date = datetime.fromtimestamp(os.path.getmtime(temp_file_path))

          assert previous_edit_date < dn.last_edit_date
          assert new_edit_date == dn.last_edit_date

          sleep(0.1)

          dn.write(pd.DataFrame([7, 8, 9]))
          assert new_edit_date < dn.last_edit_date
          os.unlink(temp_file_path)