test_pandas_data_accessor.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. # Copyright 2021-2025 Avaiga Private Limited
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  4. # the License. You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  9. # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
  10. # specific language governing permissions and limitations under the License.
  11. import inspect
  12. import os
  13. import warnings
  14. from datetime import datetime
  15. from importlib import util
  16. from unittest.mock import Mock
  17. import numpy
  18. import pandas
  19. import pytest
  20. from flask import g
  21. from taipy.gui import Gui
  22. from taipy.gui.data.data_format import _DataFormat
  23. from taipy.gui.data.decimator import ScatterDecimator
  24. from taipy.gui.data.pandas_data_accessor import _PandasDataAccessor
  25. # Define a mock to simulate _DataFormat behavior with a "value" attribute
  26. class MockDataFormat:
  27. LIST = Mock(value="list")
  28. CSV = Mock(value="csv")
  29. @pytest.fixture
  30. def pandas_accessor():
  31. gui = Mock()
  32. return _PandasDataAccessor(gui=gui)
  33. @pytest.fixture
  34. def sample_df():
  35. data = {
  36. "StringCol": ["Apple", "Banana", "Cherry", "apple"],
  37. "NumberCol": [10, 20, 30, 40],
  38. "BoolCol": [True, False, True, False],
  39. "DateCol": pandas.to_datetime(["2020-01-01", "2021-06-15", "2022-08-22", "2023-03-05"]),
  40. }
  41. return pandas.DataFrame(data)
  42. def test_simple_data(gui: Gui, helpers, small_dataframe):
  43. accessor = _PandasDataAccessor(gui)
  44. pd = pandas.DataFrame(data=small_dataframe)
  45. ret_data = accessor.get_data("x", pd, {"start": 0, "end": -1}, _DataFormat.JSON)
  46. assert ret_data
  47. value = ret_data["value"]
  48. assert value
  49. assert value["rowcount"] == 3
  50. data = value["data"]
  51. assert len(data) == 3
  52. def test_simple_data_with_arrow(gui: Gui, helpers, small_dataframe):
  53. if util.find_spec("pyarrow"):
  54. accessor = _PandasDataAccessor(gui)
  55. pd = pandas.DataFrame(data=small_dataframe)
  56. ret_data = accessor.get_data("x", pd, {"start": 0, "end": -1}, _DataFormat.APACHE_ARROW)
  57. assert ret_data
  58. value = ret_data["value"]
  59. assert value
  60. assert value["rowcount"] == 3
  61. data = value["data"]
  62. assert isinstance(data, bytes)
  63. def test_get_all_simple_data(gui: Gui, helpers, small_dataframe):
  64. accessor = _PandasDataAccessor(gui)
  65. pd = pandas.DataFrame(data=small_dataframe)
  66. ret_data = accessor.get_data("x", pd, {"alldata": True}, _DataFormat.JSON)
  67. assert ret_data
  68. assert ret_data["alldata"] is True
  69. value = ret_data["value"]
  70. assert value
  71. data = value["data"]
  72. assert data == small_dataframe
  73. def test_slice(gui: Gui, helpers, small_dataframe):
  74. accessor = _PandasDataAccessor(gui)
  75. pd = pandas.DataFrame(data=small_dataframe)
  76. value = accessor.get_data("x", pd, {"start": 0, "end": 1}, _DataFormat.JSON)["value"]
  77. assert value["rowcount"] == 3
  78. data = value["data"]
  79. assert len(data) == 2
  80. value = accessor.get_data("x", pd, {"start": "0", "end": "1"}, _DataFormat.JSON)["value"]
  81. data = value["data"]
  82. assert len(data) == 2
  83. def test_style(gui: Gui, helpers, small_dataframe):
  84. accessor = _PandasDataAccessor(gui)
  85. pd = pandas.DataFrame(data=small_dataframe)
  86. gui.run(run_server=False)
  87. cid = helpers.create_scope_and_get_sid(gui)
  88. with gui.get_server_instance().test_request_context(f"/taipy-jsx/test/?client_id={cid}", data={"client_id": cid}):
  89. g.client_id = cid
  90. value = accessor.get_data("x", pd, {"start": 0, "end": 1, "styles": {"st": "test_style"}}, _DataFormat.JSON)[
  91. "value"
  92. ]
  93. assert value["rowcount"] == 3
  94. data = value["data"]
  95. assert len(data) == 2
  96. assert "test_style" in data[0]
  97. def test_tooltip(gui: Gui, helpers, small_dataframe):
  98. def tt(state, value, index: int, row, column_name: str):
  99. return f"{column_name}[{index}]: {value}"
  100. accessor = _PandasDataAccessor(gui)
  101. pd = pandas.DataFrame(data=small_dataframe)
  102. gui.run(run_server=False)
  103. cid = helpers.create_scope_and_get_sid(gui)
  104. with gui.get_server_instance().test_request_context(f"/taipy-jsx/test/?client_id={cid}", data={"client_id": cid}):
  105. gui._bind_var_val("tt", tt)
  106. gui._get_locals_bind_from_context(None)["tt"] = tt
  107. g.client_id = cid
  108. value = accessor.get_data("x", pd, {"start": 0, "end": 1, "tooltips": {"tt": "tt"}}, _DataFormat.JSON)["value"]
  109. assert value["rowcount"] == 3
  110. data = value["data"]
  111. assert len(data) == 2
  112. assert "tt" in data[0]
  113. def test_format_fn(gui: Gui, helpers, small_dataframe):
  114. def ff(state, value, index: int, row, column_name: str):
  115. return f"{column_name}[{index}]: {value}"
  116. accessor = _PandasDataAccessor(gui)
  117. pd = pandas.DataFrame(data=small_dataframe)
  118. gui.run(run_server=False)
  119. cid = helpers.create_scope_and_get_sid(gui)
  120. with gui.get_server_instance().test_request_context(f"/taipy-jsx/test/?client_id={cid}", data={"client_id": cid}):
  121. gui._bind_var_val("ff", ff)
  122. gui._get_locals_bind_from_context(None)["ff"] = ff
  123. g.client_id = cid
  124. value = accessor.get_data("x", pd, {"start": 0, "end": 1, "formats": {"ff": "ff"}}, _DataFormat.JSON)["value"]
  125. assert value["rowcount"] == 3
  126. data = value["data"]
  127. assert len(data) == 2
  128. assert "ff" in data[0]
  129. def test_sort(gui: Gui, helpers, small_dataframe):
  130. accessor = _PandasDataAccessor(gui)
  131. pd = pandas.DataFrame(data=small_dataframe)
  132. query = {"columns": ["name", "value"], "start": 0, "end": -1, "orderby": "name", "sort": "desc"}
  133. data = accessor.get_data("x", pd, query, _DataFormat.JSON)["value"]["data"]
  134. assert data[0]["name"] == "C"
  135. def test_aggregate(gui: Gui, helpers, small_dataframe):
  136. accessor = _PandasDataAccessor(gui)
  137. pd = pandas.DataFrame(data=small_dataframe)
  138. pd = pandas.concat(
  139. [pd, pandas.DataFrame(data={"name": ["A"], "value": [4]})], axis=0, join="outer", ignore_index=True
  140. )
  141. query = {"columns": ["name", "value"], "start": 0, "end": -1, "aggregates": ["name"], "applies": {"value": "sum"}}
  142. value = accessor.get_data("x", pd, query, _DataFormat.JSON)["value"]
  143. assert value["rowcount"] == 3
  144. data = value["data"]
  145. assert next(v.get("value") for v in data if v.get("name") == "A") == 5
  146. def test_filters(gui: Gui, helpers, small_dataframe):
  147. accessor = _PandasDataAccessor(gui)
  148. pd = pandas.DataFrame(data=small_dataframe)
  149. pd = pandas.concat(
  150. [pd, pandas.DataFrame(data={"name": ["A"], "value": [4]})], axis=0, join="outer", ignore_index=True
  151. )
  152. query = {
  153. "columns": ["name", "value"],
  154. "start": 0,
  155. "end": -1,
  156. "filters": [{"col": "name", "action": "!=", "value": ""}],
  157. }
  158. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  159. assert len(value["value"]["data"]) == 4
  160. query = {
  161. "columns": ["name", "value"],
  162. "start": 0,
  163. "end": -1,
  164. "filters": [{"col": "name", "action": "==", "value": ""}],
  165. }
  166. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  167. assert len(value["value"]["data"]) == 0
  168. query = {
  169. "columns": ["name", "value"],
  170. "start": 0,
  171. "end": -1,
  172. "filters": [{"col": "name", "action": "==", "value": "A"}],
  173. }
  174. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  175. assert len(value["value"]["data"]) == 2
  176. query = {
  177. "columns": ["name", "value"],
  178. "start": 0,
  179. "end": -1,
  180. "filters": [{"col": "name", "action": "==", "value": "A"}, {"col": "value", "action": "==", "value": 2}],
  181. }
  182. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  183. assert len(value["value"]["data"]) == 0
  184. query = {
  185. "columns": ["name", "value"],
  186. "start": 0,
  187. "end": -1,
  188. "filters": [{"col": "name", "action": "!=", "value": "A"}, {"col": "value", "action": "==", "value": 2}],
  189. }
  190. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  191. assert len(value["value"]["data"]) == 1
  192. assert value["value"]["data"][0]["_tp_index"] == 1
  193. def test_filter_by_date(gui: Gui, helpers, small_dataframe):
  194. accessor = _PandasDataAccessor(gui)
  195. pd = pandas.DataFrame(data=small_dataframe)
  196. pd["a date"] = [
  197. datetime.fromisocalendar(2022, 28, 1),
  198. datetime.fromisocalendar(2022, 28, 2),
  199. datetime.fromisocalendar(2022, 28, 3),
  200. ]
  201. query = {
  202. "columns": ["name", "value"],
  203. "start": 0,
  204. "end": -1,
  205. "filters": [{"col": "a date", "action": ">", "value": datetime.fromisocalendar(2022, 28, 3).isoformat() + "Z"}],
  206. }
  207. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  208. assert len(value["value"]["data"]) == 0
  209. query = {
  210. "columns": ["name", "value"],
  211. "start": 0,
  212. "end": -1,
  213. "filters": [{"col": "a date", "action": ">", "value": datetime.fromisocalendar(2022, 28, 2).isoformat() + "Z"}],
  214. }
  215. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  216. assert len(value["value"]["data"]) == 1
  217. query = {
  218. "columns": ["name", "value"],
  219. "start": 0,
  220. "end": -1,
  221. "filters": [{"col": "a date", "action": "<", "value": datetime.fromisocalendar(2022, 28, 3).isoformat() + "Z"}],
  222. }
  223. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  224. assert len(value["value"]["data"]) == 2
  225. query = {
  226. "columns": ["name", "value"],
  227. "start": 0,
  228. "end": -1,
  229. "filters": [
  230. {"col": "a date", "action": "<", "value": datetime.fromisocalendar(2022, 28, 2).isoformat() + "Z"},
  231. {"col": "a date", "action": ">", "value": datetime.fromisocalendar(2022, 28, 2).isoformat() + "Z"},
  232. ],
  233. }
  234. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  235. assert len(value["value"]["data"]) == 0
  236. query = {
  237. "columns": ["name", "value"],
  238. "start": 0,
  239. "end": -1,
  240. "filters": [
  241. {"col": "a date", "action": "<", "value": datetime.fromisocalendar(2022, 28, 3).isoformat() + "Z"},
  242. {"col": "a date", "action": ">", "value": datetime.fromisocalendar(2022, 28, 1).isoformat() + "Z"},
  243. ],
  244. }
  245. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  246. assert len(value["value"]["data"]) == 1
  247. def test_contains_case_sensitive(pandas_accessor, sample_df):
  248. payload = {"filters": [{"col": "StringCol", "value": "Apple", "action": "contains", "matchCase": True}]}
  249. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  250. filtered_data = pandas.DataFrame(result["value"]["data"])
  251. assert len(filtered_data) == 1
  252. assert filtered_data.iloc[0]["StringCol"] == "Apple"
  253. def test_contains_case_insensitive(pandas_accessor, sample_df):
  254. payload = {"filters": [{"col": "StringCol", "value": "apple", "action": "contains", "matchCase": False}]}
  255. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  256. filtered_data = pandas.DataFrame(result["value"]["data"])
  257. assert len(filtered_data) == 2
  258. assert "Apple" in filtered_data["StringCol"].values
  259. assert "apple" in filtered_data["StringCol"].values
  260. def test_equals_case_sensitive(pandas_accessor, sample_df):
  261. payload = {"filters": [{"col": "StringCol", "value": "Apple", "action": "==", "matchCase": True}]}
  262. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  263. filtered_data = pandas.DataFrame(result["value"]["data"])
  264. assert len(filtered_data) == 1
  265. assert filtered_data.iloc[0]["StringCol"] == "Apple"
  266. def test_equals_case_insensitive(pandas_accessor, sample_df):
  267. payload = {"filters": [{"col": "StringCol", "value": "apple", "action": "==", "matchCase": False}]}
  268. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  269. filtered_data = pandas.DataFrame(result["value"]["data"])
  270. assert len(filtered_data) == 2
  271. assert "Apple" in filtered_data["StringCol"].values
  272. assert "apple" in filtered_data["StringCol"].values
  273. def test_not_equals_case_insensitive(pandas_accessor, sample_df):
  274. payload = {"filters": [{"col": "StringCol", "value": "apple", "action": "!=", "matchCase": False}]}
  275. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  276. filtered_data = pandas.DataFrame(result["value"]["data"])
  277. assert len(filtered_data) == 2
  278. assert "Banana" in filtered_data["StringCol"].values
  279. assert "Cherry" in filtered_data["StringCol"].values
  280. def test_decimator(gui: Gui, helpers, small_dataframe):
  281. a_decimator = ScatterDecimator(threshold=1) # noqa: F841
  282. accessor = _PandasDataAccessor(gui)
  283. pd = pandas.DataFrame(data=small_dataframe)
  284. # set gui frame
  285. gui._set_frame(inspect.currentframe())
  286. gui.add_page("test", "<|Hello {a_decimator}|button|>")
  287. gui.run(run_server=False)
  288. flask_client = gui._server.test_client()
  289. cid = helpers.create_scope_and_get_sid(gui)
  290. # Get the jsx once so that the page will be evaluated -> variable will be registered
  291. flask_client.get(f"/taipy-jsx/test?client_id={cid}")
  292. with gui.get_server_instance().test_request_context(f"/taipy-jsx/test/?client_id={cid}", data={"client_id": cid}):
  293. g.client_id = cid
  294. ret_data = accessor.get_data(
  295. "x",
  296. pd,
  297. {
  298. "start": 0,
  299. "end": -1,
  300. "alldata": True,
  301. "decimatorPayload": {
  302. "decimators": [
  303. {"decimator": "a_decimator", "chartMode": "markers", "xAxis": "name", "yAxis": "value"}
  304. ],
  305. "width": 100,
  306. },
  307. },
  308. _DataFormat.JSON,
  309. )
  310. assert ret_data
  311. value = ret_data["value"]
  312. assert value
  313. data = value["data"]
  314. assert len(data) == 2
  315. def test_edit(gui, small_dataframe):
  316. accessor = _PandasDataAccessor(gui)
  317. pd = pandas.DataFrame(small_dataframe)
  318. ln = len(pd)
  319. assert pd["value"].iloc[0] != 10
  320. ret_data = accessor.on_edit(pd, {"index": 0, "col": "value", "value": 10})
  321. assert isinstance(ret_data, pandas.DataFrame)
  322. assert len(ret_data) == ln
  323. assert ret_data["value"].iloc[0] == 10
  324. def test_delete(gui, small_dataframe):
  325. accessor = _PandasDataAccessor(gui)
  326. pd = pandas.DataFrame(small_dataframe)
  327. ln = len(pd)
  328. ret_data = accessor.on_delete(pd, {"index": 0})
  329. assert isinstance(ret_data, pandas.DataFrame)
  330. assert len(ret_data) == ln - 1
  331. def test_add(gui, small_dataframe):
  332. accessor = _PandasDataAccessor(gui)
  333. pd = pandas.DataFrame(small_dataframe)
  334. ln = len(pd)
  335. ret_data = accessor.on_add(pd, {"index": 0})
  336. assert isinstance(ret_data, pandas.DataFrame)
  337. assert len(ret_data) == ln + 1
  338. assert ret_data["value"].iloc[0] == 0
  339. assert ret_data["name"].iloc[0] == ""
  340. ret_data = accessor.on_add(pd, {"index": 2})
  341. assert isinstance(ret_data, pandas.DataFrame)
  342. assert len(ret_data) == ln + 1
  343. assert ret_data["value"].iloc[2] == 0
  344. assert ret_data["name"].iloc[2] == ""
  345. ret_data = accessor.on_add(pd, {"index": 0}, ["New", 100])
  346. assert isinstance(ret_data, pandas.DataFrame)
  347. assert len(ret_data) == ln + 1
  348. assert ret_data["value"].iloc[0] == 100
  349. assert ret_data["name"].iloc[0] == "New"
  350. ret_data = accessor.on_add(pd, {"index": 2}, ["New", 100])
  351. assert isinstance(ret_data, pandas.DataFrame)
  352. assert len(ret_data) == ln + 1
  353. assert ret_data["value"].iloc[2] == 100
  354. assert ret_data["name"].iloc[2] == "New"
  355. def test_csv(gui, small_dataframe):
  356. accessor = _PandasDataAccessor(gui)
  357. pd = pandas.DataFrame(small_dataframe)
  358. path = accessor.to_csv("", pd)
  359. assert path is not None
  360. assert os.path.getsize(path) > 0
  361. def test_multi_index(gui):
  362. pandas_accessor = _PandasDataAccessor(gui)
  363. iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]]
  364. index = pandas.MultiIndex.from_product(iterables, names=["first", "second"])
  365. df = pandas.DataFrame({"col 1": numpy.random.randn(8), "col 2": numpy.random.randn(8)}, index=index)
  366. with warnings.catch_warnings(record=True):
  367. result = pandas_accessor.get_data("test_var", df, {}, MockDataFormat.LIST)
  368. assert result.get("error") is None
  369. assert result["value"] is not None
  370. def test_multi_index_columns(gui):
  371. pandas_accessor = _PandasDataAccessor(gui)
  372. iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]]
  373. index = pandas.MultiIndex.from_product(iterables, names=["first", "second"])
  374. df = pandas.DataFrame(numpy.random.randn(3, 8), index=["A", "B", "C"], columns=index)
  375. with warnings.catch_warnings(record=True):
  376. result = pandas_accessor.get_data("test_var", df, {}, MockDataFormat.LIST)
  377. assert result.get("error") is not None
  378. assert result.get("value") is not None