test_pandas_data_accessor.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. # Copyright 2021-2024 Avaiga Private Limited
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  4. # the License. You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
  9. # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
  10. # specific language governing permissions and limitations under the License.
  11. import inspect
  12. import os
  13. from datetime import datetime
  14. from importlib import util
  15. from unittest.mock import Mock
  16. import pandas
  17. import pandas as pd
  18. import pytest
  19. from flask import g
  20. from taipy.gui import Gui
  21. from taipy.gui.data.data_format import _DataFormat
  22. from taipy.gui.data.decimator import ScatterDecimator
  23. from taipy.gui.data.pandas_data_accessor import _PandasDataAccessor
  24. # Define a mock to simulate _DataFormat behavior with a 'value' attribute
  25. class MockDataFormat:
  26. LIST = Mock(value="list")
  27. CSV = Mock(value="csv")
  28. @pytest.fixture
  29. def pandas_accessor():
  30. gui = Mock()
  31. return _PandasDataAccessor(gui=gui)
  32. @pytest.fixture
  33. def sample_df():
  34. data = {
  35. "StringCol": ["Apple", "Banana", "Cherry", "apple"],
  36. "NumberCol": [10, 20, 30, 40],
  37. "BoolCol": [True, False, True, False],
  38. "DateCol": pd.to_datetime(["2020-01-01", "2021-06-15", "2022-08-22", "2023-03-05"])
  39. }
  40. return pd.DataFrame(data)
  41. def test_simple_data(gui: Gui, helpers, small_dataframe):
  42. accessor = _PandasDataAccessor(gui)
  43. pd = pandas.DataFrame(data=small_dataframe)
  44. ret_data = accessor.get_data("x", pd, {"start": 0, "end": -1}, _DataFormat.JSON)
  45. assert ret_data
  46. value = ret_data["value"]
  47. assert value
  48. assert value["rowcount"] == 3
  49. data = value["data"]
  50. assert len(data) == 3
  51. def test_simple_data_with_arrow(gui: Gui, helpers, small_dataframe):
  52. if util.find_spec("pyarrow"):
  53. accessor = _PandasDataAccessor(gui)
  54. pd = pandas.DataFrame(data=small_dataframe)
  55. ret_data = accessor.get_data("x", pd, {"start": 0, "end": -1}, _DataFormat.APACHE_ARROW)
  56. assert ret_data
  57. value = ret_data["value"]
  58. assert value
  59. assert value["rowcount"] == 3
  60. data = value["data"]
  61. assert isinstance(data, bytes)
  62. def test_get_all_simple_data(gui: Gui, helpers, small_dataframe):
  63. accessor = _PandasDataAccessor(gui)
  64. pd = pandas.DataFrame(data=small_dataframe)
  65. ret_data = accessor.get_data("x", pd, {"alldata": True}, _DataFormat.JSON)
  66. assert ret_data
  67. assert ret_data["alldata"] is True
  68. value = ret_data["value"]
  69. assert value
  70. data = value["data"]
  71. assert data == small_dataframe
  72. def test_slice(gui: Gui, helpers, small_dataframe):
  73. accessor = _PandasDataAccessor(gui)
  74. pd = pandas.DataFrame(data=small_dataframe)
  75. value = accessor.get_data("x", pd, {"start": 0, "end": 1}, _DataFormat.JSON)["value"]
  76. assert value["rowcount"] == 3
  77. data = value["data"]
  78. assert len(data) == 2
  79. value = accessor.get_data("x", pd, {"start": "0", "end": "1"}, _DataFormat.JSON)["value"]
  80. data = value["data"]
  81. assert len(data) == 2
  82. def test_style(gui: Gui, helpers, small_dataframe):
  83. accessor = _PandasDataAccessor(gui)
  84. pd = pandas.DataFrame(data=small_dataframe)
  85. gui.run(run_server=False)
  86. cid = helpers.create_scope_and_get_sid(gui)
  87. with gui.get_flask_app().test_request_context(f"/taipy-jsx/test/?client_id={cid}", data={"client_id": cid}):
  88. g.client_id = cid
  89. value = accessor.get_data("x", pd, {"start": 0, "end": 1, "styles": {"st": "test_style"}}, _DataFormat.JSON)[
  90. "value"
  91. ]
  92. assert value["rowcount"] == 3
  93. data = value["data"]
  94. assert len(data) == 2
  95. assert "test_style" in data[0]
  96. def test_tooltip(gui: Gui, helpers, small_dataframe):
  97. def tt(state, value, index: int, row, column_name: str):
  98. return f"{column_name}[{index}]: {value}"
  99. accessor = _PandasDataAccessor(gui)
  100. pd = pandas.DataFrame(data=small_dataframe)
  101. gui.run(run_server=False)
  102. cid = helpers.create_scope_and_get_sid(gui)
  103. with gui.get_flask_app().test_request_context(f"/taipy-jsx/test/?client_id={cid}", data={"client_id": cid}):
  104. gui._bind_var_val("tt", tt)
  105. gui._get_locals_bind_from_context(None)["tt"] = tt
  106. g.client_id = cid
  107. value = accessor.get_data("x", pd, {"start": 0, "end": 1, "tooltips": {"tt": "tt"}}, _DataFormat.JSON)["value"]
  108. assert value["rowcount"] == 3
  109. data = value["data"]
  110. assert len(data) == 2
  111. assert "tt" in data[0]
  112. def test_format_fn(gui: Gui, helpers, small_dataframe):
  113. def ff(state, value, index: int, row, column_name: str):
  114. return f"{column_name}[{index}]: {value}"
  115. accessor = _PandasDataAccessor(gui)
  116. pd = pandas.DataFrame(data=small_dataframe)
  117. gui.run(run_server=False)
  118. cid = helpers.create_scope_and_get_sid(gui)
  119. with gui.get_flask_app().test_request_context(f"/taipy-jsx/test/?client_id={cid}", data={"client_id": cid}):
  120. gui._bind_var_val("ff", ff)
  121. gui._get_locals_bind_from_context(None)["ff"] = ff
  122. g.client_id = cid
  123. value = accessor.get_data("x", pd, {"start": 0, "end": 1, "formats": {"ff": "ff"}}, _DataFormat.JSON)["value"]
  124. assert value["rowcount"] == 3
  125. data = value["data"]
  126. assert len(data) == 2
  127. assert "ff" in data[0]
  128. def test_sort(gui: Gui, helpers, small_dataframe):
  129. accessor = _PandasDataAccessor(gui)
  130. pd = pandas.DataFrame(data=small_dataframe)
  131. query = {"columns": ["name", "value"], "start": 0, "end": -1, "orderby": "name", "sort": "desc"}
  132. data = accessor.get_data("x", pd, query, _DataFormat.JSON)["value"]["data"]
  133. assert data[0]["name"] == "C"
  134. def test_aggregate(gui: Gui, helpers, small_dataframe):
  135. accessor = _PandasDataAccessor(gui)
  136. pd = pandas.DataFrame(data=small_dataframe)
  137. pd = pandas.concat(
  138. [pd, pandas.DataFrame(data={"name": ["A"], "value": [4]})], axis=0, join="outer", ignore_index=True
  139. )
  140. query = {"columns": ["name", "value"], "start": 0, "end": -1, "aggregates": ["name"], "applies": {"value": "sum"}}
  141. value = accessor.get_data("x", pd, query, _DataFormat.JSON)["value"]
  142. assert value["rowcount"] == 3
  143. data = value["data"]
  144. assert next(v.get("value") for v in data if v.get("name") == "A") == 5
  145. def test_filters(gui: Gui, helpers, small_dataframe):
  146. accessor = _PandasDataAccessor(gui)
  147. pd = pandas.DataFrame(data=small_dataframe)
  148. pd = pandas.concat(
  149. [pd, pandas.DataFrame(data={"name": ["A"], "value": [4]})], axis=0, join="outer", ignore_index=True
  150. )
  151. query = {
  152. "columns": ["name", "value"],
  153. "start": 0,
  154. "end": -1,
  155. "filters": [{"col": "name", "action": "!=", "value": ""}],
  156. }
  157. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  158. assert len(value["value"]["data"]) == 4
  159. query = {
  160. "columns": ["name", "value"],
  161. "start": 0,
  162. "end": -1,
  163. "filters": [{"col": "name", "action": "==", "value": ""}],
  164. }
  165. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  166. assert len(value["value"]["data"]) == 0
  167. query = {
  168. "columns": ["name", "value"],
  169. "start": 0,
  170. "end": -1,
  171. "filters": [{"col": "name", "action": "==", "value": "A"}],
  172. }
  173. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  174. assert len(value["value"]["data"]) == 2
  175. query = {
  176. "columns": ["name", "value"],
  177. "start": 0,
  178. "end": -1,
  179. "filters": [{"col": "name", "action": "==", "value": "A"}, {"col": "value", "action": "==", "value": 2}],
  180. }
  181. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  182. assert len(value["value"]["data"]) == 0
  183. query = {
  184. "columns": ["name", "value"],
  185. "start": 0,
  186. "end": -1,
  187. "filters": [{"col": "name", "action": "!=", "value": "A"}, {"col": "value", "action": "==", "value": 2}],
  188. }
  189. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  190. assert len(value["value"]["data"]) == 1
  191. assert value["value"]["data"][0]["_tp_index"] == 1
  192. def test_filter_by_date(gui: Gui, helpers, small_dataframe):
  193. accessor = _PandasDataAccessor(gui)
  194. pd = pandas.DataFrame(data=small_dataframe)
  195. pd["a date"] = [
  196. datetime.fromisocalendar(2022, 28, 1),
  197. datetime.fromisocalendar(2022, 28, 2),
  198. datetime.fromisocalendar(2022, 28, 3),
  199. ]
  200. query = {
  201. "columns": ["name", "value"],
  202. "start": 0,
  203. "end": -1,
  204. "filters": [{"col": "a date", "action": ">", "value": datetime.fromisocalendar(2022, 28, 3).isoformat() + "Z"}],
  205. }
  206. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  207. assert len(value["value"]["data"]) == 0
  208. query = {
  209. "columns": ["name", "value"],
  210. "start": 0,
  211. "end": -1,
  212. "filters": [{"col": "a date", "action": ">", "value": datetime.fromisocalendar(2022, 28, 2).isoformat() + "Z"}],
  213. }
  214. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  215. assert len(value["value"]["data"]) == 1
  216. query = {
  217. "columns": ["name", "value"],
  218. "start": 0,
  219. "end": -1,
  220. "filters": [{"col": "a date", "action": "<", "value": datetime.fromisocalendar(2022, 28, 3).isoformat() + "Z"}],
  221. }
  222. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  223. assert len(value["value"]["data"]) == 2
  224. query = {
  225. "columns": ["name", "value"],
  226. "start": 0,
  227. "end": -1,
  228. "filters": [
  229. {"col": "a date", "action": "<", "value": datetime.fromisocalendar(2022, 28, 2).isoformat() + "Z"},
  230. {"col": "a date", "action": ">", "value": datetime.fromisocalendar(2022, 28, 2).isoformat() + "Z"},
  231. ],
  232. }
  233. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  234. assert len(value["value"]["data"]) == 0
  235. query = {
  236. "columns": ["name", "value"],
  237. "start": 0,
  238. "end": -1,
  239. "filters": [
  240. {"col": "a date", "action": "<", "value": datetime.fromisocalendar(2022, 28, 3).isoformat() + "Z"},
  241. {"col": "a date", "action": ">", "value": datetime.fromisocalendar(2022, 28, 1).isoformat() + "Z"},
  242. ],
  243. }
  244. value = accessor.get_data("x", pd, query, _DataFormat.JSON)
  245. assert len(value["value"]["data"]) == 1
  246. def test_contains_case_sensitive(pandas_accessor, sample_df):
  247. payload = {
  248. "filters": [{"col": "StringCol", "value": "Apple", "action": "contains", "matchCase": True}]
  249. }
  250. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  251. filtered_data = pd.DataFrame(result['value']['data'])
  252. assert len(filtered_data) == 1
  253. assert filtered_data.iloc[0]['StringCol'] == 'Apple'
  254. def test_contains_case_insensitive(pandas_accessor, sample_df):
  255. payload = {
  256. "filters": [{"col": "StringCol", "value": "apple", "action": "contains", "matchCase": False}]
  257. }
  258. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  259. filtered_data = pd.DataFrame(result['value']['data'])
  260. assert len(filtered_data) == 2
  261. assert 'Apple' in filtered_data['StringCol'].values
  262. assert 'apple' in filtered_data['StringCol'].values
  263. def test_equals_case_sensitive(pandas_accessor, sample_df):
  264. payload = {
  265. "filters": [{"col": "StringCol", "value": "Apple", "action": "==", "matchCase": True}]
  266. }
  267. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  268. filtered_data = pd.DataFrame(result['value']['data'])
  269. assert len(filtered_data) == 1
  270. assert filtered_data.iloc[0]['StringCol'] == 'Apple'
  271. def test_equals_case_insensitive(pandas_accessor, sample_df):
  272. payload = {
  273. "filters": [{"col": "StringCol", "value": "apple", "action": "==", "matchCase": False}]
  274. }
  275. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  276. filtered_data = pd.DataFrame(result['value']['data'])
  277. assert len(filtered_data) == 2
  278. assert 'Apple' in filtered_data['StringCol'].values
  279. assert 'apple' in filtered_data['StringCol'].values
  280. def test_not_equals_case_insensitive(pandas_accessor, sample_df):
  281. payload = {
  282. "filters": [{"col": "StringCol", "value": "apple", "action": "!=", "matchCase": False}]
  283. }
  284. result = pandas_accessor.get_data("test_var", sample_df, payload, MockDataFormat.LIST)
  285. filtered_data = pd.DataFrame(result['value']['data'])
  286. assert len(filtered_data) == 2
  287. assert 'Banana' in filtered_data['StringCol'].values
  288. assert 'Cherry' in filtered_data['StringCol'].values
  289. def test_decimator(gui: Gui, helpers, small_dataframe):
  290. a_decimator = ScatterDecimator(threshold=1) # noqa: F841
  291. accessor = _PandasDataAccessor(gui)
  292. pd = pandas.DataFrame(data=small_dataframe)
  293. # set gui frame
  294. gui._set_frame(inspect.currentframe())
  295. gui.add_page("test", "<|Hello {a_decimator}|button|>")
  296. gui.run(run_server=False)
  297. flask_client = gui._server.test_client()
  298. cid = helpers.create_scope_and_get_sid(gui)
  299. # Get the jsx once so that the page will be evaluated -> variable will be registered
  300. flask_client.get(f"/taipy-jsx/test?client_id={cid}")
  301. with gui.get_flask_app().test_request_context(f"/taipy-jsx/test/?client_id={cid}", data={"client_id": cid}):
  302. g.client_id = cid
  303. ret_data = accessor.get_data(
  304. "x",
  305. pd,
  306. {
  307. "start": 0,
  308. "end": -1,
  309. "alldata": True,
  310. "decimatorPayload": {
  311. "decimators": [
  312. {"decimator": "a_decimator", "chartMode": "markers", "xAxis": "name", "yAxis": "value"}
  313. ],
  314. "width": 100,
  315. },
  316. },
  317. _DataFormat.JSON,
  318. )
  319. assert ret_data
  320. value = ret_data["value"]
  321. assert value
  322. data = value["data"]
  323. assert len(data) == 2
  324. def test_edit(gui, small_dataframe):
  325. accessor = _PandasDataAccessor(gui)
  326. pd = pandas.DataFrame(small_dataframe)
  327. ln = len(pd)
  328. assert pd["value"].iloc[0] != 10
  329. ret_data = accessor.on_edit(pd, {"index": 0, "col": "value", "value": 10})
  330. assert isinstance(ret_data, pandas.DataFrame)
  331. assert len(ret_data) == ln
  332. assert ret_data["value"].iloc[0] == 10
  333. def test_delete(gui, small_dataframe):
  334. accessor = _PandasDataAccessor(gui)
  335. pd = pandas.DataFrame(small_dataframe)
  336. ln = len(pd)
  337. ret_data = accessor.on_delete(pd, {"index": 0})
  338. assert isinstance(ret_data, pandas.DataFrame)
  339. assert len(ret_data) == ln - 1
  340. def test_add(gui, small_dataframe):
  341. accessor = _PandasDataAccessor(gui)
  342. pd = pandas.DataFrame(small_dataframe)
  343. ln = len(pd)
  344. ret_data = accessor.on_add(pd, {"index": 0})
  345. assert isinstance(ret_data, pandas.DataFrame)
  346. assert len(ret_data) == ln + 1
  347. assert ret_data["value"].iloc[0] == 0
  348. assert ret_data["name"].iloc[0] == ""
  349. ret_data = accessor.on_add(pd, {"index": 2})
  350. assert isinstance(ret_data, pandas.DataFrame)
  351. assert len(ret_data) == ln + 1
  352. assert ret_data["value"].iloc[2] == 0
  353. assert ret_data["name"].iloc[2] == ""
  354. ret_data = accessor.on_add(pd, {"index": 0}, ["New", 100])
  355. assert isinstance(ret_data, pandas.DataFrame)
  356. assert len(ret_data) == ln + 1
  357. assert ret_data["value"].iloc[0] == 100
  358. assert ret_data["name"].iloc[0] == "New"
  359. ret_data = accessor.on_add(pd, {"index": 2}, ["New", 100])
  360. assert isinstance(ret_data, pandas.DataFrame)
  361. assert len(ret_data) == ln + 1
  362. assert ret_data["value"].iloc[2] == 100
  363. assert ret_data["name"].iloc[2] == "New"
  364. def test_csv(gui, small_dataframe):
  365. accessor = _PandasDataAccessor(gui)
  366. pd = pandas.DataFrame(small_dataframe)
  367. path = accessor.to_csv("", pd)
  368. assert path is not None
  369. assert os.path.getsize(path) > 0