Browse Source

Allow overriding data accessor for pandas (#2437)

* Allow overriding data accessor for pandas

* too long

* improve coverage

* don't ignore file with name including py

* numpy file was ignored by the .gitignore:89 Data rule
As shown by `git check-ignore -v -- *`

---------

Co-authored-by: Fred Lefévère-Laoide <Fred.Lefevere-Laoide@Taipy.io>
Fred Lefévère-Laoide 3 months ago
parent
commit
4cce531e86

+ 1 - 1
.gitignore

@@ -86,7 +86,7 @@ dist/
 .taipy/
 user_data/
 .my_data/
-Data
+/Data
 
 # demo files
 demo[_\-]*

+ 3 - 12
taipy/gui/data/array_dict_data_accessor.py

@@ -14,11 +14,10 @@ import typing as t
 import pandas as pd
 
 from ..utils import _MapDict
-from .data_format import _DataFormat
-from .pandas_data_accessor import _PandasDataAccessor
+from .pandas_based_data_accessor import _PandasBasedDataAccessor
 
 
-class _ArrayDictDataAccessor(_PandasDataAccessor):
+class _ArrayDictDataAccessor(_PandasBasedDataAccessor):
     __types = (dict, list, tuple, _MapDict)
 
     @staticmethod
@@ -64,12 +63,4 @@ class _ArrayDictDataAccessor(_PandasDataAccessor):
                 return value.iloc[:, 0].to_list()
             if data_type is tuple:
                 return tuple(value.iloc[:, 0].to_list())
-        return super()._from_pandas(value, data_type)
-
-    def get_cols_description(self, var_name: str, value: t.Any) -> t.Union[None, t.Dict[str, t.Dict[str, str]]]:  # type: ignore
-        return super().get_cols_description(var_name, self.to_pandas(value))
-
-    def get_data(  # noqa: C901
-        self, var_name: str, value: t.Any, payload: t.Dict[str, t.Any], data_format: _DataFormat
-    ) -> t.Dict[str, t.Any]:
-        return super().get_data(var_name, self.to_pandas(value), payload, data_format)
+        return self._get_pandas_accessor()._from_pandas(value, data_type)

+ 9 - 9
taipy/gui/data/data_accessor.py

@@ -139,7 +139,7 @@ class _DataAccessors(object):
         if cls in self.__access_4_type:
             del self.__access_4_type[cls]
 
-    def __get_instance(self, value: _TaipyData) -> _DataAccessor:  # type: ignore
+    def _get_instance(self, value: _TaipyData) -> _DataAccessor:  # type: ignore
         value = value.get() if isinstance(value, _TaipyData) else value
         access = self.__access_4_type.get(type(value))
         if access is None:
@@ -154,28 +154,28 @@ class _DataAccessors(object):
         return access
 
     def get_data(self, var_name: str, value: _TaipyData, payload: t.Dict[str, t.Any]) -> t.Dict[str, t.Any]:
-        return self.__get_instance(value).get_data(var_name, value.get(), payload, self.__data_format)
+        return self._get_instance(value).get_data(var_name, value.get(), payload, self.__data_format)
 
     def get_cols_description(self, var_name: str, value: _TaipyData) -> t.Dict[str, t.Dict[str, str]]:
-        return self.__get_instance(value).get_cols_description(var_name, value.get())
+        return self._get_instance(value).get_cols_description(var_name, value.get())
 
     def set_data_format(self, data_format: _DataFormat):
         self.__data_format = data_format
 
     def get_dataframe(self, value: t.Any):
-        return self.__get_instance(value).to_pandas(value)
+        return self._get_instance(value).to_pandas(value)
 
     def on_edit(self, value: t.Any, payload: t.Dict[str, t.Any]):
-        return self.__get_instance(value).on_edit(value, payload)
+        return self._get_instance(value).on_edit(value, payload)
 
     def on_delete(self, value: t.Any, payload: t.Dict[str, t.Any]):
-        return self.__get_instance(value).on_delete(value, payload)
+        return self._get_instance(value).on_delete(value, payload)
 
     def on_add(self, value: t.Any, payload: t.Dict[str, t.Any], new_row: t.Optional[t.List[t.Any]] = None):
-        return self.__get_instance(value).on_add(value, payload, new_row)
+        return self._get_instance(value).on_add(value, payload, new_row)
 
     def to_csv(self, var_name: str, value: t.Any):
-        return self.__get_instance(value).to_csv(var_name, value.get())
+        return self._get_instance(value).to_csv(var_name, value.get())
 
     def to_pandas(self, value: t.Any):
-        return self.__get_instance(value).to_pandas(value.get())
+        return self._get_instance(value).to_pandas(value.get())

+ 5 - 5
taipy/gui/data/numpy_data_accessor.py

@@ -14,10 +14,10 @@ import typing as t
 import numpy
 import pandas as pd
 
-from .pandas_data_accessor import _PandasDataAccessor
+from .pandas_based_data_accessor import _PandasBasedDataAccessor
 
 
-class _NumpyDataAccessor(_PandasDataAccessor):
+class _NumpyDataAccessor(_PandasBasedDataAccessor):
     __types = (numpy.ndarray,)
 
     @staticmethod
@@ -27,7 +27,7 @@ class _NumpyDataAccessor(_PandasDataAccessor):
     def to_pandas(self, value: t.Any) -> pd.DataFrame:
         return pd.DataFrame(value)
 
-    def _from_pandas(self, value: pd.DataFrame, type: t.Type):
-        if type is numpy.ndarray:
+    def _from_pandas(self, value: pd.DataFrame, data_type: t.Type):
+        if data_type is numpy.ndarray:
             return value.to_numpy()
-        return super()._from_pandas(value, type)
+        return self._get_pandas_accessor()._from_pandas(value, data_type)

+ 58 - 0
taipy/gui/data/pandas_based_data_accessor.py

@@ -0,0 +1,58 @@
+# Copyright 2021-2025 Avaiga Private Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import typing as t
+from abc import abstractmethod
+
+import pandas as pd
+
+from .data_accessor import _DataAccessor
+from .data_format import _DataFormat
+from .pandas_data_accessor import _PandasDataAccessor
+
+
+class _PandasBasedDataAccessor(_DataAccessor):
+    """Base class for accessors that convert their value to pandas and delegate the work to the pandas accessor.
+
+    Every operation below calls `self.to_pandas(value)` (not defined in this class) and forwards the resulting
+    object to the accessor returned by `_get_pandas_accessor()`. The delegate is looked up through the Gui's
+    accessor registry instead of being a hard-wired parent class.
+    """
+
+    def __init__(self, gui) -> None:
+        super().__init__(gui)
+        # Resolved on first use by _get_pandas_accessor(); None until then.
+        self.__accessor_instance: t.Optional[_PandasDataAccessor] = None
+
+    def _get_pandas_accessor(self):
+        """Return the accessor that the Gui's registry gives for a `pd.DataFrame`, caching it after the first call."""
+        if self.__accessor_instance is None:
+            # An empty DataFrame is used only as a probe so that _get_instance selects by type.
+            self.__accessor_instance = self._gui._get_accessor()._get_instance(pd.DataFrame({}))  # type: ignore[arg-type, assignment]
+        # t.cast only informs type checkers; nothing is verified at runtime.
+        return t.cast(_PandasDataAccessor, self.__accessor_instance)
+
+    @abstractmethod
+    def _from_pandas(self, value: pd.DataFrame, data_type: t.Type) -> t.Any:
+        """Convert `value` back to an object of `data_type`; implemented by subclasses."""
+        pass
+
+    def get_cols_description(self, var_name: str, value: t.Any) -> t.Union[None, t.Dict[str, t.Dict[str, str]]]:  # type: ignore
+        """Return the delegate's column description for the pandas form of `value`."""
+        return self._get_pandas_accessor().get_cols_description(var_name, self.to_pandas(value))
+
+    def get_data(
+        self, var_name: str, value: t.Any, payload: t.Dict[str, t.Any], data_format: _DataFormat
+    ) -> t.Dict[str, t.Any]:
+        """Return the delegate's `get_data` result for the pandas form of `value`."""
+        return self._get_pandas_accessor().get_data(var_name, self.to_pandas(value), payload, data_format)
+
+    def on_edit(self, value: t.Any, payload: t.Dict[str, t.Any]) -> t.Optional[t.Any]:
+        """Run the delegate's `on_edit` on the pandas form of `value`, then convert the result to `type(value)`."""
+        # NOTE(review): the delegate's result is passed to _from_pandas unchecked; confirm it is never None.
+        return self._from_pandas(self._get_pandas_accessor().on_edit(self.to_pandas(value), payload), type(value))
+
+    def on_delete(self, value: t.Any, payload: t.Dict[str, t.Any]) -> t.Optional[t.Any]:
+        """Run the delegate's `on_delete` on the pandas form of `value`, then convert the result to `type(value)`."""
+        return self._from_pandas(self._get_pandas_accessor().on_delete(self.to_pandas(value), payload), type(value))
+
+    def on_add(
+        self, value: t.Any, payload: t.Dict[str, t.Any], new_row: t.Optional[t.List[t.Any]] = None
+    ) -> t.Optional[t.Any]:
+        """Run the delegate's `on_add` on the pandas form of `value`, then convert the result to `type(value)`."""
+        return self._from_pandas(
+            self._get_pandas_accessor().on_add(self.to_pandas(value), payload, new_row), type(value)
+        )
+
+    def to_csv(self, var_name: str, value: t.Any) -> t.Optional[str]:
+        """Return the delegate's `to_csv` result for the pandas form of `value`."""
+        return self._get_pandas_accessor().to_csv(var_name, self.to_pandas(value))

+ 1 - 1
tests/gui/data/test_accessors.py

@@ -62,7 +62,7 @@ def test_custom_accessor(gui: Gui):
     data = mock_taipy_data(123)
 
     # Testing when accessor is not registered
-    data_accessor = data_accessors._DataAccessors__get_instance(mock_taipy_data)  # type: ignore
+    data_accessor = data_accessors._get_instance(mock_taipy_data)  # type: ignore
     assert isinstance(
         data_accessor, _InvalidDataAccessor
     ), f"Expected _InvalidDataAccessor but got {type(data_accessor)}"

+ 36 - 0
tests/gui/data/test_array_dict_data_accessor.py

@@ -12,6 +12,8 @@
 import os
 from importlib import util
 
+import pandas
+
 from taipy.gui import Gui
 from taipy.gui.data.array_dict_data_accessor import _ArrayDictDataAccessor
 from taipy.gui.data.data_format import _DataFormat
@@ -266,3 +268,37 @@ def test_csv(gui, small_dataframe):
     path = accessor.to_csv("", pd)
     assert path is not None
     assert os.path.getsize(path) > 0
+
+def test__from_pandas_dict(gui, small_dataframe):
+    """`_from_pandas(..., dict)` returns a dict whose "name" and "value" columns match the fixture."""
+    accessor = _ArrayDictDataAccessor(gui)
+    pd = small_dataframe
+    ad = accessor._from_pandas(pandas.DataFrame(pd), dict)
+    assert isinstance(ad, dict)
+    assert len(ad) == 2
+    assert len(ad["name"]) == len(pd["name"])
+    assert len(ad["value"]) == len(pd["value"])
+    # Spot-check the first and last entries of each column.
+    assert ad["name"][0] == pd["name"][0]
+    assert ad["value"][0] == pd["value"][0]
+    assert ad["name"][-1] == pd["name"][-1]
+    assert ad["value"][-1] == pd["value"][-1]
+
+def test__from_pandas_MapDict(gui, small_dataframe):
+    """`_from_pandas(..., _MapDict)` returns a `_MapDict` whose "name" and "value" columns match the fixture."""
+    accessor = _ArrayDictDataAccessor(gui)
+    pd = small_dataframe
+    ad = accessor._from_pandas(pandas.DataFrame(pd), _MapDict)
+    assert isinstance(ad, _MapDict)
+    assert len(ad) == 2
+    assert len(ad["name"]) == len(pd["name"])
+    assert len(ad["value"]) == len(pd["value"])
+    # Spot-check the first and last entries of each column.
+    assert ad["name"][0] == pd["name"][0]
+    assert ad["value"][0] == pd["value"][0]
+    assert ad["name"][-1] == pd["name"][-1]
+    assert ad["value"][-1] == pd["value"][-1]
+
+def test__from_pandas_list(gui, small_dataframe):
+    """`_from_pandas(..., list)` on a single-column frame returns that column as a plain list."""
+    accessor = _ArrayDictDataAccessor(gui)
+    # Keep only the "name" column so the frame has exactly one column.
+    pd = {"name": small_dataframe["name"]}
+    ad = accessor._from_pandas(pandas.DataFrame(pd), list)
+    assert isinstance(ad, list)
+    # NOTE(review): 3 presumably equals the row count of the small_dataframe fixture; confirm against conftest.
+    assert len(ad) == 3
+    assert ad[0] == pd["name"][0]

+ 71 - 0
tests/gui/data/test_numpy_data_accessor.py

@@ -0,0 +1,71 @@
+# Copyright 2021-2025 Avaiga Private Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import os
+from importlib import util
+
+import numpy
+import pandas
+
+from taipy.gui import Gui
+from taipy.gui.data.data_format import _DataFormat
+from taipy.gui.data.numpy_data_accessor import _NumpyDataAccessor
+
+a_numpy_array = numpy.array([1, 2, 3])
+
+
+def test_simple_data(gui: Gui, helpers):
+    """`get_data` in JSON format on a 3-element array reports a rowcount of 3 and returns 3 data entries."""
+    accessor = _NumpyDataAccessor(gui)
+    ret_data = accessor.get_data("x", a_numpy_array, {"start": 0, "end": -1}, _DataFormat.JSON)
+    assert ret_data
+    value = ret_data["value"]
+    assert value
+    assert value["rowcount"] == 3
+    data = value["data"]
+    assert len(data) == 3
+
+
+def test_simple_data_with_arrow(gui: Gui, helpers):
+    """`get_data` in APACHE_ARROW format returns the data as bytes with a rowcount of 3.
+
+    The assertions run only when `pyarrow` can be found; otherwise the test passes without checking anything.
+    """
+    if util.find_spec("pyarrow"):
+        accessor = _NumpyDataAccessor(gui)
+        ret_data = accessor.get_data("x", a_numpy_array, {"start": 0, "end": -1}, _DataFormat.APACHE_ARROW)
+        assert ret_data
+        value = ret_data["value"]
+        assert value
+        assert value["rowcount"] == 3
+        data = value["data"]
+        assert isinstance(data, bytes)
+
+
+def test_slice(gui: Gui, helpers):
+    """A start/end of 0/1 yields 2 data entries while rowcount stays 3; string bounds give the same length."""
+    accessor = _NumpyDataAccessor(gui)
+    value = accessor.get_data("x", a_numpy_array, {"start": 0, "end": 1}, _DataFormat.JSON)["value"]
+    assert value["rowcount"] == 3
+    data = value["data"]
+    assert len(data) == 2
+    # Same bounds passed as strings.
+    value = accessor.get_data("x", a_numpy_array, {"start": "0", "end": "1"}, _DataFormat.JSON)["value"]
+    data = value["data"]
+    assert len(data) == 2
+
+
+def test_csv(gui, small_dataframe):
+    """`to_csv` returns a path to a non-empty file."""
+    accessor = _NumpyDataAccessor(gui)
+    # NOTE(review): the input is the small_dataframe fixture, not a numpy array; confirm this is intended.
+    pd = small_dataframe
+    path = accessor.to_csv("", pd)
+    assert path is not None
+    assert os.path.getsize(path) > 0
+
+def test__from_pandas(gui):
+    """`_from_pandas(..., numpy.ndarray)` returns a numpy array with the same number of rows as the input frame."""
+    accessor = _NumpyDataAccessor(gui)
+    ad = accessor._from_pandas(pandas.DataFrame(a_numpy_array), numpy.ndarray)
+    assert isinstance(ad, numpy.ndarray)
+    assert len(ad) == 3
+