浏览代码

Merge pull request #2296 from sohamkumar05/feature/#398-expand-exposed-type-parameter

 feature/#398-expand-exposed-type-parameter
Đỗ Trường Giang 4 月之前
父节点
当前提交
2e143bb26a

+ 7 - 0
taipy/core/config/data_node_config.py

@@ -14,6 +14,9 @@ from copy import copy
 from datetime import timedelta
 from typing import Any, Callable, Dict, List, Optional, Union
 
+import numpy
+import pandas
+
 from taipy.common.config import Config
 from taipy.common.config._config import _Config
 from taipy.common.config.common._config_blocker import _ConfigBlocker
@@ -71,11 +74,15 @@ class DataNodeConfig(Section):
     _EXPOSED_TYPE_PANDAS = "pandas"
     _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
     _EXPOSED_TYPE_NUMPY = "numpy"
+    _EXPOSED_TYPE_PANDAS_DATAFRAME = pandas.DataFrame
+    _EXPOSED_TYPE_NUMPY_NDARRAY = numpy.ndarray
     _DEFAULT_EXPOSED_TYPE = _EXPOSED_TYPE_PANDAS
 
     _ALL_EXPOSED_TYPES = [
         _EXPOSED_TYPE_PANDAS,
         _EXPOSED_TYPE_NUMPY,
+        _EXPOSED_TYPE_PANDAS_DATAFRAME,
+        _EXPOSED_TYPE_NUMPY_NDARRAY,
     ]
 
     _OPTIONAL_ENCODING_PROPERTY = "encoding"

+ 8 - 3
taipy/core/data/_tabular_datanode_mixin.py

@@ -26,6 +26,8 @@ class _TabularDataNodeMixin(object):
     _EXPOSED_TYPE_PANDAS = "pandas"
     _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
     _VALID_STRING_EXPOSED_TYPES = [_EXPOSED_TYPE_PANDAS, _EXPOSED_TYPE_NUMPY]
+    _EXPOSED_TYPE_PANDAS_DATAFRAME = pd.DataFrame
+    _EXPOSED_TYPE_NUMPY_NDARRAY = np.ndarray
 
     def __init__(self, **kwargs) -> None:
         self._decoder: Union[Callable, Any]
@@ -44,11 +46,14 @@ class _TabularDataNodeMixin(object):
         if callable(custom_encoder):
             self._encoder = custom_encoder
 
-
     def _convert_data_to_dataframe(self, exposed_type: Any, data: Any) -> Union[pd.DataFrame, pd.Series]:
-        if exposed_type == self._EXPOSED_TYPE_PANDAS and isinstance(data, (pd.DataFrame, pd.Series)):
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME] and isinstance(
+            data, (pd.DataFrame, pd.Series)
+        ):
             return data
-        elif exposed_type == self._EXPOSED_TYPE_NUMPY and isinstance(data, np.ndarray):
+        elif exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY] and isinstance(
+            data, np.ndarray
+        ):
             return pd.DataFrame(data)
         elif isinstance(data, list) and not isinstance(exposed_type, str):
             return pd.DataFrame.from_records([self._encoder(row) for row in data])

+ 2 - 2
taipy/core/data/csv.py

@@ -134,9 +134,9 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             path = self._path
 
         properties = self.properties
-        if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
+        if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path=path)
-        if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
+        if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path=path)
         return self._read_as(path=path)
 

+ 9 - 5
taipy/core/data/excel.py

@@ -137,7 +137,7 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
     @staticmethod
     def _check_exposed_type(exposed_type):
-        if isinstance(exposed_type, str):
+        if isinstance(exposed_type, str) or exposed_type in [pd.DataFrame, np.ndarray]:
             _TabularDataNodeMixin._check_exposed_type(exposed_type)
         elif isinstance(exposed_type, list):
             for t in exposed_type:
@@ -154,18 +154,18 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             path = self._path
 
         exposed_type = self.properties[self._EXPOSED_TYPE_PROPERTY]
-        if exposed_type == self._EXPOSED_TYPE_PANDAS:
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path=path)
-        if exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path=path)
         return self._read_as(path=path)
 
     def _read_sheet_with_exposed_type(
         self, path: str, sheet_exposed_type: str, sheet_name: str
     ) -> Optional[Union[np.ndarray, pd.DataFrame]]:
-        if sheet_exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if sheet_exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path, sheet_name)
-        elif sheet_exposed_type == self._EXPOSED_TYPE_PANDAS:
+        elif sheet_exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path, sheet_name)  # type: ignore
         return None
 
@@ -202,6 +202,10 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
                         sheet_exposed_type = exposed_type.get(sheet_name, self._EXPOSED_TYPE_PANDAS)
                     elif isinstance(exposed_type, List):
                         sheet_exposed_type = exposed_type[i]
+                    elif exposed_type == np.ndarray:
+                        sheet_exposed_type = self._EXPOSED_TYPE_NUMPY
+                    elif exposed_type == pd.DataFrame:
+                        sheet_exposed_type = self._EXPOSED_TYPE_PANDAS
 
                     if isinstance(sheet_exposed_type, str):
                         sheet_data = self._read_sheet_with_exposed_type(path, sheet_exposed_type, sheet_name)

+ 2 - 3
taipy/core/data/parquet.py

@@ -227,9 +227,9 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         return self._do_read_from_path(path, properties[self._EXPOSED_TYPE_PROPERTY], kwargs)
 
     def _do_read_from_path(self, path: str, exposed_type: str, kwargs: Dict) -> Any:
-        if exposed_type == self._EXPOSED_TYPE_PANDAS:
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path, kwargs)
-        if exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path, kwargs)
         return self._read_as(path, kwargs)
 
@@ -249,4 +249,3 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
     def _write(self, data: Any):
         self._write_with_kwargs(data)
-

+ 52 - 0
tests/core/data/test_csv_data_node.py

@@ -157,6 +157,58 @@ class TestCSVDataNode:
         dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
         assert isinstance(dn.read(), pd.DataFrame)
 
+    def test_pandas_dataframe_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": pd.DataFrame})
+        assert isinstance(dn.read(), pd.DataFrame)
+
+    def test_pandas_dataframe_exposed_type_a(self):
+        import pandas
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": pandas.DataFrame})
+        assert isinstance(dn.read(), pandas.DataFrame)
+
+    def test_pandas_dataframe_exposed_type_b(self):
+        from pandas import DataFrame
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": DataFrame})
+        assert isinstance(dn.read(), DataFrame)
+
+    def test_pandas_dataframe_exposed_type_c(self):
+        from pandas import DataFrame as DF
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": DF})
+        assert isinstance(dn.read(), DF)
+
+    def test_numpy_ndarray_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": np.ndarray})
+        assert isinstance(dn.read(), np.ndarray)
+
+    def test_numpy_ndarray_exposed_type_a(self):
+        import numpy
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": numpy.ndarray})
+        assert isinstance(dn.read(), numpy.ndarray)
+
+    def test_numpy_ndarray_exposed_type_b(self):
+        from numpy import ndarray
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": ndarray})
+        assert isinstance(dn.read(), ndarray)
+
+    def test_numpy_ndarray_exposed_type_c(self):
+        from numpy import ndarray as nd_array
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": nd_array})
+        assert isinstance(dn.read(), nd_array)
+
     def test_raise_error_invalid_exposed_type(self):
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
         with pytest.raises(InvalidExposedType):

+ 94 - 0
tests/core/data/test_excel_data_node.py

@@ -302,6 +302,100 @@ class TestExcelDataNode:
         data = dn.read()
         assert isinstance(data, pd.DataFrame)
 
+    def test_pandas_dataframe_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo",
+            Scope.SCENARIO,
+            properties={"default_path": path, "exposed_type": pd.DataFrame, "sheet_name": "Sheet1"},
+        )
+        assert dn.properties["exposed_type"] == pd.DataFrame
+        data = dn.read()
+        assert isinstance(data, pd.DataFrame)
+
+    def test_pandas_dataframe_exposed_type_a(self):
+        import pandas
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo",
+            Scope.SCENARIO,
+            properties={"default_path": path, "exposed_type": pandas.DataFrame, "sheet_name": "Sheet1"},
+        )
+        assert dn.properties["exposed_type"] == pandas.DataFrame
+        data = dn.read()
+        assert isinstance(data, pandas.DataFrame)
+
+    def test_pandas_dataframe_exposed_type_b(self):
+        from pandas import DataFrame
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo",
+            Scope.SCENARIO,
+            properties={"default_path": path, "exposed_type": DataFrame, "sheet_name": "Sheet1"},
+        )
+        assert dn.properties["exposed_type"] == DataFrame
+        data = dn.read()
+        assert isinstance(data, DataFrame)
+
+    def test_pandas_dataframe_exposed_type_c(self):
+        from pandas import DataFrame as DF
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo",
+            Scope.SCENARIO,
+            properties={"default_path": path, "exposed_type": DF, "sheet_name": "Sheet1"},
+        )
+        assert dn.properties["exposed_type"] == DF
+        data = dn.read()
+        assert isinstance(data, DF)
+
+    def test_numpy_ndarray_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo", Scope.SCENARIO, properties={"default_path": path, "exposed_type": np.ndarray, "sheet_name": "Sheet1"}
+        )
+        assert dn.properties["exposed_type"] == np.ndarray
+        data = dn.read()
+        assert isinstance(data, np.ndarray)
+
+    def test_numpy_ndarray_exposed_type_a(self):
+        import numpy
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo",
+            Scope.SCENARIO,
+            properties={"default_path": path, "exposed_type": numpy.ndarray, "sheet_name": "Sheet1"},
+        )
+        assert dn.properties["exposed_type"] == numpy.ndarray
+        data = dn.read()
+        assert isinstance(data, numpy.ndarray)
+
+    def test_numpy_ndarray_exposed_type_b(self):
+        from numpy import ndarray
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo", Scope.SCENARIO, properties={"default_path": path, "exposed_type": ndarray, "sheet_name": "Sheet1"}
+        )
+        assert dn.properties["exposed_type"] == ndarray
+        data = dn.read()
+        assert isinstance(data, ndarray)
+
+    def test_numpy_ndarray_exposed_type_c(self):
+        from numpy import ndarray as nd_array
+
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo", Scope.SCENARIO, properties={"default_path": path, "exposed_type": nd_array, "sheet_name": "Sheet1"}
+        )
+        assert dn.properties["exposed_type"] == nd_array
+        data = dn.read()
+        assert isinstance(data, nd_array)
+
     def test_complex_exposed_type_dict(self):
         # ["Sheet1", "Sheet2", "Sheet3", "Sheet4", "Sheet5"]
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example_4.xlsx")

+ 14 - 0
tests/core/data/test_parquet_data_node.py

@@ -98,6 +98,20 @@ class TestParquetDataNode:
         assert isinstance(dn_1, ParquetDataNode)
         assert dn_1.properties["exposed_type"] == MyCustomObject
 
+        parquet_dn_config_2 = Config.configure_parquet_data_node(
+            id="bar", default_path=path, compression=compression, exposed_type=np.ndarray
+        )
+        dn_2 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_2, None, None)
+        assert isinstance(dn_2, ParquetDataNode)
+        assert dn_2.properties["exposed_type"] == np.ndarray
+
+        parquet_dn_config_3 = Config.configure_parquet_data_node(
+            id="bar", default_path=path, compression=compression, exposed_type=pd.DataFrame
+        )
+        dn_3 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_3, None, None)
+        assert isinstance(dn_3, ParquetDataNode)
+        assert dn_3.properties["exposed_type"] == pd.DataFrame
+
         with pytest.raises(InvalidConfigurationId):
             dn = ParquetDataNode("foo bar", Scope.SCENARIO, properties={"path": path, "name": "super name"})