Sohamkumar Chauhan 5 mēneši atpakaļ
vecāks
revīzija
ccc935e1e9

+ 1 - 0
Pipfile

@@ -36,6 +36,7 @@ boto3 = "==1.29.1"
 watchdog = "==4.0.0"
 watchdog = "==4.0.0"
 charset-normalizer = "==3.3.2"
 charset-normalizer = "==3.3.2"
 numpy = "<2.0.0"
 numpy = "<2.0.0"
+pre-commit = "*"
 
 
 [dev-packages]
 [dev-packages]
 freezegun = "*"
 freezegun = "*"

+ 0 - 0
git


+ 10 - 1
taipy/core/config/data_node_config.py

@@ -14,6 +14,9 @@ from copy import copy
 from datetime import timedelta
 from datetime import timedelta
 from typing import Any, Callable, Dict, List, Optional, Union
 from typing import Any, Callable, Dict, List, Optional, Union
 
 
+import numpy
+import pandas
+
 from taipy.common.config import Config
 from taipy.common.config import Config
 from taipy.common.config._config import _Config
 from taipy.common.config._config import _Config
 from taipy.common.config.common._config_blocker import _ConfigBlocker
 from taipy.common.config.common._config_blocker import _ConfigBlocker
@@ -70,11 +73,17 @@ class DataNodeConfig(Section):
     _EXPOSED_TYPE_PANDAS = "pandas"
     _EXPOSED_TYPE_PANDAS = "pandas"
     _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
     _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
     _EXPOSED_TYPE_NUMPY = "numpy"
     _EXPOSED_TYPE_NUMPY = "numpy"
+    _EXPOSED_TYPE_PANDAS_DATAFRAME = pandas.DataFrame
+    _EXPOSED_TYPE_NUMPY_NDARRAY = numpy.ndarray
+    _EXPOSED_TYPE_LIST = List
     _DEFAULT_EXPOSED_TYPE = _EXPOSED_TYPE_PANDAS
     _DEFAULT_EXPOSED_TYPE = _EXPOSED_TYPE_PANDAS
 
 
     _ALL_EXPOSED_TYPES = [
     _ALL_EXPOSED_TYPES = [
         _EXPOSED_TYPE_PANDAS,
         _EXPOSED_TYPE_PANDAS,
         _EXPOSED_TYPE_NUMPY,
         _EXPOSED_TYPE_NUMPY,
+        _EXPOSED_TYPE_PANDAS_DATAFRAME,
+        _EXPOSED_TYPE_NUMPY_NDARRAY,
+        _EXPOSED_TYPE_LIST,
     ]
     ]
 
 
     _OPTIONAL_ENCODING_PROPERTY = "encoding"
     _OPTIONAL_ENCODING_PROPERTY = "encoding"
@@ -322,7 +331,7 @@ class DataNodeConfig(Section):
 
 
     @property
     @property
     def validity_period(self) -> Optional[timedelta]:
     def validity_period(self) -> Optional[timedelta]:
-        """ The validity period of the data nodes instantiated from the data node config.
+        """The validity period of the data nodes instantiated from the data node config.
 
 
         It corresponds to the duration since the last edit date for which the data node
         It corresponds to the duration since the last edit date for which the data node
         can be considered valid. Once the validity period has passed, the data node is
         can be considered valid. Once the validity period has passed, the data node is

+ 9 - 3
taipy/core/data/_tabular_datanode_mixin.py

@@ -26,6 +26,9 @@ class _TabularDataNodeMixin(object):
     _EXPOSED_TYPE_PANDAS = "pandas"
     _EXPOSED_TYPE_PANDAS = "pandas"
     _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
     _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
     _VALID_STRING_EXPOSED_TYPES = [_EXPOSED_TYPE_PANDAS, _EXPOSED_TYPE_NUMPY]
     _VALID_STRING_EXPOSED_TYPES = [_EXPOSED_TYPE_PANDAS, _EXPOSED_TYPE_NUMPY]
+    _VALID_OTHER_EXPOSED_TYPES = [pd.DataFrame, np.ndarray]
+    _EXPOSED_TYPE_PANDAS_DATAFRAME = pd.DataFrame
+    _EXPOSED_TYPE_NUMPY_NDARRAY = np.ndarray
 
 
     def __init__(self, **kwargs) -> None:
     def __init__(self, **kwargs) -> None:
         self._decoder: Union[Callable, Any]
         self._decoder: Union[Callable, Any]
@@ -44,11 +47,14 @@ class _TabularDataNodeMixin(object):
         if callable(custom_encoder):
         if callable(custom_encoder):
             self._encoder = custom_encoder
             self._encoder = custom_encoder
 
 
-
     def _convert_data_to_dataframe(self, exposed_type: Any, data: Any) -> Union[pd.DataFrame, pd.Series]:
     def _convert_data_to_dataframe(self, exposed_type: Any, data: Any) -> Union[pd.DataFrame, pd.Series]:
-        if exposed_type == self._EXPOSED_TYPE_PANDAS and isinstance(data, (pd.DataFrame, pd.Series)):
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME] and isinstance(
+            data, (pd.DataFrame, pd.Series)
+        ):
             return data
             return data
-        elif exposed_type == self._EXPOSED_TYPE_NUMPY and isinstance(data, np.ndarray):
+        elif exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY] and isinstance(
+            data, np.ndarray
+        ):
             return pd.DataFrame(data)
             return pd.DataFrame(data)
         elif isinstance(data, list) and not isinstance(exposed_type, str):
         elif isinstance(data, list) and not isinstance(exposed_type, str):
             return pd.DataFrame.from_records([self._encoder(row) for row in data])
             return pd.DataFrame.from_records([self._encoder(row) for row in data])

+ 2 - 2
taipy/core/data/csv.py

@@ -134,9 +134,9 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             path = self._path
             path = self._path
 
 
         properties = self.properties
         properties = self.properties
-        if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
+        if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path=path)
             return self._read_as_pandas_dataframe(path=path)
-        if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
+        if properties[self._EXPOSED_TYPE_PROPERTY] in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path=path)
             return self._read_as_numpy(path=path)
         return self._read_as(path=path)
         return self._read_as(path=path)
 
 

+ 9 - 5
taipy/core/data/excel.py

@@ -137,7 +137,7 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
 
     @staticmethod
     @staticmethod
     def _check_exposed_type(exposed_type):
     def _check_exposed_type(exposed_type):
-        if isinstance(exposed_type, str):
+        if isinstance(exposed_type, str) or exposed_type in [pd.DataFrame, np.ndarray]:
             _TabularDataNodeMixin._check_exposed_type(exposed_type)
             _TabularDataNodeMixin._check_exposed_type(exposed_type)
         elif isinstance(exposed_type, list):
         elif isinstance(exposed_type, list):
             for t in exposed_type:
             for t in exposed_type:
@@ -154,18 +154,18 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             path = self._path
             path = self._path
 
 
         exposed_type = self.properties[self._EXPOSED_TYPE_PROPERTY]
         exposed_type = self.properties[self._EXPOSED_TYPE_PROPERTY]
-        if exposed_type == self._EXPOSED_TYPE_PANDAS:
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path=path)
             return self._read_as_pandas_dataframe(path=path)
-        if exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path=path)
             return self._read_as_numpy(path=path)
         return self._read_as(path=path)
         return self._read_as(path=path)
 
 
     def _read_sheet_with_exposed_type(
     def _read_sheet_with_exposed_type(
         self, path: str, sheet_exposed_type: str, sheet_name: str
         self, path: str, sheet_exposed_type: str, sheet_name: str
     ) -> Optional[Union[np.ndarray, pd.DataFrame]]:
     ) -> Optional[Union[np.ndarray, pd.DataFrame]]:
-        if sheet_exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if sheet_exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path, sheet_name)
             return self._read_as_numpy(path, sheet_name)
-        elif sheet_exposed_type == self._EXPOSED_TYPE_PANDAS:
+        elif sheet_exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path, sheet_name)  # type: ignore
             return self._read_as_pandas_dataframe(path, sheet_name)  # type: ignore
         return None
         return None
 
 
@@ -202,6 +202,10 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
                         sheet_exposed_type = exposed_type.get(sheet_name, self._EXPOSED_TYPE_PANDAS)
                         sheet_exposed_type = exposed_type.get(sheet_name, self._EXPOSED_TYPE_PANDAS)
                     elif isinstance(exposed_type, List):
                     elif isinstance(exposed_type, List):
                         sheet_exposed_type = exposed_type[i]
                         sheet_exposed_type = exposed_type[i]
+                    elif exposed_type == np.ndarray:
+                        sheet_exposed_type = self._EXPOSED_TYPE_NUMPY
+                    elif exposed_type == pd.DataFrame:
+                        sheet_exposed_type = self._EXPOSED_TYPE_PANDAS
 
 
                     if isinstance(sheet_exposed_type, str):
                     if isinstance(sheet_exposed_type, str):
                         sheet_data = self._read_sheet_with_exposed_type(path, sheet_exposed_type, sheet_name)
                         sheet_data = self._read_sheet_with_exposed_type(path, sheet_exposed_type, sheet_name)

+ 2 - 3
taipy/core/data/parquet.py

@@ -227,9 +227,9 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         return self._do_read_from_path(path, properties[self._EXPOSED_TYPE_PROPERTY], kwargs)
         return self._do_read_from_path(path, properties[self._EXPOSED_TYPE_PROPERTY], kwargs)
 
 
     def _do_read_from_path(self, path: str, exposed_type: str, kwargs: Dict) -> Any:
     def _do_read_from_path(self, path: str, exposed_type: str, kwargs: Dict) -> Any:
-        if exposed_type == self._EXPOSED_TYPE_PANDAS:
+        if exposed_type in [self._EXPOSED_TYPE_PANDAS, self._EXPOSED_TYPE_PANDAS_DATAFRAME]:
             return self._read_as_pandas_dataframe(path, kwargs)
             return self._read_as_pandas_dataframe(path, kwargs)
-        if exposed_type == self._EXPOSED_TYPE_NUMPY:
+        if exposed_type in [self._EXPOSED_TYPE_NUMPY, self._EXPOSED_TYPE_NUMPY_NDARRAY]:
             return self._read_as_numpy(path, kwargs)
             return self._read_as_numpy(path, kwargs)
         return self._read_as(path, kwargs)
         return self._read_as(path, kwargs)
 
 
@@ -249,4 +249,3 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
 
     def _write(self, data: Any):
     def _write(self, data: Any):
         self._write_with_kwargs(data)
         self._write_with_kwargs(data)
-

+ 10 - 0
tests/core/data/test_csv_data_node.py

@@ -155,6 +155,16 @@ class TestCSVDataNode:
         dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
         dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
         assert isinstance(dn.read(), pd.DataFrame)
         assert isinstance(dn.read(), pd.DataFrame)
 
 
+    def test_pandas_dataframe_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": pd.DataFrame})
+        assert isinstance(dn.read(), pd.DataFrame)
+
+    def test_numpy_ndarray_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": np.ndarray})
+        assert isinstance(dn.read(), np.ndarray)
+
     def test_raise_error_invalid_exposed_type(self):
     def test_raise_error_invalid_exposed_type(self):
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
         with pytest.raises(InvalidExposedType):
         with pytest.raises(InvalidExposedType):

+ 20 - 0
tests/core/data/test_excel_data_node.py

@@ -300,6 +300,26 @@ class TestExcelDataNode:
         data = dn.read()
         data = dn.read()
         assert isinstance(data, pd.DataFrame)
         assert isinstance(data, pd.DataFrame)
 
 
+    def test_pandas_dataframe_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo",
+            Scope.SCENARIO,
+            properties={"default_path": path, "exposed_type": pd.DataFrame, "sheet_name": "Sheet1"},
+        )
+        assert dn.properties["exposed_type"] == pd.DataFrame
+        data = dn.read()
+        assert isinstance(data, pd.DataFrame)
+
+    def test_numpy_ndarray_exposed_type(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode(
+            "foo", Scope.SCENARIO, properties={"default_path": path, "exposed_type": np.ndarray, "sheet_name": "Sheet1"}
+        )
+        assert dn.properties["exposed_type"] == np.ndarray
+        data = dn.read()
+        assert isinstance(data, np.ndarray)
+
     def test_complex_exposed_type_dict(self):
     def test_complex_exposed_type_dict(self):
         # ["Sheet1", "Sheet2", "Sheet3", "Sheet4", "Sheet5"]
         # ["Sheet1", "Sheet2", "Sheet3", "Sheet4", "Sheet5"]
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example_4.xlsx")
         path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example_4.xlsx")

+ 14 - 0
tests/core/data/test_parquet_data_node.py

@@ -96,6 +96,20 @@ class TestParquetDataNode:
         assert isinstance(dn_1, ParquetDataNode)
         assert isinstance(dn_1, ParquetDataNode)
         assert dn_1.properties["exposed_type"] == MyCustomObject
         assert dn_1.properties["exposed_type"] == MyCustomObject
 
 
+        parquet_dn_config_2 = Config.configure_parquet_data_node(
+            id="bar", default_path=path, compression=compression, exposed_type=np.ndarray
+        )
+        dn_2 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_2, None, None)
+        assert isinstance(dn_2, ParquetDataNode)
+        assert dn_2.properties["exposed_type"] == np.ndarray
+
+        parquet_dn_config_3 = Config.configure_parquet_data_node(
+            id="bar", default_path=path, compression=compression, exposed_type=pd.DataFrame
+        )
+        dn_3 = _DataManagerFactory._build_manager()._create_and_set(parquet_dn_config_3, None, None)
+        assert isinstance(dn_3, ParquetDataNode)
+        assert dn_3.properties["exposed_type"] == pd.DataFrame
+
         with pytest.raises(InvalidConfigurationId):
         with pytest.raises(InvalidConfigurationId):
             dn = ParquetDataNode("foo bar", Scope.SCENARIO, properties={"path": path, "name": "super name"})
             dn = ParquetDataNode("foo bar", Scope.SCENARIO, properties={"path": path, "name": "super name"})