
feat: add download and upload api for file-based datanodes

trgiangdo 10 months ago
parent commit 2826816215

+ 48 - 1
taipy/core/data/_file_datanode_mixin.py

@@ -14,7 +14,7 @@ import pathlib
 import shutil
 from datetime import datetime
 from os.path import isfile
-from typing import Any, Dict, Optional
+from typing import Any, Callable, Dict, Optional
 
 from taipy.config.config import Config
 
@@ -92,3 +92,50 @@ class _FileDataNodeMixin(object):
         if os.path.exists(old_path):
             shutil.move(old_path, new_path)
         return new_path
+
+    def _get_downloadable_path(self) -> str:
+        """Get the downloadable path of the file data of the data node.
+
+        Returns:
+            The downloadable path of the file data of the data node if it exists, otherwise an empty string.
+        """
+        if os.path.exists(self.path) and isfile(self._path):
+            return self.path
+
+        return ""
+
+    def _upload(self, path: str, upload_checker: Optional[Callable[[str, Any], bool]] = None) -> bool:
+        """Upload a file as the data of the data node.
+
+        Parameters:
+            path (str): The path of the file to upload to the data node.
+            upload_checker (Optional[Callable[[str, Any], bool]]): A function to check if the upload is allowed.
+                The function takes the name of the uploaded file and the data itself as arguments and returns
+                True if the upload is allowed, otherwise False.
+
+        Returns:
+            True if the upload was successful, otherwise False.
+        """
+        from ._data_manager_factory import _DataManagerFactory
+
+        upload_path = pathlib.Path(path)
+
+        if upload_checker is not None:
+            try:
+                upload_data = self._read_from_path(str(upload_path))
+            except Exception:
+                return False
+
+            if not upload_checker(upload_path.name, upload_data):
+                return False
+
+        shutil.copy(upload_path, self.path)
+
+        self.track_edit(timestamp=datetime.now())  # type: ignore[attr-defined]
+        self.unlock_edit()  # type: ignore[attr-defined]
+        _DataManagerFactory._build_manager()._set(self)  # type: ignore[arg-type]
+
+        return True
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        raise NotImplementedError
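
For context, a minimal usage sketch of the new mixin API (the data node, file paths, and expected columns below are hypothetical; only _upload() and _get_downloadable_path() come from the code above):

    import pandas as pd

    from taipy.config.common.scope import Scope
    from taipy.core.data.csv import CSVDataNode

    def has_expected_columns(file_name: str, data: pd.DataFrame) -> bool:
        # The checker receives the uploaded file's name and the data as read by the
        # node's own _read_from_path(); accept only CSV files with the expected columns.
        return file_name.endswith(".csv") and list(data.columns) == ["a", "b", "c"]

    # Hypothetical paths; the constructor call mirrors the tests in this commit.
    csv_dn = CSVDataNode("example", Scope.SCENARIO, properties={"path": "/tmp/data.csv", "exposed_type": "pandas"})
    accepted = csv_dn._upload("/tmp/candidate.csv", upload_checker=has_expected_columns)
    download_path = csv_dn._get_downloadable_path()  # "" when the file does not exist on disk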

+ 22 - 13
taipy/core/data/csv.py

@@ -27,7 +27,7 @@ from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
 
-class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
+class CSVDataNode(_FileDataNodeMixin, DataNode, _TabularDataNodeMixin):
     """Data Node stored as a CSV file.
 
     Attributes:
@@ -137,14 +137,20 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         return cls.__STORAGE_TYPE
 
     def _read(self):
+        return self._read_from_path()
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if not path:
+            path = self._path
+
         if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
-            return self._read_as_pandas_dataframe()
+            return self._read_as_pandas_dataframe(path=path)
         if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
-            return self._read_as_numpy()
-        return self._read_as()
+            return self._read_as_numpy(path=path)
+        return self._read_as(path=path)
 
-    def _read_as(self):
-        with open(self._path, encoding=self.properties[self.__ENCODING_KEY]) as csvFile:
+    def _read_as(self, path: str):
+        with open(path, encoding=self.properties[self.__ENCODING_KEY]) as csvFile:
             if self.properties[self._HAS_HEADER_PROPERTY]:
                 reader = csv.DictReader(csvFile)
             else:
@@ -152,23 +158,26 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
             return [self._decoder(line) for line in reader]
 
-    def _read_as_numpy(self) -> np.ndarray:
-        return self._read_as_pandas_dataframe().to_numpy()
+    def _read_as_numpy(self, path: str) -> np.ndarray:
+        return self._read_as_pandas_dataframe(path=path).to_numpy()
 
     def _read_as_pandas_dataframe(
-        self, usecols: Optional[List[int]] = None, column_names: Optional[List[str]] = None
+        self,
+        path: str,
+        usecols: Optional[List[int]] = None,
+        column_names: Optional[List[str]] = None,
     ) -> pd.DataFrame:
         try:
             if self.properties[self._HAS_HEADER_PROPERTY]:
                 if column_names:
-                    return pd.read_csv(self._path, encoding=self.properties[self.__ENCODING_KEY])[column_names]
-                return pd.read_csv(self._path, encoding=self.properties[self.__ENCODING_KEY])
+                    return pd.read_csv(path, encoding=self.properties[self.__ENCODING_KEY])[column_names]
+                return pd.read_csv(path, encoding=self.properties[self.__ENCODING_KEY])
             else:
                 if usecols:
                     return pd.read_csv(
-                        self._path, encoding=self.properties[self.__ENCODING_KEY], header=None, usecols=usecols
+                        path, encoding=self.properties[self.__ENCODING_KEY], header=None, usecols=usecols
                     )
-                return pd.read_csv(self._path, encoding=self.properties[self.__ENCODING_KEY], header=None)
+                return pd.read_csv(path, encoding=self.properties[self.__ENCODING_KEY], header=None)
         except pd.errors.EmptyDataError:
             return pd.DataFrame()
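
Note the base-class reordering in this file: _FileDataNodeMixin is now listed before DataNode, so in the method resolution order the mixin's concrete _upload() and _get_downloadable_path() take precedence over the NotImplementedError stubs added to DataNode below, while non-file data nodes keep raising. A quick sanity check, assuming the modules are importable:

    from taipy.core.data._file_datanode_mixin import _FileDataNodeMixin
    from taipy.core.data.csv import CSVDataNode
    from taipy.core.data.data_node import DataNode

    mro = CSVDataNode.__mro__
    # The mixin resolves first, so CSVDataNode inherits the working _upload().
    assert mro.index(_FileDataNodeMixin) < mro.index(DataNode)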
 

+ 23 - 1
taipy/core/data/data_node.py

@@ -14,7 +14,7 @@ import os
 import uuid
 from abc import abstractmethod
 from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
 
 import networkx as nx
 
@@ -373,6 +373,28 @@ class DataNode(_Entity, _Labeled):
     def storage_type(cls) -> str:
         raise NotImplementedError
 
+    def _get_downloadable_path(self) -> str:
+        """Get the downloadable path of the file data of the data node.
+
+        Returns:
+            The downloadable path of the file data of the data node.
+        """
+        raise NotImplementedError
+
+    def _upload(self, path: str, upload_checker: Optional[Callable[[str, Any], bool]] = None) -> bool:
+        """Upload a file as the data of the data node.
+
+        Parameters:
+            path (str): The path of the file to upload to the data node.
+            upload_checker (Optional[Callable[[str, Any], bool]]): A function to check if the upload is allowed.
+                The function takes the name of the uploaded file and the data itself as arguments and returns
+                True if the upload is allowed, otherwise False.
+
+        Returns:
+            True if the upload was successful, otherwise False.
+        """
+        raise NotImplementedError
+
     def read_or_raise(self) -> Any:
         """Read the data referenced by this data node.
 

+ 25 - 15
taipy/core/data/excel.py

@@ -28,7 +28,7 @@ from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
 
-class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
+class ExcelDataNode(_FileDataNodeMixin, DataNode, _TabularDataNodeMixin):
     """Data Node stored as an Excel file.
 
     The Excel file format is _xlsx_.
@@ -150,15 +150,21 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
                 _TabularDataNodeMixin._check_exposed_type(t)
 
     def _read(self):
+        return self._read_from_path()
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if not path:
+            path = self._path
+
         if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
-            return self._read_as_pandas_dataframe()
+            return self._read_as_pandas_dataframe(path=path)
         if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
-            return self._read_as_numpy()
-        return self._read_as()
+            return self._read_as_numpy(path=path)
+        return self._read_as(path=path)
 
-    def _read_as(self):
+    def _read_as(self, path: str):
         try:
-            excel_file = load_workbook(self._path)
+            excel_file = load_workbook(path)
             exposed_type = self.properties[self._EXPOSED_TYPE_PROPERTY]
             work_books = {}
             sheet_names = excel_file.sheetnames
@@ -171,7 +177,7 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
             for sheet_name in provided_sheet_names:
                 if sheet_name not in sheet_names:
-                    raise NonExistingExcelSheet(sheet_name, self._path)
+                    raise NonExistingExcelSheet(sheet_name, path)
 
             if isinstance(exposed_type, List):
                 if len(provided_sheet_names) != len(self.properties[self._EXPOSED_TYPE_PROPERTY]):
@@ -192,9 +198,9 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
                     if isinstance(sheet_exposed_type, str):
                         if sheet_exposed_type == self._EXPOSED_TYPE_NUMPY:
-                            work_books[sheet_name] = self._read_as_pandas_dataframe(sheet_name).to_numpy()
+                            work_books[sheet_name] = self._read_as_pandas_dataframe(path, sheet_name).to_numpy()
                         elif sheet_exposed_type == self._EXPOSED_TYPE_PANDAS:
-                            work_books[sheet_name] = self._read_as_pandas_dataframe(sheet_name)
+                            work_books[sheet_name] = self._read_as_pandas_dataframe(path, sheet_name)
                         continue
 
                 res = [[col.value for col in row] for row in work_sheet.rows]
@@ -214,14 +220,16 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
         return work_books
 
-    def _read_as_numpy(self):
-        sheets = self._read_as_pandas_dataframe()
+    def _read_as_numpy(self, path: str):
+        sheets = self._read_as_pandas_dataframe(path=path)
         if isinstance(sheets, dict):
             return {sheet_name: df.to_numpy() for sheet_name, df in sheets.items()}
         return sheets.to_numpy()
 
-    def _do_read_excel(self, sheet_names, kwargs) -> Union[Dict[Union[int, str], pd.DataFrame], pd.DataFrame]:
-        return pd.read_excel(self._path, sheet_name=sheet_names, **kwargs)
+    def _do_read_excel(
+        self, path: str, sheet_names, kwargs
+    ) -> Union[Dict[Union[int, str], pd.DataFrame], pd.DataFrame]:
+        return pd.read_excel(path, sheet_name=sheet_names, **kwargs)
 
     def __get_sheet_names_and_header(self, sheet_names):
         kwargs = {}
@@ -231,10 +239,12 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             kwargs["header"] = None
         return sheet_names, kwargs
 
-    def _read_as_pandas_dataframe(self, sheet_names=None) -> Union[Dict[Union[int, str], pd.DataFrame], pd.DataFrame]:
+    def _read_as_pandas_dataframe(
+        self, path: str, sheet_names=None
+    ) -> Union[Dict[Union[int, str], pd.DataFrame], pd.DataFrame]:
         sheet_names, kwargs = self.__get_sheet_names_and_header(sheet_names)
         try:
-            return self._do_read_excel(sheet_names, kwargs)
+            return self._do_read_excel(path, sheet_names, kwargs)
         except pd.errors.EmptyDataError:
             return pd.DataFrame()
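
One Excel-specific detail: with exposed_type "pandas" and no explicit sheet list, _read_from_path() returns a dict of DataFrames keyed by sheet name, so an upload checker should index into the sheet it wants to validate. A hedged sketch (the sheet name, columns, and excel_dn node are illustrative assumptions):

    def sheet1_has_expected_columns(file_name: str, sheets) -> bool:
        # `sheets` is a {sheet_name: DataFrame} dict when exposed_type is "pandas".
        return file_name.endswith(".xlsx") and list(sheets["Sheet1"].columns) == ["a", "b", "c"]

    # accepted = excel_dn._upload("/tmp/candidate.xlsx", upload_checker=sheet1_has_expected_columns)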
 

+ 8 - 2
taipy/core/data/json.py

@@ -25,7 +25,7 @@ from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
 
-class JSONDataNode(DataNode, _FileDataNodeMixin):
+class JSONDataNode(_FileDataNodeMixin, DataNode):
     """Data Node stored as a JSON file.
 
     Attributes:
@@ -150,7 +150,13 @@ class JSONDataNode(DataNode, _FileDataNodeMixin):
         self.properties[self._DECODER_KEY] = decoder
 
     def _read(self):
-        with open(self._path, "r", encoding=self.properties[self.__ENCODING_KEY]) as f:
+        return self._read_from_path()
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if not path:
+            path = self._path
+
+        with open(path, "r", encoding=self.properties[self.__ENCODING_KEY]) as f:
             return json.load(f, cls=self._decoder)
 
     def _append(self, data: Any):
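
As with the other file-based nodes, _read() now delegates to _read_from_path(), so a candidate file can be inspected with the node's configured decoder without touching the node's own path. A minimal sketch with hypothetical temporary files; the constructor call mirrors the tests in this commit:

    import json
    import os
    import tempfile

    from taipy.config.common.scope import Scope
    from taipy.core.data.json import JSONDataNode

    candidate_path = os.path.join(tempfile.mkdtemp(), "candidate.json")
    with open(candidate_path, "w") as f:
        json.dump([{"a": 1, "b": 2, "c": 3}], f)

    json_dn = JSONDataNode("example", Scope.SCENARIO, properties={"path": os.path.join(tempfile.mkdtemp(), "data.json")})
    candidate = json_dn._read_from_path(candidate_path)  # decoded like read(), but json_dn.path is unchanged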

+ 34 - 28
taipy/core/data/parquet.py

@@ -28,7 +28,7 @@ from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
 
-class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
+class ParquetDataNode(_FileDataNodeMixin, DataNode, _TabularDataNodeMixin):
     """Data Node stored as a Parquet file.
 
     Attributes:
@@ -178,18 +178,43 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         return cls.__STORAGE_TYPE
 
     def _read(self):
-        return self.read_with_kwargs()
+        return self._read_from_path()
 
-    def _read_as(self, read_kwargs: Dict):
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if not path:
+            path = self._path
+
+        # return None if data was never written
+        if not self.last_edit_date:
+            self._DataNode__logger.warning(
+                f"Data node {self.id} from config {self.config_id} is being read but has never been written."
+            )
+            return None
+
+        kwargs = self.properties[self.__READ_KWARGS_PROPERTY]
+        kwargs.update(
+            {
+                self.__ENGINE_PROPERTY: self.properties[self.__ENGINE_PROPERTY],
+            }
+        )
+        kwargs.update(read_kwargs)
+
+        if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
+            return self._read_as_pandas_dataframe(path, kwargs)
+        if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
+            return self._read_as_numpy(path, kwargs)
+        return self._read_as(path, kwargs)
+
+    def _read_as(self, path: str, read_kwargs: Dict):
         custom_class = self.properties[self._EXPOSED_TYPE_PROPERTY]
-        list_of_dicts = self._read_as_pandas_dataframe(read_kwargs).to_dict(orient="records")
+        list_of_dicts = self._read_as_pandas_dataframe(path, read_kwargs).to_dict(orient="records")
         return [custom_class(**dct) for dct in list_of_dicts]
 
-    def _read_as_numpy(self, read_kwargs: Dict) -> np.ndarray:
-        return self._read_as_pandas_dataframe(read_kwargs).to_numpy()
+    def _read_as_numpy(self, path: str, read_kwargs: Dict) -> np.ndarray:
+        return self._read_as_pandas_dataframe(path, read_kwargs).to_numpy()
 
-    def _read_as_pandas_dataframe(self, read_kwargs: Dict) -> pd.DataFrame:
-        return pd.read_parquet(self._path, **read_kwargs)
+    def _read_as_pandas_dataframe(self, path: str, read_kwargs: Dict) -> pd.DataFrame:
+        return pd.read_parquet(path, **read_kwargs)
 
     def _append(self, data: Any):
         self.write_with_kwargs(data, engine="fastparquet", append=True)
@@ -233,23 +258,4 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             **read_kwargs (dict[str, any]): The keyword arguments passed to the function
                 `pandas.read_parquet()`.
         """
-        # return None if data was never written
-        if not self.last_edit_date:
-            self._DataNode__logger.warning(
-                f"Data node {self.id} from config {self.config_id} is being read but has never been written."
-            )
-            return None
-
-        kwargs = self.properties[self.__READ_KWARGS_PROPERTY]
-        kwargs.update(
-            {
-                self.__ENGINE_PROPERTY: self.properties[self.__ENGINE_PROPERTY],
-            }
-        )
-        kwargs.update(read_kwargs)
-
-        if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
-            return self._read_as_pandas_dataframe(kwargs)
-        if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
-            return self._read_as_numpy(kwargs)
-        return self._read_as(kwargs)
+        return self._read_from_path(**read_kwargs)
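
The public read_with_kwargs() is now a thin wrapper around _read_from_path(), so per-call keyword arguments are still merged over the node's configured read_kwargs and forwarded to pandas.read_parquet(). A hedged usage sketch (parquet_dn is an assumed ParquetDataNode with exposed_type "pandas" that has already been written to):

    subset = parquet_dn.read_with_kwargs(columns=["a", "b"])  # extra kwargs go to pandas.read_parquet()
    full = parquet_dn.read()                                  # same path, node-level read_kwargs only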

+ 9 - 3
taipy/core/data/pickle.py

@@ -11,7 +11,7 @@
 
 import pickle
 from datetime import datetime, timedelta
-from typing import List, Optional, Set
+from typing import Any, List, Optional, Set
 
 from taipy.config.common.scope import Scope
 
@@ -22,7 +22,7 @@ from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
 
-class PickleDataNode(DataNode, _FileDataNodeMixin):
+class PickleDataNode(_FileDataNodeMixin, DataNode):
     """Data Node stored as a pickle file.
 
     Attributes:
@@ -116,7 +116,13 @@ class PickleDataNode(DataNode, _FileDataNodeMixin):
         return cls.__STORAGE_TYPE
 
     def _read(self):
-        with open(self._path, "rb") as pf:
+        return self._read_from_path()
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if not path:
+            path = self._path
+
+        with open(path, "rb") as pf:
             return pickle.load(pf)
 
     def _write(self, data):

+ 86 - 0
tests/core/data/test_csv_data_node.py

@@ -15,8 +15,10 @@ import uuid
 from datetime import datetime
 from time import sleep
 
+import numpy as np
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from taipy.config.common.scope import Scope
 from taipy.config.config import Config
@@ -169,3 +171,87 @@ class TestCSVDataNode:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_downloadable_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_downloadable_path_with_not_existing_file(self):
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTING.csv", "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, csv_file, tmpdir_factory):
+        old_csv_path = tmpdir_factory.mktemp("data").join("df.csv").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": old_csv_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        upload_content = pd.read_csv(csv_file)
+        dn._upload(csv_file)
+
+        assert_frame_equal(dn.read(), upload_content)  # The content of the dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_csv_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check_pandas(self, csv_file, tmpdir_factory):
+        old_csv_path = tmpdir_factory.mktemp("data").join("df.csv").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": old_csv_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_column(upload_path, upload_data):
+            return upload_path.endswith(".csv") and upload_data.columns.tolist() == ["a", "b", "c"]
+
+        wrong_format_not_csv_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_csv").strpath
+        wrong_format_csv_path = tmpdir_factory.mktemp("data").join("wrong_format_df.csv").strpath
+        pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]).to_csv(wrong_format_csv_path, index=False)
+
+        # The upload should fail when the file is not a csv
+        assert not dn._upload(wrong_format_not_csv_path, upload_checker=check_data_column)
+
+        # The upload should fail when check_data_column() returns False
+        assert not dn._upload(wrong_format_csv_path, upload_checker=check_data_column)
+
+        assert_frame_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_csv_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_column() returns True
+        assert dn._upload(csv_file, upload_checker=check_data_column)
+
+    def test_upload_with_upload_check_numpy(self, tmpdir_factory):
+        old_csv_path = tmpdir_factory.mktemp("data").join("df.csv").strpath
+        old_data = np.array([[1, 2, 3], [4, 5, 6]])
+
+        new_csv_path = tmpdir_factory.mktemp("data").join("new_upload_data.csv").strpath
+        new_data = np.array([[1, 2, 3], [4, 5, 6]])
+        pd.DataFrame(new_data).to_csv(new_csv_path, index=False)
+
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": old_csv_path, "exposed_type": "numpy"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_is_positive(upload_path, upload_data):
+            return upload_path.endswith(".csv") and np.all(upload_data > 0)
+
+        wrong_format_not_csv_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_csv").strpath
+        wrong_format_csv_path = tmpdir_factory.mktemp("data").join("wrong_format_df.csv").strpath
+        pd.DataFrame(np.array([[-1, 2, 3], [-4, -5, -6]])).to_csv(wrong_format_csv_path, index=False)
+
+        # The upload should fail when the file is not a csv
+        assert not dn._upload(wrong_format_not_csv_path, upload_checker=check_data_is_positive)
+
+        # The upload should fail when check_data_is_positive() returns False
+        assert not dn._upload(wrong_format_csv_path, upload_checker=check_data_is_positive)
+
+        assert np.array_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_csv_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_is_positive() returns True
+        assert dn._upload(new_csv_path, upload_checker=check_data_is_positive)

+ 88 - 0
tests/core/data/test_excel_data_node.py

@@ -19,6 +19,7 @@ from typing import Dict
 import numpy as np
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from taipy.config.common.scope import Scope
 from taipy.config.config import Config
@@ -365,3 +366,90 @@ class TestExcelDataNode:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_download_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_downloadable_path_with_not_existing_file(self):
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTING.xlsx", "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, excel_file, tmpdir_factory):
+        old_xlsx_path = tmpdir_factory.mktemp("data").join("df.xlsx").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": old_xlsx_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        upload_content = pd.read_excel(excel_file)
+        dn._upload(excel_file)
+
+        assert_frame_equal(dn.read()["Sheet1"], upload_content)  # The data of dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_xlsx_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check_pandas(self, excel_file, tmpdir_factory):
+        old_xlsx_path = tmpdir_factory.mktemp("data").join("df.xlsx").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": old_xlsx_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_column(upload_path, upload_data):
+            """Check if the uploaded data has the correct file format and
+            the sheet named "Sheet1" has the correct columns.
+            """
+            return upload_path.endswith(".xlsx") and upload_data["Sheet1"].columns.tolist() == ["a", "b", "c"]
+
+        wrong_format_not_xlsx_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_xlsx").strpath
+        wrong_format_xlsx_path = tmpdir_factory.mktemp("data").join("wrong_format_df.xlsx").strpath
+        pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]).to_excel(wrong_format_xlsx_path, index=False)
+
+        # The upload should fail when the file is not an xlsx file
+        assert not dn._upload(wrong_format_not_xlsx_path, upload_checker=check_data_column)
+
+        # The upload should fail when check_data_column() returns False
+        assert not dn._upload(wrong_format_xlsx_path, upload_checker=check_data_column)
+
+        assert_frame_equal(dn.read()["Sheet1"], old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_xlsx_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_column() returns True
+        assert dn._upload(excel_file, upload_checker=check_data_column)
+
+    def test_upload_with_upload_check_numpy(self, tmpdir_factory):
+        old_excel_path = tmpdir_factory.mktemp("data").join("df.xlsx").strpath
+        old_data = np.array([[1, 2, 3], [4, 5, 6]])
+
+        new_excel_path = tmpdir_factory.mktemp("data").join("new_upload_data.xlsx").strpath
+        new_data = np.array([[1, 2, 3], [4, 5, 6]])
+        pd.DataFrame(new_data).to_excel(new_excel_path, index=False)
+
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": old_excel_path, "exposed_type": "numpy"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_is_positive(upload_path, upload_data):
+            return upload_path.endswith(".xlsx") and np.all(upload_data["Sheet1"] > 0)
+
+        wrong_format_not_excel_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_excel").strpath
+        wrong_format_excel_path = tmpdir_factory.mktemp("data").join("wrong_format_df.xlsx").strpath
+        pd.DataFrame(np.array([[-1, 2, 3], [-4, -5, -6]])).to_excel(wrong_format_excel_path, index=False)
+
+        # The upload should fail when the file is not an Excel file
+        assert not dn._upload(wrong_format_not_excel_path, upload_checker=check_data_is_positive)
+
+        # The upload should fail when check_data_is_positive() returns False
+        assert not dn._upload(wrong_format_excel_path, upload_checker=check_data_is_positive)
+
+        assert np.array_equal(dn.read()["Sheet1"], old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_excel_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_is_positive() returns True
+        assert dn._upload(new_excel_path, upload_checker=check_data_is_positive)

+ 55 - 0
tests/core/data/test_json_data_node.py

@@ -370,3 +370,58 @@ class TestJSONDataNode:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_download_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/json/example_dict.json")
+        dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": path})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_download_path_with_not_existing_file(self):
+        dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTING.json"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, json_file, tmpdir_factory):
+        old_json_path = tmpdir_factory.mktemp("data").join("df.json").strpath
+        old_data = [{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}]
+
+        dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": old_json_path})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        with open(json_file, "r") as f:
+            upload_content = json.load(f)
+        dn._upload(json_file)
+
+        assert dn.read() == upload_content  # The content of the dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_json_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check(self, json_file, tmpdir_factory):
+        old_json_path = tmpdir_factory.mktemp("data").join("df.json").strpath
+        old_data = [{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}]
+
+        dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": old_json_path})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_keys(upload_path, upload_data):
+            all_column_is_abc = all(data.keys() == {"a", "b", "c"} for data in upload_data)
+            return upload_path.endswith(".json") and all_column_is_abc
+
+        wrong_format_not_json_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_json").strpath
+        wrong_format_json_path = tmpdir_factory.mktemp("data").join("wrong_format_df.json").strpath
+        with open(wrong_format_json_path, "w") as f:
+            json.dump([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}], f)
+
+        # The upload should fail when the file is not a json
+        assert not dn._upload(wrong_format_not_json_path, upload_checker=check_data_keys)
+
+        # The upload should fail when check_data_keys() returns False
+        assert not dn._upload(wrong_format_json_path, upload_checker=check_data_keys)
+
+        assert dn.read() == old_data  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_json_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_keys() returns True
+        assert dn._upload(json_file, upload_checker=check_data_keys)

+ 93 - 0
tests/core/data/test_parquet_data_node.py

@@ -16,8 +16,10 @@ from datetime import datetime
 from importlib import util
 from time import sleep
 
+import numpy as np
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from taipy.config.common.scope import Scope
 from taipy.config.config import Config
@@ -221,3 +223,94 @@ class TestParquetDataNode:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_downloadable_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.parquet")
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_downloadable_path_with_not_existing_file(self):
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTING.parquet"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_get_downloadable_path_as_directory_should_return_nothing(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/parquet_example")
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": path})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, parquet_file_path, tmpdir_factory):
+        old_parquet_path = tmpdir_factory.mktemp("data").join("df.parquet").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": old_parquet_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        upload_content = pd.read_parquet(parquet_file_path)
+        dn._upload(parquet_file_path)
+
+        assert_frame_equal(dn.read(), upload_content)  # The content of the dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_parquet_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check_pandas(self, parquet_file_path, tmpdir_factory):
+        old_parquet_path = tmpdir_factory.mktemp("data").join("df.parquet").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": old_parquet_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_column(upload_path, upload_data):
+            return upload_path.endswith(".parquet") and upload_data.columns.tolist() == ["a", "b", "c"]
+
+        wrong_format_not_parquet_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_parquet").strpath
+        wrong_format_parquet_path = tmpdir_factory.mktemp("data").join("wrong_format_df.parquet").strpath
+        pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]).to_parquet(
+            wrong_format_parquet_path, index=False
+        )
+
+        # The upload should fail when the file is not a parquet
+        assert not dn._upload(wrong_format_not_parquet_path, upload_checker=check_data_column)
+
+        # The upload should fail when check_data_column() returns False
+        assert not dn._upload(wrong_format_parquet_path, upload_checker=check_data_column)
+
+        assert_frame_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_parquet_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_column() returns True
+        assert dn._upload(parquet_file_path, upload_checker=check_data_column)
+
+    def test_upload_with_upload_check_numpy(self, tmpdir_factory):
+        old_parquet_path = tmpdir_factory.mktemp("data").join("df.parquet").strpath
+        old_data = np.array([[1, 2, 3], [4, 5, 6]])
+
+        new_parquet_path = tmpdir_factory.mktemp("data").join("new_upload_data.parquet").strpath
+        new_data = np.array([[1, 2, 3], [4, 5, 6]])
+        pd.DataFrame(new_data).to_parquet(new_parquet_path, index=False)
+
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": old_parquet_path, "exposed_type": "numpy"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_is_positive(upload_path, upload_data):
+            return upload_path.endswith(".parquet") and np.all(upload_data > 0)
+
+        wrong_format_not_parquet_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_parquet").strpath
+        wrong_format_parquet_path = tmpdir_factory.mktemp("data").join("wrong_format_df.parquet").strpath
+        pd.DataFrame(np.array([[-1, 2, 3], [-4, -5, -6]])).to_parquet(wrong_format_parquet_path, index=False)
+
+        # The upload should fail when the file is not a parquet
+        assert not dn._upload(wrong_format_not_parquet_path, upload_checker=check_data_is_positive)
+
+        # The upload should fail when check_data_is_positive() returns False
+        assert not dn._upload(wrong_format_parquet_path, upload_checker=check_data_is_positive)
+
+        assert np.array_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_parquet_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_is_positive() returns True
+        assert dn._upload(new_parquet_path, upload_checker=check_data_is_positive)

+ 55 - 0
tests/core/data/test_pickle_data_node.py

@@ -11,11 +11,13 @@
 
 import os
 import pathlib
+import pickle
 from datetime import datetime
 from time import sleep
 
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from taipy.config.common.scope import Scope
 from taipy.config.config import Config
@@ -192,3 +194,56 @@ class TestPickleDataNodeEntity:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_download_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.p")
+        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": path})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_download_path_with_not_existing_file(self):
+        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTING.p"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, pickle_file_path, tmpdir_factory):
+        old_pickle_path = tmpdir_factory.mktemp("data").join("df.p").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": old_pickle_path})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        upload_content = pd.read_pickle(pickle_file_path)
+        dn._upload(pickle_file_path)
+
+        assert_frame_equal(dn.read(), upload_content)  # The content of the dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_pickle_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check(self, pickle_file_path, tmpdir_factory):
+        old_pickle_path = tmpdir_factory.mktemp("data").join("df.p").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": old_pickle_path})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_column(upload_path, upload_data):
+            return upload_path.endswith(".p") and upload_data.columns.tolist() == ["a", "b", "c"]
+
+        wrong_format_not_pickle_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_pickle").strpath
+        wrong_format_pickle_path = tmpdir_factory.mktemp("data").join("wrong_format_df.p").strpath
+        with open(str(wrong_format_pickle_path), "wb") as f:
+            pickle.dump(pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]), f)
+
+        # The upload should fail when the file is not a pickle
+        assert not dn._upload(wrong_format_not_pickle_path, upload_checker=check_data_column)
+
+        # The upload should fail when check_data_column() returns False
+        assert not dn._upload(wrong_format_pickle_path, upload_checker=check_data_column)
+
+        assert_frame_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_pickle_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_column() returns True
+        assert dn._upload(pickle_file_path, upload_checker=check_data_column)