
Merge pull request #1474 from Avaiga/feature/#1196-datanode-download-upload-api

Feature/#1196 - Add download and upload api for file-based datanodes
Đỗ Trường Giang, 10 months ago
parent
commit
380b29d658

+ 58 - 1
taipy/core/data/_file_datanode_mixin.py

@@ -14,11 +14,13 @@ import pathlib
 import shutil
 from datetime import datetime
 from os.path import isfile
-from typing import Any, Dict, Optional
+from typing import Any, Callable, Dict, Optional
 
 from taipy.config.config import Config
+from taipy.logger._taipy_logger import _TaipyLogger
 
 from .._entity._reload import _self_reload
+from ..reason import InvalidUploadFile, ReasonCollection, UploadFileCanNotBeRead
 from .data_node import DataNode
 from .data_node_id import Edit
 
@@ -34,6 +36,8 @@ class _FileDataNodeMixin(object):
     _DEFAULT_PATH_KEY = "default_path"
     _IS_GENERATED_KEY = "is_generated"
 
+    __logger = _TaipyLogger._get_logger()
+
     def __init__(self, properties: Dict) -> None:
         self._path: str = properties.get(self._PATH_KEY, properties.get(self._DEFAULT_PATH_KEY))
         self._is_generated: bool = properties.get(self._IS_GENERATED_KEY, self._path is None)
@@ -92,3 +96,56 @@ class _FileDataNodeMixin(object):
         if os.path.exists(old_path):
             shutil.move(old_path, new_path)
         return new_path
+
+    def _get_downloadable_path(self) -> str:
+        """Get the downloadable path of the file data of the data node.
+
+        Returns:
+            The downloadable path of the file data of the data node if it exists, otherwise an empty string.
+        """
+        if os.path.exists(self.path) and isfile(self._path):
+            return self.path
+
+        return ""
+
+    def _upload(self, path: str, upload_checker: Optional[Callable[[str, Any], bool]] = None) -> ReasonCollection:
+        """Upload a file data to the data node.
+
+        Parameters:
+            path (str): The path of the file to upload to the data node.
+            upload_checker (Optional[Callable[[str, Any], bool]]): A function to check if the upload is allowed.
+                The function takes the title of the upload data and the data itself as arguments and returns
+                True if the upload is allowed, otherwise False.
+
+        Returns:
+            True if the upload was successful, otherwise False.
+        """
+        from ._data_manager_factory import _DataManagerFactory
+
+        reason_collection = ReasonCollection()
+
+        upload_path = pathlib.Path(path)
+
+        try:
+            upload_data = self._read_from_path(str(upload_path))
+        except Exception as err:
+            self.__logger.error(f"Error while uploading {upload_path.name} to data node {self.id}:")  # type: ignore[attr-defined]
+            self.__logger.error(f"Error: {err}")
+            reason_collection._add_reason(self.id, UploadFileCanNotBeRead(upload_path.name, self.id))  # type: ignore[attr-defined]
+            return reason_collection
+
+        if upload_checker is not None:
+            if not upload_checker(upload_path.name, upload_data):
+                reason_collection._add_reason(self.id, InvalidUploadFile(upload_path.name, self.id))  # type: ignore[attr-defined]
+                return reason_collection
+
+        shutil.copy(upload_path, self.path)
+
+        self.track_edit(timestamp=datetime.now())  # type: ignore[attr-defined]
+        self.unlock_edit()  # type: ignore[attr-defined]
+        _DataManagerFactory._build_manager()._set(self)  # type: ignore[arg-type]
+
+        return reason_collection
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        raise NotImplementedError
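
For orientation, a minimal usage sketch of the new mixin API, mirroring the tests further down in this PR. The node name, file paths, and checker below are illustrative only, not part of the change:

    import pandas as pd

    from taipy.config.common.scope import Scope
    from taipy.core.data.csv import CSVDataNode

    dn = CSVDataNode("dataset", Scope.SCENARIO, properties={"path": "data/dataset.csv", "exposed_type": "pandas"})

    def only_abc_columns(file_name: str, data: pd.DataFrame) -> bool:
        # Reject the upload unless the file is a CSV whose columns are exactly a, b, c.
        return file_name.endswith(".csv") and data.columns.tolist() == ["a", "b", "c"]

    reasons = dn._upload("incoming/new_dataset.csv", upload_checker=only_abc_columns)
    if not reasons:  # a ReasonCollection holding reasons evaluates to False
        for reason in reasons._reasons[dn.id]:
            print(reason)

    download_path = dn._get_downloadable_path()  # "" if the underlying file does not exist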

+ 6 - 5
taipy/core/data/_tabular_datanode_mixin.py

@@ -29,15 +29,16 @@ class _TabularDataNodeMixin(object):
     _VALID_STRING_EXPOSED_TYPES = [_EXPOSED_TYPE_PANDAS, _EXPOSED_TYPE_NUMPY]
 
     def __init__(self, **kwargs) -> None:
-        self._decoder: Union[Callable[[List[Any]], Any], Callable[[Dict[Any, Any]], Any]]
+        self._decoder: Union[Callable, Any]
         self.custom_document = kwargs.get(self._EXPOSED_TYPE_PROPERTY)
-        if kwargs.get(self._HAS_HEADER_PROPERTY, True):
-            self._decoder = self._default_decoder_with_header
-        else:
-            self._decoder = self._default_decoder_without_header
+
         custom_decoder = getattr(self.custom_document, "decode", None)
         if callable(custom_decoder):
             self._decoder = custom_decoder
+        elif kwargs.get(self._HAS_HEADER_PROPERTY, True):
+            self._decoder = self._default_decoder_with_header
+        else:
+            self._decoder = self._default_decoder_without_header
 
         self._encoder = self._default_encoder
         custom_encoder = getattr(self.custom_document, "encode", None)
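
With the reordering above, a custom exposed type's own decode/encode methods now take precedence over the header-based default decoders. A hedged sketch of such a class, assuming the file has a header row (the class name and fields are illustrative):

    class MyRecord:
        # Illustrative custom exposed type: decode() builds an object from one raw row,
        # encode() turns the object back into a row for writing.
        def __init__(self, a, b, c):
            self.a, self.b, self.c = a, b, c

        @classmethod
        def decode(cls, row):
            return cls(row["a"], row["b"], row["c"])

        def encode(self):
            return {"a": self.a, "b": self.b, "c": self.c}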

+ 25 - 18
taipy/core/data/csv.py

@@ -137,41 +137,48 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         return cls.__STORAGE_TYPE
 
     def _read(self):
+        return self._read_from_path()
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if path is None:
+            path = self._path
+
         properties = self.properties
         if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
-            return self._read_as_pandas_dataframe()
+            return self._read_as_pandas_dataframe(path=path)
         if properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
-            return self._read_as_numpy()
-        return self._read_as()
+            return self._read_as_numpy(path=path)
+        return self._read_as(path=path)
 
-    def _read_as(self):
+    def _read_as(self, path: str):
         properties = self.properties
-        with open(self._path, encoding=properties[self.__ENCODING_KEY]) as csvFile:
+        with open(path, encoding=properties[self.__ENCODING_KEY]) as csvFile:
             if properties[self._HAS_HEADER_PROPERTY]:
-                reader = csv.DictReader(csvFile)
-            else:
-                reader = csv.reader(csvFile)
+                reader_with_header = csv.DictReader(csvFile)
+                return [self._decoder(line) for line in reader_with_header]
 
-            return [self._decoder(line) for line in reader]
+            reader_without_header = csv.reader(csvFile)
+            return [self._decoder(line) for line in reader_without_header]
 
-    def _read_as_numpy(self) -> np.ndarray:
-        return self._read_as_pandas_dataframe().to_numpy()
+    def _read_as_numpy(self, path: str) -> np.ndarray:
+        return self._read_as_pandas_dataframe(path=path).to_numpy()
 
     def _read_as_pandas_dataframe(
-        self, usecols: Optional[List[int]] = None, column_names: Optional[List[str]] = None
+        self,
+        path: str,
+        usecols: Optional[List[int]] = None,
+        column_names: Optional[List[str]] = None,
     ) -> pd.DataFrame:
         try:
             properties = self.properties
             if properties[self._HAS_HEADER_PROPERTY]:
                 if column_names:
-                    return pd.read_csv(self._path, encoding=properties[self.__ENCODING_KEY])[column_names]
-                return pd.read_csv(self._path, encoding=properties[self.__ENCODING_KEY])
+                    return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY])[column_names]
+                return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY])
             else:
                 if usecols:
-                    return pd.read_csv(
-                        self._path, encoding=properties[self.__ENCODING_KEY], header=None, usecols=usecols
-                    )
-                return pd.read_csv(self._path, encoding=properties[self.__ENCODING_KEY], header=None)
+                    return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY], header=None, usecols=usecols)
+                return pd.read_csv(path, encoding=properties[self.__ENCODING_KEY], header=None)
         except pd.errors.EmptyDataError:
             return pd.DataFrame()
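
The path argument is what _upload() relies on: the node can read a candidate file with its own encoding, header, and exposed_type settings before that file replaces the node's data. A short sketch (paths are illustrative):

    from taipy.config.common.scope import Scope
    from taipy.core.data.csv import CSVDataNode

    dn = CSVDataNode("sales", Scope.SCENARIO, properties={"path": "data/sales.csv", "exposed_type": "pandas"})

    current = dn._read()                                       # reads data/sales.csv, as before
    candidate = dn._read_from_path("incoming/sales_2024.csv")  # reads another file with the same settings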
 

+ 28 - 18
taipy/core/data/excel.py

@@ -10,7 +10,7 @@
 # specific language governing permissions and limitations under the License.
 
 from datetime import datetime, timedelta
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Set, Union
 
 import numpy as np
 import pandas as pd
@@ -150,39 +150,45 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
                 _TabularDataNodeMixin._check_exposed_type(t)
 
     def _read(self):
+        return self._read_from_path()
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if path is None:
+            path = self._path
+
         exposed_type = self.properties[self._EXPOSED_TYPE_PROPERTY]
         if exposed_type == self._EXPOSED_TYPE_PANDAS:
-            return self._read_as_pandas_dataframe()
+            return self._read_as_pandas_dataframe(path=path)
         if exposed_type == self._EXPOSED_TYPE_NUMPY:
-            return self._read_as_numpy()
-        return self._read_as()
+            return self._read_as_numpy(path=path)
+        return self._read_as(path=path)
 
     def _read_sheet_with_exposed_type(
-        self, sheet_exposed_type: str, sheet_name: str
+        self, path: str, sheet_exposed_type: str, sheet_name: str
     ) -> Optional[Union[np.ndarray, pd.DataFrame]]:
         if sheet_exposed_type == self._EXPOSED_TYPE_NUMPY:
-            return self._read_as_pandas_dataframe(sheet_name).to_numpy()  # type: ignore
+            return self._read_as_numpy(path, sheet_name)
         elif sheet_exposed_type == self._EXPOSED_TYPE_PANDAS:
-            return self._read_as_pandas_dataframe(sheet_name)
+            return self._read_as_pandas_dataframe(path, sheet_name)
         return None
 
-    def _read_as(self):
+    def _read_as(self, path: str):
         try:
             properties = self.properties
-            excel_file = load_workbook(self._path)
+            excel_file = load_workbook(path)
             exposed_type = properties[self._EXPOSED_TYPE_PROPERTY]
             work_books = {}
             sheet_names = excel_file.sheetnames
 
             user_provided_sheet_names = properties.get(self.__SHEET_NAME_PROPERTY) or []
-            if not isinstance(user_provided_sheet_names, (List, Set, Tuple)):
+            if not isinstance(user_provided_sheet_names, (list, set, tuple)):
                 user_provided_sheet_names = [user_provided_sheet_names]
 
             provided_sheet_names = user_provided_sheet_names or sheet_names
 
             for sheet_name in provided_sheet_names:
                 if sheet_name not in sheet_names:
-                    raise NonExistingExcelSheet(sheet_name, self._path)
+                    raise NonExistingExcelSheet(sheet_name, path)
 
             if isinstance(exposed_type, List):
                 if len(provided_sheet_names) != len(exposed_type):
@@ -201,7 +207,7 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
                         sheet_exposed_type = exposed_type[i]
 
                     if isinstance(sheet_exposed_type, str):
-                        sheet_data = self._read_sheet_with_exposed_type(sheet_exposed_type, sheet_name)
+                        sheet_data = self._read_sheet_with_exposed_type(path, sheet_exposed_type, sheet_name)
                         if sheet_data is not None:
                             work_books[sheet_name] = sheet_data
                         continue
@@ -223,14 +229,16 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
 
         return work_books
 
-    def _read_as_numpy(self):
-        sheets = self._read_as_pandas_dataframe()
+    def _read_as_numpy(self, path: str, sheet_names=None):
+        sheets = self._read_as_pandas_dataframe(path=path, sheet_names=sheet_names)
         if isinstance(sheets, dict):
             return {sheet_name: df.to_numpy() for sheet_name, df in sheets.items()}
         return sheets.to_numpy()
 
-    def _do_read_excel(self, sheet_names, kwargs) -> Union[Dict[Union[int, str], pd.DataFrame], pd.DataFrame]:
-        return pd.read_excel(self._path, sheet_name=sheet_names, **kwargs)
+    def _do_read_excel(
+        self, path: str, sheet_names, kwargs
+    ) -> Union[Dict[Union[int, str], pd.DataFrame], pd.DataFrame]:
+        return pd.read_excel(path, sheet_name=sheet_names, **kwargs)
 
     def __get_sheet_names_and_header(self, sheet_names):
         kwargs = {}
@@ -241,10 +249,12 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             kwargs["header"] = None
         return sheet_names, kwargs
 
-    def _read_as_pandas_dataframe(self, sheet_names=None) -> Union[Dict[Union[int, str], pd.DataFrame], pd.DataFrame]:
+    def _read_as_pandas_dataframe(
+        self, path: str, sheet_names=None
+    ) -> Union[Dict[Union[int, str], pd.DataFrame], pd.DataFrame]:
         sheet_names, kwargs = self.__get_sheet_names_and_header(sheet_names)
         try:
-            return self._do_read_excel(sheet_names, kwargs)
+            return self._do_read_excel(path, sheet_names, kwargs)
         except pd.errors.EmptyDataError:
             return pd.DataFrame()
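
The per-sheet dispatch above is what the new multi-sheet tests below exercise: each sheet can use its own exposed type. A hedged sketch (file path and sheet names are illustrative):

    from taipy.config.common.scope import Scope
    from taipy.core.data.excel import ExcelDataNode

    dn = ExcelDataNode(
        "report",
        Scope.SCENARIO,
        properties={
            "path": "data/report.xlsx",
            "sheet_name": ["Sheet1", "Sheet2"],
            "exposed_type": {"Sheet1": "pandas", "Sheet2": "numpy"},
        },
    )
    data = dn.read()  # {"Sheet1": pandas.DataFrame, "Sheet2": numpy.ndarray}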
 

+ 7 - 1
taipy/core/data/json.py

@@ -150,7 +150,13 @@ class JSONDataNode(DataNode, _FileDataNodeMixin):
         self.properties[self._DECODER_KEY] = decoder
 
     def _read(self):
-        with open(self._path, "r", encoding=self.properties[self.__ENCODING_KEY]) as f:
+        return self._read_from_path()
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if path is None:
+            path = self._path
+
+        with open(path, "r", encoding=self.properties[self.__ENCODING_KEY]) as f:
             return json.load(f, cls=self._decoder)
 
     def _append(self, data: Any):

+ 34 - 33
taipy/core/data/parquet.py

@@ -11,7 +11,7 @@
 
 from datetime import datetime, timedelta
 from os.path import isdir, isfile
-from typing import Any, Dict, List, Optional, Set, Union
+from typing import Any, Dict, List, Optional, Set
 
 import numpy as np
 import pandas as pd
@@ -181,18 +181,43 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         return cls.__STORAGE_TYPE
 
     def _read(self):
-        return self.read_with_kwargs()
+        return self._read_from_path()
 
-    def _read_as(self, read_kwargs: Dict):
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if path is None:
+            path = self._path
+
+        # return None if data was never written
+        if not self.last_edit_date:
+            self._DataNode__logger.warning(
+                f"Data node {self.id} from config {self.config_id} is being read but has never been written."
+            )
+            return None
+
+        kwargs = self.properties[self.__READ_KWARGS_PROPERTY]
+        kwargs.update(
+            {
+                self.__ENGINE_PROPERTY: self.properties[self.__ENGINE_PROPERTY],
+            }
+        )
+        kwargs.update(read_kwargs)
+
+        if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
+            return self._read_as_pandas_dataframe(path, kwargs)
+        if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_NUMPY:
+            return self._read_as_numpy(path, kwargs)
+        return self._read_as(path, kwargs)
+
+    def _read_as(self, path: str, read_kwargs: Dict):
         custom_class = self.properties[self._EXPOSED_TYPE_PROPERTY]
-        list_of_dicts = self._read_as_pandas_dataframe(read_kwargs).to_dict(orient="records")
+        list_of_dicts = self._read_as_pandas_dataframe(path, read_kwargs).to_dict(orient="records")
         return [custom_class(**dct) for dct in list_of_dicts]
 
-    def _read_as_numpy(self, read_kwargs: Dict) -> np.ndarray:
-        return self._read_as_pandas_dataframe(read_kwargs).to_numpy()
+    def _read_as_numpy(self, path: str, read_kwargs: Dict) -> np.ndarray:
+        return self._read_as_pandas_dataframe(path, read_kwargs).to_numpy()
 
-    def _read_as_pandas_dataframe(self, read_kwargs: Dict) -> pd.DataFrame:
-        return pd.read_parquet(self._path, **read_kwargs)
+    def _read_as_pandas_dataframe(self, path: str, read_kwargs: Dict) -> pd.DataFrame:
+        return pd.read_parquet(path, **read_kwargs)
 
     def _append(self, data: Any):
         self.write_with_kwargs(data, engine="fastparquet", append=True)
@@ -237,28 +262,4 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             **read_kwargs (dict[str, any]): The keyword arguments passed to the function
                 `pandas.read_parquet()`.
         """
-        # return None if data was never written
-        if not self.last_edit_date:
-            self._DataNode__logger.warning(
-                f"Data node {self.id} from config {self.config_id} is being read but has never been written."
-            )
-            return None
-
-        properties = self.properties
-        exposed_type = properties[self._EXPOSED_TYPE_PROPERTY]
-        kwargs = properties[self.__READ_KWARGS_PROPERTY]
-        kwargs.update(
-            {
-                self.__ENGINE_PROPERTY: properties[self.__ENGINE_PROPERTY],
-            }
-        )
-        kwargs.update(read_kwargs)
-
-        return self._do_read_with_kwargs(exposed_type, kwargs)
-
-    def _do_read_with_kwargs(self, exposed_type, read_kwargs) -> Union[pd.DataFrame, np.ndarray, List]:
-        if exposed_type == self._EXPOSED_TYPE_PANDAS:
-            return self._read_as_pandas_dataframe(read_kwargs)
-        if exposed_type == self._EXPOSED_TYPE_NUMPY:
-            return self._read_as_numpy(read_kwargs)
-        return self._read_as(read_kwargs)
+        return self._read_from_path(**read_kwargs)
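
read_with_kwargs() is now a thin wrapper around _read_from_path(), so pandas.read_parquet keyword arguments are forwarded exactly as before. A short sketch (path and column names are illustrative):

    from taipy.config.common.scope import Scope
    from taipy.core.data.parquet import ParquetDataNode

    dn = ParquetDataNode("metrics", Scope.SCENARIO, properties={"path": "data/metrics.parquet"})

    subset = dn.read_with_kwargs(columns=["a", "b"])  # forwarded to pandas.read_parquet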

+ 8 - 2
taipy/core/data/pickle.py

@@ -11,7 +11,7 @@
 
 import pickle
 from datetime import datetime, timedelta
-from typing import List, Optional, Set
+from typing import Any, List, Optional, Set
 
 from taipy.config.common.scope import Scope
 
@@ -116,7 +116,13 @@ class PickleDataNode(DataNode, _FileDataNodeMixin):
         return cls.__STORAGE_TYPE
 
     def _read(self):
-        with open(self._path, "rb") as pf:
+        return self._read_from_path()
+
+    def _read_from_path(self, path: Optional[str] = None, **read_kwargs) -> Any:
+        if path is None:
+            path = self._path
+
+        with open(path, "rb") as pf:
             return pickle.load(pf)
 
     def _write(self, data):

+ 2 - 0
taipy/core/reason/__init__.py

@@ -13,8 +13,10 @@ from .reason import (
     DataNodeEditInProgress,
     DataNodeIsNotWritten,
     EntityIsNotSubmittableEntity,
+    InvalidUploadFile,
     NotGlobalScope,
     Reason,
+    UploadFileCanNotBeRead,
     WrongConfigType,
 )
 from .reason_collection import ReasonCollection

+ 32 - 0
taipy/core/reason/reason.py

@@ -123,3 +123,35 @@ class NotGlobalScope(Reason):
 
     def __init__(self, config_id: str):
         Reason.__init__(self, f'Data node config "{config_id}" does not have GLOBAL scope')
+
+
+class UploadFileCanNotBeRead(Reason, _DataNodeReasonMixin):
+    """
+    The uploaded file can not be read, therefore is not a valid data file for the data node.
+
+    Attributes:
+        file_name (str): The name of the file that was uploaded.
+        datanode_id (str): The datanode id that the file is intended to upload to.
+    """
+
+    def __init__(self, file_name: str, datanode_id: str):
+        Reason.__init__(
+            self,
+            f"The uploaded file {file_name} can not be read, "
+            f'therefore is not a valid data file for data node "{datanode_id}"',
+        )
+        _DataNodeReasonMixin.__init__(self, datanode_id)
+
+
+class InvalidUploadFile(Reason, _DataNodeReasonMixin):
+    """
+    The uploaded file has invalid data, therefore is not a valid data file for the data node.
+
+    Attributes:
+        file_name (str): The name of the file that was uploaded.
+        datanode_id (str): The datanode id that the file is intended to upload to.
+    """
+
+    def __init__(self, file_name: str, datanode_id: str):
+        Reason.__init__(self, f'The uploaded file {file_name} has invalid data for data node "{datanode_id}"')
+        _DataNodeReasonMixin.__init__(self, datanode_id)
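
Both reasons surface through the ReasonCollection returned by _upload(): a collection that holds reasons evaluates to False, and each reason's message can be inspected. A hedged sketch (the data node id is a made-up placeholder):

    from taipy.core.reason import InvalidUploadFile, ReasonCollection, UploadFileCanNotBeRead

    rc = ReasonCollection()
    rc._add_reason("DATANODE_dataset_id", UploadFileCanNotBeRead("broken.csv", "DATANODE_dataset_id"))
    rc._add_reason("DATANODE_dataset_id", InvalidUploadFile("bad_columns.csv", "DATANODE_dataset_id"))

    assert not rc  # the collection is falsy because it holds reasons
    for reason in rc._reasons["DATANODE_dataset_id"]:
        print(reason)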

+ 126 - 1
tests/core/data/test_csv_data_node.py

@@ -13,11 +13,14 @@ import dataclasses
 import os
 import pathlib
 import uuid
-from datetime import datetime
+from datetime import datetime, timedelta
 from time import sleep
 
+import freezegun
+import numpy as np
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from taipy.config.common.scope import Scope
 from taipy.config.config import Config
@@ -190,3 +193,125 @@ class TestCSVDataNode:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_downloadable_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.csv")
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_downloadable_path_with_not_existing_file(self):
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTING.csv", "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, csv_file, tmpdir_factory):
+        old_csv_path = tmpdir_factory.mktemp("data").join("df.csv").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": old_csv_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        upload_content = pd.read_csv(csv_file)
+
+        with freezegun.freeze_time(old_last_edit_date + timedelta(seconds=1)):
+            dn._upload(csv_file)
+
+        assert_frame_equal(dn.read(), upload_content)  # The content of the dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_csv_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check_pandas(self, csv_file, tmpdir_factory):
+        old_csv_path = tmpdir_factory.mktemp("data").join("df.csv").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": old_csv_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_column(upload_path, upload_data):
+            return upload_path.endswith(".csv") and upload_data.columns.tolist() == ["a", "b", "c"]
+
+        not_exists_csv_path = tmpdir_factory.mktemp("data").join("not_exists.csv").strpath
+        reasons = dn._upload(not_exists_csv_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0]) == "The uploaded file not_exists.csv can not be read,"
+            f' therefore is not a valid data file for data node "{dn.id}"'
+        )
+
+        not_csv_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_csv").strpath
+        old_data.to_csv(not_csv_path, index=False)
+        # The upload should fail when the file is not a csv
+        reasons = dn._upload(not_csv_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.not_csv has invalid data for data node "{dn.id}"'
+        )
+
+        wrong_format_csv_path = tmpdir_factory.mktemp("data").join("wrong_format_df.csv").strpath
+        pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]).to_csv(wrong_format_csv_path, index=False)
+        # The upload should fail when check_data_column() returns False
+        reasons = dn._upload(wrong_format_csv_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.csv has invalid data for data node "{dn.id}"'
+        )
+
+        assert_frame_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_csv_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_column() returns True
+        assert dn._upload(csv_file, upload_checker=check_data_column)
+
+    def test_upload_with_upload_check_numpy(self, tmpdir_factory):
+        old_csv_path = tmpdir_factory.mktemp("data").join("df.csv").strpath
+        old_data = np.array([[1, 2, 3], [4, 5, 6]])
+
+        new_csv_path = tmpdir_factory.mktemp("data").join("new_upload_data.csv").strpath
+        new_data = np.array([[1, 2, 3], [4, 5, 6]])
+        pd.DataFrame(new_data).to_csv(new_csv_path, index=False)
+
+        dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": old_csv_path, "exposed_type": "numpy"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_is_positive(upload_path, upload_data):
+            return upload_path.endswith(".csv") and np.all(upload_data > 0)
+
+        not_exists_csv_path = tmpdir_factory.mktemp("data").join("not_exists.csv").strpath
+        reasons = dn._upload(not_exists_csv_path, upload_checker=check_data_is_positive)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0]) == "The uploaded file not_exists.csv can not be read"
+            f', therefore is not a valid data file for data node "{dn.id}"'
+        )
+
+        not_csv_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_csv").strpath
+        pd.DataFrame(old_data).to_csv(not_csv_path, index=False)
+        # The upload should fail when the file is not a csv
+        reasons = dn._upload(not_csv_path, upload_checker=check_data_is_positive)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.not_csv has invalid data for data node "{dn.id}"'
+        )
+
+        wrong_format_csv_path = tmpdir_factory.mktemp("data").join("wrong_format_df.csv").strpath
+        pd.DataFrame(np.array([[-1, 2, 3], [-4, -5, -6]])).to_csv(wrong_format_csv_path, index=False)
+        # The upload should fail when check_data_is_positive() returns False
+        reasons = dn._upload(wrong_format_csv_path, upload_checker=check_data_is_positive)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.csv has invalid data for data node "{dn.id}"'
+        )
+
+        assert np.array_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_csv_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_is_positive() returns True
+        assert dn._upload(new_csv_path, upload_checker=check_data_is_positive)

+ 127 - 1
tests/core/data/test_excel_data_node.py

@@ -12,13 +12,15 @@
 import os
 import pathlib
 import uuid
-from datetime import datetime
+from datetime import datetime, timedelta
 from time import sleep
 from typing import Dict
 
+import freezegun
 import numpy as np
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from taipy.config.common.scope import Scope
 from taipy.config.config import Config
@@ -406,3 +408,127 @@ class TestExcelDataNode:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_download_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_downloadable_path_with_not_existing_file(self):
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTING.xlsx", "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, excel_file, tmpdir_factory):
+        old_xlsx_path = tmpdir_factory.mktemp("data").join("df.xlsx").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": old_xlsx_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        upload_content = pd.read_excel(excel_file)
+
+        with freezegun.freeze_time(old_last_edit_date + timedelta(seconds=1)):
+            dn._upload(excel_file)
+
+        assert_frame_equal(dn.read()["Sheet1"], upload_content)  # The data of dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_xlsx_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check_pandas(self, excel_file, tmpdir_factory):
+        old_xlsx_path = tmpdir_factory.mktemp("data").join("df.xlsx").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": old_xlsx_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_column(upload_path, upload_data):
+            """Check if the uploaded data has the correct file format and
+            the sheet named "Sheet1" has the correct columns.
+            """
+            return upload_path.endswith(".xlsx") and upload_data["Sheet1"].columns.tolist() == ["a", "b", "c"]
+
+        not_exists_xlsx_path = tmpdir_factory.mktemp("data").join("not_exists.xlsx").strpath
+        reasons = dn._upload(not_exists_xlsx_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0]) == "The uploaded file not_exists.xlsx can not be read,"
+            f' therefore is not a valid data file for data node "{dn.id}"'
+        )
+
+        not_xlsx_path = tmpdir_factory.mktemp("data").join("wrong_format_df.xlsm").strpath
+        old_data.to_excel(not_xlsx_path, index=False)
+        # The upload should fail when the file is not an xlsx file
+        reasons = dn._upload(not_xlsx_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.xlsm has invalid data for data node "{dn.id}"'
+        )
+
+        wrong_format_xlsx_path = tmpdir_factory.mktemp("data").join("wrong_format_df.xlsx").strpath
+        pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]).to_excel(wrong_format_xlsx_path, index=False)
+        # The upload should fail when check_data_column() returns False
+        reasons = dn._upload(wrong_format_xlsx_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.xlsx has invalid data for data node "{dn.id}"'
+        )
+
+        assert_frame_equal(dn.read()["Sheet1"], old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_xlsx_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_column() returns True
+        assert dn._upload(excel_file, upload_checker=check_data_column)
+
+    def test_upload_with_upload_check_numpy(self, tmpdir_factory):
+        old_excel_path = tmpdir_factory.mktemp("data").join("df.xlsx").strpath
+        old_data = np.array([[1, 2, 3], [4, 5, 6]])
+
+        new_excel_path = tmpdir_factory.mktemp("data").join("new_upload_data.xlsx").strpath
+        new_data = np.array([[1, 2, 3], [4, 5, 6]])
+        pd.DataFrame(new_data).to_excel(new_excel_path, index=False)
+
+        dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": old_excel_path, "exposed_type": "numpy"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_is_positive(upload_path, upload_data):
+            return upload_path.endswith(".xlsx") and np.all(upload_data["Sheet1"] > 0)
+
+        not_exists_xlsx_path = tmpdir_factory.mktemp("data").join("not_exists.xlsx").strpath
+        reasons = dn._upload(not_exists_xlsx_path, upload_checker=check_data_is_positive)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0]) == "The uploaded file not_exists.xlsx can not be read,"
+            f' therefore is not a valid data file for data node "{dn.id}"'
+        )
+
+        wrong_format_not_excel_path = tmpdir_factory.mktemp("data").join("wrong_format_df.xlsm").strpath
+        pd.DataFrame(old_data).to_excel(wrong_format_not_excel_path, index=False)
+        # The upload should fail when the file is not an Excel file
+        reasons = dn._upload(wrong_format_not_excel_path, upload_checker=check_data_is_positive)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.xlsm has invalid data for data node "{dn.id}"'
+        )
+
+        not_xlsx_path = tmpdir_factory.mktemp("data").join("wrong_format_df.xlsx").strpath
+        pd.DataFrame(np.array([[-1, 2, 3], [-4, -5, -6]])).to_excel(not_xlsx_path, index=False)
+        # The upload should fail when check_data_is_positive() returns False
+        reasons = dn._upload(not_xlsx_path, upload_checker=check_data_is_positive)
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.xlsx has invalid data for data node "{dn.id}"'
+        )
+
+        assert np.array_equal(dn.read()["Sheet1"], old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_excel_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_is_positive() return True
+        assert dn._upload(new_excel_path, upload_checker=check_data_is_positive)

+ 77 - 0
tests/core/data/test_json_data_node.py

@@ -18,6 +18,7 @@ from dataclasses import dataclass
 from enum import Enum
 from time import sleep
 
+import freezegun
 import numpy as np
 import pandas as pd
 import pytest
@@ -390,3 +391,79 @@ class TestJSONDataNode:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_download_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/json/example_dict.json")
+        dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": path})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_download_path_with_not_existed_file(self):
+        dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTED.json"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, json_file, tmpdir_factory):
+        old_json_path = tmpdir_factory.mktemp("data").join("df.json").strpath
+        old_data = [{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}]
+
+        dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": old_json_path})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        with open(json_file, "r") as f:
+            upload_content = json.load(f)
+
+        with freezegun.freeze_time(old_last_edit_date + datetime.timedelta(seconds=1)):
+            dn._upload(json_file)
+
+        assert dn.read() == upload_content  # The content of the dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_json_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check(self, json_file, tmpdir_factory):
+        old_json_path = tmpdir_factory.mktemp("data").join("df.json").strpath
+        old_data = [{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}]
+
+        dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": old_json_path})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_keys(upload_path, upload_data):
+            all_column_is_abc = all(data.keys() == {"a", "b", "c"} for data in upload_data)
+            return upload_path.endswith(".json") and all_column_is_abc
+
+        not_exists_json_path = tmpdir_factory.mktemp("data").join("not_exists.json").strpath
+        reasons = dn._upload(not_exists_json_path, upload_checker=check_data_keys)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0]) == "The uploaded file not_exists.json can not be read,"
+            f' therefore is not a valid data file for data node "{dn.id}"'
+        )
+
+        not_json_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_json").strpath
+        with open(not_json_path, "w") as f:
+            json.dump([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}], f)
+        # The upload should fail when the file is not a json
+        reasons = dn._upload(not_json_path, upload_checker=check_data_keys)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.not_json has invalid data for data node "{dn.id}"'
+        )
+
+        wrong_format_json_path = tmpdir_factory.mktemp("data").join("wrong_format_df.json").strpath
+        with open(wrong_format_json_path, "w") as f:
+            json.dump([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}], f)
+        # The upload should fail when check_data_keys() returns False
+        reasons = dn._upload(wrong_format_json_path, upload_checker=check_data_keys)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.json has invalid data for data node "{dn.id}"'
+        )
+
+        assert dn.read() == old_data  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_json_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_keys() returns True
+        assert dn._upload(json_file, upload_checker=check_data_keys)

+ 133 - 1
tests/core/data/test_parquet_data_node.py

@@ -12,12 +12,15 @@
 import os
 import pathlib
 import uuid
-from datetime import datetime
+from datetime import datetime, timedelta
 from importlib import util
 from time import sleep
 
+import freezegun
+import numpy as np
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from taipy.config.common.scope import Scope
 from taipy.config.config import Config
@@ -230,3 +233,132 @@ class TestParquetDataNode:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_downloadable_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.parquet")
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_downloadable_path_with_not_existing_file(self):
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTING.parquet"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_get_downloadable_path_as_directory_should_return_nothing(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/parquet_example")
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": path})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, parquet_file_path, tmpdir_factory):
+        old_parquet_path = tmpdir_factory.mktemp("data").join("df.parquet").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": old_parquet_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        upload_content = pd.read_parquet(parquet_file_path)
+
+        with freezegun.freeze_time(old_last_edit_date + timedelta(seconds=1)):
+            dn._upload(parquet_file_path)
+
+        assert_frame_equal(dn.read(), upload_content)  # The content of the dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_parquet_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check_pandas(self, parquet_file_path, tmpdir_factory):
+        old_parquet_path = tmpdir_factory.mktemp("data").join("df.parquet").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": old_parquet_path, "exposed_type": "pandas"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_column(upload_path, upload_data):
+            return upload_path.endswith(".parquet") and upload_data.columns.tolist() == ["a", "b", "c"]
+
+        not_exists_parquet_path = tmpdir_factory.mktemp("data").join("not_exists.parquet").strpath
+        reasons = dn._upload(not_exists_parquet_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0]) == "The uploaded file not_exists.parquet can not be read,"
+            f' therefore is not a valid data file for data node "{dn.id}"'
+        )
+
+        not_parquet_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_parquet").strpath
+        old_data.to_parquet(not_parquet_path, index=False)
+        # The upload should fail when the file is not a parquet
+        reasons = dn._upload(not_parquet_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.not_parquet has invalid data for data node "{dn.id}"'
+        )
+
+        wrong_format_parquet_path = tmpdir_factory.mktemp("data").join("wrong_format_df.parquet").strpath
+        pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]).to_parquet(
+            wrong_format_parquet_path, index=False
+        )
+        # The upload should fail when check_data_column() returns False
+        reasons = dn._upload(wrong_format_parquet_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.parquet has invalid data for data node "{dn.id}"'
+        )
+
+        assert_frame_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_parquet_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_column() returns True
+        assert dn._upload(parquet_file_path, upload_checker=check_data_column)
+
+    def test_upload_with_upload_check_numpy(self, tmpdir_factory):
+        old_parquet_path = tmpdir_factory.mktemp("data").join("df.parquet").strpath
+        old_data = np.array([[1, 2, 3], [4, 5, 6]])
+
+        new_parquet_path = tmpdir_factory.mktemp("data").join("new_upload_data.parquet").strpath
+        new_data = np.array([[1, 2, 3], [4, 5, 6]])
+        pd.DataFrame(new_data, columns=["a", "b", "c"]).to_parquet(new_parquet_path, index=False)
+
+        dn = ParquetDataNode("foo", Scope.SCENARIO, properties={"path": old_parquet_path, "exposed_type": "numpy"})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_is_positive(upload_path, upload_data):
+            return upload_path.endswith(".parquet") and np.all(upload_data > 0)
+
+        not_exists_parquet_path = tmpdir_factory.mktemp("data").join("not_exists.parquet").strpath
+        reasons = dn._upload(not_exists_parquet_path, upload_checker=check_data_is_positive)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0]) == "The uploaded file not_exists.parquet can not be read,"
+            f' therefore is not a valid data file for data node "{dn.id}"'
+        )
+
+        not_parquet_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_parquet").strpath
+        pd.DataFrame(old_data, columns=["a", "b", "c"]).to_parquet(not_parquet_path, index=False)
+        # The upload should fail when the file is not a parquet
+        reasons = dn._upload(not_parquet_path, upload_checker=check_data_is_positive)
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.not_parquet has invalid data for data node "{dn.id}"'
+        )
+
+        wrong_format_parquet_path = tmpdir_factory.mktemp("data").join("wrong_format_df.parquet").strpath
+        pd.DataFrame(np.array([[-1, 2, 3], [-4, -5, -6]]), columns=["a", "b", "c"]).to_parquet(
+            wrong_format_parquet_path, index=False
+        )
+        # The upload should fail when check_data_is_positive() returns False
+        reasons = dn._upload(wrong_format_parquet_path, upload_checker=check_data_is_positive)
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.parquet has invalid data for data node "{dn.id}"'
+        )
+
+        assert np.array_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_parquet_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_is_positive() returns True
+        assert dn._upload(new_parquet_path, upload_checker=check_data_is_positive)

+ 78 - 1
tests/core/data/test_pickle_data_node.py

@@ -11,11 +11,14 @@
 
 import os
 import pathlib
-from datetime import datetime
+import pickle
+from datetime import datetime, timedelta
 from time import sleep
 
+import freezegun
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from taipy.config.common.scope import Scope
 from taipy.config.config import Config
@@ -201,3 +204,77 @@ class TestPickleDataNodeEntity:
 
         assert ".data" not in dn.path
         assert os.path.exists(dn.path)
+
+    def test_get_download_path(self):
+        path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.p")
+        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": path})
+        assert dn._get_downloadable_path() == path
+
+    def test_get_download_path_with_not_existed_file(self):
+        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": "NOT_EXISTED.p"})
+        assert dn._get_downloadable_path() == ""
+
+    def test_upload(self, pickle_file_path, tmpdir_factory):
+        old_pickle_path = tmpdir_factory.mktemp("data").join("df.p").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": old_pickle_path})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        upload_content = pd.read_pickle(pickle_file_path)
+
+        with freezegun.freeze_time(old_last_edit_date + timedelta(seconds=1)):
+            dn._upload(pickle_file_path)
+
+        assert_frame_equal(dn.read(), upload_content)  # The content of the dn should change to the uploaded content
+        assert dn.last_edit_date > old_last_edit_date
+        assert dn.path == old_pickle_path  # The path of the dn should not change
+
+    def test_upload_with_upload_check(self, pickle_file_path, tmpdir_factory):
+        old_pickle_path = tmpdir_factory.mktemp("data").join("df.p").strpath
+        old_data = pd.DataFrame([{"a": 0, "b": 1, "c": 2}, {"a": 3, "b": 4, "c": 5}])
+
+        dn = PickleDataNode("foo", Scope.SCENARIO, properties={"path": old_pickle_path})
+        dn.write(old_data)
+        old_last_edit_date = dn.last_edit_date
+
+        def check_data_column(upload_path, upload_data):
+            return upload_path.endswith(".p") and upload_data.columns.tolist() == ["a", "b", "c"]
+
+        not_exists_json_path = tmpdir_factory.mktemp("data").join("not_exists.json").strpath
+        reasons = dn._upload(not_exists_json_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0]) == "The uploaded file not_exists.json can not be read,"
+            f' therefore is not a valid data file for data node "{dn.id}"'
+        )
+
+        not_pickle_path = tmpdir_factory.mktemp("data").join("wrong_format_df.not_pickle").strpath
+        with open(str(not_pickle_path), "wb") as f:
+            pickle.dump(pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]), f)
+        # The upload should fail when the file is not a pickle
+        reasons = dn._upload(not_pickle_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.not_pickle has invalid data for data node "{dn.id}"'
+        )
+
+        wrong_format_pickle_path = tmpdir_factory.mktemp("data").join("wrong_format_df.p").strpath
+        with open(str(wrong_format_pickle_path), "wb") as f:
+            pickle.dump(pd.DataFrame([{"a": 1, "b": 2, "d": 3}, {"a": 4, "b": 5, "d": 6}]), f)
+        # The upload should fail when check_data_column() returns False
+        reasons = dn._upload(wrong_format_pickle_path, upload_checker=check_data_column)
+        assert bool(reasons) is False
+        assert (
+            str(list(reasons._reasons[dn.id])[0])
+            == f'The uploaded file wrong_format_df.p has invalid data for data node "{dn.id}"'
+        )
+
+        assert_frame_equal(dn.read(), old_data)  # The content of the dn should not change when upload fails
+        assert dn.last_edit_date == old_last_edit_date  # The last edit date should not change when upload fails
+        assert dn.path == old_pickle_path  # The path of the dn should not change
+
+        # The upload should succeed when check_data_column() returns True
+        assert dn._upload(pickle_file_path, upload_checker=check_data_column)

+ 94 - 3
tests/core/data/test_read_excel_data_node.py

@@ -58,6 +58,7 @@ class MyCustomObject2:
 excel_file_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "data_sample/example.xlsx")
 sheet_names = ["Sheet1", "Sheet2"]
 custom_class_dict = {"Sheet1": MyCustomObject1, "Sheet2": MyCustomObject2}
+custom_pandas_numpy_exposed_type_dict = {"Sheet1": "pandas", "Sheet2": "numpy"}
 
 
 def test_raise_no_data_with_header():
@@ -400,7 +401,7 @@ def test_read_multi_sheet_with_header_single_custom_exposed_type():
             assert row_custom_no_sheet_name.text == row_custom.text
 
 
-def test_read_multi_sheet_with_header_multiple_custom_exposed_type():
+def test_read_multi_sheet_with_header_multiple_custom_object_exposed_type():
     data_pandas = pd.read_excel(excel_file_path, sheet_name=sheet_names)
 
     # With sheet name
@@ -461,6 +462,48 @@ def test_read_multi_sheet_with_header_multiple_custom_exposed_type():
             assert row_custom_no_sheet_name.text == row_custom.text
 
 
+def test_read_multi_sheet_with_header_multiple_custom_pandas_numpy_exposed_type():
+    # With sheet name
+    excel_dn_as_pandas_numpy = ExcelDataNode(
+        "bar",
+        Scope.SCENARIO,
+        properties={
+            "path": excel_file_path,
+            "sheet_name": sheet_names,
+            "exposed_type": custom_pandas_numpy_exposed_type_dict,
+        },
+    )
+    assert excel_dn_as_pandas_numpy.properties["exposed_type"] == custom_pandas_numpy_exposed_type_dict
+    multi_data_custom = excel_dn_as_pandas_numpy.read()
+    assert isinstance(multi_data_custom["Sheet1"], pd.DataFrame)
+    assert isinstance(multi_data_custom["Sheet2"], np.ndarray)
+
+    excel_dn_as_pandas_numpy = ExcelDataNode(
+        "bar",
+        Scope.SCENARIO,
+        properties={
+            "path": excel_file_path,
+            "sheet_name": sheet_names,
+            "exposed_type": ["pandas", "numpy"],
+        },
+    )
+    assert excel_dn_as_pandas_numpy.properties["exposed_type"] == ["pandas", "numpy"]
+    multi_data_custom = excel_dn_as_pandas_numpy.read()
+    assert isinstance(multi_data_custom["Sheet1"], pd.DataFrame)
+    assert isinstance(multi_data_custom["Sheet2"], np.ndarray)
+
+    # Without sheet name
+    excel_dn_as_pandas_numpy_no_sheet_name = ExcelDataNode(
+        "bar",
+        Scope.SCENARIO,
+        properties={"path": excel_file_path, "exposed_type": custom_pandas_numpy_exposed_type_dict},
+    )
+    assert excel_dn_as_pandas_numpy_no_sheet_name.properties["exposed_type"] == custom_pandas_numpy_exposed_type_dict
+    multi_data_custom_no_sheet_name = excel_dn_as_pandas_numpy_no_sheet_name.read()
+    assert isinstance(multi_data_custom_no_sheet_name["Sheet1"], pd.DataFrame)
+    assert isinstance(multi_data_custom_no_sheet_name["Sheet2"], np.ndarray)
+
+
 def test_read_multi_sheet_without_header_pandas():
     # With sheet name
     excel_data_node_as_pandas = ExcelDataNode(
@@ -525,7 +568,7 @@ def test_read_multi_sheet_without_header_numpy():
         assert np.array_equal(data_numpy[key], data_numpy_no_sheet_name[key])
 
 
-def test_read_multi_sheet_without_header_single_custom_exposed_type():
+def test_read_multi_sheet_without_header_single_custom_object_exposed_type():
     data_pandas = pd.read_excel(excel_file_path, header=None, sheet_name=sheet_names)
 
     # With sheet name
@@ -579,7 +622,7 @@ def test_read_multi_sheet_without_header_single_custom_exposed_type():
             assert row_custom_no_sheet_name.text == row_custom.text
 
 
-def test_read_multi_sheet_without_header_multiple_custom_exposed_type():
+def test_read_multi_sheet_without_header_multiple_custom_object_exposed_type():
     data_pandas = pd.read_excel(excel_file_path, header=None, sheet_name=sheet_names)
 
     # With sheet names
@@ -643,3 +686,51 @@ def test_read_multi_sheet_without_header_multiple_custom_exposed_type():
             assert row_custom_no_sheet_name.id == row_custom.id
             assert row_custom_no_sheet_name.integer == row_custom.integer
             assert row_custom_no_sheet_name.text == row_custom.text
+
+
+def test_read_multi_sheet_without_header_multiple_custom_pandas_numpy_exposed_type():
+    # With sheet names
+    excel_dn_as_pandas_numpy = ExcelDataNode(
+        "bar",
+        Scope.SCENARIO,
+        properties={
+            "path": excel_file_path,
+            "sheet_name": sheet_names,
+            "exposed_type": custom_pandas_numpy_exposed_type_dict,
+            "has_header": False,
+        },
+    )
+    assert excel_dn_as_pandas_numpy.properties["exposed_type"] == custom_pandas_numpy_exposed_type_dict
+    multi_data_custom = excel_dn_as_pandas_numpy.read()
+    assert isinstance(multi_data_custom["Sheet1"], pd.DataFrame)
+    assert isinstance(multi_data_custom["Sheet2"], np.ndarray)
+
+    excel_dn_as_pandas_numpy = ExcelDataNode(
+        "bar",
+        Scope.SCENARIO,
+        properties={
+            "path": excel_file_path,
+            "sheet_name": sheet_names,
+            "exposed_type": ["pandas", "numpy"],
+            "has_header": False,
+        },
+    )
+    assert excel_dn_as_pandas_numpy.properties["exposed_type"] == ["pandas", "numpy"]
+    multi_data_custom = excel_dn_as_pandas_numpy.read()
+    assert isinstance(multi_data_custom["Sheet1"], pd.DataFrame)
+    assert isinstance(multi_data_custom["Sheet2"], np.ndarray)
+
+    # Without sheet names
+    excel_dn_as_pandas_numpy_no_sheet_name = ExcelDataNode(
+        "bar",
+        Scope.SCENARIO,
+        properties={
+            "path": excel_file_path,
+            "has_header": False,
+            "exposed_type": custom_pandas_numpy_exposed_type_dict,
+        },
+    )
+    multi_data_custom_no_sheet_name = excel_dn_as_pandas_numpy_no_sheet_name.read()
+    assert isinstance(multi_data_custom_no_sheet_name["Sheet1"], pd.DataFrame)
+    assert isinstance(multi_data_custom_no_sheet_name["Sheet2"], np.ndarray)