
Merge pull request #1078 from Avaiga/refactor/file-based-mixin

Refactor - All file-based data nodes should have consistent default data writing behavior
Đỗ Trường Giang 1 year ago
parent
commit f0c22c29b4
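
Note: before this refactor, only PickleDataNode wrote its default data at creation time and tracked whether its file was generated; the CSV, Excel, JSON, and Parquet nodes each carried slightly different copies of that logic. After it, all five behave like the sketch below (a hedged illustration built from the tests in this diff; it assumes a default Taipy Config and uses the direct-constructor form the test suite uses):

    import os

    from taipy.config.common.scope import Scope
    from taipy.core.data.csv import CSVDataNode

    # No "path" property is given, so the node generates its own file under
    # <storage_folder>/csvs/<dn.id>.csv and writes the default data into it.
    dn = CSVDataNode("my_csv", Scope.SCENARIO,
                     properties={"default_data": {"a": [1, 2], "b": [3, 4]}})
    assert dn.is_generated
    assert os.path.exists(dn.path)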

+ 6 - 4
taipy/core/_entity/_properties.py

@@ -11,6 +11,8 @@
 
 from collections import UserDict
 
+from taipy.config.common._template_handler import _TemplateHandler as _tpl
+
 from ..notification import EventOperation, Notifier, _make_event
 
 
@@ -25,9 +27,10 @@ class _Properties(UserDict):
 
     def __setitem__(self, key, value):
         super(_Properties, self).__setitem__(key, value)
-        from ... import core as tp
 
         if hasattr(self, "_entity_owner"):
+            from ... import core as tp
+
             event = _make_event(
                 self._entity_owner,
                 EventOperation.UPDATE,
@@ -44,15 +47,14 @@ class _Properties(UserDict):
                 self._entity_owner._in_context_attributes_changed_collector.append(event)
 
     def __getitem__(self, key):
-        from taipy.config.common._template_handler import _TemplateHandler as _tpl
-
         return _tpl._replace_templates(super(_Properties, self).__getitem__(key))
 
     def __delitem__(self, key):
         super(_Properties, self).__delitem__(key)
-        from ... import core as tp
 
         if hasattr(self, "_entity_owner"):
+            from ... import core as tp
+
             event = _make_event(
                 self._entity_owner,
                 EventOperation.UPDATE,
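
Note: two import moves here. _TemplateHandler is hoisted from __getitem__ to module scope (presumably cheap and cycle-free), while the core import goes the other way, deferred into the branch that actually needs it, which is the usual way to break an import cycle. A toy, runnable illustration of the deferred-import pattern (the imported module is a stand-in, not Taipy's):

    def publish_update(owner):
        if owner is not None:
            # Imported lazily: the module only loads when this branch runs,
            # so a circular dependency at module-load time is avoided.
            import json
            return json.dumps({"owner": owner, "operation": "UPDATE"})
        return None

    print(publish_update("dn_1"))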

+ 0 - 35
taipy/core/data/_abstract_file.py

@@ -1,35 +0,0 @@
-# Copyright 2021-2024 Avaiga Private Limited
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-#        http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-import os
-import pathlib
-import shutil
-
-
-class _FileDataNodeMixin(object):
-    """Mixin class designed to handle file-based data nodes
-    (CSVDataNode, ParquetDataNode, ExcelDataNode, PickleDataNode, JSONDataNode, etc.)."""
-
-    __EXTENSION_MAP = {"csv": "csv", "excel": "xlsx", "parquet": "parquet", "pickle": "p", "json": "json"}
-
-    def _build_path(self, storage_type):
-        from taipy.config.config import Config
-
-        folder = f"{storage_type}s"
-        dir_path = pathlib.Path(Config.core.storage_folder) / folder
-        if not dir_path.exists():
-            dir_path.mkdir(parents=True, exist_ok=True)
-        return dir_path / f"{self.id}.{self.__EXTENSION_MAP.get(storage_type)}"
-
-    def _migrate_path(self, storage_type, old_path):
-        new_path = self._build_path(storage_type)
-        if os.path.exists(old_path):
-            shutil.move(old_path, new_path)
-        return new_path

+ 1 - 1
taipy/core/data/_abstract_sql.py

@@ -25,7 +25,7 @@ from taipy.config.common.scope import Scope
 from .._version._version_manager_factory import _VersionManagerFactory
 from ..data.operator import JoinOperator, Operator
 from ..exceptions.exceptions import MissingRequiredProperty, UnknownDatabaseEngine
-from ._abstract_tabular import _TabularDataNodeMixin
+from ._tabular_datanode_mixin import _TabularDataNodeMixin
 from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 

+ 9 - 10
taipy/core/data/_data_manager.py

@@ -26,11 +26,10 @@ from ..exceptions.exceptions import InvalidDataNodeType
 from ..notification import Event, EventEntityType, EventOperation, Notifier, _make_event
 from ..scenario.scenario_id import ScenarioId
 from ..sequence.sequence_id import SequenceId
-from ._abstract_file import _FileDataNodeMixin
 from ._data_fs_repository import _DataFSRepository
+from ._file_datanode_mixin import _FileDataNodeMixin
 from .data_node import DataNode
 from .data_node_id import DataNodeId
-from .pickle import PickleDataNode
 
 
 class _DataManager(_Manager[DataNode], _VersionMixin):
@@ -112,21 +111,21 @@ class _DataManager(_Manager[DataNode], _VersionMixin):
         return cls._repository._load_all(filters)
 
     @classmethod
-    def _clean_pickle_file(cls, data_node: DataNode):
-        if not isinstance(data_node, PickleDataNode):
+    def _clean_generated_file(cls, data_node: DataNode):
+        if not isinstance(data_node, _FileDataNodeMixin):
             return
         if data_node.is_generated and os.path.exists(data_node.path):
             os.remove(data_node.path)
 
     @classmethod
-    def _clean_pickle_files(cls, data_nodes: Iterable[DataNode]):
+    def _clean_generated_files(cls, data_nodes: Iterable[DataNode]):
         for data_node in data_nodes:
-            cls._clean_pickle_file(data_node)
+            cls._clean_generated_file(data_node)
 
     @classmethod
     def _delete(cls, data_node_id: DataNodeId):
         if data_node := cls._get(data_node_id, None):
-            cls._clean_pickle_file(data_node)
+            cls._clean_generated_file(data_node)
         super()._delete(data_node_id)
 
     @classmethod
@@ -135,19 +134,19 @@ class _DataManager(_Manager[DataNode], _VersionMixin):
         for data_node_id in data_node_ids:
             if data_node := cls._get(data_node_id):
                 data_nodes.append(data_node)
-        cls._clean_pickle_files(data_nodes)
+        cls._clean_generated_files(data_nodes)
         super()._delete_many(data_node_ids)
 
     @classmethod
     def _delete_all(cls):
         data_nodes = cls._get_all()
-        cls._clean_pickle_files(data_nodes)
+        cls._clean_generated_files(data_nodes)
         super()._delete_all()
 
     @classmethod
     def _delete_by_version(cls, version_number: str):
         data_nodes = cls._get_all(version_number)
-        cls._clean_pickle_files(data_nodes)
+        cls._clean_generated_files(data_nodes)
         cls._repository._delete_by(attribute="version", value=version_number)
         Notifier.publish(
             Event(EventEntityType.DATA_NODE, EventOperation.DELETION, metadata={"delete_by_version": version_number})
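
Note: _clean_pickle_file(s) becomes _clean_generated_file(s) and checks _FileDataNodeMixin instead of PickleDataNode, so deleting any file-based data node also removes its backing file whenever Taipy generated that file. A hedged sketch mirroring the new tests (it assumes an initialized Taipy repository, as the test fixtures provide):

    import os

    from taipy.config.config import Config
    from taipy.core.data._data_manager import _DataManager

    cfg = Config.configure_data_node(id="tmp", storage_type="csv",
                                     default_data={"a": [1], "b": [2]})
    dn = _DataManager._bulk_get_or_create([cfg])[cfg]
    assert dn.is_generated               # no user-supplied path in the config

    _DataManager._delete(dn.id)          # now also removes the generated .csv
    assert not os.path.exists(dn.path)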

+ 94 - 0
taipy/core/data/_file_datanode_mixin.py

@@ -0,0 +1,94 @@
+# Copyright 2021-2024 Avaiga Private Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import os
+import pathlib
+import shutil
+from datetime import datetime
+from os.path import isfile
+from typing import Any, Dict, Optional
+
+from taipy.config.config import Config
+
+from .._entity._reload import _self_reload
+from .data_node import DataNode
+from .data_node_id import Edit
+
+
+class _FileDataNodeMixin(object):
+    """Mixin class designed to handle file-based data nodes
+    (CSVDataNode, ParquetDataNode, ExcelDataNode, PickleDataNode, JSONDataNode, etc.)."""
+
+    __EXTENSION_MAP = {"csv": "csv", "excel": "xlsx", "parquet": "parquet", "pickle": "p", "json": "json"}
+
+    _DEFAULT_DATA_KEY = "default_data"
+    _PATH_KEY = "path"
+    _DEFAULT_PATH_KEY = "default_path"
+    _IS_GENERATED_KEY = "is_generated"
+
+    def __init__(self, properties: Dict) -> None:
+        self._path: str = properties.get(self._PATH_KEY, properties.get(self._DEFAULT_PATH_KEY))
+        self._is_generated: bool = properties.get(self._IS_GENERATED_KEY, self._path is None)
+        self._last_edit_date: Optional[datetime] = None
+
+        if self._path and ".data" in self._path:
+            self._path = self._migrate_path(self.storage_type(), self._path)  # type: ignore[attr-defined]
+        if not self._path:
+            self._path = self._build_path(self.storage_type())  # type: ignore[attr-defined]
+
+        properties[self._IS_GENERATED_KEY] = self._is_generated
+        properties[self._PATH_KEY] = self._path
+
+    def _write_default_data(self, default_value: Any):
+        if default_value is not None and not os.path.exists(self._path):
+            self._write(default_value)  # type: ignore[attr-defined]
+            self._last_edit_date = DataNode._get_last_modified_datetime(self._path) or datetime.now()
+            self._edits.append(  # type: ignore[attr-defined]
+                Edit(
+                    {
+                        "timestamp": self._last_edit_date,
+                        "writer_identifier": "TAIPY",
+                        "comments": "Default data written.",
+                    }
+                )
+            )
+
+        if not self._last_edit_date and isfile(self._path):
+            self._last_edit_date = datetime.now()
+
+    @property  # type: ignore
+    @_self_reload(DataNode._MANAGER_NAME)
+    def is_generated(self) -> bool:
+        return self._is_generated
+
+    @property  # type: ignore
+    @_self_reload(DataNode._MANAGER_NAME)
+    def path(self) -> Any:
+        return self._path
+
+    @path.setter
+    def path(self, value):
+        self._path = value
+        self.properties[self._PATH_KEY] = value
+        self.properties[self._IS_GENERATED_KEY] = False
+
+    def _build_path(self, storage_type) -> str:
+        folder = f"{storage_type}s"
+        dir_path = pathlib.Path(Config.core.storage_folder) / folder
+        if not dir_path.exists():
+            dir_path.mkdir(parents=True, exist_ok=True)
+        return str(dir_path / f"{self.id}.{self.__EXTENSION_MAP.get(storage_type)}")  # type: ignore[attr-defined]
+
+    def _migrate_path(self, storage_type, old_path) -> str:
+        new_path = self._build_path(storage_type)
+        if os.path.exists(old_path):
+            shutil.move(old_path, new_path)
+        return new_path
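
Note: this new module centralizes what each node previously did in its own __init__: resolve the path (migrating legacy ".data" paths or building one under the storage folder), record is_generated, and write the default data exactly once, recording a "Default data written." edit whose timestamp is taken from the file's mtime. A condensed, hypothetical subclass to make the required initialization order explicit (the real constructors take many more arguments; _write here is a stand-in):

    from taipy.config.common.scope import Scope

    from taipy.core.data._file_datanode_mixin import _FileDataNodeMixin
    from taipy.core.data.data_node import DataNode

    class _TextDataNode(DataNode, _FileDataNodeMixin):
        @classmethod
        def storage_type(cls) -> str:
            return "pickle"  # reuses an existing __EXTENSION_MAP key for the sketch

        def __init__(self, config_id, scope=Scope.SCENARIO, id=None, properties=None):
            self.id = id or self._new_id(config_id)        # the id is needed to build the path
            properties = properties or {}
            default_value = properties.pop(self._DEFAULT_DATA_KEY, None)
            _FileDataNodeMixin.__init__(self, properties)  # resolves self._path, is_generated
            DataNode.__init__(self, config_id, scope, self.id, **properties)
            self._write_default_data(default_value)        # writes only if the file is absent

        def _write(self, data):
            with open(self._path, "w") as f:
                f.write(str(data))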

+ 0 - 0
taipy/core/data/_abstract_tabular.py → taipy/core/data/_tabular_datanode_mixin.py


+ 16 - 48
taipy/core/data/csv.py

@@ -10,9 +10,7 @@
 # specific language governing permissions and limitations under the License.
 
 import csv
-import os
 from datetime import datetime, timedelta
-from os.path import isfile
 from typing import Any, Dict, List, Optional, Set
 
 import numpy as np
@@ -20,11 +18,10 @@ import pandas as pd
 
 from taipy.config.common.scope import Scope
 
-from .._entity._reload import _self_reload
 from .._version._version_manager_factory import _VersionManagerFactory
 from ..job.job_id import JobId
-from ._abstract_file import _FileDataNodeMixin
-from ._abstract_tabular import _TabularDataNodeMixin
+from ._file_datanode_mixin import _FileDataNodeMixin
+from ._tabular_datanode_mixin import _TabularDataNodeMixin
 from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
@@ -64,10 +61,8 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
     """
 
     __STORAGE_TYPE = "csv"
-    __PATH_KEY = "path"
-    __DEFAULT_PATH_KEY = "default_path"
     __ENCODING_KEY = "encoding"
-    __DEFAULT_DATA_KEY = "default_data"
+
     _REQUIRED_PROPERTIES: List[str] = []
 
     def __init__(
@@ -86,11 +81,11 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         editor_expiration_date: Optional[datetime] = None,
         properties: Optional[Dict] = None,
     ):
+        self.id = id or self._new_id(config_id)
+
         if properties is None:
             properties = {}
 
-        default_value = properties.pop(self.__DEFAULT_DATA_KEY, None)
-
         if self.__ENCODING_KEY not in properties.keys():
             properties[self.__ENCODING_KEY] = "utf-8"
 
@@ -100,11 +95,15 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         properties[self._EXPOSED_TYPE_PROPERTY] = _TabularDataNodeMixin._get_valid_exposed_type(properties)
         self._check_exposed_type(properties[self._EXPOSED_TYPE_PROPERTY])
 
+        default_value = properties.pop(self._DEFAULT_DATA_KEY, None)
+        _FileDataNodeMixin.__init__(self, properties)
+        _TabularDataNodeMixin.__init__(self, **properties)
+
         DataNode.__init__(
             self,
             config_id,
             scope,
-            id,
+            self.id,
             owner_id,
             parent_ids,
             last_edit_date,
@@ -116,39 +115,18 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             editor_expiration_date,
             **properties,
         )
-        _TabularDataNodeMixin.__init__(self, **properties)
 
-        self._path = properties.get(self.__PATH_KEY, properties.get(self.__DEFAULT_PATH_KEY))
-        if self._path and ".data" in self._path:
-            self._path = self._migrate_path(self.storage_type(), self._path)
-
-        if not self._path:
-            self._path = self._build_path(self.storage_type())
-        properties[self.__PATH_KEY] = self._path
-
-        if default_value is not None and not os.path.exists(self._path):
-            self._write(default_value)
-            self._last_edit_date = datetime.now()
-            self._edits.append(
-                Edit(
-                    {
-                        "timestamp": self._last_edit_date,
-                        "writer_identifier": "TAIPY",
-                        "comments": "Default data written.",
-                    }
-                )
-            )
-        if not self._last_edit_date and isfile(self._path):
-            self._last_edit_date = datetime.now()
+        self._write_default_data(default_value)
 
         self._TAIPY_PROPERTIES.update(
             {
+                self._PATH_KEY,
+                self._DEFAULT_PATH_KEY,
+                self._DEFAULT_DATA_KEY,
+                self._IS_GENERATED_KEY,
+                self._HAS_HEADER_PROPERTY,
                 self._EXPOSED_TYPE_PROPERTY,
-                self.__PATH_KEY,
-                self.__DEFAULT_PATH_KEY,
                 self.__ENCODING_KEY,
-                self.__DEFAULT_DATA_KEY,
-                self._HAS_HEADER_PROPERTY,
             }
         )
 
@@ -156,16 +134,6 @@ class CSVDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
     def storage_type(cls) -> str:
         return cls.__STORAGE_TYPE
 
-    @property  # type: ignore
-    @_self_reload(DataNode._MANAGER_NAME)
-    def path(self):
-        return self._path
-
-    @path.setter
-    def path(self, value):
-        self._path = value
-        self.properties[self.__PATH_KEY] = value
-
     def _read(self):
         if self.properties[self._EXPOSED_TYPE_PROPERTY] == self._EXPOSED_TYPE_PANDAS:
             return self._read_as_pandas_dataframe()

+ 15 - 6
taipy/core/data/data_node.py

@@ -83,7 +83,7 @@ class DataNode(_Entity, _Labeled):
     __logger = _TaipyLogger._get_logger()
     _REQUIRED_PROPERTIES: List[str] = []
     _MANAGER_NAME: str = "data"
-    __PATH_KEY = "path"
+    _PATH_KEY = "path"
     __EDIT_TIMEOUT = 30
 
     _TAIPY_PROPERTIES: Set[str] = set()
@@ -105,7 +105,7 @@ class DataNode(_Entity, _Labeled):
         **kwargs,
     ) -> None:
         self._config_id = _validate_id(config_id)
-        self.id = id or DataNodeId(self.__ID_SEPARATOR.join([self._ID_PREFIX, self.config_id, str(uuid.uuid4())]))
+        self.id = id or self._new_id(self._config_id)
         self._owner_id = owner_id
         self._parent_ids = parent_ids or set()
         self._scope = scope
@@ -121,6 +121,13 @@ class DataNode(_Entity, _Labeled):
 
         self._properties = _Properties(self, **kwargs)
 
+    @staticmethod
+    def _new_id(config_id: str) -> DataNodeId:
+        """Generate a unique datanode identifier."""
+        return DataNodeId(
+            DataNode.__ID_SEPARATOR.join([DataNode._ID_PREFIX, _validate_id(config_id), str(uuid.uuid4())])
+        )
+
     @property
     def config_id(self):
         return self._config_id
@@ -158,7 +165,7 @@ class DataNode(_Entity, _Labeled):
     @property  # type: ignore
     @_self_reload(_MANAGER_NAME)
     def last_edit_date(self):
-        last_modified_datetime = self.__get_last_modified_datetime()
+        last_modified_datetime = self._get_last_modified_datetime(self._properties.get(self._PATH_KEY, None))
         if last_modified_datetime and last_modified_datetime > self._last_edit_date:
             return last_modified_datetime
         else:
@@ -290,8 +297,8 @@ class DataNode(_Entity, _Labeled):
             return self._properties[protected_attribute_name]
         raise AttributeError(f"{attribute_name} is not an attribute of data node {self.id}")
 
-    def __get_last_modified_datetime(self) -> Optional[datetime]:
-        path = self._properties.get(self.__PATH_KEY, None)
+    @classmethod
+    def _get_last_modified_datetime(cls, path: Optional[str] = None) -> Optional[datetime]:
         if path and os.path.isfile(path):
             return datetime.fromtimestamp(os.path.getmtime(path))
 
@@ -380,7 +387,9 @@ class DataNode(_Entity, _Labeled):
         """
         edit = {k: v for k, v in options.items() if v is not None}
         if "timestamp" not in edit:
-            edit["timestamp"] = datetime.now()
+            edit["timestamp"] = (
+                self._get_last_modified_datetime(self._properties.get(self._PATH_KEY, None)) or datetime.now()
+            )
         self.last_edit_date = edit.get("timestamp")
         self._edits.append(edit)
 

+ 16 - 49
taipy/core/data/excel.py

@@ -9,9 +9,7 @@
 # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
 
-import os
 from datetime import datetime, timedelta
-from os.path import isfile
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
 import numpy as np
@@ -20,12 +18,11 @@ from openpyxl import load_workbook
 
 from taipy.config.common.scope import Scope
 
-from .._entity._reload import _self_reload
 from .._version._version_manager_factory import _VersionManagerFactory
 from ..exceptions.exceptions import ExposedTypeLengthMismatch, NonExistingExcelSheet, SheetNameLengthMismatch
 from ..job.job_id import JobId
-from ._abstract_file import _FileDataNodeMixin
-from ._abstract_tabular import _TabularDataNodeMixin
+from ._file_datanode_mixin import _FileDataNodeMixin
+from ._tabular_datanode_mixin import _TabularDataNodeMixin
 from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
@@ -68,10 +65,8 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
     """
 
     __STORAGE_TYPE = "excel"
-    __PATH_KEY = "path"
-    __DEFAULT_DATA_KEY = "default_data"
-    __DEFAULT_PATH_KEY = "default_path"
     __SHEET_NAME_PROPERTY = "sheet_name"
+
     _REQUIRED_PROPERTIES: List[str] = []
 
     def __init__(
@@ -90,13 +85,11 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         editor_expiration_date: Optional[datetime] = None,
         properties: Dict = None,
     ):
+        self.id = id or self._new_id(config_id)
+
         if properties is None:
             properties = {}
 
-        default_value = properties.pop(self.__DEFAULT_DATA_KEY, None)
-        self._path = properties.get(self.__PATH_KEY, properties.get(self.__DEFAULT_PATH_KEY))
-        properties[self.__PATH_KEY] = self._path
-
         if self.__SHEET_NAME_PROPERTY not in properties.keys():
             properties[self.__SHEET_NAME_PROPERTY] = None
         if self._HAS_HEADER_PROPERTY not in properties.keys():
@@ -104,11 +97,15 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         properties[self._EXPOSED_TYPE_PROPERTY] = _TabularDataNodeMixin._get_valid_exposed_type(properties)
         self._check_exposed_type(properties[self._EXPOSED_TYPE_PROPERTY])
 
+        default_value = properties.pop(self._DEFAULT_DATA_KEY, None)
+        _FileDataNodeMixin.__init__(self, properties)
+        _TabularDataNodeMixin.__init__(self, **properties)
+
         DataNode.__init__(
             self,
             config_id,
             scope,
-            id,
+            self.id,
             owner_id,
             parent_ids,
             last_edit_date,
@@ -120,51 +117,21 @@ class ExcelDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             editor_expiration_date,
             **properties,
         )
-        _TabularDataNodeMixin.__init__(self, **properties)
-        if self._path and ".data" in self._path:
-            self._path = self._migrate_path(self.storage_type(), self._path)
-
-        if not self._path:
-            self._path = self._build_path(self.storage_type())
-            properties[self.__PATH_KEY] = self._path
-
-        if default_value is not None and not os.path.exists(self._path):
-            self._write(default_value)
-            self._last_edit_date = datetime.now()
-            self._edits.append(
-                Edit(
-                    {
-                        "timestamp": self._last_edit_date,
-                        "writer_identifier": "TAIPY",
-                        "comments": "Default data written.",
-                    }
-                )
-            )
 
-        if not self._last_edit_date and isfile(self._path):
-            self._last_edit_date = datetime.now()
+        self._write_default_data(default_value)
 
         self._TAIPY_PROPERTIES.update(
             {
-                self._EXPOSED_TYPE_PROPERTY,
-                self.__PATH_KEY,
-                self.__DEFAULT_PATH_KEY,
-                self.__DEFAULT_DATA_KEY,
+                self._PATH_KEY,
+                self._DEFAULT_PATH_KEY,
+                self._DEFAULT_DATA_KEY,
+                self._IS_GENERATED_KEY,
                 self._HAS_HEADER_PROPERTY,
+                self._EXPOSED_TYPE_PROPERTY,
                 self.__SHEET_NAME_PROPERTY,
             }
         )
 
-    @property  # type: ignore
-    @_self_reload(DataNode._MANAGER_NAME)
-    def path(self):
-        return self._path
-
-    @path.setter
-    def path(self, value):
-        self._path = value
-        self.properties[self.__PATH_KEY] = value
-
     @classmethod
     def storage_type(cls) -> str:
         return cls.__STORAGE_TYPE

+ 14 - 45
taipy/core/data/json.py

@@ -11,10 +11,8 @@
 
 import dataclasses
 import json
-import os
 from datetime import date, datetime, timedelta
 from enum import Enum
-from os.path import isfile
 from pydoc import locate
 from typing import Any, Dict, List, Optional, Set
 
@@ -22,7 +20,7 @@ from taipy.config.common.scope import Scope
 
 from .._entity._reload import _self_reload
 from .._version._version_manager_factory import _VersionManagerFactory
-from ._abstract_file import _FileDataNodeMixin
+from ._file_datanode_mixin import _FileDataNodeMixin
 from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
@@ -62,9 +60,6 @@ class JSONDataNode(DataNode, _FileDataNodeMixin):
     """
 
     __STORAGE_TYPE = "json"
-    __DEFAULT_DATA_KEY = "default_data"
-    __DEFAULT_PATH_KEY = "default_path"
-    __PATH_KEY = "path"
     __ENCODING_KEY = "encoding"
     _ENCODER_KEY = "encoder"
     _DECODER_KEY = "decoder"
@@ -86,18 +81,22 @@ class JSONDataNode(DataNode, _FileDataNodeMixin):
         editor_expiration_date: Optional[datetime] = None,
         properties: Optional[Dict] = None,
     ):
+        self.id = id or self._new_id(config_id)
+
         if properties is None:
             properties = {}
 
-        default_value = properties.pop(self.__DEFAULT_DATA_KEY, None)
-
         if self.__ENCODING_KEY not in properties.keys():
             properties[self.__ENCODING_KEY] = "utf-8"
 
-        super().__init__(
+        default_value = properties.pop(self._DEFAULT_DATA_KEY, None)
+        _FileDataNodeMixin.__init__(self, properties)
+
+        DataNode.__init__(
+            self,
             config_id,
             scope,
-            id,
+            self.id,
             owner_id,
             parent_ids,
             last_edit_date,
@@ -109,39 +108,19 @@ class JSONDataNode(DataNode, _FileDataNodeMixin):
             editor_expiration_date,
             **properties,
         )
-        self._path = properties.get(self.__PATH_KEY, properties.get(self.__DEFAULT_PATH_KEY))
-        if self._path and ".data" in self._path:
-            self._path = self._migrate_path(self.storage_type(), self._path)
-
-        if not self._path:
-            self._path = self._build_path(self.storage_type())
-        properties[self.__PATH_KEY] = self._path
 
         self._decoder = self._properties.get(self._DECODER_KEY, _DefaultJSONDecoder)
         self._encoder = self._properties.get(self._ENCODER_KEY, _DefaultJSONEncoder)
 
-        if default_value is not None and not os.path.exists(self._path):
-            self._write(default_value)
-            self._last_edit_date = datetime.now()
-            self._edits.append(
-                Edit(
-                    {
-                        "timestamp": self._last_edit_date,
-                        "writer_identifier": "TAIPY",
-                        "comments": "Default data written.",
-                    }
-                )
-            )
-
-        if not self._last_edit_date and isfile(self._path):  # type: ignore
-            self._last_edit_date = datetime.now()
+        self._write_default_data(default_value)
 
         self._TAIPY_PROPERTIES.update(
             {
-                self.__PATH_KEY,
-                self.__DEFAULT_PATH_KEY,
+                self._PATH_KEY,
+                self._DEFAULT_PATH_KEY,
+                self._DEFAULT_DATA_KEY,
+                self._IS_GENERATED_KEY,
                 self.__ENCODING_KEY,
-                self.__DEFAULT_DATA_KEY,
                 self._ENCODER_KEY,
                 self._DECODER_KEY,
             }
@@ -151,16 +130,6 @@ class JSONDataNode(DataNode, _FileDataNodeMixin):
     def storage_type(cls) -> str:
         return cls.__STORAGE_TYPE
 
-    @property  # type: ignore
-    @_self_reload(DataNode._MANAGER_NAME)
-    def path(self):
-        return self._path
-
-    @path.setter
-    def path(self, value):
-        self._path = value
-        self.properties[self.__PATH_KEY] = value
-
     @property  # type: ignore
     @_self_reload(DataNode._MANAGER_NAME)
     def encoder(self):

+ 14 - 46
taipy/core/data/parquet.py

@@ -9,7 +9,6 @@
 # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
 
-import os
 from datetime import datetime, timedelta
 from os.path import isdir, isfile
 from typing import Any, Dict, List, Optional, Set
@@ -19,12 +18,11 @@ import pandas as pd
 
 from taipy.config.common.scope import Scope
 
-from .._entity._reload import _self_reload
 from .._version._version_manager_factory import _VersionManagerFactory
 from ..exceptions.exceptions import UnknownCompressionAlgorithm, UnknownParquetEngine
 from ..job.job_id import JobId
-from ._abstract_file import _FileDataNodeMixin
-from ._abstract_tabular import _TabularDataNodeMixin
+from ._file_datanode_mixin import _FileDataNodeMixin
+from ._tabular_datanode_mixin import _TabularDataNodeMixin
 from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
@@ -75,9 +73,6 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
     """
 
     __STORAGE_TYPE = "parquet"
-    __PATH_KEY = "path"
-    __DEFAULT_DATA_KEY = "default_data"
-    __DEFAULT_PATH_KEY = "default_path"
     __ENGINE_PROPERTY = "engine"
     __VALID_PARQUET_ENGINES = ["pyarrow", "fastparquet"]
     __COMPRESSION_PROPERTY = "compression"
@@ -102,11 +97,11 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         editor_expiration_date: Optional[datetime] = None,
         properties: Optional[Dict] = None,
     ):
+        self.id = id or self._new_id(config_id)
+
         if properties is None:
             properties = {}
 
-        default_value = properties.pop(self.__DEFAULT_DATA_KEY, None)
-
         if self.__ENGINE_PROPERTY not in properties.keys():
             properties[self.__ENGINE_PROPERTY] = "pyarrow"
         if properties[self.__ENGINE_PROPERTY] not in self.__VALID_PARQUET_ENGINES:
@@ -137,11 +132,15 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
         properties[self._EXPOSED_TYPE_PROPERTY] = _TabularDataNodeMixin._get_valid_exposed_type(properties)
         self._check_exposed_type(properties[self._EXPOSED_TYPE_PROPERTY])
 
+        default_value = properties.pop(self._DEFAULT_DATA_KEY, None)
+        _FileDataNodeMixin.__init__(self, properties)
+        _TabularDataNodeMixin.__init__(self, **properties)
+
         DataNode.__init__(
             self,
             config_id,
             scope,
-            id,
+            self.id,
             owner_id,
             parent_ids,
             last_edit_date,
@@ -153,39 +152,18 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
             editor_expiration_date,
             **properties,
         )
-        _TabularDataNodeMixin.__init__(self, **properties)
-
-        self._path = properties.get(self.__PATH_KEY, properties.get(self.__DEFAULT_PATH_KEY))
-
-        if self._path and ".data" in self._path:
-            self._path = self._migrate_path(self.storage_type(), self._path)
-        if not self._path:
-            self._path = self._build_path(self.storage_type())
 
-        properties[self.__PATH_KEY] = self._path
-
-        if default_value is not None and not os.path.exists(self._path):
-            self._write(default_value)
-            self._last_edit_date = datetime.now()
-            self._edits.append(
-                Edit(
-                    {
-                        "timestamp": self._last_edit_date,
-                        "writer_identifier": "TAIPY",
-                        "comments": "Default data written.",
-                    }
-                )
-            )
+        self._write_default_data(default_value)
 
         if not self._last_edit_date and (isfile(self._path) or isdir(self._path)):
             self._last_edit_date = datetime.now()
-
         self._TAIPY_PROPERTIES.update(
             {
                 self._EXPOSED_TYPE_PROPERTY,
-                self.__PATH_KEY,
-                self.__DEFAULT_PATH_KEY,
-                self.__DEFAULT_DATA_KEY,
+                self._PATH_KEY,
+                self._DEFAULT_PATH_KEY,
+                self._DEFAULT_DATA_KEY,
+                self._IS_GENERATED_KEY,
                 self.__ENGINE_PROPERTY,
                 self.__COMPRESSION_PROPERTY,
                 self.__READ_KWARGS_PROPERTY,
@@ -197,16 +175,6 @@ class ParquetDataNode(DataNode, _FileDataNodeMixin, _TabularDataNodeMixin):
     def storage_type(cls) -> str:
         return cls.__STORAGE_TYPE
 
-    @property  # type: ignore
-    @_self_reload(DataNode._MANAGER_NAME)
-    def path(self):
-        return self._path
-
-    @path.setter
-    def path(self, value):
-        self._path = value
-        self.properties[self.__PATH_KEY] = value
-
     def _read(self):
         return self.read_with_kwargs()
 

+ 18 - 56
taipy/core/data/pickle.py

@@ -9,16 +9,14 @@
 # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
 
-import os
 import pickle
 from datetime import datetime, timedelta
-from typing import Any, List, Optional, Set
+from typing import List, Optional, Set
 
 from taipy.config.common.scope import Scope
 
-from .._entity._reload import _self_reload
 from .._version._version_manager_factory import _VersionManagerFactory
-from ._abstract_file import _FileDataNodeMixin
+from ._file_datanode_mixin import _FileDataNodeMixin
 from .data_node import DataNode
 from .data_node_id import DataNodeId, Edit
 
@@ -56,10 +54,7 @@ class PickleDataNode(DataNode, _FileDataNodeMixin):
     """
 
     __STORAGE_TYPE = "pickle"
-    __PATH_KEY = "path"
-    __DEFAULT_PATH_KEY = "default_path"
-    __DEFAULT_DATA_KEY = "default_data"
-    __IS_GENERATED_KEY = "is_generated"
+
     _REQUIRED_PROPERTIES: List[str] = []
 
     def __init__(
@@ -78,18 +73,19 @@ class PickleDataNode(DataNode, _FileDataNodeMixin):
         editor_expiration_date: Optional[datetime] = None,
         properties=None,
     ):
+        self.id = id or self._new_id(config_id)
+
         if properties is None:
             properties = {}
-        default_value = properties.pop(self.__DEFAULT_DATA_KEY, None)
-        self._path = properties.get(self.__PATH_KEY, properties.get(self.__DEFAULT_PATH_KEY))
-        if self._path is not None:
-            properties[self.__PATH_KEY] = self._path
-        self._is_generated = properties.get(self.__IS_GENERATED_KEY, self._path is None)
-        properties[self.__IS_GENERATED_KEY] = self._is_generated
-        super().__init__(
+
+        default_value = properties.pop(self._DEFAULT_DATA_KEY, None)
+        _FileDataNodeMixin.__init__(self, properties)
+
+        DataNode.__init__(
+            self,
             config_id,
             scope,
-            id,
+            self.id,
             owner_id,
             parent_ids,
             last_edit_date,
@@ -101,33 +97,15 @@ class PickleDataNode(DataNode, _FileDataNodeMixin):
             editor_expiration_date,
             **properties,
         )
-        if self._path and ".data" in self._path:
-            self._path = self._migrate_path(self.storage_type(), self._path)
-
-        if self._path is None:
-            self._path = self._build_path(self.storage_type())
-
-        if default_value is not None and not os.path.exists(self._path):
-            self._write(default_value)
-            self._last_edit_date = datetime.now()
-            self._edits.append(
-                Edit(
-                    {
-                        "timestamp": self._last_edit_date,
-                        "writer_identifier": "TAIPY",
-                        "comments": "Default data written.",
-                    }
-                )
-            )
-        if not self._last_edit_date and os.path.exists(self._path):
-            self._last_edit_date = datetime.now()
+
+        self._write_default_data(default_value)
 
         self._TAIPY_PROPERTIES.update(
             {
-                self.__PATH_KEY,
-                self.__DEFAULT_PATH_KEY,
-                self.__DEFAULT_DATA_KEY,
-                self.__IS_GENERATED_KEY,
+                self._PATH_KEY,
+                self._DEFAULT_PATH_KEY,
+                self._DEFAULT_DATA_KEY,
+                self._IS_GENERATED_KEY,
             }
         )
 
@@ -135,22 +113,6 @@ class PickleDataNode(DataNode, _FileDataNodeMixin):
     def storage_type(cls) -> str:
         return cls.__STORAGE_TYPE
 
-    @property  # type: ignore
-    @_self_reload(DataNode._MANAGER_NAME)
-    def path(self) -> Any:
-        return self._path
-
-    @path.setter
-    def path(self, value):
-        self._path = value
-        self.properties[self.__PATH_KEY] = value
-        self.properties[self.__IS_GENERATED_KEY] = False
-
-    @property  # type: ignore
-    @_self_reload(DataNode._MANAGER_NAME)
-    def is_generated(self) -> bool:
-        return self._is_generated
-
     def _read(self):
         with open(self._path, "rb") as pf:
             return pickle.load(pf)

+ 1 - 1
taipy/gui_core/_context.py

@@ -52,7 +52,7 @@ from taipy.core import (
 from taipy.core import delete as core_delete
 from taipy.core import get as core_get
 from taipy.core import submit as core_submit
-from taipy.core.data._abstract_tabular import _TabularDataNodeMixin
+from taipy.core.data._tabular_datanode_mixin import _TabularDataNodeMixin
 from taipy.core.notification import CoreEventConsumerBase, EventEntityType
 from taipy.core.notification.event import Event, EventOperation
 from taipy.core.notification.notifier import Notifier

+ 2 - 3
tests/core/_orchestrator/_dispatcher/test_dispatcher__needs_to_run.py

@@ -77,9 +77,8 @@ def test_need_to_run_skippable_task_with_validity_period_on_output():
 
     assert dispatcher._needs_to_run(task)  # output data is not edited
 
-    output_edit_time = datetime.now()  # edit time
-    with freezegun.freeze_time(output_edit_time):
-        task.output["output"].write("Hello world !")  # output data is edited
+    task.output["output"].write("Hello world !")  # output data is edited
+    output_edit_time = task.output["output"].last_edit_date
 
     with freezegun.freeze_time(output_edit_time + timedelta(minutes=30)):  # 30 min after edit time
         assert not dispatcher._needs_to_run(task)  # output data is written and validity period not expired

+ 7 - 4
tests/core/_orchestrator/test_orchestrator__submit.py

@@ -8,6 +8,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
+
 from datetime import datetime, timedelta
 from unittest import mock
 
@@ -62,9 +63,9 @@ def test_submit_scenario_development_mode():
 
     # data nodes should have been written (except the input dn_0)
     assert scenario.dn_0.last_edit_date < submit_time
-    assert scenario.dn_1.last_edit_date == submit_time
-    assert scenario.dn_2.last_edit_date == submit_time
-    assert scenario.dn_3.last_edit_date == submit_time
+    assert scenario.dn_1.last_edit_date is not None
+    assert scenario.dn_2.last_edit_date is not None
+    assert scenario.dn_3.last_edit_date is not None
 
     # jobs are created in a specific order and are correct
     assert len(jobs) == 4
@@ -339,7 +340,9 @@ def test_submit_sequence_development_mode():
 
     # data nodes should have been written (except the input dn_0)
     assert sce.dn_0.last_edit_date < submit_time
-    assert sce.dn_1.last_edit_date == submit_time == sce.dn_2.last_edit_date == sce.dn_3.last_edit_date
+    assert sce.dn_1.last_edit_date is not None
+    assert sce.dn_2.last_edit_date is not None
+    assert sce.dn_3.last_edit_date is not None
 
     # jobs are created in a specific order and are correct
     assert len(jobs) == 3

+ 4 - 3
tests/core/_orchestrator/test_orchestrator__submit_task.py

@@ -8,7 +8,8 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
-from datetime import datetime
+
+from datetime import datetime, timedelta
 from unittest import mock
 
 import freezegun
@@ -46,7 +47,7 @@ def test_submit_task_development_mode():
     scenario = create_scenario()
     orchestrator = _OrchestratorFactory._build_orchestrator()
 
-    submit_time = datetime.now()
+    submit_time = datetime.now() + timedelta(seconds=1)  # +1 to ensure the edit time of dn_0 is before the submit time
     with freezegun.freeze_time(submit_time):
         submission = orchestrator.submit_task(
             scenario.t1, no_of_retry=10, log=True, log_file="file_path"
@@ -54,7 +55,7 @@ def test_submit_task_development_mode():
         job = submission.jobs[0]
 
     # task output should have been written
-    assert scenario.dn_1.last_edit_date == submit_time
+    assert scenario.dn_1.last_edit_date is not None
 
     # job exists and is correct
     assert job.task == scenario.t1

+ 13 - 9
tests/core/conftest.py

@@ -176,15 +176,6 @@ def default_multi_sheet_data_frame():
     }
 
 
-@pytest.fixture(scope="session", autouse=True)
-def cleanup_files():
-    yield
-
-    for path in [".data", ".my_data", "user_data", ".taipy"]:
-        if os.path.exists(path):
-            shutil.rmtree(path, ignore_errors=True)
-
-
 @pytest.fixture(scope="function")
 def current_datetime():
     return current_time
@@ -314,6 +305,19 @@ def tmp_sqlite(tmpdir_factory):
     return os.path.join(fn.strpath, "test.db")
 
 
+@pytest.fixture(scope="session", autouse=True)
+def cleanup_files():
+    for path in [".data", ".my_data", "user_data", ".taipy"]:
+        if os.path.exists(path):
+            shutil.rmtree(path, ignore_errors=True)
+
+    yield
+
+    for path in [".data", ".my_data", "user_data", ".taipy"]:
+        if os.path.exists(path):
+            shutil.rmtree(path, ignore_errors=True)
+
+
 @pytest.fixture(scope="function", autouse=True)
 def clean_repository(init_config, init_managers, init_orchestrator, init_notifier, clean_argparser):
     clean_argparser()

+ 4 - 2
tests/core/data/test_csv_data_node.py

@@ -11,6 +11,7 @@
 
 import os
 import pathlib
+import uuid
 from datetime import datetime
 from time import sleep
 
@@ -102,7 +103,8 @@ class TestCSVDataNode:
         ],
     )
     def test_create_with_default_data(self, properties, exists):
-        dn = CSVDataNode("foo", Scope.SCENARIO, DataNodeId("dn_id"), properties=properties)
+        dn = CSVDataNode("foo", Scope.SCENARIO, DataNodeId(f"dn_id_{uuid.uuid4()}"), properties=properties)
+        assert dn.path == os.path.join(Config.core.storage_folder.strip("/"), "csvs", dn.id + ".csv")
         assert os.path.exists(dn.path) is exists
 
     def test_set_path(self):
@@ -165,5 +167,5 @@ class TestCSVDataNode:
 
         dn = CSVDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
 
-        assert ".data" not in dn.path.name
+        assert ".data" not in dn.path
         assert os.path.exists(dn.path)

+ 87 - 49
tests/core/data/test_data_manager.py

@@ -8,6 +8,7 @@
 # Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
+
 import os
 import pathlib
 
@@ -77,11 +78,14 @@ class TestDataManager:
         assert _DataManager._get(csv_dn.id).job_ids == csv_dn.job_ids
         assert not _DataManager._get(csv_dn.id).is_ready_for_reading
         assert _DataManager._get(csv_dn.id).is_ready_for_reading == csv_dn.is_ready_for_reading
-        assert len(_DataManager._get(csv_dn.id).properties) == 4
+        assert (
+            len(_DataManager._get(csv_dn.id).properties) == 5
+        )  # path, encoding, has_header, exposed_type, is_generated
         assert _DataManager._get(csv_dn.id).properties.get("path") == "bar"
         assert _DataManager._get(csv_dn.id).properties.get("encoding") == "utf-8"
         assert _DataManager._get(csv_dn.id).properties.get("has_header") is True
         assert _DataManager._get(csv_dn.id).properties.get("exposed_type") == "pandas"
+        assert _DataManager._get(csv_dn.id).properties.get("is_generated") is False
         assert _DataManager._get(csv_dn.id).properties == csv_dn.properties
         assert _DataManager._get(csv_dn.id).edit_in_progress is False
         assert _DataManager._get(csv_dn.id)._editor_id is None
@@ -103,11 +107,12 @@ class TestDataManager:
         assert _DataManager._get(csv_dn).job_ids == csv_dn.job_ids
         assert not _DataManager._get(csv_dn).is_ready_for_reading
         assert _DataManager._get(csv_dn).is_ready_for_reading == csv_dn.is_ready_for_reading
-        assert len(_DataManager._get(csv_dn).properties) == 4
+        assert len(_DataManager._get(csv_dn).properties) == 5  # path, encoding, has_header, exposed_type, is_generated
         assert _DataManager._get(csv_dn).properties.get("path") == "bar"
         assert _DataManager._get(csv_dn).properties.get("encoding") == "utf-8"
         assert _DataManager._get(csv_dn).properties.get("has_header") is True
         assert _DataManager._get(csv_dn.id).properties.get("exposed_type") == "pandas"
+        assert _DataManager._get(csv_dn.id).properties.get("is_generated") is False
         assert _DataManager._get(csv_dn).properties == csv_dn.properties
         assert _DataManager._get(csv_dn.id).edit_in_progress is False
         assert _DataManager._get(csv_dn.id)._editor_id is None
@@ -118,8 +123,9 @@ class TestDataManager:
         dn = _DataManager._create_and_set(config, None, None)
 
         assert _DataManager._get(dn.id).last_edit_date is None
-        assert len(_DataManager._get(dn.id).properties) == 1
-        assert _DataManager._get(dn.id).properties.get("is_generated")
+        assert len(_DataManager._get(dn.id).properties) == 2  # is_generated and path
+        assert isinstance(_DataManager._get(dn.id).properties.get("path"), str)
+        assert _DataManager._get(dn.id).properties.get("is_generated") is True
         assert not _DataManager._get(dn.id).edit_in_progress
         assert _DataManager._get(dn.id)._editor_id is None
         assert _DataManager._get(dn.id)._editor_expiration_date is None
@@ -127,8 +133,9 @@ class TestDataManager:
         dn.lock_edit("foo")
 
         assert _DataManager._get(dn.id).last_edit_date is None
-        assert len(_DataManager._get(dn.id).properties) == 1
-        assert _DataManager._get(dn.id).properties.get("is_generated")
+        assert len(_DataManager._get(dn.id).properties) == 2  # is_generated and path
+        assert isinstance(_DataManager._get(dn.id).properties.get("path"), str)
+        assert _DataManager._get(dn.id).properties.get("is_generated") is True
         assert _DataManager._get(dn.id).edit_in_progress
         assert _DataManager._get(dn.id).editor_id == "foo"
         assert _DataManager._get(dn.id).editor_expiration_date is not None
@@ -136,8 +143,9 @@ class TestDataManager:
         dn.unlock_edit("foo")
 
         assert _DataManager._get(dn.id).last_edit_date is None
-        assert len(_DataManager._get(dn.id).properties) == 1
-        assert _DataManager._get(dn.id).properties.get("is_generated")
+        assert len(_DataManager._get(dn.id).properties) == 2  # is_generated and path
+        assert isinstance(_DataManager._get(dn.id).properties.get("path"), str)
+        assert _DataManager._get(dn.id).properties.get("is_generated") is True
         assert not _DataManager._get(dn.id).edit_in_progress
         assert _DataManager._get(dn.id).editor_id is None
         assert _DataManager._get(dn.id).editor_expiration_date is None
@@ -226,7 +234,7 @@ class TestDataManager:
         assert _DataManager._get(pickle_dn.id).job_ids == pickle_dn.job_ids
         assert not _DataManager._get(pickle_dn.id).is_ready_for_reading
         assert _DataManager._get(pickle_dn.id).is_ready_for_reading == pickle_dn.is_ready_for_reading
-        assert len(_DataManager._get(pickle_dn.id).properties) == 1
+        assert len(_DataManager._get(pickle_dn.id).properties) == 2  # is_generated and path
         assert _DataManager._get(pickle_dn.id).properties == pickle_dn.properties
 
         assert _DataManager._get(pickle_dn) is not None
@@ -245,7 +253,7 @@ class TestDataManager:
         assert _DataManager._get(pickle_dn).job_ids == pickle_dn.job_ids
         assert not _DataManager._get(pickle_dn).is_ready_for_reading
         assert _DataManager._get(pickle_dn).is_ready_for_reading == pickle_dn.is_ready_for_reading
-        assert len(_DataManager._get(pickle_dn).properties) == 1
+        assert len(_DataManager._get(pickle_dn).properties) == 2  # is_generated and path
         assert _DataManager._get(pickle_dn).properties == pickle_dn.properties
 
     def test_create_raises_exception_with_wrong_type(self):
@@ -459,59 +467,89 @@ class TestDataManager:
 
         dm._delete_all()
 
-    def test_clean_generated_pickle_files(self, pickle_file_path):
-        user_pickle_dn_config = Config.configure_data_node(
-            id="d1", storage_type="pickle", path=pickle_file_path, default_data="d"
+    @pytest.mark.parametrize(
+        "storage_type,path",
+        [
+            ("pickle", "pickle_file_path"),
+            ("csv", "csv_file"),
+            ("excel", "excel_file"),
+            ("json", "json_file"),
+            ("parquet", "parquet_file_path"),
+        ],
+    )
+    def test_clean_generated_files(self, storage_type, path, request):
+        path = request.getfixturevalue(path)
+        user_dn_config = Config.configure_data_node(
+            id="d1", storage_type=storage_type, path=path, default_data={"a": [1], "b": [2]}
         )
-        generated_pickle_dn_1_config = Config.configure_data_node(id="d2", storage_type="pickle", default_data="d")
-        generated_pickle_dn_2_config = Config.configure_data_node(id="d3", storage_type="pickle", default_data="d")
-
-        dns = _DataManager._bulk_get_or_create(
-            [user_pickle_dn_config, generated_pickle_dn_1_config, generated_pickle_dn_2_config]
+        generated_dn_1_config = Config.configure_data_node(
+            id="d2", storage_type=storage_type, default_data={"a": [1], "b": [2]}
+        )
+        generated_dn_2_config = Config.configure_data_node(
+            id="d3", storage_type=storage_type, default_data={"a": [1], "b": [2]}
         )
 
-        user_pickle_dn = dns[user_pickle_dn_config]
-        generated_pickle_dn_1 = dns[generated_pickle_dn_1_config]
-        generated_pickle_dn_2 = dns[generated_pickle_dn_2_config]
-
-        _DataManager._clean_pickle_file(user_pickle_dn.id)
-        assert file_exists(user_pickle_dn.path)
-
-        _DataManager._clean_pickle_files([generated_pickle_dn_1, generated_pickle_dn_2])
-        assert not file_exists(generated_pickle_dn_1.path)
-        assert not file_exists(generated_pickle_dn_2.path)
-
-    def test_delete_does_clean_generated_pickle_files(self, pickle_file_path):
-        user_pickle_dn_config = Config.configure_data_node(
-            id="d1", storage_type="pickle", path=pickle_file_path, default_data="d"
+        dns = _DataManager._bulk_get_or_create([user_dn_config, generated_dn_1_config, generated_dn_2_config])
+
+        user_dn = dns[user_dn_config]
+        generated_dn_1 = dns[generated_dn_1_config]
+        generated_dn_2 = dns[generated_dn_2_config]
+
+        _DataManager._clean_generated_file(user_dn)
+        assert file_exists(user_dn.path)
+
+        _DataManager._clean_generated_files([generated_dn_1, generated_dn_2])
+        assert not file_exists(generated_dn_1.path)
+        assert not file_exists(generated_dn_2.path)
+
+    @pytest.mark.parametrize(
+        "storage_type,path",
+        [
+            ("pickle", "pickle_file_path"),
+            ("csv", "csv_file"),
+            ("excel", "excel_file"),
+            ("json", "json_file"),
+            ("parquet", "parquet_file_path"),
+        ],
+    )
+    def test_delete_does_clean_generated_pickle_files(self, storage_type, path, request):
+        path = request.getfixturevalue(path)
+        user_dn_config = Config.configure_data_node(
+            id="d1", storage_type=storage_type, path=path, default_data={"a": [1], "b": [2]}
+        )
+        generated_dn_config_1 = Config.configure_data_node(
+            id="d2", storage_type=storage_type, default_data={"a": [1], "b": [2]}
+        )
+        generated_dn_config_2 = Config.configure_data_node(
+            id="d3", storage_type=storage_type, default_data={"a": [1], "b": [2]}
+        )
+        generated_dn_config_3 = Config.configure_data_node(
+            id="d4", storage_type=storage_type, default_data={"a": [1], "b": [2]}
         )
-        generated_pickle_dn_config_1 = Config.configure_data_node(id="d2", storage_type="pickle", default_data="d")
-        generated_pickle_dn_config_2 = Config.configure_data_node(id="d3", storage_type="pickle", default_data="d")
-        generated_pickle_dn_config_3 = Config.configure_data_node(id="d4", storage_type="pickle", default_data="d")
 
         dns = _DataManager._bulk_get_or_create(
             [
-                user_pickle_dn_config,
-                generated_pickle_dn_config_1,
-                generated_pickle_dn_config_2,
-                generated_pickle_dn_config_3,
+                user_dn_config,
+                generated_dn_config_1,
+                generated_dn_config_2,
+                generated_dn_config_3,
             ]
         )
 
-        user_pickle_dn = dns[user_pickle_dn_config]
-        generated_pickle_dn_1 = dns[generated_pickle_dn_config_1]
-        generated_pickle_dn_2 = dns[generated_pickle_dn_config_2]
-        generated_pickle_dn_3 = dns[generated_pickle_dn_config_3]
+        user_dn = dns[user_dn_config]
+        generated_dn_1 = dns[generated_dn_config_1]
+        generated_dn_2 = dns[generated_dn_config_2]
+        generated_dn_3 = dns[generated_dn_config_3]
 
-        _DataManager._delete(user_pickle_dn.id)
-        assert file_exists(user_pickle_dn.path)
+        _DataManager._delete(user_dn.id)
+        assert file_exists(user_dn.path)
 
-        _DataManager._delete_many([generated_pickle_dn_1.id, generated_pickle_dn_2.id])
-        assert not file_exists(generated_pickle_dn_1.path)
-        assert not file_exists(generated_pickle_dn_2.path)
+        _DataManager._delete_many([generated_dn_1.id, generated_dn_2.id])
+        assert not file_exists(generated_dn_1.path)
+        assert not file_exists(generated_dn_2.path)
 
         _DataManager._delete_all()
-        assert not file_exists(generated_pickle_dn_3.path)
+        assert not file_exists(generated_dn_3.path)
 
     def test_create_dn_from_loaded_config_no_scope(self):
         file_config = NamedTemporaryFile(

+ 5 - 3
tests/core/data/test_excel_data_node.py

@@ -11,6 +11,7 @@
 
 import os
 import pathlib
+import uuid
 from datetime import datetime
 from time import sleep
 from typing import Dict
@@ -132,12 +133,13 @@ class TestExcelDataNode:
     @pytest.mark.parametrize(
         ["properties", "exists"],
         [
-            ({}, False),
             ({"default_data": {"a": ["foo", "bar"]}}, True),
+            ({}, False),
         ],
     )
     def test_create_with_default_data(self, properties, exists):
-        dn = ExcelDataNode("foo", Scope.SCENARIO, DataNodeId("dn_id"), properties=properties)
+        dn = ExcelDataNode("foo", Scope.SCENARIO, DataNodeId(f"dn_id_{uuid.uuid4()}"), properties=properties)
+        assert dn.path == os.path.join(Config.core.storage_folder.strip("/"), "excels", dn.id + ".xlsx")
         assert os.path.exists(dn.path) is exists
 
     def test_read_write_after_modify_path(self):
@@ -361,5 +363,5 @@ class TestExcelDataNode:
 
         dn = ExcelDataNode("foo", Scope.SCENARIO, properties={"path": path, "exposed_type": "pandas"})
 
-        assert ".data" not in dn.path.name
+        assert ".data" not in dn.path
         assert os.path.exists(dn.path)

+ 5 - 3
tests/core/data/test_json_data_node.py

@@ -13,6 +13,7 @@ import datetime
 import json
 import os
 import pathlib
+import uuid
 from dataclasses import dataclass
 from enum import Enum
 from time import sleep
@@ -308,12 +309,13 @@ class TestJSONDataNode:
     @pytest.mark.parametrize(
         ["properties", "exists"],
         [
-            ({}, False),
             ({"default_data": {"foo": "bar"}}, True),
+            ({}, False),
         ],
     )
     def test_create_with_default_data(self, properties, exists):
-        dn = JSONDataNode("foo", Scope.SCENARIO, DataNodeId("dn_id"), properties=properties)
+        dn = JSONDataNode("foo", Scope.SCENARIO, DataNodeId(f"dn_id_{uuid.uuid4()}"), properties=properties)
+        assert dn.path == os.path.join(Config.core.storage_folder.strip("/"), "jsons", dn.id + ".json")
         assert os.path.exists(dn.path) is exists
 
     def test_set_path(self):
@@ -366,5 +368,5 @@ class TestJSONDataNode:
 
         dn = JSONDataNode("foo", Scope.SCENARIO, properties={"path": path})
 
-        assert ".data" not in dn.path.name
+        assert ".data" not in dn.path
         assert os.path.exists(dn.path)

+ 4 - 2
tests/core/data/test_parquet_data_node.py

@@ -11,6 +11,7 @@
 
 import os
 import pathlib
+import uuid
 from datetime import datetime
 from importlib import util
 from time import sleep
@@ -127,7 +128,8 @@ class TestParquetDataNode:
         ],
     )
     def test_create_with_default_data(self, properties, exists):
-        dn = ParquetDataNode("foo", Scope.SCENARIO, DataNodeId("dn_id"), properties=properties)
+        dn = ParquetDataNode("foo", Scope.SCENARIO, DataNodeId(f"dn_id_{uuid.uuid4()}"), properties=properties)
+        assert dn.path == os.path.join(Config.core.storage_folder.strip("/"), "parquets", dn.id + ".parquet")
         assert os.path.exists(dn.path) is exists
 
     @pytest.mark.parametrize("engine", __engine)
@@ -217,5 +219,5 @@ class TestParquetDataNode:
 
         dn = ParquetDataNode("foo_bar", Scope.SCENARIO, properties={"path": path, "name": "super name"})
 
-        assert ".data" not in dn.path.name
+        assert ".data" not in dn.path
         assert os.path.exists(dn.path)

+ 2 - 2
tests/core/data/test_pickle_data_node.py

@@ -44,7 +44,7 @@ class TestPickleDataNodeEntity:
 
     def test_create(self):
         dn = PickleDataNode("foobar_bazxyxea", Scope.SCENARIO, properties={"default_data": "Data"})
-        assert os.path.isfile(Config.core.storage_folder + "pickles/" + dn.id + ".p")
+        assert os.path.isfile(os.path.join(Config.core.storage_folder.strip("/"), "pickles", dn.id + ".p"))
         assert isinstance(dn, PickleDataNode)
         assert dn.storage_type() == "pickle"
         assert dn.config_id == "foobar_bazxyxea"
@@ -190,5 +190,5 @@ class TestPickleDataNodeEntity:
 
         dn = PickleDataNode("foo", Scope.SCENARIO, properties={"default_data": "bar", "path": path})
 
-        assert ".data" not in dn.path.name
+        assert ".data" not in dn.path
         assert os.path.exists(dn.path)