
Merge pull request #1093 from Avaiga/feature/add-include_data-option-to-export-api

Feature - Expanding the `export_scenario()` API
Đỗ Trường Giang 1 year ago
parent
commit
d8bccf53fd

+ 1 - 4
taipy/core/_manager/_manager.py

@@ -154,10 +154,7 @@ class _Manager(Generic[EntityType]):
         _SubmissionManagerFactory._build_manager()._delete_many(_entity_ids.submission_ids)
 
     @classmethod
-    def _export(cls, id: str, folder_path: Union[str, pathlib.Path]):
-        """
-        Export an entity.
-        """
+    def _export(cls, id: str, folder_path: Union[str, pathlib.Path], **kwargs):
         return cls._repository._export(id, folder_path)
 
     @classmethod
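The base `_export` gains `**kwargs` so subclass managers can accept export options without changing the shared call sites; the base implementation simply ignores them. A minimal sketch of the pattern, with illustrative class names (only `_export` and `include_data` come from this diff):

    class _BaseManager:
        @classmethod
        def _export(cls, id, folder_path, **kwargs):
            # Default behavior: export the JSON metadata only, ignore extra options.
            return cls._repository._export(id, folder_path)

    class _FileAwareManager(_BaseManager):
        @classmethod
        def _export(cls, id, folder_path, **kwargs):
            super()._export(id, folder_path)
            if kwargs.get("include_data"):  # consume the option this subclass understands
                ...  # also copy the underlying data file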

+ 1 - 7
taipy/core/_repository/_filesystem_repository.py

@@ -19,7 +19,7 @@ from taipy.config.config import Config
 
 from ..common._utils import _retry_repository_operation
 from ..common.typing import Converter, Entity, Json, ModelType
-from ..exceptions import FileCannotBeRead, InvalidExportPath, ModelNotFound
+from ..exceptions import FileCannotBeRead, ModelNotFound
 from ._abstract_repository import _AbstractRepository
 from ._decoder import _Decoder
 from ._encoder import _Encoder
@@ -123,17 +123,11 @@ class _FileSystemRepository(_AbstractRepository[ModelType, Entity]):
         else:
             folder = folder_path
 
-        if folder.resolve() == self._storage_folder.resolve():
-            raise InvalidExportPath("The export folder must not be the storage folder.")
-
         export_dir = folder / self._dir_name
         if not export_dir.exists():
             export_dir.mkdir(parents=True)
 
         export_path = export_dir / f"{entity_id}.json"
-        # Delete if exists.
-        if export_path.exists():
-            export_path.unlink()
 
         shutil.copy2(self.__get_path(entity_id), export_path)

+ 28 - 0
taipy/core/data/_data_manager.py

@@ -10,6 +10,8 @@
 # specific language governing permissions and limitations under the License.
 
 import os
+import pathlib
+import shutil
 from typing import Dict, Iterable, List, Optional, Set, Union
 
 from taipy.config._config import _Config
@@ -24,6 +26,7 @@ from ..exceptions.exceptions import InvalidDataNodeType
 from ..notification import Event, EventEntityType, EventOperation, Notifier, _make_event
 from ..scenario.scenario_id import ScenarioId
 from ..sequence.sequence_id import SequenceId
+from ._abstract_file import _FileDataNodeMixin
 from ._data_fs_repository import _DataFSRepository
 from .data_node import DataNode
 from .data_node_id import DataNodeId
@@ -161,3 +164,28 @@ class _DataManager(_Manager[DataNode], _VersionMixin):
         for fil in filters:
             fil.update({"config_id": config_id})
         return cls._repository._load_all(filters)
+
+    @classmethod
+    def _export(cls, id: str, folder_path: Union[str, pathlib.Path], **kwargs):
+        cls._repository._export(id, folder_path)
+
+        if not kwargs.get("include_data"):
+            return
+
+        data_node = cls._get(id)
+        if not isinstance(data_node, _FileDataNodeMixin):
+            cls._logger.warning(f"Data node {id} is not a file-based data node and the data will not be exported.")
+            return
+
+        if isinstance(folder_path, str):
+            folder: pathlib.Path = pathlib.Path(folder_path)
+        else:
+            folder = folder_path
+
+        data_export_dir = folder / Config.core.storage_folder
+        if not data_export_dir.exists():
+            data_export_dir.mkdir(parents=True)
+
+        data_export_path = data_export_dir / os.path.basename(data_node.path)
+        if os.path.exists(data_node.path):
+            shutil.copy(data_node.path, data_export_path)

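The override above always exports a data node's JSON metadata and, when `include_data` is truthy, also copies the underlying data file into `<folder>/<Config.core.storage_folder>`. A hedged helper sketch (not part of this diff) for listing which of a scenario's data nodes would actually have their data copied; `_FileDataNodeMixin` is the internal marker class imported above:

    from taipy.core.data._abstract_file import _FileDataNodeMixin

    def exportable_data_nodes(scenario):
        # Only file-based nodes (pickle, csv, excel, parquet, json) carry a
        # data file on disk; any other node triggers the warning logged above.
        return [dn for dn in scenario.data_nodes.values() if isinstance(dn, _FileDataNodeMixin)]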
+ 10 - 0
taipy/core/exceptions/exceptions.py

@@ -373,6 +373,16 @@ class FileCannotBeRead(Exception):
     """Raised when a file cannot be read."""
 
 
+class ExportFolderAlreadyExists(Exception):
+    """Raised when the export folder already exists."""
+
+    def __init__(self, folder_path: str, scenario_id: str):
+        self.message = (
+            f"Folder '{folder_path}' already exists and can not be used to export scenario '{scenario_id}'."
+            " Please use the 'override' parameter to override it."
+        )
+
+
 class SQLQueryCannotBeExecuted(Exception):
     """Raised when an SQL Query cannot be executed."""
 

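A minimal usage sketch for the new exception, assuming `scenario` is an already created scenario and the target path is illustrative:

    import taipy.core.taipy as tp
    from taipy.core.exceptions.exceptions import ExportFolderAlreadyExists

    try:
        tp.export_scenario(scenario.id, "./my_export")
    except ExportFolderAlreadyExists:
        # The target folder is left untouched unless override=True is passed.
        tp.export_scenario(scenario.id, "./my_export", override=True)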
+ 1 - 1
taipy/core/scenario/_scenario_manager.py

@@ -416,7 +416,7 @@ class _ScenarioManager(_Manager[Scenario], _VersionMixin):
         submissions = _SubmissionManagerFactory._build_manager()._get_all()
         submitted_entity_ids = list(entity_ids.scenario_ids.union(entity_ids.sequence_ids, entity_ids.task_ids))
         for submission in submissions:
-            if submission.entity_id in submitted_entity_ids:
+            if submission.entity_id in submitted_entity_ids or submission.entity_id == scenario.id:
                 entity_ids.submission_ids.add(submission.id)
 
         return entity_ids

+ 12 - 1
taipy/core/scenario/scenario.py

@@ -585,15 +585,26 @@ class Scenario(_Entity, Submittable, _Labeled):
     def export(
         self,
         folder_path: Union[str, pathlib.Path],
+        override: bool = False,
+        include_data: bool = False,
     ):
         """Export all related entities of this scenario to a folder.
 
         Parameters:
             folder_path (Union[str, pathlib.Path]): The folder path to export the scenario to.
+                If the path exists and the override parameter is False, an exception is raised.
+            override (bool): If True, the existing folder will be overridden. Default is False.
+            include_data (bool): If True, the file-based data nodes are exported as well.
+                This includes Pickle, CSV, Excel, Parquet, and JSON data nodes.
+                If the scenario has a data node that is not file-based, a warning will be logged, and the data node
+                will not be exported. The default value is False.
+
+        Raises:
+            ExportFolderAlreadyExists^: If the `folder_path` already exists and the override parameter is False.
         """
         from ... import core as tp
 
-        return tp.export_scenario(self.id, folder_path)
+        return tp.export_scenario(self.id, folder_path, override, include_data)
 
     def set_primary(self):
         """Promote the scenario as the primary scenario of its cycle.

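The entity method stays a thin wrapper over the module-level API, so the two calls below are equivalent (`scenario` and the target path are illustrative):

    import taipy.core.taipy as tp

    scenario.export("./my_export", override=True, include_data=True)
    tp.export_scenario(scenario.id, "./my_export", override=True, include_data=True)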
+ 1 - 1
taipy/core/sequence/_sequence_manager.py

@@ -344,7 +344,7 @@ class _SequenceManager(_Manager[Sequence], _VersionMixin):
         return True if cls._get(entity_id) else False
 
     @classmethod
-    def _export(cls, id: str, folder_path: Union[str, pathlib.Path]):
+    def _export(cls, id: str, folder_path: Union[str, pathlib.Path], **kwargs):
         """
         Export a Sequence entity.
         """

+ 30 - 5
taipy/core/taipy.py

@@ -9,12 +9,13 @@
 # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
 # specific language governing permissions and limitations under the License.
 
 
+import os
 import pathlib
 import pathlib
 import shutil
 import shutil
 from datetime import datetime
 from datetime import datetime
 from typing import Any, Callable, Dict, List, Optional, Set, Union, overload
 from typing import Any, Callable, Dict, List, Optional, Set, Union, overload
 
 
-from taipy.config.common.scope import Scope
+from taipy.config import Config, Scope
 from taipy.logger._taipy_logger import _TaipyLogger
 from taipy.logger._taipy_logger import _TaipyLogger
 
 
 from ._core import Core
 from ._core import Core
@@ -40,6 +41,8 @@ from .data.data_node import DataNode
 from .data.data_node_id import DataNodeId
 from .exceptions.exceptions import (
     DataNodeConfigIsNotGlobal,
+    ExportFolderAlreadyExists,
+    InvalidExportPath,
     ModelNotFound,
     NonExistingVersion,
     VersionIsNotProductionVersion,
@@ -942,6 +945,8 @@ def clean_all_entities(version_number: str) -> bool:
 def export_scenario(
     scenario_id: ScenarioId,
     folder_path: Union[str, pathlib.Path],
+    override: bool = False,
+    include_data: bool = False,
 ):
     """Export all related entities of a scenario to a folder.
 
@@ -951,18 +956,35 @@ def export_scenario(
     Parameters:
         scenario_id (ScenarioId): The ID of the scenario to export.
         folder_path (Union[str, pathlib.Path]): The folder path to export the scenario to.
-    """
+            If the path exists and the override parameter is False, an exception is raised.
+        override (bool): If True, the existing folder will be overridden. Default is False.
+        include_data (bool): If True, the file-based data nodes are exported as well.
+            This includes Pickle, CSV, Excel, Parquet, and JSON data nodes.
+            If the scenario has a data node that is not file-based, a warning will be logged, and the data node
+            will not be exported. The default value is False.
+
+    Raises:
+        ExportFolderAlreadyExists^: If the `folder_path` already exists and the override parameter is False.
+    """
     manager = _ScenarioManagerFactory._build_manager()
     scenario = manager._get(scenario_id)
     entity_ids = manager._get_children_entity_ids(scenario)
     entity_ids.scenario_ids = {scenario_id}
-    entity_ids.cycle_ids = {scenario.cycle.id}
+    if scenario.cycle:
+        entity_ids.cycle_ids = {scenario.cycle.id}
 
-    shutil.rmtree(folder_path, ignore_errors=True)
+    if folder_path == Config.core.taipy_storage_folder:
+        raise InvalidExportPath("The export folder must not be the storage folder.")
+
+    if os.path.exists(folder_path):
+        if override:
+            __logger.warning(f"Override the existing folder '{folder_path}'")
+            shutil.rmtree(folder_path, ignore_errors=True)
+        else:
+            raise ExportFolderAlreadyExists(str(folder_path), scenario_id)
 
     for data_node_id in entity_ids.data_node_ids:
-        _DataManagerFactory._build_manager()._export(data_node_id, folder_path)
+        _DataManagerFactory._build_manager()._export(data_node_id, folder_path, include_data=include_data)
     for task_id in entity_ids.task_ids:
         _TaskManagerFactory._build_manager()._export(task_id, folder_path)
     for sequence_id in entity_ids.sequence_ids:
@@ -973,6 +995,9 @@ def export_scenario(
         _ScenarioManagerFactory._build_manager()._export(scenario_id, folder_path)
     for job_id in entity_ids.job_ids:
         _JobManagerFactory._build_manager()._export(job_id, folder_path)
+    for submission_id in entity_ids.submission_ids:
+        _SubmissionManagerFactory._build_manager()._export(submission_id, folder_path)
+    _VersionManagerFactory._build_manager()._export(scenario.version, folder_path)
 
 
 def get_parents(

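Putting the new parameters together, a hedged end-to-end sketch (config ids and paths are illustrative; the folder layout matches the tests below):

    import taipy.core.taipy as tp
    from taipy import Config, Scope

    def double(x):
        return x * 2

    input_cfg = Config.configure_data_node("my_input", storage_type="pickle", scope=Scope.SCENARIO, default_data=1)
    output_cfg = Config.configure_data_node("my_output", storage_type="pickle")
    task_cfg = Config.configure_task("double", double, input_cfg, output_cfg)
    scenario_cfg = Config.configure_scenario("my_scenario", [task_cfg])

    scenario = tp.create_scenario(scenario_cfg)
    tp.submit(scenario)

    # Entity metadata goes to ./my_export/{data_nodes,tasks,scenarios,jobs,submission};
    # with include_data=True the pickle files also land in ./my_export/user_data
    # (the default Config.core.storage_folder, as asserted by the tests below).
    tp.export_scenario(scenario.id, "./my_export", include_data=True)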
+ 0 - 7
tests/core/repository/test_repositories.py

@@ -16,9 +16,6 @@ import shutil
 
 import pytest
 
-from taipy.config.config import Config
-from taipy.core.exceptions.exceptions import InvalidExportPath
-
 from .mocks import MockConverter, MockFSRepository, MockModel, MockObj, MockSQLRepository
 
 
@@ -165,8 +162,4 @@ class TestRepositoriesStorage:
         r._export("uuid", export_path)
         assert pathlib.Path(os.path.join(export_path, "mock_model/uuid.json")).exists()
 
-        if mock_repo == MockFSRepository:
-            with pytest.raises(InvalidExportPath):
-                r._export("uuid", Config.core.storage_folder)
-
         shutil.rmtree(export_path, ignore_errors=True)

+ 1 - 56
tests/core/test_taipy.py

@@ -10,9 +10,6 @@
 # specific language governing permissions and limitations under the License.
 
 import datetime
-import os
-import pathlib
-import shutil
 from unittest import mock
 
 import pytest
@@ -44,7 +41,7 @@ from taipy.core.config.scenario_config import ScenarioConfig
 from taipy.core.cycle._cycle_manager import _CycleManager
 from taipy.core.data._data_manager import _DataManager
 from taipy.core.data.pickle import PickleDataNode
-from taipy.core.exceptions.exceptions import DataNodeConfigIsNotGlobal, InvalidExportPath
+from taipy.core.exceptions.exceptions import DataNodeConfigIsNotGlobal
 from taipy.core.job._job_manager import _JobManager
 from taipy.core.job.job import Job
 from taipy.core.scenario._scenario_manager import _ScenarioManager
@@ -668,58 +665,6 @@ class TestTaipy:
             tp.create_scenario(scenario_config, datetime.datetime(2022, 2, 5), "displayable_name")
             mck.assert_called_once_with(scenario_config, datetime.datetime(2022, 2, 5), "displayable_name")
 
-    def test_export_scenario_filesystem(self):
-        shutil.rmtree("./tmp", ignore_errors=True)
-
-        input_cfg_1 = Config.configure_data_node(id="i1", storage_type="pickle", scope=Scope.SCENARIO, default_data=1)
-        output_cfg_1 = Config.configure_data_node(id="o1", storage_type="pickle", scope=Scope.SCENARIO)
-        task_cfg_1 = Config.configure_task("t1", print, input_cfg_1, output_cfg_1)
-        scenario_cfg_1 = Config.configure_scenario("s1", [task_cfg_1], [], Frequency.DAILY)
-
-        input_cfg_2 = Config.configure_data_node(id="i2", storage_type="pickle", scope=Scope.SCENARIO, default_data=2)
-        output_cfg_2 = Config.configure_data_node(id="o2", storage_type="pickle", scope=Scope.SCENARIO)
-        task_cfg_2 = Config.configure_task("t2", print, input_cfg_2, output_cfg_2)
-        scenario_cfg_2 = Config.configure_scenario("s2", [task_cfg_2], [], Frequency.DAILY)
-
-        scenario_1 = tp.create_scenario(scenario_cfg_1)
-        job_1 = tp.submit(scenario_1).jobs[0]
-
-        # Export scenario 1
-        tp.export_scenario(scenario_1.id, "./tmp/exp_scenario_1")
-        assert sorted(os.listdir("./tmp/exp_scenario_1/data_nodes")) == sorted(
-            [f"{scenario_1.i1.id}.json", f"{scenario_1.o1.id}.json"]
-        )
-        assert sorted(os.listdir("./tmp/exp_scenario_1/tasks")) == sorted([f"{scenario_1.t1.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_1/scenarios")) == sorted([f"{scenario_1.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_1/jobs")) == sorted([f"{job_1.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_1/cycles")) == sorted([f"{scenario_1.cycle.id}.json"])
-
-        scenario_2 = tp.create_scenario(scenario_cfg_2)
-        job_2 = tp.submit(scenario_2).jobs[0]
-
-        # Export scenario 2
-        scenario_2.export(pathlib.Path.cwd() / "./tmp/exp_scenario_2")
-        assert sorted(os.listdir("./tmp/exp_scenario_2/data_nodes")) == sorted(
-            [f"{scenario_2.i2.id}.json", f"{scenario_2.o2.id}.json"]
-        )
-        assert sorted(os.listdir("./tmp/exp_scenario_2/tasks")) == sorted([f"{scenario_2.t2.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_2/scenarios")) == sorted([f"{scenario_2.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_2/jobs")) == sorted([f"{job_2.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_2/cycles")) == sorted([f"{scenario_2.cycle.id}.json"])
-
-        # Export scenario 2 into the folder containing scenario 1 files
-        tp.export_scenario(scenario_2.id, "./tmp/exp_scenario_1")
-        # Should have the files as scenario 1 only
-        assert sorted(os.listdir("./tmp/exp_scenario_1/tasks")) == sorted([f"{scenario_2.t2.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_1/scenarios")) == sorted([f"{scenario_2.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_1/jobs")) == sorted([f"{job_2.id}.json"])
-        assert sorted(os.listdir("./tmp/exp_scenario_1/cycles")) == sorted([f"{scenario_2.cycle.id}.json"])
-
-        with pytest.raises(InvalidExportPath):
-            tp.export_scenario(scenario_2.id, Config.core.taipy_storage_folder)
-
-        shutil.rmtree("./tmp", ignore_errors=True)
-
     def test_get_parents(self):
         def assert_result_parents_and_expected_parents(parents, expected_parents):
             for key, items in expected_parents.items():

+ 193 - 0
tests/core/test_taipy/test_export.py

@@ -0,0 +1,193 @@
+# Copyright 2021-2024 Avaiga Private Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import os
+import shutil
+
+import pandas as pd
+import pytest
+
+import taipy.core.taipy as tp
+from taipy import Config, Frequency, Scope
+from taipy.core.exceptions import ExportFolderAlreadyExists, InvalidExportPath
+
+
+@pytest.fixture(scope="function", autouse=True)
+def clean_tmp_folder():
+    shutil.rmtree("./tmp", ignore_errors=True)
+    yield
+    shutil.rmtree("./tmp", ignore_errors=True)
+
+
+def plus_1(x):
+    return x + 1
+
+
+def plus_1_dataframe(x):
+    return pd.DataFrame({"output": [x + 1]})
+
+
+def configure_test_scenario(input_data, frequency=None):
+    input_cfg = Config.configure_data_node(
+        id=f"i_{input_data}", storage_type="pickle", scope=Scope.SCENARIO, default_data=input_data
+    )
+    csv_output_cfg = Config.configure_data_node(id=f"o_{input_data}_csv", storage_type="csv")
+    excel_output_cfg = Config.configure_data_node(id=f"o_{input_data}_excel", storage_type="excel")
+    parquet_output_cfg = Config.configure_data_node(id=f"o_{input_data}_parquet", storage_type="parquet")
+    json_output_cfg = Config.configure_data_node(id=f"o_{input_data}_json", storage_type="json")
+
+    csv_task_cfg = Config.configure_task(f"t_{input_data}_csv", plus_1_dataframe, input_cfg, csv_output_cfg)
+    excel_task_cfg = Config.configure_task(f"t_{input_data}_excel", plus_1_dataframe, input_cfg, excel_output_cfg)
+    parquet_task_cfg = Config.configure_task(f"t_{input_data}_parquet", plus_1_dataframe, input_cfg, parquet_output_cfg)
+    json_task_cfg = Config.configure_task(f"t_{input_data}_json", plus_1, input_cfg, json_output_cfg)
+    scenario_cfg = Config.configure_scenario(
+        id=f"s_{input_data}",
+        task_configs=[csv_task_cfg, excel_task_cfg, parquet_task_cfg, json_task_cfg],
+        frequency=frequency,
+    )
+
+    return scenario_cfg
+
+
+def test_export_scenario_to_the_storage_folder():
+    scenario_cfg = configure_test_scenario(1, frequency=Frequency.DAILY)
+    scenario = tp.create_scenario(scenario_cfg)
+
+    with pytest.raises(InvalidExportPath):
+        tp.export_scenario(scenario.id, Config.core.taipy_storage_folder)
+
+
+def test_export_scenario_with_cycle():
+    scenario_cfg = configure_test_scenario(1, frequency=Frequency.DAILY)
+
+    scenario = tp.create_scenario(scenario_cfg)
+    submission = tp.submit(scenario)
+    jobs = submission.jobs
+
+    # Export the submitted scenario
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario")
+
+    assert sorted(os.listdir("./tmp/exp_scenario/data_nodes")) == sorted(
+        [
+            f"{scenario.i_1.id}.json",
+            f"{scenario.o_1_csv.id}.json",
+            f"{scenario.o_1_excel.id}.json",
+            f"{scenario.o_1_parquet.id}.json",
+            f"{scenario.o_1_json.id}.json",
+        ]
+    )
+    assert sorted(os.listdir("./tmp/exp_scenario/tasks")) == sorted(
+        [
+            f"{scenario.t_1_csv.id}.json",
+            f"{scenario.t_1_excel.id}.json",
+            f"{scenario.t_1_parquet.id}.json",
+            f"{scenario.t_1_json.id}.json",
+        ]
+    )
+    assert sorted(os.listdir("./tmp/exp_scenario/scenarios")) == sorted([f"{scenario.id}.json"])
+    assert sorted(os.listdir("./tmp/exp_scenario/jobs")) == sorted(
+        [f"{jobs[0].id}.json", f"{jobs[1].id}.json", f"{jobs[2].id}.json", f"{jobs[3].id}.json"]
+    )
+    assert os.listdir("./tmp/exp_scenario/submission") == [f"{submission.id}.json"]
+    assert sorted(os.listdir("./tmp/exp_scenario/cycles")) == sorted([f"{scenario.cycle.id}.json"])
+
+
+def test_export_scenario_without_cycle():
+    scenario_cfg = configure_test_scenario(1)
+
+    scenario = tp.create_scenario(scenario_cfg)
+    tp.submit(scenario)
+
+    # Export the submitted scenario
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario")
+
+    assert os.path.exists("./tmp/exp_scenario/data_nodes")
+    assert os.path.exists("./tmp/exp_scenario/tasks")
+    assert os.path.exists("./tmp/exp_scenario/scenarios")
+    assert os.path.exists("./tmp/exp_scenario/jobs")
+    assert os.path.exists("./tmp/exp_scenario/submission")
+    assert not os.path.exists("./tmp/exp_scenario/cycles")  # No cycle
+
+
+def test_export_scenario_override_existing_files():
+    scenario_1_cfg = configure_test_scenario(1, frequency=Frequency.DAILY)
+    scenario_2_cfg = configure_test_scenario(2)
+
+    scenario_1 = tp.create_scenario(scenario_1_cfg)
+    tp.submit(scenario_1)
+
+    # Export the submitted scenario_1
+    tp.export_scenario(scenario_1.id, "./tmp/exp_scenario")
+    assert os.path.exists("./tmp/exp_scenario/data_nodes")
+    assert os.path.exists("./tmp/exp_scenario/tasks")
+    assert os.path.exists("./tmp/exp_scenario/scenarios")
+    assert os.path.exists("./tmp/exp_scenario/jobs")
+    assert os.path.exists("./tmp/exp_scenario/submission")
+    assert os.path.exists("./tmp/exp_scenario/cycles")
+
+    scenario_2 = tp.create_scenario(scenario_2_cfg)
+    tp.submit(scenario_2)
+
+    # Export the submitted scenario_2 to the same folder should raise an error
+    with pytest.raises(ExportFolderAlreadyExists):
+        tp.export_scenario(scenario_2.id, "./tmp/exp_scenario")
+
+    # Export the submitted scenario_2 without a cycle and override the existing files
+    tp.export_scenario(scenario_2.id, "./tmp/exp_scenario", override=True)
+    assert os.path.exists("./tmp/exp_scenario/data_nodes")
+    assert os.path.exists("./tmp/exp_scenario/tasks")
+    assert os.path.exists("./tmp/exp_scenario/scenarios")
+    assert os.path.exists("./tmp/exp_scenario/jobs")
+    assert os.path.exists("./tmp/exp_scenario/submission")
+    # The cycles folder should be removed when overriding
+    assert not os.path.exists("./tmp/exp_scenario/cycles")
+
+
+def test_export_scenario_filesystem_with_data():
+    scenario_cfg = configure_test_scenario(1)
+    scenario = tp.create_scenario(scenario_cfg)
+    tp.submit(scenario)
+
+    # Export scenario without data
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario")
+    assert not os.path.exists("./tmp/exp_scenario/user_data")
+
+    # Export scenario with data
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario", include_data=True, override=True)
+    assert os.path.exists("./tmp/exp_scenario/user_data")
+    data_files = [f for _, _, files in os.walk("./tmp/exp_scenario/user_data") for f in files]
+    assert sorted(data_files) == sorted(
+        [
+            f"{scenario.i_1.id}.p",
+            f"{scenario.o_1_csv.id}.csv",
+            f"{scenario.o_1_excel.id}.xlsx",
+            f"{scenario.o_1_parquet.id}.parquet",
+            f"{scenario.o_1_json.id}.json",
+        ]
+    )
+
+
+def test_export_non_file_based_data_node_raise_warning(caplog):
+    input_cfg = Config.configure_data_node(id="i", storage_type="pickle", scope=Scope.SCENARIO, default_data=1)
+    csv_output_cfg = Config.configure_data_node(id="o_csv", storage_type="csv")
+    in_mem_output_cfg = Config.configure_data_node(id="o_mem", storage_type="in_memory")
+
+    csv_task_cfg = Config.configure_task("t_csv", plus_1_dataframe, input_cfg, csv_output_cfg)
+    in_mem_task_cfg = Config.configure_task("t_mem", plus_1, input_cfg, in_mem_output_cfg)
+    scenario_cfg = Config.configure_scenario(id="s", task_configs=[csv_task_cfg, in_mem_task_cfg])
+
+    scenario = tp.create_scenario(scenario_cfg)
+    tp.submit(scenario)
+
+    # Export scenario with in-memory data node
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario", include_data=True)
+    expected_warning = f"Data node {scenario.o_mem.id} is not a file-based data node and the data will not be exported"
+    assert expected_warning in caplog.text

+ 193 - 0
tests/core/test_taipy/test_export_with_sql_repo.py

@@ -0,0 +1,193 @@
+# Copyright 2021-2024 Avaiga Private Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import os
+import shutil
+
+import pandas as pd
+import pytest
+
+import taipy.core.taipy as tp
+from taipy import Config, Frequency, Scope
+from taipy.core.exceptions import ExportFolderAlreadyExists, InvalidExportPath
+
+
+@pytest.fixture(scope="function", autouse=True)
+def clean_tmp_folder():
+    shutil.rmtree("./tmp", ignore_errors=True)
+    yield
+    shutil.rmtree("./tmp", ignore_errors=True)
+
+
+def plus_1(x):
+    return x + 1
+
+
+def plus_1_dataframe(x):
+    return pd.DataFrame({"output": [x + 1]})
+
+
+def configure_test_scenario(input_data, frequency=None):
+    input_cfg = Config.configure_data_node(
+        id=f"i_{input_data}", storage_type="pickle", scope=Scope.SCENARIO, default_data=input_data
+    )
+    csv_output_cfg = Config.configure_data_node(id=f"o_{input_data}_csv", storage_type="csv")
+    excel_output_cfg = Config.configure_data_node(id=f"o_{input_data}_excel", storage_type="excel")
+    parquet_output_cfg = Config.configure_data_node(id=f"o_{input_data}_parquet", storage_type="parquet")
+    json_output_cfg = Config.configure_data_node(id=f"o_{input_data}_json", storage_type="json")
+
+    csv_task_cfg = Config.configure_task(f"t_{input_data}_csv", plus_1_dataframe, input_cfg, csv_output_cfg)
+    excel_task_cfg = Config.configure_task(f"t_{input_data}_excel", plus_1_dataframe, input_cfg, excel_output_cfg)
+    parquet_task_cfg = Config.configure_task(f"t_{input_data}_parquet", plus_1_dataframe, input_cfg, parquet_output_cfg)
+    json_task_cfg = Config.configure_task(f"t_{input_data}_json", plus_1, input_cfg, json_output_cfg)
+    scenario_cfg = Config.configure_scenario(
+        id=f"s_{input_data}",
+        task_configs=[csv_task_cfg, excel_task_cfg, parquet_task_cfg, json_task_cfg],
+        frequency=frequency,
+    )
+
+    return scenario_cfg
+
+
+def test_export_scenario_to_the_storage_folder(init_sql_repo):
+    scenario_cfg = configure_test_scenario(1, frequency=Frequency.DAILY)
+    scenario = tp.create_scenario(scenario_cfg)
+
+    with pytest.raises(InvalidExportPath):
+        tp.export_scenario(scenario.id, Config.core.taipy_storage_folder)
+
+
+def test_export_scenario_with_cycle(init_sql_repo):
+    scenario_cfg = configure_test_scenario(1, frequency=Frequency.DAILY)
+
+    scenario = tp.create_scenario(scenario_cfg)
+    submission = tp.submit(scenario)
+    jobs = submission.jobs
+
+    # Export the submitted scenario
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario")
+
+    assert sorted(os.listdir("./tmp/exp_scenario/data_node")) == sorted(
+        [
+            f"{scenario.i_1.id}.json",
+            f"{scenario.o_1_csv.id}.json",
+            f"{scenario.o_1_excel.id}.json",
+            f"{scenario.o_1_parquet.id}.json",
+            f"{scenario.o_1_json.id}.json",
+        ]
+    )
+    assert sorted(os.listdir("./tmp/exp_scenario/task")) == sorted(
+        [
+            f"{scenario.t_1_csv.id}.json",
+            f"{scenario.t_1_excel.id}.json",
+            f"{scenario.t_1_parquet.id}.json",
+            f"{scenario.t_1_json.id}.json",
+        ]
+    )
+    assert sorted(os.listdir("./tmp/exp_scenario/scenario")) == sorted([f"{scenario.id}.json"])
+    assert sorted(os.listdir("./tmp/exp_scenario/job")) == sorted(
+        [f"{jobs[0].id}.json", f"{jobs[1].id}.json", f"{jobs[2].id}.json", f"{jobs[3].id}.json"]
+    )
+    assert os.listdir("./tmp/exp_scenario/submission") == [f"{submission.id}.json"]
+    assert sorted(os.listdir("./tmp/exp_scenario/cycle")) == sorted([f"{scenario.cycle.id}.json"])
+
+
+def test_export_scenario_without_cycle(init_sql_repo):
+    scenario_cfg = configure_test_scenario(1)
+
+    scenario = tp.create_scenario(scenario_cfg)
+    tp.submit(scenario)
+
+    # Export the submitted scenario
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario")
+
+    assert os.path.exists("./tmp/exp_scenario/data_node")
+    assert os.path.exists("./tmp/exp_scenario/task")
+    assert os.path.exists("./tmp/exp_scenario/scenario")
+    assert os.path.exists("./tmp/exp_scenario/job")
+    assert os.path.exists("./tmp/exp_scenario/submission")
+    assert not os.path.exists("./tmp/exp_scenario/cycle")  # No cycle
+
+
+def test_export_scenario_override_existing_files(init_sql_repo):
+    scenario_1_cfg = configure_test_scenario(1, frequency=Frequency.DAILY)
+    scenario_2_cfg = configure_test_scenario(2)
+
+    scenario_1 = tp.create_scenario(scenario_1_cfg)
+    tp.submit(scenario_1)
+
+    # Export the submitted scenario_1
+    tp.export_scenario(scenario_1.id, "./tmp/exp_scenario")
+    assert os.path.exists("./tmp/exp_scenario/data_node")
+    assert os.path.exists("./tmp/exp_scenario/task")
+    assert os.path.exists("./tmp/exp_scenario/scenario")
+    assert os.path.exists("./tmp/exp_scenario/job")
+    assert os.path.exists("./tmp/exp_scenario/submission")
+    assert os.path.exists("./tmp/exp_scenario/cycle")
+
+    scenario_2 = tp.create_scenario(scenario_2_cfg)
+    tp.submit(scenario_2)
+
+    # Export the submitted scenario_2 to the same folder should raise an error
+    with pytest.raises(ExportFolderAlreadyExists):
+        tp.export_scenario(scenario_2.id, "./tmp/exp_scenario")
+
+    # Export the submitted scenario_2 without a cycle and override the existing files
+    tp.export_scenario(scenario_2.id, "./tmp/exp_scenario", override=True)
+    assert os.path.exists("./tmp/exp_scenario/data_node")
+    assert os.path.exists("./tmp/exp_scenario/task")
+    assert os.path.exists("./tmp/exp_scenario/scenario")
+    assert os.path.exists("./tmp/exp_scenario/job")
+    assert os.path.exists("./tmp/exp_scenario/submission")
+    # The cycles folder should be removed when overriding
+    assert not os.path.exists("./tmp/exp_scenario/cycle")
+
+
+def test_export_scenario_filesystem_with_data(init_sql_repo):
+    scenario_cfg = configure_test_scenario(1)
+    scenario = tp.create_scenario(scenario_cfg)
+    tp.submit(scenario)
+
+    # Export scenario without data
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario")
+    assert not os.path.exists("./tmp/exp_scenario/user_data")
+
+    # Export scenario with data
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario", include_data=True, override=True)
+    assert os.path.exists("./tmp/exp_scenario/user_data")
+    data_files = [f for _, _, files in os.walk("./tmp/exp_scenario/user_data") for f in files]
+    assert sorted(data_files) == sorted(
+        [
+            f"{scenario.i_1.id}.p",
+            f"{scenario.o_1_csv.id}.csv",
+            f"{scenario.o_1_excel.id}.xlsx",
+            f"{scenario.o_1_parquet.id}.parquet",
+            f"{scenario.o_1_json.id}.json",
+        ]
+    )
+
+
+def test_export_non_file_based_data_node_raise_warning(init_sql_repo, caplog):
+    input_cfg = Config.configure_data_node(id="i", storage_type="pickle", scope=Scope.SCENARIO, default_data=1)
+    csv_output_cfg = Config.configure_data_node(id="o_csv", storage_type="csv")
+    in_mem_output_cfg = Config.configure_data_node(id="o_mem", storage_type="in_memory")
+
+    csv_task_cfg = Config.configure_task("t_csv", plus_1_dataframe, input_cfg, csv_output_cfg)
+    in_mem_task_cfg = Config.configure_task("t_mem", plus_1, input_cfg, in_mem_output_cfg)
+    scenario_cfg = Config.configure_scenario(id="s", task_configs=[csv_task_cfg, in_mem_task_cfg])
+
+    scenario = tp.create_scenario(scenario_cfg)
+    tp.submit(scenario)
+
+    # Export scenario with in-memory data node
+    tp.export_scenario(scenario.id, "./tmp/exp_scenario", include_data=True)
+    expected_warning = f"Data node {scenario.o_mem.id} is not a file-based data node and the data will not be exported"
+    assert expected_warning in caplog.text