aws_s3.py

# Copyright 2021-2024 Avaiga Private Limited
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
from datetime import datetime, timedelta
from importlib import util
from typing import Any, Dict, List, Optional, Set

from ..common._check_dependencies import _check_dependency_is_installed

if util.find_spec("boto3"):
    import boto3

from taipy.config.common.scope import Scope

from .._version._version_manager_factory import _VersionManagerFactory
from ..exceptions.exceptions import MissingRequiredProperty
from .data_node import DataNode
from .data_node_id import DataNodeId, Edit


class S3ObjectDataNode(DataNode):
    """Data Node object stored in an Amazon Web Services (AWS) S3 bucket.

    Attributes:
        config_id (str): Identifier of the data node configuration. It must be a valid Python
            identifier.
        scope (Scope^): The scope of this data node.
        id (str): The unique identifier of this data node.
        owner_id (str): The identifier of the owner (sequence_id, scenario_id, cycle_id) or
            None.
        parent_ids (Optional[Set[str]]): The identifiers of the parent tasks or `None`.
        last_edit_date (datetime): The date and time of the last modification.
        edits (List[Edit^]): The ordered list of edits of this data node.
        version (str): The string indicating the application version of the data node to instantiate.
            If not provided, the current version is used.
        validity_period (Optional[timedelta]): The duration, as a timedelta since the last edit date, for
            which the data node can be considered up-to-date. Once the validity period has passed, the data node is
            considered stale and relevant tasks will run even if they are skippable (see the
            [Task management page](../core/entities/task-mgt.md) for more details).
            If _validity_period_ is set to `None`, the data node is always up-to-date.
        edit_in_progress (bool): True if a task computing the data node has been submitted
            and not completed yet. False otherwise.
        editor_id (Optional[str]): The identifier of the user who is currently editing the data node.
        editor_expiration_date (Optional[datetime]): The expiration date of the editor lock.
        properties (dict[str, Any]): A dictionary of additional properties. Note that the
            _properties_ parameter must at least contain an entry for _"aws_access_key"_, _"aws_secret_access_key"_,
            _"aws_s3_bucket_name"_ and _"aws_s3_object_key"_:

            - _"aws_access_key"_ `(str)`: Amazon Web Services access key ID used to identify the account.\n
            - _"aws_secret_access_key"_ `(str)`: Amazon Web Services secret access key used to authenticate
                programmatic requests.\n
            - _"aws_region"_ `(str)`: The self-contained geographic area where the Amazon Web Services (AWS)
                infrastructure is located.\n
            - _"aws_s3_bucket_name"_ `(str)`: The unique identifier of the bucket (container) that stores objects
                in Amazon Simple Storage Service (S3).\n
            - _"aws_s3_object_key"_ `(str)`: The unique identifier (name) of the object (file) to be read
                or written.\n
            - _"aws_s3_object_parameters"_ `(dict[str, Any])`: A dictionary of additional arguments to be passed
                when interacting with the AWS service.\n

            An illustrative usage sketch based on these properties is provided at the end of this module.
    """
    __STORAGE_TYPE = "s3_object"

    __AWS_ACCESS_KEY_ID = "aws_access_key"
    __AWS_SECRET_ACCESS_KEY = "aws_secret_access_key"
    __AWS_STORAGE_BUCKET_NAME = "aws_s3_bucket_name"
    __AWS_S3_OBJECT_KEY = "aws_s3_object_key"
    __AWS_REGION = "aws_region"
    __AWS_S3_OBJECT_PARAMETERS = "aws_s3_object_parameters"

    _REQUIRED_PROPERTIES: List[str] = [
        __AWS_ACCESS_KEY_ID,
        __AWS_SECRET_ACCESS_KEY,
        __AWS_STORAGE_BUCKET_NAME,
        __AWS_S3_OBJECT_KEY,
    ]

    def __init__(
        self,
        config_id: str,
        scope: Scope,
        id: Optional[DataNodeId] = None,
        owner_id: Optional[str] = None,
        parent_ids: Optional[Set[str]] = None,
        last_edit_date: Optional[datetime] = None,
        edits: Optional[List[Edit]] = None,
        version: Optional[str] = None,
        validity_period: Optional[timedelta] = None,
        edit_in_progress: bool = False,
        editor_id: Optional[str] = None,
        editor_expiration_date: Optional[datetime] = None,
        properties: Optional[Dict] = None,
    ) -> None:
        _check_dependency_is_installed("S3 Data Node", "boto3")
        if properties is None:
            properties = {}
        required = self._REQUIRED_PROPERTIES
        if missing := set(required) - set(properties.keys()):
            raise MissingRequiredProperty(
                f"The following properties {', '.join(missing)} are required and were not provided."
            )
        super().__init__(
            config_id,
            scope,
            id,
            owner_id,
            parent_ids,
            last_edit_date,
            edits,
            version or _VersionManagerFactory._build_manager()._get_latest_version(),
            validity_period,
            edit_in_progress,
            editor_id,
            editor_expiration_date,
            **properties,
        )
        # Build the S3 client from the credentials given in the properties. The region is optional;
        # when it is not set, boto3 falls back to its default region resolution chain.
        self._s3_client = boto3.client(
            "s3",
            aws_access_key_id=properties.get(self.__AWS_ACCESS_KEY_ID),
            aws_secret_access_key=properties.get(self.__AWS_SECRET_ACCESS_KEY),
            region_name=properties.get(self.__AWS_REGION),
        )
        if not self._last_edit_date:  # type: ignore
            self._last_edit_date = datetime.now()

        self._TAIPY_PROPERTIES.update(
            {
                self.__AWS_ACCESS_KEY_ID,
                self.__AWS_SECRET_ACCESS_KEY,
                self.__AWS_STORAGE_BUCKET_NAME,
                self.__AWS_S3_OBJECT_KEY,
                self.__AWS_REGION,
                self.__AWS_S3_OBJECT_PARAMETERS,
            }
        )

    @classmethod
    def storage_type(cls) -> str:
        # Return the storage type identifier of this data node ("s3_object").
        return cls.__STORAGE_TYPE

    def _read(self):
        # Download the object from the configured bucket and decode its body as a UTF-8 string.
        aws_s3_object = self._s3_client.get_object(
            Bucket=self.properties[self.__AWS_STORAGE_BUCKET_NAME],
            Key=self.properties[self.__AWS_S3_OBJECT_KEY],
        )
        return aws_s3_object["Body"].read().decode("utf-8")

    def _write(self, data: Any):
        # Upload the data as the body of the configured S3 object.
        self._s3_client.put_object(
            Bucket=self.properties[self.__AWS_STORAGE_BUCKET_NAME],
            Key=self.properties[self.__AWS_S3_OBJECT_KEY],
            Body=data,
        )
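

# A minimal usage sketch, not part of the library API: it shows how the required properties
# documented in the class docstring map onto the constructor, assuming boto3 is installed.
# The credential, bucket, and object-key values below are hypothetical placeholders and must
# be replaced with real ones before the read/write calls can succeed. It calls the internal
# _write/_read hooks defined above directly; in a full Taipy application one would normally
# go through the data node's public read/write API instead.
if __name__ == "__main__":
    s3_node = S3ObjectDataNode(
        config_id="my_s3_object",  # hypothetical configuration identifier
        scope=Scope.SCENARIO,
        properties={
            "aws_access_key": "YOUR_ACCESS_KEY_ID",              # placeholder credential
            "aws_secret_access_key": "YOUR_SECRET_ACCESS_KEY",   # placeholder credential
            "aws_s3_bucket_name": "my-example-bucket",           # hypothetical bucket name
            "aws_s3_object_key": "path/to/object.txt",           # hypothetical object key
        },
    )
    # With valid credentials, _write uploads the string and _read returns it decoded as UTF-8.
    s3_node._write("Hello from Taipy!")
    print(s3_node._read())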