data_node_config.py

# Copyright 2021-2024 Avaiga Private Limited
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.

import json
from copy import copy
from datetime import timedelta
from typing import Any, Callable, Dict, List, Optional, Union

from taipy.config._config import _Config
from taipy.config.common._config_blocker import _ConfigBlocker
from taipy.config.common._template_handler import _TemplateHandler as _tpl
from taipy.config.common.scope import Scope
from taipy.config.config import Config
from taipy.config.section import Section

from ..common._warnings import _warn_deprecated
from ..common.mongo_default_document import MongoDefaultDocument


class DataNodeConfig(Section):
    """
    Configuration fields needed to instantiate a `DataNode^`.

    A Data Node config is made to be used as a generator for actual data nodes. It holds the configuration
    information needed to create an actual data node.

    Attributes:
        id (str): Unique identifier of the data node config. It must be a valid Python variable name.
        storage_type (str): Storage type of the data nodes created from the data node config. The possible values
            are: "csv", "excel", "pickle", "sql_table", "sql", "mongo_collection", "generic", "json", "parquet",
            "in_memory", and "s3_object".
            The default value is "pickle".
            Note that the "in_memory" value can only be used when the `JobConfig^` mode is "development".
        scope (Optional[Scope^]): The optional `Scope^` of the data nodes instantiated from the data node config.
            The default value is SCENARIO.
        **properties (dict[str, any]): A dictionary of additional properties.
    """
    name = "DATA_NODE"

    _STORAGE_TYPE_KEY = "storage_type"
    _STORAGE_TYPE_VALUE_PICKLE = "pickle"
    _STORAGE_TYPE_VALUE_SQL_TABLE = "sql_table"
    _STORAGE_TYPE_VALUE_SQL = "sql"
    _STORAGE_TYPE_VALUE_MONGO_COLLECTION = "mongo_collection"
    _STORAGE_TYPE_VALUE_CSV = "csv"
    _STORAGE_TYPE_VALUE_EXCEL = "excel"
    _STORAGE_TYPE_VALUE_IN_MEMORY = "in_memory"
    _STORAGE_TYPE_VALUE_GENERIC = "generic"
    _STORAGE_TYPE_VALUE_JSON = "json"
    _STORAGE_TYPE_VALUE_PARQUET = "parquet"
    _STORAGE_TYPE_VALUE_S3_OBJECT = "s3_object"
    _DEFAULT_STORAGE_TYPE = _STORAGE_TYPE_VALUE_PICKLE
    _ALL_STORAGE_TYPES = [
        _STORAGE_TYPE_VALUE_PICKLE,
        _STORAGE_TYPE_VALUE_SQL_TABLE,
        _STORAGE_TYPE_VALUE_SQL,
        _STORAGE_TYPE_VALUE_MONGO_COLLECTION,
        _STORAGE_TYPE_VALUE_CSV,
        _STORAGE_TYPE_VALUE_EXCEL,
        _STORAGE_TYPE_VALUE_IN_MEMORY,
        _STORAGE_TYPE_VALUE_GENERIC,
        _STORAGE_TYPE_VALUE_JSON,
        _STORAGE_TYPE_VALUE_PARQUET,
        _STORAGE_TYPE_VALUE_S3_OBJECT,
    ]

    _EXPOSED_TYPE_KEY = "exposed_type"
    _EXPOSED_TYPE_PANDAS = "pandas"
    _EXPOSED_TYPE_MODIN = "modin"  # Deprecated in favor of pandas since 3.1.0
    _EXPOSED_TYPE_NUMPY = "numpy"
    _DEFAULT_EXPOSED_TYPE = _EXPOSED_TYPE_PANDAS
    _ALL_EXPOSED_TYPES = [
        _EXPOSED_TYPE_PANDAS,
        _EXPOSED_TYPE_NUMPY,
    ]

    _OPTIONAL_ENCODING_PROPERTY = "encoding"
    _DEFAULT_ENCODING_VALUE = "utf-8"

    # Generic
    _OPTIONAL_READ_FUNCTION_GENERIC_PROPERTY = "read_fct"
    _OPTIONAL_READ_FUNCTION_ARGS_GENERIC_PROPERTY = "read_fct_args"
    _OPTIONAL_WRITE_FUNCTION_GENERIC_PROPERTY = "write_fct"
    _OPTIONAL_WRITE_FUNCTION_ARGS_GENERIC_PROPERTY = "write_fct_args"
    # CSV
    _OPTIONAL_EXPOSED_TYPE_CSV_PROPERTY = "exposed_type"
    _OPTIONAL_DEFAULT_PATH_CSV_PROPERTY = "default_path"
    _OPTIONAL_HAS_HEADER_CSV_PROPERTY = "has_header"
    # Excel
    _OPTIONAL_EXPOSED_TYPE_EXCEL_PROPERTY = "exposed_type"
    _OPTIONAL_DEFAULT_PATH_EXCEL_PROPERTY = "default_path"
    _OPTIONAL_HAS_HEADER_EXCEL_PROPERTY = "has_header"
    _OPTIONAL_SHEET_NAME_EXCEL_PROPERTY = "sheet_name"
    # In memory
    _OPTIONAL_DEFAULT_DATA_IN_MEMORY_PROPERTY = "default_data"
    # SQL
    _REQUIRED_DB_NAME_SQL_PROPERTY = "db_name"
    _REQUIRED_DB_ENGINE_SQL_PROPERTY = "db_engine"
    _DB_ENGINE_SQLITE = "sqlite"
    _OPTIONAL_FOLDER_PATH_SQLITE_PROPERTY = "sqlite_folder_path"
    _OPTIONAL_FILE_EXTENSION_SQLITE_PROPERTY = "sqlite_file_extension"
    _OPTIONAL_DB_PASSWORD_SQL_PROPERTY = "db_password"
    _OPTIONAL_DB_USERNAME_SQL_PROPERTY = "db_username"
    _OPTIONAL_PORT_SQL_PROPERTY = "db_port"
    _OPTIONAL_HOST_SQL_PROPERTY = "db_host"
    _OPTIONAL_DRIVER_SQL_PROPERTY = "db_driver"
    _OPTIONAL_DB_EXTRA_ARGS_SQL_PROPERTY = "db_extra_args"
    _OPTIONAL_EXPOSED_TYPE_SQL_PROPERTY = "exposed_type"
    # SQL_TABLE
    _REQUIRED_TABLE_NAME_SQL_TABLE_PROPERTY = "table_name"
    # SQL
    _REQUIRED_READ_QUERY_SQL_PROPERTY = "read_query"
    _REQUIRED_WRITE_QUERY_BUILDER_SQL_PROPERTY = "write_query_builder"
    _OPTIONAL_APPEND_QUERY_BUILDER_SQL_PROPERTY = "append_query_builder"
    # MONGO
    _REQUIRED_DB_NAME_MONGO_PROPERTY = "db_name"
    _REQUIRED_COLLECTION_NAME_MONGO_PROPERTY = "collection_name"
    _OPTIONAL_CUSTOM_DOCUMENT_MONGO_PROPERTY = "custom_document"
    _OPTIONAL_USERNAME_MONGO_PROPERTY = "db_username"
    _OPTIONAL_PASSWORD_MONGO_PROPERTY = "db_password"
    _OPTIONAL_HOST_MONGO_PROPERTY = "db_host"
    _OPTIONAL_PORT_MONGO_PROPERTY = "db_port"
    _OPTIONAL_DRIVER_MONGO_PROPERTY = "db_driver"
    _OPTIONAL_DB_EXTRA_ARGS_MONGO_PROPERTY = "db_extra_args"
    # Pickle
    _OPTIONAL_DEFAULT_PATH_PICKLE_PROPERTY = "default_path"
    _OPTIONAL_DEFAULT_DATA_PICKLE_PROPERTY = "default_data"
    # JSON
    _OPTIONAL_ENCODER_JSON_PROPERTY = "encoder"
    _OPTIONAL_DECODER_JSON_PROPERTY = "decoder"
    _OPTIONAL_DEFAULT_PATH_JSON_PROPERTY = "default_path"
    # Parquet
    _OPTIONAL_EXPOSED_TYPE_PARQUET_PROPERTY = "exposed_type"
    _OPTIONAL_DEFAULT_PATH_PARQUET_PROPERTY = "default_path"
    _OPTIONAL_ENGINE_PARQUET_PROPERTY = "engine"
    _OPTIONAL_COMPRESSION_PARQUET_PROPERTY = "compression"
    _OPTIONAL_READ_KWARGS_PARQUET_PROPERTY = "read_kwargs"
    _OPTIONAL_WRITE_KWARGS_PARQUET_PROPERTY = "write_kwargs"
    # S3 object
    _REQUIRED_AWS_ACCESS_KEY_ID_PROPERTY = "aws_access_key"
    _REQUIRED_AWS_SECRET_ACCESS_KEY_PROPERTY = "aws_secret_access_key"
    _REQUIRED_AWS_STORAGE_BUCKET_NAME_PROPERTY = "aws_s3_bucket_name"
    _REQUIRED_AWS_S3_OBJECT_KEY_PROPERTY = "aws_s3_object_key"
    _OPTIONAL_AWS_REGION_PROPERTY = "aws_region"
    _OPTIONAL_AWS_S3_OBJECT_PARAMETERS_PROPERTY = "aws_s3_object_parameters"

    _REQUIRED_PROPERTIES: Dict[str, List] = {
        _STORAGE_TYPE_VALUE_PICKLE: [],
        _STORAGE_TYPE_VALUE_SQL_TABLE: [
            _REQUIRED_DB_NAME_SQL_PROPERTY,
            _REQUIRED_DB_ENGINE_SQL_PROPERTY,
            _REQUIRED_TABLE_NAME_SQL_TABLE_PROPERTY,
        ],
        _STORAGE_TYPE_VALUE_SQL: [
            _REQUIRED_DB_NAME_SQL_PROPERTY,
            _REQUIRED_DB_ENGINE_SQL_PROPERTY,
            _REQUIRED_READ_QUERY_SQL_PROPERTY,
            _REQUIRED_WRITE_QUERY_BUILDER_SQL_PROPERTY,
        ],
        _STORAGE_TYPE_VALUE_MONGO_COLLECTION: [
            _REQUIRED_DB_NAME_MONGO_PROPERTY,
            _REQUIRED_COLLECTION_NAME_MONGO_PROPERTY,
        ],
        _STORAGE_TYPE_VALUE_CSV: [],
        _STORAGE_TYPE_VALUE_EXCEL: [],
        _STORAGE_TYPE_VALUE_IN_MEMORY: [],
        _STORAGE_TYPE_VALUE_GENERIC: [],
        _STORAGE_TYPE_VALUE_JSON: [],
        _STORAGE_TYPE_VALUE_PARQUET: [],
        _STORAGE_TYPE_VALUE_S3_OBJECT: [
            _REQUIRED_AWS_ACCESS_KEY_ID_PROPERTY,
            _REQUIRED_AWS_SECRET_ACCESS_KEY_PROPERTY,
            _REQUIRED_AWS_STORAGE_BUCKET_NAME_PROPERTY,
            _REQUIRED_AWS_S3_OBJECT_KEY_PROPERTY,
        ],
    }

    _OPTIONAL_PROPERTIES = {
        _STORAGE_TYPE_VALUE_GENERIC: {
            _OPTIONAL_READ_FUNCTION_GENERIC_PROPERTY: None,
            _OPTIONAL_WRITE_FUNCTION_GENERIC_PROPERTY: None,
            _OPTIONAL_READ_FUNCTION_ARGS_GENERIC_PROPERTY: None,
            _OPTIONAL_WRITE_FUNCTION_ARGS_GENERIC_PROPERTY: None,
        },
        _STORAGE_TYPE_VALUE_CSV: {
            _OPTIONAL_DEFAULT_PATH_CSV_PROPERTY: None,
            _OPTIONAL_ENCODING_PROPERTY: _DEFAULT_ENCODING_VALUE,
            _OPTIONAL_HAS_HEADER_CSV_PROPERTY: True,
            _OPTIONAL_EXPOSED_TYPE_CSV_PROPERTY: _DEFAULT_EXPOSED_TYPE,
        },
        _STORAGE_TYPE_VALUE_EXCEL: {
            _OPTIONAL_DEFAULT_PATH_EXCEL_PROPERTY: None,
            _OPTIONAL_HAS_HEADER_EXCEL_PROPERTY: True,
            _OPTIONAL_SHEET_NAME_EXCEL_PROPERTY: None,
            _OPTIONAL_EXPOSED_TYPE_EXCEL_PROPERTY: _DEFAULT_EXPOSED_TYPE,
        },
        _STORAGE_TYPE_VALUE_IN_MEMORY: {_OPTIONAL_DEFAULT_DATA_IN_MEMORY_PROPERTY: None},
        _STORAGE_TYPE_VALUE_SQL_TABLE: {
            _OPTIONAL_DB_USERNAME_SQL_PROPERTY: None,
            _OPTIONAL_DB_PASSWORD_SQL_PROPERTY: None,
            _OPTIONAL_HOST_SQL_PROPERTY: "localhost",
            _OPTIONAL_PORT_SQL_PROPERTY: 1433,
            _OPTIONAL_DRIVER_SQL_PROPERTY: "",
            _OPTIONAL_FOLDER_PATH_SQLITE_PROPERTY: None,
            _OPTIONAL_FILE_EXTENSION_SQLITE_PROPERTY: ".db",
            _OPTIONAL_DB_EXTRA_ARGS_SQL_PROPERTY: None,
            _OPTIONAL_EXPOSED_TYPE_SQL_PROPERTY: _DEFAULT_EXPOSED_TYPE,
        },
        _STORAGE_TYPE_VALUE_SQL: {
            _OPTIONAL_DB_USERNAME_SQL_PROPERTY: None,
            _OPTIONAL_DB_PASSWORD_SQL_PROPERTY: None,
            _OPTIONAL_HOST_SQL_PROPERTY: "localhost",
            _OPTIONAL_PORT_SQL_PROPERTY: 1433,
            _OPTIONAL_DRIVER_SQL_PROPERTY: "",
            _OPTIONAL_APPEND_QUERY_BUILDER_SQL_PROPERTY: None,
            _OPTIONAL_FOLDER_PATH_SQLITE_PROPERTY: None,
            _OPTIONAL_FILE_EXTENSION_SQLITE_PROPERTY: ".db",
            _OPTIONAL_DB_EXTRA_ARGS_SQL_PROPERTY: None,
            _OPTIONAL_EXPOSED_TYPE_SQL_PROPERTY: _DEFAULT_EXPOSED_TYPE,
        },
        _STORAGE_TYPE_VALUE_MONGO_COLLECTION: {
            _OPTIONAL_CUSTOM_DOCUMENT_MONGO_PROPERTY: MongoDefaultDocument,
            _OPTIONAL_USERNAME_MONGO_PROPERTY: "",
            _OPTIONAL_PASSWORD_MONGO_PROPERTY: "",
            _OPTIONAL_HOST_MONGO_PROPERTY: "localhost",
            _OPTIONAL_PORT_MONGO_PROPERTY: 27017,
            _OPTIONAL_DRIVER_MONGO_PROPERTY: "",
            _OPTIONAL_DB_EXTRA_ARGS_MONGO_PROPERTY: None,
        },
        _STORAGE_TYPE_VALUE_PICKLE: {
            _OPTIONAL_DEFAULT_PATH_PICKLE_PROPERTY: None,
            _OPTIONAL_DEFAULT_DATA_PICKLE_PROPERTY: None,
        },
        _STORAGE_TYPE_VALUE_JSON: {
            _OPTIONAL_DEFAULT_PATH_JSON_PROPERTY: None,
            _OPTIONAL_ENCODING_PROPERTY: _DEFAULT_ENCODING_VALUE,
            _OPTIONAL_ENCODER_JSON_PROPERTY: None,
            _OPTIONAL_DECODER_JSON_PROPERTY: None,
        },
        _STORAGE_TYPE_VALUE_PARQUET: {
            _OPTIONAL_DEFAULT_PATH_PARQUET_PROPERTY: None,
            _OPTIONAL_ENGINE_PARQUET_PROPERTY: "pyarrow",
            _OPTIONAL_COMPRESSION_PARQUET_PROPERTY: "snappy",
            _OPTIONAL_READ_KWARGS_PARQUET_PROPERTY: None,
            _OPTIONAL_WRITE_KWARGS_PARQUET_PROPERTY: None,
            _OPTIONAL_EXPOSED_TYPE_PARQUET_PROPERTY: _DEFAULT_EXPOSED_TYPE,
        },
        _STORAGE_TYPE_VALUE_S3_OBJECT: {
            _OPTIONAL_AWS_REGION_PROPERTY: None,
            _OPTIONAL_AWS_S3_OBJECT_PARAMETERS_PROPERTY: None,
        },
    }
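
    # Illustration (comment only, not executed): these two tables drive validation and
    # defaulting. For instance, a config with storage_type "sql_table" must supply
    # db_name, db_engine, and table_name (_REQUIRED_PROPERTIES), while any optional key
    # it omits (db_host, db_port, sqlite_file_extension, ...) is later filled from the
    # _OPTIONAL_PROPERTIES defaults by `_update()` below — e.g. a minimal sql_table
    # config ends up with db_host="localhost" and db_port=1433.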

    _SCOPE_KEY = "scope"
    _DEFAULT_SCOPE = Scope.SCENARIO

    _VALIDITY_PERIOD_KEY = "validity_period"
    _DEFAULT_VALIDITY_PERIOD = None

    def __init__(
        self,
        id: str,
        storage_type: Optional[str] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ):
        self._storage_type = storage_type
        self._scope = scope
        self._validity_period = validity_period
        super().__init__(id, **properties)

        # The modin exposed type is deprecated in favor of pandas since Taipy 3.1.0.
        # Rewrite the stored properties (not the local `properties` dict, which is
        # discarded once super().__init__ has run) so the replacement actually sticks.
        if self._properties.get(self._EXPOSED_TYPE_KEY) == DataNodeConfig._EXPOSED_TYPE_MODIN:
            _warn_deprecated(
                "exposed_type='modin'",
                suggest="exposed_type='pandas'",
            )
            self._properties[self._EXPOSED_TYPE_KEY] = DataNodeConfig._EXPOSED_TYPE_PANDAS

    def __copy__(self):
        return DataNodeConfig(self.id, self._storage_type, self._scope, self._validity_period, **copy(self._properties))

    def __getattr__(self, item: str) -> Optional[Any]:
        return _tpl._replace_templates(self._properties.get(item))

    @property
    def storage_type(self):
        return _tpl._replace_templates(self._storage_type)

    @storage_type.setter  # type: ignore
    @_ConfigBlocker._check()
    def storage_type(self, val):
        self._storage_type = val

    @property
    def scope(self):
        return _tpl._replace_templates(self._scope)

    @scope.setter  # type: ignore
    @_ConfigBlocker._check()
    def scope(self, val):
        self._scope = val

    @property
    def validity_period(self):
        return _tpl._replace_templates(self._validity_period)

    @validity_period.setter  # type: ignore
    @_ConfigBlocker._check()
    def validity_period(self, val):
        self._validity_period = val

    @property
    def cacheable(self):
        _warn_deprecated("cacheable", suggest="the skippable feature")
        cacheable = self._properties.get("cacheable")
        return _tpl._replace_templates(cacheable) if cacheable is not None else False

    @cacheable.setter  # type: ignore
    @_ConfigBlocker._check()
    def cacheable(self, val):
        _warn_deprecated("cacheable", suggest="the skippable feature")
        self._properties["cacheable"] = val

    @classmethod
    def default_config(cls):
        return DataNodeConfig(
            cls._DEFAULT_KEY, cls._DEFAULT_STORAGE_TYPE, cls._DEFAULT_SCOPE, cls._DEFAULT_VALIDITY_PERIOD
        )

    def _clean(self):
        self._storage_type = self._DEFAULT_STORAGE_TYPE
        self._scope = self._DEFAULT_SCOPE
        self._validity_period = self._DEFAULT_VALIDITY_PERIOD
        self._properties.clear()

    def _to_dict(self):
        as_dict = {}
        if self._storage_type is not None:
            as_dict[self._STORAGE_TYPE_KEY] = self._storage_type
        if self._scope is not None:
            as_dict[self._SCOPE_KEY] = self._scope
        if self._validity_period is not None:
            as_dict[self._VALIDITY_PERIOD_KEY] = self._validity_period
        as_dict.update(self._properties)
        return as_dict

    @classmethod
    def _from_dict(cls, as_dict: Dict[str, Any], id: str, config: Optional[_Config] = None):
        as_dict.pop(cls._ID_KEY, id)
        storage_type = as_dict.pop(cls._STORAGE_TYPE_KEY, None)
        scope = as_dict.pop(cls._SCOPE_KEY, None)
        validity_period = as_dict.pop(cls._VALIDITY_PERIOD_KEY, None)
        return DataNodeConfig(id=id, storage_type=storage_type, scope=scope, validity_period=validity_period, **as_dict)

    def _update(self, as_dict, default_section=None):
        self._storage_type = as_dict.pop(self._STORAGE_TYPE_KEY, self._storage_type)
        if self._storage_type is None and default_section:
            self._storage_type = default_section.storage_type

        self._scope = as_dict.pop(self._SCOPE_KEY, self._scope)
        if self._scope is None and default_section:
            if default_section.scope and self._storage_type == default_section.storage_type:
                self._scope = default_section.scope
            else:
                self._scope = self._DEFAULT_SCOPE

        self._validity_period = as_dict.pop(self._VALIDITY_PERIOD_KEY, self._validity_period)
        if self._validity_period is None and default_section:
            self._validity_period = default_section.validity_period

        self._properties.update(as_dict)
        if default_section and self._storage_type == default_section.storage_type:
            self._properties = {**default_section.properties, **self._properties}

        # Assign default values to optional properties if they are not defined by the user.
        if self._OPTIONAL_PROPERTIES.get(self._storage_type):
            for optional_property, default_value in self._OPTIONAL_PROPERTIES[self._storage_type].items():
                if default_value is not None and self._properties.get(optional_property) is None:
                    self._properties[optional_property] = default_value

    @staticmethod
    def _set_default_configuration(
        storage_type: str, scope: Optional[Scope] = None, validity_period: Optional[timedelta] = None, **properties
    ) -> "DataNodeConfig":
        """Set the default values for data node configurations.

        This function creates the _default data node configuration_ object,
        where all data node configuration objects will find their default
        values when needed.

        Parameters:
            storage_type (str): The default storage type for all data node configurations.
                The possible values are *"pickle"* (the default value), *"csv"*, *"excel"*,
                *"sql_table"*, *"sql"*, *"mongo_collection"*, *"in_memory"*, *"json"*,
                *"parquet"*, *"generic"*, or *"s3_object"*.
            scope (Optional[Scope^]): The default scope for all data node configurations.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The default data node configuration.
        """  # noqa: E501
        section = DataNodeConfig(_Config.DEFAULT_KEY, storage_type, scope, validity_period, **properties)
        Config._register_default(section)
        return Config.sections[DataNodeConfig.name][_Config.DEFAULT_KEY]
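
    # Usage sketch (hedged): this method backs the public
    # `Config.set_default_data_node_configuration()` API referenced in the docstrings.
    # For example:
    #
    #     Config.set_default_data_node_configuration(
    #         storage_type="csv",
    #         scope=Scope.GLOBAL,
    #         validity_period=timedelta(days=1),
    #     )
    #
    # After this call, any data node config created without an explicit storage_type,
    # scope, or validity_period inherits these values.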

    @classmethod
    def _configure_from(
        cls,
        source_configuration: "DataNodeConfig",
        id: str,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new data node configuration from an existing one.

        Parameters:
            source_configuration (DataNodeConfig): The source data node configuration.
            id (str): The unique identifier of the new data node configuration.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.<br/>
                The default properties are the properties of the source data node configuration.

        Returns:
            The new data node configuration.
        """
        scope = properties.pop("scope", None) or source_configuration.scope
        validity_period = properties.pop("validity_period", None) or source_configuration.validity_period
        properties = {**source_configuration.properties, **properties}  # type: ignore

        return cls.__configure(id, source_configuration.storage_type, scope, validity_period, **properties)
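
    # Usage sketch (hedged): exposed publicly as `Config.configure_data_node_from()`.
    # New keyword arguments override the properties copied from the source:
    #
    #     template_cfg = Config.configure_csv_data_node("template", has_header=True)
    #     clone_cfg = Config.configure_data_node_from(
    #         source_configuration=template_cfg,
    #         id="clone",
    #         default_path="data/clone.csv",  # overrides/extends the copied properties
    #     )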

    @classmethod
    def _configure(
        cls,
        id: str,
        storage_type: Optional[str] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new data node configuration.

        Parameters:
            id (str): The unique identifier of the new data node configuration.
            storage_type (Optional[str]): The data node configuration storage type. The possible values
                are None (which is the default value of *"pickle"*, unless it has been overloaded by the
                *storage_type* value set in the default data node configuration
                (see `(Config.)set_default_data_node_configuration()^`)), *"pickle"*, *"csv"*, *"excel"*,
                *"sql_table"*, *"sql"*, *"json"*, *"parquet"*, *"mongo_collection"*, *"in_memory"*,
                *"generic"*, or *"s3_object"*.
            scope (Optional[Scope^]): The scope of the data node configuration.<br/>
                The default value is `Scope.SCENARIO` (or the one specified in
                `(Config.)set_default_data_node_configuration()^`).
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new data node configuration.
        """  # noqa: E501
        configuration_map: Dict[str, Callable] = {
            cls._STORAGE_TYPE_VALUE_PICKLE: cls._configure_pickle,
            cls._STORAGE_TYPE_VALUE_SQL_TABLE: cls._configure_sql_table,
            cls._STORAGE_TYPE_VALUE_SQL: cls._configure_sql,
            cls._STORAGE_TYPE_VALUE_MONGO_COLLECTION: cls._configure_mongo_collection,
            cls._STORAGE_TYPE_VALUE_CSV: cls._configure_csv,
            cls._STORAGE_TYPE_VALUE_EXCEL: cls._configure_excel,
            cls._STORAGE_TYPE_VALUE_IN_MEMORY: cls._configure_in_memory,
            cls._STORAGE_TYPE_VALUE_GENERIC: cls._configure_generic,
            cls._STORAGE_TYPE_VALUE_JSON: cls._configure_json,
            cls._STORAGE_TYPE_VALUE_PARQUET: cls._configure_parquet,
            cls._STORAGE_TYPE_VALUE_S3_OBJECT: cls._configure_s3_object,
        }

        if storage_type in cls._ALL_STORAGE_TYPES:
            return configuration_map[storage_type](id=id, scope=scope, validity_period=validity_period, **properties)

        return cls.__configure(id, storage_type, scope, validity_period, **properties)
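
    # Usage sketch (hedged): this generic entry point backs the public
    # `Config.configure_data_node()` API. A known storage_type is dispatched to the
    # matching specialized method below; anything else is registered as-is:
    #
    #     cfg = Config.configure_data_node(
    #         id="intermediate_result",
    #         storage_type="parquet",
    #         compression="gzip",
    #     )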

    @classmethod
    def _configure_csv(
        cls,
        id: str,
        default_path: Optional[str] = None,
        encoding: Optional[str] = None,
        has_header: Optional[bool] = None,
        exposed_type: Optional[str] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new CSV data node configuration.

        Parameters:
            id (str): The unique identifier of the new CSV data node configuration.
            default_path (Optional[str]): The default path of the CSV file.
            encoding (Optional[str]): The encoding of the CSV file.
            has_header (Optional[bool]): If True, indicates that the CSV file has a header.
            exposed_type (Optional[str]): The exposed type of the data read from the CSV file.<br/>
                The default value is `pandas`.
            scope (Optional[Scope^]): The scope of the CSV data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new CSV data node configuration.
        """  # noqa: E501
        if default_path is not None:
            properties[cls._OPTIONAL_DEFAULT_PATH_CSV_PROPERTY] = default_path
        if encoding is not None:
            properties[cls._OPTIONAL_ENCODING_PROPERTY] = encoding
        if has_header is not None:
            properties[cls._OPTIONAL_HAS_HEADER_CSV_PROPERTY] = has_header
        if exposed_type is not None:
            properties[cls._OPTIONAL_EXPOSED_TYPE_CSV_PROPERTY] = exposed_type

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_CSV, scope, validity_period, **properties)
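
    # Usage sketch (hedged): exposed publicly as `Config.configure_csv_data_node()`:
    #
    #     sales_cfg = Config.configure_csv_data_node(
    #         id="sales_history",
    #         default_path="data/sales.csv",
    #         has_header=True,
    #         exposed_type="pandas",  # or "numpy"
    #     )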

    @classmethod
    def _configure_json(
        cls,
        id: str,
        default_path: Optional[str] = None,
        encoding: Optional[str] = None,
        encoder: Optional[json.JSONEncoder] = None,
        decoder: Optional[json.JSONDecoder] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new JSON data node configuration.

        Parameters:
            id (str): The unique identifier of the new JSON data node configuration.
            default_path (Optional[str]): The default path of the JSON file.
            encoding (Optional[str]): The encoding of the JSON file.
            encoder (Optional[json.JSONEncoder]): The JSON encoder used to write data into the JSON file.
            decoder (Optional[json.JSONDecoder]): The JSON decoder used to read data from the JSON file.
            scope (Optional[Scope^]): The scope of the JSON data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new JSON data node configuration.
        """  # noqa: E501
        if default_path is not None:
            properties[cls._OPTIONAL_DEFAULT_PATH_JSON_PROPERTY] = default_path
        if encoding is not None:
            properties[cls._OPTIONAL_ENCODING_PROPERTY] = encoding
        if encoder is not None:
            properties[cls._OPTIONAL_ENCODER_JSON_PROPERTY] = encoder
        if decoder is not None:
            properties[cls._OPTIONAL_DECODER_JSON_PROPERTY] = decoder

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_JSON, scope, validity_period, **properties)
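
    # Usage sketch (hedged): exposed publicly as `Config.configure_json_data_node()`.
    # Custom encoder/decoder classes let the node round-trip objects the stock json
    # module cannot handle; `MyEncoder`/`MyDecoder` below are hypothetical subclasses
    # of json.JSONEncoder and json.JSONDecoder:
    #
    #     cfg = Config.configure_json_data_node(
    #         id="settings",
    #         default_path="data/settings.json",
    #         encoder=MyEncoder,
    #         decoder=MyDecoder,
    #     )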

    @classmethod
    def _configure_parquet(
        cls,
        id: str,
        default_path: Optional[str] = None,
        engine: Optional[str] = None,
        compression: Optional[str] = None,
        read_kwargs: Optional[Dict] = None,
        write_kwargs: Optional[Dict] = None,
        exposed_type: Optional[str] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new Parquet data node configuration.

        Parameters:
            id (str): The unique identifier of the new Parquet data node configuration.
            default_path (Optional[str]): The default path of the Parquet file.
            engine (Optional[str]): Parquet library to use. Possible values are *"fastparquet"* or
                *"pyarrow"*.<br/>
                The default value is *"pyarrow"*.
            compression (Optional[str]): Name of the compression to use. Possible values are *"snappy"*,
                *"gzip"*, *"brotli"*, or *"none"* (no compression). The default value is *"snappy"*.
            read_kwargs (Optional[dict]): Additional parameters passed to the `pandas.read_parquet()`
                function.
            write_kwargs (Optional[dict]): Additional parameters passed to the
                `pandas.DataFrame.to_parquet()` function.<br/>
                The parameters in *read_kwargs* and *write_kwargs* have a **higher precedence** than the
                top-level parameters which are also passed to pandas.
            exposed_type (Optional[str]): The exposed type of the data read from the Parquet file.<br/>
                The default value is `pandas`.
            scope (Optional[Scope^]): The scope of the Parquet data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new Parquet data node configuration.
        """  # noqa: E501
        if default_path is not None:
            properties[cls._OPTIONAL_DEFAULT_PATH_PARQUET_PROPERTY] = default_path
        if engine is not None:
            properties[cls._OPTIONAL_ENGINE_PARQUET_PROPERTY] = engine
        if compression is not None:
            properties[cls._OPTIONAL_COMPRESSION_PARQUET_PROPERTY] = compression
        if read_kwargs is not None:
            properties[cls._OPTIONAL_READ_KWARGS_PARQUET_PROPERTY] = read_kwargs
        if write_kwargs is not None:
            properties[cls._OPTIONAL_WRITE_KWARGS_PARQUET_PROPERTY] = write_kwargs
        if exposed_type is not None:
            properties[cls._OPTIONAL_EXPOSED_TYPE_PARQUET_PROPERTY] = exposed_type

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_PARQUET, scope, validity_period, **properties)
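
    # Usage sketch (hedged): exposed publicly as `Config.configure_parquet_data_node()`.
    # read_kwargs/write_kwargs are forwarded to pandas and take precedence over the
    # top-level engine/compression parameters:
    #
    #     cfg = Config.configure_parquet_data_node(
    #         id="features",
    #         default_path="data/features.parquet",
    #         engine="pyarrow",
    #         compression="snappy",
    #         read_kwargs={"columns": ["f1", "f2"]},
    #     )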

    @classmethod
    def _configure_excel(
        cls,
        id: str,
        default_path: Optional[str] = None,
        has_header: Optional[bool] = None,
        sheet_name: Optional[Union[List[str], str]] = None,
        exposed_type: Optional[str] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new Excel data node configuration.

        Parameters:
            id (str): The unique identifier of the new Excel data node configuration.
            default_path (Optional[str]): The path of the Excel file.
            has_header (Optional[bool]): If True, indicates that the Excel file has a header.
            sheet_name (Optional[Union[List[str], str]]): The list of sheet names to be used.
                It can also be a single sheet name provided as a string.
            exposed_type (Optional[str]): The exposed type of the data read from the Excel file.<br/>
                The default value is `pandas`.
            scope (Optional[Scope^]): The scope of the Excel data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new Excel data node configuration.
        """  # noqa: E501
        if default_path is not None:
            properties[cls._OPTIONAL_DEFAULT_PATH_EXCEL_PROPERTY] = default_path
        if has_header is not None:
            properties[cls._OPTIONAL_HAS_HEADER_EXCEL_PROPERTY] = has_header
        if sheet_name is not None:
            properties[cls._OPTIONAL_SHEET_NAME_EXCEL_PROPERTY] = sheet_name
        if exposed_type is not None:
            properties[cls._OPTIONAL_EXPOSED_TYPE_EXCEL_PROPERTY] = exposed_type

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_EXCEL, scope, validity_period, **properties)
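
    # Usage sketch (hedged): exposed publicly as `Config.configure_excel_data_node()`.
    # With several sheet names, reads return one dataset per sheet:
    #
    #     cfg = Config.configure_excel_data_node(
    #         id="budget",
    #         default_path="data/budget.xlsx",
    #         sheet_name=["2023", "2024"],
    #     )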

    @classmethod
    def _configure_generic(
        cls,
        id: str,
        read_fct: Optional[Callable] = None,
        write_fct: Optional[Callable] = None,
        read_fct_args: Optional[List] = None,
        write_fct_args: Optional[List] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new generic data node configuration.

        Parameters:
            id (str): The unique identifier of the new generic data node configuration.
            read_fct (Optional[Callable]): The Python function called to read the data.
            write_fct (Optional[Callable]): The Python function called to write the data.
                The provided function must have at least one parameter that receives the data to be written.
            read_fct_args (Optional[List]): The list of arguments that are passed to the function
                *read_fct* to read data.
            write_fct_args (Optional[List]): The list of arguments that are passed to the function
                *write_fct* to write the data.
            scope (Optional[Scope^]): The scope of the Generic data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new Generic data node configuration.
        """  # noqa: E501
        if read_fct is not None:
            properties[cls._OPTIONAL_READ_FUNCTION_GENERIC_PROPERTY] = read_fct
        if write_fct is not None:
            properties[cls._OPTIONAL_WRITE_FUNCTION_GENERIC_PROPERTY] = write_fct
        if read_fct_args is not None:
            properties[cls._OPTIONAL_READ_FUNCTION_ARGS_GENERIC_PROPERTY] = read_fct_args
        if write_fct_args is not None:
            properties[cls._OPTIONAL_WRITE_FUNCTION_ARGS_GENERIC_PROPERTY] = write_fct_args

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_GENERIC, scope, validity_period, **properties)
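
    # Usage sketch (hedged): exposed publicly as `Config.configure_generic_data_node()`.
    # The read/write callables plug arbitrary storage into Taipy; `custom_read` and
    # `custom_write` below are hypothetical user-defined functions:
    #
    #     cfg = Config.configure_generic_data_node(
    #         id="legacy_store",
    #         read_fct=custom_read,    # e.g. def custom_read(url): ...
    #         read_fct_args=["https://example.com/data"],
    #         write_fct=custom_write,  # e.g. def custom_write(data, url): ...
    #         write_fct_args=["https://example.com/data"],
    #     )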

    @classmethod
    def _configure_in_memory(
        cls,
        id: str,
        default_data: Optional[Any] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new *in-memory* data node configuration.

        Parameters:
            id (str): The unique identifier of the new in_memory data node configuration.
            default_data (Optional[any]): The default data of the data nodes instantiated from
                this in_memory data node configuration.
                If provided, note that *default_data* is stored as a configuration attribute,
                so it is designed for small data values like parameters, and it must be JSON serializable.
            scope (Optional[Scope^]): The scope of the in_memory data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new *in-memory* data node configuration.
        """  # noqa: E501
        if default_data is not None:
            properties[cls._OPTIONAL_DEFAULT_DATA_IN_MEMORY_PROPERTY] = default_data

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_IN_MEMORY, scope, validity_period, **properties)
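
    # Usage sketch (hedged): exposed publicly as `Config.configure_in_memory_data_node()`.
    # Remember that in_memory nodes can only be used when the JobConfig mode is
    # "development":
    #
    #     cfg = Config.configure_in_memory_data_node(id="threshold", default_data=0.75)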

    @classmethod
    def _configure_pickle(
        cls,
        id: str,
        default_path: Optional[str] = None,
        default_data: Optional[Any] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new pickle data node configuration.

        Parameters:
            id (str): The unique identifier of the new pickle data node configuration.
            default_path (Optional[str]): The path of the pickle file.
            default_data (Optional[any]): The default data of the data nodes instantiated from
                this pickle data node configuration.
                If provided, note that *default_data* is stored as a configuration attribute,
                so it is designed for small data values like parameters, and it must be JSON serializable.
            scope (Optional[Scope^]): The scope of the pickle data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new pickle data node configuration.
        """  # noqa: E501
        if default_path is not None:
            properties[cls._OPTIONAL_DEFAULT_PATH_PICKLE_PROPERTY] = default_path
        if default_data is not None:
            properties[cls._OPTIONAL_DEFAULT_DATA_PICKLE_PROPERTY] = default_data

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_PICKLE, scope, validity_period, **properties)
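
    # Usage sketch (hedged): exposed publicly as `Config.configure_pickle_data_node()`.
    # Pickle is the default storage type, so this is the most common starting point:
    #
    #     model_cfg = Config.configure_pickle_data_node(
    #         id="trained_model",
    #         default_path="models/model.p",
    #     )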

    @classmethod
    def _configure_sql_table(
        cls,
        id: str,
        db_name: str,
        db_engine: str,
        table_name: str,
        db_username: Optional[str] = None,
        db_password: Optional[str] = None,
        db_host: Optional[str] = None,
        db_port: Optional[int] = None,
        db_driver: Optional[str] = None,
        sqlite_folder_path: Optional[str] = None,
        sqlite_file_extension: Optional[str] = None,
        db_extra_args: Optional[Dict[str, Any]] = None,
        exposed_type: Optional[str] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new SQL table data node configuration.

        Parameters:
            id (str): The unique identifier of the new SQL table data node configuration.
            db_name (str): The database name, or the name of the SQLite database file.
            db_engine (str): The database engine. Possible values are *"sqlite"*, *"mssql"*, *"mysql"*,
                or *"postgresql"*.
            table_name (str): The name of the SQL table.
            db_username (Optional[str]): The database username. Required by the *"mssql"*, *"mysql"*, and
                *"postgresql"* engines.
            db_password (Optional[str]): The database password. Required by the *"mssql"*, *"mysql"*, and
                *"postgresql"* engines.
            db_host (Optional[str]): The database host.<br/>
                The default value is "localhost".
            db_port (Optional[int]): The database port.<br/>
                The default value is 1433.
            db_driver (Optional[str]): The database driver.
            sqlite_folder_path (Optional[str]): The path to the folder that contains the SQLite file.<br/>
                The default value is the current working folder.
            sqlite_file_extension (Optional[str]): The file extension of the SQLite file.<br/>
                The default value is ".db".
            db_extra_args (Optional[dict[str, any]]): A dictionary of additional arguments to be passed
                into the database connection string.
            exposed_type (Optional[str]): The exposed type of the data read from the SQL table.<br/>
                The default value is "pandas".
            scope (Optional[Scope^]): The scope of the SQL table data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new SQL table data node configuration.
        """  # noqa: E501
        properties.update(
            {
                cls._REQUIRED_DB_NAME_SQL_PROPERTY: db_name,
                cls._REQUIRED_DB_ENGINE_SQL_PROPERTY: db_engine,
                cls._REQUIRED_TABLE_NAME_SQL_TABLE_PROPERTY: table_name,
            }
        )
        if db_username is not None:
            properties[cls._OPTIONAL_DB_USERNAME_SQL_PROPERTY] = db_username
        if db_password is not None:
            properties[cls._OPTIONAL_DB_PASSWORD_SQL_PROPERTY] = db_password
        if db_host is not None:
            properties[cls._OPTIONAL_HOST_SQL_PROPERTY] = db_host
        if db_port is not None:
            properties[cls._OPTIONAL_PORT_SQL_PROPERTY] = db_port
        if db_driver is not None:
            properties[cls._OPTIONAL_DRIVER_SQL_PROPERTY] = db_driver
        if sqlite_folder_path is not None:
            properties[cls._OPTIONAL_FOLDER_PATH_SQLITE_PROPERTY] = sqlite_folder_path
        if sqlite_file_extension is not None:
            properties[cls._OPTIONAL_FILE_EXTENSION_SQLITE_PROPERTY] = sqlite_file_extension
        if db_extra_args is not None:
            properties[cls._OPTIONAL_DB_EXTRA_ARGS_SQL_PROPERTY] = db_extra_args
        if exposed_type is not None:
            properties[cls._OPTIONAL_EXPOSED_TYPE_SQL_PROPERTY] = exposed_type

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_SQL_TABLE, scope, validity_period, **properties)
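
    # Usage sketch (hedged): exposed publicly as `Config.configure_sql_table_data_node()`.
    # With the "sqlite" engine, only db_name and table_name matter; the credential and
    # host/port parameters apply to the server-based engines:
    #
    #     cfg = Config.configure_sql_table_data_node(
    #         id="orders",
    #         db_name="shop",  # resolves to shop.db under sqlite_folder_path
    #         db_engine="sqlite",
    #         table_name="orders",
    #         sqlite_folder_path="data",
    #     )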

    @classmethod
    def _configure_sql(
        cls,
        id: str,
        db_name: str,
        db_engine: str,
        read_query: str,
        write_query_builder: Callable,
        append_query_builder: Optional[Callable] = None,
        db_username: Optional[str] = None,
        db_password: Optional[str] = None,
        db_host: Optional[str] = None,
        db_port: Optional[int] = None,
        db_driver: Optional[str] = None,
        sqlite_folder_path: Optional[str] = None,
        sqlite_file_extension: Optional[str] = None,
        db_extra_args: Optional[Dict[str, Any]] = None,
        exposed_type: Optional[str] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new SQL data node configuration.

        Parameters:
            id (str): The unique identifier of the new SQL data node configuration.
            db_name (str): The database name, or the name of the SQLite database file.
            db_engine (str): The database engine. Possible values are *"sqlite"*, *"mssql"*, *"mysql"*,
                or *"postgresql"*.
            read_query (str): The SQL query string used to read the data from the database.
            write_query_builder (Callable): A callback function that takes the data as an input parameter
                and returns a list of SQL queries to be executed when writing data to the data node.
            append_query_builder (Optional[Callable]): A callback function that takes the data as an input parameter
                and returns a list of SQL queries to be executed when appending data to the data node.
            db_username (Optional[str]): The database username. Required by the *"mssql"*, *"mysql"*, and
                *"postgresql"* engines.
            db_password (Optional[str]): The database password. Required by the *"mssql"*, *"mysql"*, and
                *"postgresql"* engines.
            db_host (Optional[str]): The database host.<br/>
                The default value is "localhost".
            db_port (Optional[int]): The database port.<br/>
                The default value is 1433.
            db_driver (Optional[str]): The database driver.
            sqlite_folder_path (Optional[str]): The path to the folder that contains the SQLite file.<br/>
                The default value is the current working folder.
            sqlite_file_extension (Optional[str]): The file extension of the SQLite file.<br/>
                The default value is ".db".
            db_extra_args (Optional[dict[str, any]]): A dictionary of additional arguments to be passed
                into the database connection string.
            exposed_type (Optional[str]): The exposed type of the data read from the SQL query.<br/>
                The default value is "pandas".
            scope (Optional[Scope^]): The scope of the SQL data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new SQL data node configuration.
        """  # noqa: E501
        properties.update(
            {
                cls._REQUIRED_DB_NAME_SQL_PROPERTY: db_name,
                cls._REQUIRED_DB_ENGINE_SQL_PROPERTY: db_engine,
                cls._REQUIRED_READ_QUERY_SQL_PROPERTY: read_query,
                cls._REQUIRED_WRITE_QUERY_BUILDER_SQL_PROPERTY: write_query_builder,
            }
        )
        if append_query_builder is not None:
            properties[cls._OPTIONAL_APPEND_QUERY_BUILDER_SQL_PROPERTY] = append_query_builder
        if db_username is not None:
            properties[cls._OPTIONAL_DB_USERNAME_SQL_PROPERTY] = db_username
        if db_password is not None:
            properties[cls._OPTIONAL_DB_PASSWORD_SQL_PROPERTY] = db_password
        if db_host is not None:
            properties[cls._OPTIONAL_HOST_SQL_PROPERTY] = db_host
        if db_port is not None:
            properties[cls._OPTIONAL_PORT_SQL_PROPERTY] = db_port
        if db_driver is not None:
            properties[cls._OPTIONAL_DRIVER_SQL_PROPERTY] = db_driver
        if sqlite_folder_path is not None:
            properties[cls._OPTIONAL_FOLDER_PATH_SQLITE_PROPERTY] = sqlite_folder_path
        if sqlite_file_extension is not None:
            properties[cls._OPTIONAL_FILE_EXTENSION_SQLITE_PROPERTY] = sqlite_file_extension
        if db_extra_args is not None:
            properties[cls._OPTIONAL_DB_EXTRA_ARGS_SQL_PROPERTY] = db_extra_args
        if exposed_type is not None:
            properties[cls._OPTIONAL_EXPOSED_TYPE_SQL_PROPERTY] = exposed_type

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_SQL, scope, validity_period, **properties)
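
    # Usage sketch (hedged): exposed publicly as `Config.configure_sql_data_node()`.
    # The write_query_builder turns the data to be written into a list of queries;
    # `write_orders` below is a hypothetical builder:
    #
    #     def write_orders(data):
    #         return [
    #             "DELETE FROM orders",
    #             ("INSERT INTO orders VALUES (:id, :total)", data.to_dict("records")),
    #         ]
    #
    #     cfg = Config.configure_sql_data_node(
    #         id="orders",
    #         db_name="shop",
    #         db_engine="sqlite",
    #         read_query="SELECT * FROM orders",
    #         write_query_builder=write_orders,
    #     )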

    @classmethod
    def _configure_mongo_collection(
        cls,
        id: str,
        db_name: str,
        collection_name: str,
        custom_document: Optional[Any] = None,
        db_username: Optional[str] = None,
        db_password: Optional[str] = None,
        db_host: Optional[str] = None,
        db_port: Optional[int] = None,
        db_driver: Optional[str] = None,
        db_extra_args: Optional[Dict[str, Any]] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new Mongo collection data node configuration.

        Parameters:
            id (str): The unique identifier of the new Mongo collection data node configuration.
            db_name (str): The database name.
            collection_name (str): The collection in the database to read from and to write the data to.
            custom_document (Optional[any]): The custom document class to store, encode, and decode data
                when reading and writing to a Mongo collection. The custom_document can have an optional
                *decode()* method to decode data in the Mongo collection to a custom object, and an
                optional *encode()* method to encode the object's properties to the Mongo collection
                when writing.
            db_username (Optional[str]): The database username.
            db_password (Optional[str]): The database password.
            db_host (Optional[str]): The database host.<br/>
                The default value is "localhost".
            db_port (Optional[int]): The database port.<br/>
                The default value is 27017.
            db_driver (Optional[str]): The database driver.
            db_extra_args (Optional[dict[str, any]]): A dictionary of additional arguments to be passed
                into the database connection string.
            scope (Optional[Scope^]): The scope of the Mongo collection data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new Mongo collection data node configuration.
        """  # noqa: E501
        properties.update(
            {
                cls._REQUIRED_DB_NAME_MONGO_PROPERTY: db_name,
                cls._REQUIRED_COLLECTION_NAME_MONGO_PROPERTY: collection_name,
            }
        )
        if custom_document is not None:
            properties[cls._OPTIONAL_CUSTOM_DOCUMENT_MONGO_PROPERTY] = custom_document
        if db_username is not None:
            properties[cls._OPTIONAL_USERNAME_MONGO_PROPERTY] = db_username
        if db_password is not None:
            properties[cls._OPTIONAL_PASSWORD_MONGO_PROPERTY] = db_password
        if db_host is not None:
            properties[cls._OPTIONAL_HOST_MONGO_PROPERTY] = db_host
        if db_port is not None:
            properties[cls._OPTIONAL_PORT_MONGO_PROPERTY] = db_port
        if db_driver is not None:
            properties[cls._OPTIONAL_DRIVER_MONGO_PROPERTY] = db_driver
        if db_extra_args is not None:
            properties[cls._OPTIONAL_DB_EXTRA_ARGS_MONGO_PROPERTY] = db_extra_args

        return cls.__configure(
            id, DataNodeConfig._STORAGE_TYPE_VALUE_MONGO_COLLECTION, scope, validity_period, **properties
        )
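
    # Usage sketch (hedged): exposed publicly as
    # `Config.configure_mongo_collection_data_node()`:
    #
    #     cfg = Config.configure_mongo_collection_data_node(
    #         id="events",
    #         db_name="analytics",
    #         collection_name="events",
    #         db_username="reader",
    #         db_password="secret",  # placeholder credentials
    #     )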

    @classmethod
    def _configure_s3_object(
        cls,
        id: str,
        aws_access_key: str,
        aws_secret_access_key: str,
        aws_s3_bucket_name: str,
        aws_s3_object_key: str,
        aws_region: Optional[str] = None,
        aws_s3_object_parameters: Optional[Dict[str, Any]] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ) -> "DataNodeConfig":
        """Configure a new S3 object data node configuration.

        Parameters:
            id (str): The unique identifier of the new S3 object data node configuration.
            aws_access_key (str): The Amazon Web Services access key ID used to identify the account.
            aws_secret_access_key (str): The Amazon Web Services secret access key used to authenticate
                programmatic requests.
            aws_s3_bucket_name (str): The S3 bucket to read from and to write the data to.
            aws_s3_object_key (str): The key of the object in the S3 bucket to read from and to write to.
            aws_region (Optional[str]): Self-contained geographic area where Amazon Web Services (AWS)
                infrastructure is located.
            aws_s3_object_parameters (Optional[dict[str, any]]): A dictionary of additional arguments to be passed
                into the AWS S3 bucket access string.
            scope (Optional[Scope^]): The scope of the S3 object data node configuration.<br/>
                The default value is `Scope.SCENARIO`.
            validity_period (Optional[timedelta]): The duration since the last edit date for which the data node can be
                considered up-to-date. Once the validity period has passed, the data node is considered stale and
                relevant tasks will run even if they are skippable (see the
                [Task configs page](../../userman/task-orchestration/scenario-config.md#from-task-configurations)
                for more details).
                If *validity_period* is set to None, the data node is always up-to-date.
            **properties (dict[str, any]): A keyworded variable length list of additional arguments.

        Returns:
            The new S3 object data node configuration.
        """  # noqa: E501
        properties.update(
            {
                cls._REQUIRED_AWS_ACCESS_KEY_ID_PROPERTY: aws_access_key,
                cls._REQUIRED_AWS_SECRET_ACCESS_KEY_PROPERTY: aws_secret_access_key,
                cls._REQUIRED_AWS_STORAGE_BUCKET_NAME_PROPERTY: aws_s3_bucket_name,
                cls._REQUIRED_AWS_S3_OBJECT_KEY_PROPERTY: aws_s3_object_key,
            }
        )
        if aws_region is not None:
            properties[cls._OPTIONAL_AWS_REGION_PROPERTY] = aws_region
        if aws_s3_object_parameters is not None:
            properties[cls._OPTIONAL_AWS_S3_OBJECT_PARAMETERS_PROPERTY] = aws_s3_object_parameters

        return cls.__configure(id, DataNodeConfig._STORAGE_TYPE_VALUE_S3_OBJECT, scope, validity_period, **properties)
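
    # Usage sketch (hedged): exposed publicly as `Config.configure_s3_object_data_node()`.
    # All four AWS identifiers are required; region and object parameters are optional:
    #
    #     cfg = Config.configure_s3_object_data_node(
    #         id="raw_dump",
    #         aws_access_key="AKIA...",  # placeholder credentials
    #         aws_secret_access_key="...",
    #         aws_s3_bucket_name="my-bucket",
    #         aws_s3_object_key="dumps/latest.bin",
    #         aws_region="eu-west-1",
    #     )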

    @staticmethod
    def __configure(
        id: str,
        storage_type: Optional[str] = None,
        scope: Optional[Scope] = None,
        validity_period: Optional[timedelta] = None,
        **properties,
    ):
        section = DataNodeConfig(id, storage_type, scope, validity_period, **properties)
        Config._register(section)
        return Config.sections[DataNodeConfig.name][id]