# advanced_large_datasets.py
# Copyright 2021-2024 Avaiga Private Limited
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
# -----------------------------------------------------------------------------------------
# To execute this script, make sure that the taipy-gui package is installed in your
# Python environment and run:
# python <script>
# -----------------------------------------------------------------------------------------
from enum import Enum

import numpy as np
import pandas as pd

from taipy.gui import Gui
from taipy.gui.data.decimator import MinMaxDecimator
  21. # Processing techniques
  22. class Processing(Enum):
  23. # Don't process the data
  24. NONE = 0
  25. # Sub-sample the dataset
  26. SUB_SAMPLING = 1
  27. # Compute local average in the dataset
  28. AVERAGE = 2
  29. # Use decimation
  30. DECIMATION = 3
  31. # Choose a processing technique
  32. processing = Processing.DECIMATION
  33. # Generate a random dataset
  34. # Compute the 'X' data
  35. # Generate 50000 x values (a sequence of integers)
  36. x_values = np.linspace(1, 100, 50000)
  37. # Compute the 'Y' data
  38. # Define the combined log-sine function
  39. y_values = np.log(x_values) * np.sin(x_values / 5)
  40. # Introduce some noise
  41. # Create a mask with a True value with a 1 % probability
  42. noise_mask = np.random.rand(*y_values.shape) < 0.01
  43. # The noise values
  44. noise_values = np.random.uniform(-0.5, 0.5, size=np.sum(noise_mask))
  45. # Add the noise to the 'Y' values
  46. y_values_noise = np.copy(y_values) # Copy original array
  47. y_values_noise[noise_mask] += noise_values
  48. # Use no decimator by default
  49. decimator = None
  50. # Transform the 'Y' dataset with the chosen processing technique
  51. if processing == Processing.SUB_SAMPLING:
  52. # Pick one every 100 data points
  53. x_values = x_values[::100]
  54. y_values_noise = y_values_noise[::100]
  55. elif processing == Processing.AVERAGE:
  56. # Average of 100 successive values
  57. x_values = x_values[::100]
  58. y_values_noise = y_values_noise.reshape(-1, 100)
  59. # Compute the average of each group of 100 values
  60. y_values_noise = np.mean(y_values_noise, axis=1)
  61. elif processing == Processing.DECIMATION:
  62. # Use Taipy's decimation processing
  63. decimator = MinMaxDecimator(200)
  64. print(f"Using {str(processing)}: Dataset has {y_values_noise.size} data points") # noqa: F401, T201
  65. # Create the DataFrame
  66. data = pd.DataFrame({"X": x_values, "Y": y_values_noise})
  67. page = "<|{data}|chart|x=X|y=Y|mode=lines|decimator=decimator|>"
  68. if __name__ == "__main__":
  69. Gui(page).run(title="Chart - Advanced - Large datasets")