FileCacheService.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. // METADATA // {"ai-commented":{"service":"xai"}}
  2. /*
  3. * Copyright (C) 2024 Puter Technologies Inc.
  4. *
  5. * This file is part of Puter.
  6. *
  7. * Puter is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published
  9. * by the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  19. */
  20. const { AdvancedBase } = require("@heyputer/putility");
  21. const { FileTracker } = require("./FileTracker");
  22. const { pausing_tee } = require("../../util/streamutil");
  23. /**
  24. * FileCacheService
  25. *
  26. * Initial naive cache implementation which stores whole files on disk.
  27. * It is assumed that files are only accessed by one server at a given time,
  28. * so this will need to be revised when ACL and sharing is implemented.
  29. */
  30. /**
  31. * @class FileCacheService
  32. * @extends AdvancedBase
  33. * @description
  34. * The FileCacheService class manages a cache for file storage and retrieval in the Puter system.
  35. * This service provides functionalities to:
  36. * - Cache files either in memory (precache) or on disk.
  37. * - Track file usage with FileTracker instances to manage cache eviction policies.
  38. * - Ensure files are stored within configured limits for both disk and memory usage.
  39. * - Provide methods for initializing the cache, storing, retrieving, and invalidating cached files.
  40. * - Register commands for managing and inspecting the cache status.
  41. *
  42. * @property {Object} MODULES - Static property containing module dependencies.
  43. * @property {number} disk_limit - The maximum size allowed for disk storage of cached files.
  44. * @property {number} disk_max_size - The maximum size of a file that can be cached on disk.
  45. * @property {number} precache_size - The size limit for memory (precache) storage.
  46. * @property {string} path - The directory path where cached files are stored on disk.
  47. * @property {number} ttl - Time-to-live for cached files, after which they are considered for eviction.
  48. * @property {Map} precache - A Map to hold files in memory.
  49. * @property {Map} uid_to_tracker - A Map to track each file with its FileTracker instance.
  50. */
  51. class FileCacheService extends AdvancedBase {
  52. static MODULES = {
  53. fs: require('fs'),
  54. path_: require('path'),
  55. }
  56. constructor ({ services, my_config }) {
  57. super({ services });
  58. this.log = services.get('log-service').create(this.constructor.name);
  59. this.errors = services.get('error-service').create(this.log);
  60. this.disk_limit = my_config.disk_limit;
  61. this.disk_max_size = my_config.disk_max_size;
  62. this.precache_size = my_config.precache_size;
  63. this.path = my_config.path;
  64. this.ttl = my_config.ttl || (5 * 1000);
  65. this.precache = new Map();
  66. this.uid_to_tracker = new Map();
  67. this.init();
  68. this._register_commands(services.get('commands'));
  69. }
  70. /**
  71. * Retrieves the amount of precache space currently used.
  72. *
  73. * @returns {number} The total size in bytes of files stored in the precache.
  74. */
  75. get _precache_used () {
  76. let used = 0;
  77. // Iterate over file trackers in PHASE_PRECACHE
  78. for (const tracker of this.uid_to_tracker.values()) {
  79. if (tracker.phase !== FileTracker.PHASE_PRECACHE) continue;
  80. used += tracker.size;
  81. }
  82. return used;
  83. }
  84. /**
  85. * Calculates the total disk space used by files in the PHASE_DISK phase.
  86. *
  87. * @returns {number} The total size of all files currently stored on disk.
  88. */
  89. get _disk_used () {
  90. let used = 0;
  91. // Iterate over file trackers in PHASE_DISK
  92. for (const tracker of this.uid_to_tracker.values()) {
  93. if (tracker.phase !== FileTracker.PHASE_DISK) continue;
  94. used += tracker.size;
  95. }
  96. return used;
  97. }
  98. /**
  99. * Initializes the cache by ensuring the storage directory exists.
  100. *
  101. * @async
  102. * @method init
  103. * @returns {Promise<void>} A promise that resolves when the initialization is complete.
  104. * @throws {Error} If there's an error creating the directory.
  105. */
  106. async init () {
  107. const { fs } = this.modules;
  108. // Ensure storage path exists
  109. await fs.promises.mkdir(this.path, { recursive: true });
  110. }
  111. _get_path (uid) {
  112. const { path_ } = this.modules;
  113. return path_.join(this.path, uid);
  114. }
  115. /**
  116. * Get the file path for a given file UID.
  117. *
  118. * @param {string} uid - The unique identifier of the file.
  119. * @returns {string} The full path where the file is stored on disk.
  120. */
  121. async try_get (fsNode, opt_log) {
  122. const tracker = this.uid_to_tracker.get(await fsNode.get('uid'));
  123. if ( ! tracker ) {
  124. return null;
  125. }
  126. if ( tracker.age > this.ttl ) {
  127. await this.invalidate(fsNode);
  128. return null;
  129. }
  130. tracker.touch();
  131. if ( tracker.phase === FileTracker.PHASE_PRECACHE ) {
  132. if ( opt_log ) opt_log.info('obtained from precache');
  133. return this.precache.get(await fsNode.get('uid'));
  134. }
  135. if ( tracker.phase === FileTracker.PHASE_DISK ) {
  136. if ( opt_log ) opt_log.info('obtained from disk');
  137. const { fs } = this.modules;
  138. const path = this._get_path(await fsNode.get('uid'));
  139. try {
  140. const data = await fs.promises.readFile(path);
  141. return data;
  142. } catch ( e ) {
  143. this.errors.report('file_cache:read_error', {
  144. source: e,
  145. trace: true,
  146. alarm: true,
  147. });
  148. }
  149. }
  150. this.errors.report('file_cache:unexpected-cache-state', {
  151. message: `Unexpected cache state: ${tracker.phase?.label}`,
  152. trace: true,
  153. alarm: true,
  154. extra: {
  155. phase: tracker.phase?.label,
  156. }
  157. });
  158. return null;
  159. }
  160. /**
  161. * Attempts to retrieve a cached file.
  162. *
  163. * This method first checks if the file exists in the cache by its UID.
  164. * If found, it verifies the file's age against the TTL (time-to-live).
  165. * If the file is expired, it invalidates the cache entry. Otherwise,
  166. * it returns the cached data or null if not found or invalidated.
  167. *
  168. * @param {Object} fsNode - The file system node representing the file.
  169. * @param {Object} [opt_log] - Optional logging service to log cache hits.
  170. * @returns {Promise<Buffer|null>} - The file data if found, or null.
  171. */
  172. async maybe_store (fsNode, stream) {
  173. const size = await fsNode.get('size');
  174. // If the file is too big, don't cache it
  175. if (size > this.disk_max_size) {
  176. return { cached: false };
  177. }
  178. const key = await fsNode.get('uid');
  179. // If the file is already cached, don't cache it again
  180. if (this.uid_to_tracker.has(key)) {
  181. return { cached: true };
  182. }
  183. // Add file tracker
  184. const tracker = new FileTracker({ key, size });
  185. this.uid_to_tracker.set(key, tracker);
  186. tracker.touch();
  187. // Store binary data in memory (precache)
  188. const data = Buffer.alloc(size);
  189. const [replace_stream, store_stream] = pausing_tee(stream, 2);
  190. (async () => {
  191. let offset = 0;
  192. for await (const chunk of store_stream) {
  193. chunk.copy(data, offset);
  194. offset += chunk.length;
  195. }
  196. await this._precache_make_room(size);
  197. this.precache.set(key, data);
  198. tracker.phase = FileTracker.PHASE_PRECACHE;
  199. })()
  200. return { cached: true, stream: replace_stream };
  201. }
  202. /**
  203. * Invalidates a file from the cache.
  204. *
  205. * @param {FsNode} fsNode - The file system node to invalidate.
  206. * @returns {Promise<void>} A promise that resolves when the file has been invalidated.
  207. *
  208. * @description
  209. * This method checks if the given file is in the cache, and if so, removes it from both
  210. * the precache and disk storage, ensuring that any references to this file are cleaned up.
  211. * If the file is not found in the cache, the method does nothing.
  212. */
  213. async invalidate (fsNode) {
  214. const key = await fsNode.get('uid');
  215. if ( ! this.uid_to_tracker.has(key) ) return;
  216. const tracker = this.uid_to_tracker.get(key);
  217. if ( tracker.phase === FileTracker.PHASE_PRECACHE ) {
  218. this.precache.delete(key);
  219. }
  220. if ( tracker.phase === FileTracker.PHASE_DISK ) {
  221. await this._disk_evict(tracker);
  222. }
  223. this.uid_to_tracker.delete(key);
  224. }
  225. /**
  226. * Invalidates a file from the cache.
  227. *
  228. * @param {Object} fsNode - The file system node representing the file to invalidate.
  229. * @returns {Promise<void>} A promise that resolves when the file has been invalidated from both precache and disk.
  230. *
  231. * @note This method removes the file's tracker from the cache, deletes the file from precache if present,
  232. * and ensures the file is evicted from disk storage if it exists there.
  233. */
  234. async _precache_make_room (size) {
  235. if (this._precache_used + size > this.precache_size) {
  236. await this._precache_evict(
  237. this._precache_used + size - this.precache_size
  238. );
  239. }
  240. }
  241. /**
  242. * Evicts files from precache to make room for new files.
  243. * This method sorts all trackers by score and evicts the lowest scoring
  244. * files in precache phase until the specified capacity is freed.
  245. *
  246. * @param {number} capacity_needed - The amount of capacity (in bytes) that needs to be freed in precache.
  247. */
  248. async _precache_evict (capacity_needed) {
  249. // Sort by score from tracker
  250. const sorted = Array.from(this.uid_to_tracker.values())
  251. .sort((a, b) => b.score - a.score);
  252. let capacity = 0;
  253. for (const tracker of sorted) {
  254. if (tracker.phase !== FileTracker.PHASE_PRECACHE) continue;
  255. capacity += tracker.size;
  256. await this._maybe_promote_to_disk(tracker);
  257. if (capacity >= capacity_needed) break;
  258. }
  259. }
  260. /**
  261. * Evicts files from the precache to make room for new files.
  262. *
  263. * @param {number} capacity_needed - The amount of space needed to be freed in bytes.
  264. *
  265. * @description
  266. * This method sorts all cached files by their score in descending order,
  267. * then iterates through them to evict files from the precache to disk
  268. * until the required capacity is met. If a file is already on disk, it is skipped.
  269. */
  270. async _maybe_promote_to_disk (tracker) {
  271. if (tracker.phase !== FileTracker.PHASE_PRECACHE) return;
  272. // It's important to check that the score of this file is
  273. // higher than the combined score of the N files that
  274. // would be evicted to make room for it.
  275. const sorted = Array.from(this.uid_to_tracker.values())
  276. .sort((a, b) => b.score - a.score);
  277. let capacity = 0;
  278. let score_needed = 0;
  279. const capacity_needed = this._disk_used + tracker.size - this.disk_limit;
  280. for (const tracker of sorted) {
  281. if (tracker.phase !== FileTracker.PHASE_DISK) continue;
  282. capacity += tracker.size;
  283. score_needed += tracker.score;
  284. if (capacity >= capacity_needed) break;
  285. }
  286. if (tracker.score < score_needed) return;
  287. // Now we can remove the lowest scoring files
  288. // to make room for this file.
  289. capacity = 0;
  290. for (const tracker of sorted) {
  291. if (tracker.phase !== FileTracker.PHASE_DISK) continue;
  292. capacity += tracker.size;
  293. await this._disk_evict(tracker);
  294. if (capacity >= capacity_needed) break;
  295. }
  296. const { fs } = this.modules;
  297. const path = this._get_path(tracker.key);
  298. console.log(`precache fetch key`, tracker.key);
  299. const data = this.precache.get(tracker.key);
  300. await fs.promises.writeFile(path, data);
  301. this.precache.delete(tracker.key);
  302. tracker.phase = FileTracker.PHASE_DISK;
  303. }
  304. /**
  305. * Evicts a file from disk cache.
  306. *
  307. * @param {FileTracker} tracker - The FileTracker instance representing the file to be evicted.
  308. * @returns {Promise<void>} A promise that resolves when the file is evicted or if the tracker is not in the disk phase.
  309. *
  310. * @note This method ensures that the file is removed from the disk cache and the tracker's phase is updated to GONE.
  311. */
  312. async _disk_evict (tracker) {
  313. if (tracker.phase !== FileTracker.PHASE_DISK) return;
  314. const { fs } = this.modules;
  315. const path = this._get_path(tracker.key);
  316. await fs.promises.unlink(path);
  317. tracker.phase = FileTracker.PHASE_GONE;
  318. this.uid_to_tracker.delete(tracker.key);
  319. }
  320. _register_commands (commands) {
  321. commands.registerCommands('fsc', [
  322. {
  323. id: 'status',
  324. handler: async (args, log) => {
  325. const status = {
  326. precache: {
  327. used: this._precache_used,
  328. max: this.precache_size,
  329. },
  330. disk: {
  331. used: this._disk_used,
  332. max: this.disk_limit,
  333. },
  334. };
  335. log.log(JSON.stringify(status, null, 2));
  336. }
  337. }
  338. ]);
  339. }
  340. }
  341. module.exports = {
  342. FileCacheService
  343. };