Sfoglia il codice sorgente

feat: add /healthcheck endpoint

KernelDeimos 1 anno fa
parent
commit
c166560ff4

+ 32 - 0
packages/backend/src/routers/healthcheck.js

@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2024 Puter Technologies Inc.
+ *
+ * This file is part of Puter.
+ *
+ * Puter is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+"use strict"
+const express = require('express');
+const router = new express.Router();
+
+// -----------------------------------------------------------------------//
+// GET /healthcheck: replies with the server-health status as JSON
+// -----------------------------------------------------------------------//
+router.get('/healthcheck', async (req, res) => {
+    const svc_serverHealth = req.services.get('server-health'); // look up the service through the request's service container
+
+    const status = await svc_serverHealth.get_status(); // { ok } plus a `failed` name list when any check failed
+    res.json(status);
+})
+module.exports = router

+ 1 - 0
packages/backend/src/services/PuterAPIService.js

@@ -71,6 +71,7 @@ class PuterAPIService extends BaseService {
         app.use(require('../routers/sites'))
         // app.use(require('../routers/filesystem_api/stat'))
         app.use(require('../routers/suggest_apps'))
+        app.use(require('../routers/healthcheck'))
         app.use(require('../routers/test'))
         app.use(require('../routers/update-taskbar-items'))
         require('../routers/whoami')(app);

+ 11 - 0
packages/backend/src/services/database/SqliteDatabaseAccessService.js

@@ -126,6 +126,17 @@ class SqliteDatabaseAccessService extends BaseDatabaseAccessService {
                 svc_devConsole.add_widget(this.database_update_notice);
             })();
         }
+
+        const svc_serverHealth = this.services.get('server-health');
+
+        svc_serverHealth.add_check('sqlite', async () => {
+            const [{ user_version }] = await this._requireRead('PRAGMA user_version');
+            if ( user_version !== TARGET_VERSION ) {
+                throw new Error(
+                    `Database version mismatch: expected ${TARGET_VERSION}, ` +
+                    `got ${user_version}`);
+            }
+        });
     }
 
     async _read (query, params = []) {

+ 57 - 6
packages/backend/src/services/runtime-analysis/ServerHealthService.js

@@ -19,14 +19,18 @@
 const BaseService = require("../BaseService");
 const { SECOND } = require("../../util/time");
 const { parse_meminfo } = require("../../util/linux");
-const { asyncSafeSetInterval } = require("../../util/promise");
+const { asyncSafeSetInterval, TeePromise } = require("../../util/promise");
 
 class ServerHealthService extends BaseService {
     static MODULES = {
         fs: require('fs'),
     }
+    _construct () {
+        this.checks_ = []; // registered health checks; add_check() pushes { name, fn } entries
+        this.failures_ = []; // { name } entries for checks that failed in the latest polling pass
+    }
     async _init () {
-        const ram_poll_interval = 10 * SECOND;
+        this.init_service_checks_();
 
         /*
             There's an interesting thread here:
@@ -53,7 +57,7 @@ class ServerHealthService extends BaseService {
             return;
         }
 
-        asyncSafeSetInterval(async () => {
+        this.add_check('ram-usage', async () => {
             const meminfo_text = await this.modules.fs.promises.readFile(
                 '/proc/meminfo', 'utf8'
             );
@@ -69,11 +73,46 @@ class ServerHealthService extends BaseService {
             if ( meminfo.MemAvailable < min_available_KiB ) {
                 svc_alarm.create('low-available-memory', 'Low available memory', alarm_fields);
             }
-        }, ram_poll_interval, null,{
+        });
+    }
+
+    init_service_checks_ () {
+        const svc_alarm = this.services.get('alarm');
+        asyncSafeSetInterval(async () => {
+            const check_failures = [];
+            for ( const { name, fn } of this.checks_ ) {
+                const p_timeout = new TeePromise();
+                const timeout = setTimeout(() => {
+                    p_timeout.reject(new Error('Health check timed out'));
+                }, 5 * SECOND);
+                try {
+                    await Promise.race([
+                        fn(),
+                        p_timeout,
+                    ]);
+                    clearTimeout(timeout);
+                } catch ( err ) {
+                    // Trigger an alarm if this check isn't already in the failure list
+                    
+                    if ( this.failures_.some(v => v.name === name) ) {
+                        return;
+                    }
+
+                    svc_alarm.create(
+                        'health-check-failure',
+                        `Health check ${name} failed`,
+                        { error: err }
+                    );
+                    check_failures.push({ name });
+                }
+            }
+
+            this.failures_ = check_failures;
+        }, 10 * SECOND, null, {
             onBehindSchedule: (drift) => {
                 svc_alarm.create(
-                    'ram-usage-poll-behind-schedule',
-                    'RAM usage poll is behind schedule',
+                    'health-checks-behind-schedule',
+                    'Health checks are behind schedule',
                     { drift }
                 );
             }
@@ -83,6 +122,18 @@ class ServerHealthService extends BaseService {
     async get_stats () {
         return { ...this.stats_ };
     }
+
+    add_check (name, fn) { // register a named check; the poller calls fn() each pass and treats a throw/rejection as failure
+        this.checks_.push({ name, fn });
+    }
+
+    get_status () { // summarize the failures recorded by the most recent polling pass
+        const failures = this.failures_.map(v => v.name); // expose check names only
+        return {
+            ok: failures.length === 0,
+            ...(failures.length ? { failed: failures } : {}), // `failed` key is omitted when nothing failed
+        };
+    }
 }
 
 module.exports = { ServerHealthService };