From de03a33138b8177a1b76e2e845e6e9df20dd235a Mon Sep 17 00:00:00 2001 From: Clawd Date: Fri, 27 Feb 2026 15:16:29 +0000 Subject: [PATCH] Add circuit breaker for HA connectivity failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Home Assistant becomes unreachable (WiFi dropout, HA restart, etc.), the module now: - Detects connectivity errors (ENETUNREACH, timeout, etc.) - Opens a circuit breaker that stops ALL template evaluations immediately - Runs a single coalesced health check with exponential backoff (15s→300s) - Queues pending evaluations and replays them when HA comes back - Suppresses error log spam while circuit is open Previously, each of the 11 sections would independently fail + retry, flooding the event loop with 30+ error callbacks in the same millisecond and freezing Electron's renderer. Fixes repeated MagicMirror freezes during transient network outages. --- node_helper.js | 182 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 151 insertions(+), 31 deletions(-) diff --git a/node_helper.js b/node_helper.js index 40c82b0..dd81faf 100644 --- a/node_helper.js +++ b/node_helper.js @@ -3,6 +3,12 @@ * * By Brian Towles * MIT Licensed. 
+ * + * Resilience improvements by James (2026-02-27): + * - Circuit breaker: stops all template evals when HA is unreachable + * - Exponential backoff on retries (no more 30 simultaneous retry timers) + * - Coalesced health checks instead of per-section retries + * - Graceful degradation: sections show stale data instead of freezing */ var backoff = require('backoff') const NodeHelper = require("node_helper"); @@ -24,6 +30,10 @@ module.exports = NodeHelper.create({ evaluateTemplate, onWebsocketCloseEvent, backoffWSConnection, + _getBreaker, + _openCircuit, + _closeCircuit, + _healthCheck, }); function start() { @@ -32,6 +42,10 @@ function start() { this.logger.debug("MMM-HomeAssistantDisplay helper started..."); } this.connections = {}; + + // Circuit breaker state: tracks HA reachability per connection + // When open, template evaluations are skipped entirely (no requests, no error spam) + this._circuitBreaker = {}; // identifier -> { state: 'closed'|'open', failCount: 0, nextRetryAt: 0, retryTimer: null, pendingQueue: [] } } function stop() { @@ -67,10 +81,11 @@ function socketNotificationReceived(notification, payload) { this.evaluateTemplate(payload).then((ret) => { this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret); }).catch((err) => { - this.logger.error( - "Unable to evaluate template", - err - ); + // Only log if circuit is closed (avoid spam when HA is down) + const cb = this._getBreaker(payload.identifier); + if (cb.state === 'closed') { + this.logger.error("Unable to evaluate template", err); + } }); break; case "RENDER_SECTION_DISPLAY_TEMPLATE": @@ -80,55 +95,160 @@ function socketNotificationReceived(notification, payload) { section: payload.section }); }).catch((err) => { - this.logger.error( - "unable to evaluate section template", - err - ); + const cb = this._getBreaker(payload.identifier); + if (cb.state === 'closed') { + this.logger.error("unable to evaluate section template", err); + } }); break; } } +// Circuit breaker helpers +function
_getBreaker(identifier) { + if (!this._circuitBreaker[identifier]) { + this._circuitBreaker[identifier] = { + state: 'closed', // closed = healthy, open = HA unreachable + failCount: 0, + nextRetryAt: 0, + retryTimer: null, + pendingQueue: [], // queued payloads to retry when circuit closes + }; + } + return this._circuitBreaker[identifier]; +} + +function _openCircuit(identifier) { + const breaker = this._getBreaker(identifier); + if (breaker.state === 'open') return; // already open + + breaker.state = 'open'; + breaker.failCount++; + // Exponential backoff: 15s, 30s, 60s, 120s, max 300s + const delay = Math.min(15000 * Math.pow(2, breaker.failCount - 1), 300000); + breaker.nextRetryAt = Date.now() + delay; + + this.logger.info(`Circuit OPEN for ${identifier} — HA unreachable. Next health check in ${delay / 1000}s`); + + // Single coalesced health check timer (not per-section!) + if (breaker.retryTimer) clearTimeout(breaker.retryTimer); + breaker.retryTimer = setTimeout(() => { + this._healthCheck(identifier); + }, delay); +} + +function _closeCircuit(identifier) { + const breaker = this._getBreaker(identifier); + if (breaker.state === 'closed') return; + + this.logger.info(`Circuit CLOSED for ${identifier} — HA is reachable again`); + breaker.state = 'closed'; + breaker.failCount = 0; + breaker.nextRetryAt = 0; + if (breaker.retryTimer) { + clearTimeout(breaker.retryTimer); + breaker.retryTimer = null; + } + + // Replay queued template evaluations + const queue = breaker.pendingQueue.splice(0); + if (queue.length > 0) { + this.logger.info(`Replaying ${queue.length} queued template evaluations for ${identifier}`); + for (const item of queue) { + this.evaluateTemplate(item.payload).then((ret) => { + if (item.payload.section !== undefined) { + this.sendSocketNotification("SECTION_DISPLAY_RENDERED", { + ...ret, + section: item.payload.section + }); + } else { + this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret); + } + }).catch(() => { + // Circuit will 
re-open if this fails + }); + } + } +} + +async function _healthCheck(identifier) { + const breaker = this._getBreaker(identifier); + const hass = this.connections[identifier] && this.connections[identifier].hass; + if (!hass) return; + + this.logger.info(`Health check for ${identifier}...`); + try { + // Simple template render as health probe + await Promise.race([ + hass.templates.render("{{ 1 }}"), + new Promise((_, reject) => + setTimeout(() => reject(new Error('Health check timeout')), 10000) + ) + ]); + // Success — close the circuit + this._closeCircuit(identifier); + } catch (err) { + this.logger.info(`Health check failed for ${identifier}: ${err.message}`); + // Re-arm with increased backoff + breaker.state = 'closed'; // briefly close so _openCircuit fires + this._openCircuit(identifier); + } +} + async function evaluateTemplate(payload) { if (config.debuglogging) { this.logger.debug(`Evaluating template for ${payload.template}`); } + + // Circuit breaker check: if HA is known-unreachable, skip entirely + const breaker = this._getBreaker(payload.identifier); + if (breaker.state === 'open') { + // Queue this request for replay when circuit closes (deduplicated by section and template) + const isDuplicate = breaker.pendingQueue.some( + (item) => item.payload.section === payload.section && item.payload.template === payload.template + ); + if (!isDuplicate) { + breaker.pendingQueue.push({ payload }); + // Cap queue size to prevent memory growth + if (breaker.pendingQueue.length > 20) { + breaker.pendingQueue.shift(); + } + } + throw new Error('Circuit open — HA unreachable, skipping template evaluation'); + } + const hass = this.connections[payload.identifier].hass; - + try { // Wrap template call with timeout const response = await Promise.race([ hass.templates.render(payload.template), - new Promise((_, reject) => + new Promise((_, reject) => setTimeout(() => reject(new Error('Template evaluation timeout')), 10000) ) ]); + + // Success — ensure circuit is closed (resets
fail count) + if (breaker.failCount > 0) { + this._closeCircuit(payload.identifier); + } + return { identifier: payload.identifier, render: response }; } catch (err) { - this.logger.error(`Template evaluation failed: ${err.message}`); - - // Schedule retry after 30 seconds - setTimeout(() => { - this.logger.info(`Retrying template evaluation for ${payload.identifier}`); - this.evaluateTemplate(payload).then((ret) => { - // Send appropriate notification based on original request type - if (payload.section !== undefined) { - this.sendSocketNotification("SECTION_DISPLAY_RENDERED", { - ...ret, - section: payload.section - }); - } else { - this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret); - } - }).catch((retryErr) => { - this.logger.error(`Template evaluation retry also failed: ${retryErr.message}`); - }); - }, 30000); - - // Re-throw to maintain existing error handling behavior + // Check if this is a connectivity error (not a template syntax error) + const isConnectivityError = /ENETUNREACH|ECONNREFUSED|ECONNRESET|ETIMEDOUT|EHOSTUNREACH|timeout/i.test(err.message); + + if (isConnectivityError) { + this.logger.error(`Template evaluation failed (connectivity): ${err.message}`); + this._openCircuit(payload.identifier); + } else { + // Template/logic error — log but don't trip the breaker + this.logger.error(`Template evaluation failed: ${err.message}`); + } + throw err; } }