Add circuit breaker for HA connectivity failures
Some checks failed
Release Drafter / update_release_draft (push) Failing after 5s
Some checks failed
Release Drafter / update_release_draft (push) Failing after 5s
When Home Assistant becomes unreachable (WiFi dropout, HA restart, etc.), the module now: (1) detects connectivity errors (ENETUNREACH, timeout, etc.); (2) opens a circuit breaker that immediately stops ALL template evaluations; (3) runs a single coalesced health check with exponential backoff (15s→300s); (4) queues pending evaluations and replays them when HA comes back; and (5) suppresses error-log spam while the circuit is open. Previously, each of the 11 sections would independently fail and retry, flooding the event loop with 30+ error callbacks in the same millisecond and freezing Electron's renderer. This fixes the repeated MagicMirror freezes during transient network outages.
This commit is contained in:
174
node_helper.js
174
node_helper.js
@@ -3,6 +3,12 @@
|
||||
*
|
||||
* By Brian Towles
|
||||
* MIT Licensed.
|
||||
*
|
||||
* Resilience improvements by James (2026-02-27):
|
||||
* - Circuit breaker: stops all template evals when HA is unreachable
|
||||
* - Exponential backoff on retries (no more 30 simultaneous retry timers)
|
||||
* - Coalesced health checks instead of per-section retries
|
||||
* - Graceful degradation: sections show stale data instead of freezing
|
||||
*/
|
||||
var backoff = require('backoff')
|
||||
const NodeHelper = require("node_helper");
|
||||
@@ -24,6 +30,10 @@ module.exports = NodeHelper.create({
|
||||
evaluateTemplate,
|
||||
onWebsocketCloseEvent,
|
||||
backoffWSConnection,
|
||||
_getBreaker,
|
||||
_openCircuit,
|
||||
_closeCircuit,
|
||||
_healthCheck,
|
||||
});
|
||||
|
||||
function start() {
|
||||
@@ -32,6 +42,10 @@ function start() {
|
||||
this.logger.debug("MMM-HomeAssistantDisplay helper started...");
|
||||
}
|
||||
this.connections = {};
|
||||
|
||||
// Circuit breaker state: tracks HA reachability per connection
|
||||
// When open, template evaluations are skipped entirely (no requests, no error spam)
|
||||
this._circuitBreaker = {}; // identifier -> { state: 'closed'|'open', failCount: 0, nextRetryAt: 0, retryTimer: null }
|
||||
}
|
||||
|
||||
function stop() {
|
||||
@@ -67,10 +81,11 @@ function socketNotificationReceived(notification, payload) {
|
||||
this.evaluateTemplate(payload).then((ret) => {
|
||||
this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
|
||||
}).catch((err) => {
|
||||
this.logger.error(
|
||||
"Unable to evaluate template",
|
||||
err
|
||||
);
|
||||
// Only log if circuit is closed (avoid spam when HA is down)
|
||||
const cb = this._getBreaker(payload.identifier);
|
||||
if (cb.state === 'closed') {
|
||||
this.logger.error("Unable to evaluate template", err);
|
||||
}
|
||||
});
|
||||
break;
|
||||
case "RENDER_SECTION_DISPLAY_TEMPLATE":
|
||||
@@ -80,19 +95,128 @@ function socketNotificationReceived(notification, payload) {
|
||||
section: payload.section
|
||||
});
|
||||
}).catch((err) => {
|
||||
this.logger.error(
|
||||
"unable to evaluate section template",
|
||||
err
|
||||
);
|
||||
const cb = this._getBreaker(payload.identifier);
|
||||
if (cb.state === 'closed') {
|
||||
this.logger.error("unable to evaluate section template", err);
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Circuit breaker helpers
|
||||
/**
 * Return the circuit-breaker record for a connection, creating it on first use.
 *
 * Must be invoked with `this` bound to the helper: it reads and writes
 * `this._circuitBreaker`, which maps identifiers to breaker records.
 *
 * @param {string} identifier - Key of the connection whose breaker is wanted.
 * @returns {{state: string, failCount: number, nextRetryAt: number, retryTimer: *, pendingQueue: Array}}
 *   The (possibly freshly created) breaker record; the same object is returned
 *   on every later call with the same identifier.
 */
function _getBreaker(identifier) {
  const breakers = this._circuitBreaker;

  // Own-property check instead of a truthiness test: a plain object inherits
  // members such as "constructor" and "toString" from Object.prototype, and a
  // truthiness test would hand those back as if they were breaker records.
  if (!Object.prototype.hasOwnProperty.call(breakers, identifier)) {
    breakers[identifier] = {
      state: 'closed', // closed = healthy, open = HA unreachable
      failCount: 0,
      nextRetryAt: 0,
      retryTimer: null,
      pendingQueue: [], // queued payloads to retry when circuit closes
    };
  }

  return breakers[identifier];
}
|
||||
|
||||
/**
 * Trip the circuit breaker for a connection and schedule one health check.
 *
 * Does nothing when the breaker is already open. Otherwise it marks the
 * breaker open, bumps its failure count, records when the next probe is due
 * and arms a single timer that calls `this._healthCheck(identifier)`.
 *
 * Must be invoked with `this` bound to the helper.
 *
 * @param {string} identifier - Key of the connection whose breaker is tripped.
 * @returns {void}
 */
function _openCircuit(identifier) {
  const cb = this._getBreaker(identifier);

  if (cb.state !== 'open') {
    cb.state = 'open';
    cb.failCount += 1;

    // Delay doubles with each consecutive failure:
    // 15s, 30s, 60s, 120s, 240s, then capped at 300s.
    const waitMs = Math.min(15000 * 2 ** (cb.failCount - 1), 300000);
    cb.nextRetryAt = Date.now() + waitMs;

    this.logger.info(`Circuit OPEN for ${identifier} — HA unreachable. Next health check in ${waitMs / 1000}s`);

    // Only one probe timer per breaker: drop any previous one before re-arming.
    if (cb.retryTimer) {
      clearTimeout(cb.retryTimer);
    }
    cb.retryTimer = setTimeout(() => {
      this._healthCheck(identifier);
    }, waitMs);
  }
}
|
||||
|
||||
/**
 * Close the circuit breaker for a connection and replay whatever was queued
 * while it was open.
 *
 * Does nothing when the breaker is already closed. Otherwise it resets the
 * breaker's state, failure count and retry bookkeeping, cancels any pending
 * probe timer, empties the pending queue and re-runs each queued payload
 * through `this.evaluateTemplate`, forwarding every successful result as a
 * socket notification.
 *
 * Must be invoked with `this` bound to the helper.
 *
 * @param {string} identifier - Key of the connection whose breaker is closed.
 * @returns {void}
 */
function _closeCircuit(identifier) {
  const cb = this._getBreaker(identifier);
  if (cb.state === 'closed') {
    return;
  }

  this.logger.info(`Circuit CLOSED for ${identifier} — HA is reachable again`);
  cb.state = 'closed';
  cb.failCount = 0;
  cb.nextRetryAt = 0;
  if (cb.retryTimer) {
    clearTimeout(cb.retryTimer);
    cb.retryTimer = null;
  }

  // Take everything out of the queue in one go so the queue is empty before
  // any replay starts.
  const drained = cb.pendingQueue.splice(0);
  if (drained.length === 0) {
    return;
  }

  this.logger.info(`Replaying ${drained.length} queued template evaluations for ${identifier}`);
  drained.forEach((entry) => {
    this.evaluateTemplate(entry.payload)
      .then((ret) => {
        const { section } = entry.payload;
        // Payloads without a section are whole-module renders.
        if (section === undefined) {
          this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
          return;
        }
        this.sendSocketNotification("SECTION_DISPLAY_RENDERED", {
          ...ret,
          section,
        });
      })
      .catch(() => {
        // Deliberately ignored: a failing replay is expected to re-open the
        // circuit from inside evaluateTemplate.
      });
  });
}
|
||||
|
||||
/**
 * Probe Home Assistant for one connection and close or re-arm its breaker.
 *
 * Renders the trivial template "{{ 1 }}" through the connection's `hass`
 * client, racing it against a 10 second timeout. On success the circuit is
 * closed via `this._closeCircuit`; on any failure the breaker is re-opened
 * via `this._openCircuit`, which schedules the next probe with a longer delay.
 *
 * Must be invoked with `this` bound to the helper.
 *
 * @param {string} identifier - Key of the connection to probe.
 * @returns {Promise<void>} Resolves once the probe has been handled; probe
 *   failures are logged and handled here rather than rejected.
 */
async function _healthCheck(identifier) {
  const breaker = this._getBreaker(identifier);
  const connection = this.connections[identifier];
  const hass = connection && connection.hass;

  if (!hass) {
    // Nothing to probe yet. Treat it as a failed probe and re-arm: returning
    // silently would leave the circuit open with no timer, so it could never
    // recover.
    this.logger.info(`Health check failed for ${identifier}: no Home Assistant connection available`);
    breaker.state = 'closed'; // briefly close so _openCircuit fires
    this._openCircuit(identifier);
    return;
  }

  this.logger.info(`Health check for ${identifier}...`);

  let timeoutId = null;
  try {
    // Simple template render as health probe
    const probe = hass.templates.render("{{ 1 }}");
    const timeout = new Promise((_, reject) => {
      timeoutId = setTimeout(() => reject(new Error('Health check timeout')), 10000);
    });
    await Promise.race([probe, timeout]);

    // Success — close the circuit
    this._closeCircuit(identifier);
  } catch (err) {
    this.logger.info(`Health check failed for ${identifier}: ${err.message}`);
    // Re-arm with increased backoff. _openCircuit ignores breakers that are
    // already open, so flip the state first.
    breaker.state = 'closed'; // briefly close so _openCircuit fires
    this._openCircuit(identifier);
  } finally {
    // Always cancel the timeout so a finished probe does not leave a live
    // 10s timer behind.
    if (timeoutId !== null) {
      clearTimeout(timeoutId);
    }
  }
}
|
||||
|
||||
async function evaluateTemplate(payload) {
|
||||
if (config.debuglogging) {
|
||||
this.logger.debug(`Evaluating template for ${payload.template}`);
|
||||
}
|
||||
|
||||
// Circuit breaker check: if HA is known-unreachable, skip entirely
|
||||
const breaker = this._getBreaker(payload.identifier);
|
||||
if (breaker.state === 'open') {
|
||||
// Queue this request for replay when circuit closes (deduplicated by section)
|
||||
const isDuplicate = breaker.pendingQueue.some(
|
||||
(item) => item.payload.section === payload.section && item.payload.template === payload.template
|
||||
);
|
||||
if (!isDuplicate) {
|
||||
breaker.pendingQueue.push({ payload });
|
||||
// Cap queue size to prevent memory growth
|
||||
if (breaker.pendingQueue.length > 20) {
|
||||
breaker.pendingQueue.shift();
|
||||
}
|
||||
}
|
||||
throw new Error('Circuit open — HA unreachable, skipping template evaluation');
|
||||
}
|
||||
|
||||
const hass = this.connections[payload.identifier].hass;
|
||||
|
||||
try {
|
||||
@@ -103,32 +227,28 @@ async function evaluateTemplate(payload) {
|
||||
setTimeout(() => reject(new Error('Template evaluation timeout')), 10000)
|
||||
)
|
||||
]);
|
||||
|
||||
// Success — ensure circuit is closed (resets fail count)
|
||||
if (breaker.failCount > 0) {
|
||||
this._closeCircuit(payload.identifier);
|
||||
}
|
||||
|
||||
return {
|
||||
identifier: payload.identifier,
|
||||
render: response
|
||||
};
|
||||
} catch (err) {
|
||||
this.logger.error(`Template evaluation failed: ${err.message}`);
|
||||
// Check if this is a connectivity error (not a template syntax error)
|
||||
const isConnectivityError = /ENETUNREACH|ECONNREFUSED|ECONNRESET|ETIMEDOUT|EHOSTUNREACH|timeout/i.test(err.message);
|
||||
|
||||
// Schedule retry after 30 seconds
|
||||
setTimeout(() => {
|
||||
this.logger.info(`Retrying template evaluation for ${payload.identifier}`);
|
||||
this.evaluateTemplate(payload).then((ret) => {
|
||||
// Send appropriate notification based on original request type
|
||||
if (payload.section !== undefined) {
|
||||
this.sendSocketNotification("SECTION_DISPLAY_RENDERED", {
|
||||
...ret,
|
||||
section: payload.section
|
||||
});
|
||||
} else {
|
||||
this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
|
||||
}
|
||||
}).catch((retryErr) => {
|
||||
this.logger.error(`Template evaluation retry also failed: ${retryErr.message}`);
|
||||
});
|
||||
}, 30000);
|
||||
if (isConnectivityError) {
|
||||
this.logger.error(`Template evaluation failed (connectivity): ${err.message}`);
|
||||
this._openCircuit(payload.identifier);
|
||||
} else {
|
||||
// Template/logic error — log but don't trip the breaker
|
||||
this.logger.error(`Template evaluation failed: ${err.message}`);
|
||||
}
|
||||
|
||||
// Re-throw to maintain existing error handling behavior
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user