Add circuit breaker for HA connectivity failures
Some checks failed
Release Drafter / update_release_draft (push) Failing after 5s

When Home Assistant becomes unreachable (WiFi dropout, HA restart, etc.),
the module now:
- Detects connectivity errors (ENETUNREACH, timeout, etc.)
- Opens a circuit breaker that stops ALL template evaluations immediately
- Runs a single coalesced health check with exponential backoff (15s→300s)
- Queues pending evaluations and replays them when HA comes back
- Suppresses error log spam while circuit is open

Previously, each of the 11 sections would independently fail + retry,
flooding the event loop with 30+ error callbacks in the same millisecond
and freezing Electron's renderer.

Fixes repeated MagicMirror freezes during transient network outages.
This commit is contained in:
Clawd
2026-02-27 15:16:29 +00:00
parent 8831ee9e13
commit de03a33138

View File

@@ -3,6 +3,12 @@
*
* By Brian Towles
* MIT Licensed.
*
* Resilience improvements by James (2026-02-27):
* - Circuit breaker: stops all template evals when HA is unreachable
* - Exponential backoff on retries (no more 30 simultaneous retry timers)
* - Coalesced health checks instead of per-section retries
* - Graceful degradation: sections show stale data instead of freezing
*/
var backoff = require('backoff')
const NodeHelper = require("node_helper");
@@ -24,6 +30,10 @@ module.exports = NodeHelper.create({
evaluateTemplate,
onWebsocketCloseEvent,
backoffWSConnection,
_getBreaker,
_openCircuit,
_closeCircuit,
_healthCheck,
});
function start() {
@@ -32,6 +42,10 @@ function start() {
this.logger.debug("MMM-HomeAssistantDisplay helper started...");
}
this.connections = {};
// Circuit breaker state: tracks HA reachability per connection
// When open, template evaluations are skipped entirely (no requests, no error spam)
this._circuitBreaker = {}; // identifier -> { state: 'closed'|'open', failCount: 0, nextRetryAt: 0, retryTimer: null }
}
function stop() {
@@ -67,10 +81,11 @@ function socketNotificationReceived(notification, payload) {
this.evaluateTemplate(payload).then((ret) => {
this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
}).catch((err) => {
this.logger.error(
"Unable to evaluate template",
err
);
// Only log if circuit is closed (avoid spam when HA is down)
const cb = this._getBreaker(payload.identifier);
if (cb.state === 'closed') {
this.logger.error("Unable to evaluate template", err);
}
});
break;
case "RENDER_SECTION_DISPLAY_TEMPLATE":
@@ -80,19 +95,128 @@ function socketNotificationReceived(notification, payload) {
section: payload.section
});
}).catch((err) => {
this.logger.error(
"unable to evaluate section template",
err
);
const cb = this._getBreaker(payload.identifier);
if (cb.state === 'closed') {
this.logger.error("unable to evaluate section template", err);
}
});
break;
}
}
// Circuit breaker helpers
/**
 * Look up the circuit-breaker record for a connection, creating it on first use.
 *
 * The record lives in `this._circuitBreaker`, keyed by `identifier`, and is
 * returned by reference so callers mutate the shared state directly.
 *
 * @param {string} identifier - Key of the connection the breaker belongs to.
 * @returns {{state: string, failCount: number, nextRetryAt: number, retryTimer: *, pendingQueue: Array}}
 *   The (possibly freshly created) breaker record.
 */
function _getBreaker(identifier) {
  const registry = this._circuitBreaker;
  let entry = registry[identifier];
  if (!entry) {
    entry = {
      // 'closed' means healthy; 'open' means HA is considered unreachable.
      state: 'closed',
      failCount: 0,
      nextRetryAt: 0,
      retryTimer: null,
      // Payloads parked while the circuit is open, replayed once it closes.
      pendingQueue: [],
    };
    registry[identifier] = entry;
  }
  return entry;
}
/**
 * Trip the circuit breaker for a connection and schedule one health check.
 *
 * Does nothing when the breaker is already open. Otherwise it marks the
 * breaker open, bumps the failure count, records when the next probe is due
 * and arms a single timer that calls `this._healthCheck(identifier)`.
 *
 * @param {string} identifier - Key of the connection whose circuit is opened.
 * @returns {void}
 */
function _openCircuit(identifier) {
  const cb = this._getBreaker(identifier);
  if (cb.state !== 'open') {
    cb.state = 'open';
    cb.failCount += 1;

    // Doubling backoff: 15s, 30s, 60s, 120s, 240s, then capped at 300s.
    const waitMs = Math.min(15000 * 2 ** (cb.failCount - 1), 300000);
    cb.nextRetryAt = Date.now() + waitMs;
    this.logger.info(`Circuit OPEN for ${identifier} — HA unreachable. Next health check in ${waitMs / 1000}s`);

    // Only one probe timer per breaker: drop any earlier one before arming.
    if (cb.retryTimer) {
      clearTimeout(cb.retryTimer);
    }
    cb.retryTimer = setTimeout(() => {
      this._healthCheck(identifier);
    }, waitMs);
  }
}
/**
 * Mark a connection healthy again and replay everything queued while it was down.
 *
 * Does nothing when the breaker is already closed. Otherwise it resets the
 * breaker's failure bookkeeping, cancels any pending health-check timer, drains
 * `pendingQueue` and re-runs each queued payload through
 * `this.evaluateTemplate`, forwarding the result as a section or module
 * notification depending on whether the payload carries a `section`.
 *
 * @param {string} identifier - Key of the connection whose circuit is closed.
 * @returns {void}
 */
function _closeCircuit(identifier) {
  const cb = this._getBreaker(identifier);
  if (cb.state === 'closed') {
    return;
  }

  this.logger.info(`Circuit CLOSED for ${identifier} — HA is reachable again`);
  cb.state = 'closed';
  cb.failCount = 0;
  cb.nextRetryAt = 0;
  if (cb.retryTimer) {
    clearTimeout(cb.retryTimer);
    cb.retryTimer = null;
  }

  // Take ownership of the queued work and leave the breaker's queue empty.
  const drained = cb.pendingQueue.splice(0);
  if (drained.length === 0) {
    return;
  }

  this.logger.info(`Replaying ${drained.length} queued template evaluations for ${identifier}`);
  drained.forEach((entry) => {
    const deliver = (ret) => {
      if (entry.payload.section === undefined) {
        this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
        return;
      }
      this.sendSocketNotification("SECTION_DISPLAY_RENDERED", {
        ...ret,
        section: entry.payload.section
      });
    };
    // Failures are intentionally ignored here: a failing replay goes through
    // evaluateTemplate again, which is expected to re-open the circuit.
    this.evaluateTemplate(entry.payload).then(deliver).catch(() => {});
  });
}
/**
 * Probe Home Assistant once and either close the circuit or re-arm the backoff.
 *
 * The probe renders the trivial template `{{ 1 }}` and races it against a
 * 10 second timeout. On success `this._closeCircuit(identifier)` is called; on
 * any failure the breaker is re-opened through `this._openCircuit(identifier)`,
 * which schedules the next probe with a longer delay.
 *
 * Changes from the previous version:
 * - A missing `hass` handle no longer returns silently. That left the circuit
 *   open with no timer pending, so it could never recover; the backoff is now
 *   re-armed instead.
 * - The timeout timer is cleared once the race settles instead of lingering
 *   for the full 10 seconds after every probe.
 *
 * @param {string} identifier - Key of the connection to probe.
 * @returns {Promise<void>} Resolves when the probe has been handled; probe
 *   failures are handled internally and do not reject.
 */
async function _healthCheck(identifier) {
  const breaker = this._getBreaker(identifier);

  // Normally the retry timer has just fired and this is a no-op. If the check
  // was triggered some other way, cancelling avoids a second, duplicate probe.
  if (breaker.retryTimer) {
    clearTimeout(breaker.retryTimer);
    breaker.retryTimer = null;
  }

  // _openCircuit ignores breakers that are already open, so flip the state to
  // 'closed' first; this happens synchronously, nothing observes it in between.
  const rearm = () => {
    breaker.state = 'closed';
    this._openCircuit(identifier);
  };

  const hass = this.connections[identifier] && this.connections[identifier].hass;
  if (!hass) {
    this.logger.info(`Health check skipped for ${identifier}: no Home Assistant connection available`);
    rearm();
    return;
  }

  this.logger.info(`Health check for ${identifier}...`);
  let timeoutHandle = null;
  try {
    // Simple template render as health probe
    await Promise.race([
      hass.templates.render("{{ 1 }}"),
      new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => reject(new Error('Health check timeout')), 10000);
      })
    ]);
    // Success — close the circuit
    this._closeCircuit(identifier);
  } catch (err) {
    // Rejections are not guaranteed to be Error instances.
    const reason = err && err.message ? err.message : String(err);
    this.logger.info(`Health check failed for ${identifier}: ${reason}`);
    // Re-arm with increased backoff
    rearm();
  } finally {
    // clearTimeout(null) is a no-op, so this is safe even if render threw
    // synchronously before the timer was created.
    clearTimeout(timeoutHandle);
  }
}
async function evaluateTemplate(payload) {
if (config.debuglogging) {
this.logger.debug(`Evaluating template for ${payload.template}`);
}
// Circuit breaker check: if HA is known-unreachable, skip entirely
const breaker = this._getBreaker(payload.identifier);
if (breaker.state === 'open') {
// Queue this request for replay when circuit closes (deduplicated by section)
const isDuplicate = breaker.pendingQueue.some(
(item) => item.payload.section === payload.section && item.payload.template === payload.template
);
if (!isDuplicate) {
breaker.pendingQueue.push({ payload });
// Cap queue size to prevent memory growth
if (breaker.pendingQueue.length > 20) {
breaker.pendingQueue.shift();
}
}
throw new Error('Circuit open — HA unreachable, skipping template evaluation');
}
const hass = this.connections[payload.identifier].hass;
try {
@@ -103,32 +227,28 @@ async function evaluateTemplate(payload) {
setTimeout(() => reject(new Error('Template evaluation timeout')), 10000)
)
]);
// Success — ensure circuit is closed (resets fail count)
if (breaker.failCount > 0) {
this._closeCircuit(payload.identifier);
}
return {
identifier: payload.identifier,
render: response
};
} catch (err) {
this.logger.error(`Template evaluation failed: ${err.message}`);
// Check if this is a connectivity error (not a template syntax error)
const isConnectivityError = /ENETUNREACH|ECONNREFUSED|ECONNRESET|ETIMEDOUT|EHOSTUNREACH|timeout/i.test(err.message);
// Schedule retry after 30 seconds
setTimeout(() => {
this.logger.info(`Retrying template evaluation for ${payload.identifier}`);
this.evaluateTemplate(payload).then((ret) => {
// Send appropriate notification based on original request type
if (payload.section !== undefined) {
this.sendSocketNotification("SECTION_DISPLAY_RENDERED", {
...ret,
section: payload.section
});
} else {
this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
}
}).catch((retryErr) => {
this.logger.error(`Template evaluation retry also failed: ${retryErr.message}`);
});
}, 30000);
if (isConnectivityError) {
this.logger.error(`Template evaluation failed (connectivity): ${err.message}`);
this._openCircuit(payload.identifier);
} else {
// Template/logic error — log but don't trip the breaker
this.logger.error(`Template evaluation failed: ${err.message}`);
}
// Re-throw to maintain existing error handling behavior
throw err;
}
}