Add circuit breaker for HA connectivity failures
Some checks failed
Release Drafter / update_release_draft (push) Failing after 5s
Some checks failed
Release Drafter / update_release_draft (push) Failing after 5s
When Home Assistant becomes unreachable (WiFi dropout, HA restart, etc.), the module now:
- Detects connectivity errors (ENETUNREACH, timeout, etc.)
- Opens a circuit breaker that stops ALL template evaluations immediately
- Runs a single coalesced health check with exponential backoff (15s→300s)
- Queues pending evaluations and replays them when HA comes back
- Suppresses error log spam while the circuit is open

Previously, each of the 11 sections would independently fail and retry, flooding the event loop with 30+ error callbacks in the same millisecond and freezing Electron's renderer.

Fixes repeated MagicMirror freezes during transient network outages.
This commit is contained in:
174
node_helper.js
174
node_helper.js
@@ -3,6 +3,12 @@
|
|||||||
*
|
*
|
||||||
* By Brian Towles
|
* By Brian Towles
|
||||||
* MIT Licensed.
|
* MIT Licensed.
|
||||||
|
*
|
||||||
|
* Resilience improvements by James (2026-02-27):
|
||||||
|
* - Circuit breaker: stops all template evals when HA is unreachable
|
||||||
|
* - Exponential backoff on retries (no more 30 simultaneous retry timers)
|
||||||
|
* - Coalesced health checks instead of per-section retries
|
||||||
|
* - Graceful degradation: sections show stale data instead of freezing
|
||||||
*/
|
*/
|
||||||
var backoff = require('backoff')
|
var backoff = require('backoff')
|
||||||
const NodeHelper = require("node_helper");
|
const NodeHelper = require("node_helper");
|
||||||
@@ -24,6 +30,10 @@ module.exports = NodeHelper.create({
|
|||||||
evaluateTemplate,
|
evaluateTemplate,
|
||||||
onWebsocketCloseEvent,
|
onWebsocketCloseEvent,
|
||||||
backoffWSConnection,
|
backoffWSConnection,
|
||||||
|
_getBreaker,
|
||||||
|
_openCircuit,
|
||||||
|
_closeCircuit,
|
||||||
|
_healthCheck,
|
||||||
});
|
});
|
||||||
|
|
||||||
function start() {
|
function start() {
|
||||||
@@ -32,6 +42,10 @@ function start() {
|
|||||||
this.logger.debug("MMM-HomeAssistantDisplay helper started...");
|
this.logger.debug("MMM-HomeAssistantDisplay helper started...");
|
||||||
}
|
}
|
||||||
this.connections = {};
|
this.connections = {};
|
||||||
|
|
||||||
|
// Circuit breaker state: tracks HA reachability per connection
|
||||||
|
// When open, template evaluations are skipped entirely (no requests, no error spam)
|
||||||
|
this._circuitBreaker = {}; // identifier -> { state: 'closed'|'open', failCount: 0, nextRetryAt: 0, retryTimer: null }
|
||||||
}
|
}
|
||||||
|
|
||||||
function stop() {
|
function stop() {
|
||||||
@@ -67,10 +81,11 @@ function socketNotificationReceived(notification, payload) {
|
|||||||
this.evaluateTemplate(payload).then((ret) => {
|
this.evaluateTemplate(payload).then((ret) => {
|
||||||
this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
|
this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
|
||||||
}).catch((err) => {
|
}).catch((err) => {
|
||||||
this.logger.error(
|
// Only log if circuit is closed (avoid spam when HA is down)
|
||||||
"Unable to evaluate template",
|
const cb = this._getBreaker(payload.identifier);
|
||||||
err
|
if (cb.state === 'closed') {
|
||||||
);
|
this.logger.error("Unable to evaluate template", err);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
case "RENDER_SECTION_DISPLAY_TEMPLATE":
|
case "RENDER_SECTION_DISPLAY_TEMPLATE":
|
||||||
@@ -80,19 +95,128 @@ function socketNotificationReceived(notification, payload) {
|
|||||||
section: payload.section
|
section: payload.section
|
||||||
});
|
});
|
||||||
}).catch((err) => {
|
}).catch((err) => {
|
||||||
this.logger.error(
|
const cb = this._getBreaker(payload.identifier);
|
||||||
"unable to evaluate section template",
|
if (cb.state === 'closed') {
|
||||||
err
|
this.logger.error("unable to evaluate section template", err);
|
||||||
);
|
}
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Circuit breaker helpers
|
||||||
|
/**
 * Return the circuit-breaker record for a connection identifier, creating a
 * fresh "closed" record the first time an identifier is seen.
 *
 * Records live in `this._circuitBreaker`, a plain object keyed by identifier.
 * The lookup checks for an *own* property so that an identifier which happens
 * to match an inherited Object.prototype name (e.g. "constructor", "toString")
 * still gets a real breaker record instead of the inherited built-in.
 *
 * @param {string} identifier - Key of the connection/module instance.
 * @returns {{state: string, failCount: number, nextRetryAt: number, retryTimer: *, pendingQueue: Array}}
 *   The (possibly newly created) breaker record; the same object is returned
 *   on every subsequent call for that identifier.
 */
function _getBreaker(identifier) {
  const registry = this._circuitBreaker;
  const hasOwnRecord = Object.prototype.hasOwnProperty.call(registry, identifier);

  // Also re-create when an own entry is falsy, matching the original
  // truthiness-based behaviour for that case.
  if (!hasOwnRecord || !registry[identifier]) {
    registry[identifier] = {
      state: 'closed', // closed = healthy, open = HA unreachable
      failCount: 0,
      nextRetryAt: 0,
      retryTimer: null,
      pendingQueue: [], // queued payloads to retry when circuit closes
    };
  }
  return registry[identifier];
}
|
||||||
|
|
||||||
|
/**
 * Open the circuit breaker for an identifier and schedule one health check.
 *
 * Does nothing when the breaker is already open. Otherwise it marks the
 * breaker open, increments its failure count, and arms a single timer that
 * calls `this._healthCheck(identifier)` after an exponentially growing delay
 * (15s, 30s, 60s, 120s, 240s, capped at 300s).
 *
 * @param {string} identifier - Key of the connection/module instance.
 * @returns {void}
 */
function _openCircuit(identifier) {
  const BASE_DELAY_MS = 15000;
  const MAX_DELAY_MS = 300000;

  const breaker = this._getBreaker(identifier);
  if (breaker.state === 'open') return; // already open

  breaker.state = 'open';
  breaker.failCount++;

  // Exponential backoff: doubles with every consecutive failure, capped.
  const delay = Math.min(BASE_DELAY_MS * Math.pow(2, breaker.failCount - 1), MAX_DELAY_MS);
  breaker.nextRetryAt = Date.now() + delay;

  this.logger.info(`Circuit OPEN for ${identifier} — HA unreachable. Next health check in ${delay / 1000}s`);

  // Single coalesced health check timer (not per-section!)
  if (breaker.retryTimer) clearTimeout(breaker.retryTimer);
  breaker.retryTimer = setTimeout(() => {
    // The timer has fired; drop the handle so nobody later clears a dead timer.
    breaker.retryTimer = null;

    // _healthCheck is async: never leave its promise floating, otherwise an
    // unexpected rejection would surface as an unhandled rejection.
    Promise.resolve(this._healthCheck(identifier)).catch((err) => {
      const reason = err && err.message ? err.message : err;
      this.logger.error(`Health check for ${identifier} failed unexpectedly: ${reason}`);
    });
  }, delay);
}
|
||||||
|
|
||||||
|
/**
 * Close the circuit breaker for an identifier and replay queued evaluations.
 *
 * Does nothing when the breaker is already closed. Otherwise it resets the
 * breaker (state, failure count, next-retry timestamp, pending timer), then
 * drains `pendingQueue` and re-runs each queued payload through
 * `this.evaluateTemplate`. Results are sent as "SECTION_DISPLAY_RENDERED"
 * when the payload carries a `section`, otherwise as "MODULE_DISPLAY_RENDERED".
 *
 * @param {string} identifier - Key of the connection/module instance.
 * @returns {void}
 */
function _closeCircuit(identifier) {
  const breaker = this._getBreaker(identifier);
  if (breaker.state === 'closed') return;

  this.logger.info(`Circuit CLOSED for ${identifier} — HA is reachable again`);
  breaker.state = 'closed';
  breaker.failCount = 0;
  breaker.nextRetryAt = 0;
  if (breaker.retryTimer) {
    clearTimeout(breaker.retryTimer);
    breaker.retryTimer = null;
  }

  // Take ownership of the queued items and leave the breaker's queue empty,
  // so anything queued during the replay is not processed twice.
  const queue = breaker.pendingQueue.splice(0);
  if (queue.length === 0) return;

  this.logger.info(`Replaying ${queue.length} queued template evaluations for ${identifier}`);
  for (const { payload } of queue) {
    this.evaluateTemplate(payload)
      .then((ret) => {
        // `section` may legitimately be 0, so compare against undefined explicitly.
        if (payload.section !== undefined) {
          this.sendSocketNotification("SECTION_DISPLAY_RENDERED", {
            ...ret,
            section: payload.section
          });
        } else {
          this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
        }
      })
      .catch((err) => {
        // Best effort: a failed replay must not break the others. Keep a
        // debug-level trace instead of swallowing the failure silently.
        const reason = err && err.message ? err.message : err;
        this.logger.debug(`Replay of queued template evaluation failed for ${identifier}: ${reason}`);
      });
  }
}
|
||||||
|
|
||||||
|
/**
 * Probe Home Assistant once and close or re-arm the circuit breaker.
 *
 * Renders the trivial template "{{ 1 }}" through the connection's `hass`
 * client, racing it against a 10 second timeout. On success the circuit is
 * closed via `this._closeCircuit`. On failure the breaker is re-opened via
 * `this._openCircuit`, which schedules the next probe with a longer backoff.
 *
 * When no `hass` client exists for the identifier and the breaker is open,
 * the breaker is re-armed as well; otherwise nothing would ever schedule
 * another probe and the circuit would stay open permanently.
 *
 * @param {string} identifier - Key of the connection/module instance.
 * @returns {Promise<void>} Resolves once the probe has been handled.
 */
async function _healthCheck(identifier) {
  const HEALTH_CHECK_TIMEOUT_MS = 10000;

  const breaker = this._getBreaker(identifier);
  const connection = this.connections[identifier];
  const hass = connection && connection.hass;

  // _openCircuit is a no-op while the breaker is 'open', so flip it to
  // 'closed' first; failCount is kept, so the next delay grows.
  const rearm = () => {
    breaker.state = 'closed'; // briefly close so _openCircuit fires
    this._openCircuit(identifier);
  };

  if (!hass) {
    if (breaker.state === 'open') {
      this.logger.info(`Health check skipped for ${identifier}: no HA connection available`);
      rearm();
    }
    return;
  }

  this.logger.info(`Health check for ${identifier}...`);
  let timeoutHandle = null;
  try {
    const timeout = new Promise((_, reject) => {
      timeoutHandle = setTimeout(
        () => reject(new Error('Health check timeout')),
        HEALTH_CHECK_TIMEOUT_MS
      );
    });
    // Simple template render as health probe
    await Promise.race([hass.templates.render("{{ 1 }}"), timeout]);
    // Success — close the circuit
    this._closeCircuit(identifier);
  } catch (err) {
    this.logger.info(`Health check failed for ${identifier}: ${err.message}`);
    // Re-arm with increased backoff
    rearm();
  } finally {
    // Do not leave the timeout timer pending once the race is settled.
    if (timeoutHandle !== null) clearTimeout(timeoutHandle);
  }
}
|
||||||
|
|
||||||
async function evaluateTemplate(payload) {
|
async function evaluateTemplate(payload) {
|
||||||
if (config.debuglogging) {
|
if (config.debuglogging) {
|
||||||
this.logger.debug(`Evaluating template for ${payload.template}`);
|
this.logger.debug(`Evaluating template for ${payload.template}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Circuit breaker check: if HA is known-unreachable, skip entirely
|
||||||
|
const breaker = this._getBreaker(payload.identifier);
|
||||||
|
if (breaker.state === 'open') {
|
||||||
|
// Queue this request for replay when circuit closes (deduplicated by section)
|
||||||
|
const isDuplicate = breaker.pendingQueue.some(
|
||||||
|
(item) => item.payload.section === payload.section && item.payload.template === payload.template
|
||||||
|
);
|
||||||
|
if (!isDuplicate) {
|
||||||
|
breaker.pendingQueue.push({ payload });
|
||||||
|
// Cap queue size to prevent memory growth
|
||||||
|
if (breaker.pendingQueue.length > 20) {
|
||||||
|
breaker.pendingQueue.shift();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new Error('Circuit open — HA unreachable, skipping template evaluation');
|
||||||
|
}
|
||||||
|
|
||||||
const hass = this.connections[payload.identifier].hass;
|
const hass = this.connections[payload.identifier].hass;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -103,32 +227,28 @@ async function evaluateTemplate(payload) {
|
|||||||
setTimeout(() => reject(new Error('Template evaluation timeout')), 10000)
|
setTimeout(() => reject(new Error('Template evaluation timeout')), 10000)
|
||||||
)
|
)
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
// Success — ensure circuit is closed (resets fail count)
|
||||||
|
if (breaker.failCount > 0) {
|
||||||
|
this._closeCircuit(payload.identifier);
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
identifier: payload.identifier,
|
identifier: payload.identifier,
|
||||||
render: response
|
render: response
|
||||||
};
|
};
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
this.logger.error(`Template evaluation failed: ${err.message}`);
|
// Check if this is a connectivity error (not a template syntax error)
|
||||||
|
const isConnectivityError = /ENETUNREACH|ECONNREFUSED|ECONNRESET|ETIMEDOUT|EHOSTUNREACH|timeout/i.test(err.message);
|
||||||
|
|
||||||
// Schedule retry after 30 seconds
|
if (isConnectivityError) {
|
||||||
setTimeout(() => {
|
this.logger.error(`Template evaluation failed (connectivity): ${err.message}`);
|
||||||
this.logger.info(`Retrying template evaluation for ${payload.identifier}`);
|
this._openCircuit(payload.identifier);
|
||||||
this.evaluateTemplate(payload).then((ret) => {
|
} else {
|
||||||
// Send appropriate notification based on original request type
|
// Template/logic error — log but don't trip the breaker
|
||||||
if (payload.section !== undefined) {
|
this.logger.error(`Template evaluation failed: ${err.message}`);
|
||||||
this.sendSocketNotification("SECTION_DISPLAY_RENDERED", {
|
}
|
||||||
...ret,
|
|
||||||
section: payload.section
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
|
|
||||||
}
|
|
||||||
}).catch((retryErr) => {
|
|
||||||
this.logger.error(`Template evaluation retry also failed: ${retryErr.message}`);
|
|
||||||
});
|
|
||||||
}, 30000);
|
|
||||||
|
|
||||||
// Re-throw to maintain existing error handling behavior
|
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user