Compare commits

..

5 Commits

Author SHA1 Message Date
Clawd
5cdc4e2ce2 Fix stale data after HA outage recovery
Some checks failed
Release Drafter / update_release_draft (push) Failing after 5s
Two bugs caused the module to show outdated data permanently after HA
came back online:

1. refreshTimer was only checked at section level, but config sets it at
   the module config level. The setInterval never started, so there was
   no periodic re-render fallback when the WebSocket died.

2. _closeCircuit replayed queued templates but never reconnected the
   WebSocket. Without WS, no state_changed events fire, so the only
   render path was the (broken) refreshTimer.

Also fixes a race condition in _healthCheck where breaker.state was
briefly set to 'closed' before calling _openCircuit on failure. Now
uses 'half-open' state instead.
2026-03-01 08:59:15 +00:00
Clawd
de03a33138 Add circuit breaker for HA connectivity failures
Some checks failed
Release Drafter / update_release_draft (push) Failing after 5s
When Home Assistant becomes unreachable (WiFi dropout, HA restart, etc.),
the module now:
- Detects connectivity errors (ENETUNREACH, timeout, etc.)
- Opens a circuit breaker that stops ALL template evaluations immediately
- Runs a single coalesced health check with exponential backoff (15s→300s)
- Queues pending evaluations and replays them when HA comes back
- Suppresses error log spam while circuit is open

Previously, each of the 11 sections would independently fail + retry,
flooding the event loop with 30+ error callbacks in the same millisecond
and freezing Electron's renderer.

Fixes repeated MagicMirror freezes during transient network outages.
2026-02-27 15:16:29 +00:00
Clawd
8831ee9e13 Remove debug log spam from hot path entirely
Some checks failed
Release Drafter / update_release_draft (push) Failing after 4s
The conditional config.debuglogging checks were not working reliably
in node_helper context. Removed debug logging from onStateChangedEvent
entirely (it fires on every entity state change - multiple times per
second for power sensors). WS disconnect promoted to info level since
it's actually useful.
2026-02-23 20:11:51 +00:00
Clawd
a2955ceab4 Fix debug log spam by gating debug logs with config.debuglogging
Some checks failed
Release Drafter / update_release_draft (push) Failing after 4s
- Added config.debuglogging check to 'Found listening connection' debug log
- Added config.debuglogging check to 'Hass WS Disconnected' debug log
- Prevents log spam when debuglogging is disabled (default)
- Fixes issue causing multi-GB log files from frequent state change events
2026-02-23 19:54:06 +00:00
Clawd
9ba31cbafe Fix template evaluation timeouts and add retry logic
Some checks failed
Release Drafter / update_release_draft (push) Failing after 4s
- Add 10 second timeout to template evaluation HTTP calls
- Add retry logic that schedules retry after 30 seconds on failure
- Maintains existing error handling while preventing silent failures
- Fixes issue where template errors cause data to stay stale until next WebSocket event
2026-02-23 18:01:36 +00:00
2 changed files with 198 additions and 24 deletions

View File

@@ -52,15 +52,29 @@ Module.register("MMM-HomeAssistantDisplay", {
entity: section.triggerEntities[entity]
});
}
// Set up a timer to trigger re-rendering outside of any entity state update
if (section.refreshTimer) {
setInterval(()=> {
this.renderTemplates("timeout");
this.updateDom();
}, section.refreshTimer * 1000);
}
}
// Refresh timer: check section-level first, then fall back to config-level.
// One interval per module instance — renderTemplates already hits all sections.
var refreshInterval = null;
if (this.config.sections) {
for (const sectioid in this.config.sections) {
if (this.config.sections[sectioid].refreshTimer) {
refreshInterval = this.config.sections[sectioid].refreshTimer;
break;
}
}
}
if (!refreshInterval && this.config.refreshTimer) {
refreshInterval = this.config.refreshTimer;
}
if (refreshInterval) {
setInterval(() => {
this.renderTemplates("refreshTimer");
this.updateDom();
}, refreshInterval * 1000);
}
this.renderTemplates("foo");
self.updateDom(self.config.animationSpeed);
},

View File

@@ -3,6 +3,12 @@
*
* By Brian Towles
* MIT Licensed.
*
* Resilience improvements by James (2026-02-27):
* - Circuit breaker: stops all template evals when HA is unreachable
* - Exponential backoff on retries (no more 30 simultaneous retry timers)
* - Coalesced health checks instead of per-section retries
* - Graceful degradation: sections show stale data instead of freezing
*/
var backoff = require('backoff')
const NodeHelper = require("node_helper");
@@ -24,6 +30,10 @@ module.exports = NodeHelper.create({
evaluateTemplate,
onWebsocketCloseEvent,
backoffWSConnection,
_getBreaker,
_openCircuit,
_closeCircuit,
_healthCheck,
});
function start() {
@@ -32,6 +42,10 @@ function start() {
this.logger.debug("MMM-HomeAssistantDisplay helper started...");
}
this.connections = {};
// Circuit breaker state: tracks HA reachability per connection
// When open, template evaluations are skipped entirely (no requests, no error spam)
this._circuitBreaker = {}; // identifier -> { state: 'closed'|'open', failCount: 0, nextRetryAt: 0, retryTimer: null }
}
function stop() {
@@ -67,10 +81,11 @@ function socketNotificationReceived(notification, payload) {
this.evaluateTemplate(payload).then((ret) => {
this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
}).catch((err) => {
this.logger.error(
"Unable to evaluate template",
err
);
// Only log if circuit is closed (avoid spam when HA is down)
const cb = this._getBreaker(payload.identifier);
if (cb.state === 'closed') {
this.logger.error("Unable to evaluate template", err);
}
});
break;
case "RENDER_SECTION_DISPLAY_TEMPLATE":
@@ -80,24 +95,172 @@ function socketNotificationReceived(notification, payload) {
section: payload.section
});
}).catch((err) => {
this.logger.error(
"unable to evaluate section template",
err
);
const cb = this._getBreaker(payload.identifier);
if (cb.state === 'closed') {
this.logger.error("unable to evaluate section template", err);
}
});
break;
}
}
// Circuit breaker helpers
/**
 * Return the circuit-breaker record for a connection, creating it on first use.
 *
 * Records live in `this._circuitBreaker`, keyed by connection identifier, and
 * are mutated in place by the other circuit-breaker helpers.
 *
 * @param {string} identifier - Connection identifier used as the lookup key.
 * @returns {{state: string, failCount: number, nextRetryAt: number,
 *            retryTimer: *, pendingQueue: Array}} The (possibly new) record.
 */
function _getBreaker(identifier) {
  const registry = this._circuitBreaker;
  let record = registry[identifier];
  if (!record) {
    record = {
      // 'closed' = healthy, 'open' = HA unreachable; _healthCheck also sets a
      // transient 'half-open' just before re-opening the circuit.
      state: 'closed',
      failCount: 0,
      nextRetryAt: 0,
      retryTimer: null,
      // Payloads waiting to be replayed once the circuit closes again.
      pendingQueue: [],
    };
    registry[identifier] = record;
  }
  return record;
}
/**
 * Trip the circuit breaker for a connection and schedule one health check.
 *
 * Does nothing when the circuit is already open. Otherwise (state 'closed' on
 * a first failure, or 'half-open' after a failed health check) it marks the
 * circuit open, bumps the failure counter and arms a single timer that calls
 * `_healthCheck` after an exponentially growing delay.
 *
 * @param {string} identifier - Connection identifier whose circuit is opened.
 * @returns {void}
 */
function _openCircuit(identifier) {
  const breaker = this._getBreaker(identifier);
  if (breaker.state === 'open') {
    return; // already open
  }

  breaker.state = 'open';
  breaker.failCount += 1;

  // Backoff doubles per consecutive failure: 15s, 30s, 60s, 120s ... capped at 300s.
  const delay = Math.min(300000, 15000 * 2 ** (breaker.failCount - 1));
  breaker.nextRetryAt = Date.now() + delay;
  this.logger.info(`Circuit OPEN for ${identifier} — HA unreachable. Next health check in ${delay / 1000}s`);

  // One timer per connection (not per section): drop any previous one first.
  if (breaker.retryTimer) {
    clearTimeout(breaker.retryTimer);
  }
  breaker.retryTimer = setTimeout(() => {
    this._healthCheck(identifier);
  }, delay);
}
/**
 * Close the circuit breaker for a connection after HA became reachable again.
 *
 * Does nothing when the circuit is already closed. Otherwise it resets the
 * breaker record, cancels any pending health-check timer, kicks off a
 * WebSocket reconnect for the connection and replays the template
 * evaluations that were queued while the circuit was open.
 *
 * @param {string} identifier - Connection identifier whose circuit is closed.
 * @returns {void}
 */
function _closeCircuit(identifier) {
  const breaker = this._getBreaker(identifier);
  if (breaker.state === 'closed') return;

  this.logger.info(`Circuit CLOSED for ${identifier} — HA is reachable again`);
  breaker.state = 'closed';
  breaker.failCount = 0;
  breaker.nextRetryAt = 0;
  if (breaker.retryTimer) {
    clearTimeout(breaker.retryTimer);
    breaker.retryTimer = null;
  }

  // Reconnect WebSocket — it likely died during the outage and won't
  // recover on its own because the frontend only sends RECONNECT_WS in
  // response to HASSWS_DISCONNECTED, which the node_helper may never
  // have emitted if the socket died silently.
  const conn = this.connections[identifier];
  if (conn && conn.connectionConfig) {
    this.logger.info(`Reconnecting WebSocket for ${identifier} after circuit recovery`);
    const onReconnectError = (err) => {
      this.logger.error(`WebSocket reconnect failed for ${identifier} after circuit recovery`, err);
    };
    try {
      // The reconnect runs in the background. Its result must still be
      // observed: an unhandled rejection would otherwise take the helper
      // process down on current Node versions.
      Promise.resolve(this.backoffWSConnection(identifier, conn.connectionConfig)).catch(onReconnectError);
    } catch (err) {
      // A synchronous failure must not prevent the queued replay below.
      onReconnectError(err);
    }
  }

  // Replay queued template evaluations. splice(0) empties the queue so that
  // items queued again by a failing replay are not processed in this pass.
  const queue = breaker.pendingQueue.splice(0);
  if (queue.length > 0) {
    this.logger.info(`Replaying ${queue.length} queued template evaluations for ${identifier}`);
    for (const item of queue) {
      this.evaluateTemplate(item.payload).then((ret) => {
        if (item.payload.section !== undefined) {
          this.sendSocketNotification("SECTION_DISPLAY_RENDERED", {
            ...ret,
            section: item.payload.section
          });
        } else {
          this.sendSocketNotification("MODULE_DISPLAY_RENDERED", ret);
        }
      }).catch(() => {
        // Deliberately best-effort: evaluateTemplate logs its own failures
        // and re-opens the circuit on connectivity errors.
      });
    }
  }
}
/**
 * Probe Home Assistant for one connection and close or re-open its circuit.
 *
 * Renders the trivial template `{{ 1 }}` with a 10 second timeout. On success
 * the circuit is closed; on any failure the breaker is moved to 'half-open'
 * and `_openCircuit` is called so the next check is scheduled with a longer
 * backoff.
 *
 * Returns without doing anything when the connection has no `hass` client;
 * in that case no further health check is scheduled by this function.
 *
 * @param {string} identifier - Connection identifier to probe.
 * @returns {Promise<void>} Resolves once the probe has been handled.
 */
async function _healthCheck(identifier) {
  const breaker = this._getBreaker(identifier);
  const hass = this.connections[identifier] && this.connections[identifier].hass;
  if (!hass) return;

  this.logger.info(`Health check for ${identifier}...`);
  let timeoutHandle = null;
  try {
    // Simple template render as health probe, raced against a timeout.
    const timeout = new Promise((_, reject) => {
      timeoutHandle = setTimeout(() => reject(new Error('Health check timeout')), 10000);
    });
    await Promise.race([hass.templates.render("{{ 1 }}"), timeout]);
    // Success — close the circuit
    this._closeCircuit(identifier);
  } catch (err) {
    // The rejection value is not guaranteed to be an Error (it may even be
    // undefined); never let the logging line prevent re-arming the breaker.
    const reason = err && err.message ? err.message : String(err);
    this.logger.info(`Health check failed for ${identifier}: ${reason}`);
    // Re-arm with increased backoff — _openCircuit ignores calls while the
    // state is 'open', so move to 'half-open' first.
    breaker.state = 'half-open';
    this._openCircuit(identifier);
  } finally {
    // Do not leave the 10s timer pending once the probe has settled.
    if (timeoutHandle !== null) {
      clearTimeout(timeoutHandle);
    }
  }
}
/**
 * Render a Home Assistant template for one connection, guarded by the
 * connection's circuit breaker and a 10 second timeout.
 *
 * While the circuit is open no request is made: the payload is queued for
 * replay (deduplicated by section + template, capped at 20 entries) and the
 * call rejects immediately. Connectivity failures open the circuit; other
 * failures are logged and rethrown without touching the breaker.
 *
 * @param {{identifier: string, template: string, section?: *}} payload
 *   Request describing which connection to use and which template to render.
 * @returns {Promise<{identifier: string, render: *}>} The rendered template.
 * @throws {Error} When the circuit is open, when no connection exists for the
 *   identifier, when the render times out, or whatever the render rejected with.
 */
async function evaluateTemplate(payload) {
  // NOTE(review): `config` is not defined in this block; presumably a global
  // provided by the host application — confirm.
  if (config.debuglogging) {
    this.logger.debug(`Evaluating template for ${payload.template}`);
  }

  // Circuit breaker check: if HA is known-unreachable, skip entirely
  const breaker = this._getBreaker(payload.identifier);
  if (breaker.state === 'open') {
    // Queue this request for replay when circuit closes (deduplicated by section + template)
    const isDuplicate = breaker.pendingQueue.some(
      (item) => item.payload.section === payload.section && item.payload.template === payload.template
    );
    if (!isDuplicate) {
      breaker.pendingQueue.push({ payload });
      // Cap queue size to prevent memory growth; the oldest entry is dropped.
      if (breaker.pendingQueue.length > 20) {
        breaker.pendingQueue.shift();
      }
    }
    throw new Error('Circuit open — HA unreachable, skipping template evaluation');
  }

  const connection = this.connections[payload.identifier];
  if (!connection || !connection.hass) {
    // Fail with a readable message instead of a TypeError on `.hass`.
    throw new Error(`No Home Assistant connection for ${payload.identifier}`);
  }
  const hass = connection.hass;

  let timeoutHandle = null;
  try {
    // Wrap template call with timeout
    const timeout = new Promise((_, reject) => {
      timeoutHandle = setTimeout(() => reject(new Error('Template evaluation timeout')), 10000);
    });
    const response = await Promise.race([hass.templates.render(payload.template), timeout]);

    // Success — ensure circuit is closed (resets fail count)
    if (breaker.failCount > 0) {
      this._closeCircuit(payload.identifier);
    }
    return {
      identifier: payload.identifier,
      render: response
    };
  } catch (err) {
    // The rejection value is not guaranteed to be an Error instance.
    const message = err && err.message ? String(err.message) : String(err);
    // Check if this is a connectivity error (not a template syntax error)
    const isConnectivityError = /ENETUNREACH|ECONNREFUSED|ECONNRESET|ETIMEDOUT|EHOSTUNREACH|timeout/i.test(message);
    if (isConnectivityError) {
      this.logger.error(`Template evaluation failed (connectivity): ${message}`);
      this._openCircuit(payload.identifier);
    } else {
      // Template/logic error — log but don't trip the breaker
      this.logger.error(`Template evaluation failed: ${message}`);
    }
    throw err;
  } finally {
    // This runs on every state change; a timer left pending per call would
    // pile up for 10s each, so always cancel it once the race has settled.
    if (timeoutHandle !== null) {
      clearTimeout(timeoutHandle);
    }
  }
}
@@ -125,7 +288,8 @@ async function connect(payload) {
this.logger.info(`HomeAssistant connected for ${payload.identifier}`);
this.connections[payload.identifier] = {
hass,
entities: []
entities: [],
connectionConfig,
};
await this.backoffWSConnection(payload.identifier, connectionConfig)
@@ -192,12 +356,8 @@ async function reconnectWebsocket(payload) {
}
function onStateChangedEvent(event) {
if (config.debuglogging) {
this.logger.debug(`Got state change for ${event.data.entity_id}`);
}
for (const connection in this.connections) {
if (this.connections[connection].entities.includes(event.data.entity_id)) {
this.logger.debug(`Found listening connection (${connection}) for entity ${event.data.entity_id}`);
this.sendSocketNotification("CHANGED_STATE", {
identifier: connection,
cause: event.data.entity_id,
@@ -209,7 +369,7 @@ function onStateChangedEvent(event) {
function onWebsocketCloseEvent(event) {
for (const connection in this.connections) {
if (event.target == this.connections[connection].websocket.rawClient.ws) {
this.logger.debug(`Hass WS Disconnected (${connection})`);
this.logger.info(`Hass WS Disconnected (${connection})`);
this.sendSocketNotification("HASSWS_DISCONNECTED", {
identifier: connection,
});