diff --git a/.github/workflows/manual-build-artifacts.yml b/.github/workflows/manual-build-artifacts.yml index ba11029..f96e265 100644 --- a/.github/workflows/manual-build-artifacts.yml +++ b/.github/workflows/manual-build-artifacts.yml @@ -4,9 +4,9 @@ on: workflow_dispatch: inputs: board_envs: - description: 'Comma-separated PlatformIO envs (e.g., "esp32s3,esp32c3").' + description: 'Comma-separated PlatformIO envs (e.g., "esp32s3,esp32c3,esp32c6,esp32c6m,seeed_esp32c6").' required: true - default: "esp32s3" + default: "esp32s3,seeed_esp32c3,esp32,seeed_esp32s3,esp32c3,esp32c3supermini,esp32c6,esp32c6m,seeed_esp32c6" git_ref: description: 'Git ref to build (tag, branch, or SHA).' required: true diff --git a/README.md b/README.md index 7030cdd..651e16b 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,18 @@ This creates an isolated build environment in `tools/.venv/` and `tools/.platfor python tools/setup_local_env.py ``` +### Stress mode (optional) + +To accelerate loops and increase internal load for crash reproduction, build with: + +```ini +# platformio.ini +# Add to an environment's build_flags (preserving existing flags): +build_flags = + ${common.build_flags} + -DSTRESS_MODE +``` + ## Contributing - Run `python tools/build_and_flash.py --local` before opening a PR (verifies the build still succeeds). diff --git a/distributor/README.md b/distributor/README.md index 14a69a8..341b4b1 100644 --- a/distributor/README.md +++ b/distributor/README.md @@ -18,7 +18,7 @@ python -m http.server 8000 Then open `http://localhost:8000`. Use `?tag=v0.6.1-alpha` (or omit to use the latest release). 
## Expected release assets -For each PlatformIO env (e.g., `esp32s3`, `esp32c3`, `esp32`, `seeed_esp32c3`, `seeed_esp32s3`, `esp32c3supermini`), the Release must include: +For each PlatformIO env (e.g., `esp32s3`, `esp32c3`, `esp32`, `seeed_esp32c3`, `seeed_esp32s3`, `esp32c3supermini`, `esp32c6`, `esp32c6m`, `seeed_esp32c6`), the Release must include: - `-firmware_merged.bin` (used for WebSerial flashing) - `-firmware.bin` (OTA app) - `-littlefs.bin` (OTA filesystem) diff --git a/distributor/boards.json b/distributor/boards.json index 6559672..1d227dd 100644 --- a/distributor/boards.json +++ b/distributor/boards.json @@ -101,6 +101,54 @@ "files": [ "firmware/seeed_esp32c3/firmware_merged.bin" ] + }, + { + "id": "esp32c6", + "name": "OpenFilamentSensor", + "variant": "ESP32-C6", + "chipFamily": "ESP32-C6", + "status": "experimental", + "version": "0.5.0", + "released": "2026-03-01", + "manifest": "firmware/esp32c6/manifest.json", + "notes": [ + "Experimental C6 build - untested" + ], + "files": [ + "firmware/esp32c6/firmware_merged.bin" + ] + }, + { + "id": "esp32c6m", + "name": "OpenFilamentSensor", + "variant": "ESP32-C6 DevKitM-1", + "chipFamily": "ESP32-C6", + "status": "experimental", + "version": "0.5.0", + "released": "2026-03-01", + "manifest": "firmware/esp32c6m/manifest.json", + "notes": [ + "Experimental C6 DevKitM-1 build - untested" + ], + "files": [ + "firmware/esp32c6m/firmware_merged.bin" + ] + }, + { + "id": "seeed_esp32c6", + "name": "OpenFilamentSensor", + "variant": "Seeed XIAO ESP32-C6", + "chipFamily": "ESP32-C6", + "status": "experimental", + "version": "0.5.0", + "released": "2026-03-01", + "manifest": "firmware/seeed_esp32c6/manifest.json", + "notes": [ + "Experimental Seeed XIAO ESP32-C6 build - untested" + ], + "files": [ + "firmware/seeed_esp32c6/firmware_merged.bin" + ] } ] } \ No newline at end of file diff --git a/platformio.ini b/platformio.ini index e4fd37f..b5949d8 100644 --- a/platformio.ini +++ b/platformio.ini @@ -29,16 +29,16 
@@ lib_deps = adafruit/Adafruit GFX Library@^1.11.9 build_flags = -D ELEGANTOTA_USE_ASYNC_WEBSERVER=1 - ; -D FILAMENT_RUNOUT_PIN=12 - ; -D MOVEMENT_SENSOR_PIN=13 + -D FILAMENT_RUNOUT_PIN=12 + -D MOVEMENT_SENSOR_PIN=13 ; -D INVERT_RUNOUT_PIN=1 ; Uncomment in board config if pin logic is inverted ; Coredump configuration - saves crash info to flash partition for analysis ; -D CONFIG_APP_REPRODUCIBLE_BUILD=y ; commented out because ; Firmware ThumbprintFilesystemThumbprint,Project Status ; and Version displayed in the WebUI about tab might depend on them --D CONFIG_ESP_COREDUMP_ENABLE_TO_FLASH=1 --D CONFIG_ESP_COREDUMP_DATA_FORMAT_ELF=1 --D CONFIG_ESP_COREDUMP_CHECKSUM_CRC32=1 + -D CONFIG_ESP_COREDUMP_ENABLE_TO_FLASH=1 + -D CONFIG_ESP_COREDUMP_DATA_FORMAT_ELF=1 + -D CONFIG_ESP_COREDUMP_CHECKSUM_CRC32=1 ; Crash testing endpoint (/api/panic) and UI section. Disable for release builds. ; -D ENABLE_CRASH_TESTING=1 extra_scripts = @@ -197,7 +197,7 @@ build_unflags = lib_deps = ${common.lib_deps} lib_ignore = - WebServer ; arduino 3.x has its own synchrnous WebServer implementation that must be removed + WebServer ; arduino 3.x has its own synchronous WebServer implementation that must be removed extra_scripts = ${common.extra_scripts} post:tools/merge_bin.py diff --git a/src/ElegooCC.cpp b/src/ElegooCC.cpp index 3272116..e8d7dc3 100644 --- a/src/ElegooCC.cpp +++ b/src/ElegooCC.cpp @@ -80,9 +80,9 @@ printer_info_t ElegooCC::getCurrentInformation() info.runoutPauseCommanded = runoutPauseCommanded; info.runoutPauseRemainingMm = runoutPauseRemainingMm; info.runoutPauseDelayMm = runoutPauseDelayMm; - info.mainboardID = mainboardID; - info.taskId = taskId; - info.filename = filename; + strlcpy(info.mainboardID, mainboardID.c_str(), sizeof(info.mainboardID)); + strlcpy(info.taskId, taskId.c_str(), sizeof(info.taskId)); + strlcpy(info.filename, filename.c_str(), sizeof(info.filename)); info.printStatus = printStatus; info.isPrinting = (printStatus == SDCP_PRINT_STATUS_PRINTING && 
(machineStatusMask & (1 << SDCP_MACHINE_STATUS_PRINTING)) != 0); info.currentLayer = currentLayer; diff --git a/src/ElegooCC.h b/src/ElegooCC.h index 2c9f95a..bd233c2 100644 --- a/src/ElegooCC.h +++ b/src/ElegooCC.h @@ -78,12 +78,12 @@ typedef enum SDCP_COMMAND_STOP_FEEDING_MATERIAL = 132, } sdcp_command_t; -// Struct to hold current printer information +// Struct to hold current printer information (uses fixed-size buffers to avoid heap allocations) typedef struct { - String mainboardID; - String taskId; - String filename; + char mainboardID[64]; + char taskId[64]; + char filename[128]; sdcp_print_status_t printStatus; bool filamentStopped; bool filamentRunout; diff --git a/src/Logger.cpp b/src/Logger.cpp index 109e935..e48e1a6 100644 --- a/src/Logger.cpp +++ b/src/Logger.cpp @@ -91,16 +91,17 @@ void Logger::logInternal(const char *message, LogLevel level) return; } - // Generate UUID - char uuid[37]; - generateUUID(uuid); - - // Get current timestamp + // Get current timestamp (safe to call outside critical section) unsigned long timestamp = getTime(); - // Critical section for buffer update + // Critical section for buffer update - UUID generation is inside + // to prevent duplicate uuidCounter values from concurrent log() calls portENTER_CRITICAL(&_logMutex); + // Generate UUID inside critical section (uuidCounter++ must be atomic) + char uuid[37]; + generateUUID(uuid); + // Store in circular buffer with fixed-size copy strncpy(logBuffer[currentIndex].uuid, uuid, sizeof(logBuffer[currentIndex].uuid) - 1); logBuffer[currentIndex].uuid[sizeof(logBuffer[currentIndex].uuid) - 1] = '\0'; @@ -211,9 +212,11 @@ String Logger::getLogsAsText(int maxEntries) return result; } - // Snapshot indices atomically to avoid race conditions + // Snapshot indices atomically under mutex (same pattern as streamLogs) + portENTER_CRITICAL(&_logMutex); int snapshotIndex = currentIndex; int snapshotCount = totalEntries; + portEXIT_CRITICAL(&_logMutex); // Validate snapshot if 
(snapshotCount < 0 || snapshotCount > logCapacity) @@ -344,14 +347,15 @@ void Logger::streamLogs(Print* printer) void Logger::clearLogs() { - currentIndex = 0; - totalEntries = 0; if (logCapacity == 0 || logBuffer == nullptr) { return; } - // Clear the buffer + // Clear the buffer - indices MUST be inside the critical section + // to avoid TOCTOU race with logInternal() portENTER_CRITICAL(&_logMutex); + currentIndex = 0; + totalEntries = 0; for (int i = 0; i < logCapacity; i++) { memset(logBuffer[i].uuid, 0, sizeof(logBuffer[i].uuid)); diff --git a/src/WebServer.cpp b/src/WebServer.cpp index 8a682f8..87c18bc 100644 --- a/src/WebServer.cpp +++ b/src/WebServer.cpp @@ -99,146 +99,123 @@ String getBuildVersion() { return version.length() > 0 ? version : "0.0.0"; } +// CRC32 for SSE payload deduplication (replaces full String comparison) +uint32_t WebServer::crc32(const char *data, size_t length) { + uint32_t crc = 0xFFFFFFFF; + for (size_t i = 0; i < length; i++) { + crc ^= (uint8_t)data[i]; + for (int j = 0; j < 8; j++) { + crc = (crc >> 1) ^ (0xEDB88320 & -(crc & 1)); + } + } + return ~crc; +} + WebServer::WebServer(int port) : server(port), statusEvents(kRouteStatusEvents) {} void WebServer::begin() { server.begin(); - // Get settings endpoint + // --- GET /get_settings --- + // Serves pre-built cached settings JSON (built in loop() on main task) + // Thread-safe: double-buffered copy, short lock, no heap allocation server.on(kRouteGetSettings, HTTP_GET, - [](AsyncWebServerRequest *request) + [this](AsyncWebServerRequest *request) { - String jsonResponse = settingsManager.toJson(false); - request->send(200, "application/json", jsonResponse); + char jsonBuf[kCacheBufSize]; + size_t len = cachedSettings.read(jsonBuf, sizeof(jsonBuf)); + + if (len == 0) + { + request->send(503, "application/json", "{\"error\":\"initializing\"}"); + } + else + { + request->send(200, "application/json", jsonBuf); + } }); + // --- POST /update_settings --- + // Thread-safe: copies JSON 
into pendingSettingsDoc and sets flag; + // actual settings mutation happens in loop() on the main task server.addHandler(new AsyncCallbackJsonWebHandler( kRouteUpdateSettings, [this](AsyncWebServerRequest *request, JsonVariant &json) { - JsonObject jsonObj = json.as(); + portENTER_CRITICAL(&pendingMutex); + bool alreadyPending = pendingSettingsUpdate; + portEXIT_CRITICAL(&pendingMutex); - // Track if IP address changed to trigger reconnect - String oldIp = settingsManager.getElegooIP(); - bool ipChanged = false; - - // Only update fields that are present in the request - if (jsonObj.containsKey("elegooip")) - { - String newIp = jsonObj["elegooip"].as(); - ipChanged = (oldIp != newIp) && newIp.length() > 0; - settingsManager.setElegooIP(newIp); - } - if (jsonObj.containsKey("ssid")) - settingsManager.setSSID(jsonObj["ssid"].as()); - if (jsonObj.containsKey("passwd") && jsonObj["passwd"].as().length() > 0) - settingsManager.setPassword(jsonObj["passwd"].as()); - if (jsonObj.containsKey("ap_mode")) - settingsManager.setAPMode(jsonObj["ap_mode"].as()); - if (jsonObj.containsKey("pause_on_runout")) - settingsManager.setPauseOnRunout(jsonObj["pause_on_runout"].as()); - if (jsonObj.containsKey("enabled")) - settingsManager.setEnabled(jsonObj["enabled"].as()); - if (jsonObj.containsKey("detection_length_mm")) - settingsManager.setDetectionHardJamMm(jsonObj["detection_length_mm"].as()); - if (jsonObj.containsKey("detection_grace_period_ms")) - settingsManager.setDetectionGracePeriodMs(jsonObj["detection_grace_period_ms"].as()); - if (jsonObj.containsKey("detection_ratio_threshold")) - settingsManager.setDetectionRatioThreshold(jsonObj["detection_ratio_threshold"].as()); - if (jsonObj.containsKey("detection_hard_jam_mm")) - settingsManager.setDetectionHardJamMm(jsonObj["detection_hard_jam_mm"].as()); - if (jsonObj.containsKey("detection_soft_jam_time_ms")) - settingsManager.setDetectionSoftJamTimeMs(jsonObj["detection_soft_jam_time_ms"].as()); - if 
(jsonObj.containsKey("detection_hard_jam_time_ms")) - settingsManager.setDetectionHardJamTimeMs(jsonObj["detection_hard_jam_time_ms"].as()); - if (jsonObj.containsKey("detection_mode")) - settingsManager.setDetectionMode(jsonObj["detection_mode"].as()); - if (jsonObj.containsKey("sdcp_loss_behavior")) - settingsManager.setSdcpLossBehavior(jsonObj["sdcp_loss_behavior"].as()); - if (jsonObj.containsKey("flow_telemetry_stale_ms")) - settingsManager.setFlowTelemetryStaleMs(jsonObj["flow_telemetry_stale_ms"].as()); - if (jsonObj.containsKey("ui_refresh_interval_ms")) - settingsManager.setUiRefreshIntervalMs(jsonObj["ui_refresh_interval_ms"].as()); - if (jsonObj.containsKey("suppress_pause_commands")) - settingsManager.setSuppressPauseCommands(jsonObj["suppress_pause_commands"].as()); - if (jsonObj.containsKey("log_level")) - settingsManager.setLogLevel(jsonObj["log_level"].as()); - if (jsonObj.containsKey("movement_mm_per_pulse")) - settingsManager.setMovementMmPerPulse(jsonObj["movement_mm_per_pulse"].as()); - if (jsonObj.containsKey("auto_calibrate_sensor")) - settingsManager.setAutoCalibrateSensor(jsonObj["auto_calibrate_sensor"].as()); - if (jsonObj.containsKey("pulse_reduction_percent")) - settingsManager.setPulseReductionPercent(jsonObj["pulse_reduction_percent"].as()); - if (jsonObj.containsKey("test_recording_mode")) - settingsManager.setTestRecordingMode(jsonObj["test_recording_mode"].as()); - if (jsonObj.containsKey("show_debug_page")) - settingsManager.setShowDebugPage(jsonObj["show_debug_page"].as()); - if (jsonObj.containsKey("timezone_offset_minutes")) - settingsManager.setTimezoneOffsetMinutes(jsonObj["timezone_offset_minutes"].as()); - - bool saved = settingsManager.save(); - if (saved) + if (alreadyPending) { - elegooCC.refreshCaches(); - if (ipChanged) - { - elegooCC.reconnect(); - } - request->send(200, "application/json", "{\"status\":\"ok\"}"); + request->send(429, "application/json", + "{\"error\":\"Settings update already pending\"}"); + return; } 
- else + + portENTER_CRITICAL(&pendingMutex); + pendingSettingsDoc.clear(); + JsonObject src = json.as(); + JsonObject dst = pendingSettingsDoc.to(); + for (JsonPair kv : src) { - request->send(500, "application/json", - "{\"error\":\"Failed to save settings to flash\"}"); + dst[kv.key()] = kv.value(); } + pendingSettingsUpdate = true; + portEXIT_CRITICAL(&pendingMutex); + + request->send(200, "application/json", "{\"status\":\"ok\"}"); })); + // --- POST /test_pause --- + // Thread-safe: sets flag, loop() calls pausePrint() server.on(kRouteTestPause, HTTP_POST, - [](AsyncWebServerRequest *request) + [this](AsyncWebServerRequest *request) { - elegooCC.pausePrint(); + pendingPause = true; request->send(200, "text/plain", "ok"); }); + // --- POST /test_resume --- + // Thread-safe: sets flag, loop() calls continuePrint() server.on(kRouteTestResume, HTTP_POST, - [](AsyncWebServerRequest *request) + [this](AsyncWebServerRequest *request) { - elegooCC.continuePrint(); + pendingResume = true; request->send(200, "text/plain", "ok"); }); // POST /discover_printer - Start discovery scan + // Thread-safe: sets flag, loop() calls startDiscoveryAsync() server.on(kRouteDiscoverPrinter, HTTP_POST, - [](AsyncWebServerRequest *request) + [this](AsyncWebServerRequest *request) { if (elegooCC.isDiscoveryActive()) { request->send(200, "application/json", "{\"active\":true}"); return; } - elegooCC.startDiscoveryAsync(5000, nullptr); // 5 seconds with socket recycling + pendingDiscovery = true; request->send(200, "application/json", "{\"started\":true}"); }); // GET /discover_printer - Poll discovery status and results + // Thread-safe: double-buffered copy, short lock, no heap allocation server.on(kRouteDiscoverPrinter, HTTP_GET, - [](AsyncWebServerRequest *request) + [this](AsyncWebServerRequest *request) { - DynamicJsonDocument jsonDoc(1024); - jsonDoc["active"] = elegooCC.isDiscoveryActive(); + char jsonBuf[kCacheBufSize]; + size_t len = cachedDiscovery.read(jsonBuf, 
sizeof(jsonBuf)); - JsonArray printers = jsonDoc.createNestedArray("printers"); - for (const auto &res : elegooCC.getDiscoveryResults()) + if (len == 0) { - JsonObject p = printers.createNestedObject(); - p["ip"] = res.ip; - p["payload"] = res.payload; + request->send(200, "application/json", "{\"active\":false,\"printers\":[]}"); + } + else + { + request->send(200, "application/json", jsonBuf); } - - String response; - serializeJson(jsonDoc, response); - request->send(200, "application/json", response); }); // Setup ElegantOTA @@ -255,73 +232,50 @@ void WebServer::begin() ESP.restart(); }); - statusEvents.onConnect([](AsyncEventSourceClient *client) { + // SSE client connect handler with client cap + statusEvents.onConnect([this](AsyncEventSourceClient *client) { + if (statusEvents.count() > kMaxSSEClients) + { + // Over limit - the library already added the client, so we + // just won't send data and it will be cleaned up on next sweep. + logger.logf("SSE client rejected (count=%d, max=%d)", + statusEvents.count(), kMaxSSEClients); + return; + } client->send("connected", "init", millis(), 1000); }); server.addHandler(&statusEvents); - // Sensor status endpoint + // --- GET /sensor_status --- + // Thread-safe: double-buffered copy, short lock, no heap allocation server.on(kRouteSensorStatus, HTTP_GET, [this](AsyncWebServerRequest *request) { - printer_info_t elegooStatus = elegooCC.getCurrentInformation(); - - // JSON allocation: 576 bytes heap (was 768 bytes) - // Measured actual: ~480 bytes (83% utilization, 17% margin) - // Last measured: 2025-11-26 - // See: .claude/hardcoded-allocations.md for maintenance notes - DynamicJsonDocument jsonDoc(576); - buildStatusJson(jsonDoc, elegooStatus); - - String jsonResponse; - jsonResponse.reserve(576); // Pre-allocate to prevent fragmentation - serializeJson(jsonDoc, jsonResponse); + char jsonBuf[kCacheBufSize]; + size_t len = cachedSensorStatus.read(jsonBuf, sizeof(jsonBuf)); - // Pin Values level: Check if approaching 
allocation limit - if (settingsManager.getLogLevel() >= LOG_PIN_VALUES) + if (len == 0) { - size_t actualSize = measureJson(jsonDoc); - static bool logged = false; - if (!logged && actualSize > 490) // >85% of 576 bytes - { - logger.logf(LOG_PIN_VALUES, "WebServer sensor_status JSON size: %zu / 576 bytes (%.1f%%)", - actualSize, (actualSize * 100.0f / 576.0f)); - logged = true; // Only log once per session - } + request->send(503, "application/json", "{\"error\":\"initializing\"}"); + } + else + { + request->send(200, "application/json", jsonBuf); } - - request->send(200, "application/json", jsonResponse); }); // Logs endpoint (DISABLED - JSON serialization of 1024 entries exceeds 32KB buffer) // Use /api/logs_live or /api/logs_text instead - // server.on("/api/logs", HTTP_GET, - // [](AsyncWebServerRequest *request) - // { - // String jsonResponse = logger.getLogsAsJson(); - // request->send(200, "application/json", jsonResponse); - // }); // Raw text logs endpoint (full logs for download) server.on(kRouteLogsText, HTTP_GET, [](AsyncWebServerRequest *request) { - /* - AsyncWebServerResponse *response = request->beginChunkedResponse("text/plain", - [](uint8_t *buffer, size_t maxLen, size_t index) -> size_t { - // This uses a custom shim to adapt the Print interface to the chunked response - // but since we don't have a direct "Print to buffer" adapter easily available for AsyncWebServer's callback style - // without a class, we need a different approach. - // Actually, Logger::streamLogs takes a Print*. AsyncResponseStream inherits from Print! 
- return 0; - }); - */ - AsyncResponseStream *streamResponse = request->beginResponseStream("text/plain"); streamResponse->addHeader("Content-Disposition", "attachment; filename=\"logs.txt\""); - + logger.streamLogs(streamResponse); - + request->send(streamResponse); }); @@ -443,31 +397,33 @@ void WebServer::begin() }); #endif - // Version endpoint + // Build version JSON once at startup (avoids LittleFS reads from async handler) + { + #ifdef BUILD_DATE + const char* buildDate = BUILD_DATE; + const char* buildTime = BUILD_TIME; + #else + const char* buildDate = __DATE__; + const char* buildTime = __TIME__; + #endif + + StaticJsonDocument<512> jsonDoc; + jsonDoc["firmware_version"] = firmwareVersion; + jsonDoc["chip_family"] = chipFamily; + jsonDoc["build_date"] = buildDate; + jsonDoc["build_time"] = buildTime; + jsonDoc["firmware_thumbprint"] = getBuildThumbprint(buildDate, buildTime); + jsonDoc["filesystem_thumbprint"] = getFilesystemThumbprint(); + jsonDoc["build_version"] = getBuildVersion(); + + serializeJson(jsonDoc, cachedVersionJson, sizeof(cachedVersionJson)); + } + + // Version endpoint - serves pre-built JSON (no LittleFS access, thread-safe) server.on(kRouteVersion, HTTP_GET, - [](AsyncWebServerRequest *request) + [this](AsyncWebServerRequest *request) { - // Use BUILD_DATE and BUILD_TIME if set by build script, otherwise fall back to __DATE__ and __TIME__ - #ifdef BUILD_DATE - const char* buildDate = BUILD_DATE; - const char* buildTime = BUILD_TIME; - #else - const char* buildDate = __DATE__; - const char* buildTime = __TIME__; - #endif - - DynamicJsonDocument jsonDoc(512); - jsonDoc["firmware_version"] = firmwareVersion; - jsonDoc["chip_family"] = chipFamily; - jsonDoc["build_date"] = buildDate; - jsonDoc["build_time"] = buildTime; - jsonDoc["firmware_thumbprint"] = getBuildThumbprint(buildDate, buildTime); - jsonDoc["filesystem_thumbprint"] = getFilesystemThumbprint(); - jsonDoc["build_version"] = getBuildVersion(); - - String jsonResponse; - 
serializeJson(jsonDoc, jsonResponse); - request->send(200, "application/json", jsonResponse); + request->send(200, "application/json", cachedVersionJson); }); // Serve lightweight UI from /lite (if available) @@ -496,11 +452,209 @@ void WebServer::begin() }); } +void WebServer::processPendingCommands() +{ + // Process pending pause command + if (pendingPause) + { + pendingPause = false; + elegooCC.pausePrint(); + } + + // Process pending resume command + if (pendingResume) + { + pendingResume = false; + elegooCC.continuePrint(); + } + + // Process pending discovery + if (pendingDiscovery) + { + pendingDiscovery = false; + if (!elegooCC.isDiscoveryActive()) + { + elegooCC.startDiscoveryAsync(5000, nullptr); + } + } + + // Process pending reconnect (triggered by IP change in settings update) + if (pendingReconnect) + { + pendingReconnect = false; + elegooCC.reconnect(); + } + + // Process pending settings update + if (pendingSettingsUpdate) + { + portENTER_CRITICAL(&pendingMutex); + // Copy the doc locally so we can release the mutex quickly + StaticJsonDocument<1024> localDoc; + localDoc.set(pendingSettingsDoc); + pendingSettingsUpdate = false; + portEXIT_CRITICAL(&pendingMutex); + + JsonObject jsonObj = localDoc.as(); + + // Track if IP address changed to trigger reconnect + String oldIp = settingsManager.getElegooIP(); + bool ipChanged = false; + + // Only update fields that are present in the request + if (jsonObj.containsKey("elegooip")) + { + String newIp = jsonObj["elegooip"].as(); + ipChanged = (oldIp != newIp) && newIp.length() > 0; + settingsManager.setElegooIP(newIp); + } + if (jsonObj.containsKey("ssid")) + settingsManager.setSSID(jsonObj["ssid"].as()); + if (jsonObj.containsKey("passwd") && jsonObj["passwd"].as().length() > 0) + settingsManager.setPassword(jsonObj["passwd"].as()); + if (jsonObj.containsKey("ap_mode")) + settingsManager.setAPMode(jsonObj["ap_mode"].as()); + if (jsonObj.containsKey("pause_on_runout")) + 
settingsManager.setPauseOnRunout(jsonObj["pause_on_runout"].as()); + if (jsonObj.containsKey("enabled")) + settingsManager.setEnabled(jsonObj["enabled"].as()); + if (jsonObj.containsKey("detection_length_mm")) + settingsManager.setDetectionHardJamMm(jsonObj["detection_length_mm"].as()); + if (jsonObj.containsKey("detection_grace_period_ms")) + settingsManager.setDetectionGracePeriodMs(jsonObj["detection_grace_period_ms"].as()); + if (jsonObj.containsKey("detection_ratio_threshold")) + settingsManager.setDetectionRatioThreshold(jsonObj["detection_ratio_threshold"].as()); + if (jsonObj.containsKey("detection_hard_jam_mm")) + settingsManager.setDetectionHardJamMm(jsonObj["detection_hard_jam_mm"].as()); + if (jsonObj.containsKey("detection_soft_jam_time_ms")) + settingsManager.setDetectionSoftJamTimeMs(jsonObj["detection_soft_jam_time_ms"].as()); + if (jsonObj.containsKey("detection_hard_jam_time_ms")) + settingsManager.setDetectionHardJamTimeMs(jsonObj["detection_hard_jam_time_ms"].as()); + if (jsonObj.containsKey("detection_mode")) + settingsManager.setDetectionMode(jsonObj["detection_mode"].as()); + if (jsonObj.containsKey("sdcp_loss_behavior")) + settingsManager.setSdcpLossBehavior(jsonObj["sdcp_loss_behavior"].as()); + if (jsonObj.containsKey("flow_telemetry_stale_ms")) + settingsManager.setFlowTelemetryStaleMs(jsonObj["flow_telemetry_stale_ms"].as()); + if (jsonObj.containsKey("ui_refresh_interval_ms")) + settingsManager.setUiRefreshIntervalMs(jsonObj["ui_refresh_interval_ms"].as()); + if (jsonObj.containsKey("suppress_pause_commands")) + settingsManager.setSuppressPauseCommands(jsonObj["suppress_pause_commands"].as()); + if (jsonObj.containsKey("log_level")) + settingsManager.setLogLevel(jsonObj["log_level"].as()); + if (jsonObj.containsKey("movement_mm_per_pulse")) + settingsManager.setMovementMmPerPulse(jsonObj["movement_mm_per_pulse"].as()); + if (jsonObj.containsKey("auto_calibrate_sensor")) + 
settingsManager.setAutoCalibrateSensor(jsonObj["auto_calibrate_sensor"].as()); + if (jsonObj.containsKey("pulse_reduction_percent")) + settingsManager.setPulseReductionPercent(jsonObj["pulse_reduction_percent"].as()); + if (jsonObj.containsKey("test_recording_mode")) + settingsManager.setTestRecordingMode(jsonObj["test_recording_mode"].as()); + if (jsonObj.containsKey("show_debug_page")) + settingsManager.setShowDebugPage(jsonObj["show_debug_page"].as()); + if (jsonObj.containsKey("timezone_offset_minutes")) + settingsManager.setTimezoneOffsetMinutes(jsonObj["timezone_offset_minutes"].as()); + + bool saved = settingsManager.save(); + if (saved) + { + elegooCC.refreshCaches(); + settingsJsonDirty = true; // Rebuild cached settings JSON + if (ipChanged) + { + pendingReconnect = true; + } + } + else + { + logger.log("Failed to save settings from pending update"); + } + } +} + +void WebServer::refreshCachedResponses() +{ + // Rebuild sensor status JSON (called every loop iteration from main task) + { + printer_info_t elegooStatus = elegooCC.getCurrentInformation(); + StaticJsonDocument<768> jsonDoc; + buildStatusJson(jsonDoc, elegooStatus); + + char jsonBuf[kCacheBufSize]; + size_t len = serializeJson(jsonDoc, jsonBuf, sizeof(jsonBuf)); + + cachedSensorStatus.publish(jsonBuf, len); + cachedPrintStatus = elegooStatus.printStatus; + } + + // Rebuild discovery JSON (rebuilt unconditionally every call; reflects current active flag and results) + { + StaticJsonDocument<1024> jsonDoc; + jsonDoc["active"] = elegooCC.isDiscoveryActive(); + + JsonArray printers = jsonDoc.createNestedArray("printers"); + for (const auto &res : elegooCC.getDiscoveryResults()) + { + JsonObject p = printers.createNestedObject(); + p["ip"] = res.ip; + p["payload"] = res.payload; + } + + char jsonBuf[kCacheBufSize]; + size_t len = serializeJson(jsonDoc, jsonBuf, sizeof(jsonBuf)); + + cachedDiscovery.publish(jsonBuf, len); + } + + // Rebuild settings JSON only when dirty + if (settingsJsonDirty) + { + settingsJsonDirty = false; + 
String newSettingsJson = settingsManager.toJson(false); + + cachedSettings.publish(newSettingsJson.c_str(), newSettingsJson.length()); + } +} + +void WebServer::cleanupSSEClients() +{ + unsigned long now = millis(); + if (now - lastSSECleanupMs < 30000) + { + return; + } + lastSSECleanupMs = now; + + int clientCount = statusEvents.count(); + if (clientCount > kMaxSSEClients) + { + logger.logf("SSE cleanup: %d clients (max %d), closing stale connections", + clientCount, kMaxSSEClients); + statusEvents.close(); + } +} + void WebServer::loop() { ElegantOTA.loop(); unsigned long now = millis(); + // Process commands queued by async web handlers (thread-safe) + processPendingCommands(); + + // Rebuild cached JSON responses on the main task (thread-safe) + refreshCachedResponses(); + if (kStressCacheRefreshes > 0) + { + for (int i = 0; i < kStressCacheRefreshes; i++) + { + refreshCachedResponses(); + } + } + + // Periodic SSE client cleanup + cleanupSSEClients(); + // Periodic web server diagnostics (every 30s) static unsigned long lastDiagMs = 0; if (now - lastDiagMs >= 30000) @@ -523,7 +677,7 @@ void WebServer::loop() } } -void WebServer::buildStatusJson(DynamicJsonDocument &jsonDoc, const printer_info_t &elegooStatus) +void WebServer::buildStatusJson(StaticJsonDocument<768> &jsonDoc, const printer_info_t &elegooStatus) { jsonDoc["stopped"] = elegooStatus.filamentStopped; jsonDoc["filamentRunout"] = elegooStatus.filamentRunout; @@ -570,47 +724,39 @@ void WebServer::buildStatusJson(DynamicJsonDocument &jsonDoc, const printer_info void WebServer::broadcastStatusUpdate() { - printer_info_t elegooStatus = elegooCC.getCurrentInformation(); - // JSON allocation: 576 bytes heap (was 768 bytes) - // Measured actual: ~480 bytes (83% utilization, 17% margin) - // Last measured: 2025-11-26 - // See: .claude/hardcoded-allocations.md for maintenance notes - DynamicJsonDocument jsonDoc(576); - buildStatusJson(jsonDoc, elegooStatus); - String payload; - payload.reserve(576); // 
Pre-allocate to prevent fragmentation - serializeJson(jsonDoc, payload); - - // Pin Values level: Check if approaching allocation limit - if (settingsManager.getLogLevel() >= LOG_PIN_VALUES) + // Use the pre-built cached sensor status JSON (double-buffered, short-lock copy) + char payloadBuf[kCacheBufSize]; + size_t payloadLen = cachedSensorStatus.read(payloadBuf, sizeof(payloadBuf)); + sdcp_print_status_t printStatus = cachedPrintStatus; + + if (payloadLen == 0) { - size_t actualSize = measureJson(jsonDoc); - static bool logged = false; - if (!logged && actualSize > 490) // >85% of 576 bytes - { - logger.logf(LOG_PIN_VALUES, "WebServer broadcastStatusUpdate JSON size: %zu / 576 bytes (%.1f%%)", - actualSize, (actualSize * 100.0f / 576.0f)); - logged = true; // Only log once per session - } + return; } - bool idleState = (elegooStatus.printStatus == 0 || elegooStatus.printStatus == 9); + bool idleState = (printStatus == SDCP_PRINT_STATUS_IDLE || + printStatus == SDCP_PRINT_STATUS_COMPLETE); + if (idleState) { - if (payload == lastIdlePayload) + uint32_t payloadCrc = crc32(payloadBuf, payloadLen); + if (hasLastIdlePayload && payloadCrc == lastIdlePayloadCrc) { - statusBroadcastIntervalMs = 5000; + statusBroadcastIntervalMs = kStatusBroadcastIntervalMsDefault; return; } - lastIdlePayload = payload; + lastIdlePayloadCrc = payloadCrc; + hasLastIdlePayload = true; } else { - lastIdlePayload = ""; + hasLastIdlePayload = false; } - statusEvents.send(payload.c_str(), "status"); + statusEvents.send(payloadBuf, "status"); - bool isPrinting = elegooStatus.printStatus != 0 && elegooStatus.printStatus != 9; - statusBroadcastIntervalMs = isPrinting ? 1000 : 5000; + bool isPrinting = (printStatus != SDCP_PRINT_STATUS_IDLE && + printStatus != SDCP_PRINT_STATUS_COMPLETE); + statusBroadcastIntervalMs = isPrinting ? 
kStatusBroadcastIntervalMsPrinting + : kStatusBroadcastIntervalMsDefault; } diff --git a/src/WebServer.h b/src/WebServer.h index 3cd7e4d..f4e44d6 100644 --- a/src/WebServer.h +++ b/src/WebServer.h @@ -15,17 +15,108 @@ // Define SPIFFS as LittleFS #define SPIFFS LittleFS +// Maximum SSE clients allowed simultaneously +static constexpr int kMaxSSEClients = 4; + +#ifdef STRESS_MODE +static constexpr unsigned long kStatusBroadcastIntervalMsDefault = 200; +static constexpr unsigned long kStatusBroadcastIntervalMsPrinting = 50; +static constexpr int kStressCacheRefreshes = 3; +#else +static constexpr unsigned long kStatusBroadcastIntervalMsDefault = 5000; +static constexpr unsigned long kStatusBroadcastIntervalMsPrinting = 1000; +static constexpr int kStressCacheRefreshes = 0; +#endif + class WebServer { private: AsyncWebServer server; AsyncEventSource statusEvents; unsigned long lastStatusBroadcastMs = 0; - unsigned long statusBroadcastIntervalMs = 5000; - String lastIdlePayload; + unsigned long statusBroadcastIntervalMs = kStatusBroadcastIntervalMsDefault; + + // --- Thread-safe command queue (async handlers set flags, loop() processes) --- + + // Pending settings update: async handler parses JSON into this doc, loop() applies it + volatile bool pendingSettingsUpdate = false; + StaticJsonDocument<1024> pendingSettingsDoc; + portMUX_TYPE pendingMutex = portMUX_INITIALIZER_UNLOCKED; + + // Pending action commands from web handlers + volatile bool pendingPause = false; + volatile bool pendingResume = false; + volatile bool pendingDiscovery = false; + volatile bool pendingReconnect = false; // Set when IP changed during settings update + + // --- Pre-built cached responses (double-buffered, short-lock copy) --- + // Main loop writes to buf[!activeIdx], then flips activeIdx. + // Async handlers snapshot activeIdx/len under a short lock (no heap allocation). 
+ static constexpr size_t kCacheBufSize = 1536; // Fits sensor (~600B), settings (~1KB), discovery (~1KB) + + struct CachedResponse { + char buf[2][kCacheBufSize]; + size_t len[2] = {0, 0}; + volatile int activeIdx = 0; // Single-word write is atomic on ESP32 + + // Main loop: write to inactive buffer, then flip + void publish(const char *json, size_t jsonLen) { + int writeIdx = !activeIdx; + size_t copyLen = (jsonLen < kCacheBufSize - 1) ? jsonLen : (kCacheBufSize - 1); + memcpy(buf[writeIdx], json, copyLen); + buf[writeIdx][copyLen] = '\0'; + len[writeIdx] = copyLen; + // Memory barrier + atomic flip + portENTER_CRITICAL(&_mutex); + activeIdx = writeIdx; + portEXIT_CRITICAL(&_mutex); + } - void buildStatusJson(DynamicJsonDocument &jsonDoc, const printer_info_t &elegooStatus); + // Async handler: copy active buffer under short lock (no heap alloc) + size_t read(char *out, size_t outSize) const { + if (outSize == 0) { + return 0; + } + size_t copyLen = 0; + portENTER_CRITICAL(&_mutex); + int idx = activeIdx; + copyLen = len[idx]; + if (copyLen >= outSize) { + copyLen = outSize - 1; + } + memcpy(out, buf[idx], copyLen); + out[copyLen] = '\0'; + portEXIT_CRITICAL(&_mutex); + return copyLen; + } + + mutable portMUX_TYPE _mutex = portMUX_INITIALIZER_UNLOCKED; + }; + + CachedResponse cachedSensorStatus; + CachedResponse cachedSettings; + CachedResponse cachedDiscovery; + sdcp_print_status_t cachedPrintStatus = SDCP_PRINT_STATUS_IDLE; + volatile bool settingsJsonDirty = true; // Start dirty to build initial cache + + // Cached version JSON (built once at startup, never changes) + char cachedVersionJson[512] = {0}; + + // --- SSE deduplication --- + // CRC32 of last idle payload for dedup (replaces full String comparison) + uint32_t lastIdlePayloadCrc = 0; + bool hasLastIdlePayload = false; + + // SSE client cleanup tracking + unsigned long lastSSECleanupMs = 0; + + void buildStatusJson(StaticJsonDocument<768> &jsonDoc, const printer_info_t &elegooStatus); void 
broadcastStatusUpdate(); + void processPendingCommands(); + void refreshCachedResponses(); + void cleanupSSEClients(); + + static uint32_t crc32(const char *data, size_t length); public: WebServer(int port = 80); diff --git a/src/main.cpp b/src/main.cpp index 00150ea..8d8600f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -49,6 +49,12 @@ bool isWebServerSetup = false; // Store reset reason for diagnostics static esp_reset_reason_t lastResetReason = ESP_RST_UNKNOWN; +#ifdef STRESS_MODE +static constexpr uint32_t kMainLoopDelayMs = 0; +#else +static constexpr uint32_t kMainLoopDelayMs = 1; +#endif + static const char* getResetReasonString(esp_reset_reason_t reason) { switch (reason) { @@ -82,6 +88,9 @@ void setup() logger.logf("Firmware version: %s", firmwareVersion); logger.logf("Chip family: %s", chipFamily); logger.logf("Build timestamp (UTC compile time): %s", buildTimestamp); +#ifdef STRESS_MODE + logger.log("STRESS_MODE enabled: reduced delays and extra cache refreshes"); +#endif SPIFFS.begin(); // note: this must be done before wifi/server setup logger.log("Filesystem initialized"); @@ -168,5 +177,5 @@ void loop() // - Motion sensor: ~60ms between pulses at typical speeds // - Jam detector: 250ms update interval // - Printer polling: 250ms status interval - vTaskDelay(pdMS_TO_TICKS(1)); -} \ No newline at end of file + vTaskDelay(pdMS_TO_TICKS(kMainLoopDelayMs)); +} diff --git a/test/build_and_run_all_tests.sh b/test/build_and_run_all_tests.sh index 50a309b..33511d6 100755 --- a/test/build_and_run_all_tests.sh +++ b/test/build_and_run_all_tests.sh @@ -11,6 +11,8 @@ # --quick Run only pulse_simulator (fast smoke test) # --filter NAME Run only tests matching NAME # --verbose Show full compilation output +# --sanitize Compile with AddressSanitizer (detects heap/stack errors) +# --tsan Compile with ThreadSanitizer (detects data races) # --no-python Skip Python tests # --no-node Skip Node/JavaScript tests # --help Show this help message @@ -32,6 +34,7 @@ FILTER="" 
VERBOSE=false SKIP_PYTHON=false SKIP_NODE=false +SANITIZER_FLAGS="" while [[ $# -gt 0 ]]; do case $1 in @@ -47,6 +50,14 @@ while [[ $# -gt 0 ]]; do VERBOSE=true shift ;; + --sanitize) + SANITIZER_FLAGS="-fsanitize=address -fno-omit-frame-pointer -g" + shift + ;; + --tsan) + SANITIZER_FLAGS="-fsanitize=thread -g" + shift + ;; --no-python) SKIP_PYTHON=true shift @@ -56,7 +67,7 @@ while [[ $# -gt 0 ]]; do shift ;; --help) - head -25 "$0" | tail -20 + head -27 "$0" | tail -22 exit 0 ;; *) @@ -82,6 +93,11 @@ if [ -n "$FILTER" ]; then echo "" fi +if [ -n "$SANITIZER_FLAGS" ]; then + echo -e "${YELLOW}Sanitizer: $SANITIZER_FLAGS${NC}" + echo "" +fi + EXIT_CODE=0 TESTS_RUN=0 TESTS_PASSED=0 @@ -145,19 +161,25 @@ compile_test() { local extra_sources=() # test_jam_detector and test_additional_edge_cases include sources directly # and use mocks, so they don't need extra sources compiled - if [ "$output_name" = "test_jam_detector" ] || [ "$output_name" = "test_additional_edge_cases" ]; then + if [ "$output_name" = "test_jam_detector" ] || [ "$output_name" = "test_additional_edge_cases" ] || [ "$output_name" = "test_soak" ] || [ "$output_name" = "test_thread_safety" ]; then : # No extra sources - tests include what they need directly elif [ -f "../src/${output_name#test_}.cpp" ]; then extra_sources+=("../src/${output_name#test_}.cpp") fi + # Thread safety test needs pthreads + local extra_flags="" + if [ "$output_name" = "test_thread_safety" ]; then + extra_flags="-lpthread" + fi + # Combine main source file with extra sources local all_sources=("${source_files[@]}" "${extra_sources[@]}") if [ "$VERBOSE" = true ]; then - g++ -std=c++17 -Wno-redefined-macros -o "$output_name" "${all_sources[@]}" $INCLUDE_PATHS + g++ -std=c++17 -Wno-redefined-macros $SANITIZER_FLAGS -o "$output_name" "${all_sources[@]}" $INCLUDE_PATHS $extra_flags else - g++ -std=c++17 -Wno-redefined-macros -o "$output_name" "${all_sources[@]}" $INCLUDE_PATHS 2>&1 | head -30 + g++ -std=c++17 -Wno-redefined-macros 
$SANITIZER_FLAGS -o "$output_name" "${all_sources[@]}" $INCLUDE_PATHS $extra_flags 2>&1 | head -30 fi # Use PIPESTATUS to get the exit code of g++, not head @@ -202,6 +224,8 @@ declare -a CPP_TESTS=( "test_settings_manager:SettingsManager Unit Tests" "test_logger:Logger Unit Tests" "test_integration:Integration Tests" + "test_thread_safety:Thread Safety Stress Tests" + "test_soak:Soak Tests" ) # In quick mode, only run pulse_simulator diff --git a/test/test_elegoo_cc.cpp b/test/test_elegoo_cc.cpp index f7bf440..2284d9d 100644 --- a/test/test_elegoo_cc.cpp +++ b/test/test_elegoo_cc.cpp @@ -183,9 +183,9 @@ typedef enum { #include "../src/FilamentMotionSensor.h" #include "../src/FilamentMotionSensor.cpp" -// Simplified printer info struct for testing +// Simplified printer info struct for testing (uses fixed-size buffers matching production) struct printer_info_t { - String mainboardID; + char mainboardID[64]; sdcp_print_status_t printStatus; bool filamentStopped; bool filamentRunout; @@ -363,7 +363,7 @@ class TestableElegooCC { printer_info_t getCurrentInformation() { printer_info_t info; - info.mainboardID = mainboardID; + strlcpy(info.mainboardID, mainboardID.c_str(), sizeof(info.mainboardID)); info.printStatus = printStatus; info.filamentStopped = false; info.filamentRunout = filamentRunout; @@ -618,7 +618,7 @@ void testGetCurrentInformation() { printer_info_t info = ecc.getCurrentInformation(); - TEST_ASSERT(info.mainboardID == "test-board-123", "Mainboard ID should match"); + TEST_ASSERT(strcmp(info.mainboardID, "test-board-123") == 0, "Mainboard ID should match"); TEST_ASSERT(info.printStatus == SDCP_PRINT_STATUS_PRINTING, "Print status should match"); TEST_ASSERT(info.currentLayer == 50, "Current layer should match"); TEST_ASSERT(info.totalLayer == 100, "Total layer should match"); diff --git a/test/test_soak b/test/test_soak new file mode 100755 index 0000000..2d730a5 Binary files /dev/null and b/test/test_soak differ diff --git a/test/test_soak.cpp 
b/test/test_soak.cpp new file mode 100644 index 0000000..6412b67 --- /dev/null +++ b/test/test_soak.cpp @@ -0,0 +1,731 @@ +/** + * Soak Test - Long-Duration Stress Simulation + * + * Simulates ~24 hours of firmware runtime in minutes by running + * the full detection pipeline (FilamentMotionSensor + JamDetector + Logger) + * for millions of iterations. + * + * Goals: + * - Detect slow-burn state corruption (counter overflow, index wraparound) + * - Catch memory leaks via ASan (run with --sanitize flag) + * - Verify circular buffer integrity after millions of writes + * - Stress print start/stop/pause/resume cycling + * + * Run: + * g++ -std=c++17 -I. -I./mocks -I../src -o test_soak test_soak.cpp + * ./test_soak + * + * With AddressSanitizer: + * g++ -std=c++17 -fsanitize=address -fno-omit-frame-pointer -g \ + * -I. -I./mocks -I../src -o test_soak test_soak.cpp && ./test_soak + */ + +#include <climits> +#include <cmath> +#include <cstdint> +#include <cstdio> +#include <cstring> +#include <iostream> +#include <string> + +// Define mock globals before including mocks +unsigned long _mockMillis = 0; +int testsPassed = 0; +int testsFailed = 0; + +// Pre-define header guards to prevent real headers from being included +#define LOGGER_H +#define SETTINGS_DATA_H + +// Define LogLevel enum needed by JamDetector +enum LogLevel { + LOG_NORMAL = 0, + LOG_VERBOSE = 1, + LOG_PIN_VALUES = 2 +}; + +// Mock Logger singleton +class Logger { +public: + static Logger& getInstance() { + static Logger instance; + return instance; + } + void log(const char* msg, LogLevel level = LOG_NORMAL) {} + void logf(const char* fmt, ...) {} + void logf(LogLevel level, const char* fmt, ...)
{} +}; + +// Mock SettingsManager singleton +class SettingsManager { +public: + static SettingsManager& getInstance() { + static SettingsManager instance; + return instance; + } + bool getVerboseLogging() const { return false; } + int getLogLevel() const { return 0; } +}; + +// Include shared mocks +#include "mocks/test_mocks.h" +#include "mocks/arduino_mocks.h" + +// Mock instances +MockLogger logger; +MockSettingsManager settingsManager; +MockSerial Serial; + +// Include real implementations +#include "../src/FilamentMotionSensor.h" +#include "../src/FilamentMotionSensor.cpp" +#include "../src/JamDetector.h" +#include "../src/JamDetector.cpp" + +// ============================================================================ +// Soak Test Harness (extended from integration test pattern) +// ============================================================================ + +class SoakHarness { +public: + FilamentMotionSensor sensor; + JamDetector detector; + JamConfig config; + + unsigned long printStartTime; + bool isPrinting; + unsigned long pulseCount; + float totalExtrusionMm; + + // Stats tracking + unsigned long totalIterations; + unsigned long printStarts; + unsigned long printStops; + unsigned long pauseResumeCycles; + unsigned long jamDetections; + unsigned long maxPulseCount; + + SoakHarness() : printStartTime(0), isPrinting(false), pulseCount(0), + totalExtrusionMm(0.0f), totalIterations(0), printStarts(0), + printStops(0), pauseResumeCycles(0), jamDetections(0), + maxPulseCount(0) { + config.graceTimeMs = 5000; + config.hardJamMm = 5.0f; + config.softJamTimeMs = 10000; + config.hardJamTimeMs = 3000; + config.ratioThreshold = 0.70f; + config.detectionMode = DetectionMode::BOTH; + } + + void startPrint() { + printStartTime = millis(); + isPrinting = true; + sensor.reset(); + detector.reset(printStartTime); + pulseCount = 0; + totalExtrusionMm = 0.0f; + printStarts++; + } + + void stopPrint() { + isPrinting = false; + printStops++; + } + + void pauseResume() { 
+ detector.onResume(millis(), pulseCount, sensor.getSensorDistance()); + pauseResumeCycles++; + } + + void addExtrusion(float deltaMm) { + totalExtrusionMm += deltaMm; + sensor.updateExpectedPosition(totalExtrusionMm); + } + + void addPulse(float mmPerPulse = 2.88f) { + sensor.addSensorPulse(mmPerPulse); + pulseCount++; + if (pulseCount > maxPulseCount) maxPulseCount = pulseCount; + } + + JamState runDetection() { + float expectedRate, actualRate; + sensor.getWindowedRates(expectedRate, actualRate); + + totalIterations++; + + JamState state = detector.update( + sensor.getExpectedDistance(), + sensor.getSensorDistance(), + pulseCount, + isPrinting, + true, + millis(), + printStartTime, + config, + expectedRate, + actualRate + ); + + if (state.jammed) jamDetections++; + return state; + } +}; + +// ============================================================================ +// Logger Circular Buffer (standalone for soak testing) +// ============================================================================ + +struct LogEntry { + char uuid[37]; + unsigned long timestamp; + char message[256]; + uint8_t level; +}; + +class SoakLogger { +public: + static const int MAX_LOG_ENTRIES = 250; + + LogEntry logBuffer[MAX_LOG_ENTRIES]; + int logCapacity; + int currentIndex; + int totalEntries; + uint32_t uuidCounter; + + SoakLogger() : logCapacity(MAX_LOG_ENTRIES), currentIndex(0), + totalEntries(0), uuidCounter(0) { + memset(logBuffer, 0, sizeof(logBuffer)); + } + + void log(const char *message) { + uuidCounter++; + snprintf(logBuffer[currentIndex].uuid, 37, "soak-%08x", uuidCounter); + logBuffer[currentIndex].timestamp = millis(); + strncpy(logBuffer[currentIndex].message, message, + sizeof(logBuffer[currentIndex].message) - 1); + logBuffer[currentIndex].message[sizeof(logBuffer[currentIndex].message) - 1] = '\0'; + logBuffer[currentIndex].level = 0; + + currentIndex = (currentIndex + 1) % logCapacity; + if (totalEntries < logCapacity) { + totalEntries++; + } + } + + void 
clearLogs() { + currentIndex = 0; + totalEntries = 0; + memset(logBuffer, 0, sizeof(logBuffer)); + } + + // Validate internal consistency + bool isConsistent() const { + if (currentIndex < 0 || currentIndex >= logCapacity) return false; + if (totalEntries < 0 || totalEntries > logCapacity) return false; + return true; + } + + // Check that the most recent entry has a valid UUID + bool lastEntryValid() const { + if (totalEntries == 0) return true; + int lastIdx = (currentIndex - 1 + logCapacity) % logCapacity; + return logBuffer[lastIdx].uuid[0] != '\0' && + strlen(logBuffer[lastIdx].uuid) > 0 && + strlen(logBuffer[lastIdx].uuid) < 37; + } +}; + +// ============================================================================ +// CachedResponse double-buffer (standalone soak version) +// ============================================================================ + +static const size_t kCacheBufSize = 1536; + +struct CachedResponse { + char buf[2][kCacheBufSize]; + size_t len[2]; + int activeIdx; + + CachedResponse() : activeIdx(0) { + memset(buf, 0, sizeof(buf)); + len[0] = 0; + len[1] = 0; + } + + void publish(const char *json, size_t jsonLen) { + int writeIdx = !activeIdx; + size_t copyLen = (jsonLen < kCacheBufSize - 1) ? 
jsonLen : (kCacheBufSize - 1); + memcpy(buf[writeIdx], json, copyLen); + buf[writeIdx][copyLen] = '\0'; + len[writeIdx] = copyLen; + activeIdx = writeIdx; + } + + const char *read(size_t &outLen) const { + int idx = activeIdx; + outLen = len[idx]; + return buf[idx]; + } + + bool isConsistent() const { + if (activeIdx != 0 && activeIdx != 1) return false; + if (len[activeIdx] > kCacheBufSize - 1) return false; + // Active buffer should be null-terminated + if (len[activeIdx] > 0 && buf[activeIdx][len[activeIdx]] != '\0') return false; + return true; + } +}; + +// ============================================================================ +// Soak Tests +// ============================================================================ + +// Total simulated iterations. 10M iterations @ 250ms = ~694 hours simulated. +// Reduced to 2M for reasonable test time (~5-10 seconds). +static const long SOAK_ITERATIONS = 2000000; +static const int TICK_MS = 250; // Simulated time per iteration + +/** + * Test 1: Full pipeline soak - cycling through print states + * + * Simulates realistic firmware lifecycle: + * idle(10s) -> print(60s, healthy) -> soft jam(5s) -> recovery(10s) -> + * pause(3s) -> resume(30s) -> stop -> repeat + */ +void testFullPipelineSoak() { + TEST_SECTION("Full Pipeline Soak (" + std::to_string(SOAK_ITERATIONS) + " iterations)"); + + resetMockTime(); + SoakHarness harness; + + // State machine for cycling through conditions + enum Phase { IDLE, GRACE, HEALTHY, SOFT_JAM, RECOVERY, PAUSED, POST_RESUME }; + Phase phase = IDLE; + unsigned long phaseStart = 0; + int cycleCount = 0; + + // Phase durations (in iterations, not ms) + const int IDLE_ITERS = 40; // 10s + const int GRACE_ITERS = 24; // 6s (past 5s grace) + const int HEALTHY_ITERS = 240; // 60s + const int SOFT_JAM_ITERS = 20; // 5s + const int RECOVERY_ITERS = 40; // 10s + const int PAUSED_ITERS = 12; // 3s + const int RESUME_ITERS = 120; // 30s + + int phaseIter = 0; + + for (long i = 0; i < 
SOAK_ITERATIONS; i++) { + advanceTime(TICK_MS); + phaseIter++; + + switch (phase) { + case IDLE: + if (phaseIter >= IDLE_ITERS) { + harness.startPrint(); + phase = GRACE; + phaseIter = 0; + } + break; + + case GRACE: + // During grace period: normal extrusion + pulses + harness.addExtrusion(2.0f); + harness.addPulse(2.88f); + harness.runDetection(); + + if (phaseIter >= GRACE_ITERS) { + phase = HEALTHY; + phaseIter = 0; + } + break; + + case HEALTHY: + // Normal healthy print + harness.addExtrusion(2.0f); + harness.addPulse(2.88f); + { + JamState s = harness.runDetection(); + // After grace, should not jam with healthy flow + (void)s; + } + + if (phaseIter >= HEALTHY_ITERS) { + phase = SOFT_JAM; + phaseIter = 0; + } + break; + + case SOFT_JAM: + // Under-extrusion: expected movement, sparse pulses + harness.addExtrusion(3.0f); + if (phaseIter % 4 == 0) { + harness.addPulse(2.88f); // ~25% flow + } + harness.runDetection(); + + if (phaseIter >= SOFT_JAM_ITERS) { + phase = RECOVERY; + phaseIter = 0; + } + break; + + case RECOVERY: + // Full flow again - jam percentages should decrease + harness.addExtrusion(2.0f); + harness.addPulse(2.88f); + harness.runDetection(); + + if (phaseIter >= RECOVERY_ITERS) { + phase = PAUSED; + phaseIter = 0; + } + break; + + case PAUSED: + // No extrusion, no pulses + if (phaseIter >= PAUSED_ITERS) { + harness.pauseResume(); + phase = POST_RESUME; + phaseIter = 0; + } + break; + + case POST_RESUME: + // Post-resume: normal flow + harness.addExtrusion(2.0f); + harness.addPulse(2.88f); + harness.runDetection(); + + if (phaseIter >= RESUME_ITERS) { + harness.stopPrint(); + phase = IDLE; + phaseIter = 0; + cycleCount++; + } + break; + } + } + + // Verify state is sane after soak + TEST_ASSERT(harness.totalIterations > 0, "Should have run iterations"); + TEST_ASSERT(harness.printStarts > 0, "Should have started prints"); + TEST_ASSERT(harness.printStarts == harness.printStops || + harness.printStarts == harness.printStops + 1, + "Start/stop 
counts should be balanced (off by at most 1)"); + TEST_ASSERT(harness.pulseCount < ULONG_MAX / 2, + "Pulse count should not have overflowed"); + + // Sensor state should be valid + float expected = harness.sensor.getExpectedDistance(); + float actual = harness.sensor.getSensorDistance(); + TEST_ASSERT(!std::isnan(expected), "Expected distance should not be NaN"); + TEST_ASSERT(!std::isnan(actual), "Actual distance should not be NaN"); + TEST_ASSERT(!std::isinf(expected), "Expected distance should not be Inf"); + TEST_ASSERT(!std::isinf(actual), "Actual distance should not be Inf"); + + // JamState should be valid + JamState finalState = harness.detector.getState(); + TEST_ASSERT(!std::isnan(finalState.passRatio), "passRatio should not be NaN"); + TEST_ASSERT(!std::isnan(finalState.deficit), "deficit should not be NaN"); + TEST_ASSERT(!std::isnan(finalState.hardJamPercent), "hardJamPercent should not be NaN"); + TEST_ASSERT(!std::isnan(finalState.softJamPercent), "softJamPercent should not be NaN"); + TEST_ASSERT(finalState.hardJamPercent >= 0.0f && finalState.hardJamPercent <= 100.0f, + "hardJamPercent should be in [0, 100]"); + TEST_ASSERT(finalState.softJamPercent >= 0.0f && finalState.softJamPercent <= 100.0f, + "softJamPercent should be in [0, 100]"); + + std::cout << " Cycles: " << cycleCount << std::endl; + std::cout << " Print starts: " << harness.printStarts << std::endl; + std::cout << " Pause/resumes: " << harness.pauseResumeCycles << std::endl; + std::cout << " Jam detections: " << harness.jamDetections << std::endl; + std::cout << " Max pulse count: " << harness.maxPulseCount << std::endl; + std::cout << " Simulated time: " << (_mockMillis / 3600000) << "h " + << ((_mockMillis % 3600000) / 60000) << "m" << std::endl; + + TEST_PASS("Full pipeline soak: " + std::to_string(cycleCount) + " cycles, " + + std::to_string(SOAK_ITERATIONS) + " iterations"); +} + +/** + * Test 2: Logger circular buffer soak + * + * Writes millions of log entries, periodically 
clears, and verifies + * buffer integrity at each checkpoint. + */ +void testLoggerCircularBufferSoak() { + TEST_SECTION("Logger Circular Buffer Soak"); + + resetMockTime(); + SoakLogger soakLog; + + const long LOG_ITERATIONS = 1000000; + const int CLEAR_EVERY = 50000; + int clearCount = 0; + + for (long i = 0; i < LOG_ITERATIONS; i++) { + _mockMillis = (unsigned long)i; + + char msg[64]; + snprintf(msg, sizeof(msg), "Soak log entry %ld at %lu", i, _mockMillis); + soakLog.log(msg); + + // Periodic clear + if (i > 0 && i % CLEAR_EVERY == 0) { + soakLog.clearLogs(); + clearCount++; + + // Verify consistency after clear + TEST_ASSERT(soakLog.isConsistent(), "Logger should be consistent after clear"); + TEST_ASSERT(soakLog.currentIndex == 0, "Index should be 0 after clear"); + TEST_ASSERT(soakLog.totalEntries == 0, "Count should be 0 after clear"); + } + + // Periodic consistency check + if (i % 100000 == 0) { + TEST_ASSERT(soakLog.isConsistent(), "Logger consistent at iteration " + std::to_string(i)); + TEST_ASSERT(soakLog.lastEntryValid(), "Last entry valid at iteration " + std::to_string(i)); + } + } + + // Final checks + TEST_ASSERT(soakLog.isConsistent(), "Logger should be consistent after soak"); + TEST_ASSERT(soakLog.lastEntryValid(), "Last entry should be valid"); + TEST_ASSERT(soakLog.uuidCounter == (uint32_t)LOG_ITERATIONS, + "UUID counter should match iteration count"); + + // Verify the buffer contains recent entries (not garbage) + int lastIdx = (soakLog.currentIndex - 1 + soakLog.logCapacity) % soakLog.logCapacity; + TEST_ASSERT(soakLog.logBuffer[lastIdx].timestamp > 0, "Last entry should have timestamp"); + TEST_ASSERT(strlen(soakLog.logBuffer[lastIdx].message) > 0, "Last entry should have message"); + + std::cout << " Logged: " << LOG_ITERATIONS << " entries" << std::endl; + std::cout << " Clears: " << clearCount << std::endl; + std::cout << " Final totalEntries: " << soakLog.totalEntries << std::endl; + std::cout << " UUID counter: " << 
soakLog.uuidCounter << std::endl; + + TEST_PASS("Logger circular buffer survived " + std::to_string(LOG_ITERATIONS) + " entries"); +} + +/** + * Test 3: CachedResponse double-buffer soak + * + * Publishes millions of different JSON payloads and verifies + * the buffer never corrupts. + */ +void testCachedResponseSoak() { + TEST_SECTION("CachedResponse Double-Buffer Soak"); + + CachedResponse cache; + + const long PUBLISH_ITERATIONS = 1000000; + char payload[512]; + + for (long i = 0; i < PUBLISH_ITERATIONS; i++) { + int len = snprintf(payload, sizeof(payload), + "{\"iter\":%ld,\"status\":%d,\"value\":%.4f}", + i, (int)(i % 10), (float)i * 0.001f); + + cache.publish(payload, (size_t)len); + + // Periodic consistency check + if (i % 100000 == 0) { + TEST_ASSERT(cache.isConsistent(), "Cache consistent at iteration " + std::to_string(i)); + + // Verify read returns valid data + size_t readLen; + const char *data = cache.read(readLen); + TEST_ASSERT(readLen > 0, "Cache should have data"); + TEST_ASSERT(data[0] == '{', "JSON should start with {"); + TEST_ASSERT(data[readLen - 1] == '}', "JSON should end with }"); + TEST_ASSERT(data[readLen] == '\0', "Should be null-terminated"); + } + } + + // Final check + TEST_ASSERT(cache.isConsistent(), "Cache consistent after soak"); + size_t finalLen; + const char *finalData = cache.read(finalLen); + TEST_ASSERT(finalLen > 0, "Final read should have data"); + TEST_ASSERT(finalData[0] == '{', "Final JSON valid"); + + std::cout << " Published: " << PUBLISH_ITERATIONS << " payloads" << std::endl; + std::cout << " Final payload length: " << finalLen << std::endl; + + TEST_PASS("CachedResponse survived " + std::to_string(PUBLISH_ITERATIONS) + " publishes"); +} + +/** + * Test 4: millis() rollover simulation + * + * Tests that the system handles millis() wrapping around ULONG_MAX. + * On ESP32, millis() wraps after ~49.7 days. 
+ */ +void testMillisRollover() { + TEST_SECTION("millis() Rollover Handling"); + + SoakHarness harness; + + // Start near rollover point + _mockMillis = ULONG_MAX - 30000; // 30 seconds before rollover + harness.startPrint(); + + // Run through the rollover + float totalExtrusion = 0.0f; + bool crossedRollover = false; + int iterations = 0; + unsigned long prevMillis = _mockMillis; + + for (int i = 0; i < 500; i++) { + advanceTime(TICK_MS); + iterations++; + + if (_mockMillis < prevMillis) { + crossedRollover = true; + } + prevMillis = _mockMillis; + + totalExtrusion += 2.0f; + harness.addExtrusion(2.0f); + harness.addPulse(2.88f); + + JamState state = harness.runDetection(); + + // Should not crash or produce NaN + TEST_ASSERT(!std::isnan(state.passRatio), "passRatio should not be NaN after rollover"); + TEST_ASSERT(!std::isnan(state.deficit), "deficit should not be NaN after rollover"); + } + + TEST_ASSERT(crossedRollover, "Should have crossed millis() rollover point"); + + // State should still be sane + JamState finalState = harness.detector.getState(); + TEST_ASSERT(!std::isnan(finalState.hardJamPercent), "hardJamPercent valid after rollover"); + TEST_ASSERT(!std::isnan(finalState.softJamPercent), "softJamPercent valid after rollover"); + + std::cout << " Crossed rollover at iteration with millis()=" << _mockMillis << std::endl; + + TEST_PASS("System handles millis() rollover without corruption"); +} + +/** + * Test 5: Rapid print start/stop cycling + * + * Some users rapidly start/stop prints. Tests that repeated + * reset() calls don't leak state. 
+ */ +void testRapidStartStopCycling() { + TEST_SECTION("Rapid Print Start/Stop Cycling"); + + resetMockTime(); + SoakHarness harness; + + const int CYCLES = 100000; + + for (int i = 0; i < CYCLES; i++) { + advanceTime(100); + + harness.startPrint(); + + // Brief print (a few iterations) + for (int j = 0; j < 5; j++) { + advanceTime(TICK_MS); + harness.addExtrusion(1.0f); + harness.addPulse(2.88f); + harness.runDetection(); + } + + harness.stopPrint(); + } + + // Verify no state corruption after rapid cycling + TEST_ASSERT(harness.printStarts == CYCLES, "Print start count should match"); + TEST_ASSERT(harness.printStops == CYCLES, "Print stop count should match"); + + float expected = harness.sensor.getExpectedDistance(); + float actual = harness.sensor.getSensorDistance(); + TEST_ASSERT(!std::isnan(expected), "Expected should not be NaN"); + TEST_ASSERT(!std::isnan(actual), "Actual should not be NaN"); + TEST_ASSERT(!std::isinf(expected), "Expected should not be Inf"); + TEST_ASSERT(!std::isinf(actual), "Actual should not be Inf"); + + std::cout << " Cycles: " << CYCLES << std::endl; + std::cout << " Total detection iterations: " << harness.totalIterations << std::endl; + + TEST_PASS("Rapid start/stop: " + std::to_string(CYCLES) + " cycles without corruption"); +} + +/** + * Test 6: Extrusion value accumulation precision + * + * Verifies that floating point accumulation over long prints + * doesn't drift excessively. 
+ */ +void testExtrusionAccumulationPrecision() { + TEST_SECTION("Extrusion Accumulation Precision"); + + resetMockTime(); + FilamentMotionSensor sensor; + sensor.reset(); + + // Simulate a very long print: 10000 updates of 0.5mm each = 5000mm total + const int UPDATES = 10000; + const float DELTA_MM = 0.5f; + float manualTotal = 0.0f; + + for (int i = 0; i < UPDATES; i++) { + advanceTime(250); + manualTotal += DELTA_MM; + sensor.updateExpectedPosition(manualTotal); + sensor.addSensorPulse(DELTA_MM); // Perfect flow + } + + float expected = sensor.getExpectedDistance(); + float actual = sensor.getSensorDistance(); + + // Check that windowed values are reasonable (not NaN, not absurdly large) + TEST_ASSERT(!std::isnan(expected), "Expected should not be NaN"); + TEST_ASSERT(!std::isnan(actual), "Actual should not be NaN"); + TEST_ASSERT(expected >= 0.0f, "Expected should be non-negative"); + TEST_ASSERT(actual >= 0.0f, "Actual should be non-negative"); + + // With perfect 1:1 flow, deficit should be near zero + float deficit = sensor.getDeficit(); + TEST_ASSERT(!std::isnan(deficit), "Deficit should not be NaN"); + + std::cout << " Total expected extrusion: " << manualTotal << "mm" << std::endl; + std::cout << " Windowed expected: " << expected << "mm" << std::endl; + std::cout << " Windowed actual: " << actual << "mm" << std::endl; + std::cout << " Deficit: " << deficit << "mm" << std::endl; + + TEST_PASS("Extrusion accumulation maintains precision over " + + std::to_string(UPDATES) + " updates"); +} + +// ============================================================================ +// Main +// ============================================================================ + +int main() { + std::cout << "\n========================================" << std::endl; + std::cout << " Soak Test Suite" << std::endl; + std::cout << "========================================" << std::endl; + + testFullPipelineSoak(); + testLoggerCircularBufferSoak(); + testCachedResponseSoak(); + 
testMillisRollover(); + testRapidStartStopCycling(); + testExtrusionAccumulationPrecision(); + + std::cout << "\n========================================" << std::endl; + std::cout << "Test Results:" << std::endl; + std::cout << "\033[32m Passed: " << testsPassed << "\033[0m" << std::endl; + if (testsFailed > 0) { + std::cout << "\033[31m Failed: " << testsFailed << "\033[0m" << std::endl; + } + std::cout << "========================================\n" << std::endl; + + return testsFailed > 0 ? 1 : 0; +} diff --git a/test/test_thread_safety b/test/test_thread_safety new file mode 100755 index 0000000..b242ac6 Binary files /dev/null and b/test/test_thread_safety differ diff --git a/test/test_thread_safety.cpp b/test/test_thread_safety.cpp new file mode 100644 index 0000000..5ac3112 --- /dev/null +++ b/test/test_thread_safety.cpp @@ -0,0 +1,699 @@ +/** + * Thread Safety Stress Tests + * + * Uses POSIX threads to simulate the ESP32's two-task architecture: + * Thread A (main loop): writes logs, updates caches + * Thread B (async handler): reads logs, clears logs, reads caches + * + * Run with ThreadSanitizer to detect data races: + * g++ -std=c++17 -fsanitize=thread -g -fPIE -pie -lpthread -I. -I./mocks -I../src \ + * -o test_thread_safety test_thread_safety.cpp && \ + * setarch "$(uname -m)" -R ./test_thread_safety + * + * Run with AddressSanitizer to detect heap corruption: + * g++ -std=c++17 -fsanitize=address -fno-omit-frame-pointer -g -lpthread \ + * -I. 
-I./mocks -I../src -o test_thread_safety test_thread_safety.cpp \ + && ./test_thread_safety + */ + +#include <atomic> +#include <cstdint> +#include <cstdio> +#include <cstring> +#include <iostream> +#include <mutex> +#include <string> +#include <thread> +#include <vector> + +// ============================================================================ +// Mock ESP32 primitives using POSIX mutexes +// ============================================================================ + +// portMUX_TYPE backed by std::mutex (simulates ESP32 spinlock) +// Uses a pointer to mutex since std::mutex is non-copyable/non-movable +struct portMUX_TYPE { + std::mutex *mtx; + portMUX_TYPE() : mtx(new std::mutex()) {} + ~portMUX_TYPE() { delete mtx; } + portMUX_TYPE(const portMUX_TYPE &) : mtx(new std::mutex()) {} + portMUX_TYPE &operator=(const portMUX_TYPE &) { + // Keep existing mutex, don't copy + return *this; + } +}; + +#define portMUX_INITIALIZER_UNLOCKED portMUX_TYPE() + +inline void portENTER_CRITICAL(portMUX_TYPE *mux) { + mux->mtx->lock(); +} + +inline void portEXIT_CRITICAL(portMUX_TYPE *mux) { + mux->mtx->unlock(); +} + +// Mock millis / time +static std::atomic<unsigned long> _mockMillis{0}; +unsigned long millis() { return _mockMillis.load(std::memory_order_relaxed); } +unsigned long getTime() { return _mockMillis.load(std::memory_order_relaxed); } +void yield() {} + +// Mock Serial +struct MockSerial { + void println(const char *) {} + void print(const char *) {} + void print(unsigned long) {} +}; +MockSerial Serial; + +// Mock ESP +struct MockESP { + unsigned long getCycleCount() { return _mockMillis.load(); } +}; +MockESP ESP; + +// Mock String class (minimal, for getLogsAsText return) +class String { +public: + String() : data_(nullptr), len_(0) {} + String(const char *s) { + if (s) { + len_ = strlen(s); + data_ = new char[len_ + 1]; + memcpy(data_, s, len_ + 1); + } else { + data_ = nullptr; + len_ = 0; + } + } + String(unsigned long val) { + char buf[24]; + snprintf(buf, sizeof(buf), "%lu", val); + len_ = strlen(buf); + data_ = new char[len_ + 1]; +
memcpy(data_, buf, len_ + 1); + } + String(const String &o) { + if (o.data_) { + len_ = o.len_; + data_ = new char[len_ + 1]; + memcpy(data_, o.data_, len_ + 1); + } else { + data_ = nullptr; + len_ = 0; + } + } + String &operator=(const String &o) { + if (this != &o) { + delete[] data_; + if (o.data_) { + len_ = o.len_; + data_ = new char[len_ + 1]; + memcpy(data_, o.data_, len_ + 1); + } else { + data_ = nullptr; + len_ = 0; + } + } + return *this; + } + ~String() { delete[] data_; } + + String &operator+=(const char *s) { + if (!s) return *this; + size_t slen = strlen(s); + char *nd = new char[len_ + slen + 1]; + if (data_) memcpy(nd, data_, len_); + memcpy(nd + len_, s, slen + 1); + delete[] data_; + data_ = nd; + len_ += slen; + return *this; + } + String &operator+=(const String &o) { return *this += o.c_str(); } + + void reserve(size_t) {} // no-op + const char *c_str() const { return data_ ? data_ : ""; } + size_t length() const { return len_; } + int indexOf(const char *s) const { + if (!data_ || !s) return -1; + char *f = strstr(data_, s); + return f ? 
(int)(f - data_) : -1; + } + +private: + char *data_; + size_t len_; +}; + +// ============================================================================ +// Reproduce the Logger class (matching src/Logger.h + src/Logger.cpp) +// with the FIXED code from this commit +// ============================================================================ + +enum LogLevel : uint8_t { + LOG_NORMAL = 0, + LOG_VERBOSE = 1, + LOG_PIN_VALUES = 2 +}; + +struct LogEntry { + char uuid[37]; + unsigned long timestamp; + char message[256]; + LogLevel level; +}; + +class TestLogger { +public: + static const int MAX_LOG_ENTRIES = 250; + + LogEntry logBuffer[MAX_LOG_ENTRIES]; + int logCapacity; + volatile int currentIndex; + volatile int totalEntries; + uint32_t uuidCounter; + LogLevel currentLogLevel; + portMUX_TYPE _logMutex; + + TestLogger() + : logCapacity(MAX_LOG_ENTRIES), currentIndex(0), totalEntries(0), + uuidCounter(0), currentLogLevel(LOG_PIN_VALUES) { + _logMutex = portMUX_INITIALIZER_UNLOCKED; + memset(logBuffer, 0, sizeof(logBuffer)); + } + + void generateUUID(char *buffer) { + uuidCounter++; + snprintf(buffer, 37, "%08lx-%04x-%04x-%04x-%08lx%04x", + (unsigned long)millis(), + (unsigned int)((uuidCounter >> 16) & 0xFFFF), + (unsigned int)(uuidCounter & 0xFFFF), + (unsigned int)((uuidCounter >> 8) & 0xFFFF), + (unsigned long)ESP.getCycleCount(), + (unsigned int)(uuidCounter & 0xFFFF)); + } + + void logInternal(const char *message, LogLevel level) { + if (level > currentLogLevel) return; + + unsigned long timestamp = getTime(); + + // FIXED: UUID generation inside critical section + portENTER_CRITICAL(&_logMutex); + + char uuid[37]; + generateUUID(uuid); + + strncpy(logBuffer[currentIndex].uuid, uuid, sizeof(logBuffer[currentIndex].uuid) - 1); + logBuffer[currentIndex].uuid[sizeof(logBuffer[currentIndex].uuid) - 1] = '\0'; + logBuffer[currentIndex].timestamp = timestamp; + strncpy(logBuffer[currentIndex].message, message, sizeof(logBuffer[currentIndex].message) - 1); + 
logBuffer[currentIndex].message[sizeof(logBuffer[currentIndex].message) - 1] = '\0'; + logBuffer[currentIndex].level = level; + + currentIndex = (currentIndex + 1) % logCapacity; + if (totalEntries < logCapacity) { + totalEntries = totalEntries + 1; + } + portEXIT_CRITICAL(&_logMutex); + } + + String getLogsAsText(int maxEntries) { + String result; + + // FIXED: Snapshot indices under mutex + portENTER_CRITICAL(&_logMutex); + int snapshotIndex = currentIndex; + int snapshotCount = totalEntries; + portEXIT_CRITICAL(&_logMutex); + + if (snapshotCount < 0 || snapshotCount > logCapacity) snapshotCount = 0; + if (snapshotIndex < 0 || snapshotIndex >= logCapacity) snapshotIndex = 0; + if (snapshotCount == 0) return result; + + int returnCount = snapshotCount; + if (returnCount > maxEntries) returnCount = maxEntries; + + result.reserve(returnCount * 80 + 100); + + int startIndex = (snapshotCount < logCapacity) ? 0 : snapshotIndex; + if (snapshotCount > returnCount) { + startIndex = (startIndex + (snapshotCount - returnCount)) % logCapacity; + } + + for (int i = 0; i < returnCount; i++) { + int bufferIndex = (startIndex + i) % logCapacity; + if (bufferIndex < 0 || bufferIndex >= logCapacity) continue; + + // Read entry under short lock to avoid tearing + LogEntry entryCopy; + portENTER_CRITICAL(&_logMutex); + entryCopy = logBuffer[bufferIndex]; + portEXIT_CRITICAL(&_logMutex); + + result += String(entryCopy.timestamp); + result += " "; + result += entryCopy.message; + result += "\n"; + } + + return result; + } + + void clearLogs() { + // FIXED: Indices inside critical section + portENTER_CRITICAL(&_logMutex); + currentIndex = 0; + totalEntries = 0; + for (int i = 0; i < logCapacity; i++) { + memset(logBuffer[i].uuid, 0, sizeof(logBuffer[i].uuid)); + logBuffer[i].timestamp = 0; + memset(logBuffer[i].message, 0, sizeof(logBuffer[i].message)); + logBuffer[i].level = LOG_NORMAL; + } + portEXIT_CRITICAL(&_logMutex); + } + + int getLogCount() { return totalEntries; } +}; + +// 
============================================================================ +// Reproduce the CachedResponse double-buffer (matching WebServer.h fix) +// ============================================================================ + +static const size_t kCacheBufSize = 1536; + +struct CachedResponse { + char buf[2][kCacheBufSize]; + size_t len[2] = {0, 0}; + volatile int activeIdx = 0; + mutable portMUX_TYPE _mutex = portMUX_INITIALIZER_UNLOCKED; + + void publish(const char *json, size_t jsonLen) { + int writeIdx = !activeIdx; + size_t copyLen = (jsonLen < kCacheBufSize - 1) ? jsonLen : (kCacheBufSize - 1); + memcpy(buf[writeIdx], json, copyLen); + buf[writeIdx][copyLen] = '\0'; + len[writeIdx] = copyLen; + portENTER_CRITICAL(&_mutex); + activeIdx = writeIdx; + portEXIT_CRITICAL(&_mutex); + } + + size_t read(char *out, size_t outSize) const { + if (outSize == 0) { + return 0; + } + size_t copyLen = 0; + portENTER_CRITICAL(&_mutex); + int idx = activeIdx; + copyLen = len[idx]; + if (copyLen >= outSize) { + copyLen = outSize - 1; + } + memcpy(out, buf[idx], copyLen); + out[copyLen] = '\0'; + portEXIT_CRITICAL(&_mutex); + return copyLen; + } +}; + +// ============================================================================ +// Test framework +// ============================================================================ + +static int testsPassed = 0; +static int testsFailed = 0; + +#define TEST_SECTION(name) \ + std::cout << "\n=== Test: " << name << " ===" << std::endl + +#define TEST_ASSERT(cond, msg) \ + do { \ + if (!(cond)) { \ + std::cout << "\033[31mFAIL: " << msg << "\033[0m" << std::endl; \ + std::cout << " at " << __FILE__ << ":" << __LINE__ << std::endl; \ + testsFailed++; \ + return; \ + } \ + } while(0) + +#define TEST_PASS(msg) \ + do { \ + std::cout << "\033[32mPASS: " << msg << "\033[0m" << std::endl; \ + testsPassed++; \ + } while(0) + +// ============================================================================ +// Stress Tests +// 
============================================================================ + +static const int STRESS_ITERATIONS = 100000; + +/** + * Test 1: Concurrent log + getLogsAsText + * Thread A: logs messages rapidly + * Thread B: reads logs rapidly + * Without proper synchronization, TSan would flag the race on currentIndex/totalEntries. + */ +void testConcurrentLogAndRead() { + TEST_SECTION("Concurrent log() + getLogsAsText()"); + + TestLogger logger; + std::atomic running{true}; + std::atomic readCount{0}; + + // Thread A: writer (simulates main loop calling logInternal) + std::thread writer([&]() { + for (int i = 0; i < STRESS_ITERATIONS; i++) { + char msg[64]; + snprintf(msg, sizeof(msg), "Log entry %d", i); + logger.logInternal(msg, LOG_NORMAL); + _mockMillis.fetch_add(1, std::memory_order_relaxed); + } + running.store(false); + }); + + // Thread B: reader (simulates async handler calling getLogsAsText) + std::thread reader([&]() { + while (running.load()) { + String logs = logger.getLogsAsText(50); + readCount.fetch_add(1); + // Verify the returned string is not corrupted + // (null-terminated, reasonable length) + if (logs.length() > 0) { + const char *c = logs.c_str(); + (void)c; // Just ensure we can dereference it without crash + } + } + }); + + writer.join(); + reader.join(); + + TEST_ASSERT(logger.getLogCount() > 0, "Logger should have entries after stress test"); + TEST_ASSERT(readCount.load() > 0, "Reader thread should have completed reads"); + TEST_PASS("Concurrent log + getLogsAsText survived " + + std::to_string(STRESS_ITERATIONS) + " iterations, " + + std::to_string(readCount.load()) + " reads"); +} + +/** + * Test 2: Concurrent log + clearLogs + * Thread A: logs messages + * Thread B: periodically clears logs + * Tests the fixed clearLogs() where indices are inside critical section. 
+ */ +void testConcurrentLogAndClear() { + TEST_SECTION("Concurrent log() + clearLogs()"); + + TestLogger logger; + std::atomic running{true}; + std::atomic clearCount{0}; + + std::thread writer([&]() { + for (int i = 0; i < STRESS_ITERATIONS; i++) { + char msg[64]; + snprintf(msg, sizeof(msg), "Entry %d", i); + logger.logInternal(msg, LOG_NORMAL); + _mockMillis.fetch_add(1, std::memory_order_relaxed); + } + running.store(false); + }); + + std::thread clearer([&]() { + while (running.load()) { + logger.clearLogs(); + clearCount.fetch_add(1); + } + }); + + writer.join(); + clearer.join(); + + // After test, indices should be consistent + int idx = logger.currentIndex; + int cnt = logger.totalEntries; + TEST_ASSERT(idx >= 0 && idx < logger.logCapacity, + "currentIndex should be in bounds after stress"); + TEST_ASSERT(cnt >= 0 && cnt <= logger.logCapacity, + "totalEntries should be in bounds after stress"); + TEST_ASSERT(clearCount.load() > 0, "Clear thread should have run"); + TEST_PASS("Concurrent log + clearLogs survived " + + std::to_string(STRESS_ITERATIONS) + " iterations, " + + std::to_string(clearCount.load()) + " clears"); +} + +/** + * Test 3: Triple contention - log + read + clear simultaneously + * This is the worst-case scenario that most closely matches the real firmware. 
+ */ +void testTripleContention() { + TEST_SECTION("Triple contention: log + read + clear"); + + TestLogger logger; + std::atomic running{true}; + std::atomic readCount{0}; + std::atomic clearCount{0}; + + // Writer thread + std::thread writer([&]() { + for (int i = 0; i < STRESS_ITERATIONS; i++) { + char msg[64]; + snprintf(msg, sizeof(msg), "Triple %d", i); + logger.logInternal(msg, LOG_NORMAL); + _mockMillis.fetch_add(1, std::memory_order_relaxed); + } + running.store(false); + }); + + // Reader thread + std::thread reader([&]() { + while (running.load()) { + String logs = logger.getLogsAsText(100); + readCount.fetch_add(1); + } + }); + + // Clearer thread + std::thread clearer([&]() { + int i = 0; + while (running.load()) { + // Clear less frequently to let some logs accumulate + if (i++ % 100 == 0) { + logger.clearLogs(); + clearCount.fetch_add(1); + } + } + }); + + writer.join(); + reader.join(); + clearer.join(); + + TEST_ASSERT(readCount.load() > 0, "Reader should have completed reads"); + TEST_ASSERT(clearCount.load() > 0, "Clearer should have run"); + TEST_PASS("Triple contention survived " + + std::to_string(STRESS_ITERATIONS) + " writes"); +} + +/** + * Test 4: CachedResponse double-buffer concurrent access + * Thread A: publishes JSON strings + * Thread B: reads the cached response + * Verifies no torn reads (partial old + partial new data). 
+ */ +void testCachedResponseDoubleBuffer() { + TEST_SECTION("CachedResponse double-buffer concurrent access"); + + CachedResponse cache; + std::atomic running{true}; + std::atomic readCount{0}; + std::atomic tornReads{0}; + + // Writer: alternates between two known payloads + const char *payloadA = "{\"type\":\"AAAAAAAAAA\",\"value\":111111111}"; + const char *payloadB = "{\"type\":\"BBBBBBBBBB\",\"value\":222222222}"; + size_t lenA = strlen(payloadA); + size_t lenB = strlen(payloadB); + + std::thread writer([&]() { + for (int i = 0; i < STRESS_ITERATIONS; i++) { + if (i % 2 == 0) + cache.publish(payloadA, lenA); + else + cache.publish(payloadB, lenB); + } + running.store(false); + }); + + std::thread reader([&]() { + while (running.load()) { + char dataBuf[kCacheBufSize]; + size_t len = cache.read(dataBuf, sizeof(dataBuf)); + readCount.fetch_add(1); + + if (len > 0) { + // Check that we got one of the two payloads, not a mix + bool isA = (strncmp(dataBuf, payloadA, len) == 0 && len == lenA); + bool isB = (strncmp(dataBuf, payloadB, len) == 0 && len == lenB); + if (!isA && !isB) { + tornReads.fetch_add(1); + } + } + } + }); + + writer.join(); + reader.join(); + + TEST_ASSERT(readCount.load() > 0, "Reader should have completed reads"); + TEST_ASSERT(tornReads.load() == 0, + "No torn reads should occur with double buffering (got " + + std::to_string(tornReads.load()) + ")"); + TEST_PASS("Double-buffer: " + std::to_string(readCount.load()) + + " reads, 0 torn"); +} + +/** + * Test 5: UUID uniqueness under contention + * Multiple threads call logInternal simultaneously. + * Verify no duplicate UUIDs are generated. 
+ */ +void testUuidUniquenessUnderContention() { + TEST_SECTION("UUID uniqueness under contention"); + + TestLogger logger; + const int THREADS = 4; + const int PER_THREAD = 10000; + + std::vector threads; + for (int t = 0; t < THREADS; t++) { + threads.emplace_back([&logger, t]() { + for (int i = 0; i < PER_THREAD; i++) { + char msg[64]; + snprintf(msg, sizeof(msg), "T%d-%d", t, i); + logger.logInternal(msg, LOG_NORMAL); + } + }); + } + + for (auto &th : threads) th.join(); + + // Check last N entries for UUID uniqueness + // (can't check all because circular buffer overwrites) + int entriesToCheck = std::min(logger.getLogCount(), 250); + std::vector uuids; + uuids.reserve(entriesToCheck); + + portENTER_CRITICAL(&logger._logMutex); + int idx = logger.currentIndex; + int count = logger.totalEntries; + portEXIT_CRITICAL(&logger._logMutex); + + int start = (count < logger.logCapacity) ? 0 : idx; + int skip = count - entriesToCheck; + start = (start + skip) % logger.logCapacity; + + for (int i = 0; i < entriesToCheck; i++) { + int bufIdx = (start + i) % logger.logCapacity; + uuids.push_back(std::string(logger.logBuffer[bufIdx].uuid)); + } + + // Count duplicates + int duplicates = 0; + for (size_t i = 0; i < uuids.size(); i++) { + for (size_t j = i + 1; j < uuids.size(); j++) { + if (uuids[i] == uuids[j] && !uuids[i].empty()) { + duplicates++; + } + } + } + + TEST_ASSERT(duplicates == 0, + "No duplicate UUIDs (found " + std::to_string(duplicates) + ")"); + TEST_PASS("UUID uniqueness: " + std::to_string(entriesToCheck) + + " entries checked, 0 duplicates"); +} + +/** + * Test 6: Cache publish during read doesn't crash + * Rapidly alternate publishing large payloads while reader accesses + * the buffer. Tests memory safety of the double-buffer approach. 
+ */ +void testCachePublishLargePayload() { + TEST_SECTION("CachedResponse large payload stress"); + + CachedResponse cache; + std::atomic running{true}; + std::atomic publishCount{0}; + std::atomic readCount{0}; + + // Build a large (~1KB) JSON-like payload + char largePayload[1024]; + memset(largePayload, 'X', sizeof(largePayload) - 1); + largePayload[0] = '{'; + largePayload[sizeof(largePayload) - 2] = '}'; + largePayload[sizeof(largePayload) - 1] = '\0'; + size_t largeLen = strlen(largePayload); + + std::thread writer([&]() { + for (int i = 0; i < STRESS_ITERATIONS; i++) { + // Slightly vary the payload to make torn reads detectable + largePayload[1] = 'A' + (i % 26); + cache.publish(largePayload, largeLen); + publishCount.fetch_add(1); + } + running.store(false); + }); + + std::thread reader([&]() { + while (running.load()) { + char dataBuf[kCacheBufSize]; + size_t len = cache.read(dataBuf, sizeof(dataBuf)); + if (len > 0) { + // Verify first and last chars are consistent + assert(dataBuf[0] == '{'); + assert(dataBuf[len - 1] == '}'); + } + readCount.fetch_add(1); + } + }); + + writer.join(); + reader.join(); + + TEST_ASSERT(publishCount.load() == STRESS_ITERATIONS, "All publishes completed"); + TEST_ASSERT(readCount.load() > 0, "Reader completed reads"); + TEST_PASS("Large payload stress: " + std::to_string(publishCount.load()) + + " publishes, " + std::to_string(readCount.load()) + " reads"); +} + +// ============================================================================ +// Main +// ============================================================================ + +int main() { + std::cout << "\n========================================" << std::endl; + std::cout << " Thread Safety Stress Test Suite" << std::endl; + std::cout << "========================================" << std::endl; + + testConcurrentLogAndRead(); + testConcurrentLogAndClear(); + testTripleContention(); + testCachedResponseDoubleBuffer(); + testUuidUniquenessUnderContention(); + 
testCachePublishLargePayload(); + + std::cout << "\n========================================" << std::endl; + std::cout << "Test Results:" << std::endl; + std::cout << "\033[32m Passed: " << testsPassed << "\033[0m" << std::endl; + if (testsFailed > 0) { + std::cout << "\033[31m Failed: " << testsFailed << "\033[0m" << std::endl; + } + std::cout << "========================================\n" << std::endl; + + return testsFailed > 0 ? 1 : 0; +} diff --git a/test/test_thread_safety_asan b/test/test_thread_safety_asan new file mode 100755 index 0000000..a00dd5a Binary files /dev/null and b/test/test_thread_safety_asan differ diff --git a/test/test_thread_safety_tsan b/test/test_thread_safety_tsan new file mode 100755 index 0000000..8268d34 Binary files /dev/null and b/test/test_thread_safety_tsan differ diff --git a/test/test_thread_safety_tsan2 b/test/test_thread_safety_tsan2 new file mode 100755 index 0000000..fc268f9 Binary files /dev/null and b/test/test_thread_safety_tsan2 differ diff --git a/test/test_thread_safety_tsan3 b/test/test_thread_safety_tsan3 new file mode 100755 index 0000000..e1fcd17 Binary files /dev/null and b/test/test_thread_safety_tsan3 differ diff --git a/test/tsan_full.log b/test/tsan_full.log new file mode 100644 index 0000000..5b659d0 --- /dev/null +++ b/test/tsan_full.log @@ -0,0 +1 @@ +timeout: the monitored command dumped core diff --git a/test/tsan_output.txt b/test/tsan_output.txt new file mode 100644 index 0000000..2d6ffa9 --- /dev/null +++ b/test/tsan_output.txt @@ -0,0 +1 @@ +FATAL: ThreadSanitizer: unexpected memory mapping 0x640c7f664000-0x640c7f667000