// ====================================================== // XIAO ML KIT (OR XIAO ESP32S3 SENSE) // FULL VISION ML // Small Image collection, training, inference for education and proof of concept // // SD card stores: images in class folders // SD card stores: headers in bin and .h text char array format // Serial monitor and OLED output // By jeremy Ellis // Use at your own risk! // MIT license // Github Profile https://github.com/hpssjellis // LinkedIn https://www.linkedin.com/in/jeremy-ellis-4237a9bb/ // // For platformio you need theU8g2 library decalred in the platformio.ini file and OPI PSRAM set // lib_deps = olikraus/U8g2 @ ^2.35.30 // ; Overriding defaults to enable OPI PSRAM // build_flags = // -DBOARD_HAS_PSRAM // -DARDUINO_USB_CDC_ON_BOOT=1 // board_build.arduino.memory_type = qio_opi // board_build.flash_mode = qio // board_upload.flash_size = 8MB // // possibly include the new onewire library // lib_deps = pstolarz/OneWireNg @ ^0.13.0 // ====================================================== // STANDALONE TESTING: // Uncomment ONE of these to test individual parts: // #define TEST_PART1_STANDALONE // Test image collection only // #define TEST_PART2_STANDALONE // Test training only // #define TEST_PART3_STANDALONE // Test inference only // #define TEST_PART4_STANDALONE // Test menu system only // ====================================================== // ██████████████████████████████████████████████████████████████████████████████ // ██ ██ // ██ PART 0: CORE SYSTEM (ALWAYS INCLUDED) ██ // ██ Headers, Defines, Pins, Globals, Memory, Weights, Setup, Loop ██ // ██ ██ // ██████████████████████████████████████████████████████████████████████████████ #include "esp_camera.h" #include "img_converters.h" #include "FS.h" #include "SD.h" #include "SPI.h" #include #include #include U8G2_SSD1306_72X40_ER_1_HW_I2C u8g2(U8G2_R2, U8X8_PIN_NONE); // ====================================================== // CONFIGURATION & ML HYPERPARAMETERS (MOVED UP) // ====================================================== const int myTotalItems = 5; const int myThresholdPress = 1100; const int myThresholdRelease = 900; const unsigned long myScreenTimeout = 300000; String myClassLabels[3] = {"0Blank", "1Circle", "2Square"}; float LEARNING_RATE = 0.0003; int BATCH_SIZE = 12; int TARGET_EPOCHS = 10; // ====================================================== // UNIFIED TOUCH INPUT SYSTEM - IMPROVED FOR COMPUTATION // ====================================================== struct TouchState { bool isTouching = false; int tapCount = 0; unsigned long firstTapTime = 0; unsigned long lastReleaseTime = 0; unsigned long lastCheckTime = 0; // NEW: track when we last checked const unsigned long tapWindow = 800; // INCREASED from 450ms for slow contexts const int longPressTaps = 3; // 3+ taps = long press const unsigned long debounceDelay = 50; // debounce time }; TouchState myTouch; // SYSTEM LOGIC VARIABLES unsigned long myLastActivityTime = 0; unsigned long myLastTapTime = 0; const int myTapCooldown = 250; int myMenuIndex = 1; bool myIsSelected = false; // (removed myIsTouching and myLongPressTriggered - now in TouchState myTouch) // XIAO ESP32-S3 Camera Pins #define PWDN_GPIO_NUM -1 #define RESET_GPIO_NUM -1 #define XCLK_GPIO_NUM 10 #define SIOD_GPIO_NUM 40 #define SIOC_GPIO_NUM 39 #define Y9_GPIO_NUM 48 #define Y8_GPIO_NUM 11 #define Y7_GPIO_NUM 12 #define Y6_GPIO_NUM 14 #define Y5_GPIO_NUM 16 #define Y4_GPIO_NUM 18 #define Y3_GPIO_NUM 17 #define Y2_GPIO_NUM 15 #define VSYNC_GPIO_NUM 38 #define HREF_GPIO_NUM 47 #define PCLK_GPIO_NUM 13 // ====================================================== // CONFIGURABLE INPUT RESOLUTION // ====================================================== #define INPUT_SIZE 64 // ====================================================== // CNN ARCHITECTURE CONSTANTS // ====================================================== #define CONV1_KERNEL_SIZE 3 #define CONV1_FILTERS 4 #define CONV1_WEIGHTS (CONV1_KERNEL_SIZE * CONV1_KERNEL_SIZE * 3 * CONV1_FILTERS) #define CONV2_KERNEL_SIZE 3 #define CONV2_FILTERS 8 #define CONV2_WEIGHTS (CONV2_KERNEL_SIZE * CONV2_KERNEL_SIZE * 4 * CONV2_FILTERS) #define CONV1_OUTPUT_SIZE (INPUT_SIZE - 2) #define POOL1_OUTPUT_SIZE (CONV1_OUTPUT_SIZE / 2) #define CONV2_OUTPUT_SIZE (POOL1_OUTPUT_SIZE - 2) #define FLATTENED_SIZE (CONV2_OUTPUT_SIZE * CONV2_OUTPUT_SIZE * CONV2_FILTERS) #define NUM_CLASSES 3 #define OUTPUT_WEIGHTS (FLATTENED_SIZE * NUM_CLASSES) // ====================================================== // GLOBAL VARIABLE DEFINITIONS // ====================================================== // Add near line 150 with other global buffers: uint8_t* myRgbBuffer = nullptr; // Reusable RGB buffer for inference // ML Buffers (PSRAM) float* myInputBuffer = nullptr; float* myConv1_w = nullptr; float* myConv1_b = nullptr; float* myConv2_w = nullptr; float* myConv2_b = nullptr; float* myOutput_w = nullptr; float* myOutput_b = nullptr; // Gradient buffers float* myConv1_w_grad = nullptr; float* myConv1_b_grad = nullptr; float* myConv2_w_grad = nullptr; float* myConv2_b_grad = nullptr; float* myOutput_w_grad = nullptr; float* myOutput_b_grad = nullptr; // Adam optimizer momentum buffers float* myConv1_w_m = nullptr; float* myConv1_w_v = nullptr; float* myConv1_b_m = nullptr; float* myConv1_b_v = nullptr; float* myConv2_w_m = nullptr; float* myConv2_w_v = nullptr; float* myConv2_b_m = nullptr; float* myConv2_b_v = nullptr; float* myOutput_w_m = nullptr; float* myOutput_w_v = nullptr; float* myOutput_b_m = nullptr; float* myOutput_b_v = nullptr; // Forward pass buffers float* myConv1_output = nullptr; float* myPool1_output = nullptr; float* myConv2_output = nullptr; float* myDense_output = nullptr; // Backward pass buffers float* myDense_grad = nullptr; float* myConv2_grad = nullptr; float* myPool1_grad = nullptr; float* myConv1_grad = nullptr; struct TrainingItem { String path; int label; }; std::vector myTrainingData; // ====================================================== // UTILITY FUNCTIONS // ====================================================== inline float clip_value(float v, float mn=-100, float mx=100) { if(isnan(v)||isinf(v)) return 0; return constrain(v,mn,mx); } inline float leaky_relu(float x) { return x>0 ? x : 0.1f*x; } inline float leaky_relu_deriv(float x) { return x>0 ? 1.0f : 0.1f; } // ====================================================== // UNIFIED TOUCH INPUT FUNCTIONS - NEW! // ====================================================== int myReadTouch() { int sum = 0; for (int i = 0; i < 3; i++) { sum += analogRead(A0); delayMicroseconds(100); } return sum / 3; } void myResetTouchState() { myTouch.isTouching = false; myTouch.tapCount = 0; myTouch.firstTapTime = 0; myTouch.lastReleaseTime = 0; myTouch.lastCheckTime = 0; } // NEW: Background touch monitor that can be called less frequently void myUpdateTouchState() { unsigned long now = millis(); // Only check every 20ms to avoid overwhelming analogRead if (now - myTouch.lastCheckTime < 20) return; myTouch.lastCheckTime = now; int val = myReadTouch(); bool touchActive = myTouch.isTouching ? (val > myThresholdRelease) : (val > myThresholdPress); // Touch just started if (touchActive && !myTouch.isTouching) { if (now - myTouch.lastReleaseTime < myTouch.debounceDelay) { return; // Debounce } myTouch.isTouching = true; // First tap or within tap window? if (myTouch.tapCount == 0 || (now - myTouch.firstTapTime < myTouch.tapWindow)) { if (myTouch.tapCount == 0) { myTouch.firstTapTime = now; } myTouch.tapCount++; Serial.printf("Tap #%d\n", myTouch.tapCount); } else { // Window expired, reset myTouch.tapCount = 1; myTouch.firstTapTime = now; Serial.println("Tap #1 (new window)"); } } // Touch released if (!touchActive && myTouch.isTouching) { myTouch.isTouching = false; myTouch.lastReleaseTime = now; } } // Returns: 0=no action, 1=tap, 2=long press (3+ taps) // NOTE: Always call myUpdateTouchState() before this in tight loops int myCheckTouchInput() { myUpdateTouchState(); // Update state first unsigned long now = millis(); // Check if tap window expired and we have taps if (myTouch.tapCount > 0 && !myTouch.isTouching) { if (now - myTouch.firstTapTime > myTouch.tapWindow) { int result = (myTouch.tapCount >= myTouch.longPressTaps) ? 2 : 1; int count = myTouch.tapCount; myResetTouchState(); if (result == 2) { Serial.printf("LONG PRESS detected (%d taps)\n", count); } else { Serial.printf("TAP detected (%d tap%s)\n", count, count > 1 ? "s" : ""); } return result; } } return 0; } // NEW: Non-blocking check - just updates state without consuming events // Use this in heavy computation loops void myCheckTouchBackground() { myUpdateTouchState(); } // NEW: Check if we have a pending action without consuming it int myPeekTouchAction() { myUpdateTouchState(); unsigned long now = millis(); if (myTouch.tapCount > 0 && !myTouch.isTouching) { if (now - myTouch.firstTapTime > myTouch.tapWindow) { return (myTouch.tapCount >= myTouch.longPressTaps) ? 2 : 1; } } return 0; } // ====================================================== // MEMORY ALLOCATION // ====================================================== void myAllocateMemory() { if (myInputBuffer != nullptr) return; Serial.println("\n=== Allocating Memory ==="); myInputBuffer = (float*)ps_malloc(INPUT_SIZE * INPUT_SIZE * 3 * sizeof(float)); myConv1_w = (float*)ps_malloc(CONV1_WEIGHTS * sizeof(float)); myConv1_b = (float*)ps_malloc(CONV1_FILTERS * sizeof(float)); myConv2_w = (float*)ps_malloc(CONV2_WEIGHTS * sizeof(float)); myConv2_b = (float*)ps_malloc(CONV2_FILTERS * sizeof(float)); myOutput_w = (float*)ps_malloc(OUTPUT_WEIGHTS * sizeof(float)); myOutput_b = (float*)ps_malloc(NUM_CLASSES * sizeof(float)); myConv1_w_grad = (float*)ps_malloc(CONV1_WEIGHTS * sizeof(float)); myConv1_b_grad = (float*)ps_malloc(CONV1_FILTERS * sizeof(float)); myConv2_w_grad = (float*)ps_malloc(CONV2_WEIGHTS * sizeof(float)); myConv2_b_grad = (float*)ps_malloc(CONV2_FILTERS * sizeof(float)); myOutput_w_grad = (float*)ps_malloc(OUTPUT_WEIGHTS * sizeof(float)); myOutput_b_grad = (float*)ps_malloc(NUM_CLASSES * sizeof(float)); myConv1_w_m = (float*)ps_calloc(CONV1_WEIGHTS, sizeof(float)); myConv1_w_v = (float*)ps_calloc(CONV1_WEIGHTS, sizeof(float)); myConv1_b_m = (float*)ps_calloc(CONV1_FILTERS, sizeof(float)); myConv1_b_v = (float*)ps_calloc(CONV1_FILTERS, sizeof(float)); myConv2_w_m = (float*)ps_calloc(CONV2_WEIGHTS, sizeof(float)); myConv2_w_v = (float*)ps_calloc(CONV2_WEIGHTS, sizeof(float)); myConv2_b_m = (float*)ps_calloc(CONV2_FILTERS, sizeof(float)); myConv2_b_v = (float*)ps_calloc(CONV2_FILTERS, sizeof(float)); myOutput_w_m = (float*)ps_calloc(OUTPUT_WEIGHTS, sizeof(float)); myOutput_w_v = (float*)ps_calloc(OUTPUT_WEIGHTS, sizeof(float)); myOutput_b_m = (float*)ps_calloc(NUM_CLASSES, sizeof(float)); myOutput_b_v = (float*)ps_calloc(NUM_CLASSES, sizeof(float)); myConv1_output = (float*)ps_malloc(CONV1_OUTPUT_SIZE*CONV1_OUTPUT_SIZE*CONV1_FILTERS*sizeof(float)); myPool1_output = (float*)ps_malloc(POOL1_OUTPUT_SIZE*POOL1_OUTPUT_SIZE*CONV1_FILTERS*sizeof(float)); myConv2_output = (float*)ps_malloc(CONV2_OUTPUT_SIZE*CONV2_OUTPUT_SIZE*CONV2_FILTERS*sizeof(float)); myDense_output = (float*)ps_malloc(NUM_CLASSES*sizeof(float)); myDense_grad = (float*)ps_malloc(FLATTENED_SIZE*sizeof(float)); myConv2_grad = (float*)ps_malloc(CONV2_OUTPUT_SIZE*CONV2_OUTPUT_SIZE*CONV2_FILTERS*sizeof(float)); myPool1_grad = (float*)ps_malloc(POOL1_OUTPUT_SIZE*POOL1_OUTPUT_SIZE*CONV1_FILTERS*sizeof(float)); myConv1_grad = (float*)ps_malloc(CONV1_OUTPUT_SIZE*CONV1_OUTPUT_SIZE*CONV1_FILTERS*sizeof(float)); if (!myInputBuffer || !myConv1_w || !myConv2_w || !myOutput_w || !myConv1_output || !myPool1_output || !myConv2_output) { Serial.println("FATAL: PSRAM allocation failed!"); u8g2.firstPage(); do { u8g2.drawStr(0, 15, "PSRAM ERROR!"); } while (u8g2.nextPage()); while(1) { delay(1000); } } Serial.printf("Free PSRAM after allocation: %d bytes\n", ESP.getFreePsram()); // Initialize weights with He initialization float c1std = sqrt(2.0/(9.0*3)); for(int i=0; i239) sy=239; if(sx>239) sx=239; int srcIdx = (sy*240 + sx)*3; int dstIdx = (y*INPUT_SIZE + x)*3; buf[dstIdx] = rgb[srcIdx]/255.0f; buf[dstIdx+1] = rgb[srcIdx+1]/255.0f; buf[dstIdx+2] = rgb[srcIdx+2]/255.0f; } } free(rgb); return true; } // ====================================================== // PART 0: SETUP AND LOOP // ====================================================== // Forward declarations for functions defined in other parts void myActionCollect(int classIdx); void myActionTrain(); void myActionInfer(); void myResetMenuState(); void myHandleMenuNavigation(); void myDrawMenu(); void setup() { Serial.begin(115200); while (!Serial && millis() < 3000); delay(1000); // slow down the startup Serial.println("\n=== XIAO ESP32-S3 ML System Starting ==="); Serial.printf("Free heap: %d bytes\n", ESP.getFreeHeap()); Serial.printf("Free PSRAM: %d bytes\n", ESP.getFreePsram()); // Add in setup() function: myRgbBuffer = (uint8_t*)ps_malloc(240*240*3); if (!myRgbBuffer) { Serial.println("Failed to allocate RGB buffer!"); } pinMode(A0, INPUT); u8g2.begin(); if (!SD.begin(21)) { Serial.println("SD card initialization failed"); u8g2.firstPage(); do { u8g2.drawStr(0, 15, "SD CARD ERROR!"); } while (u8g2.nextPage()); while(1) { delay(1000); } } Serial.println("SD card mounted successfully"); camera_config_t config; config.ledc_channel = LEDC_CHANNEL_0; config.ledc_timer = LEDC_TIMER_0; config.pin_d0 = Y2_GPIO_NUM; config.pin_d1 = Y3_GPIO_NUM; config.pin_d2 = Y4_GPIO_NUM; config.pin_d3 = Y5_GPIO_NUM; config.pin_d4 = Y6_GPIO_NUM; config.pin_d5 = Y7_GPIO_NUM; config.pin_d6 = Y8_GPIO_NUM; config.pin_d7 = Y9_GPIO_NUM; config.pin_xclk = XCLK_GPIO_NUM; config.pin_pclk = PCLK_GPIO_NUM; config.pin_vsync = VSYNC_GPIO_NUM; config.pin_href = HREF_GPIO_NUM; config.pin_sccb_sda = SIOD_GPIO_NUM; config.pin_sccb_scl = SIOC_GPIO_NUM; config.pin_pwdn = PWDN_GPIO_NUM; config.pin_reset = RESET_GPIO_NUM; config.xclk_freq_hz = 20000000; config.pixel_format = PIXFORMAT_JPEG; config.frame_size = FRAMESIZE_240X240; config.jpeg_quality = 12; config.fb_count = 1; esp_camera_init(&config); Serial.println("Camera initialized"); myLastActivityTime = millis(); myResetMenuState(); delay(2000); // time to get things started like the serial monitor Serial.println("System ready - Tap A0 to navigate, 3+ taps to select"); myDrawMenu(); } void loop() { myHandleMenuNavigation(); } // END OF FIXED1 - Continue with your original Part 1, Part 2, Part 3, Part 4 // Replace only the THREE action functions (myActionCollect, myActionTrain, myActionInfer) // with the versions in FIXED2 // ██████████████████████████████████████████████████████████████████████████████ // ██ ██ // ██ PART 1: IMAGE COLLECTION FUNCTIONS ██ // ██ ██ // ██ DEPENDENCIES (functions called from Part 0): ██ // ██ - myResetMenuState() [Part 4] ██ // ██ - myReadTouch() [Part 4] ██ // ██ ██ // ██ VARIABLES USED (defined in Part 0): ██ // ██ - myClassLabels[3], myThresholdPress, myLongPressTime ██ // ██ - u8g2 (OLED display object) ██ // ██ ██ // ██████████████████████████████████████████████████████████████████████████████ #ifdef TEST_PART1_STANDALONE // Stubs for testing Part 1 standalone int myReadTouch() { return 500; // Return value below threshold } void myResetMenuState() { Serial.println("STUB: myResetMenuState() called"); } #endif void myDisplayImageOnOLED(camera_fb_t* fb, int imageCount) { size_t myRgbBufferSize = fb->width * fb->height * 3; uint8_t* myRgbBuffer = (uint8_t*)ps_malloc(myRgbBufferSize); if (myRgbBuffer == NULL) { Serial.println("Failed to allocate RGB buffer for OLED preview"); return; } bool conversionSuccess = fmt2rgb888(fb->buf, fb->len, fb->format, myRgbBuffer); if (!conversionSuccess) { free(myRgbBuffer); Serial.println("Failed to convert JPEG to RGB888 for OLED"); return; } int myOledWidth = u8g2.getDisplayWidth(); int myOledHeight = u8g2.getDisplayHeight(); int myImageWidth = fb->width; int myImageHeight = fb->height; int myScaleX = myImageWidth / myOledWidth; int myScaleY = myImageHeight / myOledHeight; u8g2.firstPage(); do { for (int myOledX = 0; myOledX < myOledWidth; myOledX++) { for (int myOledY = 0; myOledY < myOledHeight; myOledY++) { int myImageX = myOledX * myScaleX; int myImageY = myOledY * myScaleY; size_t myPixelIndex = (myImageY * myImageWidth + myImageX) * 3; if (myPixelIndex + 2 < myRgbBufferSize) { uint8_t myRed = myRgbBuffer[myPixelIndex]; uint8_t myGreen = myRgbBuffer[myPixelIndex + 1]; uint8_t myBlue = myRgbBuffer[myPixelIndex + 2]; uint8_t myBrightness = (myRed + myGreen + myBlue) / 3; if (myBrightness > 100) { u8g2.drawPixel(myOledX, myOledY); } } } } u8g2.setFont(u8g2_font_ncenB10_tr); u8g2.setColorIndex(0); u8g2.drawBox(0, 0, 20, 15); u8g2.setColorIndex(1); u8g2.setCursor(3, 10); u8g2.print(String(imageCount)); } while (u8g2.nextPage()); free(myRgbBuffer); } void myDisplayLiveCameraPreview() { camera_fb_t * fb = esp_camera_fb_get(); if (!fb) return; size_t myRgbBufferSize = fb->width * fb->height * 3; uint8_t* myRgbBuffer = (uint8_t*)ps_malloc(myRgbBufferSize); if (myRgbBuffer && fmt2rgb888(fb->buf, fb->len, fb->format, myRgbBuffer)) { int myOledWidth = u8g2.getDisplayWidth(); int myOledHeight = u8g2.getDisplayHeight(); int myImageWidth = fb->width; int myImageHeight = fb->height; int myScaleX = myImageWidth / myOledWidth; int myScaleY = myImageHeight / myOledHeight; u8g2.firstPage(); do { for (int myOledX = 0; myOledX < myOledWidth; myOledX++) { for (int myOledY = 0; myOledY < myOledHeight; myOledY++) { int myImageX = myOledX * myScaleX; int myImageY = myOledY * myScaleY; size_t myPixelIndex = (myImageY * myImageWidth + myImageX) * 3; if (myPixelIndex + 2 < myRgbBufferSize) { uint8_t myBrightness = (myRgbBuffer[myPixelIndex] + myRgbBuffer[myPixelIndex + 1] + myRgbBuffer[myPixelIndex + 2]) / 3; if (myBrightness > 100) { u8g2.drawPixel(myOledX, myOledY); } } } } u8g2.setFont(u8g2_font_5x7_tf); u8g2.setColorIndex(0); u8g2.drawBox(50, 0, 22, 8); u8g2.setColorIndex(1); u8g2.drawStr(52, 7, "LIVE"); } while (u8g2.nextPage()); free(myRgbBuffer); } esp_camera_fb_return(fb); } void myActionCollect(int classIdx) { Serial.printf("\n>>> Collection mode: %s\n", myClassLabels[classIdx].c_str()); Serial.println("Instructions:"); Serial.println(" TAP (1-2 taps) = Capture image"); Serial.println(" LONG PRESS (3+ taps) = Exit to menu"); Serial.println(" Serial: 'T'=capture, 'L'=exit"); myResetTouchState(); // Clear touch state when entering String path = "/images/" + myClassLabels[classIdx]; if (!SD.exists("/images")) SD.mkdir("/images"); if (!SD.exists(path)) SD.mkdir(path); int counts[3] = {0, 0, 0}; for(int i=0; i<3; i++) { File root = SD.open("/images/" + myClassLabels[i]); if(root) { while(File file = root.openNextFile()) { if(!file.isDirectory() && (String(file.name()).endsWith(".jpg") || String(file.name()).endsWith(".JPG"))) { counts[i]++; } file.close(); } root.close(); } } unsigned long lastPreview = 0; bool shouldCapture = false; while (true) { // Live preview if (millis() - lastPreview > 100) { myDisplayLiveCameraPreview(); lastPreview = millis(); } // Serial input if (Serial.available()) { char c = Serial.read(); if (c == 'l' || c == 'L') { myResetMenuState(); return; } else if (c == 't' || c == 'T') { shouldCapture = true; } } // Touch input - unified system int touchAction = myCheckTouchInput(); if (touchAction == 2) { // Long press (3+ taps) - exit Serial.println("Exiting collection mode"); myResetMenuState(); return; } else if (touchAction == 1) { // Tap (1-2 taps) - capture shouldCapture = true; } // Perform capture if triggered if (shouldCapture) { shouldCapture = false; camera_fb_t * fb = esp_camera_fb_get(); if (fb) { String fileName = path + "/img_" + String(millis()) + ".jpg"; File file = SD.open(fileName, FILE_WRITE); if (file) { file.write(fb->buf, fb->len); file.close(); counts[classIdx]++; Serial.printf("Saved: %s (Total: %d)\n", fileName.c_str(), counts[classIdx]); myDisplayImageOnOLED(fb, counts[classIdx]); delay(300); } esp_camera_fb_return(fb); } } delay(10); } } // ██████████████████████████████████████████████████████████████████████████████ // ██ ██ // ██ PART 2: TRAINING FUNCTIONS (FORWARD/BACKWARD PASS, OPTIMIZER) ██ // ██ ██ // ██ DEPENDENCIES (functions called from Part 0): ██ // ██ - myAllocateMemory() [Part 0] ██ // ██ - myLoadWeights() [Part 0] ██ // ██ - mySaveWeights() [Part 0] ██ // ██ - myLoadImageFromFile() [Part 0] ██ // ██ ██ // ██ VARIABLES USED (defined in Part 0): ██ // ██ - All neural network weight/gradient buffers ██ // ██ - myClassLabels[3], LEARNING_RATE, BATCH_SIZE, TARGET_EPOCHS ██ // ██ - myTrainingData vector, myInputBuffer ██ // ██ - u8g2 (OLED display object) ██ // ██ ██ // ██████████████████████████████████████████████████████████████████████████████ #ifdef TEST_PART2_STANDALONE // Stubs for testing Part 2 standalone void myAllocateMemory() { Serial.println("STUB: myAllocateMemory() called - would allocate PSRAM for neural network"); } bool myLoadWeights() { Serial.println("STUB: myLoadWeights() called - would load weights from SD card"); return false; } void mySaveWeights() { Serial.println("STUB: mySaveWeights() called - would save weights to SD card"); } bool myLoadImageFromFile(const char* path, float* buf) { Serial.printf("STUB: myLoadImageFromFile(%s) called - would load and resize image\n", path); return true; } #endif // ====================================================== // FORWARD PASS // ====================================================== void myForwardPass(float* input, float* logits) { // Conv1: INPUT_SIZE x INPUT_SIZE x 3 -> CONV1_OUTPUT_SIZE x CONV1_OUTPUT_SIZE x CONV1_FILTERS for(int f=0; f POOL1_OUTPUT_SIZE x POOL1_OUTPUT_SIZE for(int f=0; f CONV2_OUTPUT_SIZE x CONV2_OUTPUT_SIZE x CONV2_FILTERS for(int f=0; f>> Training mode"); Serial.println("Instructions:"); Serial.println(" During training: 3+ taps = Save and exit"); Serial.println(" After completion: TAP = Train again, 3+ taps = Exit"); Serial.println(" Serial: 'T'=train again, 'L'=exit"); myResetTouchState(); // Clear touch state when entering u8g2.firstPage(); do { u8g2.setFont(u8g2_font_6x10_tf); u8g2.drawStr(0, 12, "TRAINING MODE"); u8g2.drawStr(0, 24, "Loading..."); } while (u8g2.nextPage()); if (myLoadWeights()) { Serial.println("Continuing from saved weights"); } else { myAllocateMemory(); Serial.println("Starting fresh training"); } while (true) { // Load training data myTrainingData.clear(); for(int i=0; i<3; i++) { File root = SD.open("/images/" + myClassLabels[i]); if (root) { while(File file = root.openNextFile()) { if(!file.isDirectory()) { String fn = String(file.name()); if(fn.endsWith(".jpg") || fn.endsWith(".JPG")) { myTrainingData.push_back({file.path(), i}); } } file.close(); } root.close(); } } if(myTrainingData.empty()) { u8g2.firstPage(); do { u8g2.drawStr(0, 20, "No Images!"); } while (u8g2.nextPage()); delay(2000); myResetMenuState(); return; } int total = myTrainingData.size(); int batchesPerEpoch = (total + BATCH_SIZE - 1) / BATCH_SIZE; int totalBatches = TARGET_EPOCHS * batchesPerEpoch; Serial.printf("Training: %d images, %d batches\n", total, totalBatches); // Training loop std::vector indices; for(int i=0; i0; i--) { int j = random(i+1); int tmp = indices[i]; indices[i] = indices[j]; indices[j] = tmp; } } int batchStart = (batch % batchesPerEpoch) * BATCH_SIZE; int batchEnd = min(batchStart + BATCH_SIZE, total); float batchLoss = 0; int correctCount = 0; // Train on batch for(int i=batchStart; i myDense_output[pred]) pred = j; if(pred == img.label) correctCount++; myBackwardDense(img.label); myBackwardConv2(); myBackwardPool1(); myBackwardConv1(); // Update touch state during heavy computation if (i % 3 == 0) myCheckTouchBackground(); } myUpdateWeights(batch+1); float avgLoss = batchLoss / (batchEnd - batchStart); float batchAcc = (float)correctCount / (batchEnd - batchStart); runningLoss += avgLoss; lossCount++; // Update display if((batch+1) % 5 == 0) { float displayLoss = runningLoss / lossCount; u8g2.firstPage(); do { u8g2.setFont(u8g2_font_6x10_tf); u8g2.setCursor(0, 12); u8g2.print("Training..."); u8g2.setCursor(0, 24); u8g2.print("B:"); u8g2.print(batch+1); u8g2.print("/"); u8g2.print(totalBatches); u8g2.setCursor(0, 36); u8g2.print("L:"); u8g2.print(displayLoss, 3); u8g2.print(" A:"); u8g2.print((int)(batchAcc*100)); u8g2.print("%"); } while (u8g2.nextPage()); runningLoss = 0; lossCount = 0; } if((batch+1) % 10 == 0) { Serial.printf("Batch %d/%d - Loss: %.4f - Acc: %.1f%%\n", batch+1, totalBatches, avgLoss, batchAcc*100); } } Serial.println("\n--- Training Complete ---"); mySaveWeights(); u8g2.firstPage(); do { u8g2.drawStr(0, 12, "DONE!"); u8g2.drawStr(0, 24, "Tap:Again"); u8g2.drawStr(0, 36, "3+Taps:Exit"); } while (u8g2.nextPage()); myResetTouchState(); // Clear touch state before waiting for input // Wait for user decision - ACTIVE MONITORING Serial.println("Waiting for input (tap or 3+ taps)..."); while (true) { if (Serial.available()) { char c = Serial.read(); if (c == 'x' || c == 'X') { myResetMenuState(); return; } else if (c == 't' || c == 'T') { break; // Train again } } // Actively monitor touch in tight loop int touchAction = myCheckTouchInput(); if (touchAction == 2) { // Long press - exit myResetMenuState(); return; } else if (touchAction == 1) { // Tap - train again Serial.println("Starting new training cycle"); break; } delay(10); // Keep this small for responsiveness } } } // ██████████████████████████████████████████████████████████████████████████████ // ██ ██ // ██ PART 3: INFERENCE FUNCTION - OPTIMIZED ██ // ██ ██ // ██ DEPENDENCIES (functions called from Part 0): ██ // ██ - myLoadWeights() [Part 0] ██ // ██ - myForwardPass() [Part 2] ██ // ██ - myRgbBuffer (global, allocated in setup) ██ // ██ ██ // ██ VARIABLES USED (defined in Part 0): ██ // ██ - myInputBuffer, myDense_output (probabilities) ██ // ██ - myClassLabels[3], myThresholdPress ██ // ██ - u8g2 (OLED display object) ██ // ██ ██ // ██████████████████████████████████████████████████████████████████████████████ #ifdef TEST_PART3_STANDALONE // Stubs for testing Part 3 standalone bool myLoadWeights() { Serial.println("STUB: myLoadWeights() called - would load weights from SD card"); return true; } void myForwardPass(float* input, float* logits) { Serial.println("STUB: myForwardPass() called - would run neural network inference"); // Fake some output for testing if(myDense_output) { myDense_output[0] = 0.1; myDense_output[1] = 0.7; myDense_output[2] = 0.2; } } #endif void myActionInfer() { Serial.println("\n>>> Inference mode - OPTIMIZED"); Serial.println("Instructions:"); Serial.println(" T or L exit to menu"); myResetTouchState(); // Clear touch state when entering if (!myLoadWeights()) { u8g2.firstPage(); do { u8g2.drawStr(0, 15, "NOT TRAINED!"); } while (u8g2.nextPage()); delay(2000); myResetMenuState(); return; } // Pre-compute resize lookup tables (done once) static int sy_lookup[INPUT_SIZE]; static int sx_lookup[INPUT_SIZE]; static bool lookup_initialized = false; if (!lookup_initialized) { for(int i=0; ibuf, fb->len, PIXFORMAT_JPEG, myRgbBuffer)) { // Optimized resize using lookup tables for(int y=0; y myDense_output[pred]) pred = i; } } esp_camera_fb_return(fb); // Record frame timing frameTimes[frameIndex] = millis() - frameStart; float fps2 = 1000.0 / frameTimes[frameIndex]; Serial.printf("Frame %d: %lu ms (%.1f FPS) ", frameIndex+1, frameTimes[frameIndex], fps2); frameIndex++; Serial.printf("Current Pred: %s (%.1f%%) | All: %.0f%% %.0f%% %.0f%%\n", myClassLabels[pred].c_str(), myDense_output[pred]*100, myDense_output[0]*100, myDense_output[1]*100, myDense_output[2]*100); // Every 10th frame: do expensive operations if (frameIndex >= 10) { // 1. Update OLED display u8g2.firstPage(); do { u8g2.setFont(u8g2_font_6x10_tf); u8g2.drawStr(0, 12, "RESULT:"); u8g2.setFont(u8g2_font_7x14_tf); u8g2.drawStr(0, 28, myClassLabels[pred].c_str()); u8g2.setFont(u8g2_font_5x7_tf); u8g2.setCursor(0, 38); u8g2.print((int)(myDense_output[pred] * 100)); u8g2.print("%"); } while (u8g2.nextPage()); /* // 2. Print all 10 frame timings Serial.println("\n=== Last 10 Frames ==="); for (int i = 0; i < 10; i++) { float fps = 1000.0 / frameTimes[i]; Serial.printf("Frame %d: %lu ms (%.1f FPS)\n", i+1, frameTimes[i], fps); } Serial.printf("Current Pred: %s (%.1f%%) | All: %.0f%% %.0f%% %.0f%%\n\n", myClassLabels[pred].c_str(), myDense_output[pred]*100, myDense_output[0]*100, myDense_output[1]*100, myDense_output[2]*100); */ // 3. Check for touch to exit (simplified) int touchVal = myReadTouch(); if (touchVal > myThresholdPress) { Serial.println("Touch detected - exiting inference"); delay(200); // Brief debounce myResetMenuState(); return; } // Reset frame counter frameIndex = 0; } } } // ██████████████████████████████████████████████████████████████████████████████ // ██ ██ // ██ PART 4: MENU SYSTEM FUNCTIONS ██ // ██ ██ // ██ DEPENDENCIES (functions called from Part 0): ██ // ██ - myActionCollect(int classIdx) [Part 1] ██ // ██ - myActionTrain() [Part 2] ██ // ██ - myActionInfer() [Part 3] ██ // ██ ██ // ██ VARIABLES USED (defined in Part 0): ██ // ██ - myClassLabels[3] ██ // ██ - myTotalItems, myThresholdPress, myThresholdRelease ██ // ██ - myScreenTimeout ██ // ██ - myLastActivityTime, myLastTapTime, myTapCooldown ██ // ██ - myIsTouching, myLongPressTriggered, myMenuIndex, myIsSelected ██ // ██ - u8g2 (OLED display object) ██ // ██ ██ // ██ NOTE: This part is called from loop() in Part 0 ██ // ██ ██ // ██████████████████████████████████████████████████████████████████████████████ #ifdef TEST_PART4_STANDALONE void myActionCollect(int classIdx) { Serial.printf("STUB: myActionCollect(%d) called - would collect images for class %s\n", classIdx, myClassLabels[classIdx].c_str()); delay(1000); myResetMenuState(); } void myActionTrain() { Serial.println("STUB: myActionTrain() called - would train the neural network"); delay(1000); myResetMenuState(); } void myActionInfer() { Serial.println("STUB: myActionInfer() called - would run inference on camera feed"); delay(1000); myResetMenuState(); } #endif void myResetMenuState() { myIsSelected = false; myResetTouchState(); // Use unified touch reset myLastActivityTime = millis(); myDrawMenu(); } void myDrawMenu() { // ===== SERIAL MENU ===== Serial.println("\n=== MENU ==="); for (int i = 1; i <= myTotalItems; i++) { String label = (i <= 3) ? myClassLabels[i - 1] : (i == 4) ? "Train" : "Infer"; if (i == myMenuIndex) Serial.print(" > "); else Serial.print(" "); Serial.printf("%d. %s\n", i, label.c_str()); } Serial.println("Commands: t=next (tap) l=select (longpress)"); // ===== OLED MENU ===== u8g2.firstPage(); do { u8g2.setFont(u8g2_font_6x10_tf); u8g2.drawStr(0, 8, "TAP:Next HOLD:Ok"); int myStartItem = (myMenuIndex <= 3) ? 1 : myMenuIndex - 2; for (int i = 0; i < 3; i++) { int cur = myStartItem + i; if (cur > myTotalItems) break; String label = (cur <= 3) ? myClassLabels[cur - 1] : (cur == 4) ? "Train" : "Infer"; int y = 18 + i * 9; if (cur == myMenuIndex) u8g2.drawStr(0, y, ("> " + label).c_str()); else u8g2.drawStr(0, y, (" " + label).c_str()); } } while (u8g2.nextPage()); } void myHandleMenuNavigation() { unsigned long myCurrentMillis = millis(); // -------------------------------------------------------------------------- // SERIAL INPUT // -------------------------------------------------------------------------- if (!myIsSelected && Serial.available()) { char c = Serial.read(); if (c >= '1' && c <= '5') { int newIndex = c - '0'; if (newIndex <= myTotalItems) { myMenuIndex = newIndex; myIsSelected = true; myLastActivityTime = myCurrentMillis; if (myMenuIndex == 1) myActionCollect(0); else if (myMenuIndex == 2) myActionCollect(1); else if (myMenuIndex == 3) myActionCollect(2); else if (myMenuIndex == 4) myActionTrain(); else if (myMenuIndex == 5) myActionInfer(); } } else if (c == 't' || c == 'T') { if (myCurrentMillis - myLastTapTime > myTapCooldown) { myMenuIndex++; if (myMenuIndex > myTotalItems) myMenuIndex = 1; myDrawMenu(); myLastTapTime = myCurrentMillis; myLastActivityTime = myCurrentMillis; } } else if (c == 'l' || c == 'L') { myIsSelected = true; myLastActivityTime = myCurrentMillis; if (myMenuIndex == 1) myActionCollect(0); else if (myMenuIndex == 2) myActionCollect(1); else if (myMenuIndex == 3) myActionCollect(2); else if (myMenuIndex == 4) myActionTrain(); else if (myMenuIndex == 5) myActionInfer(); } } // -------------------------------------------------------------------------- // TOUCH INPUT - NOW USING UNIFIED SYSTEM // -------------------------------------------------------------------------- if (!myIsSelected) { int touchAction = myCheckTouchInput(); if (touchAction == 1) { // Tap detected - advance menu if (myCurrentMillis - myLastTapTime > myTapCooldown) { myMenuIndex++; if (myMenuIndex > myTotalItems) myMenuIndex = 1; myDrawMenu(); myLastTapTime = myCurrentMillis; myLastActivityTime = myCurrentMillis; } } else if (touchAction == 2) { // Long press detected - select menu item myIsSelected = true; myLastActivityTime = myCurrentMillis; if (myMenuIndex == 1) myActionCollect(0); else if (myMenuIndex == 2) myActionCollect(1); else if (myMenuIndex == 3) myActionCollect(2); else if (myMenuIndex == 4) myActionTrain(); else if (myMenuIndex == 5) myActionInfer(); } } }