Implement 4-quadrant dirty rectangle optimization and 30 FPS limiting for ST7796

Add a partial screen update system that detects changes by XOR-ing each byte of
the new 1-bit frame against the previous frame, and tracks them in 4 quadrants
(top-left, top-right, bottom-left, bottom-right). Each changed pixel expands the
bounding rectangle of its quadrant. Rectangles in neighbouring quadrants are
then merged when they are close together and similarly aligned, and any
remaining pair is merged when the merged area is less than 40% larger than the
two areas combined. This substantially reduces SPI bandwidth for UIs with
scattered updates (e.g., corners, sidebars).

Key changes:
- 4-quadrant dirty rectangle tracking with automatic merging
- XOR-based change detection for fast byte-level comparison
- Expose st7796_set_window() for partial region updates
- 30 FPS frame rate limiter (33ms per frame) to prevent excessive refreshes
- Smart sleep timing when frame rate limit is active

Performance: Up to 99% reduction in SPI traffic for corner-based UIs
(e.g., 4 small regions vs full 480x320 screen updates).

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Adolfo Reyna
2026-02-11 12:56:10 -05:00
parent b59d716965
commit eacc03a38c
5 changed files with 370 additions and 48 deletions

View File

@@ -367,7 +367,14 @@ int main()
delete display;
return -1;
}
// Enable dirty rectangle optimization for ST7796 displays
if (display->get_type() == DISPLAY_TYPE_ST7796) {
LowLevelDisplayST7796* st7796_display = static_cast<LowLevelDisplayST7796*>(display);
st7796_display->enable_dirty_rect(true);
printf("Dirty rectangle optimization enabled (4 quadrants: TL/TR/BL/BR split)\n");
}
// Launch Core 1 for display refresh handling
printf("Launching Core 1 for display refresh...\n");
multicore_launch_core1(core1_entry);
@@ -546,11 +553,16 @@ int main()
printf("Dimming check timer set to %d seconds\n", DIM_CHECK_INTERVAL_MS / 1000);
printf("\nEntering reactive game loop (Core 0 - input & logic)\n");
printf("Display refreshes handled by Core 1\n\n");
printf("Display refreshes handled by Core 1\n");
printf("Frame rate limited to 30 FPS (33.3ms per frame)\n\n");
Game* current_game = nullptr;
uint32_t game_start_time = 0;
// Frame rate limiting (30 FPS = 33.33ms per frame)
const uint32_t TARGET_FRAME_TIME_MS = 33; // 1000ms / 30fps ≈ 33ms
uint32_t last_frame_time = 0;
while (1) {
// Determine if we should sleep or stay awake for updates
bool stay_awake = false;
@@ -666,30 +678,45 @@ int main()
}
}
// 4. Redraw and queue async refresh on Core 1
// 4. Redraw and queue async refresh on Core 1 (with 30 FPS limiting)
if (needs_refresh || pending_refresh) {
// Only draw if Core 1 is finished with the buffer
if (!is_refresh_in_progress()) {
// Clear buffer and redraw entire UI with updated state
memset(bit_buffer, 0, V_WIDTH * V_HEIGHT / 8);
if (launcher.is_game_selected()) {
current_game = launcher.get_selected_game();
current_game->draw();
} else {
launcher.draw();
}
// Request async refresh (non-blocking - handled by Core 1)
bool refresh_started = refresh_screen_async(bit_buffer, display);
if (refresh_started) {
pending_refresh = false; // Refresh queued successfully
// Check frame rate limiting
uint32_t current_time = to_ms_since_boot(get_absolute_time());
uint32_t time_since_last_frame = current_time - last_frame_time;
// Only proceed if enough time has passed since last frame
if (time_since_last_frame >= TARGET_FRAME_TIME_MS) {
// Only draw if Core 1 is finished with the buffer
if (!is_refresh_in_progress()) {
// Clear buffer and redraw entire UI with updated state
memset(bit_buffer, 0, V_WIDTH * V_HEIGHT / 8);
if (launcher.is_game_selected()) {
current_game = launcher.get_selected_game();
current_game->draw();
} else {
launcher.draw();
}
// Request async refresh (non-blocking - handled by Core 1)
bool refresh_started = refresh_screen_async(bit_buffer, display);
if (refresh_started) {
pending_refresh = false; // Refresh queued successfully
last_frame_time = current_time; // Update frame time
} else {
pending_refresh = true;
}
} else {
pending_refresh = true;
}
} else {
pending_refresh = true;
// Frame rate limit: skip this frame, wait for next opportunity
// Sleep for the remaining time to reach target frame time
uint32_t remaining_time = TARGET_FRAME_TIME_MS - time_since_last_frame;
if (remaining_time > 1) {
sleep_ms(remaining_time - 1); // -1 to account for overhead
}
}
}

View File

@@ -1,13 +1,19 @@
#include "low_level_display_st7796.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cstdlib> // For abs()
// RGB565 color definitions
#define COLOR_BLACK 0x0000
#define COLOR_WHITE 0xFFFF
LowLevelDisplayST7796::LowLevelDisplayST7796(const st7796_config* cfg, int w, int h, bool invert)
: config(cfg), width(w), height(h), initialized(false), rgb_buffer(nullptr), invert_color(invert) {
: config(cfg), width(w), height(h), initialized(false), rgb_buffer(nullptr), invert_color(invert),
prev_bit_buffer(nullptr), dirty_rect_enabled(true) {
for (int i = 0; i < MAX_DIRTY_RECTS; i++) {
dirty_rects[i].reset();
}
}
LowLevelDisplayST7796::~LowLevelDisplayST7796() {
@@ -15,6 +21,10 @@ LowLevelDisplayST7796::~LowLevelDisplayST7796() {
free(rgb_buffer);
rgb_buffer = nullptr;
}
if (prev_bit_buffer) {
free(prev_bit_buffer);
prev_bit_buffer = nullptr;
}
}
bool LowLevelDisplayST7796::init() {
@@ -49,23 +59,204 @@ void LowLevelDisplayST7796::draw_pixel(int x, int y, bool white) {
void LowLevelDisplayST7796::draw_buffer(const uint8_t* bit_buffer) {
if (!bit_buffer || !rgb_buffer) return;
// Convert 1-bit buffer to RGB565 using persistent buffer
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int byte_index = (y * width + x) / 8;
int bit_index = 7 - (x % 8);
bool pixel_white = (bit_buffer[byte_index] >> bit_index) & 0x01;
bool out_white = invert_color ? !pixel_white : pixel_white;
rgb_buffer[y * width + x] = out_white ? COLOR_WHITE : COLOR_BLACK;
// Calculate buffer size
size_t bit_buffer_size = (width * height + 7) / 8;
// If dirty rectangle tracking is enabled and we have a previous buffer
if (dirty_rect_enabled && prev_bit_buffer) {
// Reset all dirty rectangles
for (int i = 0; i < MAX_DIRTY_RECTS; i++) {
dirty_rects[i].reset();
}
// Split screen into 4 quadrants
int mid_x = width / 2;
int mid_y = height / 2;
// Use bitwise XOR to quickly detect changed bytes
for (size_t byte_idx = 0; byte_idx < bit_buffer_size; byte_idx++) {
uint8_t diff = bit_buffer[byte_idx] ^ prev_bit_buffer[byte_idx];
// If this byte has changes
if (diff != 0) {
// Calculate pixel coordinates for this byte
int pixel_idx = byte_idx * 8;
int base_x = pixel_idx % width;
int base_y = pixel_idx / width;
// Check each changed bit/pixel in this byte
for (int bit = 0; bit < 8 && (pixel_idx + bit) < (width * height); bit++) {
if (diff & (0x80 >> bit)) {
int x = base_x + bit;
int y = base_y;
// Adjust coordinates if we wrapped to next row
if (x >= width) {
x -= width;
y++;
}
// Route to appropriate quadrant based on X and Y position
// Quadrant 0: Top-left (x < mid_x, y < mid_y)
// Quadrant 1: Top-right (x >= mid_x, y < mid_y)
// Quadrant 2: Bottom-left (x < mid_x, y >= mid_y)
// Quadrant 3: Bottom-right (x >= mid_x, y >= mid_y)
int rect_idx = ((y >= mid_y) ? 2 : 0) + ((x >= mid_x) ? 1 : 0);
dirty_rects[rect_idx].expand(x, y);
}
}
}
}
// Check if we have any valid dirty rectangles
int valid_rects = 0;
for (int i = 0; i < MAX_DIRTY_RECTS; i++) {
if (dirty_rects[i].is_valid) {
valid_rects++;
}
}
// If there are no changes, skip the update
if (valid_rects == 0) {
return;
}
// Optimization: Merge adjacent rectangles if beneficial
// Check pairs of rectangles and merge if they overlap or are close
if (valid_rects >= 2) {
// Try merging adjacent quadrants
// Check top row (0,1) merge
if (dirty_rects[0].is_valid && dirty_rects[1].is_valid) {
int gap_x = dirty_rects[1].x0 - dirty_rects[0].x1;
int gap_y = abs(dirty_rects[0].y0 - dirty_rects[1].y0) + abs(dirty_rects[0].y1 - dirty_rects[1].y1);
if (gap_x < 30 && gap_y < 20) {
dirty_rects[0].merge(dirty_rects[1]);
dirty_rects[1].reset();
valid_rects--;
}
}
// Check bottom row (2,3) merge
if (dirty_rects[2].is_valid && dirty_rects[3].is_valid) {
int gap_x = dirty_rects[3].x0 - dirty_rects[2].x1;
int gap_y = abs(dirty_rects[2].y0 - dirty_rects[3].y0) + abs(dirty_rects[2].y1 - dirty_rects[3].y1);
if (gap_x < 30 && gap_y < 20) {
dirty_rects[2].merge(dirty_rects[3]);
dirty_rects[3].reset();
valid_rects--;
}
}
// Check left column (0,2) merge
if (dirty_rects[0].is_valid && dirty_rects[2].is_valid) {
int gap_y = dirty_rects[2].y0 - dirty_rects[0].y1;
int gap_x = abs(dirty_rects[0].x0 - dirty_rects[2].x0) + abs(dirty_rects[0].x1 - dirty_rects[2].x1);
if (gap_y < 30 && gap_x < 20) {
dirty_rects[0].merge(dirty_rects[2]);
dirty_rects[2].reset();
valid_rects--;
}
}
// Check right column (1,3) merge
if (dirty_rects[1].is_valid && dirty_rects[3].is_valid) {
int gap_y = dirty_rects[3].y0 - dirty_rects[1].y1;
int gap_x = abs(dirty_rects[1].x0 - dirty_rects[3].x0) + abs(dirty_rects[1].x1 - dirty_rects[3].x1);
if (gap_y < 30 && gap_x < 20) {
dirty_rects[1].merge(dirty_rects[3]);
dirty_rects[3].reset();
valid_rects--;
}
}
// Final pass: merge any remaining valid rectangles if they're very close
for (int i = 0; i < MAX_DIRTY_RECTS - 1; i++) {
if (!dirty_rects[i].is_valid) continue;
for (int j = i + 1; j < MAX_DIRTY_RECTS; j++) {
if (!dirty_rects[j].is_valid) continue;
DirtyRect merged = dirty_rects[i];
merged.merge(dirty_rects[j]);
int combined_area = dirty_rects[i].get_area() + dirty_rects[j].get_area();
int merged_area = merged.get_area();
// Merge if the combined overhead is less than 40%
if (merged_area < combined_area * 1.4f) {
dirty_rects[i] = merged;
dirty_rects[j].reset();
valid_rects--;
break; // Move to next i
}
}
}
}
// Copy current buffer to previous buffer for next frame comparison
memcpy(prev_bit_buffer, bit_buffer, bit_buffer_size);
// Process each valid dirty rectangle
for (int rect_idx = 0; rect_idx < MAX_DIRTY_RECTS; rect_idx++) {
if (!dirty_rects[rect_idx].is_valid) continue;
DirtyRect& rect = dirty_rects[rect_idx];
// Convert only the dirty rectangle region to RGB565
for (int y = rect.y0; y <= rect.y1; y++) {
for (int x = rect.x0; x <= rect.x1; x++) {
int byte_index = (y * width + x) / 8;
int bit_index = 7 - (x % 8);
bool pixel_white = (bit_buffer[byte_index] >> bit_index) & 0x01;
bool out_white = invert_color ? !pixel_white : pixel_white;
rgb_buffer[y * width + x] = out_white ? COLOR_WHITE : COLOR_BLACK;
}
}
// Draw only this dirty rectangle
st7796_set_window(rect.x0, rect.y0, rect.x1, rect.y1);
// Calculate size of dirty region
int dirty_width = rect.get_width();
int dirty_height = rect.get_height();
// Write only the dirty rectangle pixels
// We need to extract rows from the full rgb_buffer
for (int row = 0; row < dirty_height; row++) {
int buffer_offset = (rect.y0 + row) * width + rect.x0;
st7796_write_raw((const uint8_t*)&rgb_buffer[buffer_offset], dirty_width * 2);
}
}
} else {
// Full screen update (original behavior)
// Convert 1-bit buffer to RGB565 using persistent buffer
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int byte_index = (y * width + x) / 8;
int bit_index = 7 - (x % 8);
bool pixel_white = (bit_buffer[byte_index] >> bit_index) & 0x01;
bool out_white = invert_color ? !pixel_white : pixel_white;
rgb_buffer[y * width + x] = out_white ? COLOR_WHITE : COLOR_BLACK;
}
}
// Draw entire buffer at once
st7796_set_cursor(0, 0);
// Use raw write for speed.
// Since we only use 0x0000 (Black) and 0xFFFF (White), endianness doesn't matter.
// 0x0000 -> 0x00, 0x00 (LE) -> Display sees 0x00, 0x00 (0x0000 correct)
// 0xFFFF -> 0xFF, 0xFF (LE) -> Display sees 0xFF, 0xFF (0xFFFF correct)
st7796_write_raw((const uint8_t*)rgb_buffer, width * height * 2);
// If dirty rect is enabled, store this buffer for next comparison
if (dirty_rect_enabled && prev_bit_buffer) {
memcpy(prev_bit_buffer, bit_buffer, bit_buffer_size);
}
}
// Draw entire buffer at once
st7796_set_cursor(0, 0);
// Use raw write for speed.
// Since we only use 0x0000 (Black) and 0xFFFF (White), endianness doesn't matter.
// 0x0000 -> 0x00, 0x00 (LE) -> Display sees 0x00, 0x00 (0x0000 correct)
// 0xFFFF -> 0xFF, 0xFF (LE) -> Display sees 0xFF, 0xFF (0xFFFF correct)
st7796_write_raw((const uint8_t*)rgb_buffer, width * height * 2);
}
void LowLevelDisplayST7796::refresh() {
@@ -99,6 +290,33 @@ void LowLevelDisplayST7796::set_rotation(uint8_t rotation) {
(void)rotation;
}
/**
 * @brief Turn dirty-rectangle (partial update) tracking on or off.
 *
 * Enabling allocates a zero-filled buffer of one bit per pixel that
 * draw_buffer() uses as the "previous frame" when XOR-ing for changes.
 * If the allocation fails, tracking is switched back off. Enabling while
 * a buffer already exists only sets the flag.
 *
 * Disabling frees that buffer (if any) and resets every dirty rectangle.
 *
 * NOTE(review): the fresh baseline is all zero bits, so the first frame
 * after enabling is only transmitted where it differs from an all-zero
 * frame. This presumably relies on the panel already showing that state;
 * confirm against the init/clear path.
 *
 * @param enabled true to enable tracking, false to disable it.
 */
void LowLevelDisplayST7796::enable_dirty_rect(bool enabled) {
    dirty_rect_enabled = enabled;

    if (!enabled) {
        // Nothing to tear down if tracking never allocated a baseline.
        if (prev_bit_buffer == nullptr) {
            return;
        }

        free(prev_bit_buffer);
        prev_bit_buffer = nullptr;
        for (auto& rect : dirty_rects) {
            rect.reset();
        }
        printf("ST7796: Dirty rectangle tracking disabled\n");
        return;
    }

    // Already have a baseline buffer: keep it untouched.
    if (prev_bit_buffer != nullptr) {
        return;
    }

    // One bit per pixel, rounded up to a whole byte.
    const size_t needed_bytes = (width * height + 7) / 8;
    prev_bit_buffer = (uint8_t *)malloc(needed_bytes);
    if (prev_bit_buffer == nullptr) {
        printf("Error: Failed to allocate %zu bytes for dirty rect buffer\n", needed_bytes);
        dirty_rect_enabled = false;
        return;
    }

    // Baseline starts as an all-zero frame.
    memset(prev_bit_buffer, 0, needed_bytes);
    printf("ST7796: Dirty rectangle tracking enabled (buffer: %zu bytes, max rects: %d)\n",
           needed_bytes, MAX_DIRTY_RECTS);
}
void LowLevelDisplayST7796::on_idle_2min() {
if (!is_dimmed && !is_sleeping) {
saved_brightness = get_brightness();

View File

@@ -3,6 +3,7 @@
#include "low_level_display.h"
#include "st7796.h"
#include <climits>
class LowLevelDisplayST7796 : public LowLevelDisplay {
private:
@@ -13,6 +14,56 @@ private:
uint16_t* rgb_buffer; // Persistent buffer for 1-bit to RGB565 conversion
bool invert_color; // If true, swap black/white
// Dirty rectangle tracking for partial updates
uint8_t* prev_bit_buffer; // Previous frame buffer for change detection
bool dirty_rect_enabled; // Enable/disable dirty rectangle optimization
/**
 * Axis-aligned bounding box of changed pixels, with inclusive corners.
 *
 * An empty rectangle has is_valid == false, x0/y0 == INT_MAX and
 * x1/y1 == -1, so the first expand() call replaces all four bounds.
 * A default-constructed DirtyRect starts in that empty state, so it is
 * safe to query before reset() has been called.
 */
struct DirtyRect {
    int x0 = INT_MAX;       // Left edge (inclusive)
    int y0 = INT_MAX;       // Top edge (inclusive)
    int x1 = -1;            // Right edge (inclusive)
    int y1 = -1;            // Bottom edge (inclusive)
    bool is_valid = false;  // True once at least one pixel has been added

    /** Return the rectangle to the empty state. */
    void reset() {
        x0 = INT_MAX;
        y0 = INT_MAX;
        x1 = -1;
        y1 = -1;
        is_valid = false;
    }

    /** Grow the rectangle so that it contains pixel (x, y) and mark it valid. */
    void expand(int x, int y) {
        if (x < x0) x0 = x;
        if (x > x1) x1 = x;
        if (y < y0) y0 = y;
        if (y > y1) y1 = y;
        is_valid = true;
    }

    /** Width in pixels; 0 for an empty rectangle. */
    int get_width() const { return is_valid ? (x1 - x0 + 1) : 0; }

    /** Height in pixels; 0 for an empty rectangle. */
    int get_height() const { return is_valid ? (y1 - y0 + 1) : 0; }

    /** Area in pixels; 0 for an empty rectangle. */
    int get_area() const { return is_valid ? get_width() * get_height() : 0; }

    /**
     * True when both rectangles are valid and share at least one pixel.
     * Because the corners are inclusive, touching edges count as overlap.
     */
    bool overlaps(const DirtyRect& other) const {
        if (!is_valid || !other.is_valid) return false;
        return !(x1 < other.x0 || x0 > other.x1 || y1 < other.y0 || y0 > other.y1);
    }

    /**
     * Replace this rectangle with the bounding box of itself and `other`.
     * An empty `other` is ignored; an empty `*this` becomes a copy of `other`.
     */
    void merge(const DirtyRect& other) {
        if (!other.is_valid) return;
        if (!is_valid) {
            *this = other;
            return;
        }
        x0 = (x0 < other.x0) ? x0 : other.x0;
        y0 = (y0 < other.y0) ? y0 : other.y0;
        x1 = (x1 > other.x1) ? x1 : other.x1;
        y1 = (y1 > other.y1) ? y1 : other.y1;
    }
};
static constexpr int MAX_DIRTY_RECTS = 4;
DirtyRect dirty_rects[MAX_DIRTY_RECTS]; // Support up to 4 dirty rectangles (4 quadrants)
public:
LowLevelDisplayST7796(const st7796_config* cfg, int w, int h, bool invert = false);
~LowLevelDisplayST7796() override;
@@ -53,6 +104,10 @@ public:
void on_idle_10min() override;
void on_user_interaction() override;
// Dirty rectangle optimization control
void enable_dirty_rect(bool enabled = true);
bool is_dirty_rect_enabled() const { return dirty_rect_enabled; }
private:
uint8_t saved_brightness = 100;
bool is_dimmed = false;

View File

@@ -268,7 +268,7 @@ static void write_command_with_data(uint8_t cmd, const uint8_t *data, size_t len
* This compensates for displays where the physical screen doesn't align
* with the controller's framebuffer (common with ST7789/ST7796).
*/
static void set_window(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1) {
void st7796_set_window(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1) {
uint8_t data[4];
// Add offsets for display positioning
@@ -475,7 +475,7 @@ void st7796_init(const struct st7796_config *c, uint16_t w, uint16_t h) {
* @param color RGB565 color value (0x0000=black, 0xFFFF=white)
*/
void st7796_fill(uint16_t color) {
set_window(0, 0, width - 1, height - 1);
st7796_set_window(0, 0, width - 1, height - 1);
dc_data();
cs_select();
@@ -535,7 +535,7 @@ void st7796_put(uint16_t color) {
* @param y Starting Y coordinate
*/
void st7796_set_cursor(uint16_t x, uint16_t y) {
set_window(x, y, width - 1, height - 1);
st7796_set_window(x, y, width - 1, height - 1);
}
/**
@@ -614,8 +614,8 @@ void st7796_write_raw(const uint8_t *data, size_t len) {
*/
void st7796_draw_pixel(uint16_t x, uint16_t y, uint16_t color) {
if (x >= width || y >= height) return; // Bounds check
set_window(x, y, x, y); // 1x1 window
st7796_set_window(x, y, x, y); // 1x1 window
uint8_t data[2] = {(color >> 8) & 0xFF, color & 0xFF};
dc_data();
@@ -676,8 +676,8 @@ void st7796_fill_rect(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t c
if (x >= width || y >= height) return;
if (x + w > width) w = width - x;
if (y + h > height) h = height - y;
set_window(x, y, x + w - 1, y + h - 1);
st7796_set_window(x, y, x + w - 1, y + h - 1);
dc_data();
cs_select();

View File

@@ -209,16 +209,38 @@ void st7796_put(uint16_t color);
/**
* @brief Set cursor position for subsequent writes
*
*
* Sets the drawing window starting at (x, y) and extending to the
* bottom-right of the display. Subsequent calls to st7796_put()
* will write pixels starting from this position.
*
*
* @param x X coordinate (0 to width-1)
* @param y Y coordinate (0 to height-1)
*/
void st7796_set_cursor(uint16_t x, uint16_t y);
/**
* @brief Set drawing window to a specific rectangle
*
* Sets the drawing window to a rectangular region defined by (x0, y0)
* as the top-left corner and (x1, y1) as the bottom-right corner.
* Subsequent write operations will only affect this region.
*
* This is useful for partial screen updates (dirty rectangle optimization)
* where only a portion of the screen needs to be redrawn, significantly
* improving performance by reducing SPI data transfer.
*
* @param x0 Top-left X coordinate (0 to width-1)
* @param y0 Top-left Y coordinate (0 to height-1)
* @param x1 Bottom-right X coordinate (x0 to width-1)
* @param y1 Bottom-right Y coordinate (y0 to height-1)
*
* Example: Update only a 100x50 region starting at (50, 50):
* st7796_set_window(50, 50, 149, 99);
* st7796_write_raw(pixel_data, 100 * 50 * 2);
*/
void st7796_set_window(uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1);
/**
* @brief Write multiple pixels at current cursor position
*