From a842800edefbee89f21e71fd912c9dc10f8076cb Mon Sep 17 00:00:00 2001 From: Jared Miller Date: Wed, 17 Dec 2025 21:01:21 -0500 Subject: [PATCH] Add release notes --- releases/0.1.0-unoptimized.txt | 8 ++++++++ releases/0.2.0-texture_blitting.txt | 8 ++++++++ releases/0.3.0-quad_batching.txt | 9 +++++++++ releases/0.3.1-batch_buffer.txt | 11 +++++++++++ releases/0.4.0-gpu_instancing.txt | 13 +++++++++++++ releases/0.5.0-ssbo_instancing.txt | 17 +++++++++++++++++ releases/0.5.1-windows_build.txt | 5 +++++ releases/0.6.0-zoom_zoom.txt | 10 ++++++++++ releases/0.6.1-q_to_quit.txt | 5 +++++ releases/0.7.0-compute_shader.txt | 11 +++++++++++ 10 files changed, 97 insertions(+) create mode 100644 releases/0.1.0-unoptimized.txt create mode 100644 releases/0.2.0-texture_blitting.txt create mode 100644 releases/0.3.0-quad_batching.txt create mode 100644 releases/0.3.1-batch_buffer.txt create mode 100644 releases/0.4.0-gpu_instancing.txt create mode 100644 releases/0.5.0-ssbo_instancing.txt create mode 100644 releases/0.5.1-windows_build.txt create mode 100644 releases/0.6.0-zoom_zoom.txt create mode 100644 releases/0.6.1-q_to_quit.txt create mode 100644 releases/0.7.0-compute_shader.txt diff --git a/releases/0.1.0-unoptimized.txt b/releases/0.1.0-unoptimized.txt new file mode 100644 index 0000000..8fea844 --- /dev/null +++ b/releases/0.1.0-unoptimized.txt @@ -0,0 +1,8 @@ +the baseline: one draw call per entity, pure and simple + +- individual rl.drawCircle() calls in a loop +- ~5k entities at 60fps before frame times tank +- linear scaling: 10k = ~43ms, 20k = ~77ms +- render-bound (update loop stays under 1ms even at 30k) +- each circle is its own GPU draw call +- the starting point for optimization experiments diff --git a/releases/0.2.0-texture_blitting.txt b/releases/0.2.0-texture_blitting.txt new file mode 100644 index 0000000..62c8475 --- /dev/null +++ b/releases/0.2.0-texture_blitting.txt @@ -0,0 +1,8 @@ +pre-render once, blit many: 10x improvement + +- render circle 
to 16x16 texture at startup +- drawTexture() per entity instead of drawCircle() +- raylib batches same-texture draws internally +- ~50k entities at 60fps +- simple change, big win +- still one function call per entity, but GPU work is batched diff --git a/releases/0.3.0-quad_batching.txt b/releases/0.3.0-quad_batching.txt new file mode 100644 index 0000000..b72717c --- /dev/null +++ b/releases/0.3.0-quad_batching.txt @@ -0,0 +1,9 @@ +bypass the wrapper, go straight to rlgl: 2x more + +- skip drawTexture(), submit vertices directly via rl.gl +- manually build quads: rlTexCoord2f + rlVertex2f per corner +- rlBegin/rlEnd wraps the whole entity loop +- ~100k entities at 60fps +- eliminates per-call function overhead +- vertices go straight to GPU buffer +- 20x improvement over baseline diff --git a/releases/0.3.1-batch_buffer.txt b/releases/0.3.1-batch_buffer.txt new file mode 100644 index 0000000..4bddfe8 --- /dev/null +++ b/releases/0.3.1-batch_buffer.txt @@ -0,0 +1,11 @@ +bigger buffer, fewer flushes: squeezing out more headroom + +- increased raylib batch buffer from 8192 to 32768 vertices +- ~140k entities at 60fps on i5-6500T +- ~40% improvement over default buffer +- fewer GPU flushes per frame +- also added: release workflows for github and forgejo +- added OPTIMIZATIONS.md documenting the journey +- added README, UI panel with FPS display +- heap-allocated entity array to support 1 million entities +- per-entity RGB colors diff --git a/releases/0.4.0-gpu_instancing.txt b/releases/0.4.0-gpu_instancing.txt new file mode 100644 index 0000000..03015dc --- /dev/null +++ b/releases/0.4.0-gpu_instancing.txt @@ -0,0 +1,13 @@ +gpu instancing: a disappointing discovery + +- drawMeshInstanced() with per-entity transform matrices +- ~150k entities at 60fps - barely better than rlgl batching +- negligible improvement on integrated graphics +- why it didn't help: + - integrated GPU shares system RAM (no PCIe transfer savings) + - 64-byte matrix per entity vs ~80 bytes for 
rlgl vertices + - bottleneck is memory bandwidth, not draw call overhead + - rlgl batching already minimizes draw calls effectively +- orthographic camera setup for 2D-like rendering +- heap-allocated transforms buffer (64MB too big for stack) +- lesson learned: not all "advanced" techniques are wins diff --git a/releases/0.5.0-ssbo_instancing.txt b/releases/0.5.0-ssbo_instancing.txt new file mode 100644 index 0000000..80c4bd7 --- /dev/null +++ b/releases/0.5.0-ssbo_instancing.txt @@ -0,0 +1,17 @@ +ssbo breakthrough: 5x gain by shrinking the data + +- pack entity data (x, y, color) into 12-byte struct +- upload via shader storage buffer object (SSBO) +- ~700k entities at 60fps (i5-6500T / HD 530) +- ~950k entities at ~57fps +- 5x improvement over previous best +- 140x total from baseline +- why it works: + - 12 bytes vs 64 bytes (matrices) = 5.3x less bandwidth + - 12 bytes vs 80 bytes (rlgl vertices) = 6.7x less bandwidth + - no CPU-side matrix calculations + - GPU does NDC conversion and color unpacking +- custom vertex/fragment shaders +- single rlDrawVertexArrayInstanced() call for all entities +- shaders embedded at build time +- removed FPS cap, added optional vsync arg diff --git a/releases/0.5.1-windows_build.txt b/releases/0.5.1-windows_build.txt new file mode 100644 index 0000000..b60fae3 --- /dev/null +++ b/releases/0.5.1-windows_build.txt @@ -0,0 +1,5 @@ +cross-platform release: adding windows to the party + +- updated github release workflow +- builds for both linux and windows now +- no code changes, just CI/CD work diff --git a/releases/0.6.0-zoom_zoom.txt b/releases/0.6.0-zoom_zoom.txt new file mode 100644 index 0000000..54de2b9 --- /dev/null +++ b/releases/0.6.0-zoom_zoom.txt @@ -0,0 +1,10 @@ +zoom and pan: making millions of entities explorable + +- mouse wheel zoom +- click and drag panning +- orthographic camera transforms +- memory panel showing entity buffer sizes +- background draws immediately (no flicker) +- tab key toggles UI panels +- 
explained "lofivor" name in README (lo-fi survivor) +- shader updated for zoom/pan transforms diff --git a/releases/0.6.1-q_to_quit.txt b/releases/0.6.1-q_to_quit.txt new file mode 100644 index 0000000..c999f34 --- /dev/null +++ b/releases/0.6.1-q_to_quit.txt @@ -0,0 +1,5 @@ +quick exit: zoom out then quit + +- q key first zooms out, second press quits +- nice way to see the full entity field before closing +- minor UI text fix diff --git a/releases/0.7.0-compute_shader.txt b/releases/0.7.0-compute_shader.txt new file mode 100644 index 0000000..c65ab2f --- /dev/null +++ b/releases/0.7.0-compute_shader.txt @@ -0,0 +1,11 @@ +compute shader: moving physics to the GPU + +- entity position updates now run on GPU via compute shader +- GPU-based RNG for entity velocity randomization +- full simulation loop stays on GPU, no CPU roundtrip +- new compute.zig module for shader management +- GpuEntity struct with position, velocity, and color +- tracy profiling integration +- FPS display turns green (good) or red (bad) +- added design docs for zoom/pan and compute shader work +- cross-platform alignment fixes for shader data