Add rlgl quad batching optimization
This commit is contained in:
parent
c010746897
commit
2fb9f1c3e5
6 changed files with 117 additions and 10 deletions
6
TODO.md
6
TODO.md
|
|
@ -30,13 +30,17 @@ findings (AMD Radeon test):
|
|||
based on phase 2 results:
|
||||
|
||||
- [x] batch rendering via texture blitting (10x improvement)
|
||||
- [x] rlgl quad batching (2x improvement on top)
|
||||
- [x] ~~if cpu-bound: SIMD, struct-of-arrays, multithreading~~ (not needed)
|
||||
- [x] re-test after each change
|
||||
|
||||
findings:
|
||||
- texture blitting: pre-render circle to texture, drawTexture() per entity
|
||||
- rlgl batching: submit vertices directly via rl.gl, bypass drawTexture overhead
|
||||
- baseline: 60fps @ ~5k entities
|
||||
- optimized: 60fps @ ~50k entities, 30fps @ 100k entities
|
||||
- after texture blitting: 60fps @ ~50k entities
|
||||
- after rlgl batching: 60fps @ ~100k entities
|
||||
- total: ~20x improvement from baseline
|
||||
- see journal.txt for detailed benchmarks
|
||||
|
||||
## phase 4: add collision
|
||||
|
|
|
|||
17
benchmarks/benchmark2.log
Normal file
17
benchmarks/benchmark2.log
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# lofivor sandbox benchmark
|
||||
# time entities frame_ms update_ms render_ms note
|
||||
[0.1s] entities=0 frame=103.6ms update=0.0ms render=14.1ms [!60fps]
|
||||
[0.1s] entities=0 frame=16.7ms update=0.0ms render=99.5ms [+60fps]
|
||||
[0.2s] entities=0 frame=104.5ms update=0.0ms render=15.8ms [!60fps]
|
||||
[0.2s] entities=0 frame=16.7ms update=0.0ms render=16.8ms [+60fps]
|
||||
[10.0s] entities=4000 frame=16.7ms update=0.0ms render=16.6ms
|
||||
[20.0s] entities=10000 frame=16.7ms update=0.1ms render=16.7ms
|
||||
[21.5s] entities=11000 frame=19.7ms update=0.1ms render=16.6ms [!60fps]
|
||||
[21.5s] entities=11000 frame=16.7ms update=0.1ms render=16.5ms [+60fps]
|
||||
[21.5s] entities=11000 frame=27.1ms update=0.0ms render=16.6ms [!60fps]
|
||||
[21.5s] entities=11000 frame=16.7ms update=0.1ms render=16.7ms [+60fps]
|
||||
[30.0s] entities=23000 frame=16.7ms update=0.2ms render=16.5ms
|
||||
[38.3s] entities=52000 frame=18.8ms update=0.3ms render=16.6ms [!60fps]
|
||||
[38.4s] entities=53000 frame=16.7ms update=0.2ms render=16.4ms [+60fps]
|
||||
[38.9s] entities=55000 frame=21.0ms update=0.3ms render=17.1ms [!60fps]
|
||||
[40.0s] entities=59000 frame=20.6ms update=0.3ms render=18.4ms
|
||||
15
benchmarks/benchmark3.log
Normal file
15
benchmarks/benchmark3.log
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# lofivor sandbox benchmark
|
||||
# time entities frame_ms update_ms render_ms note
|
||||
[0.1s] entities=0 frame=78.7ms update=0.0ms render=12.2ms [!60fps]
|
||||
[0.1s] entities=0 frame=16.7ms update=0.0ms render=15.0ms [+60fps]
|
||||
[10.0s] entities=40000 frame=16.7ms update=0.3ms render=16.5ms
|
||||
[13.5s] entities=100000 frame=19.5ms update=0.5ms render=16.6ms [!60fps]
|
||||
[13.5s] entities=100000 frame=16.7ms update=0.4ms render=16.4ms [+60fps]
|
||||
[15.0s] entities=100000 frame=18.8ms update=0.9ms render=16.7ms [!60fps]
|
||||
[15.0s] entities=100000 frame=16.7ms update=0.5ms render=16.2ms [+60fps]
|
||||
[15.3s] entities=100000 frame=19.2ms update=0.7ms render=18.8ms [!60fps]
|
||||
[15.6s] entities=100000 frame=16.7ms update=0.5ms render=16.2ms [+60fps]
|
||||
[16.1s] entities=100000 frame=18.8ms update=0.5ms render=17.8ms [!60fps]
|
||||
[16.2s] entities=100000 frame=16.7ms update=0.4ms render=16.3ms [+60fps]
|
||||
[17.2s] entities=100000 frame=18.7ms update=0.5ms render=17.7ms [!60fps]
|
||||
[18.4s] entities=100000 frame=16.7ms update=0.3ms render=16.3ms [+60fps]
|
||||
24
benchmarks/benchmark_original.log
Normal file
24
benchmarks/benchmark_original.log
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# lofivor sandbox benchmark
|
||||
# time entities frame_ms update_ms render_ms note
|
||||
[0.1s] entities=0 frame=83.0ms update=0.0ms render=12.4ms [!60fps]
|
||||
[0.1s] entities=0 frame=16.7ms update=0.0ms render=12.4ms [+60fps]
|
||||
[10.0s] entities=4000 frame=16.7ms update=0.0ms render=16.9ms
|
||||
[12.7s] entities=5000 frame=19.9ms update=0.0ms render=19.4ms [!60fps]
|
||||
[14.8s] entities=6000 frame=26.4ms update=0.0ms render=23.5ms [jump]
|
||||
[18.0s] entities=7000 frame=32.4ms update=0.0ms render=27.7ms [jump]
|
||||
[20.0s] entities=8000 frame=30.9ms update=0.0ms render=30.8ms
|
||||
[20.3s] entities=8000 frame=36.1ms update=0.0ms render=29.9ms [jump]
|
||||
[20.4s] entities=8000 frame=52.6ms update=0.0ms render=30.8ms [jump]
|
||||
[30.0s] entities=11000 frame=43.5ms update=0.1ms render=43.4ms
|
||||
[30.6s] entities=12000 frame=54.5ms update=0.1ms render=45.3ms [jump]
|
||||
[38.5s] entities=15000 frame=60.6ms update=0.1ms render=59.2ms [jump]
|
||||
[40.0s] entities=15000 frame=57.1ms update=0.1ms render=57.5ms
|
||||
[40.4s] entities=16000 frame=62.7ms update=0.1ms render=62.8ms [jump]
|
||||
[42.6s] entities=17000 frame=72.9ms update=0.1ms render=65.5ms [jump]
|
||||
[48.4s] entities=20000 frame=79.1ms update=0.1ms render=75.2ms [jump]
|
||||
[50.1s] entities=20000 frame=77.0ms update=0.1ms render=75.0ms
|
||||
[50.8s] entities=21000 frame=83.1ms update=0.1ms render=80.2ms [jump]
|
||||
[52.9s] entities=22000 frame=92.9ms update=0.1ms render=86.6ms [jump]
|
||||
[57.3s] entities=24000 frame=98.5ms update=0.1ms render=100.2ms [jump]
|
||||
[60.1s] entities=25000 frame=97.4ms update=0.1ms render=95.6ms
|
||||
[62.6s] entities=26000 frame=107.6ms update=0.1ms render=100.2ms [jump]
|
||||
32
journal.txt
32
journal.txt
|
|
@ -53,7 +53,37 @@ remains negligible (<0.6ms even at 100k).
|
|||
|
||||
---
|
||||
|
||||
optimization 2: [pending]
|
||||
optimization 2: rlgl quad batching
|
||||
-----------------------------------
|
||||
technique: bypass drawTexture(), submit vertices directly via rlgl
|
||||
code: sandbox_main.zig:175-197
|
||||
- rl.gl.rlSetTexture() once
|
||||
- rl.gl.rlBegin(rl_quads)
|
||||
- loop: rlTexCoord2f + rlVertex2f for 4 vertices per entity
|
||||
- rl.gl.rlEnd()
|
||||
|
||||
benchmark3.log results:
|
||||
- 40k entities: 16.7ms (vsync-locked)
|
||||
- 100k entities: 16.7-19.2ms (~52-60fps)
|
||||
|
||||
comparison to optimization 1:
|
||||
- texture blitting: 100k @ 33-37ms (~30fps)
|
||||
- rlgl batching: 100k @ 16.7-19ms (~52-60fps)
|
||||
- ~2x improvement
|
||||
|
||||
total improvement from baseline:
|
||||
- baseline: 60fps @ ~5k entities
|
||||
- final: 60fps @ ~100k entities
|
||||
- ~20x improvement overall
|
||||
|
||||
analysis: drawTexture() has per-call overhead (type conversions, batch state
|
||||
checks). rlgl submits vertices directly to GPU buffer. raylib's internal batch
|
||||
(default 8192 quads = 32768 vertices on desktop GL) auto-flushes, so 100k entities = ~13 draw calls
|
||||
vs 100k drawTexture calls with their overhead.
|
||||
|
||||
---
|
||||
|
||||
optimization 3: [pending]
|
||||
-------------------------
|
||||
technique:
|
||||
results:
|
||||
|
|
|
|||
|
|
@ -168,17 +168,34 @@ pub fn main() !void {
|
|||
rl.beginDrawing();
|
||||
rl.clearBackground(BG_COLOR);
|
||||
|
||||
// draw entities using pre-rendered circle texture
|
||||
const half_size = @as(f32, @floatFromInt(TEXTURE_SIZE)) / 2.0;
|
||||
// draw entities using rlgl quad batching
|
||||
const size = @as(f32, @floatFromInt(TEXTURE_SIZE));
|
||||
const half = size / 2.0;
|
||||
|
||||
rl.gl.rlSetTexture(circle_texture.id);
|
||||
rl.gl.rlBegin(rl.gl.rl_quads);
|
||||
rl.gl.rlColor4ub(255, 255, 255, 255); // white tint
|
||||
|
||||
for (entities.items[0..entities.count]) |entity| {
|
||||
rl.drawTexture(
|
||||
circle_texture,
|
||||
@intFromFloat(entity.x - half_size),
|
||||
@intFromFloat(entity.y - half_size),
|
||||
rl.Color.white, // tint (white = use original colors)
|
||||
);
|
||||
const x1 = entity.x - half;
|
||||
const y1 = entity.y - half;
|
||||
const x2 = entity.x + half;
|
||||
const y2 = entity.y + half;
|
||||
|
||||
// quad vertices: bottom-left, bottom-right, top-right, top-left
|
||||
rl.gl.rlTexCoord2f(0, 0);
|
||||
rl.gl.rlVertex2f(x1, y2);
|
||||
rl.gl.rlTexCoord2f(1, 0);
|
||||
rl.gl.rlVertex2f(x2, y2);
|
||||
rl.gl.rlTexCoord2f(1, 1);
|
||||
rl.gl.rlVertex2f(x2, y1);
|
||||
rl.gl.rlTexCoord2f(0, 1);
|
||||
rl.gl.rlVertex2f(x1, y1);
|
||||
}
|
||||
|
||||
rl.gl.rlEnd();
|
||||
rl.gl.rlSetTexture(0);
|
||||
|
||||
// metrics overlay
|
||||
drawMetrics(&entities, update_time_us, render_time_us, paused);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue