Add GPU RNG to compute shader

This commit is contained in:
Jared Miller 2025-12-17 10:02:09 -05:00
parent 62d010bdc0
commit 9e8226de32
No known key found for this signature in database
4 changed files with 183 additions and 29 deletions

View file

@ -3,6 +3,7 @@
const std = @import("std"); const std = @import("std");
const rl = @import("raylib"); const rl = @import("raylib");
const sandbox = @import("sandbox.zig");
const comp_source = @embedFile("shaders/entity_update.comp"); const comp_source = @embedFile("shaders/entity_update.comp");
@ -15,6 +16,11 @@ const GlMemoryBarrierFn = *const fn (barriers: u32) callconv(.c) void;
pub const ComputeShader = struct { pub const ComputeShader = struct {
program_id: u32, program_id: u32,
entity_count_loc: i32, entity_count_loc: i32,
frame_number_loc: i32,
screen_size_loc: i32,
center_loc: i32,
respawn_radius_loc: i32,
entity_speed_loc: i32,
glMemoryBarrier: GlMemoryBarrierFn, glMemoryBarrier: GlMemoryBarrierFn,
pub fn init() ?ComputeShader { pub fn init() ?ComputeShader {
@ -38,15 +44,30 @@ pub const ComputeShader = struct {
// get uniform locations // get uniform locations
const entity_count_loc = rl.gl.rlGetLocationUniform(program_id, "entityCount"); const entity_count_loc = rl.gl.rlGetLocationUniform(program_id, "entityCount");
if (entity_count_loc < 0) { const frame_number_loc = rl.gl.rlGetLocationUniform(program_id, "frameNumber");
std.debug.print("compute: warning - entityCount uniform not found\n", .{}); const screen_size_loc = rl.gl.rlGetLocationUniform(program_id, "screenSize");
} const center_loc = rl.gl.rlGetLocationUniform(program_id, "center");
const respawn_radius_loc = rl.gl.rlGetLocationUniform(program_id, "respawnRadius");
const entity_speed_loc = rl.gl.rlGetLocationUniform(program_id, "entitySpeed");
std.debug.print("compute: shader loaded successfully (program_id={})\n", .{program_id}); std.debug.print("compute: shader loaded (program_id={}, uniforms: count={}, frame={}, screen={}, center={}, radius={}, speed={})\n", .{
program_id,
entity_count_loc,
frame_number_loc,
screen_size_loc,
center_loc,
respawn_radius_loc,
entity_speed_loc,
});
return .{ return .{
.program_id = program_id, .program_id = program_id,
.entity_count_loc = entity_count_loc, .entity_count_loc = entity_count_loc,
.frame_number_loc = frame_number_loc,
.screen_size_loc = screen_size_loc,
.center_loc = center_loc,
.respawn_radius_loc = respawn_radius_loc,
.entity_speed_loc = entity_speed_loc,
.glMemoryBarrier = glMemoryBarrier, .glMemoryBarrier = glMemoryBarrier,
}; };
} }
@ -55,14 +76,32 @@ pub const ComputeShader = struct {
rl.gl.rlUnloadShaderProgram(self.program_id); rl.gl.rlUnloadShaderProgram(self.program_id);
} }
pub fn dispatch(self: *ComputeShader, ssbo_id: u32, entity_count: u32) void { pub fn dispatch(self: *ComputeShader, ssbo_id: u32, entity_count: u32, frame_number: u32) void {
if (entity_count == 0) return; if (entity_count == 0) return;
// constants from sandbox.zig
const screen_w: f32 = @floatFromInt(sandbox.SCREEN_WIDTH);
const screen_h: f32 = @floatFromInt(sandbox.SCREEN_HEIGHT);
const center_x: f32 = screen_w / 2.0;
const center_y: f32 = screen_h / 2.0;
const respawn_radius: f32 = 10.0; // RESPAWN_THRESHOLD
const entity_speed: f32 = 2.0; // ENTITY_SPEED
// bind compute shader // bind compute shader
rl.gl.rlEnableShader(self.program_id); rl.gl.rlEnableShader(self.program_id);
// set entityCount uniform // set uniforms
rl.gl.rlSetUniform(self.entity_count_loc, &entity_count, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1); rl.gl.rlSetUniform(self.entity_count_loc, &entity_count, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1);
rl.gl.rlSetUniform(self.frame_number_loc, &frame_number, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1);
const screen_size = [2]f32{ screen_w, screen_h };
rl.gl.rlSetUniform(self.screen_size_loc, &screen_size, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_vec2), 1);
const center = [2]f32{ center_x, center_y };
rl.gl.rlSetUniform(self.center_loc, &center, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_vec2), 1);
rl.gl.rlSetUniform(self.respawn_radius_loc, &respawn_radius, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_float), 1);
rl.gl.rlSetUniform(self.entity_speed_loc, &entity_speed, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_float), 1);
// bind SSBO to binding point 0 // bind SSBO to binding point 0
rl.gl.rlBindShaderBuffer(ssbo_id, 0); rl.gl.rlBindShaderBuffer(ssbo_id, 0);

View file

@ -306,6 +306,7 @@ pub fn main() !void {
var update_time_us: i64 = 0; var update_time_us: i64 = 0;
var render_time_us: i64 = 0; var render_time_us: i64 = 0;
var elapsed: f32 = 0; var elapsed: f32 = 0;
var frame_number: u32 = 0;
// auto-benchmark state // auto-benchmark state
var last_ramp_time: f32 = 0; var last_ramp_time: f32 = 0;
@ -360,14 +361,11 @@ pub fn main() !void {
defer tracy_update.End(); defer tracy_update.End();
const update_start = std.time.microTimestamp(); const update_start = std.time.microTimestamp();
if (compute_shader != null) { if (compute_shader == null) {
// GPU compute update - positions updated on GPU // CPU update path (positions + respawn)
// still need CPU update for respawn logic until Step 3
sandbox.update(&entities, &rng);
} else {
// CPU update path
sandbox.update(&entities, &rng); sandbox.update(&entities, &rng);
} }
// GPU compute path handles update in render section before draw
update_time_us = std.time.microTimestamp() - update_start; update_time_us = std.time.microTimestamp() - update_start;
} }
@ -383,13 +381,18 @@ pub fn main() !void {
if (use_ssbo) { if (use_ssbo) {
// dispatch compute shader before render (if enabled) // dispatch compute shader before render (if enabled)
if (compute_shader) |*cs| { if (compute_shader) |*cs| {
const tracy_compute = ztracy.ZoneN(@src(), "compute_dispatch"); if (!paused) {
defer tracy_compute.End(); const tracy_compute = ztracy.ZoneN(@src(), "compute_dispatch");
cs.dispatch(ssbo_renderer.?.ssbo_id, @intCast(entities.count)); defer tracy_compute.End();
cs.dispatch(ssbo_renderer.?.ssbo_id, @intCast(entities.count), frame_number);
frame_number +%= 1;
}
// GPU compute mode - only upload new entities, positions updated on GPU
ssbo_renderer.?.renderComputeMode(&entities, zoom, pan);
} else {
// CPU mode - upload entity data to GPU
ssbo_renderer.?.render(&entities, zoom, pan);
} }
// SSBO instanced rendering path (16 bytes per entity)
ssbo_renderer.?.render(&entities, zoom, pan);
} else if (use_instancing) { } else if (use_instancing) {
// GPU instancing path (64 bytes per entity) // GPU instancing path (64 bytes per entity)
const xforms = transforms.?; const xforms = transforms.?;

View file

@ -14,6 +14,29 @@ layout(std430, binding = 0) buffer Entities {
}; };
uniform uint entityCount; uniform uint entityCount;
uniform uint frameNumber;
uniform vec2 screenSize;
uniform vec2 center;
uniform float respawnRadius;
uniform float entitySpeed;
// PCG-style hash used as the GPU RNG core.
// Advances `state` in place with an LCG step, then returns a permuted
// 32-bit unsigned value derived from the new state (full uint range).
uint pcg(inout uint state) {
    state = state * 747796405u + 2891336453u;
    // the top 4 bits of the state select a data-dependent shift (4..19)
    uint shift = (state >> 28u) + 4u;
    uint word = ((state >> shift) ^ state) * 277803737u;
    return word ^ (word >> 22u);
}
// Returns a float in [0, 1) and advances `state` by one pcg() step.
// Only the top 24 bits of the PCG output are used: a 32-bit float has a
// 24-bit significand, so float(x) / 4294967296.0 rounds up to exactly 1.0
// for x >= 0xFFFFFF80, which would break the half-open range.
float randFloat(inout uint state) {
    // (2^24 - 1) / 2^24 is exactly representable, so the result is always < 1.0
    return float(pcg(state) >> 8u) * (1.0 / 16777216.0);
}
// Packs a 2D velocity into a single int as two signed 8.8 fixed-point
// halves: vx in the upper 16 bits, vy in the lower 16 bits.
int packVelocity(float vx, float vy) {
    // scale by 256 (8 fractional bits) and clamp to the signed 16-bit range
    vec2 scaled = clamp(vec2(vx, vy) * 256.0, -32768.0, 32767.0);
    // float -> int conversion drops the fractional part, same as int(x)
    ivec2 q = ivec2(scaled);
    // mask vy so its sign extension does not overwrite the vx half
    return (q.x << 16) | (q.y & 0xFFFF);
}
void main() { void main() {
uint id = gl_GlobalInvocationID.x; uint id = gl_GlobalInvocationID.x;
@ -29,5 +52,46 @@ void main() {
e.x += vx; e.x += vx;
e.y += vy; e.y += vy;
// check if reached center - respawn at edge
float dx = e.x - center.x;
float dy = e.y - center.y;
if (dx*dx + dy*dy < respawnRadius * respawnRadius) {
// init RNG with entity id and frame number
uint rng = id * 1103515245u + frameNumber * 12345u + 1u;
// pick random edge: 0=top, 1=bottom, 2=left, 3=right
uint edge = pcg(rng) & 3u;
float t = randFloat(rng);
// spawn on edge
if (edge == 0u) { // top
e.x = t * screenSize.x;
e.y = 0.0;
} else if (edge == 1u) { // bottom
e.x = t * screenSize.x;
e.y = screenSize.y;
} else if (edge == 2u) { // left
e.x = 0.0;
e.y = t * screenSize.y;
} else { // right
e.x = screenSize.x;
e.y = t * screenSize.y;
}
// velocity toward center
dx = center.x - e.x;
dy = center.y - e.y;
float dist = sqrt(dx*dx + dy*dy);
vx = (dx / dist) * entitySpeed;
vy = (dy / dist) * entitySpeed;
e.packedVel = packVelocity(vx, vy);
// new random color
uint r = pcg(rng) & 0xFFu;
uint g = pcg(rng) & 0xFFu;
uint b = pcg(rng) & 0xFFu;
e.color = (r << 16u) | (g << 8u) | b;
}
entities[id] = e; entities[id] = e;
} }

View file

@ -24,6 +24,7 @@ pub const SsboRenderer = struct {
pan_loc: i32, pan_loc: i32,
circle_texture_id: u32, circle_texture_id: u32,
gpu_buffer: []sandbox.GpuEntity, gpu_buffer: []sandbox.GpuEntity,
last_entity_count: usize, // track count to detect when entities are added
const QUAD_SIZE: f32 = 16.0; const QUAD_SIZE: f32 = 16.0;
@ -125,6 +126,7 @@ pub const SsboRenderer = struct {
.pan_loc = pan_loc, .pan_loc = pan_loc,
.circle_texture_id = circle_texture.id, .circle_texture_id = circle_texture.id,
.gpu_buffer = gpu_buffer, .gpu_buffer = gpu_buffer,
.last_entity_count = 0,
}; };
} }
@ -137,16 +139,22 @@ pub const SsboRenderer = struct {
} }
pub fn render(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void { pub fn render(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void {
self.renderInternal(entities, zoom, pan, false);
}
pub fn renderComputeMode(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void {
if (entities.count == 0) return; if (entities.count == 0) return;
// flush raylib's internal render batch before our custom GL calls // flush raylib's internal render batch before our custom GL calls
rl.gl.rlDrawRenderBatchActive(); rl.gl.rlDrawRenderBatchActive();
// copy entity data to GPU buffer (position + packed velocity + color) // upload NEW entities when count increases (entities added on CPU)
{ if (entities.count > self.last_entity_count) {
const zone = ztracy.ZoneN(@src(), "ssbo_copy"); const zone = ztracy.ZoneN(@src(), "ssbo_upload_new");
defer zone.End(); defer zone.End();
for (entities.items[0..entities.count], 0..) |entity, i| {
// copy new entities to GPU buffer
for (entities.items[self.last_entity_count..entities.count], self.last_entity_count..) |entity, i| {
self.gpu_buffer[i] = .{ self.gpu_buffer[i] = .{
.x = entity.x, .x = entity.x,
.y = entity.y, .y = entity.y,
@ -154,16 +162,56 @@ pub const SsboRenderer = struct {
.color = entity.color, .color = entity.color,
}; };
} }
// upload only the new portion to SSBO
const offset: u32 = @intCast(self.last_entity_count * @sizeOf(sandbox.GpuEntity));
const new_count = entities.count - self.last_entity_count;
const data_size: u32 = @intCast(new_count * @sizeOf(sandbox.GpuEntity));
rl.gl.rlUpdateShaderBuffer(self.ssbo_id, &self.gpu_buffer[self.last_entity_count], data_size, offset);
self.last_entity_count = entities.count;
} else if (entities.count < self.last_entity_count) {
// entities were removed, update count
self.last_entity_count = entities.count;
} }
// upload to SSBO self.drawInstanced(entities.count, zoom, pan);
{ }
const zone = ztracy.ZoneN(@src(), "ssbo_upload");
defer zone.End(); fn renderInternal(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32), skip_upload: bool) void {
const data_size: u32 = @intCast(entities.count * @sizeOf(sandbox.GpuEntity)); if (entities.count == 0) return;
rl.gl.rlUpdateShaderBuffer(self.ssbo_id, self.gpu_buffer.ptr, data_size, 0);
// flush raylib's internal render batch before our custom GL calls
rl.gl.rlDrawRenderBatchActive();
if (!skip_upload) {
// copy entity data to GPU buffer (position + packed velocity + color)
{
const zone = ztracy.ZoneN(@src(), "ssbo_copy");
defer zone.End();
for (entities.items[0..entities.count], 0..) |entity, i| {
self.gpu_buffer[i] = .{
.x = entity.x,
.y = entity.y,
.packed_vel = sandbox.packVelocity(entity.vx, entity.vy),
.color = entity.color,
};
}
}
// upload to SSBO
{
const zone = ztracy.ZoneN(@src(), "ssbo_upload");
defer zone.End();
const data_size: u32 = @intCast(entities.count * @sizeOf(sandbox.GpuEntity));
rl.gl.rlUpdateShaderBuffer(self.ssbo_id, self.gpu_buffer.ptr, data_size, 0);
}
} }
self.drawInstanced(entities.count, zoom, pan);
}
fn drawInstanced(self: *SsboRenderer, entity_count: usize, zoom: f32, pan: @Vector(2, f32)) void {
// bind shader // bind shader
rl.gl.rlEnableShader(self.shader_id); rl.gl.rlEnableShader(self.shader_id);
@ -198,7 +246,7 @@ pub const SsboRenderer = struct {
defer zone.End(); defer zone.End();
_ = rl.gl.rlEnableVertexArray(self.vao_id); _ = rl.gl.rlEnableVertexArray(self.vao_id);
rl.gl.rlEnableVertexBuffer(self.vbo_id); rl.gl.rlEnableVertexBuffer(self.vbo_id);
rl.gl.rlDrawVertexArrayInstanced(0, 6, @intCast(entities.count)); rl.gl.rlDrawVertexArrayInstanced(0, 6, @intCast(entity_count));
} }
// cleanup - restore raylib's expected state // cleanup - restore raylib's expected state