From 9e8226de320d349121dc739f87c64ae695172d66 Mon Sep 17 00:00:00 2001 From: Jared Miller Date: Wed, 17 Dec 2025 10:02:09 -0500 Subject: [PATCH] Add GPU RNG to compute shader --- src/compute.zig | 51 ++++++++++++++++++++++--- src/sandbox_main.zig | 27 +++++++------ src/shaders/entity_update.comp | 64 +++++++++++++++++++++++++++++++ src/ssbo_renderer.zig | 70 ++++++++++++++++++++++++++++------ 4 files changed, 183 insertions(+), 29 deletions(-) diff --git a/src/compute.zig b/src/compute.zig index df59fe0..4f28741 100644 --- a/src/compute.zig +++ b/src/compute.zig @@ -3,6 +3,7 @@ const std = @import("std"); const rl = @import("raylib"); +const sandbox = @import("sandbox.zig"); const comp_source = @embedFile("shaders/entity_update.comp"); @@ -15,6 +16,11 @@ const GlMemoryBarrierFn = *const fn (barriers: u32) callconv(.c) void; pub const ComputeShader = struct { program_id: u32, entity_count_loc: i32, + frame_number_loc: i32, + screen_size_loc: i32, + center_loc: i32, + respawn_radius_loc: i32, + entity_speed_loc: i32, glMemoryBarrier: GlMemoryBarrierFn, pub fn init() ?ComputeShader { @@ -38,15 +44,30 @@ pub const ComputeShader = struct { // get uniform locations const entity_count_loc = rl.gl.rlGetLocationUniform(program_id, "entityCount"); - if (entity_count_loc < 0) { - std.debug.print("compute: warning - entityCount uniform not found\n", .{}); - } + const frame_number_loc = rl.gl.rlGetLocationUniform(program_id, "frameNumber"); + const screen_size_loc = rl.gl.rlGetLocationUniform(program_id, "screenSize"); + const center_loc = rl.gl.rlGetLocationUniform(program_id, "center"); + const respawn_radius_loc = rl.gl.rlGetLocationUniform(program_id, "respawnRadius"); + const entity_speed_loc = rl.gl.rlGetLocationUniform(program_id, "entitySpeed"); - std.debug.print("compute: shader loaded successfully (program_id={})\n", .{program_id}); + std.debug.print("compute: shader loaded (program_id={}, uniforms: count={}, frame={}, screen={}, center={}, radius={}, speed={})\n", 
.{ + program_id, + entity_count_loc, + frame_number_loc, + screen_size_loc, + center_loc, + respawn_radius_loc, + entity_speed_loc, + }); return .{ .program_id = program_id, .entity_count_loc = entity_count_loc, + .frame_number_loc = frame_number_loc, + .screen_size_loc = screen_size_loc, + .center_loc = center_loc, + .respawn_radius_loc = respawn_radius_loc, + .entity_speed_loc = entity_speed_loc, .glMemoryBarrier = glMemoryBarrier, }; } @@ -55,14 +76,32 @@ pub const ComputeShader = struct { rl.gl.rlUnloadShaderProgram(self.program_id); } - pub fn dispatch(self: *ComputeShader, ssbo_id: u32, entity_count: u32) void { + pub fn dispatch(self: *ComputeShader, ssbo_id: u32, entity_count: u32, frame_number: u32) void { if (entity_count == 0) return; + // constants from sandbox.zig + const screen_w: f32 = @floatFromInt(sandbox.SCREEN_WIDTH); + const screen_h: f32 = @floatFromInt(sandbox.SCREEN_HEIGHT); + const center_x: f32 = screen_w / 2.0; + const center_y: f32 = screen_h / 2.0; + const respawn_radius: f32 = 10.0; // RESPAWN_THRESHOLD + const entity_speed: f32 = 2.0; // ENTITY_SPEED + // bind compute shader rl.gl.rlEnableShader(self.program_id); - // set entityCount uniform + // set uniforms rl.gl.rlSetUniform(self.entity_count_loc, &entity_count, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1); + rl.gl.rlSetUniform(self.frame_number_loc, &frame_number, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1); + + const screen_size = [2]f32{ screen_w, screen_h }; + rl.gl.rlSetUniform(self.screen_size_loc, &screen_size, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_vec2), 1); + + const center = [2]f32{ center_x, center_y }; + rl.gl.rlSetUniform(self.center_loc, &center, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_vec2), 1); + + rl.gl.rlSetUniform(self.respawn_radius_loc, &respawn_radius, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_float), 1); + rl.gl.rlSetUniform(self.entity_speed_loc, 
&entity_speed, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_float), 1); // bind SSBO to binding point 0 rl.gl.rlBindShaderBuffer(ssbo_id, 0); diff --git a/src/sandbox_main.zig b/src/sandbox_main.zig index da6b008..cbb8e38 100644 --- a/src/sandbox_main.zig +++ b/src/sandbox_main.zig @@ -306,6 +306,7 @@ pub fn main() !void { var update_time_us: i64 = 0; var render_time_us: i64 = 0; var elapsed: f32 = 0; + var frame_number: u32 = 0; // auto-benchmark state var last_ramp_time: f32 = 0; @@ -360,14 +361,11 @@ pub fn main() !void { defer tracy_update.End(); const update_start = std.time.microTimestamp(); - if (compute_shader != null) { - // GPU compute update - positions updated on GPU - // still need CPU update for respawn logic until Step 3 - sandbox.update(&entities, &rng); - } else { - // CPU update path + if (compute_shader == null) { + // CPU update path (positions + respawn) sandbox.update(&entities, &rng); } + // GPU compute path handles update in render section before draw update_time_us = std.time.microTimestamp() - update_start; } @@ -383,13 +381,18 @@ pub fn main() !void { if (use_ssbo) { // dispatch compute shader before render (if enabled) if (compute_shader) |*cs| { - const tracy_compute = ztracy.ZoneN(@src(), "compute_dispatch"); - defer tracy_compute.End(); - cs.dispatch(ssbo_renderer.?.ssbo_id, @intCast(entities.count)); + if (!paused) { + const tracy_compute = ztracy.ZoneN(@src(), "compute_dispatch"); + defer tracy_compute.End(); + cs.dispatch(ssbo_renderer.?.ssbo_id, @intCast(entities.count), frame_number); + frame_number +%= 1; + } + // GPU compute mode - only upload new entities, positions updated on GPU + ssbo_renderer.?.renderComputeMode(&entities, zoom, pan); + } else { + // CPU mode - upload entity data to GPU + ssbo_renderer.?.render(&entities, zoom, pan); } - - // SSBO instanced rendering path (16 bytes per entity) - ssbo_renderer.?.render(&entities, zoom, pan); } else if (use_instancing) { // GPU instancing path (64 bytes per 
entity) const xforms = transforms.?; diff --git a/src/shaders/entity_update.comp b/src/shaders/entity_update.comp index a18ff72..f875559 100644 --- a/src/shaders/entity_update.comp +++ b/src/shaders/entity_update.comp @@ -14,6 +14,29 @@ layout(std430, binding = 0) buffer Entities { }; uniform uint entityCount; +uniform uint frameNumber; +uniform vec2 screenSize; +uniform vec2 center; +uniform float respawnRadius; +uniform float entitySpeed; + +// PCG-style GPU RNG - advances state, returns a pseudo-random 32-bit uint +uint pcg(inout uint state) { + state = state * 747796405u + 2891336453u; + uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +float randFloat(inout uint state) { + return float(pcg(state)) / 4294967296.0; +} + +// pack velocity into fixed-point 8.8 format +int packVelocity(float vx, float vy) { + int vx_fixed = int(clamp(vx * 256.0, -32768.0, 32767.0)); + int vy_fixed = int(clamp(vy * 256.0, -32768.0, 32767.0)); + return (vx_fixed << 16) | (vy_fixed & 0xFFFF); +} void main() { uint id = gl_GlobalInvocationID.x; @@ -29,5 +52,46 @@ void main() { e.x += vx; e.y += vy; + // check if reached center - respawn at edge + float dx = e.x - center.x; + float dy = e.y - center.y; + if (dx*dx + dy*dy < respawnRadius * respawnRadius) { + // init RNG with entity id and frame number + uint rng = id * 1103515245u + frameNumber * 12345u + 1u; + + // pick random edge: 0=top, 1=bottom, 2=left, 3=right + uint edge = pcg(rng) & 3u; + float t = randFloat(rng); + + // spawn on edge + if (edge == 0u) { // top + e.x = t * screenSize.x; + e.y = 0.0; + } else if (edge == 1u) { // bottom + e.x = t * screenSize.x; + e.y = screenSize.y; + } else if (edge == 2u) { // left + e.x = 0.0; + e.y = t * screenSize.y; + } else { // right + e.x = screenSize.x; + e.y = t * screenSize.y; + } + + // velocity toward center + dx = center.x - e.x; + dy = center.y - e.y; + float dist = sqrt(dx*dx + dy*dy); + vx = (dx / dist) * entitySpeed; + vy = (dy / dist) * 
entitySpeed; + e.packedVel = packVelocity(vx, vy); + + // new random color + uint r = pcg(rng) & 0xFFu; + uint g = pcg(rng) & 0xFFu; + uint b = pcg(rng) & 0xFFu; + e.color = (r << 16u) | (g << 8u) | b; + } + entities[id] = e; } diff --git a/src/ssbo_renderer.zig b/src/ssbo_renderer.zig index ee10337..3af1358 100644 --- a/src/ssbo_renderer.zig +++ b/src/ssbo_renderer.zig @@ -24,6 +24,7 @@ pub const SsboRenderer = struct { pan_loc: i32, circle_texture_id: u32, gpu_buffer: []sandbox.GpuEntity, + last_entity_count: usize, // track count to detect when entities are added const QUAD_SIZE: f32 = 16.0; @@ -125,6 +126,7 @@ pub const SsboRenderer = struct { .pan_loc = pan_loc, .circle_texture_id = circle_texture.id, .gpu_buffer = gpu_buffer, + .last_entity_count = 0, }; } @@ -137,16 +139,22 @@ pub const SsboRenderer = struct { } pub fn render(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void { + self.renderInternal(entities, zoom, pan, false); + } + + pub fn renderComputeMode(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void { if (entities.count == 0) return; // flush raylib's internal render batch before our custom GL calls rl.gl.rlDrawRenderBatchActive(); - // copy entity data to GPU buffer (position + packed velocity + color) - { - const zone = ztracy.ZoneN(@src(), "ssbo_copy"); + // upload NEW entities when count increases (entities added on CPU) + if (entities.count > self.last_entity_count) { + const zone = ztracy.ZoneN(@src(), "ssbo_upload_new"); defer zone.End(); - for (entities.items[0..entities.count], 0..) |entity, i| { + + // copy new entities to GPU buffer + for (entities.items[self.last_entity_count..entities.count], self.last_entity_count..) 
|entity, i| { self.gpu_buffer[i] = .{ .x = entity.x, .y = entity.y, @@ -154,16 +162,56 @@ pub const SsboRenderer = struct { .color = entity.color, }; } + + // upload only the new portion to SSBO + const offset: u32 = @intCast(self.last_entity_count * @sizeOf(sandbox.GpuEntity)); + const new_count = entities.count - self.last_entity_count; + const data_size: u32 = @intCast(new_count * @sizeOf(sandbox.GpuEntity)); + rl.gl.rlUpdateShaderBuffer(self.ssbo_id, &self.gpu_buffer[self.last_entity_count], data_size, offset); + + self.last_entity_count = entities.count; + } else if (entities.count < self.last_entity_count) { + // entities were removed, update count + self.last_entity_count = entities.count; } - // upload to SSBO - { - const zone = ztracy.ZoneN(@src(), "ssbo_upload"); - defer zone.End(); - const data_size: u32 = @intCast(entities.count * @sizeOf(sandbox.GpuEntity)); - rl.gl.rlUpdateShaderBuffer(self.ssbo_id, self.gpu_buffer.ptr, data_size, 0); + self.drawInstanced(entities.count, zoom, pan); + } + + fn renderInternal(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32), skip_upload: bool) void { + if (entities.count == 0) return; + + // flush raylib's internal render batch before our custom GL calls + rl.gl.rlDrawRenderBatchActive(); + + if (!skip_upload) { + // copy entity data to GPU buffer (position + packed velocity + color) + { + const zone = ztracy.ZoneN(@src(), "ssbo_copy"); + defer zone.End(); + for (entities.items[0..entities.count], 0..) 
|entity, i| { + self.gpu_buffer[i] = .{ + .x = entity.x, + .y = entity.y, + .packed_vel = sandbox.packVelocity(entity.vx, entity.vy), + .color = entity.color, + }; + } + } + + // upload to SSBO + { + const zone = ztracy.ZoneN(@src(), "ssbo_upload"); + defer zone.End(); + const data_size: u32 = @intCast(entities.count * @sizeOf(sandbox.GpuEntity)); + rl.gl.rlUpdateShaderBuffer(self.ssbo_id, self.gpu_buffer.ptr, data_size, 0); + } } + self.drawInstanced(entities.count, zoom, pan); + } + + fn drawInstanced(self: *SsboRenderer, entity_count: usize, zoom: f32, pan: @Vector(2, f32)) void { // bind shader rl.gl.rlEnableShader(self.shader_id); @@ -198,7 +246,7 @@ pub const SsboRenderer = struct { defer zone.End(); _ = rl.gl.rlEnableVertexArray(self.vao_id); rl.gl.rlEnableVertexBuffer(self.vbo_id); - rl.gl.rlDrawVertexArrayInstanced(0, 6, @intCast(entities.count)); + rl.gl.rlDrawVertexArrayInstanced(0, 6, @intCast(entity_count)); } // cleanup - restore raylib's expected state