Add GPU RNG to compute shader
This commit is contained in:
parent
62d010bdc0
commit
9e8226de32
4 changed files with 183 additions and 29 deletions
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
const std = @import("std");
|
||||
const rl = @import("raylib");
|
||||
const sandbox = @import("sandbox.zig");
|
||||
|
||||
const comp_source = @embedFile("shaders/entity_update.comp");
|
||||
|
||||
|
|
@ -15,6 +16,11 @@ const GlMemoryBarrierFn = *const fn (barriers: u32) callconv(.c) void;
|
|||
pub const ComputeShader = struct {
|
||||
program_id: u32,
|
||||
entity_count_loc: i32,
|
||||
frame_number_loc: i32,
|
||||
screen_size_loc: i32,
|
||||
center_loc: i32,
|
||||
respawn_radius_loc: i32,
|
||||
entity_speed_loc: i32,
|
||||
glMemoryBarrier: GlMemoryBarrierFn,
|
||||
|
||||
pub fn init() ?ComputeShader {
|
||||
|
|
@ -38,15 +44,30 @@ pub const ComputeShader = struct {
|
|||
|
||||
// get uniform locations
|
||||
const entity_count_loc = rl.gl.rlGetLocationUniform(program_id, "entityCount");
|
||||
if (entity_count_loc < 0) {
|
||||
std.debug.print("compute: warning - entityCount uniform not found\n", .{});
|
||||
}
|
||||
const frame_number_loc = rl.gl.rlGetLocationUniform(program_id, "frameNumber");
|
||||
const screen_size_loc = rl.gl.rlGetLocationUniform(program_id, "screenSize");
|
||||
const center_loc = rl.gl.rlGetLocationUniform(program_id, "center");
|
||||
const respawn_radius_loc = rl.gl.rlGetLocationUniform(program_id, "respawnRadius");
|
||||
const entity_speed_loc = rl.gl.rlGetLocationUniform(program_id, "entitySpeed");
|
||||
|
||||
std.debug.print("compute: shader loaded successfully (program_id={})\n", .{program_id});
|
||||
std.debug.print("compute: shader loaded (program_id={}, uniforms: count={}, frame={}, screen={}, center={}, radius={}, speed={})\n", .{
|
||||
program_id,
|
||||
entity_count_loc,
|
||||
frame_number_loc,
|
||||
screen_size_loc,
|
||||
center_loc,
|
||||
respawn_radius_loc,
|
||||
entity_speed_loc,
|
||||
});
|
||||
|
||||
return .{
|
||||
.program_id = program_id,
|
||||
.entity_count_loc = entity_count_loc,
|
||||
.frame_number_loc = frame_number_loc,
|
||||
.screen_size_loc = screen_size_loc,
|
||||
.center_loc = center_loc,
|
||||
.respawn_radius_loc = respawn_radius_loc,
|
||||
.entity_speed_loc = entity_speed_loc,
|
||||
.glMemoryBarrier = glMemoryBarrier,
|
||||
};
|
||||
}
|
||||
|
|
@ -55,14 +76,32 @@ pub const ComputeShader = struct {
|
|||
rl.gl.rlUnloadShaderProgram(self.program_id);
|
||||
}
|
||||
|
||||
pub fn dispatch(self: *ComputeShader, ssbo_id: u32, entity_count: u32) void {
|
||||
pub fn dispatch(self: *ComputeShader, ssbo_id: u32, entity_count: u32, frame_number: u32) void {
|
||||
if (entity_count == 0) return;
|
||||
|
||||
// constants from sandbox.zig
|
||||
const screen_w: f32 = @floatFromInt(sandbox.SCREEN_WIDTH);
|
||||
const screen_h: f32 = @floatFromInt(sandbox.SCREEN_HEIGHT);
|
||||
const center_x: f32 = screen_w / 2.0;
|
||||
const center_y: f32 = screen_h / 2.0;
|
||||
const respawn_radius: f32 = 10.0; // RESPAWN_THRESHOLD
|
||||
const entity_speed: f32 = 2.0; // ENTITY_SPEED
|
||||
|
||||
// bind compute shader
|
||||
rl.gl.rlEnableShader(self.program_id);
|
||||
|
||||
// set entityCount uniform
|
||||
// set uniforms
|
||||
rl.gl.rlSetUniform(self.entity_count_loc, &entity_count, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1);
|
||||
rl.gl.rlSetUniform(self.frame_number_loc, &frame_number, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1);
|
||||
|
||||
const screen_size = [2]f32{ screen_w, screen_h };
|
||||
rl.gl.rlSetUniform(self.screen_size_loc, &screen_size, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_vec2), 1);
|
||||
|
||||
const center = [2]f32{ center_x, center_y };
|
||||
rl.gl.rlSetUniform(self.center_loc, &center, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_vec2), 1);
|
||||
|
||||
rl.gl.rlSetUniform(self.respawn_radius_loc, &respawn_radius, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_float), 1);
|
||||
rl.gl.rlSetUniform(self.entity_speed_loc, &entity_speed, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_float), 1);
|
||||
|
||||
// bind SSBO to binding point 0
|
||||
rl.gl.rlBindShaderBuffer(ssbo_id, 0);
|
||||
|
|
|
|||
|
|
@ -306,6 +306,7 @@ pub fn main() !void {
|
|||
var update_time_us: i64 = 0;
|
||||
var render_time_us: i64 = 0;
|
||||
var elapsed: f32 = 0;
|
||||
var frame_number: u32 = 0;
|
||||
|
||||
// auto-benchmark state
|
||||
var last_ramp_time: f32 = 0;
|
||||
|
|
@ -360,14 +361,11 @@ pub fn main() !void {
|
|||
defer tracy_update.End();
|
||||
const update_start = std.time.microTimestamp();
|
||||
|
||||
if (compute_shader != null) {
|
||||
// GPU compute update - positions updated on GPU
|
||||
// still need CPU update for respawn logic until Step 3
|
||||
sandbox.update(&entities, &rng);
|
||||
} else {
|
||||
// CPU update path
|
||||
if (compute_shader == null) {
|
||||
// CPU update path (positions + respawn)
|
||||
sandbox.update(&entities, &rng);
|
||||
}
|
||||
// GPU compute path handles update in render section before draw
|
||||
|
||||
update_time_us = std.time.microTimestamp() - update_start;
|
||||
}
|
||||
|
|
@ -383,13 +381,18 @@ pub fn main() !void {
|
|||
if (use_ssbo) {
|
||||
// dispatch compute shader before render (if enabled)
|
||||
if (compute_shader) |*cs| {
|
||||
if (!paused) {
|
||||
const tracy_compute = ztracy.ZoneN(@src(), "compute_dispatch");
|
||||
defer tracy_compute.End();
|
||||
cs.dispatch(ssbo_renderer.?.ssbo_id, @intCast(entities.count));
|
||||
cs.dispatch(ssbo_renderer.?.ssbo_id, @intCast(entities.count), frame_number);
|
||||
frame_number +%= 1;
|
||||
}
|
||||
|
||||
// SSBO instanced rendering path (16 bytes per entity)
|
||||
// GPU compute mode - only upload new entities, positions updated on GPU
|
||||
ssbo_renderer.?.renderComputeMode(&entities, zoom, pan);
|
||||
} else {
|
||||
// CPU mode - upload entity data to GPU
|
||||
ssbo_renderer.?.render(&entities, zoom, pan);
|
||||
}
|
||||
} else if (use_instancing) {
|
||||
// GPU instancing path (64 bytes per entity)
|
||||
const xforms = transforms.?;
|
||||
|
|
|
|||
|
|
@ -14,6 +14,29 @@ layout(std430, binding = 0) buffer Entities {
|
|||
};
|
||||
|
||||
uniform uint entityCount;
|
||||
uniform uint frameNumber;
|
||||
uniform vec2 screenSize;
|
||||
uniform vec2 center;
|
||||
uniform float respawnRadius;
|
||||
uniform float entitySpeed;
|
||||
|
||||
// PCG-style GPU RNG step.
// Advances `state` in place with a linear-congruential update, then returns a
// 32-bit value derived from the new state by a state-dependent shift/xor,
// a multiply, and a final xor-shift. The result is a raw uint, not a float.
uint pcg(inout uint state) {
    state = state * 747796405u + 2891336453u;

    // the shift amount comes from the top four bits of the new state (range 4..19)
    uint shift = (state >> 28u) + 4u;
    uint mixed = (state ^ (state >> shift)) * 277803737u;

    return mixed ^ (mixed >> 22u);
}
|
||||
|
||||
// Returns a pseudo-random float in [0, 1) and advances `state` via pcg().
// Only the top 24 bits of the generator output are used: a 32-bit float has a
// 24-bit significand, so converting the full 32-bit value would round inputs
// close to 2^32 up to 4294967296.0 and produce exactly 1.0, breaking the
// half-open range. 24 bits convert exactly, so the maximum result is
// (2^24 - 1) / 2^24 < 1.0.
float randFloat(inout uint state) {
    return float(pcg(state) >> 8u) * (1.0 / 16777216.0);
}
|
||||
|
||||
// Pack a 2D velocity into one int as two signed 8.8 fixed-point values.
// Each component is scaled by 256, clamped to the signed 16-bit range and
// truncated to an integer; vx occupies the upper 16 bits, vy the lower 16 bits.
int packVelocity(float vx, float vy) {
    vec2 scaled = clamp(vec2(vx, vy) * 256.0, -32768.0, 32767.0);
    ivec2 fixedPoint = ivec2(scaled);

    int upper = fixedPoint.x << 16;
    int lower = fixedPoint.y & 0xFFFF; // mask so a negative vy cannot spill into vx's bits
    return upper | lower;
}
|
||||
|
||||
void main() {
|
||||
uint id = gl_GlobalInvocationID.x;
|
||||
|
|
@ -29,5 +52,46 @@ void main() {
|
|||
e.x += vx;
|
||||
e.y += vy;
|
||||
|
||||
// check if reached center - respawn at edge
|
||||
float dx = e.x - center.x;
|
||||
float dy = e.y - center.y;
|
||||
if (dx*dx + dy*dy < respawnRadius * respawnRadius) {
|
||||
// init RNG with entity id and frame number
|
||||
uint rng = id * 1103515245u + frameNumber * 12345u + 1u;
|
||||
|
||||
// pick random edge: 0=top, 1=bottom, 2=left, 3=right
|
||||
uint edge = pcg(rng) & 3u;
|
||||
float t = randFloat(rng);
|
||||
|
||||
// spawn on edge
|
||||
if (edge == 0u) { // top
|
||||
e.x = t * screenSize.x;
|
||||
e.y = 0.0;
|
||||
} else if (edge == 1u) { // bottom
|
||||
e.x = t * screenSize.x;
|
||||
e.y = screenSize.y;
|
||||
} else if (edge == 2u) { // left
|
||||
e.x = 0.0;
|
||||
e.y = t * screenSize.y;
|
||||
} else { // right
|
||||
e.x = screenSize.x;
|
||||
e.y = t * screenSize.y;
|
||||
}
|
||||
|
||||
// velocity toward center
|
||||
dx = center.x - e.x;
|
||||
dy = center.y - e.y;
|
||||
float dist = sqrt(dx*dx + dy*dy);
|
||||
vx = (dx / dist) * entitySpeed;
|
||||
vy = (dy / dist) * entitySpeed;
|
||||
e.packedVel = packVelocity(vx, vy);
|
||||
|
||||
// new random color
|
||||
uint r = pcg(rng) & 0xFFu;
|
||||
uint g = pcg(rng) & 0xFFu;
|
||||
uint b = pcg(rng) & 0xFFu;
|
||||
e.color = (r << 16u) | (g << 8u) | b;
|
||||
}
|
||||
|
||||
entities[id] = e;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ pub const SsboRenderer = struct {
|
|||
pan_loc: i32,
|
||||
circle_texture_id: u32,
|
||||
gpu_buffer: []sandbox.GpuEntity,
|
||||
last_entity_count: usize, // track count to detect when entities are added
|
||||
|
||||
const QUAD_SIZE: f32 = 16.0;
|
||||
|
||||
|
|
@ -125,6 +126,7 @@ pub const SsboRenderer = struct {
|
|||
.pan_loc = pan_loc,
|
||||
.circle_texture_id = circle_texture.id,
|
||||
.gpu_buffer = gpu_buffer,
|
||||
.last_entity_count = 0,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -137,11 +139,52 @@ pub const SsboRenderer = struct {
|
|||
}
|
||||
|
||||
/// Renders `entities` through the shared internal path with the upload step
/// enabled (forwards to `renderInternal` with `skip_upload = false`).
pub fn render(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void {
    const skip_upload = false;
    return self.renderInternal(entities, zoom, pan, skip_upload);
}
|
||||
|
||||
/// Renders entities in GPU compute mode.
///
/// Only entities appended since the previous call (indices
/// `last_entity_count..entities.count`) are copied into `gpu_buffer` and
/// uploaded to the SSBO; existing SSBO contents are left untouched. After the
/// optional upload, `last_entity_count` is synchronised with `entities.count`
/// and the instanced draw is issued.
///
/// When `entities.count` is zero nothing is drawn, and the tracked count is
/// reset to zero so that entities added afterwards are treated as new and
/// uploaded on the next call.
pub fn renderComputeMode(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void {
    if (entities.count == 0) {
        // Without this reset, a clear followed by a smaller respawn would hit
        // the "count shrank" case and skip uploading the new entities.
        self.last_entity_count = 0;
        return;
    }

    // flush raylib's internal render batch before our custom GL calls
    rl.gl.rlDrawRenderBatchActive();

    // upload NEW entities when count increases (entities added on CPU)
    if (entities.count > self.last_entity_count) {
        const zone = ztracy.ZoneN(@src(), "ssbo_upload_new");
        defer zone.End();

        const first_new = self.last_entity_count;

        // copy new entities to GPU buffer
        for (entities.items[first_new..entities.count], first_new..) |entity, i| {
            self.gpu_buffer[i] = .{
                .x = entity.x,
                .y = entity.y,
                .packed_vel = sandbox.packVelocity(entity.vx, entity.vy),
                .color = entity.color,
            };
        }

        // upload only the new portion to SSBO
        const entity_size = @sizeOf(sandbox.GpuEntity);
        const offset: u32 = @intCast(first_new * entity_size);
        const data_size: u32 = @intCast((entities.count - first_new) * entity_size);
        rl.gl.rlUpdateShaderBuffer(self.ssbo_id, &self.gpu_buffer[first_new], data_size, offset);
    }

    // keep the tracked count in sync whether entities were added or removed
    self.last_entity_count = entities.count;

    self.drawInstanced(entities.count, zoom, pan);
}
|
||||
|
||||
fn renderInternal(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32), skip_upload: bool) void {
|
||||
if (entities.count == 0) return;
|
||||
|
||||
// flush raylib's internal render batch before our custom GL calls
|
||||
rl.gl.rlDrawRenderBatchActive();
|
||||
|
||||
if (!skip_upload) {
|
||||
// copy entity data to GPU buffer (position + packed velocity + color)
|
||||
{
|
||||
const zone = ztracy.ZoneN(@src(), "ssbo_copy");
|
||||
|
|
@ -163,7 +206,12 @@ pub const SsboRenderer = struct {
|
|||
const data_size: u32 = @intCast(entities.count * @sizeOf(sandbox.GpuEntity));
|
||||
rl.gl.rlUpdateShaderBuffer(self.ssbo_id, self.gpu_buffer.ptr, data_size, 0);
|
||||
}
|
||||
}
|
||||
|
||||
self.drawInstanced(entities.count, zoom, pan);
|
||||
}
|
||||
|
||||
fn drawInstanced(self: *SsboRenderer, entity_count: usize, zoom: f32, pan: @Vector(2, f32)) void {
|
||||
// bind shader
|
||||
rl.gl.rlEnableShader(self.shader_id);
|
||||
|
||||
|
|
@ -198,7 +246,7 @@ pub const SsboRenderer = struct {
|
|||
defer zone.End();
|
||||
_ = rl.gl.rlEnableVertexArray(self.vao_id);
|
||||
rl.gl.rlEnableVertexBuffer(self.vbo_id);
|
||||
rl.gl.rlDrawVertexArrayInstanced(0, 6, @intCast(entities.count));
|
||||
rl.gl.rlDrawVertexArrayInstanced(0, 6, @intCast(entity_count));
|
||||
}
|
||||
|
||||
// cleanup - restore raylib's expected state
|
||||
|
|
|
|||
Loading…
Reference in a new issue