Add GPU RNG to compute shader
This commit is contained in:
parent
62d010bdc0
commit
9e8226de32
4 changed files with 183 additions and 29 deletions
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const rl = @import("raylib");
|
const rl = @import("raylib");
|
||||||
|
const sandbox = @import("sandbox.zig");
|
||||||
|
|
||||||
const comp_source = @embedFile("shaders/entity_update.comp");
|
const comp_source = @embedFile("shaders/entity_update.comp");
|
||||||
|
|
||||||
|
|
@ -15,6 +16,11 @@ const GlMemoryBarrierFn = *const fn (barriers: u32) callconv(.c) void;
|
||||||
pub const ComputeShader = struct {
|
pub const ComputeShader = struct {
|
||||||
program_id: u32,
|
program_id: u32,
|
||||||
entity_count_loc: i32,
|
entity_count_loc: i32,
|
||||||
|
frame_number_loc: i32,
|
||||||
|
screen_size_loc: i32,
|
||||||
|
center_loc: i32,
|
||||||
|
respawn_radius_loc: i32,
|
||||||
|
entity_speed_loc: i32,
|
||||||
glMemoryBarrier: GlMemoryBarrierFn,
|
glMemoryBarrier: GlMemoryBarrierFn,
|
||||||
|
|
||||||
pub fn init() ?ComputeShader {
|
pub fn init() ?ComputeShader {
|
||||||
|
|
@ -38,15 +44,30 @@ pub const ComputeShader = struct {
|
||||||
|
|
||||||
// get uniform locations
|
// get uniform locations
|
||||||
const entity_count_loc = rl.gl.rlGetLocationUniform(program_id, "entityCount");
|
const entity_count_loc = rl.gl.rlGetLocationUniform(program_id, "entityCount");
|
||||||
if (entity_count_loc < 0) {
|
const frame_number_loc = rl.gl.rlGetLocationUniform(program_id, "frameNumber");
|
||||||
std.debug.print("compute: warning - entityCount uniform not found\n", .{});
|
const screen_size_loc = rl.gl.rlGetLocationUniform(program_id, "screenSize");
|
||||||
}
|
const center_loc = rl.gl.rlGetLocationUniform(program_id, "center");
|
||||||
|
const respawn_radius_loc = rl.gl.rlGetLocationUniform(program_id, "respawnRadius");
|
||||||
|
const entity_speed_loc = rl.gl.rlGetLocationUniform(program_id, "entitySpeed");
|
||||||
|
|
||||||
std.debug.print("compute: shader loaded successfully (program_id={})\n", .{program_id});
|
std.debug.print("compute: shader loaded (program_id={}, uniforms: count={}, frame={}, screen={}, center={}, radius={}, speed={})\n", .{
|
||||||
|
program_id,
|
||||||
|
entity_count_loc,
|
||||||
|
frame_number_loc,
|
||||||
|
screen_size_loc,
|
||||||
|
center_loc,
|
||||||
|
respawn_radius_loc,
|
||||||
|
entity_speed_loc,
|
||||||
|
});
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
.program_id = program_id,
|
.program_id = program_id,
|
||||||
.entity_count_loc = entity_count_loc,
|
.entity_count_loc = entity_count_loc,
|
||||||
|
.frame_number_loc = frame_number_loc,
|
||||||
|
.screen_size_loc = screen_size_loc,
|
||||||
|
.center_loc = center_loc,
|
||||||
|
.respawn_radius_loc = respawn_radius_loc,
|
||||||
|
.entity_speed_loc = entity_speed_loc,
|
||||||
.glMemoryBarrier = glMemoryBarrier,
|
.glMemoryBarrier = glMemoryBarrier,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -55,14 +76,32 @@ pub const ComputeShader = struct {
|
||||||
rl.gl.rlUnloadShaderProgram(self.program_id);
|
rl.gl.rlUnloadShaderProgram(self.program_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dispatch(self: *ComputeShader, ssbo_id: u32, entity_count: u32) void {
|
pub fn dispatch(self: *ComputeShader, ssbo_id: u32, entity_count: u32, frame_number: u32) void {
|
||||||
if (entity_count == 0) return;
|
if (entity_count == 0) return;
|
||||||
|
|
||||||
|
// constants from sandbox.zig
|
||||||
|
const screen_w: f32 = @floatFromInt(sandbox.SCREEN_WIDTH);
|
||||||
|
const screen_h: f32 = @floatFromInt(sandbox.SCREEN_HEIGHT);
|
||||||
|
const center_x: f32 = screen_w / 2.0;
|
||||||
|
const center_y: f32 = screen_h / 2.0;
|
||||||
|
const respawn_radius: f32 = 10.0; // RESPAWN_THRESHOLD
|
||||||
|
const entity_speed: f32 = 2.0; // ENTITY_SPEED
|
||||||
|
|
||||||
// bind compute shader
|
// bind compute shader
|
||||||
rl.gl.rlEnableShader(self.program_id);
|
rl.gl.rlEnableShader(self.program_id);
|
||||||
|
|
||||||
// set entityCount uniform
|
// set uniforms
|
||||||
rl.gl.rlSetUniform(self.entity_count_loc, &entity_count, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1);
|
rl.gl.rlSetUniform(self.entity_count_loc, &entity_count, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1);
|
||||||
|
rl.gl.rlSetUniform(self.frame_number_loc, &frame_number, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_uint), 1);
|
||||||
|
|
||||||
|
const screen_size = [2]f32{ screen_w, screen_h };
|
||||||
|
rl.gl.rlSetUniform(self.screen_size_loc, &screen_size, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_vec2), 1);
|
||||||
|
|
||||||
|
const center = [2]f32{ center_x, center_y };
|
||||||
|
rl.gl.rlSetUniform(self.center_loc, &center, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_vec2), 1);
|
||||||
|
|
||||||
|
rl.gl.rlSetUniform(self.respawn_radius_loc, &respawn_radius, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_float), 1);
|
||||||
|
rl.gl.rlSetUniform(self.entity_speed_loc, &entity_speed, @intFromEnum(rl.gl.rlShaderUniformDataType.rl_shader_uniform_float), 1);
|
||||||
|
|
||||||
// bind SSBO to binding point 0
|
// bind SSBO to binding point 0
|
||||||
rl.gl.rlBindShaderBuffer(ssbo_id, 0);
|
rl.gl.rlBindShaderBuffer(ssbo_id, 0);
|
||||||
|
|
|
||||||
|
|
@ -306,6 +306,7 @@ pub fn main() !void {
|
||||||
var update_time_us: i64 = 0;
|
var update_time_us: i64 = 0;
|
||||||
var render_time_us: i64 = 0;
|
var render_time_us: i64 = 0;
|
||||||
var elapsed: f32 = 0;
|
var elapsed: f32 = 0;
|
||||||
|
var frame_number: u32 = 0;
|
||||||
|
|
||||||
// auto-benchmark state
|
// auto-benchmark state
|
||||||
var last_ramp_time: f32 = 0;
|
var last_ramp_time: f32 = 0;
|
||||||
|
|
@ -360,14 +361,11 @@ pub fn main() !void {
|
||||||
defer tracy_update.End();
|
defer tracy_update.End();
|
||||||
const update_start = std.time.microTimestamp();
|
const update_start = std.time.microTimestamp();
|
||||||
|
|
||||||
if (compute_shader != null) {
|
if (compute_shader == null) {
|
||||||
// GPU compute update - positions updated on GPU
|
// CPU update path (positions + respawn)
|
||||||
// still need CPU update for respawn logic until Step 3
|
|
||||||
sandbox.update(&entities, &rng);
|
|
||||||
} else {
|
|
||||||
// CPU update path
|
|
||||||
sandbox.update(&entities, &rng);
|
sandbox.update(&entities, &rng);
|
||||||
}
|
}
|
||||||
|
// GPU compute path handles update in render section before draw
|
||||||
|
|
||||||
update_time_us = std.time.microTimestamp() - update_start;
|
update_time_us = std.time.microTimestamp() - update_start;
|
||||||
}
|
}
|
||||||
|
|
@ -383,13 +381,18 @@ pub fn main() !void {
|
||||||
if (use_ssbo) {
|
if (use_ssbo) {
|
||||||
// dispatch compute shader before render (if enabled)
|
// dispatch compute shader before render (if enabled)
|
||||||
if (compute_shader) |*cs| {
|
if (compute_shader) |*cs| {
|
||||||
const tracy_compute = ztracy.ZoneN(@src(), "compute_dispatch");
|
if (!paused) {
|
||||||
defer tracy_compute.End();
|
const tracy_compute = ztracy.ZoneN(@src(), "compute_dispatch");
|
||||||
cs.dispatch(ssbo_renderer.?.ssbo_id, @intCast(entities.count));
|
defer tracy_compute.End();
|
||||||
|
cs.dispatch(ssbo_renderer.?.ssbo_id, @intCast(entities.count), frame_number);
|
||||||
|
frame_number +%= 1;
|
||||||
|
}
|
||||||
|
// GPU compute mode - only upload new entities, positions updated on GPU
|
||||||
|
ssbo_renderer.?.renderComputeMode(&entities, zoom, pan);
|
||||||
|
} else {
|
||||||
|
// CPU mode - upload entity data to GPU
|
||||||
|
ssbo_renderer.?.render(&entities, zoom, pan);
|
||||||
}
|
}
|
||||||
|
|
||||||
// SSBO instanced rendering path (16 bytes per entity)
|
|
||||||
ssbo_renderer.?.render(&entities, zoom, pan);
|
|
||||||
} else if (use_instancing) {
|
} else if (use_instancing) {
|
||||||
// GPU instancing path (64 bytes per entity)
|
// GPU instancing path (64 bytes per entity)
|
||||||
const xforms = transforms.?;
|
const xforms = transforms.?;
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,29 @@ layout(std430, binding = 0) buffer Entities {
|
||||||
};
|
};
|
||||||
|
|
||||||
uniform uint entityCount;
|
uniform uint entityCount;
|
||||||
|
uniform uint frameNumber;
|
||||||
|
uniform vec2 screenSize;
|
||||||
|
uniform vec2 center;
|
||||||
|
uniform float respawnRadius;
|
||||||
|
uniform float entitySpeed;
|
||||||
|
|
||||||
|
// PCG-style GPU RNG - returns value in [0, 1)
|
||||||
|
uint pcg(inout uint state) {
|
||||||
|
state = state * 747796405u + 2891336453u;
|
||||||
|
uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
|
||||||
|
return (word >> 22u) ^ word;
|
||||||
|
}
|
||||||
|
|
||||||
|
float randFloat(inout uint state) {
|
||||||
|
return float(pcg(state)) / 4294967296.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// pack velocity into fixed-point 8.8 format
|
||||||
|
int packVelocity(float vx, float vy) {
|
||||||
|
int vx_fixed = int(clamp(vx * 256.0, -32768.0, 32767.0));
|
||||||
|
int vy_fixed = int(clamp(vy * 256.0, -32768.0, 32767.0));
|
||||||
|
return (vx_fixed << 16) | (vy_fixed & 0xFFFF);
|
||||||
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint id = gl_GlobalInvocationID.x;
|
uint id = gl_GlobalInvocationID.x;
|
||||||
|
|
@ -29,5 +52,46 @@ void main() {
|
||||||
e.x += vx;
|
e.x += vx;
|
||||||
e.y += vy;
|
e.y += vy;
|
||||||
|
|
||||||
|
// check if reached center - respawn at edge
|
||||||
|
float dx = e.x - center.x;
|
||||||
|
float dy = e.y - center.y;
|
||||||
|
if (dx*dx + dy*dy < respawnRadius * respawnRadius) {
|
||||||
|
// init RNG with entity id and frame number
|
||||||
|
uint rng = id * 1103515245u + frameNumber * 12345u + 1u;
|
||||||
|
|
||||||
|
// pick random edge: 0=top, 1=bottom, 2=left, 3=right
|
||||||
|
uint edge = pcg(rng) & 3u;
|
||||||
|
float t = randFloat(rng);
|
||||||
|
|
||||||
|
// spawn on edge
|
||||||
|
if (edge == 0u) { // top
|
||||||
|
e.x = t * screenSize.x;
|
||||||
|
e.y = 0.0;
|
||||||
|
} else if (edge == 1u) { // bottom
|
||||||
|
e.x = t * screenSize.x;
|
||||||
|
e.y = screenSize.y;
|
||||||
|
} else if (edge == 2u) { // left
|
||||||
|
e.x = 0.0;
|
||||||
|
e.y = t * screenSize.y;
|
||||||
|
} else { // right
|
||||||
|
e.x = screenSize.x;
|
||||||
|
e.y = t * screenSize.y;
|
||||||
|
}
|
||||||
|
|
||||||
|
// velocity toward center
|
||||||
|
dx = center.x - e.x;
|
||||||
|
dy = center.y - e.y;
|
||||||
|
float dist = sqrt(dx*dx + dy*dy);
|
||||||
|
vx = (dx / dist) * entitySpeed;
|
||||||
|
vy = (dy / dist) * entitySpeed;
|
||||||
|
e.packedVel = packVelocity(vx, vy);
|
||||||
|
|
||||||
|
// new random color
|
||||||
|
uint r = pcg(rng) & 0xFFu;
|
||||||
|
uint g = pcg(rng) & 0xFFu;
|
||||||
|
uint b = pcg(rng) & 0xFFu;
|
||||||
|
e.color = (r << 16u) | (g << 8u) | b;
|
||||||
|
}
|
||||||
|
|
||||||
entities[id] = e;
|
entities[id] = e;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ pub const SsboRenderer = struct {
|
||||||
pan_loc: i32,
|
pan_loc: i32,
|
||||||
circle_texture_id: u32,
|
circle_texture_id: u32,
|
||||||
gpu_buffer: []sandbox.GpuEntity,
|
gpu_buffer: []sandbox.GpuEntity,
|
||||||
|
last_entity_count: usize, // track count to detect when entities are added
|
||||||
|
|
||||||
const QUAD_SIZE: f32 = 16.0;
|
const QUAD_SIZE: f32 = 16.0;
|
||||||
|
|
||||||
|
|
@ -125,6 +126,7 @@ pub const SsboRenderer = struct {
|
||||||
.pan_loc = pan_loc,
|
.pan_loc = pan_loc,
|
||||||
.circle_texture_id = circle_texture.id,
|
.circle_texture_id = circle_texture.id,
|
||||||
.gpu_buffer = gpu_buffer,
|
.gpu_buffer = gpu_buffer,
|
||||||
|
.last_entity_count = 0,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -137,16 +139,22 @@ pub const SsboRenderer = struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn render(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void {
|
pub fn render(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void {
|
||||||
|
self.renderInternal(entities, zoom, pan, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn renderComputeMode(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32)) void {
|
||||||
if (entities.count == 0) return;
|
if (entities.count == 0) return;
|
||||||
|
|
||||||
// flush raylib's internal render batch before our custom GL calls
|
// flush raylib's internal render batch before our custom GL calls
|
||||||
rl.gl.rlDrawRenderBatchActive();
|
rl.gl.rlDrawRenderBatchActive();
|
||||||
|
|
||||||
// copy entity data to GPU buffer (position + packed velocity + color)
|
// upload NEW entities when count increases (entities added on CPU)
|
||||||
{
|
if (entities.count > self.last_entity_count) {
|
||||||
const zone = ztracy.ZoneN(@src(), "ssbo_copy");
|
const zone = ztracy.ZoneN(@src(), "ssbo_upload_new");
|
||||||
defer zone.End();
|
defer zone.End();
|
||||||
for (entities.items[0..entities.count], 0..) |entity, i| {
|
|
||||||
|
// copy new entities to GPU buffer
|
||||||
|
for (entities.items[self.last_entity_count..entities.count], self.last_entity_count..) |entity, i| {
|
||||||
self.gpu_buffer[i] = .{
|
self.gpu_buffer[i] = .{
|
||||||
.x = entity.x,
|
.x = entity.x,
|
||||||
.y = entity.y,
|
.y = entity.y,
|
||||||
|
|
@ -154,16 +162,56 @@ pub const SsboRenderer = struct {
|
||||||
.color = entity.color,
|
.color = entity.color,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// upload only the new portion to SSBO
|
||||||
|
const offset: u32 = @intCast(self.last_entity_count * @sizeOf(sandbox.GpuEntity));
|
||||||
|
const new_count = entities.count - self.last_entity_count;
|
||||||
|
const data_size: u32 = @intCast(new_count * @sizeOf(sandbox.GpuEntity));
|
||||||
|
rl.gl.rlUpdateShaderBuffer(self.ssbo_id, &self.gpu_buffer[self.last_entity_count], data_size, offset);
|
||||||
|
|
||||||
|
self.last_entity_count = entities.count;
|
||||||
|
} else if (entities.count < self.last_entity_count) {
|
||||||
|
// entities were removed, update count
|
||||||
|
self.last_entity_count = entities.count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// upload to SSBO
|
self.drawInstanced(entities.count, zoom, pan);
|
||||||
{
|
}
|
||||||
const zone = ztracy.ZoneN(@src(), "ssbo_upload");
|
|
||||||
defer zone.End();
|
fn renderInternal(self: *SsboRenderer, entities: *const sandbox.Entities, zoom: f32, pan: @Vector(2, f32), skip_upload: bool) void {
|
||||||
const data_size: u32 = @intCast(entities.count * @sizeOf(sandbox.GpuEntity));
|
if (entities.count == 0) return;
|
||||||
rl.gl.rlUpdateShaderBuffer(self.ssbo_id, self.gpu_buffer.ptr, data_size, 0);
|
|
||||||
|
// flush raylib's internal render batch before our custom GL calls
|
||||||
|
rl.gl.rlDrawRenderBatchActive();
|
||||||
|
|
||||||
|
if (!skip_upload) {
|
||||||
|
// copy entity data to GPU buffer (position + packed velocity + color)
|
||||||
|
{
|
||||||
|
const zone = ztracy.ZoneN(@src(), "ssbo_copy");
|
||||||
|
defer zone.End();
|
||||||
|
for (entities.items[0..entities.count], 0..) |entity, i| {
|
||||||
|
self.gpu_buffer[i] = .{
|
||||||
|
.x = entity.x,
|
||||||
|
.y = entity.y,
|
||||||
|
.packed_vel = sandbox.packVelocity(entity.vx, entity.vy),
|
||||||
|
.color = entity.color,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// upload to SSBO
|
||||||
|
{
|
||||||
|
const zone = ztracy.ZoneN(@src(), "ssbo_upload");
|
||||||
|
defer zone.End();
|
||||||
|
const data_size: u32 = @intCast(entities.count * @sizeOf(sandbox.GpuEntity));
|
||||||
|
rl.gl.rlUpdateShaderBuffer(self.ssbo_id, self.gpu_buffer.ptr, data_size, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.drawInstanced(entities.count, zoom, pan);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn drawInstanced(self: *SsboRenderer, entity_count: usize, zoom: f32, pan: @Vector(2, f32)) void {
|
||||||
// bind shader
|
// bind shader
|
||||||
rl.gl.rlEnableShader(self.shader_id);
|
rl.gl.rlEnableShader(self.shader_id);
|
||||||
|
|
||||||
|
|
@ -198,7 +246,7 @@ pub const SsboRenderer = struct {
|
||||||
defer zone.End();
|
defer zone.End();
|
||||||
_ = rl.gl.rlEnableVertexArray(self.vao_id);
|
_ = rl.gl.rlEnableVertexArray(self.vao_id);
|
||||||
rl.gl.rlEnableVertexBuffer(self.vbo_id);
|
rl.gl.rlEnableVertexBuffer(self.vbo_id);
|
||||||
rl.gl.rlDrawVertexArrayInstanced(0, 6, @intCast(entities.count));
|
rl.gl.rlDrawVertexArrayInstanced(0, 6, @intCast(entity_count));
|
||||||
}
|
}
|
||||||
|
|
||||||
// cleanup - restore raylib's expected state
|
// cleanup - restore raylib's expected state
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue