Skip to content

Commit 2b192c1

Browse files
yohjimane and claude committed
Fix constant buffer parser to handle structs, uints, and UAV/SRV buffers
- Skip struct/object types before type validation
- Add support for uint and void shader variable types
- Only process D3D_CT_CBUFFER (not structured buffers as cbuffers)
- Improve error messages with variable details

Fixes compute shader compilation errors where RWStructuredBuffer was incorrectly parsed as a constant buffer, causing "size must be multiple of 16 bytes" errors. Structured buffers are UAVs/SRVs, not cbuffers.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent c47efb4 commit 2b192c1

12 files changed

Lines changed: 707 additions & 155 deletions
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// compute_test.cs - Simple compute shader test
2+
// Reads input buffer and writes to output buffer to validate compute pipeline
3+
//
4+
#define SM_5_0
5+
#include "common.h"
6+
7+
// Element type shared by g_input and g_output: four 4-byte scalars (16 bytes).
struct TestData
{
    uint  value; // integer payload; main() writes back value * 2 + thread index
    float x;     // x, y and z are consumed together as a float3 by main()
    float y;
    float z;
};
15+
16+
// Source elements, read-only (SRV slot t0).
StructuredBuffer<TestData> g_input : register(t0);

// Destination elements; each thread writes the element at its own index (UAV slot u0).
RWStructuredBuffer<TestData> g_output : register(u0);

// Element 0 is atomically incremented once per processed element (UAV slot u1).
RWStructuredBuffer<uint> g_counter : register(u1);
24+
25+
// Dispatch parameters (constant buffer slot b0).
// Four 4-byte scalars = 16 bytes, which satisfies the DX11 rule that a constant
// buffer's size must be a multiple of 16 bytes.
cbuffer TestParams : register(b0)
{
    uint  g_element_count;   // 4 bytes - threads with an index at or above this exit immediately
    uint  g_iteration_count; // 4 bytes - number of passes through the math loop in main()
    float g_multiplier;      // 4 bytes - scale applied to the working vector on every pass
    float g_padding;         // 4 bytes - filler that brings the total to 16 bytes
};
34+
35+
// Entry point: one thread per element, 256 threads per group.
// Reads g_input[idx], writes g_output[idx] with `value` doubled and offset by the
// thread index and (x, y, z) run through g_iteration_count rounds of vector math,
// then adds one to g_counter[0].
[numthreads(256, 1, 1)]
void main(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint idx = dispatch_thread_id.x;

    // Threads past the end of the data have nothing to do.
    if (idx >= g_element_count)
        return;

    // Reference g_padding so the compiler keeps it in the constant buffer layout.
    // The comparison is not expected to succeed; it only exists to create a use.
    if (g_padding < -1e30)
        return;

    TestData src = g_input[idx];
    float3 seed = float3(src.x, src.y, src.z);

    TestData dst;
    dst.value = src.value * 2 + idx;

    // The trip count is dynamic on purpose: the loop cannot be unrolled, so it
    // exercises real per-iteration overhead.
    float3 v = seed;
    for (uint pass = 0; pass < g_iteration_count; ++pass)
    {
        // Heavy per-component math.
        v = normalize(v + float3(0.01, 0.02, 0.03));
        v *= g_multiplier;
        v = abs(sin(v * 3.14159265f));
        v = sqrt(v + 0.001f);

        // Data-dependent tail: renormalise (guarding tiny lengths), square each
        // component, then mix a little of the original input back in.
        float len = length(v);
        v = v / max(len, 0.001f);
        v = v * v;
        v = v + seed * 0.001f;
    }

    dst.x = v.x;
    dst.y = v.y;
    dst.z = v.z;

    g_output[idx] = dst;

    // One atomic increment per element, independent of the iteration count.
    InterlockedAdd(g_counter[0], 1);
}
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
// detail_cull.cs - GPU-driven frustum culling for detail objects
2+
// Processes all detail instances and outputs visible indices for indirect drawing
3+
//
4+
#define SM_5_0
5+
#include "common.h"
6+
7+
// ===========================
8+
// Structures (must match C++)
9+
// ===========================
10+
11+
// Per-instance record stored in g_instances.
// Every member is built from 4-byte scalars, so the element stride is
// 32 + 32 + 32 + 16 = 112 bytes.
// NOTE(review): the C++ mirror of this struct must use the same field order and
// total size - confirm with a static_assert(sizeof(...) == 112) on the C++ side.
struct DetailInstanceGPU
{
    // Transform data (32 bytes)
    float3 position;
    float  scale;
    float  rotation_y;
    float3 padding0;

    // Rendering data (32 bytes)
    float  c_hemi;
    float  c_sun;
    uint   object_id;
    uint   vis_id;       // selects the output list in main(): 0 = still, 1 = wave1, 2 = wave2
    float3 color_rgb;
    float  padding1;

    // Bounding data (32 bytes)
    float3 bounds_min;
    float  bounds_radius; // multiplied by `scale` in main() to get the culling radius
    float3 bounds_max;
    float  padding2;

    // Metadata (16 bytes)
    uint   slot_x;
    uint   slot_z;
    uint   flags;
    float  fade_distance_sqr;
};
39+
40+
// A single plane in (normal, distance) form.
// NOTE(review): not referenced by the shader code visible here; CullParams stores
// its planes as raw float4 values instead.
struct FrustumPlane
{
    float4 plane; // xyz = normal, w = distance
};
44+
45+
// Element type of g_indirect_args (five 4-byte values, 20 bytes).
// NOTE(review): the field order looks like the D3D11 DrawIndexedInstancedIndirect
// argument layout - confirm against the C++ side before relying on it.
struct IndirectDrawArgs
{
    uint index_count;
    uint instance_count;
    uint start_index;
    int  base_vertex;
    uint start_instance;
};
53+
54+
// ===========================
55+
// Input Buffers
56+
// ===========================
57+
58+
// Per-dispatch culling parameters (constant buffer slot b0).
// Each float3 + float pair and the four scalars fill one 16-byte register, followed
// by six float4 planes: 3 * 16 + 6 * 16 = 144 bytes.
cbuffer CullParams : register(b0)
{
    // Register 0
    float3 g_camera_pos;     // camera position used for distance and SSA tests
    float  g_fade_limit_sqr; // instances with a larger squared distance are culled

    // Register 1
    float3 g_camera_dir;
    float  g_fade_start_sqr; // squared distance at which fading begins

    // Register 2
    float  g_r_ssa_discard;  // instances whose SSA is below this are culled
    float  g_r_ssa_cheap;
    uint   g_instance_count; // number of valid entries in g_instances
    uint   g_frame_number;

    // Registers 3..8
    float4 g_frustum_planes[6]; // Left, Right, Top, Bottom, Near, Far
};
73+
74+
// Every detail instance, read-only (SRV slot t0).
StructuredBuffer<DetailInstanceGPU> g_instances : register(t0);

// ===========================
// Output Buffers
// ===========================

// Lists of surviving instance indices, one list per vis_id.
// main() reserves a slot through g_counters and writes the index there.
RWStructuredBuffer<uint> g_visible_still : register(u0); // vis_id = 0
RWStructuredBuffer<uint> g_visible_wave1 : register(u1); // vis_id = 1
RWStructuredBuffer<uint> g_visible_wave2 : register(u2); // vis_id = 2

// Atomic element counts for the three lists above.
RWStructuredBuffer<uint> g_counters : register(u3); // [0]=still, [1]=wave1, [2]=wave2

// Indirect draw arguments, one entry per vis_id.
RWStructuredBuffer<IndirectDrawArgs> g_indirect_args : register(u4);
91+
92+
// ===========================
93+
// Culling Functions
94+
// ===========================
95+
96+
// Returns true unless the sphere lies entirely on the negative side of `plane`
// (plane.xyz = normal, plane.w = offset); touching or straddling spheres pass
// only while the signed distance is strictly greater than -radius.
bool SphereInsidePlane(float3 center, float radius, float4 plane)
{
    float signed_dist = dot(plane.xyz, center) + plane.w;
    return -radius < signed_dist;
}
102+
103+
// Tests a sphere against all six g_frustum_planes.
// Returns true when every plane test passes (the sphere is inside or intersecting
// the frustum and should be kept), false if any plane rejects it.
bool FrustumCullSphere(float3 center, float radius)
{
    bool keep = true;

    [unroll]
    for (uint plane_idx = 0; plane_idx < 6; ++plane_idx)
    {
        // SphereInsidePlane has no side effects, so accumulating the result is
        // equivalent to returning on the first failure.
        keep = keep && SphereInsidePlane(center, radius, g_frustum_planes[plane_idx]);
    }

    return keep;
}
114+
115+
// Screen Space Area (SSA) estimate for an instance:
//   SSA = (scale * radius)^2 / |world_pos - g_camera_pos|^2
// Returns 1e6 when the squared distance is below 0.001, so an instance sitting on
// the camera is always treated as large.
float ComputeSSA(float3 world_pos, float radius, float scale)
{
    float3 offset = world_pos - g_camera_pos;
    float dist_sqr = dot(offset, offset);

    // Avoid dividing by a near-zero distance.
    if (dist_sqr < 0.001f)
    {
        return 1e6f;
    }

    float scaled_radius = scale * radius;
    float area = scaled_radius * scaled_radius;
    return area / dist_sqr;
}
126+
127+
// ===========================
128+
// Compute Shader Entry Point
129+
// ===========================
130+
131+
// Entry point: one thread per detail instance, 256 threads per group.
// An instance survives when it is within the fade limit, passes the frustum test
// and has a large enough screen-space area. Its index is then written to the
// visible list matching its vis_id, using a slot reserved atomically in g_counters.
// Instances with a vis_id other than 0, 1 or 2 are dropped.
// NOTE(review): g_indirect_args is declared but not written here - presumably a
// later pass fills it from g_counters; confirm.
[numthreads(256, 1, 1)]
void main(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint instance_idx = dispatch_thread_id.x;

    // Early exit if beyond instance count
    if (instance_idx >= g_instance_count)
        return;

    // Load instance data
    DetailInstanceGPU inst = g_instances[instance_idx];

    // =========================
    // Distance Culling
    // =========================
    float3 to_camera = g_camera_pos - inst.position;
    float dist_sqr = dot(to_camera, to_camera);

    // Fade limit culling
    if (dist_sqr > g_fade_limit_sqr)
        return; // Too far, cull

    // =========================
    // Frustum Culling
    // =========================
    // Use bounding sphere for conservative culling
    float world_radius = inst.bounds_radius * inst.scale;
    if (!FrustumCullSphere(inst.position, world_radius))
        return; // Outside frustum, cull

    // =========================
    // SSA (Screen Space Area) Culling
    // =========================
    float ssa = ComputeSSA(inst.position, inst.bounds_radius, inst.scale);
    if (ssa < g_r_ssa_discard)
        return; // Too small, cull

    // =========================
    // Fade Factor (for future use)
    // =========================
    // Currently computed but not written anywhere. The division is safe: reaching
    // the branch requires g_fade_start_sqr < dist_sqr <= g_fade_limit_sqr, which
    // forces fade_range to be positive.
    float fade_alpha = 1.0f;
    if (dist_sqr > g_fade_start_sqr)
    {
        float fade_range = g_fade_limit_sqr - g_fade_start_sqr;
        fade_alpha = 1.0f - ((dist_sqr - g_fade_start_sqr) / fade_range);
    }

    // =========================
    // Append to Visible List
    // =========================
    uint output_idx = 0;

    // Pick the output buffer from vis_id. Each branch reserves a slot by atomically
    // incrementing the matching counter and receives the previous value.
    if (inst.vis_id == 0)
    {
        // Still (no animation)
        InterlockedAdd(g_counters[0], 1, output_idx);
        g_visible_still[output_idx] = instance_idx;
    }
    else if (inst.vis_id == 1)
    {
        // Wave 1
        InterlockedAdd(g_counters[1], 1, output_idx);
        g_visible_wave1[output_idx] = instance_idx;
    }
    else if (inst.vis_id == 2)
    {
        // Wave 2
        InterlockedAdd(g_counters[2], 1, output_idx);
        g_visible_wave2[output_idx] = instance_idx;
    }
    // Any other vis_id is invalid: drop the instance instead of silently counting
    // and drawing it as wave 2.
}

res/gamedata/shaders/r5/compute_test.cs

Lines changed: 0 additions & 57 deletions
This file was deleted.

0 commit comments

Comments
 (0)