Skip to content

Commit 9770405

Browse files
committed
Add timestamp query
1 parent 11e5d28 commit 9770405

3 files changed

Lines changed: 119 additions & 25 deletions

File tree

demos/cli/src/main.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -310,25 +310,33 @@ fn run3d_wgpu(
310310
.await
311311
.map_err(anyhow::Error::from)?;
312312
let out = adapter
313-
.request_device(&wgpu::DeviceDescriptor::default())
313+
.request_device(&wgpu::DeviceDescriptor {
314+
required_features: wgpu::Features::TIMESTAMP_QUERY,
315+
..wgpu::DeviceDescriptor::default()
316+
})
314317
.await?;
315318
Ok::<_, anyhow::Error>(out)
316319
})?;
317320

318-
let mut ctx = fidget::wgpu::render3d::Context::new(device, queue);
321+
let mut ctx = fidget::wgpu::render3d::Context::new(device, queue)?;
319322
let image_size = fidget::render::VoxelSize::from(settings.size);
320323
let cfg = fidget::wgpu::render3d::RenderConfig { world_to_model };
321324
let mut image = Default::default();
322325
let start = std::time::Instant::now();
323326
let buffers = ctx.buffers(image_size);
324327
let shape = ctx.shape(&shape)?;
328+
let mut compute_pass_time = std::time::Duration::ZERO;
325329
for _ in 0..settings.n {
326-
image = ctx.run(&shape, &buffers, cfg);
330+
ctx.submit(&shape, &buffers, &cfg);
331+
let img = ctx.map_image(&buffers);
332+
compute_pass_time += img.time();
333+
image = img.image();
327334
}
328335
info!(
329-
"Rendered {}x at {:?} ms/frame",
336+
"Rendered {}× at {:.2?} ms/frame ({:.2?} ms/compute pass)",
330337
settings.n,
331-
start.elapsed().as_micros() as f64 / 1000.0 / (settings.n as f64)
338+
start.elapsed().as_micros() as f64 / 1000.0 / (settings.n as f64),
339+
compute_pass_time.as_micros() as f64 / 1000.0 / (settings.n as f64)
332340
);
333341
Ok(image)
334342
}

fidget-wgpu/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//! Shader generation and WGPU-based image rendering
2+
#![warn(missing_docs)]
23
pub mod render3d;
34
pub(crate) mod util;
45

fidget-wgpu/src/render3d.rs

Lines changed: 105 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,7 +1261,16 @@ pub struct Buffers {
12611261
geom: wgpu::Buffer,
12621262

12631263
/// Result buffer that can be read back from the host
1264+
///
1265+
/// This is mostly image pixels (as [`GeometryPixel`] values), but also
1266+
/// contains two trailing `u64` values for timestamps.
12641267
image: wgpu::Buffer,
1268+
1269+
/// Query set for timestamps
1270+
timestamps: wgpu::QuerySet,
1271+
1272+
/// Buffer into which we resolve the query
1273+
ts_buf: wgpu::Buffer,
12651274
}
12661275

12671276
impl Buffers {
@@ -1304,14 +1313,27 @@ impl Buffers {
13041313
let image = new_buffer::<GeometryPixel>(
13051314
device,
13061315
"image",
1307-
image_pixels,
1316+
image_pixels + 1, // bonus 16 bytes for query
13081317
wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
13091318
);
13101319

1320+
let ts_buf = new_buffer::<u64>(
1321+
device,
1322+
"ts",
1323+
2,
1324+
wgpu::BufferUsages::QUERY_RESOLVE | wgpu::BufferUsages::COPY_SRC,
1325+
);
1326+
13111327
let tile64 = RootTileBuffers::new(device, render_size);
13121328
let tile16 = TileBuffers::new(device, render_size);
13131329
let tile4 = TileBuffers::new(device, render_size);
13141330

1331+
let timestamps = device.create_query_set(&wgpu::QuerySetDescriptor {
1332+
label: Some("timestamp query set"),
1333+
ty: wgpu::QueryType::Timestamp,
1334+
count: 2,
1335+
});
1336+
13151337
Self {
13161338
image_size,
13171339
tile_tapes,
@@ -1322,6 +1344,8 @@ impl Buffers {
13221344
heightmap,
13231345
geom,
13241346
image,
1347+
timestamps,
1348+
ts_buf,
13251349
}
13261350
}
13271351

@@ -1362,9 +1386,24 @@ impl Buffers {
13621386
}
13631387
}
13641388

1389+
/// Error returned by [`Context::new`] when the provided device cannot be
/// used to build a rendering context
1390+
#[derive(Debug, thiserror::Error)]
1391+
pub enum ContextError {
1392+
/// The `wgpu::Device` does not report `wgpu::Features::TIMESTAMP_QUERY`
/// among its enabled features
///
/// The renderer attaches timestamp writes to its compute pass, so the
/// feature has to be requested when the device is created.
1393+
#[error("WebGPU context must have TIMESTAMP_QUERY feature enabled")]
1394+
RequiresTimestampQuery,
1395+
}
1396+
13651397
impl Context {
13661398
/// Build a new 3D rendering context, given a device and queue
1367-
pub fn new(device: wgpu::Device, queue: wgpu::Queue) -> Self {
1399+
pub fn new(
1400+
device: wgpu::Device,
1401+
queue: wgpu::Queue,
1402+
) -> Result<Self, ContextError> {
1403+
if !device.features().contains(wgpu::Features::TIMESTAMP_QUERY) {
1404+
return Err(ContextError::RequiresTimestampQuery);
1405+
}
1406+
13681407
// Create bind group layout and bind group
13691408
let common_bind_group_layout =
13701409
device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
@@ -1387,7 +1426,7 @@ impl Context {
13871426
let merge_ctx = MergeContext::new(&device, &common_bind_group_layout);
13881427
let clear_ctx = ClearContext::new();
13891428

1390-
Self {
1429+
Ok(Self {
13911430
device,
13921431
queue,
13931432
bind_group_layout: common_bind_group_layout,
@@ -1399,7 +1438,7 @@ impl Context {
13991438
backfill_ctx,
14001439
merge_ctx,
14011440
clear_ctx,
1402-
}
1441+
})
14031442
}
14041443

14051444
/// Builds a new [`Buffers`] object for the given render size
@@ -1432,7 +1471,7 @@ impl Context {
14321471
) -> GeometryBuffer {
14331472
self.submit(shape, buffers, &settings);
14341473
let image = self.map_image(buffers);
1435-
self.read_mapped_image(image)
1474+
image.image()
14361475
}
14371476

14381477
/// Renders the image, with a blocking wait to read pixel data from the GPU
@@ -1481,8 +1520,9 @@ impl Context {
14811520
};
14821521

14831522
{
1484-
// We load the `Config` and the initial tape; the rest of the tape
1485-
// buffer is uninitialized (filled in by the GPU)
1523+
// We load the `Config`; the rest of the tape is already populated
1524+
// in the buffer, and the remaining portion of the buffer is
1525+
// uninitialized (filled in by the GPU).
14861526
let config_len = std::mem::size_of_val(&config);
14871527
let mut writer = self
14881528
.queue
@@ -1506,7 +1546,11 @@ impl Context {
15061546
let mut compute_pass =
15071547
encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
15081548
label: None,
1509-
timestamp_writes: None,
1549+
timestamp_writes: Some(wgpu::ComputePassTimestampWrites {
1550+
query_set: &buffers.timestamps,
1551+
beginning_of_pass_write_index: Some(0),
1552+
end_of_pass_write_index: Some(1),
1553+
}),
15101554
});
15111555

15121556
// Build the common config buffer
@@ -1570,6 +1614,22 @@ impl Context {
15701614
}
15711615
drop(compute_pass);
15721616

1617+
// Resolve the raw GPU ticks into the resolve buffer, then copy them
1618+
// into the last 16 bytes of the image buffer
1619+
encoder.resolve_query_set(
1620+
&buffers.timestamps,
1621+
0..2,
1622+
&buffers.ts_buf,
1623+
0,
1624+
);
1625+
encoder.copy_buffer_to_buffer(
1626+
&buffers.ts_buf,
1627+
0,
1628+
&buffers.image,
1629+
buffers.geom.size(), // offset past the image data
1630+
buffers.ts_buf.size(),
1631+
);
1632+
15731633
// Copy from the STORAGE | COPY_SRC -> COPY_DST | MAP_READ buffer
15741634
encoder.copy_buffer_to_buffer(
15751635
&buffers.geom,
@@ -1593,7 +1653,11 @@ impl Context {
15931653
self.device
15941654
.poll(wgpu::PollType::wait_indefinitely())
15951655
.unwrap();
1596-
MappedImage { buffers, slice }
1656+
MappedImage {
1657+
buffers,
1658+
slice,
1659+
ns_per_tick: self.queue.get_timestamp_period(),
1660+
}
15971661
}
15981662

15991663
/// Asynchronously maps the image buffer
@@ -1609,17 +1673,6 @@ impl Context {
16091673
MappedImage { buffers, slice }
16101674
}
16111675

1612-
/// Reads a mapped image
1613-
pub fn read_mapped_image(&self, image: MappedImage) -> GeometryBuffer {
1614-
// Get the pixel-populated image
1615-
let result =
1616-
<[GeometryPixel]>::ref_from_bytes(&image.slice.get_mapped_range())
1617-
.unwrap()
1618-
.to_owned();
1619-
1620-
GeometryBuffer::build(result, image.buffers.image_size).unwrap()
1621-
}
1622-
16231676
/// Debug function to read a buffer to a `Vec<T>`
16241677
#[allow(unused)]
16251678
fn read_buffer<T: FromBytes + Immutable + Clone + Copy>(
@@ -1658,6 +1711,9 @@ impl Context {
16581711
pub struct MappedImage<'a> {
16591712
/// Buffers that this image was mapped from; `image_size` is read from
/// here when slicing and rebuilding the image
buffers: &'a Buffers,
16601713
/// Mapped buffer slice holding the pixel bytes followed by the trailing
/// timestamp values
// NOTE(review): presumably a slice of `buffers.image` -- confirm against
// the `map_image` implementation.
slice: wgpu::BufferSlice<'a>,
1714+
1715+
/// Nanoseconds per timestamp tick, used to convert the raw tick delta
/// into a duration
ns_per_tick: f32,
16611717
}
16621718

16631719
impl Drop for MappedImage<'_> {
@@ -1666,6 +1722,35 @@ impl Drop for MappedImage<'_> {
16661722
}
16671723
}
16681724

1725+
impl MappedImage<'_> {
1726+
/// Copies the pixel data out of the mapped buffer into an owned
/// [`GeometryBuffer`]
///
/// Only the leading `image_bytes()` bytes of the mapped range are read;
/// the bytes after them (the trailing timestamp values) are skipped.
///
/// # Panics
/// Panics if the pixel bytes cannot be reinterpreted as a
/// `[GeometryPixel]` slice, or if [`GeometryBuffer::build`] rejects the
/// data for `self.buffers.image_size`.
1727+
pub fn image(&self) -> GeometryBuffer {
1728+
// Reinterpret the pixel portion of the mapping in place, then copy it
// into an owned allocation so the result does not borrow the mapping
1729+
let result = <[GeometryPixel]>::ref_from_bytes(
1730+
&self.slice.get_mapped_range()[..self.image_bytes()],
1731+
)
1732+
.unwrap()
1733+
.to_owned();
1734+
GeometryBuffer::build(result, self.buffers.image_size).unwrap()
1735+
}
1736+
1737+
/// Returns the time spent in the compute pass
///
/// The bytes following the pixel data are read as `u64` tick counts; the
/// difference between the second and first value is scaled by
/// `ns_per_tick` to get nanoseconds.
///
/// # Panics
/// Panics if the trailing bytes cannot be reinterpreted as a `[u64]`
/// slice, or if that slice holds fewer than two values.
1738+
pub fn time(&self) -> std::time::Duration {
1739+
let slice = self.slice.get_mapped_range();
1740+
// Everything past the pixel data is timestamp storage
let ts = <[u64]>::ref_from_bytes(&slice[self.image_bytes()..]).unwrap();
1741+
// `saturating_sub` yields zero instead of wrapping if the end tick is
// not greater than the start tick; the product is computed in `f64` and
// the final cast drops any fractional nanoseconds
std::time::Duration::from_nanos(
1742+
(ts[1].saturating_sub(ts[0]) as f64 * self.ns_per_tick as f64)
1743+
as u64,
1744+
)
1745+
}
1746+
1747+
/// Size in bytes of the pixel portion of the mapped buffer
///
/// Computed as `width * height * size_of::<GeometryPixel>()` from
/// `self.buffers.image_size`.
fn image_bytes(&self) -> usize {
1748+
(self.buffers.image_size.width() as usize)
1749+
* (self.buffers.image_size.height() as usize)
1750+
* std::mem::size_of::<GeometryPixel>()
1751+
}
1752+
}
1753+
16691754
struct BackfillContext {
16701755
bind_group_layout: wgpu::BindGroupLayout,
16711756

0 commit comments

Comments
 (0)