Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 5 additions & 40 deletions crates/rustc_codegen_spirv/src/linker/duplicates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use rspirv::binary::Assemble;
use rspirv::dr::{Instruction, Module, Operand};
use rspirv::spirv::{Op, Word};
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::bug;
use smallvec::SmallVec;
use std::collections::hash_map;
use std::mem;
Expand Down Expand Up @@ -104,24 +103,10 @@ fn gather_annotations(annotations: &[Instruction]) -> FxHashMap<Word, Vec<u32>>
.collect()
}

fn gather_names(debug_names: &[Instruction]) -> FxHashMap<Word, String> {
debug_names
.iter()
.filter(|inst| inst.class.opcode == Op::Name)
.map(|inst| {
(
inst.operands[0].unwrap_id_ref(),
inst.operands[1].unwrap_literal_string().to_owned(),
)
})
.collect()
}

fn make_dedupe_key(
inst: &Instruction,
unresolved_forward_pointers: &FxHashSet<Word>,
annotations: &FxHashMap<Word, Vec<u32>>,
names: &FxHashMap<Word, String>,
) -> Vec<u32> {
let mut data = vec![inst.class.opcode as u32];

Expand All @@ -144,29 +129,10 @@ fn make_dedupe_key(
op.assemble_into(&mut data);
}
}
if let Some(id) = inst.result_id {
if let Some(annos) = annotations.get(&id) {
data.extend_from_slice(annos);
}
if inst.class.opcode == Op::Variable {
// Names only matter for OpVariable.
if let Some(name) = names.get(&id) {
// Jump through some hoops to shove a String into a Vec<u32>.
//
// FIXME(eddyb) this should `.assemble_into(&mut data)` the
// `Operand::LiteralString(...)` from the original `Op::Name`.
for chunk in name.as_bytes().chunks(4) {
let slice = match *chunk {
[a] => [a, 0, 0, 0],
[a, b] => [a, b, 0, 0],
[a, b, c] => [a, b, c, 0],
[a, b, c, d] => [a, b, c, d],
_ => bug!(),
};
data.push(u32::from_le_bytes(slice));
}
}
}
if let Some(id) = inst.result_id
&& let Some(annos) = annotations.get(&id)
{
data.extend_from_slice(annos);
}

data
Expand Down Expand Up @@ -198,7 +164,6 @@ pub fn remove_duplicate_types(module: &mut Module) {

// Collect a map from type ID to an annotation "key blob" (to append to the type key)
let annotations = gather_annotations(&module.annotations);
let names = gather_names(&module.debug_names);

for inst in &mut module.types_global_values {
if inst.class.opcode == Op::TypeForwardPointer
Expand All @@ -222,7 +187,7 @@ pub fn remove_duplicate_types(module: &mut Module) {
// all_inst_iter_mut pass below. However, the code is a lil bit cleaner this way I guess.
rewrite_inst_with_rules(inst, &rewrite_rules);

let key = make_dedupe_key(inst, &unresolved_forward_pointers, &annotations, &names);
let key = make_dedupe_key(inst, &unresolved_forward_pointers, &annotations);

match key_to_result_id.entry(key) {
hash_map::Entry::Vacant(entry) => {
Expand Down
60 changes: 60 additions & 0 deletions crates/spirv-std/src/builtin.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//! Functionality to declare builtins, mostly proc macros
//!
//! # Making built-in functions for `spirv-std`
//!
//! Usually, built-ins are implemented as freestanding functions in `spirv-std`. We like to keep function declaration
//! outside the macro to make it easier for users to browse the source code.
//!
//! Example on how to declare an Input Built-in:
//! ```no_run
//! # use spirv_std_macros::gpu_only;
//! #
//! /// GLSL docs short description in #Name section. Remove the first "Contains " since we're using getters instead
//! /// of globals, capitalize and add a dot to the end.
//! ///
//! /// GLSL docs full #Description section.
//! ///
//! /// We're using GLSL documentation of this built-in, which is usually more descriptive than the SPIR-V or WGSL docs.
//! /// Change all references to link with rust-gpu intrinsics.
//! ///
//! /// Update the links of GLSL and WGSL to reference the correct page, keep SPIR-V as is. GLSL may link to the
//! /// [reference](https://registry.khronos.org/OpenGL-Refpages/gl4/) or to the
//! /// [glsl extensions github repo](https://github.com/KhronosGroup/GLSL/tree/main/extensions).
//! /// * GLSL: [`gl_MyBuiltIn`](https://registry.khronos.org/OpenGL-Refpages/gl4/html/gl_LocalInvocationID.xhtml)
//! /// * WGSL: [`my_built_in`](https://www.w3.org/TR/WGSL/#local-invocation-id-builtin-value)
//! /// * SPIRV: [`MyBuiltIn`](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_builtin)
//! #[doc(alias = "gl_MyBuiltIn")]
//! #[doc(alias = "MyBuiltIn")]
//! #[inline]
//! #[gpu_only]
//! pub fn my_built_in() -> u32 {
//! crate::load_builtin!(MyBuiltIn)
//! }
//! ```
//!
//! Reference links:
//! * [WGSL specification describing builtins](https://www.w3.org/TR/WGSL/#builtin-inputs-outputs)
//! * [SPIR-V specification for builtins](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_builtin)
//! * [GLSL reference](https://registry.khronos.org/OpenGL-Refpages/gl4/)
//! * [GLSL reference source code](https://github.com/KhronosGroup/OpenGL-Refpages/tree/main/gl4)
//! * [GLSL extensions](https://github.com/KhronosGroup/GLSL/tree/main/extensions)

/// Query SPIR-V (read-only global) built-in values
///
/// See [module level documentation] on how to use these.
#[macro_export]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I deliberately kept load_builtin! private as an implementation detail. As is, it's very unsafe. Also easy to make something that won't compile but will have an inscrutable error message, or won't validate.

I think this should remain an internal implementation detail.

macro_rules! load_builtin {
($name:ident $(: $ty:ty)?) => {
unsafe {
let mut result $(: $ty)? = Default::default();
::core::arch::asm! {
"%builtin = OpVariable typeof{result_ref} Input",
concat!("OpDecorate %builtin BuiltIn ", stringify!($name)),
"%result = OpLoad typeof*{result_ref} %builtin",
"OpStore {result_ref} %result",
result_ref = in(reg) &mut result,
}
result
}
};
}
93 changes: 93 additions & 0 deletions crates/spirv-std/src/compute.rs
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separating out all the builtins into modules in the root makes it a little harder to find them all. I think (especially for compute), someone using the subgroup builtins are also likely to use the compute builtins, so keeping these modules nested under crate::builtin makes sense to me.

In std (and other crates) I've seen people keep all the low level primitives / intrinsics in std::arch or similar. I think that pattern makes sense.

Any higher-level abstraction on top such as safe collections (alloc::Vec in std or whatever parallel collection for spirv here) could then have its own module in the root.

Copy link
Member

@Firestar99 Firestar99 Mar 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This won't work for the graphics builtins. Here's a message I sent internally last week about this:

  • Easy case: Some built-ins can be "inherited", eg. local_invocation_id from compute shaders are used in mesh shaders and ray gen shaders, cause they fundamentally function like augmented compute shaders. Will likely just define it once for compute shaders and you can just reuse them.
  • medium case: gl_PrimitiveID
    • vertex, tessellation & fragment shaders: an input
    • geometry shaders:
      • gl_PrimitiveIDIn: an input
      • gl_PrimitiveID: an output
    • mesh shaders:
      • gl_MeshPerPrimitiveEXT[].gl_PrimitiveID unsized per-primitive output array.
  • Hard case: gl_Position
    • vertex shader: an output variable that is written to via gl_Position
    • tessellation control | evaluation shaders:
      • gl_in[gl_MaxPatchVertices].gl_Position: a sized arrayed input variable you can read to get the Nth vertex position
      • gl_Position: an output variable you can write
    • geometry shader:
      • an unsized arrayed input variable you can read to get the Nth vertex position via gl_in[].gl_Position
      • gl_Position: an output variable you can write

If you split them into one module per shader type (like in #540), you can define the builtin for each shader separately and exactly like is required.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I meant 1 module per shader type, so crate::builtins::compute::*, crate::builtins::vertex::*, etc. Just as you had in your first pull request and first commit.

As I mentioned elsewhere, but you may have missed, I think output builtin setters would be confusing, as it will be difficult to see in their code when and where they are set.

For compute shaders, I think you always have all the input builtins, whereas e.g. for vertex what is available depends more on what has been configured, is that correct? I also think it would be confusing to not be able to see all the inputs params in one place.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving just the subgroup builtins from subgroup to compute would be confusing, as these builtins are also hidden behind subgroup capabilities. I wonder if #540 should move spirv_std::arch::subgroup to spirv_std::compute::subgroup instead of spirv_std::subgroup? But I feel like that's creating extra module depth without being any useful.

On output setters: I also think they need more design work, which is why I separated out the compute parts and we'll do graphics later. But have you looked at the current function declaration? Cause it's very much not a setter: fn decl_position_out() -> &'static mut Vec4. Calling it declares the position output variable, and returns a mutable reference to write the position to, thus making it unique. This design still has the issue that you can call it multiple times to declare multiple output positions, which is illegal per shader. And incredibly difficult to validate, since we're sorta forced to merge all variables of the same type, as one crate's function could be inlined in two other crates even though it's only one function.

So I'm currently thinking of having these functions marked as unsafe and hide them a little, so they're only used via the new proc macro (to be designed).

Another idea I just had: We may also be better off with a wrapper type like struct Position(&'static mut Vec4) that represents the OpVariable and is passed as an arg to the entry point, which would prevent duplication problems.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

crate::builtins::subgroup::* for the subgroup ones I think is fine if you want them separate from compute, e.g. if they are re-used for other shader types.

Vulkan 1.1 (released 2018) included the subgroup capabilities I believe, so I doubt they are rare enough to be packaged away separately. From a quick check on vulkan.gpuinfo.org reports (from the last 2 years, excluding CPU implementations), 0.4% of reports supported none of the subgroup features.

Output fns: whether it is a direct setter or returns an &mut, the key problem to me is it is not clear if and where the output value is set. If it's an entry point parameter, you can grep or just read what writes that parameter, and if the parameter is passed as an argument to other fns you must inspect those also. If it's written with an output fn, I think you would need to search the whole codebase for use of that output fn, then figure out which paths are actually called.

Fine in small examples, potentially very nasty in a framework. It's the difference between finding usages of a local variable and a global variable across your code base.

having these functions marked as unsafe and hide them a little, so they're only used via the new proc macro

Sounds fine if they're #[doc(hidden)].

wrapper type like struct Position(&'static mut Vec4) that represents the OpVariable and is passed as an arg to the entry point

Like my first PR? Sounds good to me for a typed unique output variable :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For setting a variable at most once, the output wrapper can be opaque and consumed when you set the output: fn set(self: Position, v: Vec4).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait crate::builtins::subgroup::* there are no modules in builtins, they're all in the crate root. So crate::compute contains the compute builtins, crate::subgroup contains all the subgroup intrinsics and the subgroup builtins, and similarly crate::fragment contains kill(), demote_to_helper_invocation() and will contain fragment builtins such as frag_coord() -> Vec4 in future PRs.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know you have that now, I was proposing low level stuff is grouped somewhere (as crate::arch is now).

But thinking again I prefer grouping by shader type and use case, as you describe.

As you probably know, ray tracing shaders also have subgroup builtins but with different semantics as the subgroup IDs can change during the invocation.

I think I would like the module docs for crate::compute and crate::raytrace to include "see also: crate::subgroup" or similar, if they are both using the same subgroup builtins there.

Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//! compute shader built-ins

use glam::UVec3;

/// The index of work item currently being operated on by a compute shader.
///
/// In the compute language, [`local_invocation_id`] is an input variable containing the n-dimensional index of the
/// local work invocation within the work group that the current shader is executing in. The possible values for this
/// variable range across the local work group size, i.e., `(0,0,0)` to
/// `(workgroup_size.x - 1, workgroup_size.y - 1, workgroup_size.z - 1)`.
///
/// * GLSL: [`gl_LocalInvocationID`](https://registry.khronos.org/OpenGL-Refpages/gl4/html/gl_LocalInvocationID.xhtml)
/// * WGSL: [`local_invocation_id`](https://www.w3.org/TR/WGSL/#local-invocation-id-builtin-value)
/// * SPIR-V: [`LocalInvocationId`](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_builtin)
#[doc(alias = "gl_LocalInvocationID")]
#[doc(alias = "LocalInvocationId")]
#[inline]
#[gpu_only]
pub fn local_invocation_id() -> UVec3 {
crate::load_builtin!(LocalInvocationId)
}

/// The local linear index of work item currently being operated on by a compute shader.
///
/// In the compute language, [`local_invocation_index`] is a derived input variable containing the 1-dimensional
/// linearized index of the work invocation within the work group that the current shader is executing on. The value of
/// [`local_invocation_index`] is equal to [`local_invocation_id`]`.z * workgroup_size.x * workgroup_size.y`
/// `+ `[`local_invocation_id`]`.y * workgroup_size.x + `[`local_invocation_id`]`.x`.
///
/// * GLSL: [`gl_LocalInvocationIndex`](https://registry.khronos.org/OpenGL-Refpages/gl4/html/gl_LocalInvocationIndex.xhtml)
/// * WGSL: [`local_invocation_index`](https://www.w3.org/TR/WGSL/#local-invocation-index-builtin-value)
/// * SPIR-V: [`LocalInvocationIndex`](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_builtin)
#[doc(alias = "gl_LocalInvocationIndex")]
#[doc(alias = "LocalInvocationIndex")]
#[inline]
#[gpu_only]
pub fn local_invocation_index() -> u32 {
crate::load_builtin!(LocalInvocationIndex)
}

/// The global index of work item currently being operated on by a compute shader.
///
/// In the compute language, [`global_invocation_id`] is a derived input variable containing the n-dimensional index of
/// the work invocation within the global work group that the current shader is executing on. The value of
/// [`global_invocation_id`] is equal to [`workgroup_id`]` * workgroup_size + `[`local_invocation_id`].
///
/// * GLSL: [`gl_GlobalInvocationID`](https://registry.khronos.org/OpenGL-Refpages/gl4/html/gl_GlobalInvocationID.xhtml)
/// * WGSL: [`global_invocation_id`](https://www.w3.org/TR/WGSL/#global-invocation-index-builtin-value)
/// * SPIR-V: [`GlobalInvocationId`](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_builtin)
#[doc(alias = "gl_GlobalInvocationID")]
#[doc(alias = "GlobalInvocationId")]
#[inline]
#[gpu_only]
pub fn global_invocation_id() -> UVec3 {
crate::load_builtin!(GlobalInvocationId)
}

// custom: do not mention `glDispatchCompute` directly, be more general across APIs
/// The number of workgroups that have been dispatched to a compute shader.
///
/// In the compute language, [`num_workgroups`] contains the total number of work groups that will execute the compute
/// shader. The components of [`num_workgroups`] are equal to the `x`, `y`, and `z` parameters passed to the dispatch
/// command.
///
/// * GLSL: [`gl_NumWorkGroups`](https://registry.khronos.org/OpenGL-Refpages/gl4/html/gl_NumWorkGroups.xhtml)
/// * WGSL: [`num_workgroups`](https://www.w3.org/TR/WGSL/#num-workgroups-builtin-value)
/// * SPIR-V: [`NumWorkgroups`](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_builtin)
#[doc(alias = "gl_NumWorkGroups")]
#[doc(alias = "NumWorkgroups")]
#[inline]
#[gpu_only]
pub fn num_workgroups() -> UVec3 {
crate::load_builtin!(NumWorkgroups)
}

// custom: do not mention `glDispatchCompute` directly, be more general across APIs
/// The index of the workgroup currently being operated on by a compute shader.
///
/// In the compute language, [`workgroup_id`] contains the 3-dimensional index of the global work group that the current
/// compute shader invocation is executing within. The possible values range across the parameters passed into the
/// dispatch command, i.e., from `(0, 0, 0)` to
/// `(`[`num_workgroups`]`.x - 1, `[`num_workgroups`]`.y - 1, `[`num_workgroups`]`.z - 1)`.
///
/// * GLSL: [`gl_WorkGroupID`](https://registry.khronos.org/OpenGL-Refpages/gl4/html/gl_WorkGroupID.xhtml)
/// * WGSL: [`workgroup_id`](https://www.w3.org/TR/WGSL/#workgroup-id-builtin-value)
/// * SPIR-V: [`WorkgroupId`](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_builtin)
#[doc(alias = "gl_WorkGroupID")]
#[doc(alias = "WorkgroupId")]
#[inline]
#[gpu_only]
pub fn workgroup_id() -> UVec3 {
crate::load_builtin!(WorkgroupId)
}
2 changes: 2 additions & 0 deletions crates/spirv-std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ pub use macros::{debug_printf, debug_printfln};
pub mod arch;
pub mod atomic;
pub mod barrier;
pub mod builtin;
pub mod byte_addressable_buffer;
pub mod compute;
pub mod debug_printf;
pub mod float;
pub mod fragment;
Expand Down
Loading
Loading