- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[HLSL][DXIL][SPIRV] Added WaveActiveBitOr HLSL intrinsic #165156
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ | ||
| // RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \ | ||
| // RUN: FileCheck %s --check-prefixes=CHECK,CHECK-DXIL | ||
| // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ | ||
| // RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ | ||
| // RUN: FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV | ||
|  | ||
| // Test basic lowering to runtime function call. | ||
|  | ||
| // CHECK-LABEL: test_uint | ||
| uint test_uint(uint expr) { | ||
| // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.or.i32([[TY]] %[[#]]) | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see  should look something like this for the first one // DXCHECK: %name = call  @llvm.[[ICF:dx]].<intrinsic_name>.(... successive checks There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is still a separate DX and SPV check? Or do you mean: Because that wouldn't work as SPV needs  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My recommendation is to look at how we did these tests: git grep -n "\[\[ICF:dx\]\]"
clang/test/CodeGenHLSL/builtins/dot.hlsl:23:// DXCHECK: %hlsl.dot = call i32 @llvm.[[ICF:dx]].sdot.v2i32(<2 x i32>
clang/test/CodeGenHLSL/builtins/isinf.hlsl:19:// DXCHECK: %hlsl.isinf = call i1 @llvm.[[ICF:dx]].isinf.f32(
clang/test/CodeGenHLSL/builtins/isnan.hlsl:19:// DXCHECK: %hlsl.isnan = call i1 @llvm.[[ICF:dx]].isnan.f32(
git grep -n "\[\[ICF:spv\]\]"
clang/test/CodeGenHLSL/builtins/dot.hlsl:24:// SPVCHECK: %hlsl.dot = call i32 @llvm.[[ICF:spv]].sdot.v2i32(<2 x i32>
clang/test/CodeGenHLSL/builtins/isinf.hlsl:20:// SPVCHECK: %hlsl.isinf = call i1 @llvm.[[ICF:spv]].isinf.f32(
clang/test/CodeGenHLSL/builtins/isnan.hlsl:20:// SPVCHECK: %hlsl.isnan = call i1 @llvm.[[ICF:spv]].isnan.f32(
git grep -n "\[\[FN_TYP.*\]\]" 
clang/test/CodeGenHLSL/builtins/isinf.hlsl:17:// DXCHECK: define hidden [[FN_TYPE:]]noundef i1 @
clang/test/CodeGenHLSL/builtins/isinf.hlsl:18:// SPVCHECK: define hidden [[FN_TYPE:spir_func ]]noundef i1 @
clang/test/CodeGenHLSL/builtins/isinf.hlsl:24:// CHECK: define hidden [[FN_TYPE]]noundef i1 @
clang/test/CodeGenHLSL/builtins/isinf.hlsl:30:// CHECK: define hidden [[FN_TYPE]]noundef <2 x i1> @
clang/test/CodeGenHLSL/builtins/isinf.hlsl:36:// NATIVE_HALF: define hidden [[FN_TYPE]]noundef <3 x i1> @
clang/test/CodeGenHLSL/builtins/isinf.hlsl:42:// NATIVE_HALF: define hidden [[FN_TYPE]]noundef <4 x i1> @
clang/test/CodeGenHLSL/builtins/isinf.hlsl:49:// CHECK: define hidden [[FN_TYPE]]noundef <2 x i1> @
clang/test/CodeGenHLSL/builtins/isinf.hlsl:54:// CHECK: define hidden [[FN_TYPE]]noundef <3 x i1> @
clang/test/CodeGenHLSL/builtins/isinf.hlsl:59:// CHECK: define hidden [[FN_TYPE]]noundef <4 x i1> @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:17:// DXCHECK: define hidden [[FN_TYPE:]]noundef i1 @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:18:// SPVCHECK: define hidden [[FN_TYPE:spir_func ]]noundef i1 @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:24:// CHECK: define hidden [[FN_TYPE]]noundef i1 @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:30:// CHECK: define hidden [[FN_TYPE]]noundef <2 x i1> @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:36:// NATIVE_HALF: define hidden [[FN_TYPE]]noundef <3 x i1> @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:42:// NATIVE_HALF: define hidden [[FN_TYPE]]noundef <4 x i1> @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:49:// CHECK: define hidden [[FN_TYPE]]noundef <2 x i1> @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:54:// CHECK: define hidden [[FN_TYPE]]noundef <3 x i1> @
clang/test/CodeGenHLSL/builtins/isnan.hlsl:59:// CHECK: define hidden [[FN_TYPE]]noundef <4 x i1> @ | ||
| // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.or.i32([[TY]] %[[#]]) | ||
| // CHECK: ret [[TY]] %[[RET]] | ||
| return WaveActiveBitOr(expr); | ||
| } | ||
|  | ||
| // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.or.i32([[TY]]) #[[#attr:]] | ||
| // CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.or.i32([[TY]]) #[[#attr:]] | ||
|  | ||
| // CHECK-LABEL: test_uint64_t | ||
| uint64_t test_uint64_t(uint64_t expr) { | ||
| // CHECK-SPIRV: %[[RET:.*]] = call spir_func [[TY:.*]] @llvm.spv.wave.reduce.or.i64([[TY]] %[[#]]) | ||
| // CHECK-DXIL: %[[RET:.*]] = call [[TY:.*]] @llvm.dx.wave.reduce.or.i64([[TY]] %[[#]]) | ||
| // CHECK: ret [[TY]] %[[RET]] | ||
| return WaveActiveBitOr(expr); | ||
| } | ||
|  | ||
| // CHECK-DXIL: declare [[TY]] @llvm.dx.wave.reduce.or.i64([[TY]]) #[[#attr:]] | ||
| // CHECK-SPIRV: declare [[TY]] @llvm.spv.wave.reduce.or.i64([[TY]]) #[[#attr:]] | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify | ||
|  | ||
| uint test_too_few_arg() { | ||
| return __builtin_hlsl_wave_active_bit_or(); | ||
| // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} | ||
| } | ||
|  | ||
| uint2 test_too_many_arg(uint2 p0) { | ||
| return __builtin_hlsl_wave_active_bit_or(p0, p0); | ||
| // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} | ||
| } | ||
|  | ||
| bool test_expr_bool_type_check(bool p0) { | ||
| return __builtin_hlsl_wave_active_bit_or(p0); | ||
| // expected-error@-1 {{invalid operand of type 'bool'}} | ||
| } | ||
|  | ||
| float test_expr_float_type_check(float p0) { | ||
| return __builtin_hlsl_wave_active_bit_or(p0); | ||
| // expected-error@-1 {{invalid operand of type 'float'}} | ||
| } | ||
|  | ||
| bool2 test_expr_bool_vec_type_check(bool2 p0) { | ||
| return __builtin_hlsl_wave_active_bit_or(p0); | ||
| // expected-error@-1 {{invalid operand of type 'bool2' (aka 'vector<bool, 2>')}} | ||
| } | ||
|  | ||
| float2 test_expr_float_type_check(float2 p0) { | ||
| return __builtin_hlsl_wave_active_bit_or(p0); | ||
| // expected-error@-1 {{invalid operand of type 'float2' (aka 'vector<float, 2>')}} | ||
| } | ||
|  | ||
| struct S { float f; }; | ||
|  | ||
| S test_expr_struct_type_check(S p0) { | ||
| return __builtin_hlsl_wave_active_bit_or(p0); | ||
| // expected-error@-1 {{invalid operand of type 'S' where a scalar or vector is required}} | ||
| } | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -316,6 +316,10 @@ defvar WaveOpKind_Product = 1; | |
| defvar WaveOpKind_Min = 2; | ||
| defvar WaveOpKind_Max = 3; | ||
|  | ||
| defvar WaveBitOpKind_And = 0; | ||
| defvar WaveBitOpKind_Or = 1; | ||
| defvar WaveBitOpKind_Xor = 2; | ||
|  | ||
| defvar SignedOpKind_Signed = 0; | ||
| defvar SignedOpKind_Unsigned = 1; | ||
|  | ||
|  | @@ -1069,6 +1073,24 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> { | |
| let attributes = [Attributes<DXIL1_0, []>]; | ||
| } | ||
|  | ||
| def WaveActiveBit : DXILOp<120, waveActiveBit> { | ||
| let Doc = "returns the result of the operation across waves"; | ||
| let intrinsics = [ | ||
| IntrinSelect<int_dx_wave_reduce_or, | ||
| [ | ||
| IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_Or>, | ||
| ]>, | ||
| 
      Comment on lines
    
      +1078
     to 
      +1082
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the formatting seems off There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's the same as DXILOp<119>, so what is wrong? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can run tablegen files through clang-format. There is no CI for this so suspect maybe that's why the  | ||
| ]; | ||
|  | ||
| let arguments = [OverloadTy, Int8Ty]; | ||
| let result = OverloadTy; | ||
| let overloads = [ | ||
| Overloads<DXIL1_0, [Int32Ty, Int64Ty]> | ||
| ]; | ||
| let stages = [Stages<DXIL1_0, [all_stages]>]; | ||
| let attributes = [Attributes<DXIL1_0, []>]; | ||
| } | ||
|  | ||
| def WaveAllBitCount : DXILOp<135, waveAllOp> { | ||
| let Doc = "returns the count of bits set to 1 across the wave"; | ||
| let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>]; | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -54,6 +54,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( | |
| case Intrinsic::dx_saturate: | ||
| case Intrinsic::dx_splitdouble: | ||
| case Intrinsic::dx_wave_readlane: | ||
| case Intrinsic::dx_wave_reduce_or: | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you add a line here then you should be testing that this intrinsic gets scalarized There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That happens in WaveActiveBitOr.ll? I am unsure what I would have to add to make a test 'scalar', as the result of the dx or spirv intrinsic is always a scalar value as it is the same value across the wave. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. run these commands and you should find examples  | ||
| case Intrinsic::dx_wave_reduce_max: | ||
| case Intrinsic::dx_wave_reduce_sum: | ||
| case Intrinsic::dx_wave_reduce_umax: | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -219,6 +219,9 @@ class SPIRVInstructionSelector : public InstructionSelector { | |
| bool selectDot4AddPackedExpansion(Register ResVReg, const SPIRVType *ResType, | ||
| MachineInstr &I) const; | ||
|  | ||
| bool selectWaveReduceOr(Register ResVReg, const SPIRVType *ResType, | ||
| MachineInstr &I) const; | ||
|  | ||
| bool selectWaveReduceMax(Register ResVReg, const SPIRVType *ResType, | ||
| MachineInstr &I, bool IsUnsigned) const; | ||
|  | ||
|  | @@ -2012,8 +2015,7 @@ bool SPIRVInstructionSelector::selectAnyOrAll(Register ResVReg, | |
| Register InputRegister = I.getOperand(2).getReg(); | ||
| SPIRVType *InputType = GR.getSPIRVTypeForVReg(InputRegister); | ||
|  | ||
| if (!InputType) | ||
| report_fatal_error("Input Type could not be determined."); | ||
| assert(InputType && "VReg has no type assigned"); | ||
|  | ||
| bool IsBoolTy = GR.isScalarOrVectorOfType(InputRegister, SPIRV::OpTypeBool); | ||
| bool IsVectorTy = InputType->getOpcode() == SPIRV::OpTypeVector; | ||
|  | @@ -2427,6 +2429,32 @@ bool SPIRVInstructionSelector::selectWaveActiveCountBits( | |
| return Result; | ||
| } | ||
|  | ||
| bool SPIRVInstructionSelector::selectWaveReduceOr( | ||
| Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { | ||
|  | ||
| assert(I.getNumOperands() == 3); | ||
| assert(I.getOperand(2).isReg()); | ||
| MachineBasicBlock &BB = *I.getParent(); | ||
| Register InputRegister = I.getOperand(2).getReg(); | ||
| SPIRVType *InputType = GR.getSPIRVTypeForVReg(InputRegister); | ||
|  | ||
| if (!InputType) | ||
| report_fatal_error("Input Type could not be determined."); | ||
|         
                  KungFuDonkey marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
|  | ||
| SPIRVType *IntTy = GR.getOrCreateSPIRVIntegerType(32, I, TII); | ||
|  | ||
| auto Opcode = SPIRV::OpGroupNonUniformBitwiseOr; | ||
|  | ||
| return BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode)) | ||
| .addDef(ResVReg) | ||
| .addUse(GR.getSPIRVTypeID(ResType)) | ||
| .addUse(GR.getOrCreateConstInt(SPIRV::Scope::Subgroup, I, IntTy, TII, | ||
| !STI.isShader())) | ||
| .addImm(SPIRV::GroupOperation::Reduce) | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see this is why you are adding reduce to the name. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, should I still rename wave_reduce_or? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, OpGroupBroadcast, OpGroupReduce, and OpGroupGather are SPIR-V implementation details. I don’t think it makes sense to take the language of SPIR-V and apply it broadly across all the llvm intrinsics needed by this feature. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I am a little conflicted because it seems like previous implementers are already adding reduce into the intrinsic names. This might be a question for @Keenuts. | ||
| .addUse(I.getOperand(2).getReg()) | ||
| .constrainAllUses(TII, TRI, RBI); | ||
| } | ||
|  | ||
| bool SPIRVInstructionSelector::selectWaveReduceMax(Register ResVReg, | ||
| const SPIRVType *ResType, | ||
| MachineInstr &I, | ||
|  | @@ -3427,6 +3455,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, | |
| return selectWaveOpInst(ResVReg, ResType, I, SPIRV::OpGroupNonUniformAny); | ||
| case Intrinsic::spv_wave_is_first_lane: | ||
| return selectWaveOpInst(ResVReg, ResType, I, SPIRV::OpGroupNonUniformElect); | ||
| case Intrinsic::spv_wave_reduce_or: | ||
| return selectWaveReduceOr(ResVReg, ResType, I); | ||
| case Intrinsic::spv_wave_reduce_umax: | ||
| return selectWaveReduceMax(ResVReg, ResType, I, /*IsUnsigned*/ true); | ||
| case Intrinsic::spv_wave_reduce_max: | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| ; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s | ||
|  | ||
| define noundef i32 @wave_bitor_simple(i32 noundef %p1) { | ||
| entry: | ||
| ; CHECK: call i32 @dx.op.waveActiveBit.i32(i32 120, i32 %p1, i8 1){{$}} | ||
| %ret = call i32 @llvm.dx.wave.reduce.or.i32(i32 %p1) | ||
| ret i32 %ret | ||
| } | ||
|  | ||
| declare i32 @llvm.dx.wave.reduce.or.i32(i32) | ||
|  | ||
| define noundef i64 @wave_bitor_simple64(i64 noundef %p1) { | ||
| entry: | ||
| ; CHECK: call i64 @dx.op.waveActiveBit.i64(i32 120, i64 %p1, i8 1){{$}} | ||
| %ret = call i64 @llvm.dx.wave.reduce.or.i64(i64 %p1) | ||
| ret i64 %ret | ||
| } | ||
|  | ||
| declare i64 @llvm.dx.wave.reduce.or.i64(i64) | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s | ||
| ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %} | ||
|  | ||
| ; Test lowering to spir-v backend for various types and scalar/vector | ||
|  | ||
| ; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 | ||
| ; CHECK-DAG: %[[#uint64:]] = OpTypeInt 64 0 | ||
| ; CHECK-DAG: %[[#scope:]] = OpConstant %[[#uint]] 3 | ||
|  | ||
| ; CHECK-LABEL: Begin function test_uint | ||
| ; CHECK: %[[#iexpr:]] = OpFunctionParameter %[[#uint]] | ||
| define i32 @test_uint(i32 %iexpr) { | ||
| entry: | ||
| ; CHECK: %[[#iret:]] = OpGroupNonUniformBitwiseOr %[[#uint]] %[[#scope]] Reduce %[[#iexpr]] | ||
| %0 = call i32 @llvm.spv.wave.reduce.or.i32(i32 %iexpr) | ||
| ret i32 %0 | ||
| } | ||
|  | ||
| declare i32 @llvm.spv.wave.reduce.or.i32(i32) | ||
|  | ||
| ; CHECK-LABEL: Begin function test_uint64 | ||
| ; CHECK: %[[#iexpr64:]] = OpFunctionParameter %[[#uint64]] | ||
| define i64 @test_uint64(i64 %iexpr64) { | ||
| entry: | ||
| ; CHECK: %[[#iret:]] = OpGroupNonUniformBitwiseOr %[[#uint64]] %[[#scope]] Reduce %[[#iexpr64]] | ||
| %0 = call i64 @llvm.spv.wave.reduce.or.i64(i64 %iexpr64) | ||
| ret i64 %0 | ||
| } | ||
|  | ||
| declare i64 @llvm.spv.wave.reduce.or.i64(i64) | 
Uh oh!
There was an error while loading. Please reload this page.