|
| 1 | +:extension_name: SPV_INTEL_float4 |
| 2 | + |
| 3 | +:hf4_capability_name: Float4E2M1INTEL |
| 4 | +:hf4_capability_token: 6212 |
| 5 | +:hf4_matrix_capability_name: Float4E2M1CooperativeMatrixINTEL |
| 6 | +:hf4_matrix_capability_token: 6213 |
| 7 | +:hf4_encoding: 6214 |
| 8 | + |
| 9 | +:khr_matrix_capability_name: CooperativeMatrixKHR |
| 10 | + |
| 11 | +:joint_matrix_url: https://github.com/intel/llvm/tree/sycl/sycl/doc/design/spirv-extensions/SPV_INTEL_joint_matrix.asciidoc |
| 12 | +:fp_conv_url: https://github.com/intel/llvm/tree/sycl/sycl/doc/design/spirv-extensions/SPV_INTEL_fp_conversions.asciidoc |
| 13 | +:coop_matrix_url: https://github.khronos.org/SPIRV-Registry/extensions/KHR/SPV_KHR_cooperative_matrix.html |
| 14 | +:bfloat16_url: https://github.khronos.org/SPIRV-Registry/extensions/KHR/SPV_KHR_bfloat16.html |
| 15 | +:fp8_url: https://github.khronos.org/SPIRV-Registry/extensions/EXT/SPV_EXT_float8.html |
| 16 | + |
| 17 | +{extension_name} |
| 18 | +================ |
| 19 | + |
| 20 | + |
| 21 | +== Name Strings |
| 22 | + |
| 23 | +{extension_name} |
| 24 | + |
| 25 | +== Contributors |
| 26 | + |
| 27 | +- Dmitry Sidorov, Intel + |
| 28 | +- Victor Mustya, Intel + |
| 29 | +- Ben Ashbaugh, Intel + |
| 30 | +- Dounia Khaldi, Intel + |
| 31 | +- Joe Garvey, Intel + |
| 32 | +- Greg Lueck, Intel + |
| 33 | +- Pawel Jurek, Intel + |
| 34 | + |
| 35 | +Notice |
| 36 | +------ |
| 37 | +
|
| 38 | +Copyright (c) 2025 Intel Corporation. All rights reserved. |
| 39 | +
|
| 40 | +Status |
| 41 | +------ |
| 42 | + |
| 43 | +* Working Draft |
| 44 | + |
| 45 | +This is a preview extension specification, intended to provide early access to |
| 46 | +a feature for review and community feedback. When the feature matures, this |
| 47 | +specification may be released as a formal extension. |
| 48 | + |
| 49 | +Because the interfaces defined by this specification are not final and are |
| 50 | +subject to change, they are not intended to be used by shipping software |
| 51 | +products. If you are interested in using this feature in your software product, |
| 52 | +please let us know! |
| 53 | + |
| 54 | +== Version |
| 55 | + |
| 56 | +[width="40%",cols="25,25"] |
| 57 | +|======================================== |
| 58 | +| Last Modified Date | 2025-10-24 |
| 59 | +| Revision | 2 |
| 60 | +|======================================== |
| 61 | + |
| 62 | +== Dependencies |
| 63 | + |
| 64 | +This extension is written against the SPIR-V Specification, |
| 65 | +Version 1.6 Revision 4. |
| 66 | + |
| 67 | +This extension interacts with {coop_matrix_url}[*SPV_KHR_cooperative_matrix*] extension. |
| 68 | + |
| 69 | +This extension interacts with {joint_matrix_url}[*SPV_INTEL_joint_matrix*] extension. |
| 70 | + |
| 71 | +This extension interacts with {bfloat16_url}[*SPV_KHR_bfloat16*] extension. |
| 72 | + |
| 73 | +This extension interacts with {fp8_url}[*SPV_EXT_float8*] extension. |
| 74 | + |
| 75 | +This extension interacts with {fp_conv_url}[*SPV_INTEL_fp_conversions*] extension. |
| 76 | + |
| 77 | +This extension requires SPIR-V 1.0. |
| 78 | + |
| 79 | +Overview |
| 80 | +-------- |
| 81 | +
|
| 82 | +This extension extends the *OpTypeFloat* instruction to enable the definition of the `FP4E2M1` |
| 83 | +floating-point format, which has one sign bit, two exponent bits and one mantissa bit. |
| 84 | +
|
| 85 | +The `FP4E2M1` special values are defined by the table below. |
| 86 | +
|
| 87 | +[options="header"] |
| 88 | +[width="80%"] |
| 89 | +[cols="1,2"] |
| 90 | +|==== |
| 91 | +| ^| `FP4E2M1` |
| 92 | +| Exponent Bias | 1 |
| 93 | +| Max normal |
| 94 | +| S.11.1 = 6.0 (1.5 * 2^2^) |
| 95 | +
|
| 96 | +| Min normal |
| 97 | +| S.01.0 = 1.0 (1.0 * 2^0^) |
| 98 | +
|
| 99 | +| Max subnormal |
| 100 | +| S.00.1 = 0.5 (0.5 * 2^0^) |
| 101 | +
|
| 102 | +| Min subnormal |
| 103 | +| S.00.1 = 0.5 (0.5 * 2^0^) |
| 104 | +
|
| 105 | +| Infinity | N/A |
| 106 | +| NaN | N/A |
| 107 | +
|
| 108 | +|==== |
| 109 | +
|
| 110 | +== Modifications to the SPIR-V Specification, Version 1.6 |
| 111 | +
|
| 112 | +Binary Form |
| 113 | +~~~~~~~~~~~ |
| 114 | +
|
| 115 | +FP Encoding |
| 116 | +~~~~~~~~~~~ |
| 117 | +
|
| 118 | +Add a new enum: |
| 119 | +
|
| 120 | +-- |
| 121 | +[cols="^2,14,2,4",options="header",width = "100%"] |
| 122 | +|==== |
| 123 | +2+^.^| FP Encoding | Width(s) | Enabling Capabilities |
| 124 | +| {hf4_encoding} | *Float4E2M1INTEL* + |
| 125 | +The floating point type is encoded as a 4-bit float type. |
| 126 | +This is encoded with the following encoding parameters: + |
| 127 | +
|
| 128 | + - _bias_ is 1 |
| 129 | + + |
| 130 | + - _sign bit_ is 1 |
| 131 | + + |
| 132 | + - _w_ (exponent) is 2 |
| 133 | + + |
| 134 | + - _t_ (significand) is 1 |
| 135 | + + |
| 136 | + - _k_ (width) is 4 |
| 137 | +| 4 | *Float4E2M1INTEL* |
| 138 | +
|
| 139 | +|==== |
| 140 | +-- |
| 141 | +
|
| 142 | +=== Capabilities |
| 143 | +
|
| 144 | +Modify Section 3.31, Capability, adding rows to the Capability table: |
| 145 | +
|
| 146 | +-- |
| 147 | +[options="header"] |
| 148 | +|==== |
| 149 | +2+^| Capability ^| Implicitly Declares |
| 150 | +| {hf4_capability_token} | *{hf4_capability_name}* + |
| 151 | +Uses *Float4E2M1INTEL* floating-point encoding. + |
| 152 | +| |
| 153 | +| {hf4_matrix_capability_token} | *{hf4_matrix_capability_name}* | *{khr_matrix_capability_name}* |
| 154 | +|==== |
| 155 | +-- |
| 156 | +
|
| 157 | +=== Memory Layout |
| 158 | +
|
| 159 | +Add to Section 2.18.1, Memory Layout, the `FP4E2M1` layout: |
| 160 | +
|
| 161 | +Scalar floating point variables with a `Width` of 4 can only be declared in the `Private` or `Function` storage classes. |
| 162 | +In other storage classes, they must be included in an `OpTypeVector` with an even `Component Count`, where the first component in every pair is in bits 0-3 of the corresponding byte, and the second component is in bits 4-7. |
| 163 | +
|
| 164 | +=== Instructions |
| 165 | +
|
| 166 | +==== 3.42.11. Conversion Instructions |
| 167 | +
|
| 168 | +* Add the following paragraphs to *OpFConvert*: |
| 169 | + + |
| 170 | +When converting to floating-point values with the *Float4E2M1INTEL* encoding, out-of-range |
| 171 | +values and infinities are converted to the largest representable finite value with a matching sign. |
| 172 | +Conversion from NaNs is implementation-defined. + |
| 173 | + + |
| 174 | +
|
| 175 | +==== 3.49.6. Type-Declaration Instructions |
| 176 | +
|
| 177 | +Add the following requirement to *OpTypeCooperativeMatrixKHR*: |
| 178 | +
|
| 179 | +If _Component Type_ has a *Float4E2M1INTEL* encoding then *{hf4_matrix_capability_name}* must be declared. |
| 180 | +
|
| 181 | +Validation Rules |
| 182 | +~~~~~~~~~~~~~~~~ |
| 183 | +
|
| 184 | +Add the following bullets to section 2.16.1, Universal Validation Rules: |
| 185 | +
|
| 186 | + * Variables with a type that is or includes a floating-point type with the *Float4E2M1INTEL* encoding must only be used with the following instructions: |
| 187 | + ** https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_miscellaneous_instructions[Miscellaneous Instructions] : |
| 188 | + *** OpUndef |
| 189 | + ** https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_constant_creation_instructions[Constant Creation Instructions] : |
| 190 | + *** OpConstant |
| 191 | + *** OpConstantNull |
| 192 | + *** OpConstantOp |
| 193 | + *** OpConstantComposite |
| 194 | + *** OpConstantCompositeContinuedINTEL |
| 195 | + *** OpCooperativeMatrixConstructCheckedINTEL |
| 196 | + *** OpSpecConstant |
| 197 | + *** OpSpecConstantOp |
| 198 | + *** OpSpecConstantComposite |
| 199 | + *** OpSpecConstantCompositeContinuedINTEL |
| 200 | + ** https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_arithmetic_instructions[Arithmetic Instructions] : |
| 201 | + *** OpCooperativeMatrixMulAddKHR |
| 202 | + *** OpCooperativeMatrixMulAddScaledINTEL |
| 203 | + ** https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_composite_instructions[Composite Instructions] : |
| 204 | + *** OpVectorExtractDynamic |
| 205 | + *** OpVectorInsertDynamic |
| 206 | + *** OpVectorShuffle |
| 207 | + *** OpCompositeConstruct |
| 208 | + *** OpCompositeExtract |
| 209 | + *** OpCompositeInsert |
| 210 | + *** OpCopyObject |
| 211 | + *** OpCopyLogical |
| 212 | + ** https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_memory_instructions[Memory Instructions] : |
| 213 | + *** OpPtrEqual |
| 214 | + *** OpPtrNotEqual |
| 215 | + *** OpPtrDiff |
| 216 | + *** OpCooperativeMatrixLoadKHR |
| 217 | + *** OpCooperativeMatrixStoreKHR |
| 218 | + *** OpCooperativeMatrixLoadCheckedINTEL |
| 219 | + *** OpCooperativeMatrixStoreCheckedINTEL |
| 220 | + ** https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_function_instructions[Function Instructions] : |
| 221 | + *** OpFunction |
| 222 | + *** OpFunctionParameter |
| 223 | + *** OpFunctionCall |
| 224 | + ** https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_conversion_instructions[Conversion Instructions] : |
| 225 | + *** OpConvertSToF |
| 226 | + *** OpFConvert |
| 227 | + *** OpConvertPtrToU |
| 228 | + *** OpConvertUToPtr |
| 229 | + *** OpPtrCastToGeneric |
| 230 | + *** OpGenericCastToPtr |
| 231 | + *** OpGenericCastToPtrExplicit |
| 232 | + *** OpBitcast |
| 233 | + *** OpClampConvertFToFINTEL |
| 234 | + *** OpBiasedRoundFToFINTEL |
| 235 | + *** OpClampBiasedRoundFToFINTEL |
| 236 | + *** OpBiasedRoundFToSINTEL |
| 237 | + ** https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_control_flow_instructions[Control-Flow Instructions] : |
| 238 | + *** OpReturnValue |
| 239 | + *** OpSelect |
| 240 | + *** OpPhi |
| 241 | + *** OpLifetimeStart |
| 242 | + *** OpLifetimeStop |
| 243 | +
|
| 244 | +=== Issues |
| 245 | +
|
| 246 | +- |
| 247 | +
|
| 248 | +Revision History |
| 249 | +---------------- |
| 250 | + |
| 251 | +[cols="5,15,15,70"] |
| 252 | +[grid="rows"] |
| 253 | +[options="header"] |
| 254 | +|======================================== |
| 255 | +|Rev|Date|Author|Changes |
| 256 | +|1|2024-06-15|Dmitry Sidorov|Initial revision |
| 257 | +|2|2025-10-24|Dmitry Sidorov|Prepare to publish |
| 258 | +|======================================== |
0 commit comments