From 759ee4c2e159dc4359aab0fb1f1e243b8c174894 Mon Sep 17 00:00:00 2001 From: Michael Platzer Date: Thu, 23 May 2024 11:51:02 +0200 Subject: [PATCH 1/8] =?UTF-8?q?=F0=9F=A9=B9=20Fix=20result=20of=20not=20eq?= =?UTF-8?q?ual=20compare=20w=20signaling=20NaNs=20(#116)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🩹 Set result bit of not equal compare on signaling NaN * 💡 Update comment w.r.t. signaling NaNs in compares --- src/fpnew_noncomp.sv | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index 8a182617..370e80e9 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -257,10 +257,12 @@ module fpnew_noncomp #( cmp_result = '0; // false cmp_status = '0; // no flags - // Signalling NaNs always compare as false and are illegal - if (signalling_nan) cmp_status.NV = 1'b1; // invalid operation + // Signalling NaNs always compare as false (except for "not equal" compares) and are illegal + if (signalling_nan) begin + cmp_status.NV = 1'b1; // invalid operation + cmp_result = inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN && inp_pipe_op_mod_q[NUM_INP_REGS]; // Otherwise do comparisons - else begin + end else begin unique case (inp_pipe_rnd_mode_q[NUM_INP_REGS]) fpnew_pkg::RNE: begin // Less than or equal if (any_operand_nan) cmp_status.NV = 1'b1; // Signalling comparison: NaNs are invalid From 3bbe483f4d72f9693894304b3db13620e68c6b37 Mon Sep 17 00:00:00 2001 From: Luca Bertaccini <55843305+lucabertaccini@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:36:16 +0200 Subject: [PATCH 2/8] Add new multi-format DivSqrt unit from openC910 (FP64, FP32, FP16 + SIMD) (#8) * Add new multi-format DivSqrt unit from openC910 supporting FP64, FP32, FP16, and SIMD operations --- Bender.yml | 13 + README.license.md | 2 +- README.md | 3 +- docs/CHANGELOG-PULP.md | 6 + docs/README.md | 14 +- src/fpnew_divsqrt_th_64_multi.sv | 482 +++++++ src/fpnew_opgroup_block.sv 
| 4 +- src/fpnew_opgroup_multifmt_slice.sv | 77 +- src/fpnew_pkg.sv | 16 + src/fpnew_top.sv | 6 +- src_files.yml | 14 + vendor/openc910.lock.hjson | 14 + vendor/openc910.vendor.hjson | 47 + .../gen_rtl/clk/rtl/gated_clk_cell.v | 49 + .../gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v | 520 ++++++++ .../gen_rtl/vfdsu/rtl/ct_vfdsu_double.v | 370 ++++++ .../gen_rtl/vfdsu/rtl/ct_vfdsu_ff1.v | 99 ++ .../gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v | 417 ++++++ .../gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v | 773 +++++++++++ .../gen_rtl/vfdsu/rtl/ct_vfdsu_round.v | 1041 +++++++++++++++ .../gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v | 323 +++++ .../gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v | 691 ++++++++++ .../rtl/ct_vfdsu_srt_radix16_bound_table.v | 1168 +++++++++++++++++ .../rtl/ct_vfdsu_srt_radix16_with_sqrt.v | 1152 ++++++++++++++++ .../gen_rtl/vfdsu/rtl/ct_vfdsu_top.v | 331 +++++ vendor/openc910/LICENSE | 201 +++ vendor/openc910/README.md | 74 ++ 27 files changed, 7880 insertions(+), 27 deletions(-) create mode 100644 src/fpnew_divsqrt_th_64_multi.sv create mode 100644 vendor/openc910.lock.hjson create mode 100644 vendor/openc910.vendor.hjson create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ff1.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v create mode 100644 
vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_bound_table.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v create mode 100644 vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v create mode 100644 vendor/openc910/LICENSE create mode 100644 vendor/openc910/README.md diff --git a/Bender.yml b/Bender.yml index fff51ec3..b635aa07 100644 --- a/Bender.yml +++ b/Bender.yml @@ -25,7 +25,20 @@ sources: - vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_dp.v - vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_frbus.v - vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_src_type.v +# - vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v # same as the one from E906 + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ff1.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_bound_table.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v + - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v - src/fpnew_divsqrt_th_32.sv + - src/fpnew_divsqrt_th_64_multi.sv - src/fpnew_divsqrt_multi.sv - src/fpnew_fma.sv - src/fpnew_fma_multi.sv diff --git a/README.license.md b/README.license.md index ebbb64d3..15c7b69b 100644 --- a/README.license.md +++ b/README.license.md @@ -2,4 +2,4 @@ FPnew is released under the *SolderPad Hardware License*, which is a permissive license based on 
Apache 2.0. Please refer to the [SolderPad license file](LICENSE.solderpad) for further information. -The T-Head E906 DivSqrt unit, integrated into FPnew in [`vendor/opene906`](vendor/opene906), is reseased under the *Apache License, Version 2.0*. Please refer to the [Apache 2.0 license file](LICENSE.apache) for further information. +The T-Head E906 and C910 DivSqrt units, integrated into FPnew in [`vendor/opene906`](vendor/opene906) and [`vendor/openc910`](vendor/openc910), are released under the *Apache License, Version 2.0*. Please refer to the [Apache 2.0 license file](LICENSE.apache) for further information. diff --git a/README.md b/README.md index 942d5b86..b13f00d1 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,7 @@ It is discouraged to `import` all of `fpnew_pkg` into your source files. Instead fpnew_top #( .Features ( fpnew_pkg::RV64D ), .Implementation ( fpnew_pkg::DEFAULT_NOREGS ), - .TagType ( logic ), - .PulpDivsqrt ( 1'b1 ) + .TagType ( logic ) ) i_fpnew_top ( .clk_i, .rst_ni, diff --git a/docs/CHANGELOG-PULP.md b/docs/CHANGELOG-PULP.md index dd327b66..cd09eda5 100644 --- a/docs/CHANGELOG-PULP.md +++ b/docs/CHANGELOG-PULP.md @@ -7,6 +7,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a In this sense, we interpret the "Public API" of a hardware module as its port/parameter list. Versions of the IP in the same major relase are "pin-compatible" with each other. Minor relases are permitted to add new parameters as long as their default bindings ensure backwards compatibility. +## [pulp-v0.2.0] - 2024-05-29 + +### Added +- Add support for alternative multi-format DivSqrt unit (from openC910), supporting FP64, FP32, FP16 and SIMD operations +- Replace `PulpDivsqrt` top-level parameter with `DivSqrtSel` to choose among the legacy PULP DivSqrt unit (`PULP`), the openE906 DivSqrt (`TH32`), and the openC910 DivSqrt (`THMULTI`).
The default choice is set to `THMULTI` + ## [pulp-v0.1.3] - 2023-07-19 ### Fixed diff --git a/docs/README.md b/docs/README.md index 542e53e1..dd8a0e9b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -37,6 +37,7 @@ For more in-depth explanations on how to configure the unit and the layout of th |------------------|------------------------------------------------------------------------------------------------------------------------------| | `Features` | Specifies the features of the FPU, such as the set of supported formats and operations. | | `Implementation` | Allows to control how the above features are implemented, such as the number of pipeline stages and architecture of subunits | +| `DivSqrtSel` | Chooses among the three supported DivSqrt units | | `TagType` | The SystemVerilog data type of the operation tag | | `TrueSIMDClass` | If enabled, the result of a classify operation in vectorial mode will be RISC-V compliant if each output has at least 10 bits| | `EnableSIMDMask` | Enable the RISC-V floating-point status flags masking of inactive vectorial lanes. When disabled, `simd_mask_i` is inactive | @@ -358,7 +359,18 @@ The configuration `pipe_config_t` is an enumeration of type `logic [1:0]` holdi | `INSIDE` | All registers are inserted at roughly the middle of the operational unit (if not possible, `BEFORE`) | | `DISTRIBUTED` | Registers are evenly distributed to `INSIDE`, `BEFORE`, and `AFTER` (if no `INSIDE`, all `BEFORE`) | -### `Stochastic Rounding Implementation` +#### `Division and Square-Root Unit Selection` +The `DivSqrtSel` parameter is used to choose among the supported DivSqrt units.
+It is of type `divsqrt_unit_t`, which is defined as: +```SystemVerilog +typedef enum logic[1:0] { + PULP, // "PULP" instantiates the PULP DivSqrt unit supports FP64, FP32, FP16, FP16ALT, FP8 and SIMD operations + TH32, // "TH32" instantiates the E906 DivSqrt unit supports only FP32 (no SIMD support) + THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16 and SIMD operations +} divsqrt_unit_t; +``` + +#### `Stochastic Rounding Implementation` The `StochasticRndImplementation` parameter is used to configure the RSR support. It is of type `rsr_impl_t` which is defined as: diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv new file mode 100644 index 00000000..eff0620d --- /dev/null +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -0,0 +1,482 @@ +// Copyright 2019 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Authors: Stefan Mach +// Roman Marquart + + +`include "common_cells/registers.svh" + +module fpnew_divsqrt_th_64_multi #( + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + // FPU configuration + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, + parameter type TagType = logic, + parameter type AuxType = logic, + // Do not change + localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [1:0][WIDTH-1:0] operands_i, // 2 operands + input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input fpnew_pkg::fp_format_e dst_fmt_i, + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + input logic vectorial_op_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + output logic divsqrt_done_o, + input logic simd_synch_done_i, + output logic divsqrt_ready_o, + input logic simd_synch_rdy_i, + input logic flush_i, + // Output signals + output logic [WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Constants + // ---------- + // Pipelines + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no regs here otherwise + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [1:0][WIDTH-1:0] operands_q; + fpnew_pkg::roundmode_e rnd_mode_q; + fpnew_pkg::operation_e op_q; + fpnew_pkg::fp_format_e dst_fmt_q; + logic in_valid_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; + logic [0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_q[0] = operands_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_vec_op_q[0] = vectorial_op_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, '0) // 1-bit flag per stage: reset with plain '0, no AuxType cast + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; + assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign op_q = inp_pipe_op_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; + + // ----------------- + // Input processing + // ----------------- + logic [1:0] divsqrt_fmt; + + // Translate fpnew formats into divsqrt formats + if(WIDTH == 64) begin : translate_fmt_64_bits + always_comb begin : translate_fmt + unique case (dst_fmt_q) + fpnew_pkg::FP64: divsqrt_fmt = 2'b10; + fpnew_pkg::FP32: divsqrt_fmt = 2'b01; + fpnew_pkg::FP16: divsqrt_fmt = 2'b00; + default: divsqrt_fmt = 2'b10; // 64 bit max width + endcase + end + end else if(WIDTH == 32) begin : translate_fmt_32_bits + always_comb begin :
translate_fmt + unique case (dst_fmt_q) + fpnew_pkg::FP32: divsqrt_fmt = 2'b01; + fpnew_pkg::FP16: divsqrt_fmt = 2'b00; + default: divsqrt_fmt = 2'b01; // 32 bit max width + endcase + end + end else if(WIDTH == 16) begin : translate_fmt_16_bits + always_comb begin : translate_fmt + unique case (dst_fmt_q) + fpnew_pkg::FP16: divsqrt_fmt = 2'b00; + default: divsqrt_fmt = 2'b00; // 16 bit max width + endcase + end + end else begin + $fatal(1, "DivSqrt THMULTI: Unsupported WIDTH (the supported width are 64, 32, 16)"); + end + + // ------------ + // Control FSM + // ------------ + + logic in_ready; // input handshake with upstream + logic div_valid, sqrt_valid; // input signalling with unit + logic unit_ready, unit_done, unit_done_q; // status signals from unit instance + logic op_starting; // high in the cycle a new operation starts + logic out_valid, out_ready; // output handshake with downstream + logic unit_busy; // valid data in flight + logic simd_synch_done; + // FSM states + typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; + fsm_state_e state_q, state_d; + + // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. 
+ assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; + assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; + assign op_starting = div_valid | sqrt_valid; + + // Hold additional information while the operation is in progress + + TagType result_tag_q; + logic result_mask_q; + AuxType result_aux_q; + logic result_vec_op_q; + + // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) + `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) + `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) + + // Wait for other lanes only if the operation is vectorial + assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; + + // Valid synch with other lanes + // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes + // As soon as all the lanes are over, we can clear this FF and start with a new operation + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); + // Tell the other units that this unit has finished now or in the past + assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; + + // Ready synch with other lanes + // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes + assign divsqrt_ready_o = in_ready; + // Upstream ready comes from sanitization FSM, and it is synched among all the lanes + assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? 
simd_synch_rdy_i : in_ready; + + // FSM to safely apply and receive data from DIVSQRT unit + always_comb begin : flag_fsm + // Default assignments + in_ready = 1'b0; + out_valid = 1'b0; + unit_busy = 1'b0; + state_d = state_q; + + unique case (state_q) + // Waiting for work + IDLE: begin + in_ready = 1'b1; // we're ready + if (in_valid_q && unit_ready) begin // New work arrives + state_d = BUSY; // go into processing state + end + end + // Operation in progress + BUSY: begin + unit_busy = 1'b1; // data in flight + // If all the lanes are done with processing + if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin + out_valid = 1'b1; // try to commit result downstream + // If downstream accepts our result + if (out_ready) begin + state_d = IDLE; // we anticipate going back to idling.. + in_ready = 1'b1; // we acknowledge the instruction + if (in_valid_q && unit_ready) begin // ..unless new work comes in + state_d = BUSY; // and stay busy with it + end + // Otherwise if downstream is not ready for the result + end else begin + state_d = HOLD; // wait for the pipeline to take the data + end + end + end + // Waiting with valid result for downstream + HOLD: begin + unit_busy = 1'b1; // data in flight + out_valid = 1'b1; // try to commit result downstream + // If the result is accepted by downstream + if (out_ready) begin + state_d = IDLE; // go back to idle.. 
+ if (in_valid_q && unit_ready) begin // ..unless new work comes in + in_ready = 1'b1; // acknowledge the new transaction + state_d = BUSY; // will be busy with the next instruction + end + end + end + // fall into idle state otherwise + default: state_d = IDLE; + endcase + + // Flushing overrides the other actions + if (flush_i) begin + unit_busy = 1'b0; // data is invalidated + out_valid = 1'b0; // cancel any valid data + state_d = IDLE; // go to default state + end + end + + // FSM status register (asynch active low reset) + `FF(state_q, state_d, IDLE) + + // ----------------- + // DIVSQRT instance + // ----------------- + logic [63:0] unit_result, held_result_q; + fpnew_pkg::status_t unit_status, held_status_q; + logic hold_en; + + logic vfdsu_dp_fdiv_busy; + + // Regs to save current instruction + fpnew_pkg::roundmode_e rm_q; + logic[1:0] divsqrt_fmt_q; + fpnew_pkg::operation_e divsqrt_op_q; + logic div_op, sqrt_op; + logic [WIDTH-1:0] srcf0_q, srcf1_q; + logic [63:0] srcf0, srcf1; + + // Save operands in regs, C910 saves all the following information in its regs in the next cycle. 
+ `FFL(rm_q, rnd_mode_q, op_starting, fpnew_pkg::RNE) + `FFL(divsqrt_fmt_q, divsqrt_fmt, op_starting, '0) + `FFL(divsqrt_op_q, op_q, op_starting, fpnew_pkg::DIV) + `FFL(srcf0_q, operands_q[0], op_starting, '0) + `FFL(srcf1_q, operands_q[1], op_starting, '0) + + // NaN-box inputs with max WIDTH + if(WIDTH == 64) begin : gen_fmt_64_bits + always_comb begin : NaN_box_inputs + if(divsqrt_fmt_q == 2'b10) begin // 64-bit + srcf0[63:0] = srcf0_q[63:0]; + srcf1[63:0] = srcf1_q[63:0]; + end else if(divsqrt_fmt_q == 2'b01) begin // 32-bit + srcf0[63:32] = '1; + srcf1[63:32] = '1; + srcf0[31:0] = srcf0_q[31:0]; + srcf1[31:0] = srcf1_q[31:0]; + end else if(divsqrt_fmt_q == 2'b00) begin //16-bit + srcf0[63:16] = '1; + srcf1[63:16] = '1; + srcf0[15:0] = srcf0_q[15:0]; + srcf1[15:0] = srcf1_q[15:0]; + end else begin // Unsupported + srcf0[63:0] = '1; + srcf1[63:0] = '1; + end + end + end else if (WIDTH == 32) begin : gen_fmt_32_bits + always_comb begin : NaN_box_inputs + if(divsqrt_fmt_q == 2'b01) begin // 32-bit + srcf0[63:32] = '1; + srcf1[63:32] = '1; + srcf0[31:0] = srcf0_q[31:0]; + srcf1[31:0] = srcf1_q[31:0]; + end else if(divsqrt_fmt_q == 2'b00) begin // 16-bit + srcf0[63:16] = '1; + srcf1[63:16] = '1; + srcf0[15:0] = srcf0_q[15:0]; + srcf1[15:0] = srcf1_q[15:0]; + end else begin // Unsupported + srcf0[63:0] = '1; + srcf1[63:0] = '1; + end + end + end else if (WIDTH == 16) begin : gen_fmt_16_bits + always_comb begin : NaN_box_inputs + if(divsqrt_fmt_q == 2'b00) begin // 16-bit + srcf0[63:16] = '1; + srcf1[63:16] = '1; + srcf0[15:0] = srcf0_q[15:0]; + srcf1[15:0] = srcf1_q[15:0]; + end else begin // Unsupported + srcf0[63:0] = '1; + srcf1[63:0] = '1; + end + end + end else begin + $fatal(1, "DivSqrt THMULTI: Unsupported WIDTH (the supported width are 64, 32, 16)"); + end + + assign div_op = (divsqrt_op_q == fpnew_pkg::DIV) ? 1'b1 : 1'b0; + assign sqrt_op = (divsqrt_op_q != fpnew_pkg::DIV) ? 
1'b1 : 1'b0; + + // Select func 1 cycle after div issue + logic func_sel; + `FFLARNC(func_sel, 1'b1, op_starting, func_sel, 1'b0, clk_i, rst_ni) + + // Select operands 2 cycles after div issue + logic op_sel; + `FFLARNC(op_sel, 1'b1, func_sel, op_sel, 1'b0, clk_i, rst_ni) + + ct_vfdsu_top i_ct_vfdsu_top ( + .cp0_vfpu_icg_en ( 1'b0 ), // Internal clock gating, (module enable) doesn't matter when the clk_gate module is redundant anyway + .cp0_yy_clk_en ( 1'b1 ), // Global clock enable (same as above) + .cpurst_b ( rst_ni ), // Reset + .dp_vfdsu_ex1_pipex_dst_ereg ( '0 ), // Don't care, used in C910 + .dp_vfdsu_ex1_pipex_dst_vreg ( '0 ), // Don't care, used in C910 + .dp_vfdsu_ex1_pipex_iid ( '0 ), // Don't care, used in C910 + .dp_vfdsu_ex1_pipex_imm0 ( 3'b111 ), // Round mode, set to 3'b111 to select vfpu_yy_xx_rm signal + .dp_vfdsu_ex1_pipex_sel ( op_sel ), // 3. Select operands, start operation + .dp_vfdsu_ex1_pipex_srcf0 ( srcf0 ), // Input for operand 0 + .dp_vfdsu_ex1_pipex_srcf1 ( srcf1 ), // Input for operand 1 + .dp_vfdsu_fdiv_gateclk_issue ( 1'b1 ), // Local clock enable (same as above) + .dp_vfdsu_idu_fdiv_issue ( op_starting ), // 1. Issue fdiv (FSM in ctrl) + .forever_cpuclk ( clk_i ), // Clock input + .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 13'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0) + .idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. Select func + .pad_yy_icg_scan_en ( 1'b0 ), // SE signal for the redundant clock gating module + .rtu_yy_xx_flush ( flush_i ), // Flush + .vfpu_yy_xx_dqnan ( 1'b0 ), // Disable qNaN, set to 1 if sNaN is used + .vfpu_yy_xx_rm ( rm_q ), // Round mode. 
redundant if imm0 set to the same + .pipex_dp_vfdsu_ereg ( ), // Don't care, used by C910 + .pipex_dp_vfdsu_ereg_data ( unit_status ), // Output: status flags + .pipex_dp_vfdsu_freg_data ( unit_result ), // Output: result + .pipex_dp_vfdsu_inst_vld ( unit_done ), // The result is valid + .pipex_dp_vfdsu_vreg ( ), // Don't care, used by C910 + .vfdsu_dp_fdiv_busy ( vfdsu_dp_fdiv_busy ), // Unit is busy, data in flight + .vfdsu_dp_inst_wb_req ( ), // Don't care, used by C910 + .vfdsu_ifu_debug_ex2_wait ( ), // Debug output + .vfdsu_ifu_debug_idle ( ), // Debug output + .vfdsu_ifu_debug_pipe_busy ( ) // Debug output + ); + + assign unit_ready = !vfdsu_dp_fdiv_busy; + + // Hold the result when one lane has finished execution, except when all the lanes finish together, + // or the operation is not vectorial, and the result can be accepted downstream + assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); + // The Hold register (load, no reset) + `FFLNR(held_result_q, unit_result, hold_en, clk_i) + `FFLNR(held_status_q, unit_status, hold_en, clk_i) + + // -------------- + // Output Select + // -------------- + logic [WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + // Prioritize hold register data + assign result_d[WIDTH-1:0] = unit_done_q ? held_result_q[WIDTH-1:0] : unit_result[WIDTH-1:0]; + assign status_d = unit_done_q ? 
held_status_q : unit_status; + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = result_tag_q; + assign out_pipe_mask_q[0] = result_mask_q; + assign out_pipe_aux_q[0] = result_aux_q; + assign out_pipe_valid_q[0] = out_valid; + // Input stage: Propagate pipeline ready signal to inside pipe + assign out_ready = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); +endmodule + diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index eb3f529e..db2c3032 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -18,7 +18,7 @@ module fpnew_opgroup_block #( // FPU configuration parameter int unsigned Width = 32, parameter logic EnableVectors = 1'b1, - parameter logic PulpDivsqrt = 1'b1, + parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, parameter fpnew_pkg::fmt_logic_t FpFmtMask = '1, parameter
fpnew_pkg::ifmt_logic_t IntFmtMask = '1, parameter fpnew_pkg::fmt_unsigned_t FmtPipeRegs = '{default: 0}, @@ -183,7 +183,7 @@ module fpnew_opgroup_block #( .FpFmtConfig ( FpFmtMask ), .IntFmtConfig ( IntFmtMask ), .EnableVectors ( EnableVectors ), - .PulpDivsqrt ( PulpDivsqrt ), + .DivSqrtSel ( DivSqrtSel ), .NumPipeRegs ( REG ), .PipeConfig ( PipeConfig ), .TagType ( TagType ), diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index d135141d..6b5545c5 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -16,17 +16,17 @@ `include "common_cells/registers.svh" module fpnew_opgroup_multifmt_slice #( - parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, - parameter int unsigned Width = 64, + parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, + parameter int unsigned Width = 64, // FPU configuration - parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, - parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, - parameter logic EnableVectors = 1'b1, - parameter logic PulpDivsqrt = 1'b1, - parameter int unsigned NumPipeRegs = 0, - parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, + parameter logic EnableVectors = 1'b1, + parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, @@ -64,11 +64,14 @@ module fpnew_opgroup_multifmt_slice #( output 
logic busy_o ); - if ((OpGroup == fpnew_pkg::DIVSQRT) && !PulpDivsqrt && - !((FpFmtConfig[0] == 1) && (FpFmtConfig[1:NUM_FORMATS-1] == '0))) begin - $fatal(1, "T-Head-based DivSqrt unit supported only in FP32-only configurations. \ -Set PulpDivsqrt to 1 not to use the PULP DivSqrt unit \ -or set Features.FpFmtMask to support only FP32"); + if ((OpGroup == fpnew_pkg::DIVSQRT)) begin + if ((DivSqrtSel == fpnew_pkg::TH32) && !((FpFmtConfig[0] == 1) && (FpFmtConfig[1:NUM_FORMATS-1] == '0))) begin + $fatal(1, "T-Head-based DivSqrt unit supported only in FP32-only configurations. \ +Set DivSqrtSel = THMULTI or DivSqrtSel = PULP to use a multi-format divider"); + end else if ((DivSqrtSel == fpnew_pkg::THMULTI) && (FpFmtConfig[3] == 1'b1 || FpFmtConfig[4] == 1'b1 || FpFmtConfig[5] == 1'b1)) begin + $warning("The DivSqrt unit of C910 (instantiated by DivSqrtSel = THMULTI) does not support \ +FP16alt, FP8, FP8alt. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP16alt, FP8, FP8alt."); + end end if ((OpGroup == fpnew_pkg::DOTP) && @@ -82,6 +85,7 @@ or on 16b inputs producing 32b outputs"); localparam int unsigned MAX_FP_WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig); localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig); localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1); + localparam int unsigned NUM_DIVSQRT_LANES = fpnew_pkg::num_divsqrt_lanes(Width, FpFmtConfig, 1'b1, DivSqrtSel); localparam int unsigned NUM_DOTP_LANES = fpnew_pkg::num_dotp_lanes(Width, FpFmtConfig); localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS; // We will send the format information along with the data @@ -201,7 +205,8 @@ or on 16b inputs producing 32b outputs"); logic [LANE_WIDTH-1:0] local_result; // lane-local results // Generate instances only if needed, lane 0 always generated - if ((lane == 0) || (EnableVectors & !(OpGroup == fpnew_pkg::DOTP && (lane >= NUM_DOTP_LANES)))) begin : active_lane + 
if ((lane == 0) || (EnableVectors & (!(OpGroup == fpnew_pkg::DOTP && (lane >= NUM_DOTP_LANES)) + && !(OpGroup == fpnew_pkg::DIVSQRT && (lane >= NUM_DIVSQRT_LANES))))) begin : active_lane logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local oprands @@ -317,7 +322,7 @@ or on 16b inputs producing 32b outputs"); .busy_o ( lane_busy[lane] ) ); end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - if (!PulpDivsqrt) begin : gen_th_32_divsqrt + if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt // The T-head-based DivSqrt unit is supported only in FP32-only configurations fpnew_divsqrt_th_32 #( .NumPipeRegs ( NumPipeRegs ), @@ -347,6 +352,42 @@ or on 16b inputs producing 32b outputs"); .out_ready_i ( out_ready ), .busy_o ( lane_busy[lane] ) ); + end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt + fpnew_divsqrt_th_64_multi #( + .FpFmtConfig ( LANE_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_divsqrt_th_64_c910 ( + .clk_i, + .rst_ni, + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), + .op_i, + .dst_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .divsqrt_done_o ( divsqrt_done[lane] ), + .simd_synch_done_i( simd_synch_done ), + .divsqrt_ready_o ( divsqrt_ready[lane] ), + .simd_synch_rdy_i ( simd_synch_rdy ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( 
out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); end else begin : gen_pulp_divsqrt fpnew_divsqrt_multi #( .FpFmtConfig ( LANE_FORMATS ), @@ -585,7 +626,7 @@ or on 16b inputs producing 32b outputs"); assign conv_target_q = '0; end - if (PulpDivsqrt && (OpGroup == fpnew_pkg::DIVSQRT)) begin + if ((DivSqrtSel != fpnew_pkg::TH32) && (OpGroup == fpnew_pkg::DIVSQRT)) begin // Synch lanes if there is more than one assign simd_synch_rdy = EnableVectors ? &divsqrt_ready : divsqrt_ready[0]; assign simd_synch_done = EnableVectors ? &divsqrt_done : divsqrt_done[0]; diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv index 0fc88d68..42d0df6b 100644 --- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -130,6 +130,15 @@ package fpnew_pkg; SDOTP, EXVSUM, VSUM // DOTP operation group } operation_e; + // ------------- + // DIVSQRT UNIT + // ------------- + typedef enum logic[1:0] { + PULP, // "PULP" instantiates the PULP DivSqrt unit, which supports FP64, FP32, FP16, FP16ALT, FP8 and SIMD operations + TH32, // "TH32" instantiates the E906 DivSqrt unit, which supports only FP32 (no SIMD support) + THMULTI // "THMULTI" instantiates the C910 DivSqrt unit, which supports FP64, FP32, FP16 and SIMD operations + } divsqrt_unit_t; + // ------------------- // RISC-V FP-SPECIFIC // ------------------- @@ -442,6 +451,13 @@ package fpnew_pkg; return vec ? width / min_fp_width(cfg) : 1; // if no vectors, only one lane endfunction + // Returns the number of DivSqrt lanes according to width, format config, vectors and the selected DivSqrt unit + function automatic int unsigned num_divsqrt_lanes(int unsigned width, fmt_logic_t cfg, logic vec, divsqrt_unit_t DivSqrtSel); + automatic fmt_logic_t cfg_tmp; + cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 6'b111000 : cfg; + return vec ?
width / min_fp_width(cfg_tmp) : 1; // if no vectors, only one lane + endfunction + // Returns a mask of active FP formats that are present in lane lane_no of a multiformat slice function automatic fmt_logic_t get_lane_formats(int unsigned width, fmt_logic_t cfg, diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index 9cea0ec1..b564286d 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -17,8 +17,8 @@ module fpnew_top #( // FPU configuration parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, - // PulpDivSqrt = 0 enables T-head-based DivSqrt unit. Supported only for FP32-only instances of Fpnew - parameter logic PulpDivsqrt = 1'b1, + // DivSqrtSel chooses among PULP, TH32, or THMULTI (see documentation and fpnew_pkg.sv for further details) + parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, parameter type TagType = logic, parameter logic TrueSIMDClass = 1'b0, parameter logic EnableSIMDMask = 1'b0, @@ -122,7 +122,7 @@ module fpnew_top #( .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), .Width ( WIDTH ), .EnableVectors ( Features.EnableVectors ), - .PulpDivsqrt ( PulpDivsqrt ), + .DivSqrtSel ( DivSqrtSel ), .FpFmtMask ( Features.FpFmtMask ), .IntFmtMask ( Features.IntFmtMask ), .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), diff --git a/src_files.yml b/src_files.yml index eaf51dd0..84348a98 100644 --- a/src_files.yml +++ b/src_files.yml @@ -21,7 +21,20 @@ fpnew: vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_dp.v, vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_frbus.v, vendor/opene906/E906_RTL_FACTORY/gen_rtl/fpu/rtl/pa_fpu_src_type.v, +# vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v, # same as the one from E906 + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v, + 
vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ff1.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_bound_table.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v, + vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v, src/fpnew_divsqrt_th_32.sv, + src/fpnew_divsqrt_th_64_multi.sv, src/fpnew_divsqrt_multi.sv, src/fpnew_fma.sv, src/fpnew_fma_multi.sv, @@ -32,5 +45,6 @@ fpnew: src/fpnew_opgroup_fmt_slice.sv, src/fpnew_opgroup_multifmt_slice.sv, src/fpnew_rounding.sv, + src/lfsr_sr.sv, src/fpnew_top.sv, ] diff --git a/vendor/openc910.lock.hjson b/vendor/openc910.lock.hjson new file mode 100644 index 00000000..64cdb3e8 --- /dev/null +++ b/vendor/openc910.lock.hjson @@ -0,0 +1,14 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// This file is generated by the util/vendor script. Please do not modify it +// manually. + +{ + upstream: + { + url: https://github.com/T-head-Semi/openc910 + rev: e0c4ad8ec7f8c70f649d826ebd6c949086453272 + } +} diff --git a/vendor/openc910.vendor.hjson b/vendor/openc910.vendor.hjson new file mode 100644 index 00000000..ddaa644f --- /dev/null +++ b/vendor/openc910.vendor.hjson @@ -0,0 +1,47 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 +{ + name: "openc910", + target_dir: "openc910" + + upstream: { + url: "https://github.com/T-head-Semi/openc910" + rev: "e0c4ad8ec7f8c70f649d826ebd6c949086453272" + } + + exclude_from_upstream: [ + "doc", + "smart_run", + "C910_RTL_FACTORY/gen_rtl/biu", + "C910_RTL_FACTORY/gen_rtl/biu/rtl", + "C910_RTL_FACTORY/gen_rtl/ciu", + "C910_RTL_FACTORY/gen_rtl/clint", + "C910_RTL_FACTORY/gen_rtl/clk/rtl/ct_mp_clk_top.v", + "C910_RTL_FACTORY/gen_rtl/clk/rtl/ct_clk_top.v", + "C910_RTL_FACTORY/gen_rtl/common", + "C910_RTL_FACTORY/gen_rtl/cp0", + "C910_RTL_FACTORY/gen_rtl/cpu", + "C910_RTL_FACTORY/gen_rtl/filelists", + "C910_RTL_FACTORY/gen_rtl/fpga", + "C910_RTL_FACTORY/gen_rtl/had", + "C910_RTL_FACTORY/gen_rtl/idu", + "C910_RTL_FACTORY/gen_rtl/ifu", + "C910_RTL_FACTORY/gen_rtl/iu", + "C910_RTL_FACTORY/gen_rtl/l2c", + "C910_RTL_FACTORY/gen_rtl/lsu", + "C910_RTL_FACTORY/gen_rtl/mmu", + "C910_RTL_FACTORY/gen_rtl/plic", + "C910_RTL_FACTORY/gen_rtl/pmp", + "C910_RTL_FACTORY/gen_rtl/pmu", + "C910_RTL_FACTORY/gen_rtl/rst", + "C910_RTL_FACTORY/gen_rtl/rtu", + "C910_RTL_FACTORY/gen_rtl/vfalu", + "C910_RTL_FACTORY/gen_rtl/vfmau", + "C910_RTL_FACTORY/gen_rtl/vfpu", + "C910_RTL_FACTORY/gen_rtl/vfpu/rtl", + "C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_only_div.v", + "C910_RTL_FACTORY/setup" + ] + +} diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v new file mode 100644 index 00000000..c7d58ad7 --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/clk/rtl/gated_clk_cell.v @@ -0,0 +1,49 @@ +/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +module gated_clk_cell( + clk_in, + global_en, + module_en, + local_en, + external_en, + pad_yy_icg_scan_en, + clk_out +); + +input clk_in; +input global_en; +input module_en; +input local_en; +input external_en; +input pad_yy_icg_scan_en; +output clk_out; + +wire clk_en_bf_latch; +wire SE; + +assign clk_en_bf_latch = (global_en && (module_en || local_en)) || external_en ; + +// SE driven from primary input, held constant +assign SE = pad_yy_icg_scan_en; + +// // &Connect( .clk_in (clk_in), @50 +// // .SE (SE), @51 +// // .external_en (clk_en_bf_latch), @52 +// // .clk_out (clk_out) @53 +// // ) ; @54 +assign clk_out = clk_in; + +endmodule \ No newline at end of file diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v new file mode 100644 index 00000000..f7f541f2 --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v @@ -0,0 +1,520 @@ +/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// &Depend("cpu_cfig.h"); @22 +// &ModuleBeg; @23 +module ct_vfdsu_ctrl( + cp0_vfpu_icg_en, + cp0_yy_clk_en, + cpurst_b, + dp_vfdsu_ex1_pipex_sel, + dp_vfdsu_fdiv_gateclk_issue, + dp_vfdsu_idu_fdiv_issue, + ex1_data_clk, + ex1_double, + ex1_pipedown, + ex1_single, + ex2_data_clk, + ex2_pipedown, + ex2_srt_first_round, + ex3_data_clk, + ex3_pipedown, + forever_cpuclk, + pad_yy_icg_scan_en, + pipex_dp_vfdsu_inst_vld, + rtu_yy_xx_flush, + srt_ctrl_rem_zero, + srt_ctrl_skip_srt, + srt_secd_round, + srt_sm_on, + vfdsu_dp_fdiv_busy, + vfdsu_dp_inst_wb_req, + vfdsu_ex2_double, + vfdsu_ex2_single, + vfdsu_ifu_debug_ex2_wait, + vfdsu_ifu_debug_idle, + vfdsu_ifu_debug_pipe_busy +); + +// &Ports; @24 +input cp0_vfpu_icg_en; +input cp0_yy_clk_en; +input cpurst_b; +input dp_vfdsu_ex1_pipex_sel; +input dp_vfdsu_fdiv_gateclk_issue; +input dp_vfdsu_idu_fdiv_issue; +input ex1_double; +input ex1_single; +input forever_cpuclk; +input pad_yy_icg_scan_en; +input rtu_yy_xx_flush; +input srt_ctrl_rem_zero; +input srt_ctrl_skip_srt; +input vfdsu_ex2_double; +input vfdsu_ex2_single; +output ex1_data_clk; +output ex1_pipedown; +output ex2_data_clk; +output ex2_pipedown; +output ex2_srt_first_round; +output ex3_data_clk; +output ex3_pipedown; +output pipex_dp_vfdsu_inst_vld; +output srt_secd_round; +output srt_sm_on; +output vfdsu_dp_fdiv_busy; +output vfdsu_dp_inst_wb_req; +output vfdsu_ifu_debug_ex2_wait; +output vfdsu_ifu_debug_idle; +output vfdsu_ifu_debug_pipe_busy; + +// &Regs; @25 +reg [3:0] div_cur_state; +reg [3:0] div_next_state; +reg ex2_srt_first_round; +reg ex2_srt_secd_round; +reg [4:0] srt_cnt; +reg srt_cur_state; +reg srt_nxt_state; +reg vfdsu_ex3_vld; +reg vfdsu_ex4_vld; + +// &Wires; @26 +wire cp0_vfpu_icg_en; +wire cp0_yy_clk_en; +wire cpurst_b; +wire div_sm_clk; +wire div_sm_clk_en; +wire div_st_ex2; +wire dp_vfdsu_ex1_pipex_sel; +wire dp_vfdsu_fdiv_gateclk_issue; +wire dp_vfdsu_idu_fdiv_issue; +wire ex1_data_clk; +wire ex1_data_clk_en; +wire ex1_double; +wire 
ex1_pipedown; +wire ex1_single; +wire ex2_data_clk; +wire ex2_data_clk_en; +wire ex2_pipe_clk; +wire ex2_pipe_clk_en; +wire ex2_pipedown; +wire ex2_srt_secd_round_pre; +wire ex3_data_clk; +wire ex3_data_clk_en; +wire ex3_pipe_clk; +wire ex3_pipe_clk_en; +wire ex3_pipedown; +wire ex4_pipedown; +wire forever_cpuclk; +wire pad_yy_icg_scan_en; +wire pipex_dp_vfdsu_inst_vld; +wire rtu_yy_xx_flush; +wire skip_srt; +wire [4:0] srt_cnt_ini; +wire srt_cnt_zero; +wire srt_ctrl_rem_zero; +wire srt_ctrl_skip_srt; +wire srt_last_round; +wire srt_secd_round; +wire srt_secd_round_pre; +wire srt_sm_clk; +wire srt_sm_clk_en; +wire srt_sm_on; +wire vfdsu_dp_fdiv_busy; +wire vfdsu_dp_inst_wb_req; +wire vfdsu_ex2_double; +wire vfdsu_ex2_single; +wire vfdsu_ex2_vld; +wire vfdsu_ifu_debug_ex2_wait; +wire vfdsu_ifu_debug_idle; +wire vfdsu_ifu_debug_pipe_busy; + + +//========================================================== +// EX1 Stage Control Signal +//========================================================== + +//vfdsu ex1 pipedown signal +assign ex1_pipedown = dp_vfdsu_ex1_pipex_sel; +// &Force("output","ex1_pipedown"); @34 +//========================================================== +// EX2 Stage Control Signal +//========================================================== +//state parameter +parameter SRT_IDLE = 1'b0; +parameter SRT_BUSY = 1'b1; + +//gate clk +// &Instance("gated_clk_cell","x_srt_sm_clk"); @43 +gated_clk_cell x_srt_sm_clk ( + .clk_in (forever_cpuclk ), + .clk_out (srt_sm_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (srt_sm_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @44 +// .clk_out (srt_sm_clk),//Out Clock @45 +// .external_en (1'b0), @46 +// .global_en (cp0_yy_clk_en), @47 +// .local_en (srt_sm_clk_en),//Local Condition @48 +// .module_en (cp0_vfpu_icg_en) @49 +// ); @50 +assign srt_sm_clk_en = srt_cur_state || + ex1_pipedown || + 
rtu_yy_xx_flush; + +//state machine +always @(posedge srt_sm_clk or negedge cpurst_b) +begin + if(!cpurst_b) + srt_cur_state <= SRT_IDLE; + else if(rtu_yy_xx_flush) + srt_cur_state <= SRT_IDLE; + else + srt_cur_state <= srt_nxt_state; +end + +// &CombBeg; @66 +always @( ex1_pipedown + or srt_last_round + or srt_cur_state) +begin +case(srt_cur_state) +SRT_IDLE : if(ex1_pipedown) + srt_nxt_state = SRT_BUSY; + else + srt_nxt_state = SRT_IDLE; +SRT_BUSY : if(srt_last_round) + srt_nxt_state = SRT_IDLE; + else + srt_nxt_state = SRT_BUSY; +default : srt_nxt_state = SRT_IDLE; +endcase +// &CombEnd; @78 +end + +//srt sm state +//assign srt_sm_idle = ~srt_cur_state; +assign srt_sm_on = srt_cur_state; +// &Force("output","srt_sm_on"); @83 +//state machine control signal +//srt_last_round on three condition : +// 1.srt need not execute +// 2.srt rem is zero +// 3.srt cnt zero +assign srt_last_round = (skip_srt || + srt_ctrl_rem_zero || + srt_cnt_zero) && + srt_sm_on; +assign skip_srt = srt_ctrl_skip_srt; +assign srt_cnt_zero = ~|srt_cnt[4:0]; +//srt counter +always @(posedge srt_sm_clk or negedge cpurst_b) +begin + if(!cpurst_b) + srt_cnt[4:0] <= 5'b0; + else if(rtu_yy_xx_flush) + srt_cnt[4:0] <= 5'b0; + else if(ex1_pipedown) + srt_cnt[4:0] <= srt_cnt_ini[4:0]; + else if(srt_sm_on) + srt_cnt[4:0] <= srt_cnt[4:0] - 5'b1; + else + srt_cnt[4:0] <= srt_cnt[4:0]; +end + +//srt_cnt_ini[4:0] +//For Double, initial is 5'b11100('d28), calculate 29 round +//For Single, initial is 5'b01110('d14), calculate 15 round +assign srt_cnt_ini[4:0] = (ex1_double) ? 5'b01101 : + ex1_single ? 
5'b00110 + : 5'b00011; + +//vfdsu ex2 pipedown signal +assign ex2_pipedown = srt_last_round && div_st_ex2; +// &Force("output","ex2_pipedown"); @157 +// &Force("output","ex2_srt_first_round"); @172 +always @(posedge srt_sm_clk or negedge cpurst_b) +begin + if(!cpurst_b) + ex2_srt_first_round <= 1'b0; + else if(rtu_yy_xx_flush) + ex2_srt_first_round <= 1'b0; + else if(ex1_pipedown) + ex2_srt_first_round <= 1'h1; + else + ex2_srt_first_round <= 1'b0; +end +// &Force("output","ex2_srt_first_round"); @195 +always @(posedge srt_sm_clk or negedge cpurst_b) +begin + if(!cpurst_b) + ex2_srt_secd_round <= 1'b0; + else if(rtu_yy_xx_flush) + ex2_srt_secd_round <= 1'b0; + else + ex2_srt_secd_round <= {1{ex2_srt_secd_round_pre}}; +end +assign srt_secd_round = ex2_srt_secd_round; + + +assign ex2_srt_secd_round_pre = srt_sm_on && srt_secd_round_pre; +assign srt_secd_round_pre = vfdsu_ex2_double ? srt_cnt[4:0]==5'b01101 : + vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 : srt_cnt[4:0] == 5'b00011; + +//========================================================== +// EX3 Stage Control Signal +//========================================================== +//gate clk +// &Instance("gated_clk_cell","x_ex2_pipe_clk"); @217 +gated_clk_cell x_ex2_pipe_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex2_pipe_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex2_pipe_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @218 +// .clk_out (ex2_pipe_clk),//Out Clock @219 +// .external_en (1'b0), @220 +// .global_en (cp0_yy_clk_en), @221 +// .local_en (ex2_pipe_clk_en),//Local Condition @222 +// .module_en (cp0_vfpu_icg_en) @223 +// ); @224 +assign ex2_pipe_clk_en = vfdsu_ex2_vld || + vfdsu_ex3_vld || + rtu_yy_xx_flush; +assign vfdsu_ex2_vld = ex2_pipedown; +//EX2 to EX3 pipedown +always @(posedge ex2_pipe_clk or negedge cpurst_b) +begin + if(!cpurst_b) + vfdsu_ex3_vld <= 1'b0; + else 
if(rtu_yy_xx_flush) + vfdsu_ex3_vld <= 1'b0; + else if(ex2_pipedown) + vfdsu_ex3_vld <= 1'b1; + else + vfdsu_ex3_vld <= 1'b0; +end +assign ex3_pipedown = vfdsu_ex3_vld; +// &Force("output","ex3_pipedown"); @242 + +//========================================================== +// EX4 Stage Control Signal +//========================================================== +//gate clk +// &Instance("gated_clk_cell","x_ex3_pipe_clk"); @248 +gated_clk_cell x_ex3_pipe_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex3_pipe_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex3_pipe_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @249 +// .clk_out (ex3_pipe_clk),//Out Clock @250 +// .external_en (1'b0), @251 +// .global_en (cp0_yy_clk_en), @252 +// .local_en (ex3_pipe_clk_en),//Local Condition @253 +// .module_en (cp0_vfpu_icg_en) @254 +// ); @255 +assign ex3_pipe_clk_en = ex3_pipedown || + vfdsu_ex4_vld || + rtu_yy_xx_flush; + +//EX3 to EX4 pipedown +always @(posedge ex3_pipe_clk or negedge cpurst_b) +begin + if(!cpurst_b) + vfdsu_ex4_vld <= 1'b0; + else if(rtu_yy_xx_flush) + vfdsu_ex4_vld <= 1'b0; + else if(ex3_pipedown) + vfdsu_ex4_vld <= 1'b1; + else + vfdsu_ex4_vld <= 1'b0; +end +assign ex4_pipedown = vfdsu_ex4_vld; + + +//Div Write Back State Machine +parameter IDLE = 4'b0000; +parameter RF = 4'b0100; +parameter EX1 = 4'b0101; +parameter EX2 = 4'b0110; +parameter WB_REQ = 4'b0111; +parameter WB = 4'b1000; + +//GateClk +// &Instance("gated_clk_cell","x_div_sm_clk"); @284 +gated_clk_cell x_div_sm_clk ( + .clk_in (forever_cpuclk ), + .clk_out (div_sm_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (div_sm_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @285 +// .clk_out (div_sm_clk),//Out Clock @286 +// .external_en (1'b0), @287 +// .global_en (cp0_yy_clk_en), 
@288 +// .local_en (div_sm_clk_en),//Local Condition @289 +// .module_en (cp0_vfpu_icg_en) @290 +// ); @291 +assign div_sm_clk_en = dp_vfdsu_fdiv_gateclk_issue || + !(div_cur_state[3:0] == IDLE); +//State Trans +always @(posedge div_sm_clk or negedge cpurst_b) +begin + if(!cpurst_b) + div_cur_state[3:0] <= IDLE; + else if(rtu_yy_xx_flush) + div_cur_state[3:0] <= IDLE; + else + div_cur_state[3:0] <= div_next_state[3:0]; +end +// &CombBeg; @304 +always @( dp_vfdsu_idu_fdiv_issue + or dp_vfdsu_ex1_pipex_sel + or ex4_pipedown + or srt_last_round + or div_cur_state[3:0]) +begin + case(div_cur_state[3:0]) + IDLE : if(dp_vfdsu_idu_fdiv_issue) + div_next_state[3:0] = RF; + else + div_next_state[3:0] = IDLE; + RF : div_next_state[3:0] = EX1; + EX1 : if(dp_vfdsu_ex1_pipex_sel) + div_next_state[3:0] = EX2; + else + div_next_state[3:0] = IDLE; + EX2 : if(srt_last_round) + div_next_state[3:0] = WB_REQ; + else + div_next_state[3:0] = EX2; + WB_REQ : if(ex4_pipedown) + div_next_state[3:0] = WB; + else + div_next_state[3:0] = WB_REQ; + WB : if(dp_vfdsu_idu_fdiv_issue) + div_next_state[3:0] = RF; + else + div_next_state[3:0] = IDLE; + default : div_next_state[3:0] = IDLE; + endcase +// &CombEnd; @329 +end +//Control Signal +assign div_st_ex2 = (div_cur_state[3:0] == EX2); + +//Div Rdy Signal +//assign vfdsu_vfpu_gateclk_en = div_cur_state[2] || div_cur_state[3] || +// ex4_pipedown; + + +//Active Data with VFPU +//GateClk +// &Instance("gated_clk_cell","x_ex1_data_clk"); @340 +gated_clk_cell x_ex1_data_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex1_data_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex1_data_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @341 +// .clk_out (ex1_data_clk),//Out Clock @342 +// .external_en (1'b0), @343 +// .global_en (cp0_yy_clk_en), @344 +// .local_en (ex1_data_clk_en),//Local Condition @345 +// .module_en (cp0_vfpu_icg_en) @346 +// ); 
@347 +assign ex1_data_clk_en = ex1_pipedown; + +// &Instance("gated_clk_cell","x_ex2_data_clk"); @350 +gated_clk_cell x_ex2_data_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex2_data_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex2_data_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @351 +// .clk_out (ex2_data_clk),//Out Clock @352 +// .external_en (1'b0), @353 +// .global_en (cp0_yy_clk_en), @354 +// .local_en (ex2_data_clk_en),//Local Condition @355 +// .module_en (cp0_vfpu_icg_en) @356 +// ); @357 +assign ex2_data_clk_en = ex2_pipedown; + +// &Instance("gated_clk_cell","x_ex3_data_clk"); @360 +gated_clk_cell x_ex3_data_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex3_data_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex3_data_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @361 +// .clk_out (ex3_data_clk),//Out Clock @362 +// .external_en (1'b0), @363 +// .global_en (cp0_yy_clk_en), @364 +// .local_en (ex3_data_clk_en),//Local Condition @365 +// .module_en (cp0_vfpu_icg_en) @366 +// ); @367 +assign ex3_data_clk_en = ex3_pipedown; + +assign pipex_dp_vfdsu_inst_vld = div_cur_state[3:0] == WB; +// this is used to apply write back port +assign vfdsu_dp_inst_wb_req = vfdsu_ex3_vld; +assign vfdsu_dp_fdiv_busy = div_cur_state[2]; + +//Debug infor +assign vfdsu_ifu_debug_ex2_wait = 1'b0; +assign vfdsu_ifu_debug_idle = (div_cur_state[3:0] == IDLE); +assign vfdsu_ifu_debug_pipe_busy = 1'b0; + + +// &ModuleEnd; @381 +endmodule + + diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v new file mode 100644 index 00000000..b57e289e --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v @@ -0,0 +1,370 @@ +/*Copyright 
2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// &Depend("cpu_cfig.h"); @22 +// &ModuleBeg; @23 +module ct_vfdsu_double( + cp0_vfpu_icg_en, + cp0_yy_clk_en, + cpurst_b, + ex1_div, + ex1_double, + ex1_pipedown, + ex1_scalar, + ex1_single, + ex1_sqrt, + ex1_src0, + ex1_src1, + ex1_static_rm, + ex2_pipedown, + ex2_srt_first_round, + ex3_pipedown, + ex4_out_expt, + ex4_out_result, + forever_cpuclk, + pad_yy_icg_scan_en, + srt_ctrl_rem_zero, + srt_ctrl_skip_srt, + srt_secd_round, + srt_sm_on, + vfpu_yy_xx_dqnan, + vfpu_yy_xx_rm +); + +// &Ports; @24 +input cp0_vfpu_icg_en; +input cp0_yy_clk_en; +input cpurst_b; +input ex1_div; +input ex1_double; +input ex1_pipedown; +input ex1_scalar; +input ex1_single; +input ex1_sqrt; +input [63:0] ex1_src0; +input [63:0] ex1_src1; +input [2 :0] ex1_static_rm; +input ex2_pipedown; +input ex2_srt_first_round; +input ex3_pipedown; +input forever_cpuclk; +input pad_yy_icg_scan_en; +input srt_secd_round; +input srt_sm_on; +input vfpu_yy_xx_dqnan; +input [2 :0] vfpu_yy_xx_rm; +output [4 :0] ex4_out_expt; +output [63:0] ex4_out_result; +output srt_ctrl_rem_zero; +output srt_ctrl_skip_srt; + +// &Regs; @25 + +// &Wires; @26 +wire cp0_vfpu_icg_en; +wire cp0_yy_clk_en; +wire cpurst_b; +wire ex1_div; +wire [52:0] ex1_divisor; +wire ex1_double; +wire ex1_pipedown; +wire [59:0] ex1_remainder; +wire ex1_scalar; +wire ex1_single; +wire ex1_sqrt; +wire [63:0] ex1_src0; +wire [63:0] ex1_src1; +wire [2 :0] ex1_static_rm; 
+wire ex2_pipedown; +wire ex2_srt_first_round; +wire ex3_pipedown; +wire [4 :0] ex4_out_expt; +wire [63:0] ex4_out_result; +wire forever_cpuclk; +wire pad_yy_icg_scan_en; +wire srt_ctrl_rem_zero; +wire srt_ctrl_skip_srt; +wire srt_secd_round; +wire srt_sm_on; +wire [57:0] total_qt_rt_58; +wire vfdsu_ex2_div; +wire vfdsu_ex2_double; +wire vfdsu_ex2_dz; +wire [12:0] vfdsu_ex2_expnt_add0; +wire [12:0] vfdsu_ex2_expnt_add1; +wire vfdsu_ex2_nv; +wire vfdsu_ex2_of_rm_lfn; +wire vfdsu_ex2_op0_norm; +wire vfdsu_ex2_op1_norm; +wire [51:0] vfdsu_ex2_qnan_f; +wire vfdsu_ex2_qnan_sign; +wire vfdsu_ex2_result_inf; +wire vfdsu_ex2_result_qnan; +wire vfdsu_ex2_result_sign; +wire vfdsu_ex2_result_zero; +wire [2 :0] vfdsu_ex2_rm; +wire vfdsu_ex2_single; +wire vfdsu_ex2_sqrt; +wire vfdsu_ex2_srt_skip; +wire [12:0] vfdsu_ex3_doub_expnt_rst; +wire vfdsu_ex3_double; +wire vfdsu_ex3_dz; +wire [12:0] vfdsu_ex3_half_expnt_rst; +wire vfdsu_ex3_id_srt_skip; +wire vfdsu_ex3_nv; +wire vfdsu_ex3_of; +wire vfdsu_ex3_potnt_of; +wire vfdsu_ex3_potnt_uf; +wire [51:0] vfdsu_ex3_qnan_f; +wire vfdsu_ex3_qnan_sign; +wire vfdsu_ex3_rem_sign; +wire vfdsu_ex3_rem_zero; +wire [52:0] vfdsu_ex3_result_denorm_round_add_num; +wire vfdsu_ex3_result_inf; +wire vfdsu_ex3_result_lfn; +wire vfdsu_ex3_result_qnan; +wire vfdsu_ex3_result_sign; +wire vfdsu_ex3_result_zero; +wire [2 :0] vfdsu_ex3_rm; +wire vfdsu_ex3_rslt_denorm; +wire [8 :0] vfdsu_ex3_sing_expnt_rst; +wire vfdsu_ex3_single; +wire vfdsu_ex3_uf; +wire vfdsu_ex4_denorm_to_tiny_frac; +wire vfdsu_ex4_double; +wire vfdsu_ex4_dz; +wire [12:0] vfdsu_ex4_expnt_rst; +wire [54:0] vfdsu_ex4_frac; +wire vfdsu_ex4_nv; +wire vfdsu_ex4_nx; +wire vfdsu_ex4_of; +wire vfdsu_ex4_of_rst_lfn; +wire [1 :0] vfdsu_ex4_potnt_norm; +wire vfdsu_ex4_potnt_of; +wire vfdsu_ex4_potnt_uf; +wire [51:0] vfdsu_ex4_qnan_f; +wire vfdsu_ex4_qnan_sign; +wire vfdsu_ex4_result_inf; +wire vfdsu_ex4_result_lfn; +wire vfdsu_ex4_result_nor; +wire vfdsu_ex4_result_qnan; +wire 
vfdsu_ex4_result_sign; +wire vfdsu_ex4_result_zero; +wire vfdsu_ex4_rslt_denorm; +wire vfdsu_ex4_single; +wire vfdsu_ex4_uf; +wire vfpu_yy_xx_dqnan; +wire [2 :0] vfpu_yy_xx_rm; + + +// &Instance("ct_vfdsu_prepare"); @28 +ct_vfdsu_prepare x_ct_vfdsu_prepare ( + .cp0_vfpu_icg_en (cp0_vfpu_icg_en ), + .cp0_yy_clk_en (cp0_yy_clk_en ), + .cpurst_b (cpurst_b ), + .ex1_div (ex1_div ), + .ex1_divisor (ex1_divisor ), + .ex1_double (ex1_double ), + .ex1_pipedown (ex1_pipedown ), + .ex1_remainder (ex1_remainder ), + .ex1_scalar (ex1_scalar ), + .ex1_single (ex1_single ), + .ex1_sqrt (ex1_sqrt ), + .ex1_src0 (ex1_src0 ), + .ex1_src1 (ex1_src1 ), + .ex1_static_rm (ex1_static_rm ), + .forever_cpuclk (forever_cpuclk ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en ), + .vfdsu_ex2_div (vfdsu_ex2_div ), + .vfdsu_ex2_double (vfdsu_ex2_double ), + .vfdsu_ex2_dz (vfdsu_ex2_dz ), + .vfdsu_ex2_expnt_add0 (vfdsu_ex2_expnt_add0 ), + .vfdsu_ex2_expnt_add1 (vfdsu_ex2_expnt_add1 ), + .vfdsu_ex2_nv (vfdsu_ex2_nv ), + .vfdsu_ex2_of_rm_lfn (vfdsu_ex2_of_rm_lfn ), + .vfdsu_ex2_op0_norm (vfdsu_ex2_op0_norm ), + .vfdsu_ex2_op1_norm (vfdsu_ex2_op1_norm ), + .vfdsu_ex2_qnan_f (vfdsu_ex2_qnan_f ), + .vfdsu_ex2_qnan_sign (vfdsu_ex2_qnan_sign ), + .vfdsu_ex2_result_inf (vfdsu_ex2_result_inf ), + .vfdsu_ex2_result_qnan (vfdsu_ex2_result_qnan), + .vfdsu_ex2_result_sign (vfdsu_ex2_result_sign), + .vfdsu_ex2_result_zero (vfdsu_ex2_result_zero), + .vfdsu_ex2_rm (vfdsu_ex2_rm ), + .vfdsu_ex2_single (vfdsu_ex2_single ), + .vfdsu_ex2_sqrt (vfdsu_ex2_sqrt ), + .vfdsu_ex2_srt_skip (vfdsu_ex2_srt_skip ), + .vfpu_yy_xx_dqnan (vfpu_yy_xx_dqnan ), + .vfpu_yy_xx_rm (vfpu_yy_xx_rm ) +); + +// &Instance("ct_vfdsu_srt"); @29 +ct_vfdsu_srt x_ct_vfdsu_srt ( + .cp0_vfpu_icg_en (cp0_vfpu_icg_en ), + .cp0_yy_clk_en (cp0_yy_clk_en ), + .cpurst_b (cpurst_b ), + .ex1_div (ex1_div ), + .ex1_divisor (ex1_divisor ), + .ex1_pipedown (ex1_pipedown ), + .ex1_remainder (ex1_remainder ), + .ex1_sqrt (ex1_sqrt ), + .ex2_pipedown 
(ex2_pipedown ), + .ex2_srt_first_round (ex2_srt_first_round ), + .forever_cpuclk (forever_cpuclk ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en ), + .srt_ctrl_rem_zero (srt_ctrl_rem_zero ), + .srt_ctrl_skip_srt (srt_ctrl_skip_srt ), + .srt_secd_round (srt_secd_round ), + .srt_sm_on (srt_sm_on ), + .total_qt_rt_58 (total_qt_rt_58 ), + .vfdsu_ex2_div (vfdsu_ex2_div ), + .vfdsu_ex2_double (vfdsu_ex2_double ), + .vfdsu_ex2_dz (vfdsu_ex2_dz ), + .vfdsu_ex2_expnt_add0 (vfdsu_ex2_expnt_add0 ), + .vfdsu_ex2_expnt_add1 (vfdsu_ex2_expnt_add1 ), + .vfdsu_ex2_nv (vfdsu_ex2_nv ), + .vfdsu_ex2_of_rm_lfn (vfdsu_ex2_of_rm_lfn ), + .vfdsu_ex2_op0_norm (vfdsu_ex2_op0_norm ), + .vfdsu_ex2_op1_norm (vfdsu_ex2_op1_norm ), + .vfdsu_ex2_qnan_f (vfdsu_ex2_qnan_f ), + .vfdsu_ex2_qnan_sign (vfdsu_ex2_qnan_sign ), + .vfdsu_ex2_result_inf (vfdsu_ex2_result_inf ), + .vfdsu_ex2_result_qnan (vfdsu_ex2_result_qnan ), + .vfdsu_ex2_result_sign (vfdsu_ex2_result_sign ), + .vfdsu_ex2_result_zero (vfdsu_ex2_result_zero ), + .vfdsu_ex2_rm (vfdsu_ex2_rm ), + .vfdsu_ex2_single (vfdsu_ex2_single ), + .vfdsu_ex2_sqrt (vfdsu_ex2_sqrt ), + .vfdsu_ex2_srt_skip (vfdsu_ex2_srt_skip ), + .vfdsu_ex3_doub_expnt_rst (vfdsu_ex3_doub_expnt_rst ), + .vfdsu_ex3_double (vfdsu_ex3_double ), + .vfdsu_ex3_dz (vfdsu_ex3_dz ), + .vfdsu_ex3_half_expnt_rst (vfdsu_ex3_half_expnt_rst ), + .vfdsu_ex3_id_srt_skip (vfdsu_ex3_id_srt_skip ), + .vfdsu_ex3_nv (vfdsu_ex3_nv ), + .vfdsu_ex3_of (vfdsu_ex3_of ), + .vfdsu_ex3_potnt_of (vfdsu_ex3_potnt_of ), + .vfdsu_ex3_potnt_uf (vfdsu_ex3_potnt_uf ), + .vfdsu_ex3_qnan_f (vfdsu_ex3_qnan_f ), + .vfdsu_ex3_qnan_sign (vfdsu_ex3_qnan_sign ), + .vfdsu_ex3_rem_sign (vfdsu_ex3_rem_sign ), + .vfdsu_ex3_rem_zero (vfdsu_ex3_rem_zero ), + .vfdsu_ex3_result_denorm_round_add_num (vfdsu_ex3_result_denorm_round_add_num), + .vfdsu_ex3_result_inf (vfdsu_ex3_result_inf ), + .vfdsu_ex3_result_lfn (vfdsu_ex3_result_lfn ), + .vfdsu_ex3_result_qnan (vfdsu_ex3_result_qnan ), + .vfdsu_ex3_result_sign 
(vfdsu_ex3_result_sign ), + .vfdsu_ex3_result_zero (vfdsu_ex3_result_zero ), + .vfdsu_ex3_rm (vfdsu_ex3_rm ), + .vfdsu_ex3_rslt_denorm (vfdsu_ex3_rslt_denorm ), + .vfdsu_ex3_sing_expnt_rst (vfdsu_ex3_sing_expnt_rst ), + .vfdsu_ex3_single (vfdsu_ex3_single ), + .vfdsu_ex3_uf (vfdsu_ex3_uf ) +); + +// &Instance("ct_vfdsu_round"); @30 +ct_vfdsu_round x_ct_vfdsu_round ( + .cp0_vfpu_icg_en (cp0_vfpu_icg_en ), + .cp0_yy_clk_en (cp0_yy_clk_en ), + .cpurst_b (cpurst_b ), + .ex3_pipedown (ex3_pipedown ), + .forever_cpuclk (forever_cpuclk ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en ), + .total_qt_rt_58 (total_qt_rt_58 ), + .vfdsu_ex2_of_rm_lfn (vfdsu_ex2_of_rm_lfn ), + .vfdsu_ex3_doub_expnt_rst (vfdsu_ex3_doub_expnt_rst ), + .vfdsu_ex3_double (vfdsu_ex3_double ), + .vfdsu_ex3_dz (vfdsu_ex3_dz ), + .vfdsu_ex3_half_expnt_rst (vfdsu_ex3_half_expnt_rst ), + .vfdsu_ex3_id_srt_skip (vfdsu_ex3_id_srt_skip ), + .vfdsu_ex3_nv (vfdsu_ex3_nv ), + .vfdsu_ex3_of (vfdsu_ex3_of ), + .vfdsu_ex3_potnt_of (vfdsu_ex3_potnt_of ), + .vfdsu_ex3_potnt_uf (vfdsu_ex3_potnt_uf ), + .vfdsu_ex3_qnan_f (vfdsu_ex3_qnan_f ), + .vfdsu_ex3_qnan_sign (vfdsu_ex3_qnan_sign ), + .vfdsu_ex3_rem_sign (vfdsu_ex3_rem_sign ), + .vfdsu_ex3_rem_zero (vfdsu_ex3_rem_zero ), + .vfdsu_ex3_result_denorm_round_add_num (vfdsu_ex3_result_denorm_round_add_num), + .vfdsu_ex3_result_inf (vfdsu_ex3_result_inf ), + .vfdsu_ex3_result_lfn (vfdsu_ex3_result_lfn ), + .vfdsu_ex3_result_qnan (vfdsu_ex3_result_qnan ), + .vfdsu_ex3_result_sign (vfdsu_ex3_result_sign ), + .vfdsu_ex3_result_zero (vfdsu_ex3_result_zero ), + .vfdsu_ex3_rm (vfdsu_ex3_rm ), + .vfdsu_ex3_rslt_denorm (vfdsu_ex3_rslt_denorm ), + .vfdsu_ex3_sing_expnt_rst (vfdsu_ex3_sing_expnt_rst ), + .vfdsu_ex3_single (vfdsu_ex3_single ), + .vfdsu_ex3_uf (vfdsu_ex3_uf ), + .vfdsu_ex4_denorm_to_tiny_frac (vfdsu_ex4_denorm_to_tiny_frac ), + .vfdsu_ex4_double (vfdsu_ex4_double ), + .vfdsu_ex4_dz (vfdsu_ex4_dz ), + .vfdsu_ex4_expnt_rst (vfdsu_ex4_expnt_rst ), + .vfdsu_ex4_frac 
(vfdsu_ex4_frac ), + .vfdsu_ex4_nv (vfdsu_ex4_nv ), + .vfdsu_ex4_nx (vfdsu_ex4_nx ), + .vfdsu_ex4_of (vfdsu_ex4_of ), + .vfdsu_ex4_of_rst_lfn (vfdsu_ex4_of_rst_lfn ), + .vfdsu_ex4_potnt_norm (vfdsu_ex4_potnt_norm ), + .vfdsu_ex4_potnt_of (vfdsu_ex4_potnt_of ), + .vfdsu_ex4_potnt_uf (vfdsu_ex4_potnt_uf ), + .vfdsu_ex4_qnan_f (vfdsu_ex4_qnan_f ), + .vfdsu_ex4_qnan_sign (vfdsu_ex4_qnan_sign ), + .vfdsu_ex4_result_inf (vfdsu_ex4_result_inf ), + .vfdsu_ex4_result_lfn (vfdsu_ex4_result_lfn ), + .vfdsu_ex4_result_nor (vfdsu_ex4_result_nor ), + .vfdsu_ex4_result_qnan (vfdsu_ex4_result_qnan ), + .vfdsu_ex4_result_sign (vfdsu_ex4_result_sign ), + .vfdsu_ex4_result_zero (vfdsu_ex4_result_zero ), + .vfdsu_ex4_rslt_denorm (vfdsu_ex4_rslt_denorm ), + .vfdsu_ex4_single (vfdsu_ex4_single ), + .vfdsu_ex4_uf (vfdsu_ex4_uf ) +); + +// &Instance("ct_vfdsu_pack"); @31 +ct_vfdsu_pack x_ct_vfdsu_pack ( + .ex4_out_expt (ex4_out_expt ), + .ex4_out_result (ex4_out_result ), + .vfdsu_ex4_denorm_to_tiny_frac (vfdsu_ex4_denorm_to_tiny_frac), + .vfdsu_ex4_double (vfdsu_ex4_double ), + .vfdsu_ex4_dz (vfdsu_ex4_dz ), + .vfdsu_ex4_expnt_rst (vfdsu_ex4_expnt_rst ), + .vfdsu_ex4_frac (vfdsu_ex4_frac ), + .vfdsu_ex4_nv (vfdsu_ex4_nv ), + .vfdsu_ex4_nx (vfdsu_ex4_nx ), + .vfdsu_ex4_of (vfdsu_ex4_of ), + .vfdsu_ex4_of_rst_lfn (vfdsu_ex4_of_rst_lfn ), + .vfdsu_ex4_potnt_norm (vfdsu_ex4_potnt_norm ), + .vfdsu_ex4_potnt_of (vfdsu_ex4_potnt_of ), + .vfdsu_ex4_potnt_uf (vfdsu_ex4_potnt_uf ), + .vfdsu_ex4_qnan_f (vfdsu_ex4_qnan_f ), + .vfdsu_ex4_qnan_sign (vfdsu_ex4_qnan_sign ), + .vfdsu_ex4_result_inf (vfdsu_ex4_result_inf ), + .vfdsu_ex4_result_lfn (vfdsu_ex4_result_lfn ), + .vfdsu_ex4_result_nor (vfdsu_ex4_result_nor ), + .vfdsu_ex4_result_qnan (vfdsu_ex4_result_qnan ), + .vfdsu_ex4_result_sign (vfdsu_ex4_result_sign ), + .vfdsu_ex4_result_zero (vfdsu_ex4_result_zero ), + .vfdsu_ex4_rslt_denorm (vfdsu_ex4_rslt_denorm ), + .vfdsu_ex4_single (vfdsu_ex4_single ), + .vfdsu_ex4_uf (vfdsu_ex4_uf ) +); + + + +// 
&ModuleEnd; @34
+endmodule
+
+
diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ff1.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ff1.v
new file mode 100644
index 00000000..c6d2e867
--- /dev/null
+++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ff1.v
@@ -0,0 +1,99 @@
+/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+module ct_vfdsu_ff1( // Combinational find-first-one: left-aligns frac_num so its leading 1 sits at bit 51 and reports the shift as a negative 13-bit count.
+  fanc_shift_num,
+  frac_bin_val,
+  frac_num
+);
+
+// &Ports; @22
+input [51:0] frac_num; // 52-bit value to normalise
+output [51:0] fanc_shift_num; // frac_num shifted left by the number of leading zeros (0 when frac_num is 0); "fanc" is the spelling used in the port list, so it must be kept
+output [12:0] frac_bin_val; // minus the shift amount, 13-bit two's complement: 13'h0 for no shift, 13'h1fff (-1) ... 13'h1fcd (-51), 13'h1fcc (-52) for an all-zero input
+
+// &Regs; @23
+reg [51:0] fanc_shift_num;
+reg [12:0] frac_bin_val;
+
+// &Wires; @24
+wire [51:0] frac_num;
+
+
+// &CombBeg; @26
+always @( frac_num[51:0])
+begin
+casez(frac_num[51:0]) // each item matches exactly one leading-one position; '?' bits are don't-care
+  52'b1???????????????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h0; fanc_shift_num[51:0] = frac_num[51:0]; end
+  52'b01??????????????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1fff; fanc_shift_num[51:0] = {frac_num[50:0],1'b0};end
+  52'b001?????????????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ffe; fanc_shift_num[51:0] = {frac_num[49:0],2'b0};end
+  52'b0001????????????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ffd; fanc_shift_num[51:0] = {frac_num[48:0],3'b0};end
+  52'b00001???????????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ffc; fanc_shift_num[51:0] = {frac_num[47:0],4'b0};end
+  52'b000001??????????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ffb; fanc_shift_num[51:0] = {frac_num[46:0],5'b0};end
+  52'b0000001?????????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ffa; fanc_shift_num[51:0] = {frac_num[45:0],6'b0};end
+  52'b00000001????????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff9; fanc_shift_num[51:0] = {frac_num[44:0],7'b0};end
+  52'b000000001???????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff8; fanc_shift_num[51:0] = {frac_num[43:0],8'b0};end
+  52'b0000000001??????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff7; fanc_shift_num[51:0] = {frac_num[42:0],9'b0};end
+  52'b00000000001?????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff6; fanc_shift_num[51:0] = {frac_num[41:0],10'b0};end
+  52'b000000000001????????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff5; fanc_shift_num[51:0] = {frac_num[40:0],11'b0};end
+  52'b0000000000001???????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff4; fanc_shift_num[51:0] = {frac_num[39:0],12'b0};end
+  52'b00000000000001??????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff3; fanc_shift_num[51:0] = {frac_num[38:0],13'b0};end
+  52'b000000000000001?????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff2; fanc_shift_num[51:0] = {frac_num[37:0],14'b0};end
+  52'b0000000000000001????????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff1; fanc_shift_num[51:0] = {frac_num[36:0],15'b0};end
+  52'b00000000000000001???????????????????????????????????: begin frac_bin_val[12:0] = 13'h1ff0; fanc_shift_num[51:0] = {frac_num[35:0],16'b0};end
+  52'b000000000000000001??????????????????????????????????: begin frac_bin_val[12:0] = 13'h1fef; fanc_shift_num[51:0] = {frac_num[34:0],17'b0};end
+  52'b0000000000000000001?????????????????????????????????: begin frac_bin_val[12:0] = 13'h1fee; fanc_shift_num[51:0] = {frac_num[33:0],18'b0};end
+  52'b00000000000000000001????????????????????????????????: begin frac_bin_val[12:0] = 13'h1fed; fanc_shift_num[51:0] = {frac_num[32:0],19'b0};end
+  52'b000000000000000000001???????????????????????????????: begin frac_bin_val[12:0] = 13'h1fec; fanc_shift_num[51:0] = {frac_num[31:0],20'b0};end
+  52'b0000000000000000000001??????????????????????????????: begin frac_bin_val[12:0] = 13'h1feb; fanc_shift_num[51:0] = {frac_num[30:0],21'b0};end
+  52'b00000000000000000000001?????????????????????????????: begin frac_bin_val[12:0] = 13'h1fea; fanc_shift_num[51:0] = {frac_num[29:0],22'b0};end
+  52'b000000000000000000000001????????????????????????????: begin frac_bin_val[12:0] = 13'h1fe9; fanc_shift_num[51:0] = {frac_num[28:0],23'b0};end
+  52'b0000000000000000000000001???????????????????????????: begin frac_bin_val[12:0] = 13'h1fe8; fanc_shift_num[51:0] = {frac_num[27:0],24'b0};end
+  52'b00000000000000000000000001??????????????????????????: begin frac_bin_val[12:0] = 13'h1fe7; fanc_shift_num[51:0] = {frac_num[26:0],25'b0};end
+  52'b000000000000000000000000001?????????????????????????: begin frac_bin_val[12:0] = 13'h1fe6; fanc_shift_num[51:0] = {frac_num[25:0],26'b0};end
+  52'b0000000000000000000000000001????????????????????????: begin frac_bin_val[12:0] = 13'h1fe5; fanc_shift_num[51:0] = {frac_num[24:0],27'b0};end
+  52'b00000000000000000000000000001???????????????????????: begin frac_bin_val[12:0] = 13'h1fe4; fanc_shift_num[51:0] = {frac_num[23:0],28'b0};end
+  52'b000000000000000000000000000001??????????????????????: begin frac_bin_val[12:0] = 13'h1fe3; fanc_shift_num[51:0] = {frac_num[22:0],29'b0};end
+  52'b0000000000000000000000000000001?????????????????????: begin frac_bin_val[12:0] = 13'h1fe2; fanc_shift_num[51:0] = {frac_num[21:0],30'b0};end
+  52'b00000000000000000000000000000001????????????????????: begin frac_bin_val[12:0] = 13'h1fe1; fanc_shift_num[51:0] = {frac_num[20:0],31'b0};end
+  52'b000000000000000000000000000000001???????????????????: begin frac_bin_val[12:0] = 13'h1fe0; fanc_shift_num[51:0] = {frac_num[19:0],32'b0};end
+  52'b0000000000000000000000000000000001??????????????????: begin frac_bin_val[12:0] = 13'h1fdf; fanc_shift_num[51:0] = {frac_num[18:0],33'b0};end
+  52'b00000000000000000000000000000000001?????????????????: begin frac_bin_val[12:0] = 13'h1fde; fanc_shift_num[51:0] = {frac_num[17:0],34'b0};end
+  52'b000000000000000000000000000000000001????????????????: begin frac_bin_val[12:0] = 13'h1fdd; fanc_shift_num[51:0] = {frac_num[16:0],35'b0};end
+  52'b0000000000000000000000000000000000001???????????????: begin frac_bin_val[12:0] = 13'h1fdc; fanc_shift_num[51:0] = {frac_num[15:0],36'b0};end
+  52'b00000000000000000000000000000000000001??????????????: begin frac_bin_val[12:0] = 13'h1fdb; fanc_shift_num[51:0] = {frac_num[14:0],37'b0};end
+  52'b000000000000000000000000000000000000001?????????????: begin frac_bin_val[12:0] = 13'h1fda; fanc_shift_num[51:0] = {frac_num[13:0],38'b0};end
+  52'b0000000000000000000000000000000000000001????????????: begin frac_bin_val[12:0] = 13'h1fd9; fanc_shift_num[51:0] = {frac_num[12:0],39'b0};end
+  52'b00000000000000000000000000000000000000001???????????: begin frac_bin_val[12:0] = 13'h1fd8; fanc_shift_num[51:0] = {frac_num[11:0],40'b0};end
+  52'b000000000000000000000000000000000000000001??????????: begin frac_bin_val[12:0] = 13'h1fd7; fanc_shift_num[51:0] = {frac_num[10:0],41'b0};end
+  52'b0000000000000000000000000000000000000000001?????????: begin frac_bin_val[12:0] = 13'h1fd6; fanc_shift_num[51:0] = {frac_num[9:0],42'b0};end
+  52'b00000000000000000000000000000000000000000001????????: begin frac_bin_val[12:0] = 13'h1fd5; fanc_shift_num[51:0] = {frac_num[8:0],43'b0};end
+  52'b000000000000000000000000000000000000000000001???????: begin frac_bin_val[12:0] = 13'h1fd4; fanc_shift_num[51:0] = {frac_num[7:0],44'b0};end
+  52'b0000000000000000000000000000000000000000000001??????: begin frac_bin_val[12:0] = 13'h1fd3; fanc_shift_num[51:0] = {frac_num[6:0],45'b0};end
+  52'b00000000000000000000000000000000000000000000001?????: begin frac_bin_val[12:0] = 13'h1fd2; fanc_shift_num[51:0] = {frac_num[5:0],46'b0};end
+  52'b000000000000000000000000000000000000000000000001????: begin frac_bin_val[12:0] = 13'h1fd1; fanc_shift_num[51:0] = {frac_num[4:0],47'b0};end
+  52'b0000000000000000000000000000000000000000000000001???: begin frac_bin_val[12:0] = 13'h1fd0; fanc_shift_num[51:0] = {frac_num[3:0],48'b0};end
+  52'b00000000000000000000000000000000000000000000000001??: begin frac_bin_val[12:0] = 13'h1fcf; fanc_shift_num[51:0] = {frac_num[2:0],49'b0};end
+  52'b000000000000000000000000000000000000000000000000001?: begin frac_bin_val[12:0] = 13'h1fce; fanc_shift_num[51:0] = {frac_num[1:0],50'b0};end
+  52'b0000000000000000000000000000000000000000000000000001: begin frac_bin_val[12:0] = 13'h1fcd; fanc_shift_num[51:0] = {frac_num[0:0],51'b0};end
+  52'b0000000000000000000000000000000000000000000000000000: begin frac_bin_val[12:0] = 13'h1fcc; fanc_shift_num[51:0] = {52'b0};end // all-zero input: count is -52, shifted value is 0
+  default:begin frac_bin_val[12:0] = 13'h000; fanc_shift_num[51:0] = {52'b0};end // the items above cover every 0/1 input, so this is only taken when nothing matches (e.g. x bits in simulation)
+endcase
+// &CombEnd; @83
+end
+
+// &ModuleEnd; @85
+endmodule
+
+
diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v
new file mode 100644
index 00000000..e1d2e18a
--- /dev/null
+++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v
@@ -0,0 +1,417 @@
+/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+// Purely combinational EX4 packer: selects among denormal, qNaN, infinity, largest-finite, zero and normal encodings for ex4_out_result and assembles the 5-bit flag vector ex4_out_expt.
+// &ModuleBeg; @22
+module ct_vfdsu_pack(
+  ex4_out_expt,
+  ex4_out_result,
+  vfdsu_ex4_denorm_to_tiny_frac,
+  vfdsu_ex4_double,
+  vfdsu_ex4_dz,
+  vfdsu_ex4_expnt_rst,
+  vfdsu_ex4_frac,
+  vfdsu_ex4_nv,
+  vfdsu_ex4_nx,
+  vfdsu_ex4_of,
+  vfdsu_ex4_of_rst_lfn,
+  vfdsu_ex4_potnt_norm,
+  vfdsu_ex4_potnt_of,
+  vfdsu_ex4_potnt_uf,
+  vfdsu_ex4_qnan_f,
+  vfdsu_ex4_qnan_sign,
+  vfdsu_ex4_result_inf,
+  vfdsu_ex4_result_lfn,
+  vfdsu_ex4_result_nor,
+  vfdsu_ex4_result_qnan,
+  vfdsu_ex4_result_sign,
+  vfdsu_ex4_result_zero,
+  vfdsu_ex4_rslt_denorm,
+  vfdsu_ex4_single,
+  vfdsu_ex4_uf
+);
+
+// &Ports; @23
+input vfdsu_ex4_denorm_to_tiny_frac;
+input vfdsu_ex4_double; // selects the 64-bit result layout; checked before vfdsu_ex4_single
+input vfdsu_ex4_dz;
+input [12:0] vfdsu_ex4_expnt_rst;
+input [54:0] vfdsu_ex4_frac;
+input vfdsu_ex4_nv;
+input vfdsu_ex4_nx;
+input vfdsu_ex4_of;
+input vfdsu_ex4_of_rst_lfn;
+input [1 :0] vfdsu_ex4_potnt_norm;
+input vfdsu_ex4_potnt_of;
+input vfdsu_ex4_potnt_uf;
+input [51:0] vfdsu_ex4_qnan_f;
+input vfdsu_ex4_qnan_sign;
+input vfdsu_ex4_result_inf;
+input vfdsu_ex4_result_lfn;
+input vfdsu_ex4_result_nor;
+input vfdsu_ex4_result_qnan;
+input vfdsu_ex4_result_sign;
+input vfdsu_ex4_result_zero;
+input vfdsu_ex4_rslt_denorm;
+input vfdsu_ex4_single; // selects the 32-bit layout when vfdsu_ex4_double is 0; if both are 0 the 16-bit layout is used
+input vfdsu_ex4_uf;
+output [4 :0] ex4_out_expt; // {nv, dz, of, uf, nx} flag vector, see the assign below
+output [63:0] ex4_out_result; // packed result; 32- and 16-bit layouts have all upper bits set to 1
+
+// &Regs; @24
+reg [51:0] ex4_denorm_frac;
+reg [51:0] ex4_frac_52;
+reg [51:0] ex4_half_denorm_frac;
+reg [63:0] ex4_out_result;
+reg [51:0] ex4_single_denorm_frac;
+reg [12:0] expnt_add_op1;
+
+// &Wires; @25
+wire ex4_cor_nx;
+wire ex4_cor_uf;
+wire ex4_denorm_potnt_norm;
+wire [63:0] ex4_denorm_result;
+wire [63:0] ex4_doub_lfn;
+wire [63:0] ex4_doub_rst0;
+wire [63:0] ex4_doub_rst_inf;
+wire [63:0] ex4_doub_rst_norm;
+wire [63:0] ex4_doub_rst_qnan;
+wire [12:0] ex4_expnt_rst;
+wire ex4_final_rst_norm;
+wire [54:0] ex4_frac;
+wire [63:0] ex4_half_lfn;
+wire [63:0] ex4_half_rst0;
+wire [63:0] ex4_half_rst_inf;
+wire [63:0] ex4_half_rst_norm;
+wire [63:0] ex4_half_rst_qnan;
+wire ex4_of_plus;
+wire [4 :0] ex4_out_expt;
+wire ex4_result_inf;
+wire ex4_result_lfn;
+wire ex4_rslt_denorm;
+wire [63:0] ex4_rst0;
+wire [63:0] ex4_rst_inf;
+wire [63:0] ex4_rst_lfn;
+wire ex4_rst_nor;
+wire [63:0] ex4_rst_norm;
+wire [63:0] ex4_rst_qnan;
+wire [63:0] ex4_sing_lfn;
+wire [63:0] ex4_sing_rst0;
+wire [63:0] ex4_sing_rst_inf;
+wire [63:0] ex4_sing_rst_norm;
+wire [63:0] ex4_sing_rst_qnan;
+wire ex4_uf_plus;
+wire vfdsu_ex4_denorm_to_tiny_frac;
+wire vfdsu_ex4_double;
+wire vfdsu_ex4_dz;
+wire [12:0] vfdsu_ex4_expnt_rst;
+wire [54:0] vfdsu_ex4_frac;
+wire vfdsu_ex4_nv;
+wire vfdsu_ex4_nx;
+wire vfdsu_ex4_of;
+wire vfdsu_ex4_of_rst_lfn;
+wire [1 :0] vfdsu_ex4_potnt_norm;
+wire vfdsu_ex4_potnt_of;
+wire vfdsu_ex4_potnt_uf;
+wire [51:0] vfdsu_ex4_qnan_f;
+wire vfdsu_ex4_qnan_sign;
+wire vfdsu_ex4_result_inf;
+wire vfdsu_ex4_result_lfn;
+wire vfdsu_ex4_result_nor;
+wire vfdsu_ex4_result_qnan;
+wire vfdsu_ex4_result_sign;
+wire vfdsu_ex4_result_zero;
+wire vfdsu_ex4_rslt_denorm;
+wire vfdsu_ex4_single;
+wire vfdsu_ex4_uf;
+
+
+//============================EX4 STAGE=====================
+assign ex4_frac[54:0] = vfdsu_ex4_frac[54:0];
+//exponent adder
+// &CombBeg; @30
+always @( ex4_frac[54:53])
+begin
+casez(ex4_frac[54:53])
+  2'b00 : expnt_add_op1[12:0] = 13'h0fff; //the expnt sub 1 (NOTE: 13'h0fff is -1 only modulo 2^12; only ex4_expnt_rst[10:0], [7:0] and [4:0] are read below, so bit 12 of the sum does not matter)
+  2'b01 : expnt_add_op1[12:0] = 13'h0; //the expnt stays the original
+  2'b1? : expnt_add_op1[12:0] = 13'h1; // the expnt add 1
+  default : expnt_add_op1[12:0] = 13'b0;
+endcase
+// &CombEnd; @37
+end
+assign ex4_expnt_rst[12:0] = vfdsu_ex4_expnt_rst[12:0] +
+  expnt_add_op1[12:0];
+
+//==========================Result Pack=====================
+
+// result denormal pack
+// shift to the denormal number
+// &CombBeg; @45
+always @( vfdsu_ex4_expnt_rst[12:0]
+  or ex4_frac[54:1]
+  or vfdsu_ex4_denorm_to_tiny_frac)
+begin
+case(vfdsu_ex4_expnt_rst[12:0]) // 64-bit layout: right-shift ex4_frac by an amount that grows as the exponent value decreases
+  13'h1: ex4_denorm_frac[51:0] = { ex4_frac[52:1]}; //-1022 1
+  13'h0: ex4_denorm_frac[51:0] = { ex4_frac[53:2]}; //-1023 0
+  13'h1fff:ex4_denorm_frac[51:0] = { ex4_frac[54:3]}; //-1024 -1
+  13'h1ffe:ex4_denorm_frac[51:0] = {1'b0, ex4_frac[54:4]}; //-1025 -2
+  13'h1ffd:ex4_denorm_frac[51:0] = {2'b0, ex4_frac[54:5]}; //-1026 -3
+  13'h1ffc:ex4_denorm_frac[51:0] = {3'b0, ex4_frac[54:6]}; //-1027 -4
+  13'h1ffb:ex4_denorm_frac[51:0] = {4'b0, ex4_frac[54:7]}; //-1028 -5
+  13'h1ffa:ex4_denorm_frac[51:0] = {5'b0, ex4_frac[54:8]}; //-1029 -6
+  13'h1ff9:ex4_denorm_frac[51:0] = {6'b0, ex4_frac[54:9]}; //-1030 -7
+  13'h1ff8:ex4_denorm_frac[51:0] = {7'b0, ex4_frac[54:10]}; //-1031 -8
+  13'h1ff7:ex4_denorm_frac[51:0] = {8'b0, ex4_frac[54:11]}; //-1032 -9
+  13'h1ff6:ex4_denorm_frac[51:0] = {9'b0, ex4_frac[54:12]}; //-1033 -10
+  13'h1ff5:ex4_denorm_frac[51:0] = {10'b0,ex4_frac[54:13]}; //-1034 -11
+  13'h1ff4:ex4_denorm_frac[51:0] = {11'b0,ex4_frac[54:14]}; //-1035 -12
+  13'h1ff3:ex4_denorm_frac[51:0] = {12'b0,ex4_frac[54:15]}; //-1036 -13
+  13'h1ff2:ex4_denorm_frac[51:0] = {13'b0,ex4_frac[54:16]}; // -1037
+  13'h1ff1:ex4_denorm_frac[51:0] = {14'b0,ex4_frac[54:17]}; //-1038
+  13'h1ff0:ex4_denorm_frac[51:0] = {15'b0,ex4_frac[54:18]}; //-1039
+  13'h1fef:ex4_denorm_frac[51:0] = {16'b0,ex4_frac[54:19]}; //-1040
+  13'h1fee:ex4_denorm_frac[51:0] = {17'b0,ex4_frac[54:20]}; //-1041
+  13'h1fed:ex4_denorm_frac[51:0] = {18'b0,ex4_frac[54:21]}; //-1042
+  13'h1fec:ex4_denorm_frac[51:0] = {19'b0,ex4_frac[54:22]}; //-1043
+  13'h1feb:ex4_denorm_frac[51:0] = {20'b0,ex4_frac[54:23]}; //-1044
+  13'h1fea:ex4_denorm_frac[51:0] = {21'b0,ex4_frac[54:24]}; //-1045
+  13'h1fe9:ex4_denorm_frac[51:0] = {22'b0,ex4_frac[54:25]}; //-1046
+  13'h1fe8:ex4_denorm_frac[51:0] = {23'b0,ex4_frac[54:26]}; //-1047
+  13'h1fe7:ex4_denorm_frac[51:0] = {24'b0,ex4_frac[54:27]}; //-1048
+  13'h1fe6:ex4_denorm_frac[51:0] = {25'b0,ex4_frac[54:28]}; //-1049
+  13'h1fe5:ex4_denorm_frac[51:0] = {26'b0,ex4_frac[54:29]}; //-1050
+  13'h1fe4:ex4_denorm_frac[51:0] = {27'b0,ex4_frac[54:30]}; //-1051
+  13'h1fe3:ex4_denorm_frac[51:0] = {28'b0,ex4_frac[54:31]}; //-1052
+  13'h1fe2:ex4_denorm_frac[51:0] = {29'b0,ex4_frac[54:32]}; //-1053
+  13'h1fe1:ex4_denorm_frac[51:0] = {30'b0,ex4_frac[54:33]}; //-1054
+  13'h1fe0:ex4_denorm_frac[51:0] = {31'b0,ex4_frac[54:34]}; //-1055
+  13'h1fdf:ex4_denorm_frac[51:0] = {32'b0,ex4_frac[54:35]}; //-1056
+  13'h1fde:ex4_denorm_frac[51:0] = {33'b0,ex4_frac[54:36]}; //-1057
+  13'h1fdd:ex4_denorm_frac[51:0] = {34'b0,ex4_frac[54:37]}; //-1058
+  13'h1fdc:ex4_denorm_frac[51:0] = {35'b0,ex4_frac[54:38]}; //-1059
+  13'h1fdb:ex4_denorm_frac[51:0] = {36'b0,ex4_frac[54:39]}; //-1060
+  13'h1fda:ex4_denorm_frac[51:0] = {37'b0,ex4_frac[54:40]}; //-1061
+  13'h1fd9:ex4_denorm_frac[51:0] = {38'b0,ex4_frac[54:41]}; //-1062
+  13'h1fd8:ex4_denorm_frac[51:0] = {39'b0,ex4_frac[54:42]}; //-1063
+  13'h1fd7:ex4_denorm_frac[51:0] = {40'b0,ex4_frac[54:43]}; //-1064
+  13'h1fd6:ex4_denorm_frac[51:0] = {41'b0,ex4_frac[54:44]}; //-1065
+  13'h1fd5:ex4_denorm_frac[51:0] = {42'b0,ex4_frac[54:45]}; //-1066
+  13'h1fd4:ex4_denorm_frac[51:0] = {43'b0,ex4_frac[54:46]}; //-1067
+  13'h1fd3:ex4_denorm_frac[51:0] = {44'b0,ex4_frac[54:47]}; //-1068
+  13'h1fd2:ex4_denorm_frac[51:0] = {45'b0,ex4_frac[54:48]}; //-1069
+  13'h1fd1:ex4_denorm_frac[51:0] = {46'b0,ex4_frac[54:49]}; //-1070
+  13'h1fd0:ex4_denorm_frac[51:0] = {47'b0,ex4_frac[54:50]}; //-1071
+  13'h1fcf:ex4_denorm_frac[51:0] = {48'b0,ex4_frac[54:51]}; //-1072
+  13'h1fce:ex4_denorm_frac[51:0] = {49'b0,ex4_frac[54:52]}; //-1073
+  13'h1fcd:ex4_denorm_frac[51:0] = {50'b0,ex4_frac[54:53]}; //-1074
+  default: ex4_denorm_frac[51:0] = vfdsu_ex4_denorm_to_tiny_frac ? 52'b1 : 52'b0; // exponent not in the table: only the fraction LSB is set when vfdsu_ex4_denorm_to_tiny_frac, otherwise 0
+endcase
+// &CombEnd; @102
+end
+// &CombBeg; @103
+always @( vfdsu_ex4_expnt_rst[12:0]
+  or ex4_frac[54:1]
+  or vfdsu_ex4_denorm_to_tiny_frac)
+begin
+case(vfdsu_ex4_expnt_rst[12:0]) // 32-bit layout: same shift table, truncated; only bits [51:29] are consumed. NOTE(review): the left-hand numbers in the comments match the 64-bit table above, presumably copied from it
+  13'h1: ex4_single_denorm_frac[51:0] = { ex4_frac[52:1]}; //-1022 1
+  13'h0: ex4_single_denorm_frac[51:0] = { ex4_frac[53:2]}; //-1023 0
+  13'h1fff:ex4_single_denorm_frac[51:0] = { ex4_frac[54:3]}; //-1024 -1
+  13'h1ffe:ex4_single_denorm_frac[51:0] = {1'b0, ex4_frac[54:4]}; //-1025 -2
+  13'h1ffd:ex4_single_denorm_frac[51:0] = {2'b0, ex4_frac[54:5]}; //-1026 -3
+  13'h1ffc:ex4_single_denorm_frac[51:0] = {3'b0, ex4_frac[54:6]}; //-1027 -4
+  13'h1ffb:ex4_single_denorm_frac[51:0] = {4'b0, ex4_frac[54:7]}; //-1028 -5
+  13'h1ffa:ex4_single_denorm_frac[51:0] = {5'b0, ex4_frac[54:8]}; //-1029 -6
+  13'h1ff9:ex4_single_denorm_frac[51:0] = {6'b0, ex4_frac[54:9]}; //-1030 -7
+  13'h1ff8:ex4_single_denorm_frac[51:0] = {7'b0, ex4_frac[54:10]}; //-1031 -8
+  13'h1ff7:ex4_single_denorm_frac[51:0] = {8'b0, ex4_frac[54:11]}; //-1032 -9
+  13'h1ff6:ex4_single_denorm_frac[51:0] = {9'b0, ex4_frac[54:12]}; //-1033 -10
+  13'h1ff5:ex4_single_denorm_frac[51:0] = {10'b0,ex4_frac[54:13]}; //-1034 -11
+  13'h1ff4:ex4_single_denorm_frac[51:0] = {11'b0,ex4_frac[54:14]}; //-1035 -12
+  13'h1ff3:ex4_single_denorm_frac[51:0] = {12'b0,ex4_frac[54:15]}; //-1036 -13
+  13'h1ff2:ex4_single_denorm_frac[51:0] = {13'b0,ex4_frac[54:16]}; // -1037
+  13'h1ff1:ex4_single_denorm_frac[51:0] = {14'b0,ex4_frac[54:17]}; //-1038
+  13'h1ff0:ex4_single_denorm_frac[51:0] = {15'b0,ex4_frac[54:18]}; //-1039
+  13'h1fef:ex4_single_denorm_frac[51:0] = {16'b0,ex4_frac[54:19]}; //-1040
+  13'h1fee:ex4_single_denorm_frac[51:0] = {17'b0,ex4_frac[54:20]}; //-1041
+  13'h1fed:ex4_single_denorm_frac[51:0] = {18'b0,ex4_frac[54:21]}; //-1042
+  13'h1fec:ex4_single_denorm_frac[51:0] = {19'b0,ex4_frac[54:22]}; //-1043
+  13'h1feb:ex4_single_denorm_frac[51:0] = {20'b0,ex4_frac[54:23]}; //-1044
+  13'h1fea:ex4_single_denorm_frac[51:0] = {21'b0,ex4_frac[54:24]}; //-1045
+  default :ex4_single_denorm_frac[51:0] = vfdsu_ex4_denorm_to_tiny_frac ?{23'b1,29'b0} : 52'b0; //-1045
+endcase
+// &CombEnd; @131
+end
+// &CombBeg; @132
+always @( vfdsu_ex4_expnt_rst[12:0]
+  or ex4_frac[54:1]
+  or vfdsu_ex4_denorm_to_tiny_frac)
+begin
+case(vfdsu_ex4_expnt_rst[12:0]) // 16-bit layout: same shift table, truncated; only bits [51:42] are consumed
+  13'h1: ex4_half_denorm_frac[51:0] = { ex4_frac[52:1]}; //-1022 1
+  13'h0: ex4_half_denorm_frac[51:0] = { ex4_frac[53:2]}; //-1023 0
+  13'h1fff:ex4_half_denorm_frac[51:0] = { ex4_frac[54:3]}; //-1024 -1
+  13'h1ffe:ex4_half_denorm_frac[51:0] = {1'b0, ex4_frac[54:4]}; //-1025 -2
+  13'h1ffd:ex4_half_denorm_frac[51:0] = {2'b0, ex4_frac[54:5]}; //-1026 -3
+  13'h1ffc:ex4_half_denorm_frac[51:0] = {3'b0, ex4_frac[54:6]}; //-1027 -4
+  13'h1ffb:ex4_half_denorm_frac[51:0] = {4'b0, ex4_frac[54:7]}; //-1028 -5
+  13'h1ffa:ex4_half_denorm_frac[51:0] = {5'b0, ex4_frac[54:8]}; //-1029 -6
+  13'h1ff9:ex4_half_denorm_frac[51:0] = {6'b0, ex4_frac[54:9]}; //-1030 -7
+  13'h1ff8:ex4_half_denorm_frac[51:0] = {7'b0, ex4_frac[54:10]}; //-1031 -8
+  13'h1ff7:ex4_half_denorm_frac[51:0] = {8'b0, ex4_frac[54:11]}; //-1032 -9
+  default :ex4_half_denorm_frac[51:0] = vfdsu_ex4_denorm_to_tiny_frac ?{10'b1,42'b0} : 52'b0; //-1045
+endcase
+// &CombEnd; @147
+end
+
+//here when denormal number round to add1, it will become normal number
+assign ex4_denorm_potnt_norm = (vfdsu_ex4_potnt_norm[1] && ex4_frac[53]) ||
+  (vfdsu_ex4_potnt_norm[0] && ex4_frac[54]) ;
+assign ex4_rslt_denorm = !vfdsu_ex4_result_qnan
+  && !vfdsu_ex4_result_zero
+  && (vfdsu_ex4_rslt_denorm && !ex4_denorm_potnt_norm);
+assign ex4_denorm_result[63:0] = vfdsu_ex4_double ? // denormal encoding: exponent field is zero in all three layouts
+  {vfdsu_ex4_result_sign,11'h0,ex4_denorm_frac[51:0]} :
+  vfdsu_ex4_single ? {32'hffffffff,vfdsu_ex4_result_sign,
+  8'h0,ex4_single_denorm_frac[51:29]} : {
+  48'hffffffffffff,vfdsu_ex4_result_sign,5'h0,
+  ex4_half_denorm_frac[51:42]};
+
+
+
+assign ex4_half_lfn[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,5'h1e,{10{1'b1}}};
+assign ex4_half_rst_qnan[63:0] = {48'hffffffffffff,vfdsu_ex4_qnan_sign, 5'h1f,1'b1, vfdsu_ex4_qnan_f[8:0]};
+assign ex4_half_rst_inf[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,5'h1f,10'b0};
+assign ex4_half_rst_norm[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,
+  ex4_expnt_rst[4:0],
+  ex4_frac_52[51:42]};
+assign ex4_half_rst0[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,15'h0};
+//ex4 overflow/underflow plus
+assign ex4_rst_nor = vfdsu_ex4_result_nor;
+assign ex4_of_plus = vfdsu_ex4_potnt_of && // potential overflow is confirmed when either of ex4_frac[54:53] is set
+  (|ex4_frac[54:53]) &&
+  ex4_rst_nor;
+assign ex4_uf_plus = vfdsu_ex4_potnt_uf && // potential underflow is confirmed when both of ex4_frac[54:53] are clear
+  (~|ex4_frac[54:53]) &&
+  ex4_rst_nor;
+//ex4 overflow round result
+assign ex4_result_lfn = (ex4_of_plus && vfdsu_ex4_of_rst_lfn) ||
+  vfdsu_ex4_result_lfn;
+assign ex4_result_inf = (ex4_of_plus && !vfdsu_ex4_of_rst_lfn) ||
+  vfdsu_ex4_result_inf;
+//Special Result Form
+// result largest finite number
+assign ex4_doub_lfn[63:0] = {vfdsu_ex4_result_sign,11'h7fe,{52{1'b1}}};
+assign ex4_sing_lfn[63:0] = {32'hffffffff,vfdsu_ex4_result_sign,8'hfe,{23{1'b1}}};
+// result 0
+assign ex4_doub_rst0[63:0] = {vfdsu_ex4_result_sign,63'b0};
+assign ex4_sing_rst0[63:0] = {32'hffffffff,vfdsu_ex4_result_sign,31'b0};
+//result qNaN
+// &Force("bus","vfdsu_ex4_qnan_f",51,0); @192
+assign ex4_doub_rst_qnan[63:0] = { vfdsu_ex4_qnan_sign, 11'h7ff, 1'b1, vfdsu_ex4_qnan_f[50:0]};
+assign ex4_sing_rst_qnan[63:0] = {32'hffffffff,vfdsu_ex4_qnan_sign, 8'hff, 1'b1, vfdsu_ex4_qnan_f[21:0]};
+//result infinity
+assign ex4_doub_rst_inf[63:0] = {vfdsu_ex4_result_sign,11'h7ff,52'b0};
+assign ex4_sing_rst_inf[63:0] = {32'hffffffff,vfdsu_ex4_result_sign,8'hff,23'b0};
+//result normal
+// &CombBeg; @199
+always @( ex4_frac[54:0])
+begin
+casez(ex4_frac[54:53]) // take the 52 bits just below the highest set bit of ex4_frac[54:53] (or ex4_frac[51:0] when both are clear)
+  2'b00 : ex4_frac_52[51:0] = ex4_frac[51:0];
+  2'b01 : ex4_frac_52[51:0] = ex4_frac[52:1];
+  2'b1? : ex4_frac_52[51:0] = ex4_frac[53:2];
+  default : ex4_frac_52[51:0] = 52'b0;
+endcase
+// &CombEnd; @206
+end
+assign ex4_doub_rst_norm[63:0] = {vfdsu_ex4_result_sign,
+  ex4_expnt_rst[10:0],
+  ex4_frac_52[51:0]};
+assign ex4_sing_rst_norm[63:0] = {32'hffffffff,vfdsu_ex4_result_sign,
+  ex4_expnt_rst[7:0],
+  ex4_frac_52[51:29]};
+assign ex4_rst_lfn[63:0] = (vfdsu_ex4_double) ? ex4_doub_lfn[63:0] :
+  vfdsu_ex4_single ? ex4_sing_lfn[63:0] : ex4_half_lfn[63:0];
+
+assign ex4_rst0[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst0[63:0] :
+  vfdsu_ex4_single ? ex4_sing_rst0[63:0] : ex4_half_rst0[63:0];
+
+assign ex4_rst_qnan[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst_qnan[63:0] :
+  vfdsu_ex4_single ? ex4_sing_rst_qnan[63:0]
+  : ex4_half_rst_qnan[63:0];
+
+assign ex4_rst_norm[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst_norm[63:0] :
+  vfdsu_ex4_single ? ex4_sing_rst_norm[63:0]
+  : ex4_half_rst_norm[63:0];
+assign ex4_rst_inf[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst_inf[63:0] :
+  vfdsu_ex4_single ? ex4_sing_rst_inf[63:0]
+  : ex4_half_rst_inf[63:0];
+
+
+assign ex4_cor_uf = (vfdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus) // underflow is only reported together with vfdsu_ex4_nx
+  && vfdsu_ex4_nx;
+assign ex4_cor_nx = vfdsu_ex4_nx // either overflow source also sets this flag
+  || vfdsu_ex4_of
+  || ex4_of_plus;
+
+assign ex4_out_expt[4:0] = { // NOTE(review): presumably invalid / divide-by-zero / overflow / underflow / inexact, going by the signal names
+  vfdsu_ex4_nv, // bit 4
+  vfdsu_ex4_dz, // bit 3
+  vfdsu_ex4_of | ex4_of_plus, // bit 2
+  ex4_cor_uf, // bit 1
+  ex4_cor_nx}; // bit 0
+
+assign ex4_final_rst_norm = !vfdsu_ex4_result_qnan && // normal result is chosen only when no special-case select is active
+  !ex4_result_inf &&
+  !ex4_result_lfn &&
+  !vfdsu_ex4_result_zero &&
+  !ex4_rslt_denorm;
+// &CombBeg; @249
+always @( ex4_rst_norm[63:0]
+  or ex4_result_lfn
+  or vfdsu_ex4_result_qnan
+  or ex4_rst_qnan[63:0]
+  or ex4_rst0[63:0]
+  or ex4_rslt_denorm
+  or ex4_denorm_result[63:0]
+  or ex4_result_inf
+  or ex4_final_rst_norm
+  or ex4_rst_lfn[63:0]
+  or vfdsu_ex4_result_zero
+  or ex4_rst_inf[63:0])
+begin
+case({ex4_rslt_denorm, // one-hot result select; any pattern that is not exactly one-hot falls to the default and outputs all zeros
+  vfdsu_ex4_result_qnan,
+  ex4_result_inf,
+  ex4_result_lfn,
+  vfdsu_ex4_result_zero,
+  ex4_final_rst_norm})
+  6'b100000 : ex4_out_result[63:0] = ex4_denorm_result[63:0];
+  6'b010000 : ex4_out_result[63:0] = ex4_rst_qnan[63:0];
+  6'b001000 : ex4_out_result[63:0] = ex4_rst_inf[63:0];
+  6'b000100 : ex4_out_result[63:0] = ex4_rst_lfn[63:0];
+  6'b000010 : ex4_out_result[63:0] = ex4_rst0[63:0];
+  6'b000001 : ex4_out_result[63:0] = ex4_rst_norm[63:0];
+  default : ex4_out_result[63:0] = 64'b0;
+endcase
+// &CombEnd; @264
+end
+
+// &ModuleEnd; @266
+endmodule
+
+
diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v
new file mode 100644
index 00000000..7c5821c8
--- /dev/null
+++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v
@@ -0,0 +1,773 @@
+/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// &ModuleBeg; @22 +module ct_vfdsu_prepare( // EX1 prepare stage for div/sqrt: classifies both operands, detects special results and nv/dz, builds the SRT remainder/divisor and registers the side-band information for EX2 + cp0_vfpu_icg_en, + cp0_yy_clk_en, + cpurst_b, + ex1_div, + ex1_divisor, + ex1_double, + ex1_pipedown, + ex1_remainder, + ex1_scalar, + ex1_single, + ex1_sqrt, + ex1_src0, + ex1_src1, + ex1_static_rm, + forever_cpuclk, + pad_yy_icg_scan_en, + vfdsu_ex2_div, + vfdsu_ex2_double, + vfdsu_ex2_dz, + vfdsu_ex2_expnt_add0, + vfdsu_ex2_expnt_add1, + vfdsu_ex2_nv, + vfdsu_ex2_of_rm_lfn, + vfdsu_ex2_op0_norm, + vfdsu_ex2_op1_norm, + vfdsu_ex2_qnan_f, + vfdsu_ex2_qnan_sign, + vfdsu_ex2_result_inf, + vfdsu_ex2_result_qnan, + vfdsu_ex2_result_sign, + vfdsu_ex2_result_zero, + vfdsu_ex2_rm, + vfdsu_ex2_single, + vfdsu_ex2_sqrt, + vfdsu_ex2_srt_skip, + vfpu_yy_xx_dqnan, + vfpu_yy_xx_rm +); + +// &Ports; @23 +input cp0_vfpu_icg_en; +input cp0_yy_clk_en; +input cpurst_b; +input ex1_div; +input ex1_double; +input ex1_pipedown; +input ex1_scalar; +input ex1_single; +input ex1_sqrt; +input [63:0] ex1_src0; +input [63:0] ex1_src1; +input [2 :0] ex1_static_rm; +input forever_cpuclk; +input pad_yy_icg_scan_en; +input vfpu_yy_xx_dqnan; +input [2 :0] vfpu_yy_xx_rm; +output [52:0] ex1_divisor; +output [59:0] ex1_remainder; +output vfdsu_ex2_div; +output vfdsu_ex2_double; +output vfdsu_ex2_dz; +output [12:0] vfdsu_ex2_expnt_add0; +output [12:0] vfdsu_ex2_expnt_add1; +output vfdsu_ex2_nv; +output vfdsu_ex2_of_rm_lfn; +output vfdsu_ex2_op0_norm; +output vfdsu_ex2_op1_norm; +output [51:0] vfdsu_ex2_qnan_f; +output vfdsu_ex2_qnan_sign; +output vfdsu_ex2_result_inf; +output vfdsu_ex2_result_qnan; +output vfdsu_ex2_result_sign; +output 
vfdsu_ex2_result_zero; +output [2 :0] vfdsu_ex2_rm; +output vfdsu_ex2_single; +output vfdsu_ex2_sqrt; +output vfdsu_ex2_srt_skip; + +// &Regs; @24 +reg [12:0] ex1_expnt_adder_op1; +reg ex1_of_result_lfn; +reg [51:0] ex1_qnan_f; +reg ex1_qnan_sign; +reg vfdsu_ex2_div; +reg vfdsu_ex2_double; +reg vfdsu_ex2_dz; +reg [12:0] vfdsu_ex2_expnt_add0; +reg [12:0] vfdsu_ex2_expnt_add1; +reg vfdsu_ex2_nv; +reg vfdsu_ex2_of_rm_lfn; +reg vfdsu_ex2_op0_norm; +reg vfdsu_ex2_op1_norm; +reg [51:0] vfdsu_ex2_qnan_f; +reg vfdsu_ex2_qnan_sign; +reg vfdsu_ex2_result_inf; +reg vfdsu_ex2_result_qnan; +reg vfdsu_ex2_result_sign; +reg vfdsu_ex2_result_zero; +reg [2 :0] vfdsu_ex2_rm; +reg vfdsu_ex2_single; +reg vfdsu_ex2_sqrt; +reg vfdsu_ex2_srt_skip; + +// &Wires; @25 +wire cp0_vfpu_icg_en; +wire cp0_yy_clk_en; +wire cpurst_b; +wire div_sign; +wire ex1_div; +wire ex1_div_dz; +wire [52:0] ex1_div_noid_nor_srt_op0; +wire [52:0] ex1_div_noid_nor_srt_op1; +wire [52:0] ex1_div_nor_srt_op0; +wire [52:0] ex1_div_nor_srt_op1; +wire ex1_div_nv; +wire [12:0] ex1_div_op0_expnt; +wire [12:0] ex1_div_op1_expnt; +wire ex1_div_rst_inf; +wire ex1_div_rst_qnan; +wire ex1_div_rst_zero; +wire [52:0] ex1_div_srt_op0; +wire [52:0] ex1_div_srt_op1; +wire [52:0] ex1_divisor; +wire ex1_doub_expnt0_max; +wire ex1_doub_expnt0_zero; +wire ex1_doub_expnt1_max; +wire ex1_doub_expnt1_zero; +wire ex1_doub_frac0_all0; +wire ex1_doub_frac1_all0; +wire ex1_double; +wire ex1_dz; +wire ex1_expnt0_max; +wire ex1_expnt0_zero; +wire ex1_expnt1_max; +wire ex1_expnt1_zero; +wire [12:0] ex1_expnt_adder_op0; +wire ex1_frac0_all0; +wire ex1_frac0_msb; +wire ex1_frac1_all0; +wire ex1_frac1_msb; +wire ex1_half_expnt0_max; +wire ex1_half_expnt0_zero; +wire ex1_half_expnt1_max; +wire ex1_half_expnt1_zero; +wire ex1_half_frac0_all0; +wire ex1_half_frac1_all0; +wire ex1_nv; +wire ex1_op0_cnan; +wire [51:0] ex1_op0_f; +wire ex1_op0_id; +wire ex1_op0_id_nor; +wire ex1_op0_inf; +wire ex1_op0_is_qnan; +wire ex1_op0_is_snan; +wire ex1_op0_norm; 
+wire ex1_op0_qnan; +wire ex1_op0_sign; +wire ex1_op0_snan; +wire ex1_op0_tt_zero; +wire ex1_op0_zero; +wire ex1_op1_cnan; +wire [51:0] ex1_op1_f; +wire ex1_op1_id; +wire ex1_op1_id_nor; +wire ex1_op1_inf; +wire ex1_op1_is_qnan; +wire ex1_op1_is_snan; +wire ex1_op1_norm; +wire ex1_op1_qnan; +wire ex1_op1_sign; +wire ex1_op1_snan; +wire ex1_op1_tt_zero; +wire ex1_op1_zero; +wire [63:0] ex1_oper0; +wire [51:0] ex1_oper0_frac; +wire ex1_oper0_high_all1; +wire [12:0] ex1_oper0_id_expnt; +wire [51:0] ex1_oper0_id_frac; +wire [63:0] ex1_oper1; +wire [51:0] ex1_oper1_frac; +wire ex1_oper1_high_all1; +wire [12:0] ex1_oper1_id_expnt; +wire [51:0] ex1_oper1_id_frac; +wire ex1_pipe_clk; +wire ex1_pipe_clk_en; +wire ex1_pipedown; +wire [59:0] ex1_remainder; +wire ex1_result_inf; +wire ex1_result_qnan; +wire ex1_result_sign; +wire ex1_result_zero; +wire [2 :0] ex1_rm; +wire ex1_rst_default_qnan; +wire ex1_scalar; +wire ex1_sing_expnt0_max; +wire ex1_sing_expnt0_zero; +wire ex1_sing_expnt1_max; +wire ex1_sing_expnt1_zero; +wire ex1_sing_frac0_all0; +wire ex1_sing_frac1_all0; +wire ex1_single; +wire ex1_sqrt; +wire ex1_sqrt_expnt_odd; +wire ex1_sqrt_expnt_result_odd; +wire ex1_sqrt_nv; +wire [12:0] ex1_sqrt_op1_expnt; +wire ex1_sqrt_rst_inf; +wire ex1_sqrt_rst_qnan; +wire ex1_sqrt_rst_zero; +wire [52:0] ex1_sqrt_srt_op0; +wire [63:0] ex1_src0; +wire [63:0] ex1_src1; +wire ex1_srt_skip; +wire [2 :0] ex1_static_rm; +wire forever_cpuclk; +wire pad_yy_icg_scan_en; +wire [59:0] sqrt_remainder; +wire sqrt_sign; +wire vfpu_yy_xx_dqnan; +wire [2 :0] vfpu_yy_xx_rm; + + +//======================Operator prepare==================== +//VECTOR_SIMD + +assign ex1_oper0[63:0] = ex1_src0[63:0]; +assign ex1_oper1[63:0] = ex1_src1[63:0]; + + +//Sign bit prepare: bit 63 when ex1_double, bit 31 when ex1_single, bit 15 otherwise (half) +assign ex1_op0_sign = ex1_double ? ex1_oper0[63] : + ex1_single ? ex1_oper0[31] : ex1_oper0[15]; +assign ex1_op1_sign = ex1_double ? ex1_oper1[63] : + ex1_single ? 
ex1_oper1[31] : ex1_oper1[15]; +assign div_sign = ex1_op0_sign ^ ex1_op1_sign; +assign sqrt_sign = ex1_op0_sign; +assign ex1_result_sign = (ex1_div) + ? div_sign + : sqrt_sign; +//exponent max: every exponent bit of the selected format is 1 +assign ex1_doub_expnt0_max = &ex1_oper0[62:52]; +assign ex1_sing_expnt0_max = &ex1_oper0[30:23]; +assign ex1_doub_expnt1_max = &ex1_oper1[62:52]; +assign ex1_sing_expnt1_max = &ex1_oper1[30:23]; +assign ex1_half_expnt0_max = &ex1_oper0[14:10]; +assign ex1_half_expnt1_max = &ex1_oper1[14:10]; +assign ex1_expnt0_max = ex1_double ? ex1_doub_expnt0_max : + ex1_single ? ex1_sing_expnt0_max : ex1_half_expnt0_max; +assign ex1_expnt1_max = ex1_double ? ex1_doub_expnt1_max : + ex1_single ? ex1_sing_expnt1_max : ex1_half_expnt1_max; + +//exponent zero: every exponent bit of the selected format is 0 +assign ex1_doub_expnt0_zero = ~|ex1_oper0[62:52]; +assign ex1_sing_expnt0_zero = ~|ex1_oper0[30:23]; +assign ex1_doub_expnt1_zero = ~|ex1_oper1[62:52]; +assign ex1_sing_expnt1_zero = ~|ex1_oper1[30:23]; +assign ex1_half_expnt0_zero = ~|ex1_oper0[14:10]; +assign ex1_half_expnt1_zero = ~|ex1_oper1[14:10]; +assign ex1_expnt0_zero = ex1_double ? ex1_doub_expnt0_zero : + ex1_single ? ex1_sing_expnt0_zero : ex1_half_expnt0_zero; +assign ex1_expnt1_zero = ex1_double ? ex1_doub_expnt1_zero : + ex1_single ? ex1_sing_expnt1_zero : ex1_half_expnt1_zero; +//fraction zero: every fraction bit of the selected format is 0 +assign ex1_doub_frac0_all0 = ~|ex1_oper0[51:0]; +assign ex1_sing_frac0_all0 = ~|ex1_oper0[22:0]; +assign ex1_doub_frac1_all0 = ~|ex1_oper1[51:0]; +assign ex1_sing_frac1_all0 = ~|ex1_oper1[22:0]; +assign ex1_half_frac0_all0 = ~|ex1_oper0[9:0]; +assign ex1_half_frac1_all0 = ~|ex1_oper1[9:0]; +assign ex1_frac0_all0 = ex1_double ? ex1_doub_frac0_all0 : + ex1_single ? ex1_sing_frac0_all0 : ex1_half_frac0_all0; +assign ex1_frac1_all0 = ex1_double ? ex1_doub_frac1_all0 : + ex1_single ? ex1_sing_frac1_all0 : ex1_half_frac1_all0; +assign ex1_frac0_msb = ex1_double ? ex1_oper0[51] : + ex1_single ? ex1_oper0[22] : ex1_oper0[9]; +assign ex1_frac1_msb = ex1_double ? 
ex1_oper1[51] : + ex1_single ? ex1_oper1[22] : ex1_oper1[9]; +assign ex1_oper0_high_all1 = ex1_single ? &ex1_oper0[63:32] : &ex1_oper0[63:16]; +assign ex1_oper1_high_all1 = ex1_single ? &ex1_oper1[63:32] : &ex1_oper1[63:16]; + + +//infinity number: max exponent, zero fraction, not a cNaN +assign ex1_op0_inf = ex1_expnt0_max && + ex1_frac0_all0 && + ~ex1_op0_cnan; +assign ex1_op1_inf = ex1_expnt1_max && + ex1_frac1_all0 && + ~ex1_op1_cnan; +//zero: zero exponent, zero fraction, not a cNaN +assign ex1_op0_zero = ex1_expnt0_zero && + ex1_frac0_all0 && + ~ex1_op0_cnan; +assign ex1_op1_zero = ex1_expnt1_zero && + ex1_frac1_all0 && + ~ex1_op1_cnan; +//denormalized number: zero exponent, non-zero fraction, not a cNaN +assign ex1_op0_id = ex1_expnt0_zero && + ~ex1_frac0_all0 && + ~ex1_op0_cnan; +assign ex1_op1_id = ex1_expnt1_zero && + ~ex1_frac1_all0 && + ~ex1_op1_cnan; +//assign ex1_op0_id_fm1 = vfpu_yy_xx_fm[1] && +// vfpu_yy_xx_fm[0] && +// ex1_op0_id; +//assign ex1_op1_id_fm1 = vfpu_yy_xx_fm[1] && +// vfpu_yy_xx_fm[0] && +// ex1_op1_id; +//assign ex1_op0_id_fm0 = vfpu_yy_xx_fm[1] && +// !vfpu_yy_xx_fm[0] && +// ex1_op0_id; +//assign ex1_op1_id_fm0 = vfpu_yy_xx_fm[1] && +// !vfpu_yy_xx_fm[0] && +// ex1_op1_id; +assign ex1_op0_id_nor = ex1_op0_id; +assign ex1_op1_id_nor = ex1_op1_id; + +//cNaN: scalar, non-double operand whose upper bits (ex1_operN_high_all1) are not all 1; counted as a qNaN below +assign ex1_op0_cnan = ex1_scalar && + !ex1_double && + !ex1_oper0_high_all1; + +assign ex1_op1_cnan = ex1_scalar && + !ex1_double && + !ex1_oper1_high_all1; + +//sNaN: max exponent, non-zero fraction with its MSB clear, not a cNaN +assign ex1_op0_snan = ex1_expnt0_max && + ~ex1_frac0_all0 && + ~ex1_frac0_msb && + ~ex1_op0_cnan; +assign ex1_op1_snan = ex1_expnt1_max && + ~ex1_frac1_all0 && + ~ex1_frac1_msb && + ~ex1_op1_cnan; + +//qNaN: max exponent with the fraction MSB set, or a cNaN +assign ex1_op0_qnan = (ex1_expnt0_max && + ex1_frac0_msb) || + ex1_op0_cnan; +assign ex1_op1_qnan = (ex1_expnt1_max && + ex1_frac1_msb) || + ex1_op1_cnan; +//=====================find first one======================= +// this is for the denormal number +// &Instance("ct_vfdsu_ff1","x_frac0_expnt"); @150 +ct_vfdsu_ff1 x_frac0_expnt ( + .fanc_shift_num (ex1_oper0_id_frac[51:0] ), + .frac_bin_val (ex1_oper0_id_expnt[12:0]), + .frac_num 
(ex1_oper0_frac[51:0] ) +); + +// &Connect(.frac_num(ex1_oper0_frac[51:0])); @151 +// &Connect(.frac_bin_val(ex1_oper0_id_expnt[12:0])); @152 +// &Connect(.fanc_shift_num(ex1_oper0_id_frac[51:0])); @153 + +// &Instance("ct_vfdsu_ff1","x_frac1_expnt"); @155 +ct_vfdsu_ff1 x_frac1_expnt ( + .fanc_shift_num (ex1_oper1_id_frac[51:0] ), + .frac_bin_val (ex1_oper1_id_expnt[12:0]), + .frac_num (ex1_oper1_frac[51:0] ) +); + +// &Connect(.frac_num(ex1_oper1_frac[51:0])); @156 +// &Connect(.frac_bin_val(ex1_oper1_id_expnt[12:0])); @157 +// &Connect(.fanc_shift_num(ex1_oper1_id_frac[51:0])); @158 +assign ex1_oper0_frac[51:0] = ex1_double ? ex1_oper0[51:0] : + ex1_single ? {ex1_oper0[22:0],29'b0} + : {ex1_oper0[9:0],42'b0}; +assign ex1_oper1_frac[51:0] = ex1_double ? ex1_oper1[51:0] : + ex1_single ? {ex1_oper1[22:0],29'b0} + : {ex1_oper1[9:0],42'b0}; +//=====================exponent add========================= +//exponent number 0: zero-extended to 13 bits; a denormal operand uses the ct_vfdsu_ff1 exponent output instead +assign ex1_div_op0_expnt[12:0] = ex1_double ? {2'b0,ex1_oper0[62:52]} : + ex1_single ? {5'b0,ex1_oper0[30:23]} + : {8'b0,ex1_oper0[14:10]}; +assign ex1_expnt_adder_op0[12:0] = ex1_op0_id_nor ? ex1_oper0_id_expnt[12:0] + : ex1_div_op0_expnt[12:0]; +//exponent number 1: operand 1 exponent for div, a per-format constant for sqrt +assign ex1_div_op1_expnt[12:0] = ex1_double ? {2'b0,ex1_oper1[62:52]} : + ex1_single ? {5'b0,ex1_oper1[30:23]} + : {8'b0,ex1_oper1[14:10]}; +assign ex1_sqrt_op1_expnt[12:0] = ex1_double ? {3'b0,{10{1'b1}}} : //'d1023 + ex1_single ? {6'b0,{7{1'b1}}} //'d127 + : {9'b0,{4{1'b1}}}; //'d15 + +// &CombBeg; @180 +always @( ex1_oper1_id_expnt[12:0] + or ex1_div + or ex1_op1_id_nor + or ex1_sqrt_op1_expnt[12:0] + or ex1_sqrt + or ex1_div_op1_expnt[12:0]) +begin +case({ex1_div,ex1_sqrt}) + 2'b10: ex1_expnt_adder_op1[12:0] = ex1_op1_id_nor ? 
ex1_oper1_id_expnt[12:0] + : ex1_div_op1_expnt[12:0]; + 2'b01: ex1_expnt_adder_op1[12:0] = ex1_sqrt_op1_expnt[12:0]; + default: ex1_expnt_adder_op1[12:0] = 13'b0; +endcase +// &CombEnd; @187 +end +//expnt0 sub expnt1: only the parity (XOR of the two LSBs) is computed in this stage +assign ex1_sqrt_expnt_result_odd = ex1_expnt_adder_op0[0] ^ ex1_expnt_adder_op1[0]; + + +//======================EX1 expt detect===================== +//ex1_id_detect +//any operand is zero +// no input denormalize exception anymore +// +//ex1_nv_detect +//div_nv +// 1.any operand is sNaN +// 2.0/0(include DN flush to zero) +// 3.inf/inf +//sqrt_nv +// 1.any operand is sNaN +// 2.operand sign is 1 && operand is not zero/qNaN +assign ex1_nv = ex1_div && ex1_div_nv || + ex1_sqrt && ex1_sqrt_nv; +//ex1_div_nv +assign ex1_div_nv = ex1_op0_snan || + ex1_op1_snan || + (ex1_op0_tt_zero && ex1_op1_tt_zero)|| + (ex1_op0_inf && ex1_op1_inf); +assign ex1_op0_tt_zero = ex1_op0_zero; // plain alias of the exact-zero flag: no denormal flush is applied in this module +assign ex1_op1_tt_zero = ex1_op1_zero; // plain alias of the exact-zero flag: no denormal flush is applied in this module +//ex1_sqrt_nv +assign ex1_sqrt_nv = ex1_op0_snan || + ex1_op0_sign && + (ex1_op0_norm || + ex1_op0_inf ); +assign ex1_op0_norm = !ex1_expnt0_zero && !ex1_expnt0_max && !ex1_op0_cnan || ex1_op0_id_nor ; +assign ex1_op1_norm = !ex1_expnt1_zero && !ex1_expnt1_max && !ex1_op1_cnan || ex1_op1_id_nor; + +//ex1_of_detect +//div_of +// 1.only detect id overflow case +//assign ex1_of = ex1_div && ex1_div_of; +//assign ex1_div_of = ex1_op1_id_fm1 && +// ex1_op0_norm && +// ex1_div_id_of; +// +////ex1_uf_detect +////div_uf +//// 1.only detect id underflow case +//assign ex1_uf = ex1_div && ex1_div_uf; +//assign ex1_div_uf = ex1_op0_id && +// ex1_op1_norm && +// ex1_div_id_uf; +//ex1_dz_detect +//div_dz +// 1.op0 is normal (ex1_op0_norm also covers denormals) && op1 zero +assign ex1_dz = ex1_div && ex1_div_dz; +assign ex1_div_dz = ex1_op1_tt_zero && ex1_op0_norm; + +//===================sqrt exponent prepare================== +//sqrt exponent prepare +//after E sub, div E by 2 +//assign ex1_sqrt_expnt_result[12:0] = {ex1_expnt_result[12], +// ex1_expnt_result[12:1]}; 
+//ex1_sqrt_expnt_odd +//fraction will shift left by 1 +assign ex1_sqrt_expnt_odd = ex1_sqrt_expnt_result_odd; + +//===================special cal result===================== +//ex1 result is zero +//div_zero +// 1.op0 is zero && op1 is normal +// 2.op0 is zero/normal && op1 is inf +//sqrt_zero +// 1.op0 is zero +assign ex1_result_zero = ex1_div_rst_zero && ex1_div || + ex1_sqrt_rst_zero && ex1_sqrt; +assign ex1_div_rst_zero = (ex1_op0_tt_zero && ex1_op1_norm ) || + (!ex1_expnt0_max && !ex1_op0_cnan && ex1_op1_inf); +assign ex1_sqrt_rst_zero = ex1_op0_tt_zero; + +//ex1 result is qNaN +//ex1_nv +//div_qnan +// 1.op0 is qnan || op1 is qnan +//sqrt_qnan +// 1.op0 is qnan +assign ex1_result_qnan = ex1_div_rst_qnan && ex1_div || + ex1_sqrt_rst_qnan && ex1_sqrt || + ex1_nv; +assign ex1_div_rst_qnan = ex1_op0_qnan || + ex1_op1_qnan; +assign ex1_sqrt_rst_qnan = ex1_op0_qnan; + +//ex1_rst_default_qnan +//0/0, inf/inf, sqrt negative should get default qNaN +assign ex1_rst_default_qnan = (ex1_div && ex1_op0_zero && ex1_op1_zero) || + (ex1_div && ex1_op0_inf && ex1_op1_inf) || + (ex1_sqrt&& ex1_op0_sign && (ex1_op0_norm || ex1_op0_inf)); + +//ex1 result is inf +//ex1_dz +// +//div_inf +// 1.op0 is inf && op1 is normal/zero +//sqrt_inf +// 1.op0 is inf (and positive) +assign ex1_result_inf = ex1_div_rst_inf && ex1_div || + ex1_sqrt_rst_inf && ex1_sqrt || + ex1_dz ; +assign ex1_div_rst_inf = ex1_op0_inf && !ex1_expnt1_max && !ex1_op1_cnan; +assign ex1_sqrt_rst_inf = ex1_op0_inf && !ex1_op0_sign; + +//ex1 result is lfn (presumably "largest finite number" - confirm) +//ex1_of && round result toward not inc 1 + +assign ex1_rm[2:0] = ((ex1_static_rm[2:0] == 3'b111)|| !ex1_scalar) + ? 
vfpu_yy_xx_rm[2:0] + : ex1_static_rm[2:0]; +//RNE : Always inc 1 because round to nearest of 1.111...11 +//RTZ : Always not inc 1 +//RDN : Always not inc 1 when positive (case 3'b010; RDN/RUP names assume the RISC-V frm encoding - confirm) +//RUP : Always not inc 1 when negative (case 3'b011) +//RMM : Always inc 1 because round to max magnitude +// &CombBeg; @308 +always @( ex1_rm[2:0] + or ex1_result_sign) +begin +case(ex1_rm[2:0]) + 3'b000 : ex1_of_result_lfn = 1'b0; + 3'b001 : ex1_of_result_lfn = 1'b1; + 3'b010 : ex1_of_result_lfn = !ex1_result_sign; + 3'b011 : ex1_of_result_lfn = ex1_result_sign; + 3'b100 : ex1_of_result_lfn = 1'b0; + default: ex1_of_result_lfn = 1'b0; +endcase +// &CombEnd; @317 +end + +//EX1 Remainder +//div : 1/8 <= x < 1/4 +//sqrt : 1/16 <= x < 1/4 +assign ex1_remainder[59:0] = {60{ex1_div }} & {5'b0,ex1_div_srt_op0[52:0],2'b0} | + {60{ex1_sqrt}} & sqrt_remainder[59:0]; + +//EX1 Divisor +//1/2 <= y < 1 +assign ex1_divisor[52:0] = ex1_div_srt_op1[52:0]; + +//ex1_div_srt_op0 +assign ex1_div_srt_op0[52:0] = ex1_div_nor_srt_op0[52:0]; +//ex1_div_srt_op1 +assign ex1_div_srt_op1[52:0] = ex1_div_nor_srt_op1[52:0]; +//ex1_div_nor_srt_op0: hidden 1 prepended for non-denormal operands; denormals use the ff1-shifted fraction +assign ex1_div_noid_nor_srt_op0[52:0] = ex1_double ? {1'b1,ex1_oper0[51:0]} : + ex1_single ? {1'b1,ex1_oper0[22:0],29'b0} + : {1'b1,ex1_oper0[9:0],42'b0}; +assign ex1_div_noid_nor_srt_op1[52:0] = ex1_double ? {1'b1,ex1_oper1[51:0]} : + ex1_single ? {1'b1,ex1_oper1[22:0],29'b0} + : {1'b1,ex1_oper1[9:0],42'b0}; +assign ex1_div_nor_srt_op0[52:0] = ex1_op0_id_nor ? {ex1_oper0_id_frac[51:0],1'b0} + : ex1_div_noid_nor_srt_op0[52:0]; +//ex1_div_nor_srt_op1 +assign ex1_div_nor_srt_op1[52:0] = ex1_op1_id_nor ? {ex1_oper1_id_frac[51:0],1'b0} + : ex1_div_noid_nor_srt_op1[52:0]; +//sqrt_remainder: an odd exponent difference places the operand one bit further left +assign sqrt_remainder[59:0] = (ex1_sqrt_expnt_odd) + ? 
{5'b0,ex1_sqrt_srt_op0[52:0],2'b0} + : {6'b0,ex1_sqrt_srt_op0[52:0],1'b0}; +//ex1_sqrt_srt_op0 +assign ex1_sqrt_srt_op0[52:0] = ex1_div_srt_op0[52:0]; + +//Default_qnan/Standard_qnan Select: operand fraction/sign is forwarded only when vfpu_yy_xx_dqnan is set (priority op0 sNaN, op1 sNaN, op0 qNaN, op1 qNaN); otherwise sign 0 and fraction {1'b1,51'b0} +assign ex1_op0_is_snan = ex1_op0_snan; +assign ex1_op1_is_snan = ex1_op1_snan && ex1_div; +assign ex1_op0_is_qnan = ex1_op0_qnan; +assign ex1_op1_is_qnan = ex1_op1_qnan && ex1_div; +assign ex1_op0_f[51:0] = (ex1_op0_cnan) ? 52'b0: ex1_oper0[51:0]; +assign ex1_op1_f[51:0] = (ex1_op1_cnan) ? 52'b0: ex1_oper1[51:0]; +// &CombBeg; @359 +always @( ex1_op0_is_snan + or ex1_op0_is_qnan + or ex1_op0_f[51:0] + or ex1_rst_default_qnan + or ex1_op1_f[51:0] + or vfpu_yy_xx_dqnan + or ex1_op1_is_snan + or ex1_op1_is_qnan) +begin +if(ex1_rst_default_qnan) + ex1_qnan_f[51:0] = {1'b1, 51'b0}; +else if(ex1_op0_is_snan && vfpu_yy_xx_dqnan) + ex1_qnan_f[51:0] = ex1_op0_f[51:0]; +else if(ex1_op1_is_snan && vfpu_yy_xx_dqnan) + ex1_qnan_f[51:0] = ex1_op1_f[51:0]; +else if(ex1_op0_is_qnan && vfpu_yy_xx_dqnan) + ex1_qnan_f[51:0] = ex1_op0_f[51:0]; +else if(ex1_op1_is_qnan && vfpu_yy_xx_dqnan) + ex1_qnan_f[51:0] = ex1_op1_f[51:0]; +else + ex1_qnan_f[51:0] = {1'b1, 51'b0}; +// &CombEnd; @372 +end + +// &CombBeg; @374 +always @( ex1_op0_is_snan + or ex1_op0_cnan + or ex1_op0_is_qnan + or ex1_op1_sign + or ex1_op0_sign + or ex1_rst_default_qnan + or vfpu_yy_xx_dqnan + or ex1_op1_cnan + or ex1_op1_is_snan + or ex1_op1_is_qnan) +begin +if(ex1_rst_default_qnan) + ex1_qnan_sign = 1'b0; +else if(ex1_op0_is_snan && vfpu_yy_xx_dqnan) + ex1_qnan_sign = ex1_op0_sign; +else if(ex1_op1_is_snan && vfpu_yy_xx_dqnan) + ex1_qnan_sign = ex1_op1_sign; +else if(ex1_op0_is_qnan && vfpu_yy_xx_dqnan) + ex1_qnan_sign = ex1_op0_sign && !ex1_op0_cnan; +else if(ex1_op1_is_qnan && vfpu_yy_xx_dqnan) + ex1_qnan_sign = ex1_op1_sign && !ex1_op1_cnan; +else + ex1_qnan_sign = 1'b0; +// &CombEnd; @387 +end + + +//========================Pipe to EX2======================= +//exponent register cal result +//assign ex1_srt_expnt_rst[12:0] = 
(ex1_sqrt) +// ? ex1_sqrt_expnt_result[12:0] +// : ex1_expnt_result[12:0]; +//Special result (zero, qNaN or inf) should skip SRT logic +assign ex1_srt_skip = ex1_result_zero || + ex1_result_qnan || + ex1_result_inf; +//gate clk: ex1_pipe_clk is locally enabled by ex1_pipedown +// &Instance("gated_clk_cell","x_ex1_pipe_clk"); @400 +gated_clk_cell x_ex1_pipe_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex1_pipe_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex1_pipe_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @401 +// .clk_out (ex1_pipe_clk),//Out Clock @402 +// .external_en (1'b0), @403 +// .global_en (cp0_yy_clk_en), @404 +// .local_en (ex1_pipe_clk_en),//Local Condition @405 +// .module_en (cp0_vfpu_icg_en) @406 +// ); @407 +assign ex1_pipe_clk_en = ex1_pipedown; + +always @(posedge ex1_pipe_clk or negedge cpurst_b) +begin + if(!cpurst_b) + begin + vfdsu_ex2_result_zero <= 1'b0; + vfdsu_ex2_result_qnan <= 1'b0; + vfdsu_ex2_result_inf <= 1'b0; + vfdsu_ex2_result_sign <= 1'b0; + vfdsu_ex2_op0_norm <= 1'b0; + vfdsu_ex2_op1_norm <= 1'b0; + vfdsu_ex2_expnt_add0[12:0] <= 13'b0; + vfdsu_ex2_expnt_add1[12:0] <= 13'b0; + vfdsu_ex2_nv <= 1'b0; + vfdsu_ex2_dz <= 1'b0; + vfdsu_ex2_srt_skip <= 1'b0; + vfdsu_ex2_of_rm_lfn <= 1'b0; + vfdsu_ex2_qnan_sign <= 1'b0; + vfdsu_ex2_qnan_f[51:0] <= 52'b0; + vfdsu_ex2_rm[2:0] <= 3'b0; + vfdsu_ex2_div <= 1'b0; + vfdsu_ex2_sqrt <= 1'b0; + vfdsu_ex2_double <= 1'b0; + vfdsu_ex2_single <= 1'b0; + end + else if(ex1_pipedown) + begin + vfdsu_ex2_result_zero <= ex1_result_zero; + vfdsu_ex2_result_qnan <= ex1_result_qnan; + vfdsu_ex2_result_inf <= ex1_result_inf; + vfdsu_ex2_result_sign <= ex1_result_sign; + vfdsu_ex2_op0_norm <= ex1_op0_norm; + vfdsu_ex2_op1_norm <= ex1_op1_norm; + vfdsu_ex2_expnt_add0[12:0] <= ex1_expnt_adder_op0[12:0]; + vfdsu_ex2_expnt_add1[12:0] <= ex1_expnt_adder_op1[12:0]; + vfdsu_ex2_nv <= ex1_nv; + vfdsu_ex2_dz <= ex1_dz; + vfdsu_ex2_srt_skip <= ex1_srt_skip; + 
vfdsu_ex2_of_rm_lfn <= ex1_of_result_lfn; + vfdsu_ex2_qnan_sign <= ex1_qnan_sign; + vfdsu_ex2_qnan_f[51:0] <= ex1_qnan_f[51:0]; + vfdsu_ex2_rm[2:0] <= ex1_rm[2:0]; + vfdsu_ex2_div <= ex1_div; + vfdsu_ex2_sqrt <= ex1_sqrt; + vfdsu_ex2_double <= ex1_double; + vfdsu_ex2_single <= ex1_single; + end + else // hold branch: every register is reassigned its own current value + begin + vfdsu_ex2_result_zero <= vfdsu_ex2_result_zero; + vfdsu_ex2_result_qnan <= vfdsu_ex2_result_qnan; + vfdsu_ex2_result_inf <= vfdsu_ex2_result_inf; + vfdsu_ex2_result_sign <= vfdsu_ex2_result_sign; + vfdsu_ex2_op0_norm <= vfdsu_ex2_op0_norm; + vfdsu_ex2_op1_norm <= vfdsu_ex2_op1_norm; + vfdsu_ex2_expnt_add0[12:0] <= vfdsu_ex2_expnt_add0[12:0]; + vfdsu_ex2_expnt_add1[12:0] <= vfdsu_ex2_expnt_add1[12:0]; + vfdsu_ex2_nv <= vfdsu_ex2_nv; + vfdsu_ex2_dz <= vfdsu_ex2_dz; + vfdsu_ex2_srt_skip <= vfdsu_ex2_srt_skip; + vfdsu_ex2_of_rm_lfn <= vfdsu_ex2_of_rm_lfn; + vfdsu_ex2_qnan_sign <= vfdsu_ex2_qnan_sign; + vfdsu_ex2_qnan_f[51:0] <= vfdsu_ex2_qnan_f[51:0]; + vfdsu_ex2_rm[2:0] <= vfdsu_ex2_rm[2:0]; + vfdsu_ex2_div <= vfdsu_ex2_div; + vfdsu_ex2_sqrt <= vfdsu_ex2_sqrt; + vfdsu_ex2_double <= vfdsu_ex2_double; + vfdsu_ex2_single <= vfdsu_ex2_single; + end +end + +// &Force("output","vfdsu_ex2_op0_norm"); @480 +// &Force("output","vfdsu_ex2_op1_norm"); @481 +// &Force("output","vfdsu_ex2_dz"); @482 +// &Force("output","vfdsu_ex2_nv"); @483 +// &Force("output","vfdsu_ex2_srt_skip"); @484 +// &Force("output","vfdsu_ex2_of_rm_lfn"); @485 +// &Force("output","vfdsu_ex2_result_inf"); @486 +// &Force("output","vfdsu_ex2_result_qnan"); @487 +// &Force("output","vfdsu_ex2_result_zero"); @488 +// //&Force("output","vfdsu_ex2_expnt_rst"); @489 +// &Force("output","vfdsu_ex2_result_sign"); @490 +// &Force("output","vfdsu_ex2_qnan_f"); @491 +// &Force("output","vfdsu_ex2_qnan_sign"); @492 +// &Force("output","vfdsu_ex2_rm"); @493 +// &Force("output","vfdsu_ex2_div"); @494 +// &Force("output","vfdsu_ex2_sqrt"); @495 +// &Force("output","vfdsu_ex2_double"); @496 +// 
&Force("output","vfdsu_ex2_single"); @497 +// &Force("output","vfdsu_ex2_expnt_add0"); @498 +// &Force("output","vfdsu_ex2_expnt_add1"); @499 + +// &ModuleEnd; @501 +endmodule + + diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v new file mode 100644 index 00000000..6eece526 --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v @@ -0,0 +1,1041 @@ +/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// &ModuleBeg; @22 +module ct_vfdsu_round( + cp0_vfpu_icg_en, + cp0_yy_clk_en, + cpurst_b, + ex3_pipedown, + forever_cpuclk, + pad_yy_icg_scan_en, + total_qt_rt_58, + vfdsu_ex2_of_rm_lfn, + vfdsu_ex3_doub_expnt_rst, + vfdsu_ex3_double, + vfdsu_ex3_dz, + vfdsu_ex3_half_expnt_rst, + vfdsu_ex3_id_srt_skip, + vfdsu_ex3_nv, + vfdsu_ex3_of, + vfdsu_ex3_potnt_of, + vfdsu_ex3_potnt_uf, + vfdsu_ex3_qnan_f, + vfdsu_ex3_qnan_sign, + vfdsu_ex3_rem_sign, + vfdsu_ex3_rem_zero, + vfdsu_ex3_result_denorm_round_add_num, + vfdsu_ex3_result_inf, + vfdsu_ex3_result_lfn, + vfdsu_ex3_result_qnan, + vfdsu_ex3_result_sign, + vfdsu_ex3_result_zero, + vfdsu_ex3_rm, + vfdsu_ex3_rslt_denorm, + vfdsu_ex3_sing_expnt_rst, + vfdsu_ex3_single, + vfdsu_ex3_uf, + vfdsu_ex4_denorm_to_tiny_frac, + vfdsu_ex4_double, + vfdsu_ex4_dz, + vfdsu_ex4_expnt_rst, + vfdsu_ex4_frac, + vfdsu_ex4_nv, + vfdsu_ex4_nx, + vfdsu_ex4_of, + vfdsu_ex4_of_rst_lfn, + vfdsu_ex4_potnt_norm, + vfdsu_ex4_potnt_of, + vfdsu_ex4_potnt_uf, + vfdsu_ex4_qnan_f, + vfdsu_ex4_qnan_sign, + vfdsu_ex4_result_inf, + vfdsu_ex4_result_lfn, + vfdsu_ex4_result_nor, + vfdsu_ex4_result_qnan, + vfdsu_ex4_result_sign, + vfdsu_ex4_result_zero, + vfdsu_ex4_rslt_denorm, + vfdsu_ex4_single, + vfdsu_ex4_uf +); + +// &Ports; @23 +input cp0_vfpu_icg_en; +input cp0_yy_clk_en; +input cpurst_b; +input ex3_pipedown; +input forever_cpuclk; +input pad_yy_icg_scan_en; +input [57:0] total_qt_rt_58; +input vfdsu_ex2_of_rm_lfn; +input [12:0] vfdsu_ex3_doub_expnt_rst; +input vfdsu_ex3_double; +input vfdsu_ex3_dz; +input [12:0] vfdsu_ex3_half_expnt_rst; +input vfdsu_ex3_id_srt_skip; +input vfdsu_ex3_nv; +input vfdsu_ex3_of; +input vfdsu_ex3_potnt_of; +input vfdsu_ex3_potnt_uf; +input [51:0] vfdsu_ex3_qnan_f; +input vfdsu_ex3_qnan_sign; +input vfdsu_ex3_rem_sign; +input vfdsu_ex3_rem_zero; +input [52:0] vfdsu_ex3_result_denorm_round_add_num; +input vfdsu_ex3_result_inf; +input vfdsu_ex3_result_lfn; +input vfdsu_ex3_result_qnan; +input vfdsu_ex3_result_sign; 
+input vfdsu_ex3_result_zero; +input [2 :0] vfdsu_ex3_rm; +input vfdsu_ex3_rslt_denorm; +input [8 :0] vfdsu_ex3_sing_expnt_rst; +input vfdsu_ex3_single; +input vfdsu_ex3_uf; +output vfdsu_ex4_denorm_to_tiny_frac; +output vfdsu_ex4_double; +output vfdsu_ex4_dz; +output [12:0] vfdsu_ex4_expnt_rst; +output [54:0] vfdsu_ex4_frac; +output vfdsu_ex4_nv; +output vfdsu_ex4_nx; +output vfdsu_ex4_of; +output vfdsu_ex4_of_rst_lfn; +output [1 :0] vfdsu_ex4_potnt_norm; +output vfdsu_ex4_potnt_of; +output vfdsu_ex4_potnt_uf; +output [51:0] vfdsu_ex4_qnan_f; +output vfdsu_ex4_qnan_sign; +output vfdsu_ex4_result_inf; +output vfdsu_ex4_result_lfn; +output vfdsu_ex4_result_nor; +output vfdsu_ex4_result_qnan; +output vfdsu_ex4_result_sign; +output vfdsu_ex4_result_zero; +output vfdsu_ex4_rslt_denorm; +output vfdsu_ex4_single; +output vfdsu_ex4_uf; + +// &Regs; @24 +reg denorm_to_tiny_frac; +reg double_denorm_lst_frac; +reg [54:0] frac_add1_op1; +reg frac_add_1; +reg frac_orig; +reg [54:0] frac_sub1_op1; +reg frac_sub_1; +reg half_denorm_lst_frac; +reg [56:0] qt_result_double_denorm_for_round; +reg [13:0] qt_result_half_denorm_for_round; +reg [27:0] qt_result_single_denorm_for_round; +reg single_denorm_lst_frac; +reg vfdsu_ex4_denorm_to_tiny_frac; +reg vfdsu_ex4_double; +reg vfdsu_ex4_dz; +reg [12:0] vfdsu_ex4_expnt_rst; +reg [54:0] vfdsu_ex4_frac; +reg vfdsu_ex4_nv; +reg vfdsu_ex4_nx; +reg vfdsu_ex4_of; +reg vfdsu_ex4_of_rst_lfn; +reg [1 :0] vfdsu_ex4_potnt_norm; +reg vfdsu_ex4_potnt_of; +reg vfdsu_ex4_potnt_uf; +reg [51:0] vfdsu_ex4_qnan_f; +reg vfdsu_ex4_qnan_sign; +reg vfdsu_ex4_result_inf; +reg vfdsu_ex4_result_lfn; +reg vfdsu_ex4_result_nor; +reg vfdsu_ex4_result_qnan; +reg vfdsu_ex4_result_sign; +reg vfdsu_ex4_result_zero; +reg vfdsu_ex4_rslt_denorm; +reg vfdsu_ex4_single; +reg vfdsu_ex4_uf; + +// &Wires; @25 +wire cp0_vfpu_icg_en; +wire cp0_yy_clk_en; +wire cpurst_b; +wire ex3_denorm_eq; +wire ex3_denorm_gr; +wire ex3_denorm_lst_frac; +wire ex3_denorm_nx; +wire 
ex3_denorm_plus; +wire ex3_denorm_potnt_norm; +wire ex3_denorm_zero; +wire ex3_doub_denorm_plus; +wire ex3_doub_denorm_potnt_norm; +wire ex3_doub_eq; +wire ex3_doub_gr; +wire ex3_doub_rst_eq_1; +wire ex3_doub_zero; +wire ex3_double_denorm_eq; +wire ex3_double_denorm_gr; +wire ex3_double_denorm_zero; +wire ex3_double_low_not_zero; +wire [12:0] ex3_expnt_adjst; +wire [12:0] ex3_expnt_adjust_result; +wire ex3_half_denorm_eq; +wire ex3_half_denorm_gr; +wire ex3_half_denorm_plus; +wire ex3_half_denorm_potnt_norm; +wire ex3_half_denorm_zero; +wire ex3_half_eq; +wire ex3_half_gr; +wire ex3_half_low_not_zero; +wire ex3_half_rst_eq_1; +wire ex3_half_zero; +wire ex3_nx; +wire ex3_pipe_clk; +wire ex3_pipe_clk_en; +wire ex3_pipedown; +wire [1 :0] ex3_potnt_norm; +wire ex3_qt_doub_lo2_not0; +wire ex3_qt_doub_lo3_not0; +wire ex3_qt_eq; +wire ex3_qt_gr; +wire ex3_qt_half_lo2_not0; +wire ex3_qt_half_lo3_not0; +wire ex3_qt_sing_lo3_not0; +wire ex3_qt_sing_lo4_not0; +wire ex3_qt_zero; +wire ex3_rslt_denorm; +wire ex3_rst_eq_1; +wire ex3_rst_nor; +wire ex3_sing_denorm_plus; +wire ex3_sing_denorm_potnt_norm; +wire ex3_sing_eq; +wire ex3_sing_gr; +wire ex3_sing_rst_eq_1; +wire ex3_sing_zero; +wire ex3_single_denorm_eq; +wire ex3_single_denorm_gr; +wire ex3_single_denorm_zero; +wire ex3_single_low_not_zero; +wire forever_cpuclk; +wire [54:0] frac_add1_op1_with_denorm; +wire [54:0] frac_add1_rst; +wire frac_denorm_rdn_add_1; +wire frac_denorm_rdn_sub_1; +wire frac_denorm_rmm_add_1; +wire frac_denorm_rne_add_1; +wire frac_denorm_rtz_sub_1; +wire frac_denorm_rup_add_1; +wire frac_denorm_rup_sub_1; +wire [54:0] frac_final_rst; +wire frac_rdn_add_1; +wire frac_rdn_sub_1; +wire frac_rmm_add_1; +wire frac_rne_add_1; +wire frac_rtz_sub_1; +wire frac_rup_add_1; +wire frac_rup_sub_1; +wire [54:0] frac_sub1_op1_with_denorm; +wire [54:0] frac_sub1_rst; +wire pad_yy_icg_scan_en; +wire [57:0] total_qt_rt_58; +wire vfdsu_ex2_of_rm_lfn; +wire [12:0] vfdsu_ex3_doub_expnt_rst; +wire vfdsu_ex3_double; 
+wire vfdsu_ex3_dz; +wire [12:0] vfdsu_ex3_expnt_rst; +wire [12:0] vfdsu_ex3_half_expnt_rst; +wire vfdsu_ex3_id_srt_skip; +wire vfdsu_ex3_nv; +wire vfdsu_ex3_of; +wire vfdsu_ex3_potnt_of; +wire vfdsu_ex3_potnt_uf; +wire [51:0] vfdsu_ex3_qnan_f; +wire vfdsu_ex3_qnan_sign; +wire vfdsu_ex3_rem_sign; +wire vfdsu_ex3_rem_zero; +wire [52:0] vfdsu_ex3_result_denorm_round_add_num; +wire vfdsu_ex3_result_inf; +wire vfdsu_ex3_result_lfn; +wire vfdsu_ex3_result_qnan; +wire vfdsu_ex3_result_sign; +wire vfdsu_ex3_result_zero; +wire [2 :0] vfdsu_ex3_rm; +wire vfdsu_ex3_rslt_denorm; +wire [8 :0] vfdsu_ex3_sing_expnt_rst; +wire vfdsu_ex3_single; +wire vfdsu_ex3_uf; + + +//=======================Round Rule========================= +//1/8 <= x < 1/4, 1/2 <= y < 1, => 1/8 < z < 1/2 +//q[57:0] represent the fraction part result of quotient, q[57] for 1/2 +//Thus the first "1" in 58 bit quotient will be in q[56] or q[55] +//For Double Float +//29 round to get 58 bit quotient, 52+1 bit as valid result, other for round +//if q[56] is 1, q[56:4] as 1.xxxx valid result, [3:0] for round +//if q[56] is 0, q[55:3] as 1.xxxx valid result, [2:0] for round +//For Single Float +//15 round to get 30 bit quotient, 23+1 bit as valid result, other for round +//if q[56] is 1, q[56:33] as 1.xxxx valid result, [32:28] for round +//if q[56] is 0, q[55:32] as 1.xxxx valid result, [31:28] for round +assign ex3_qt_half_lo3_not0 = |total_qt_rt_58[44:42]; // half precision: round bits are [45:42] when q[56] is 1, [44:42] otherwise +assign ex3_qt_half_lo2_not0 = |total_qt_rt_58[43:42]; +assign ex3_half_gr = total_qt_rt_58[56] + ? total_qt_rt_58[45] && ex3_qt_half_lo3_not0 + : total_qt_rt_58[44] && ex3_qt_half_lo2_not0; +assign ex3_half_eq = (total_qt_rt_58[56]) // exact-half test must use the half-precision sticky bits, mirroring ex3_half_gr (previously tested the single-precision bits [31:28]) + ? total_qt_rt_58[45] && !ex3_qt_half_lo3_not0 + : total_qt_rt_58[44] && !ex3_qt_half_lo2_not0; +assign ex3_half_zero = (total_qt_rt_58[56]) + ? 
~|total_qt_rt_58[45:42] + : ~|total_qt_rt_58[44:42]; +assign ex3_half_rst_eq_1 = total_qt_rt_58[56] && ~|total_qt_rt_58[55:46]; +assign ex3_half_denorm_plus = !total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1ff2); +assign ex3_half_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1ff1); +assign vfdsu_ex3_expnt_rst[12:0] = vfdsu_ex3_half_expnt_rst[12:0]; +// &Force("bus","total_qt_rt_58",57,0); @54 +assign ex3_qt_doub_lo3_not0 = |total_qt_rt_58[2:0]; +assign ex3_qt_doub_lo2_not0 = |total_qt_rt_58[1:0]; +assign ex3_qt_sing_lo4_not0 = |total_qt_rt_58[31:28]; +assign ex3_qt_sing_lo3_not0 = |total_qt_rt_58[30:28]; +//the quotient round bits great than "10000"(ronnd bits 10..0) +assign ex3_doub_gr = (total_qt_rt_58[56]) + ? total_qt_rt_58[3] && ex3_qt_doub_lo3_not0 + : total_qt_rt_58[2] && ex3_qt_doub_lo2_not0; +assign ex3_sing_gr = (total_qt_rt_58[56]) + ? total_qt_rt_58[32] && ex3_qt_sing_lo4_not0 + : total_qt_rt_58[31] && ex3_qt_sing_lo3_not0; + +//the quotient round bits is equal to "10000"(ronnd bits 10..0) +assign ex3_doub_eq = (total_qt_rt_58[56]) + ? total_qt_rt_58[3] && !ex3_qt_doub_lo3_not0 + : total_qt_rt_58[2] && !ex3_qt_doub_lo2_not0; +assign ex3_sing_eq = (total_qt_rt_58[56]) + ? total_qt_rt_58[32] && !ex3_qt_sing_lo4_not0 + : total_qt_rt_58[31] && !ex3_qt_sing_lo3_not0; +//the quotient round bits is zero +assign ex3_doub_zero = (total_qt_rt_58[56]) + ? ~|total_qt_rt_58[3:0] + : ~|total_qt_rt_58[2:0]; +assign ex3_sing_zero = (total_qt_rt_58[56]) + ? 
~|total_qt_rt_58[32:28] + : ~|total_qt_rt_58[31:28]; +//quotient is 1.00000..00 need special dealt with in the following +assign ex3_doub_rst_eq_1 = total_qt_rt_58[56] && ~|total_qt_rt_58[55:4]; +assign ex3_sing_rst_eq_1 = total_qt_rt_58[56] && ~|total_qt_rt_58[55:33]; +// for denormal result, first select the quotation num for rounding +// specially for the result e=-126 and e=-1022,the denorm depends on the +// MSB of the quotient +assign ex3_doub_denorm_plus = !total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1c02); +assign ex3_sing_denorm_plus = !total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1f82); + +assign ex3_doub_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1c01); +assign ex3_sing_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1f81); +assign ex3_rslt_denorm = ex3_denorm_plus || vfdsu_ex3_rslt_denorm; +assign ex3_denorm_potnt_norm = vfdsu_ex3_double ? ex3_doub_denorm_potnt_norm : + vfdsu_ex3_single ? ex3_sing_denorm_potnt_norm + : ex3_half_denorm_potnt_norm; +assign ex3_rst_eq_1 = (vfdsu_ex3_double)? ex3_doub_rst_eq_1 : + vfdsu_ex3_single ? ex3_sing_rst_eq_1 : ex3_half_rst_eq_1; +assign ex3_qt_eq = (vfdsu_ex3_double)? ex3_doub_eq : + vfdsu_ex3_single ? ex3_sing_eq : ex3_half_eq; +assign ex3_qt_gr = (vfdsu_ex3_double)? ex3_doub_gr : + vfdsu_ex3_single ? ex3_sing_gr : ex3_half_gr; +assign ex3_qt_zero = (vfdsu_ex3_double)? ex3_doub_zero : + vfdsu_ex3_single ? ex3_sing_zero : ex3_half_zero; +assign ex3_denorm_plus = (vfdsu_ex3_double) ? ex3_doub_denorm_plus + : vfdsu_ex3_single ? 
ex3_sing_denorm_plus + : ex3_half_denorm_plus; + +// &CombBeg; @108 +always @( vfdsu_ex3_doub_expnt_rst[12:0] + or total_qt_rt_58[56:0]) +begin +case(vfdsu_ex3_doub_expnt_rst[12:0]) + 13'h1c02:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[3:0], 53'b0}; + double_denorm_lst_frac = total_qt_rt_58[4]; + end//-1022 1 + 13'h1c01:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[4:0], 52'b0}; //-1023 0 + double_denorm_lst_frac = total_qt_rt_58[5]; + end//-1022 1 + 13'h1c00:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[5:0], 51'b0}; //-1024 -1 + double_denorm_lst_frac = total_qt_rt_58[6]; + end//-1022 1 + 13'h1bff:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[6:0], 50'b0}; //-1025 -2 + double_denorm_lst_frac = total_qt_rt_58[7]; + end//-1022 1 + 13'h1bfe:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[7:0], 49'b0}; //-1026 -3 + double_denorm_lst_frac = total_qt_rt_58[8]; + end//-1022 1 + 13'h1bfd:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[8:0], 48'b0}; //-1027 -4 + double_denorm_lst_frac = total_qt_rt_58[9]; + end//-1022 1 + 13'h1bfc:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[9:0], 47'b0}; //-1028 -5 + double_denorm_lst_frac = total_qt_rt_58[10]; + end//-1022 1 + 13'h1bfb:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[10:0],46'b0}; //-1029 -6 + double_denorm_lst_frac = total_qt_rt_58[11]; + end//-1022 1 + 13'h1bfa:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[11:0],45'b0}; //-1030 -7 + double_denorm_lst_frac = total_qt_rt_58[12]; + end//-1022 1 + 13'h1bf9:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[12:0],44'b0}; //-1031 -8 + double_denorm_lst_frac = total_qt_rt_58[13]; + end//-1022 1 + 13'h1bf8:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[13:0],43'b0}; //-1032 -9 + double_denorm_lst_frac = total_qt_rt_58[14]; + end//-1022 1 + 13'h1bf7:begin 
qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[14:0],42'b0}; //-1033 -10 + double_denorm_lst_frac = total_qt_rt_58[15]; + end//-1022 1 + 13'h1bf6:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[15:0],41'b0}; //-1034 -11 + double_denorm_lst_frac = total_qt_rt_58[16]; + end//-1022 1 + 13'h1bf5:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[16:0],40'b0}; //-1035 -12 + double_denorm_lst_frac = total_qt_rt_58[17]; + end//-1022 1 + 13'h1bf4:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[17:0],39'b0}; //-1036 -13 + double_denorm_lst_frac = total_qt_rt_58[18]; + end//-1022 1 + 13'h1bf3:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[18:0],38'b0}; // -1037 + double_denorm_lst_frac = total_qt_rt_58[19]; + end//-1022 1 + 13'h1bf2:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[19:0],37'b0}; //-1038 + double_denorm_lst_frac = total_qt_rt_58[20]; + end//-1022 1 + 13'h1bf1:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[20:0],36'b0}; //-1039 + double_denorm_lst_frac = total_qt_rt_58[21]; + end//-1022 1 + 13'h1bf0:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[21:0],35'b0}; //-1040 + double_denorm_lst_frac = total_qt_rt_58[22]; + end//-1022 1 + 13'h1bef:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[22:0],34'b0}; //-1041 + double_denorm_lst_frac = total_qt_rt_58[23]; + end//-1022 1 + 13'h1bee:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[23:0],33'b0}; //-1042 + double_denorm_lst_frac = total_qt_rt_58[24]; + end//-1022 1 + 13'h1bed:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[24:0],32'b0}; //-1043 + double_denorm_lst_frac = total_qt_rt_58[25]; + end//-1022 1 + 13'h1bec:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[25:0],31'b0}; //-1044 + double_denorm_lst_frac = total_qt_rt_58[26]; + end//-1022 1 + 13'h1beb:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[26:0],30'b0}; 
//-1045 + double_denorm_lst_frac = total_qt_rt_58[27]; + end//-1022 1 + 13'h1bea:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[27:0],29'b0}; //-1046 + double_denorm_lst_frac = total_qt_rt_58[28]; + end//-1022 1 + 13'h1be9:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[28:0],28'b0}; //-1047 + double_denorm_lst_frac = total_qt_rt_58[29]; + end//-1022 1 + 13'h1be8:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[29:0],27'b0}; //-1048 + double_denorm_lst_frac = total_qt_rt_58[30]; + end//-1022 1 + 13'h1be7:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[30:0],26'b0}; //-1049 + double_denorm_lst_frac = total_qt_rt_58[31]; + end//-1022 1 + 13'h1be6:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[31:0],25'b0}; //-1050 + double_denorm_lst_frac = total_qt_rt_58[32]; + end//-1022 1 + 13'h1be5:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[32:0],24'b0}; //-1051 + double_denorm_lst_frac = total_qt_rt_58[33]; + end//-1022 1 + 13'h1be4:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[33:0],23'b0}; //-1052 + double_denorm_lst_frac = total_qt_rt_58[34]; + end//-1022 1 + 13'h1be3:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[34:0],22'b0}; //-1053 + double_denorm_lst_frac = total_qt_rt_58[35]; + end//-1022 1 + 13'h1be2:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[35:0],21'b0}; //-1054 + double_denorm_lst_frac = total_qt_rt_58[36]; + end//-1022 1 + 13'h1be1:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[36:0],20'b0}; //-1055 + double_denorm_lst_frac = total_qt_rt_58[37]; + end//-1022 1 + 13'h1be0:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[37:0],19'b0}; //-1056 + double_denorm_lst_frac = total_qt_rt_58[38]; + end//-1022 1 + 13'h1bdf:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[38:0],18'b0}; //-1057 + double_denorm_lst_frac = total_qt_rt_58[39]; + end//-1022 1 + 13'h1bde:begin 
qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[39:0],17'b0}; //-1058 + double_denorm_lst_frac = total_qt_rt_58[40]; + end//-1022 1 + 13'h1bdd:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[40:0],16'b0}; //-1059 + double_denorm_lst_frac = total_qt_rt_58[41]; + end//-1022 1 + 13'h1bdc:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[41:0],15'b0}; //-1060 + double_denorm_lst_frac = total_qt_rt_58[42]; + end//-1022 1 + 13'h1bdb:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[42:0],14'b0}; //-1061 + double_denorm_lst_frac = total_qt_rt_58[43]; + end//-1022 1 + 13'h1bda:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[43:0],13'b0}; //-1062 + double_denorm_lst_frac = total_qt_rt_58[44]; + end//-1022 1 + 13'h1bd9:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[44:0],12'b0}; //-1063 + double_denorm_lst_frac = total_qt_rt_58[45]; + end//-1022 1 + 13'h1bd8:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[45:0],11'b0}; //-1064 + double_denorm_lst_frac = total_qt_rt_58[46]; + end//-1022 1 + 13'h1bd7:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[46:0],10'b0}; //-1065 + double_denorm_lst_frac = total_qt_rt_58[47]; + end//-1022 1 + 13'h1bd6:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[47:0],9'b0}; //-1066 + double_denorm_lst_frac = total_qt_rt_58[48]; + end//-1022 1 + 13'h1bd5:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[48:0],8'b0}; //-1067 + double_denorm_lst_frac = total_qt_rt_58[49]; + end//-1022 1 + 13'h1bd4:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[49:0],7'b0}; //-1068 + double_denorm_lst_frac = total_qt_rt_58[50]; + end//-1022 1 + 13'h1bd3:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[50:0],6'b0}; //-1069 + double_denorm_lst_frac = total_qt_rt_58[51]; + end//-1022 1 + 13'h1bd2:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[51:0],5'b0}; //-1070 + 
double_denorm_lst_frac = total_qt_rt_58[52]; + end//-1022 1 + 13'h1bd1:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[52:0],4'b0}; //-1071 + double_denorm_lst_frac = total_qt_rt_58[53]; + end//-1022 1 + 13'h1bd0:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[53:0],3'b0}; //-1072 + double_denorm_lst_frac = total_qt_rt_58[54]; + end//-1022 1 + 13'h1bcf:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[54:0],2'b0}; //-1073 + double_denorm_lst_frac = total_qt_rt_58[55]; + end//-1022 1 + 13'h1bce:begin qt_result_double_denorm_for_round[56:0] = {total_qt_rt_58[55:0],1'b0}; + double_denorm_lst_frac = total_qt_rt_58[56]; + end//-1022 1 + default:begin qt_result_double_denorm_for_round[56:0] = total_qt_rt_58[56:0]; + double_denorm_lst_frac = 1'b0; + end//-1022 1 + +endcase +// &CombEnd; @274 +end +//denomal result, check for rounding further optimization can be done in +//future +assign ex3_double_denorm_eq = qt_result_double_denorm_for_round[56] + && !ex3_double_low_not_zero; +assign ex3_double_low_not_zero = |qt_result_double_denorm_for_round[55:0]; +assign ex3_double_denorm_gr = qt_result_double_denorm_for_round[56] + && ex3_double_low_not_zero; +assign ex3_double_denorm_zero = !qt_result_double_denorm_for_round[56] + && !ex3_double_low_not_zero; + +// &CombBeg; @285 +always @( vfdsu_ex3_sing_expnt_rst[8:0] + or total_qt_rt_58[56:28]) +begin +case(vfdsu_ex3_sing_expnt_rst[8:0]) + 9'h182:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[32:28],23'b0}; //-126 1 + single_denorm_lst_frac = total_qt_rt_58[33]; + end//-1022 1 + 9'h181:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[33:28],22'b0}; //-127 0 + single_denorm_lst_frac = total_qt_rt_58[34]; + end//-1022 1 + 9'h180:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[34:28],21'b0}; //-128 -1 + single_denorm_lst_frac = total_qt_rt_58[35]; + end//-1022 1 + 9'h17f:begin qt_result_single_denorm_for_round[27:0] = 
{total_qt_rt_58[35:28],20'b0}; //-129 -2 + single_denorm_lst_frac = total_qt_rt_58[36]; + end//-1022 1 + 9'h17e:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[36:28],19'b0}; //-130 -3 + single_denorm_lst_frac = total_qt_rt_58[37]; + end//-1022 1 + 9'h17d:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[37:28],18'b0}; //-131 -4 + single_denorm_lst_frac = total_qt_rt_58[38]; + end//-1022 1 + 9'h17c:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[38:28],17'b0}; //-132 -5 + single_denorm_lst_frac = total_qt_rt_58[39]; + end//-1022 1 + 9'h17b:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[39:28],16'b0}; //-133 -6 + single_denorm_lst_frac = total_qt_rt_58[40]; + end//-1022 1 + 9'h17a:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[40:28],15'b0}; //-134 -7 + single_denorm_lst_frac = total_qt_rt_58[41]; + end//-1022 1 + 9'h179:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[41:28],14'b0}; //-135 -8 + single_denorm_lst_frac = total_qt_rt_58[42]; + end//-1022 1 + 9'h178:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[42:28],13'b0}; //-136 -9 + single_denorm_lst_frac = total_qt_rt_58[43]; + end//-1022 1 + 9'h177:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[43:28],12'b0}; //-137 -10 + single_denorm_lst_frac = total_qt_rt_58[44]; + end//-1022 1 + 9'h176:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[44:28],11'b0}; //-138 -11 + single_denorm_lst_frac = total_qt_rt_58[45]; + end//-1022 1 + 9'h175:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[45:28],10'b0}; //-139 -12 + single_denorm_lst_frac = total_qt_rt_58[46]; + end//-1022 1 + 9'h174:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[46:28],9'b0}; //-140 -13 + single_denorm_lst_frac = total_qt_rt_58[47]; + end//-1022 1 + 9'h173:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[47:28],8'b0}; // -141 + single_denorm_lst_frac = total_qt_rt_58[48]; + 
end//-1022 1 + 9'h172:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[48:28],7'b0};//-142 + single_denorm_lst_frac = total_qt_rt_58[49]; + end//-1022 1 + 9'h171:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[49:28],6'b0};//-143 + single_denorm_lst_frac = total_qt_rt_58[50]; + end//-1022 1 + 9'h170:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[50:28],5'b0}; //-144 + single_denorm_lst_frac = total_qt_rt_58[51]; + end//-1022 1 + 9'h16f:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[51:28],4'b0}; //-145 + single_denorm_lst_frac = total_qt_rt_58[52]; + end//-1022 1 + 9'h16e:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[52:28],3'b0}; //-146 + single_denorm_lst_frac = total_qt_rt_58[53]; + end//-1022 1 + 9'h16d:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[53:28],2'b0}; //-147 + single_denorm_lst_frac = total_qt_rt_58[54]; + end//-1022 1 + 9'h16c:begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[54:28],1'b0}; //-148 + single_denorm_lst_frac = total_qt_rt_58[55]; + end//-1022 1 + 9'h16b: begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[55:28]}; + single_denorm_lst_frac = total_qt_rt_58[56] ; + end//-1022 1 + default: begin qt_result_single_denorm_for_round[27:0] = {total_qt_rt_58[56:29]}; + single_denorm_lst_frac = 1'b0; + end//-1022 1 +endcase +// &CombEnd; @363 +end +//rounding evaluation for single denormalize number +assign ex3_single_denorm_eq = qt_result_single_denorm_for_round[27] + && !ex3_single_low_not_zero; +assign ex3_single_low_not_zero = |qt_result_single_denorm_for_round[26:0]; +assign ex3_single_denorm_gr = qt_result_single_denorm_for_round[27] + && ex3_single_low_not_zero; +assign ex3_single_denorm_zero = !qt_result_single_denorm_for_round[27] + && !ex3_single_low_not_zero; +// &CombBeg; @372 +always @( total_qt_rt_58[56:42] + or vfdsu_ex3_half_expnt_rst[12:0]) +begin +case(vfdsu_ex3_half_expnt_rst[12:0]) + 13'h1ff2:begin 
qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[45:42],10'b0}; //-14 1 + half_denorm_lst_frac = total_qt_rt_58[46]; + end//-1022 1 + 13'h1ff1:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[46:42],9'b0}; //-15 0 + half_denorm_lst_frac = total_qt_rt_58[47]; + end//-1022 1 + 13'h1ff0:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[47:42],8'b0}; //-16 -1 + half_denorm_lst_frac = total_qt_rt_58[48]; + end//-1022 1 + 13'h1fef:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[48:42],7'b0}; //-17 -2 + half_denorm_lst_frac = total_qt_rt_58[49]; + end//-1022 1 + 13'h1fee:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[49:42],6'b0}; //-18 -3 + half_denorm_lst_frac = total_qt_rt_58[50]; + end//-1022 1 + 13'h1fed:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[50:42],5'b0}; //-19 -4 + half_denorm_lst_frac = total_qt_rt_58[51]; + end//-1022 1 + 13'h1fec:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[51:42],4'b0}; //-20 -5 + half_denorm_lst_frac = total_qt_rt_58[52]; + end//-1022 1 + 13'h1feb:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[52:42],3'b0}; //-21 -6 + half_denorm_lst_frac = total_qt_rt_58[53]; + end//-1022 1 + 13'h1fea:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[53:42],2'b0}; //-22 -7 + half_denorm_lst_frac = total_qt_rt_58[54]; + end//-1022 1 + 13'h1fe9:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[54:42],1'b0}; //-23 -8 + half_denorm_lst_frac = total_qt_rt_58[55]; + end//-1022 1 + 13'h1fe8:begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[55:42]}; //-24 -9 + half_denorm_lst_frac = total_qt_rt_58[56]; + end//-1022 1 + default: begin qt_result_half_denorm_for_round[13:0] = {total_qt_rt_58[56:43]}; + half_denorm_lst_frac = 1'b0; + end//-1022 1 +endcase +// &CombEnd; @411 +end +//rounding evaluation for single denormalize number +assign ex3_half_denorm_eq = qt_result_half_denorm_for_round[13] + && 
!ex3_half_low_not_zero; +assign ex3_half_low_not_zero = |qt_result_half_denorm_for_round[12:0]; +assign ex3_half_denorm_gr = qt_result_half_denorm_for_round[13] + && ex3_half_low_not_zero; +assign ex3_half_denorm_zero = !qt_result_half_denorm_for_round[13] + && !ex3_half_low_not_zero; + +assign ex3_denorm_eq = vfdsu_ex3_double ? ex3_double_denorm_eq : + vfdsu_ex3_single ? ex3_single_denorm_eq : ex3_half_denorm_eq; +assign ex3_denorm_gr = vfdsu_ex3_double ? ex3_double_denorm_gr : + vfdsu_ex3_single ? ex3_single_denorm_gr : ex3_half_denorm_gr; +assign ex3_denorm_zero = vfdsu_ex3_double ? ex3_double_denorm_zero : + vfdsu_ex3_single ? ex3_single_denorm_zero : ex3_half_denorm_zero; +assign ex3_denorm_lst_frac = vfdsu_ex3_double ? double_denorm_lst_frac : + vfdsu_ex3_single ? single_denorm_lst_frac : half_denorm_lst_frac; + +//Different Round Mode with different rounding rule +//Here we call rounding bit as "rb", remainder as "rem" +//RNE : +// 1.+1 : rb>10000 || rb==10000 && rem>0 +// 2. 0 : Rest Condition +// 3.-1 : Never occur +//RTZ : +// 1.+1 : Never occur +// 2. 0 : Rest Condition +// 3.-1 : rb=10000 && rem<0 +//RDN : +// 1.+1 : Q>0 Never occur ; Q<0 Rest condition +// 2. 0 : Q>0 Rest condition; Q<0 Rem<0 && rb=0 +// 3.-1 : Q>0 Rem<0 && rb=0 ; Q<0 Never occur +//RUP : +// 1.+1 : Q>0 Rest Condition; Q<0 Never occur +// 2. 0 : Q>0 Rem<0 && rb=0 ; Q<0 Rest condition +// 3.-1 : Q>0 Never occur ; Q<0 Rem<0 && rb=0 +//RMM : +// 1.+1 : rb>10000 || rb==10000 && rem>0 +// 2. 
0 : Rest Condition +// 3.-1 : Never occur +assign frac_rne_add_1 = ex3_qt_gr || + (ex3_qt_eq && !vfdsu_ex3_rem_sign); +assign frac_rtz_sub_1 = ex3_qt_zero && vfdsu_ex3_rem_sign; +assign frac_rup_add_1 = !vfdsu_ex3_result_sign && + (!ex3_qt_zero || + (!vfdsu_ex3_rem_sign && !vfdsu_ex3_rem_zero)); +assign frac_rup_sub_1 = vfdsu_ex3_result_sign && + (ex3_qt_zero && vfdsu_ex3_rem_sign); +assign frac_rdn_add_1 = vfdsu_ex3_result_sign && + (!ex3_qt_zero || + (!vfdsu_ex3_rem_sign && !vfdsu_ex3_rem_zero)); +assign frac_rdn_sub_1 = !vfdsu_ex3_result_sign && + (ex3_qt_zero && vfdsu_ex3_rem_sign); +assign frac_rmm_add_1 = ex3_qt_gr || + (ex3_qt_eq && !vfdsu_ex3_rem_sign); +//denormal result +assign frac_denorm_rne_add_1 = ex3_denorm_gr || + (ex3_denorm_eq && + ((vfdsu_ex3_rem_zero && + ex3_denorm_lst_frac) || + (!vfdsu_ex3_rem_zero && + !vfdsu_ex3_rem_sign))); +assign frac_denorm_rtz_sub_1 = ex3_denorm_zero && vfdsu_ex3_rem_sign; +assign frac_denorm_rup_add_1 = !vfdsu_ex3_result_sign && + (!ex3_denorm_zero || + (!vfdsu_ex3_rem_sign && !vfdsu_ex3_rem_zero)); +assign frac_denorm_rup_sub_1 = vfdsu_ex3_result_sign && + (ex3_denorm_zero && vfdsu_ex3_rem_sign); +assign frac_denorm_rdn_add_1 = vfdsu_ex3_result_sign && + (!ex3_denorm_zero || + (!vfdsu_ex3_rem_sign && !vfdsu_ex3_rem_zero)); +assign frac_denorm_rdn_sub_1 = !vfdsu_ex3_result_sign && + (ex3_denorm_zero && vfdsu_ex3_rem_sign); +assign frac_denorm_rmm_add_1 = ex3_denorm_gr || + (ex3_denorm_eq && !vfdsu_ex3_rem_sign); + +//RM select +// &CombBeg; @489 +always @( vfdsu_ex3_result_sign + or frac_rtz_sub_1 + or frac_rdn_add_1 + or frac_denorm_rtz_sub_1 + or frac_rup_sub_1 + or frac_denorm_rmm_add_1 + or frac_denorm_rne_add_1 + or frac_rmm_add_1 + or frac_denorm_rdn_add_1 + or frac_rne_add_1 + or frac_denorm_rdn_sub_1 + or frac_rup_add_1 + or frac_denorm_rup_sub_1 + or frac_rdn_sub_1 + or ex3_rslt_denorm + or vfdsu_ex3_rm[2:0] + or frac_denorm_rup_add_1 + or vfdsu_ex3_id_srt_skip) +begin +case(vfdsu_ex3_rm[2:0]) + 
3'b000://round to nearst,ties to even + begin + frac_add_1 = ex3_rslt_denorm ? frac_denorm_rne_add_1 : frac_rne_add_1; + frac_sub_1 = 1'b0; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rne_add_1 : !frac_rne_add_1; + denorm_to_tiny_frac = vfdsu_ex3_id_srt_skip ? 1'b0 : frac_denorm_rne_add_1; + end + 3'b001:// round to 0 + begin + frac_add_1 = 1'b0; + frac_sub_1 = ex3_rslt_denorm ? frac_denorm_rtz_sub_1 : frac_rtz_sub_1; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rtz_sub_1 : !frac_rtz_sub_1; + denorm_to_tiny_frac = 1'b0; + end + 3'b010://round to -inf + begin + frac_add_1 = ex3_rslt_denorm ? frac_denorm_rdn_add_1 : frac_rdn_add_1; + frac_sub_1 = ex3_rslt_denorm ? frac_denorm_rdn_sub_1 : frac_rdn_sub_1; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rdn_add_1 && !frac_denorm_rdn_sub_1 + : !frac_rdn_add_1 && !frac_rdn_sub_1; + denorm_to_tiny_frac = vfdsu_ex3_id_srt_skip ? vfdsu_ex3_result_sign + : frac_denorm_rdn_add_1; + end + 3'b011://round to +inf + begin + frac_add_1 = ex3_rslt_denorm ? frac_denorm_rup_add_1 : frac_rup_add_1; + frac_sub_1 = ex3_rslt_denorm ? frac_denorm_rup_sub_1 : frac_rup_sub_1; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rup_add_1 && !frac_denorm_rup_sub_1 + : !frac_rup_add_1 && !frac_rup_sub_1; + denorm_to_tiny_frac = vfdsu_ex3_id_srt_skip ? !vfdsu_ex3_result_sign + : frac_denorm_rup_add_1; + end + 3'b100://round to nearest,ties to max magnitude + begin + frac_add_1 = ex3_rslt_denorm ? frac_denorm_rmm_add_1 : frac_rmm_add_1; + frac_sub_1 = 1'b0; + frac_orig = ex3_rslt_denorm ? !frac_denorm_rmm_add_1 : !frac_rmm_add_1; + denorm_to_tiny_frac = vfdsu_ex3_id_srt_skip ? 
1'b0 : frac_denorm_rmm_add_1; + end + default: + begin + frac_add_1 = 1'b0; + frac_sub_1 = 1'b0; + frac_orig = 1'b0; + denorm_to_tiny_frac = 1'b0; + end +endcase +// &CombEnd; @538 +end +//Add 1 or Sub 1 constant +// &CombBeg; @540 +always @( total_qt_rt_58[56] + or vfdsu_ex3_single + or vfdsu_ex3_double) +begin +case({total_qt_rt_58[56],vfdsu_ex3_double,vfdsu_ex3_single}) + 3'b001: + begin + frac_add1_op1[54:0] = {2'b0,24'b1,29'b0}; + frac_sub1_op1[54:0] = {2'b11,{24{1'b1}},29'b0}; + end + 3'b010: + begin + frac_add1_op1[54:0] = 55'b1; + frac_sub1_op1[54:0] = {55{1'b1}}; + end + 3'b101: + begin + frac_add1_op1[54:0] = {25'b1,30'b0}; + frac_sub1_op1[54:0] = {{25{1'b1}},30'b0}; + end + 3'b110: + begin + frac_add1_op1[54:0] = 55'b10; + frac_sub1_op1[54:0] = {{54{1'b1}},1'b0}; + end + 3'b100: + begin + frac_add1_op1[54:0] = {12'b1,43'b0}; + frac_sub1_op1[54:0] = {{12{1'b1}},43'b0}; + end + 3'b000: + begin + frac_add1_op1[54:0] = {13'b1,42'b0}; + frac_sub1_op1[54:0] = {{13{1'b1}},42'b0}; + end + default: + begin + frac_add1_op1[54:0] = 55'b0; + frac_sub1_op1[54:0] = 55'b0; + end +endcase +// &CombEnd; @578 +end +//Add 1 or Sub1 final result +//Conner case when quotient is 0.010000...00 and remainder is negative, +//The real quotient is actually 0.00fff..ff, +//The final result will need to sub 1 when +//RN : Never occur +//RP : sign of quotient is - +//RM : sign of quotient is + +assign frac_add1_rst[54:0] = {1'b0,total_qt_rt_58[56:3]} + + frac_add1_op1_with_denorm[54:0]; +assign frac_add1_op1_with_denorm[54:0] = ex3_rslt_denorm ? + {1'b0,vfdsu_ex3_result_denorm_round_add_num[52:0],1'b0} : + frac_add1_op1[54:0]; +assign frac_sub1_rst[54:0] = (ex3_rst_eq_1) + ? {2'b0,{53{1'b1}}} + : {1'b0,total_qt_rt_58[56:3]} + + frac_sub1_op1_with_denorm[54:0] + {54'b0,ex3_rslt_denorm}; +assign frac_sub1_op1_with_denorm[54:0] = ex3_rslt_denorm ? 
+ ~{1'b0,vfdsu_ex3_result_denorm_round_add_num[52:0],1'b0} : + frac_sub1_op1[54:0]; +assign frac_final_rst[54:0] = (frac_add1_rst[54:0] & {55{frac_add_1}}) | + (frac_sub1_rst[54:0] & {55{frac_sub_1}}) | + ({1'b0,total_qt_rt_58[56:3]} & {55{frac_orig}}); + +//===============Pipe down signal prepare=================== +assign ex3_rst_nor = !vfdsu_ex3_result_zero && + !vfdsu_ex3_result_qnan && + !vfdsu_ex3_result_inf && + !vfdsu_ex3_result_lfn; +assign ex3_nx = ex3_rst_nor && + (!ex3_qt_zero || !vfdsu_ex3_rem_zero || ex3_denorm_nx); +assign ex3_denorm_nx = ex3_rslt_denorm && (!ex3_denorm_zero || !vfdsu_ex3_rem_zero); +//Adjust expnt +//Div:Actural expnt should plus 1 when op0 is id, sub 1 when op1 id +assign ex3_expnt_adjst[12:0] = vfdsu_ex3_double ? 13'h3ff: vfdsu_ex3_single ? 13'h7f : 13'hf; +assign ex3_expnt_adjust_result[12:0] = vfdsu_ex3_expnt_rst[12:0] + + ex3_expnt_adjst[12:0]; +//this information is for the packing, which determin the result is normal +//numer or not; +assign ex3_potnt_norm[1:0] = {ex3_denorm_plus,ex3_denorm_potnt_norm}; +//=======================Pipe to EX4======================== +//gate clk +// &Instance("gated_clk_cell","x_ex3_pipe_clk"); @620 +gated_clk_cell x_ex3_pipe_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex3_pipe_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex3_pipe_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @621 +// .clk_out (ex3_pipe_clk),//Out Clock @622 +// .external_en (1'b0), @623 +// .global_en (cp0_yy_clk_en), @624 +// .local_en (ex3_pipe_clk_en),//Local Condition @625 +// .module_en (cp0_vfpu_icg_en) @626 +// ); @627 +assign ex3_pipe_clk_en = ex3_pipedown; + +always @(posedge ex3_pipe_clk or negedge cpurst_b) +begin + if(!cpurst_b) + begin + vfdsu_ex4_result_zero <= 1'b0; + vfdsu_ex4_result_qnan <= 1'b0; + vfdsu_ex4_result_inf <= 1'b0; + vfdsu_ex4_result_lfn <= 1'b0; + vfdsu_ex4_result_sign <= 1'b0; + 
vfdsu_ex4_potnt_of <= 1'b0; + vfdsu_ex4_potnt_uf <= 1'b0; + vfdsu_ex4_result_nor <= 1'b0; + vfdsu_ex4_expnt_rst[12:0] <= 13'b0; + vfdsu_ex4_nv <= 1'b0; + vfdsu_ex4_nx <= 1'b0; + vfdsu_ex4_uf <= 1'b0; + vfdsu_ex4_of <= 1'b0; + vfdsu_ex4_dz <= 1'b0; + vfdsu_ex4_of_rst_lfn <= 1'b0; + vfdsu_ex4_frac[54:0] <= 55'b0; + vfdsu_ex4_qnan_sign <= 1'b0; + vfdsu_ex4_qnan_f[51:0] <= 52'b0; + vfdsu_ex4_rslt_denorm <= 1'b0; + vfdsu_ex4_denorm_to_tiny_frac + <= 1'b0; + vfdsu_ex4_potnt_norm[1:0] <= 2'b0; + vfdsu_ex4_double <= 1'b0; + vfdsu_ex4_single <= 1'b0; + + end + else if(ex3_pipedown) + begin + vfdsu_ex4_result_zero <= vfdsu_ex3_result_zero; + vfdsu_ex4_result_qnan <= vfdsu_ex3_result_qnan; + vfdsu_ex4_result_inf <= vfdsu_ex3_result_inf; + vfdsu_ex4_result_lfn <= vfdsu_ex3_result_lfn; + vfdsu_ex4_result_sign <= vfdsu_ex3_result_sign; + vfdsu_ex4_potnt_of <= vfdsu_ex3_potnt_of; + vfdsu_ex4_potnt_uf <= vfdsu_ex3_potnt_uf; + vfdsu_ex4_result_nor <= ex3_rst_nor; + vfdsu_ex4_expnt_rst[12:0] <= ex3_expnt_adjust_result[12:0]; + vfdsu_ex4_nv <= vfdsu_ex3_nv; + vfdsu_ex4_nx <= ex3_nx; + vfdsu_ex4_uf <= vfdsu_ex3_uf; + vfdsu_ex4_of <= vfdsu_ex3_of; + vfdsu_ex4_dz <= vfdsu_ex3_dz; + vfdsu_ex4_of_rst_lfn <= vfdsu_ex2_of_rm_lfn; + vfdsu_ex4_frac[54:0] <= frac_final_rst[54:0]; + vfdsu_ex4_qnan_sign <= vfdsu_ex3_qnan_sign; + vfdsu_ex4_qnan_f[51:0] <= vfdsu_ex3_qnan_f[51:0]; + vfdsu_ex4_rslt_denorm <= ex3_rslt_denorm; + vfdsu_ex4_denorm_to_tiny_frac + <= denorm_to_tiny_frac; + vfdsu_ex4_potnt_norm[1:0] <= ex3_potnt_norm[1:0]; + vfdsu_ex4_double <= vfdsu_ex3_double; + vfdsu_ex4_single <= vfdsu_ex3_single; + end + else + begin + vfdsu_ex4_result_zero <= vfdsu_ex4_result_zero; + vfdsu_ex4_result_qnan <= vfdsu_ex4_result_qnan; + vfdsu_ex4_result_inf <= vfdsu_ex4_result_inf; + vfdsu_ex4_result_lfn <= vfdsu_ex4_result_lfn; + vfdsu_ex4_result_sign <= vfdsu_ex4_result_sign; + vfdsu_ex4_potnt_of <= vfdsu_ex4_potnt_of; + vfdsu_ex4_potnt_uf <= vfdsu_ex4_potnt_uf; + vfdsu_ex4_result_nor <= 
vfdsu_ex4_result_nor; + vfdsu_ex4_expnt_rst[12:0] <= vfdsu_ex4_expnt_rst[12:0]; + vfdsu_ex4_nv <= vfdsu_ex4_nv; + vfdsu_ex4_nx <= vfdsu_ex4_nx; + vfdsu_ex4_uf <= vfdsu_ex4_uf; + vfdsu_ex4_of <= vfdsu_ex4_of; + vfdsu_ex4_dz <= vfdsu_ex4_dz; + vfdsu_ex4_of_rst_lfn <= vfdsu_ex4_of_rst_lfn; + vfdsu_ex4_frac[54:0] <= vfdsu_ex4_frac[54:0]; + vfdsu_ex4_qnan_sign <= vfdsu_ex4_qnan_sign; + vfdsu_ex4_qnan_f[51:0] <= vfdsu_ex4_qnan_f[51:0]; + vfdsu_ex4_rslt_denorm <= vfdsu_ex4_rslt_denorm; + vfdsu_ex4_denorm_to_tiny_frac + <= vfdsu_ex4_denorm_to_tiny_frac; + vfdsu_ex4_potnt_norm[1:0] <= vfdsu_ex4_potnt_norm[1:0]; + vfdsu_ex4_double <= vfdsu_ex4_double; + vfdsu_ex4_single <= vfdsu_ex4_single; + end +end + +// &Force("output","vfdsu_ex4_result_nor"); @716 +// &Force("output","vfdsu_ex4_nx"); @717 +// &Force("output","vfdsu_ex4_nv"); @718 +// &Force("output","vfdsu_ex4_uf"); @719 +// &Force("output","vfdsu_ex4_of"); @720 +// &Force("output","vfdsu_ex4_dz"); @721 +// &Force("output","vfdsu_ex4_result_sign"); @722 +// &Force("output","vfdsu_ex4_of_rst_lfn"); @723 +// &Force("output","vfdsu_ex4_potnt_of"); @724 +// &Force("output","vfdsu_ex4_potnt_uf"); @725 +// &Force("output","vfdsu_ex4_result_inf"); @726 +// &Force("output","vfdsu_ex4_result_lfn"); @727 +// &Force("output","vfdsu_ex4_result_qnan"); @728 +// &Force("output","vfdsu_ex4_result_zero"); @729 +// &Force("output","vfdsu_ex4_frac"); @730 +// &Force("output","vfdsu_ex4_expnt_rst"); @731 +// &Force("output","vfdsu_ex4_qnan_sign"); @732 +// &Force("output","vfdsu_ex4_qnan_f"); @733 +// &Force("output","vfdsu_ex4_rslt_denorm"); @734 +// &Force("output","vfdsu_ex4_denorm_to_tiny_frac"); @735 +// &Force("output","vfdsu_ex4_potnt_norm"); @736 +// &Force("output","vfdsu_ex4_double"); @737 +// &Force("output","vfdsu_ex4_single"); @738 +// &ModuleEnd; @739 +endmodule + + diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v 
new file mode 100644 index 00000000..c7a679c1 --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v @@ -0,0 +1,323 @@ +/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// &Depend("cpu_cfig.h"); @22 +// &ModuleBeg; @23 +module ct_vfdsu_scalar_dp( + cp0_vfpu_icg_en, + cp0_yy_clk_en, + cpurst_b, + dp_vfdsu_ex1_pipex_dst_ereg, + dp_vfdsu_ex1_pipex_dst_vreg, + dp_vfdsu_ex1_pipex_iid, + dp_vfdsu_ex1_pipex_imm0, + dp_vfdsu_ex1_pipex_srcf0, + dp_vfdsu_ex1_pipex_srcf1, + ex1_data_clk, + ex1_div, + ex1_double, + ex1_pipedown, + ex1_scalar, + ex1_single, + ex1_sqrt, + ex1_src0, + ex1_src1, + ex1_static_rm, + ex2_data_clk, + ex2_pipedown, + ex3_data_clk, + ex3_pipedown, + ex4_out_expt, + ex4_out_result, + forever_cpuclk, + idu_vfpu_rf_pipex_func, + idu_vfpu_rf_pipex_gateclk_sel, + pad_yy_icg_scan_en, + pipex_dp_vfdsu_ereg, + pipex_dp_vfdsu_ereg_data, + pipex_dp_vfdsu_freg_data, + pipex_dp_vfdsu_vreg, + vfdsu_ex2_double, + vfdsu_ex2_single +); + +// &Ports; @24 +input cp0_vfpu_icg_en; +input cp0_yy_clk_en; +input cpurst_b; +input [4 :0] dp_vfdsu_ex1_pipex_dst_ereg; +input [6 :0] dp_vfdsu_ex1_pipex_dst_vreg; +input [6 :0] dp_vfdsu_ex1_pipex_iid; +input [2 :0] dp_vfdsu_ex1_pipex_imm0; +input [63:0] dp_vfdsu_ex1_pipex_srcf0; +input [63:0] dp_vfdsu_ex1_pipex_srcf1; +input ex1_data_clk; +input ex1_pipedown; +input ex2_data_clk; +input ex2_pipedown; +input ex3_data_clk; +input ex3_pipedown; +input [4 :0] 
ex4_out_expt; +input [63:0] ex4_out_result; +input forever_cpuclk; +input [19:0] idu_vfpu_rf_pipex_func; +input idu_vfpu_rf_pipex_gateclk_sel; +input pad_yy_icg_scan_en; +output ex1_div; +output ex1_double; +output ex1_scalar; +output ex1_single; +output ex1_sqrt; +output [63:0] ex1_src0; +output [63:0] ex1_src1; +output [2 :0] ex1_static_rm; +output [4 :0] pipex_dp_vfdsu_ereg; +output [4 :0] pipex_dp_vfdsu_ereg_data; +output [63:0] pipex_dp_vfdsu_freg_data; +output [6 :0] pipex_dp_vfdsu_vreg; +output vfdsu_ex2_double; +output vfdsu_ex2_single; + +// &Regs; @25 +reg ex1_div; +reg ex1_double; +reg ex1_single; +reg ex1_sqrt; +reg vfdsu_ex2_div; +reg vfdsu_ex2_double; +reg [4 :0] vfdsu_ex2_dst_ereg; +reg [6 :0] vfdsu_ex2_dst_vreg; +reg [6 :0] vfdsu_ex2_iid; +reg vfdsu_ex2_single; +reg vfdsu_ex2_sqrt; +reg [4 :0] vfdsu_ex3_dst_ereg; +reg [6 :0] vfdsu_ex3_dst_vreg; +reg [6 :0] vfdsu_ex3_iid; +reg [4 :0] vfdsu_ex4_dst_ereg; +reg [6 :0] vfdsu_ex4_dst_vreg; +reg [6 :0] vfdsu_ex4_iid; + +// &Wires; @26 +wire cp0_vfpu_icg_en; +wire cp0_yy_clk_en; +wire cpurst_b; +wire [4 :0] dp_vfdsu_ex1_pipex_dst_ereg; +wire [6 :0] dp_vfdsu_ex1_pipex_dst_vreg; +wire [6 :0] dp_vfdsu_ex1_pipex_iid; +wire [2 :0] dp_vfdsu_ex1_pipex_imm0; +wire [63:0] dp_vfdsu_ex1_pipex_srcf0; +wire [63:0] dp_vfdsu_ex1_pipex_srcf1; +wire ex1_data_clk; +wire ex1_pipedown; +wire ex1_scalar; +wire [63:0] ex1_src0; +wire [63:0] ex1_src1; +wire [2 :0] ex1_static_rm; +wire ex2_data_clk; +wire ex2_pipedown; +wire ex3_data_clk; +wire ex3_pipedown; +wire [4 :0] ex4_out_expt; +wire [63:0] ex4_out_result; +wire forever_cpuclk; +wire [19:0] idu_vfpu_rf_pipex_func; +wire idu_vfpu_rf_pipex_gateclk_sel; +wire pad_yy_icg_scan_en; +wire [4 :0] pipex_dp_vfdsu_ereg; +wire [4 :0] pipex_dp_vfdsu_ereg_data; +wire [63:0] pipex_dp_vfdsu_freg_data; +wire [6 :0] pipex_dp_vfdsu_vreg; +wire vfdsu_sew_clk; +wire vfdsu_sew_clk_en; + + +//========================================================== +// EX1 Stage Control Signal: ex1_div/ex1_sqrt/ex1_double/ex1_single are registered from idu_vfpu_rf_pipex_func[0]/[1]/[16]/[15]
+//========================================================== +// &Force("bus","idu_vfpu_rf_pipex_func",19,0); @31 +//assign func[19:0] = dp_vfdsu_ex1_pipex_func[19:0]; +// &Instance("gated_clk_cell","x_vfdsu_sew_clk"); @33 +gated_clk_cell x_vfdsu_sew_clk ( + .clk_in (forever_cpuclk ), + .clk_out (vfdsu_sew_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (vfdsu_sew_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @34 +// .clk_out (vfdsu_sew_clk),//Out Clock @35 +// .external_en (1'b0), @36 +// .global_en (cp0_yy_clk_en), @37 +// .local_en (vfdsu_sew_clk_en),//Local Condition @38 +// .module_en (cp0_vfpu_icg_en) @39 +// ); @40 +assign vfdsu_sew_clk_en = idu_vfpu_rf_pipex_gateclk_sel; +always @(posedge vfdsu_sew_clk or negedge cpurst_b) +begin + if(!cpurst_b) + begin + ex1_div <= 1'b0; + ex1_sqrt <= 1'b0; + ex1_double <= 1'b0; + ex1_single <= 1'b0; + end + else if(idu_vfpu_rf_pipex_gateclk_sel) + begin + ex1_div <= idu_vfpu_rf_pipex_func[0]; + ex1_sqrt <= idu_vfpu_rf_pipex_func[1]; + ex1_double <= idu_vfpu_rf_pipex_func[16]; + ex1_single <= idu_vfpu_rf_pipex_func[15]; + end +end +assign ex1_scalar = 1'b1; +assign ex1_static_rm[2:0] = dp_vfdsu_ex1_pipex_imm0[2:0]; +// &Force("output","ex1_div"); @61 +// &Force("output","ex1_sqrt"); @62 +// &Force("output","ex1_double"); @63 +// &Force("output","ex1_single"); @64 + +assign ex1_src0[63:0] = dp_vfdsu_ex1_pipex_srcf0[63:0]; +assign ex1_src1[63:0] = dp_vfdsu_ex1_pipex_srcf1[63:0]; + + +always @(posedge ex1_data_clk or negedge cpurst_b) +begin + if(!cpurst_b) + begin + vfdsu_ex2_dst_ereg[4:0] <= 5'b0; + vfdsu_ex2_dst_vreg[6:0] <= 7'b0; + vfdsu_ex2_iid[6:0] <= 7'b0; + vfdsu_ex2_double <= 1'b0; + vfdsu_ex2_single <= 1'b0; + vfdsu_ex2_div <= 1'b0; + vfdsu_ex2_sqrt <= 1'b0; + end + else if(ex1_pipedown) + begin + vfdsu_ex2_dst_ereg[4:0] <= dp_vfdsu_ex1_pipex_dst_ereg[4:0]; + vfdsu_ex2_dst_vreg[6:0] <= 
dp_vfdsu_ex1_pipex_dst_vreg[6:0]; + vfdsu_ex2_iid[6:0] <= dp_vfdsu_ex1_pipex_iid[6:0]; + vfdsu_ex2_double <= ex1_double; + vfdsu_ex2_single <= ex1_single; + vfdsu_ex2_div <= ex1_div; + vfdsu_ex2_sqrt <= ex1_sqrt; + end + else + begin + vfdsu_ex2_dst_ereg[4:0] <= vfdsu_ex2_dst_ereg[4:0]; + vfdsu_ex2_dst_vreg[6:0] <= vfdsu_ex2_dst_vreg[6:0]; + vfdsu_ex2_iid[6:0] <= vfdsu_ex2_iid[6:0]; + vfdsu_ex2_double <= vfdsu_ex2_double; + vfdsu_ex2_single <= vfdsu_ex2_single; + vfdsu_ex2_div <= vfdsu_ex2_div; + vfdsu_ex2_sqrt <= vfdsu_ex2_sqrt; + end +end +// &Force("output","vfdsu_ex2_double"); @103 +// &Force("output","vfdsu_ex2_single"); @104 +// //&Force("output","vfdsu_ex2_div"); @105 +// //&Force("output","vfdsu_ex2_sqrt"); @106 + + +always @(posedge ex2_data_clk or negedge cpurst_b) +begin + if(!cpurst_b) + begin + vfdsu_ex3_dst_ereg[4:0] <= 5'b0; + vfdsu_ex3_dst_vreg[6:0] <= 7'b0; + vfdsu_ex3_iid[6:0] <= 7'b0; +// vfdsu_ex3_double <= 1'b0; +// vfdsu_ex3_single <= 1'b0; +// vfdsu_ex3_div <= 1'b0; +// vfdsu_ex3_sqrt <= 1'b0; + end + else if(ex2_pipedown) + begin + vfdsu_ex3_dst_ereg[4:0] <= vfdsu_ex2_dst_ereg[4:0]; + vfdsu_ex3_dst_vreg[6:0] <= vfdsu_ex2_dst_vreg[6:0]; + vfdsu_ex3_iid[6:0] <= vfdsu_ex2_iid[6:0]; + // vfdsu_ex3_double <= vfdsu_ex2_double; +// vfdsu_ex3_single <= vfdsu_ex2_single; +// vfdsu_ex3_div <= vfdsu_ex2_div; +// vfdsu_ex3_sqrt <= vfdsu_ex2_sqrt; + end + else + begin + vfdsu_ex3_dst_ereg[4:0] <= vfdsu_ex3_dst_ereg[4:0]; + vfdsu_ex3_dst_vreg[6:0] <= vfdsu_ex3_dst_vreg[6:0]; + vfdsu_ex3_iid[6:0] <= vfdsu_ex3_iid[6:0]; +// vfdsu_ex3_double <= vfdsu_ex3_double; +// vfdsu_ex3_single <= vfdsu_ex3_single; +// vfdsu_ex3_div <= vfdsu_ex3_div; +// vfdsu_ex3_sqrt <= vfdsu_ex3_sqrt; + end +end +// //&Force("output","vfdsu_ex3_double"); @142 +// //&Force("output","vfdsu_ex3_single"); @143 + +always @(posedge ex3_data_clk or negedge cpurst_b) +begin + if(!cpurst_b) + begin + vfdsu_ex4_dst_ereg[4:0] <= 5'b0; + vfdsu_ex4_dst_vreg[6:0] <= 7'b0; + vfdsu_ex4_iid[6:0] <= 
7'b0; +// vfdsu_ex4_double <= 1'b0; +// vfdsu_ex4_single <= 1'b0; +// vfdsu_ex4_div <= 1'b0; +// vfdsu_ex4_sqrt <= 1'b0; + end + else if(ex3_pipedown) + begin + vfdsu_ex4_dst_ereg[4:0] <= vfdsu_ex3_dst_ereg[4:0]; + vfdsu_ex4_dst_vreg[6:0] <= vfdsu_ex3_dst_vreg[6:0]; + vfdsu_ex4_iid[6:0] <= vfdsu_ex3_iid[6:0]; +// vfdsu_ex4_double <= vfdsu_ex3_double; +// vfdsu_ex4_single <= vfdsu_ex3_single; +// vfdsu_ex4_div <= vfdsu_ex3_div; +// vfdsu_ex4_sqrt <= vfdsu_ex3_sqrt; + end + else + begin + vfdsu_ex4_dst_ereg[4:0] <= vfdsu_ex4_dst_ereg[4:0]; + vfdsu_ex4_dst_vreg[6:0] <= vfdsu_ex4_dst_vreg[6:0]; + vfdsu_ex4_iid[6:0] <= vfdsu_ex4_iid[6:0]; +// vfdsu_ex4_double <= vfdsu_ex4_double; +// vfdsu_ex4_single <= vfdsu_ex4_single; +// vfdsu_ex4_div <= vfdsu_ex4_div; +// vfdsu_ex4_sqrt <= vfdsu_ex4_sqrt; + end +end +// //&Force("output","vfdsu_ex4_double"); @178 +// //&Force("output","vfdsu_ex4_single"); @179 + + +assign pipex_dp_vfdsu_ereg_data[4:0] = ex4_out_expt[4:0]; +assign pipex_dp_vfdsu_freg_data[63:0] = ex4_out_result[63:0]; +assign pipex_dp_vfdsu_ereg[4:0] = vfdsu_ex4_dst_ereg[4:0]; +assign pipex_dp_vfdsu_vreg[6:0] = vfdsu_ex4_dst_vreg[6:0]; + + + + + + +// &ModuleEnd; @192 +endmodule + + diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v new file mode 100644 index 00000000..cdeb3a30 --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v @@ -0,0 +1,691 @@ +/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// &ModuleBeg; @22 +module ct_vfdsu_srt( + cp0_vfpu_icg_en, + cp0_yy_clk_en, + cpurst_b, + ex1_div, + ex1_divisor, + ex1_pipedown, + ex1_remainder, + ex1_sqrt, + ex2_pipedown, + ex2_srt_first_round, + forever_cpuclk, + pad_yy_icg_scan_en, + srt_ctrl_rem_zero, + srt_ctrl_skip_srt, + srt_secd_round, + srt_sm_on, + total_qt_rt_58, + vfdsu_ex2_div, + vfdsu_ex2_double, + vfdsu_ex2_dz, + vfdsu_ex2_expnt_add0, + vfdsu_ex2_expnt_add1, + vfdsu_ex2_nv, + vfdsu_ex2_of_rm_lfn, + vfdsu_ex2_op0_norm, + vfdsu_ex2_op1_norm, + vfdsu_ex2_qnan_f, + vfdsu_ex2_qnan_sign, + vfdsu_ex2_result_inf, + vfdsu_ex2_result_qnan, + vfdsu_ex2_result_sign, + vfdsu_ex2_result_zero, + vfdsu_ex2_rm, + vfdsu_ex2_single, + vfdsu_ex2_sqrt, + vfdsu_ex2_srt_skip, + vfdsu_ex3_doub_expnt_rst, + vfdsu_ex3_double, + vfdsu_ex3_dz, + vfdsu_ex3_half_expnt_rst, + vfdsu_ex3_id_srt_skip, + vfdsu_ex3_nv, + vfdsu_ex3_of, + vfdsu_ex3_potnt_of, + vfdsu_ex3_potnt_uf, + vfdsu_ex3_qnan_f, + vfdsu_ex3_qnan_sign, + vfdsu_ex3_rem_sign, + vfdsu_ex3_rem_zero, + vfdsu_ex3_result_denorm_round_add_num, + vfdsu_ex3_result_inf, + vfdsu_ex3_result_lfn, + vfdsu_ex3_result_qnan, + vfdsu_ex3_result_sign, + vfdsu_ex3_result_zero, + vfdsu_ex3_rm, + vfdsu_ex3_rslt_denorm, + vfdsu_ex3_sing_expnt_rst, + vfdsu_ex3_single, + vfdsu_ex3_uf +); + +// &Ports; @23 +input cp0_vfpu_icg_en; +input cp0_yy_clk_en; +input cpurst_b; +input ex1_div; +input [52:0] ex1_divisor; +input ex1_pipedown; +input [59:0] ex1_remainder; +input ex1_sqrt; +input ex2_pipedown; +input ex2_srt_first_round; +input forever_cpuclk; +input pad_yy_icg_scan_en; +input srt_secd_round; +input srt_sm_on; +input vfdsu_ex2_div; +input vfdsu_ex2_double; +input vfdsu_ex2_dz; +input [12:0] vfdsu_ex2_expnt_add0; +input [12:0] vfdsu_ex2_expnt_add1; +input vfdsu_ex2_nv; +input vfdsu_ex2_of_rm_lfn; +input vfdsu_ex2_op0_norm; +input vfdsu_ex2_op1_norm; +input [51:0] vfdsu_ex2_qnan_f; 
+input vfdsu_ex2_qnan_sign; +input vfdsu_ex2_result_inf; +input vfdsu_ex2_result_qnan; +input vfdsu_ex2_result_sign; +input vfdsu_ex2_result_zero; +input [2 :0] vfdsu_ex2_rm; +input vfdsu_ex2_single; +input vfdsu_ex2_sqrt; +input vfdsu_ex2_srt_skip; +output srt_ctrl_rem_zero; +output srt_ctrl_skip_srt; +output [57:0] total_qt_rt_58; +output [12:0] vfdsu_ex3_doub_expnt_rst; +output vfdsu_ex3_double; +output vfdsu_ex3_dz; +output [12:0] vfdsu_ex3_half_expnt_rst; +output vfdsu_ex3_id_srt_skip; +output vfdsu_ex3_nv; +output vfdsu_ex3_of; +output vfdsu_ex3_potnt_of; +output vfdsu_ex3_potnt_uf; +output [51:0] vfdsu_ex3_qnan_f; +output vfdsu_ex3_qnan_sign; +output vfdsu_ex3_rem_sign; +output vfdsu_ex3_rem_zero; +output [52:0] vfdsu_ex3_result_denorm_round_add_num; +output vfdsu_ex3_result_inf; +output vfdsu_ex3_result_lfn; +output vfdsu_ex3_result_qnan; +output vfdsu_ex3_result_sign; +output vfdsu_ex3_result_zero; +output [2 :0] vfdsu_ex3_rm; +output vfdsu_ex3_rslt_denorm; +output [8 :0] vfdsu_ex3_sing_expnt_rst; +output vfdsu_ex3_single; +output vfdsu_ex3_uf; + +// &Regs; @24 +reg [52:0] ex2_result_double_denorm_round_add_num; +reg [52:0] ex2_result_half_denorm_round_add_num; +reg [52:0] ex2_result_single_denorm_round_add_num; +reg [12:0] vfdsu_ex3_doub_expnt_rst; +reg vfdsu_ex3_double; +reg vfdsu_ex3_dz; +reg [12:0] vfdsu_ex3_half_expnt_rst; +reg vfdsu_ex3_id_srt_skip; +reg vfdsu_ex3_nv; +reg vfdsu_ex3_of; +reg vfdsu_ex3_potnt_of; +reg vfdsu_ex3_potnt_uf; +reg [51:0] vfdsu_ex3_qnan_f; +reg vfdsu_ex3_qnan_sign; +reg vfdsu_ex3_rem_sign; +reg [52:0] vfdsu_ex3_result_denorm_round_add_num; +reg vfdsu_ex3_result_inf; +reg vfdsu_ex3_result_lfn; +reg vfdsu_ex3_result_qnan; +reg vfdsu_ex3_result_sign; +reg vfdsu_ex3_result_zero; +reg [2 :0] vfdsu_ex3_rm; +reg vfdsu_ex3_rslt_denorm; +reg [8 :0] vfdsu_ex3_sing_expnt_rst; +reg vfdsu_ex3_single; +reg vfdsu_ex3_uf; + +// &Wires; @25 +wire cp0_vfpu_icg_en; +wire cp0_yy_clk_en; +wire cpurst_b; +wire ex1_div; +wire [52:0] ex1_divisor; 
+wire ex1_pipedown; +wire [59:0] ex1_remainder; +wire ex1_sqrt; +wire ex2_div_of; +wire ex2_div_uf; +wire ex2_doub_expnt_of; +wire ex2_doub_expnt_uf; +wire ex2_doub_potnt_of; +wire ex2_doub_potnt_uf; +wire ex2_double_id_nor_srt_skip; +wire ex2_expnt_of; +wire [12:0] ex2_expnt_result; +wire ex2_expnt_uf; +wire ex2_half_expnt_of; +wire ex2_half_expnt_uf; +wire ex2_half_id_nor_srt_skip; +wire ex2_half_potnt_of; +wire ex2_half_potnt_uf; +wire ex2_id_nor_srt_skip; +wire ex2_of; +wire ex2_of_plus; +wire ex2_pipe_clk; +wire ex2_pipe_clk_en; +wire ex2_pipedown; +wire ex2_potnt_of; +wire ex2_potnt_of_pre; +wire ex2_potnt_uf; +wire ex2_potnt_uf_pre; +wire [52:0] ex2_result_denorm_round_add_num; +wire ex2_result_inf; +wire ex2_result_lfn; +wire ex2_result_qnan; +wire ex2_result_zero; +wire ex2_rslt_denorm; +wire ex2_sing_expnt_of; +wire ex2_sing_expnt_uf; +wire ex2_sing_potnt_of; +wire ex2_sing_potnt_uf; +wire ex2_single_id_nor_srt_skip; +wire [12:0] ex2_sqrt_expnt_result; +wire ex2_srt_first_round; +wire ex2_uf; +wire ex2_uf_plus; +wire forever_cpuclk; +wire [6 :0] initial_bound_sel_in; +wire [55:0] initial_divisor_in; +wire [60:0] initial_remainder_in; +wire initial_srt_en; +wire initial_srt_sel_div_in; +wire initial_srt_sel_sqrt_in; +wire pad_yy_icg_scan_en; +wire srt_ctrl_rem_zero; +wire srt_ctrl_skip_srt; +wire srt_first_round; +wire [60:0] srt_remainder; +wire [59:0] srt_remainder_out; +wire srt_remainder_sign; +wire srt_secd_round; +wire srt_sm_on; +wire [57:0] total_qt_rt; +wire [57:0] total_qt_rt_58; +wire [57:0] vdiv_qt_rt; +wire vfdsu_ex2_div; +wire vfdsu_ex2_double; +wire vfdsu_ex2_dz; +wire [12:0] vfdsu_ex2_expnt_add0; +wire [12:0] vfdsu_ex2_expnt_add1; +wire [12:0] vfdsu_ex2_expnt_rst; +wire vfdsu_ex2_nv; +wire vfdsu_ex2_of_rm_lfn; +wire vfdsu_ex2_op0_norm; +wire vfdsu_ex2_op1_norm; +wire [51:0] vfdsu_ex2_qnan_f; +wire vfdsu_ex2_qnan_sign; +wire vfdsu_ex2_result_inf; +wire vfdsu_ex2_result_qnan; +wire vfdsu_ex2_result_sign; +wire vfdsu_ex2_result_zero; +wire [2 
:0] vfdsu_ex2_rm; +wire vfdsu_ex2_single; +wire vfdsu_ex2_sqrt; +wire vfdsu_ex2_srt_skip; +wire vfdsu_ex3_rem_zero; + + +//====================EX2 Expt info========================= +//EX1 only detect of/uf under id condition +//EX2 will deal with other condition + +//When input is normal, overflow when E1-E2 > 128/1024 +//here we mov the expnt result calculation into second stage + +assign vfdsu_ex2_expnt_rst[12:0] = (vfdsu_ex2_sqrt) + ? ex2_sqrt_expnt_result[12:0] + : ex2_expnt_result[12:0]; +assign ex2_sqrt_expnt_result[12:0] = {ex2_expnt_result[12], + ex2_expnt_result[12:1]}; +assign ex2_expnt_result[12:0] = vfdsu_ex2_expnt_add0[12:0] - vfdsu_ex2_expnt_add1[12:0]; +assign ex2_doub_expnt_of = ~vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11] + || (vfdsu_ex2_expnt_rst[10] && + |vfdsu_ex2_expnt_rst[9:0])); +assign ex2_sing_expnt_of = ~vfdsu_ex2_expnt_rst[9] && (vfdsu_ex2_expnt_rst[8] + || (vfdsu_ex2_expnt_rst[7] && + |vfdsu_ex2_expnt_rst[6:0])); + +assign ex2_half_expnt_of = ~vfdsu_ex2_expnt_rst[6] && (vfdsu_ex2_expnt_rst[5] + || (vfdsu_ex2_expnt_rst[4] && + |vfdsu_ex2_expnt_rst[3:0])); +assign ex2_expnt_of = vfdsu_ex2_double ? ex2_doub_expnt_of : + vfdsu_ex2_single ? ex2_sing_expnt_of + : ex2_half_expnt_of; +assign ex2_potnt_of_pre = vfdsu_ex2_double ? ex2_doub_potnt_of : + vfdsu_ex2_single ? ex2_sing_potnt_of : ex2_half_potnt_of; +assign ex2_potnt_uf_pre = vfdsu_ex2_double ? ex2_doub_potnt_uf : + vfdsu_ex2_single ? ex2_sing_potnt_uf : ex2_half_potnt_uf; +assign ex2_expnt_uf = vfdsu_ex2_double ? ex2_doub_expnt_uf : + vfdsu_ex2_single ? ex2_sing_expnt_uf : ex2_half_expnt_uf; +assign ex2_id_nor_srt_skip = vfdsu_ex2_double ? ex2_double_id_nor_srt_skip : + vfdsu_ex2_single ? ex2_single_id_nor_srt_skip + : ex2_half_id_nor_srt_skip; +assign ex2_result_denorm_round_add_num[52:0] = vfdsu_ex2_double ? + ex2_result_double_denorm_round_add_num[52:0] : + vfdsu_ex2_single ? 
+ ex2_result_single_denorm_round_add_num[52:0] : + ex2_result_half_denorm_round_add_num[52:0]; + + +//potential overflow when E1-E2 = 128/1024 +assign ex2_doub_potnt_of = ~vfdsu_ex2_expnt_rst[12] && + ~vfdsu_ex2_expnt_rst[11] && + vfdsu_ex2_expnt_rst[10] && + ~|vfdsu_ex2_expnt_rst[9:0]; +assign ex2_sing_potnt_of = ~vfdsu_ex2_expnt_rst[9] && + ~vfdsu_ex2_expnt_rst[8] && + vfdsu_ex2_expnt_rst[7] && + ~|vfdsu_ex2_expnt_rst[6:0]; +assign ex2_half_potnt_of = ~vfdsu_ex2_expnt_rst[6] && + ~vfdsu_ex2_expnt_rst[5] && + vfdsu_ex2_expnt_rst[4] && + ~|vfdsu_ex2_expnt_rst[3:0]; +assign ex2_potnt_of = ex2_potnt_of_pre && + vfdsu_ex2_op0_norm && + vfdsu_ex2_op1_norm && + vfdsu_ex2_div; + +//When input is normal, underflow when E1-E2 <= -127/-1023/-15 +assign ex2_doub_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hc01); +assign ex2_sing_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hf81); +assign ex2_half_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hff1); +assign ex2_half_potnt_uf = &vfdsu_ex2_expnt_rst[6:4] && + ~|vfdsu_ex2_expnt_rst[3:2] && + vfdsu_ex2_expnt_rst[1] && + !vfdsu_ex2_expnt_rst[0]; + + +//potential underflow when E1-E2 = -126/-1022 +assign ex2_doub_potnt_uf = &vfdsu_ex2_expnt_rst[12:10] && + ~|vfdsu_ex2_expnt_rst[9:2] && + vfdsu_ex2_expnt_rst[1] && + !vfdsu_ex2_expnt_rst[0]; +assign ex2_sing_potnt_uf = &vfdsu_ex2_expnt_rst[9:7] && + ~|vfdsu_ex2_expnt_rst[6:2] && + vfdsu_ex2_expnt_rst[1] && + !vfdsu_ex2_expnt_rst[0]; + +assign ex2_potnt_uf = (ex2_potnt_uf_pre && + vfdsu_ex2_op0_norm && + vfdsu_ex2_op1_norm && + vfdsu_ex2_div) || + (ex2_potnt_uf_pre && + vfdsu_ex2_op0_norm); + +//========================EX2 Overflow====================== +//ex2 overflow when +// 1.op0 & op1 both norm && expnt overflow +// 2.ex1_id_of +assign ex2_of = ex2_of_plus; +assign ex2_of_plus = ex2_div_of && vfdsu_ex2_div; +assign ex2_div_of = vfdsu_ex2_op0_norm && + vfdsu_ex2_op1_norm && + ex2_expnt_of; + 
+//=======================EX2 Underflow====================== +//ex2 underflow when +// 1.op0 & op1 both norm && expnt underflow +// 2.ex1_id_uf +// and detect when to skip the srt, here, we have further optmization +assign ex2_uf = ex2_uf_plus; +assign ex2_uf_plus = ex2_div_uf && vfdsu_ex2_div; +assign ex2_div_uf = vfdsu_ex2_op0_norm && + vfdsu_ex2_op1_norm && + ex2_expnt_uf; +assign ex2_double_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] + && (vfdsu_ex2_expnt_rst[11:0]<12'hbcd); +assign ex2_single_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] + && (vfdsu_ex2_expnt_rst[11:0]<12'hf6a); +assign ex2_half_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] + && (vfdsu_ex2_expnt_rst[11:0]<12'hfe7); +assign ex2_rslt_denorm = ex2_uf; + +//=======================EX2 skip srt iteration====================== +assign srt_ctrl_skip_srt = ex2_of || ex2_id_nor_srt_skip + || vfdsu_ex2_srt_skip; +//===============ex2 round prepare for denormal round====== +// &CombBeg; @146 +always @( vfdsu_ex2_expnt_rst[12:0]) +begin +case(vfdsu_ex2_expnt_rst[12:0]) + 13'h1c02:ex2_result_double_denorm_round_add_num[52:0] = 53'h1; //-1022 1 + 13'h1c01:ex2_result_double_denorm_round_add_num[52:0] = 53'h2; //-1023 0 + 13'h1c00:ex2_result_double_denorm_round_add_num[52:0] = 53'h4; //-1024 -1 + 13'h1bff:ex2_result_double_denorm_round_add_num[52:0] = 53'h8; //-1025 -2 + 13'h1bfe:ex2_result_double_denorm_round_add_num[52:0] = 53'h10; //-1026 -3 + 13'h1bfd:ex2_result_double_denorm_round_add_num[52:0] = 53'h20; //-1027 -4 + 13'h1bfc:ex2_result_double_denorm_round_add_num[52:0] = 53'h40; //-1028 -5 + 13'h1bfb:ex2_result_double_denorm_round_add_num[52:0] = 53'h80; //-1029 -6 + 13'h1bfa:ex2_result_double_denorm_round_add_num[52:0] = 53'h100; //-1030 -7 + 13'h1bf9:ex2_result_double_denorm_round_add_num[52:0] = 53'h200; //-1031 -8 + 13'h1bf8:ex2_result_double_denorm_round_add_num[52:0] = 53'h400; //-1032 -9 + 13'h1bf7:ex2_result_double_denorm_round_add_num[52:0] = 53'h800; //-1033 -10 + 
13'h1bf6:ex2_result_double_denorm_round_add_num[52:0] = 53'h1000; //-1034 -11 + 13'h1bf5:ex2_result_double_denorm_round_add_num[52:0] = 53'h2000; //-1035 -12 + 13'h1bf4:ex2_result_double_denorm_round_add_num[52:0] = 53'h4000; //-1036 -13 + 13'h1bf3:ex2_result_double_denorm_round_add_num[52:0] = 53'h8000; // -1037 + 13'h1bf2:ex2_result_double_denorm_round_add_num[52:0] = 53'h10000;//-1038 + 13'h1bf1:ex2_result_double_denorm_round_add_num[52:0] = 53'h20000;//-1039 + 13'h1bf0:ex2_result_double_denorm_round_add_num[52:0] = 53'h40000; //-1040 + 13'h1bef:ex2_result_double_denorm_round_add_num[52:0] = 53'h80000; //-1041 + 13'h1bee:ex2_result_double_denorm_round_add_num[52:0] = 53'h100000; //-1042 + 13'h1bed:ex2_result_double_denorm_round_add_num[52:0] = 53'h200000; //-1043 + 13'h1bec:ex2_result_double_denorm_round_add_num[52:0] = 53'h400000; //-1044 + 13'h1beb:ex2_result_double_denorm_round_add_num[52:0] = 53'h800000; //-1045 + 13'h1bea:ex2_result_double_denorm_round_add_num[52:0] = 53'h1000000;//-1046 + 13'h1be9:ex2_result_double_denorm_round_add_num[52:0] = 53'h2000000;//-1047 + 13'h1be8:ex2_result_double_denorm_round_add_num[52:0] = 53'h4000000; //-1048 + 13'h1be7:ex2_result_double_denorm_round_add_num[52:0] = 53'h8000000; //-1049 + 13'h1be6:ex2_result_double_denorm_round_add_num[52:0] = 53'h10000000;//-1050 + 13'h1be5:ex2_result_double_denorm_round_add_num[52:0] = 53'h20000000; //-1051 + 13'h1be4:ex2_result_double_denorm_round_add_num[52:0] = 53'h40000000; //-1052 + 13'h1be3:ex2_result_double_denorm_round_add_num[52:0] = 53'h80000000; //-1053 + 13'h1be2:ex2_result_double_denorm_round_add_num[52:0] = 53'h100000000; //-1054 + 13'h1be1:ex2_result_double_denorm_round_add_num[52:0] = 53'h200000000; //-1055 + 13'h1be0:ex2_result_double_denorm_round_add_num[52:0] = 53'h400000000; //-1056 + 13'h1bdf:ex2_result_double_denorm_round_add_num[52:0] = 53'h800000000; //-1057 + 13'h1bde:ex2_result_double_denorm_round_add_num[52:0] = 53'h1000000000; //-1058 + 
13'h1bdd:ex2_result_double_denorm_round_add_num[52:0] = 53'h2000000000; //-1059 + 13'h1bdc:ex2_result_double_denorm_round_add_num[52:0] = 53'h4000000000; //-1060 + 13'h1bdb:ex2_result_double_denorm_round_add_num[52:0] = 53'h8000000000; //-1061 + 13'h1bda:ex2_result_double_denorm_round_add_num[52:0] = 53'h10000000000; //-1062 + 13'h1bd9:ex2_result_double_denorm_round_add_num[52:0] = 53'h20000000000; //-1063 + 13'h1bd8:ex2_result_double_denorm_round_add_num[52:0] = 53'h40000000000; //-1064 + 13'h1bd7:ex2_result_double_denorm_round_add_num[52:0] = 53'h80000000000; //-1065 + 13'h1bd6:ex2_result_double_denorm_round_add_num[52:0] = 53'h100000000000; //-1066 + 13'h1bd5:ex2_result_double_denorm_round_add_num[52:0] = 53'h200000000000; //-1067 + 13'h1bd4:ex2_result_double_denorm_round_add_num[52:0] = 53'h400000000000; //-1068 + 13'h1bd3:ex2_result_double_denorm_round_add_num[52:0] = 53'h800000000000; //-1069 + 13'h1bd2:ex2_result_double_denorm_round_add_num[52:0] = 53'h1000000000000;//-1070 + 13'h1bd1:ex2_result_double_denorm_round_add_num[52:0] = 53'h2000000000000; //-1071 + 13'h1bd0:ex2_result_double_denorm_round_add_num[52:0] = 53'h4000000000000; //-1072 + 13'h1bcf:ex2_result_double_denorm_round_add_num[52:0] = 53'h8000000000000; //-1073 + 13'h1bce:ex2_result_double_denorm_round_add_num[52:0] = 53'h10000000000000; //-1073 + default: ex2_result_double_denorm_round_add_num[52:0] = 53'h0; +endcase +// &CombEnd; @203 +end +// &CombBeg; @204 +always @( vfdsu_ex2_expnt_rst[12:0]) +begin +case(vfdsu_ex2_expnt_rst[12:0]) + 13'h1f82:ex2_result_single_denorm_round_add_num[52:0] = 53'h20000000; //-126 1 + 13'h1f81:ex2_result_single_denorm_round_add_num[52:0] = 53'h40000000; //-127 0 + 13'h1f80:ex2_result_single_denorm_round_add_num[52:0] = 53'h80000000; //-128 -1 + 13'h1f7f:ex2_result_single_denorm_round_add_num[52:0] = 53'h100000000; //-129 -2 + 13'h1f7e:ex2_result_single_denorm_round_add_num[52:0] = 53'h200000000; //-130 -3 + 13'h1f7d:ex2_result_single_denorm_round_add_num[52:0] = 
53'h400000000; //-131 -4 + 13'h1f7c:ex2_result_single_denorm_round_add_num[52:0] = 53'h800000000; //-132 -5 + 13'h1f7b:ex2_result_single_denorm_round_add_num[52:0] = 53'h1000000000; //-133 -6 + 13'h1f7a:ex2_result_single_denorm_round_add_num[52:0] = 53'h2000000000; //-134 -7 + 13'h1f79:ex2_result_single_denorm_round_add_num[52:0] = 53'h4000000000; //-135 -8 + 13'h1f78:ex2_result_single_denorm_round_add_num[52:0] = 53'h8000000000; //-136 -9 + 13'h1f77:ex2_result_single_denorm_round_add_num[52:0] = 53'h10000000000; //-137 -10 + 13'h1f76:ex2_result_single_denorm_round_add_num[52:0] = 53'h20000000000; //-138 -11 + 13'h1f75:ex2_result_single_denorm_round_add_num[52:0] = 53'h40000000000; //-139 -12 + 13'h1f74:ex2_result_single_denorm_round_add_num[52:0] = 53'h80000000000; //-140 -13 + 13'h1f73:ex2_result_single_denorm_round_add_num[52:0] = 53'h100000000000; // -141 -14 + 13'h1f72:ex2_result_single_denorm_round_add_num[52:0] = 53'h200000000000;//-142 -15 + 13'h1f71:ex2_result_single_denorm_round_add_num[52:0] = 53'h400000000000;//-143 -16 + 13'h1f70:ex2_result_single_denorm_round_add_num[52:0] = 53'h800000000000; //-144 -17 + 13'h1f6f:ex2_result_single_denorm_round_add_num[52:0] = 53'h1000000000000; //-145 -18 + 13'h1f6e:ex2_result_single_denorm_round_add_num[52:0] = 53'h2000000000000; //-146 -19 + 13'h1f6d:ex2_result_single_denorm_round_add_num[52:0] = 53'h4000000000000; //-147 -20 + 13'h1f6c:ex2_result_single_denorm_round_add_num[52:0] = 53'h8000000000000; //-148 -21 + 13'h1f6b:ex2_result_single_denorm_round_add_num[52:0] = 53'h10000000000000; //-148 -22 + default: ex2_result_single_denorm_round_add_num[52:0] = 53'h0; // -23 +endcase +// &CombEnd; @232 +end +// &CombBeg; @233 +always @( vfdsu_ex2_expnt_rst[12:0]) +begin +case(vfdsu_ex2_expnt_rst[12:0]) + 13'h1ff2:ex2_result_half_denorm_round_add_num[52:0] = 53'h40000000000; //-14 1 + 13'h1ff1:ex2_result_half_denorm_round_add_num[52:0] = 53'h80000000000; //-15 0 + 13'h1ff0:ex2_result_half_denorm_round_add_num[52:0] = 
53'h100000000000; //-16 -1 + 13'h1fef:ex2_result_half_denorm_round_add_num[52:0] = 53'h200000000000; //-17 -2 + 13'h1fee:ex2_result_half_denorm_round_add_num[52:0] = 53'h400000000000; //-18 -3 + 13'h1fed:ex2_result_half_denorm_round_add_num[52:0] = 53'h800000000000; //-19 -4 + 13'h1fec:ex2_result_half_denorm_round_add_num[52:0] = 53'h1000000000000; //-20 -5 + 13'h1feb:ex2_result_half_denorm_round_add_num[52:0] = 53'h2000000000000; //-21 -6 + 13'h1fea:ex2_result_half_denorm_round_add_num[52:0] = 53'h4000000000000; //-22 -7 + 13'h1fe9:ex2_result_half_denorm_round_add_num[52:0] = 53'h8000000000000; //-23 -8 + 13'h1fe8:ex2_result_half_denorm_round_add_num[52:0] = 53'h10000000000000; //-24 -9 + default: ex2_result_half_denorm_round_add_num[52:0] = 53'h0; // -23 +endcase +// &CombEnd; @248 +end + +//===================special result======================== +assign ex2_result_zero = vfdsu_ex2_result_zero; +assign ex2_result_qnan = vfdsu_ex2_result_qnan; +assign ex2_result_inf = vfdsu_ex2_result_inf || + ex2_of_plus && !vfdsu_ex2_of_rm_lfn; +assign ex2_result_lfn = + ex2_of_plus && vfdsu_ex2_of_rm_lfn; + + + +//====================Pipe to EX3=========================== +//gate clk +// &Instance("gated_clk_cell","x_ex2_pipe_clk"); @262 +gated_clk_cell x_ex2_pipe_clk ( + .clk_in (forever_cpuclk ), + .clk_out (ex2_pipe_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (ex2_pipe_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @263 +// .clk_out (ex2_pipe_clk),//Out Clock @264 +// .external_en (1'b0), @265 +// .global_en (cp0_yy_clk_en), @266 +// .local_en (ex2_pipe_clk_en),//Local Condition @267 +// .module_en (cp0_vfpu_icg_en) @268 +// ); @269 +assign ex2_pipe_clk_en = ex2_pipedown; + +always @(posedge ex2_pipe_clk or negedge cpurst_b) +begin + if(!cpurst_b) + begin + vfdsu_ex3_result_zero <= 1'b0; + vfdsu_ex3_result_qnan <= 1'b0; + vfdsu_ex3_result_inf <= 1'b0; + 
vfdsu_ex3_result_lfn <= 1'b0; + vfdsu_ex3_of <= 1'b0; + vfdsu_ex3_uf <= 1'b0; + vfdsu_ex3_nv <= 1'b0; + vfdsu_ex3_dz <= 1'b0; + vfdsu_ex3_potnt_of <= 1'b0; + vfdsu_ex3_potnt_uf <= 1'b0; + vfdsu_ex3_rem_sign <= 1'b0; +// vfdsu_ex3_rem_zero <= 1'b0; + vfdsu_ex3_doub_expnt_rst[12:0] <= 13'b0; + vfdsu_ex3_sing_expnt_rst[8:0] <= 9'b0; + vfdsu_ex3_half_expnt_rst[12:0] <= 13'b0; + vfdsu_ex3_result_sign <= 1'b0; + vfdsu_ex3_qnan_sign <= 1'b0; + vfdsu_ex3_qnan_f[51:0] <= 52'b0; + vfdsu_ex3_rm[2:0] <= 3'b0; + vfdsu_ex3_result_denorm_round_add_num[52:0] + <= 53'b0; + vfdsu_ex3_rslt_denorm <= 1'b0; + vfdsu_ex3_id_srt_skip <= 1'b0; + vfdsu_ex3_double <= 1'b0; + vfdsu_ex3_single <= 1'b0; + end + else if(ex2_pipedown) + begin + vfdsu_ex3_result_zero <= ex2_result_zero; + vfdsu_ex3_result_qnan <= ex2_result_qnan; + vfdsu_ex3_result_inf <= ex2_result_inf; + vfdsu_ex3_result_lfn <= ex2_result_lfn; + vfdsu_ex3_of <= ex2_of; + vfdsu_ex3_uf <= ex2_uf; + vfdsu_ex3_nv <= vfdsu_ex2_nv; + vfdsu_ex3_dz <= vfdsu_ex2_dz; + vfdsu_ex3_potnt_of <= ex2_potnt_of; + vfdsu_ex3_potnt_uf <= ex2_potnt_uf; + vfdsu_ex3_rem_sign <= srt_remainder_sign; + //vfdsu_ex3_rem_zero <= srt_remainder_zero; + vfdsu_ex3_doub_expnt_rst[12:0] <= vfdsu_ex2_expnt_rst[12:0]; + vfdsu_ex3_sing_expnt_rst[8:0] <= vfdsu_ex2_expnt_rst[8:0]; + vfdsu_ex3_half_expnt_rst[12:0] <= vfdsu_ex2_expnt_rst[12:0]; + vfdsu_ex3_result_sign <= vfdsu_ex2_result_sign; + vfdsu_ex3_qnan_sign <= vfdsu_ex2_qnan_sign; + vfdsu_ex3_qnan_f[51:0] <= vfdsu_ex2_qnan_f[51:0]; + vfdsu_ex3_rm[2:0] <= vfdsu_ex2_rm[2:0]; + vfdsu_ex3_result_denorm_round_add_num[52:0] + <= ex2_result_denorm_round_add_num[52:0]; + vfdsu_ex3_rslt_denorm <= ex2_rslt_denorm; + vfdsu_ex3_id_srt_skip <= ex2_id_nor_srt_skip; + vfdsu_ex3_double <= vfdsu_ex2_double; + vfdsu_ex3_single <= vfdsu_ex2_single; + end + else + begin + vfdsu_ex3_result_zero <= vfdsu_ex3_result_zero; + vfdsu_ex3_result_qnan <= vfdsu_ex3_result_qnan; + vfdsu_ex3_result_inf <= vfdsu_ex3_result_inf; + 
vfdsu_ex3_result_lfn <= vfdsu_ex3_result_lfn; + vfdsu_ex3_of <= vfdsu_ex3_of; + vfdsu_ex3_uf <= vfdsu_ex3_uf; + vfdsu_ex3_nv <= vfdsu_ex3_nv; + vfdsu_ex3_dz <= vfdsu_ex3_dz; + vfdsu_ex3_potnt_of <= vfdsu_ex3_potnt_of; + vfdsu_ex3_potnt_uf <= vfdsu_ex3_potnt_uf; + vfdsu_ex3_rem_sign <= vfdsu_ex3_rem_sign; + //vfdsu_ex3_rem_zero <= vfdsu_ex3_rem_zero; + vfdsu_ex3_doub_expnt_rst[12:0] <= vfdsu_ex3_doub_expnt_rst[12:0]; + vfdsu_ex3_sing_expnt_rst[8:0] <= vfdsu_ex3_sing_expnt_rst[8:0]; + vfdsu_ex3_half_expnt_rst[12:0] <= vfdsu_ex3_half_expnt_rst[12:0]; + vfdsu_ex3_result_sign <= vfdsu_ex3_result_sign; + vfdsu_ex3_qnan_sign <= vfdsu_ex3_qnan_sign; + vfdsu_ex3_qnan_f[51:0] <= vfdsu_ex3_qnan_f[51:0]; + vfdsu_ex3_rm[2:0] <= vfdsu_ex3_rm[2:0]; + vfdsu_ex3_result_denorm_round_add_num[52:0] + <= vfdsu_ex3_result_denorm_round_add_num[52:0]; + vfdsu_ex3_rslt_denorm <= vfdsu_ex3_rslt_denorm; + vfdsu_ex3_id_srt_skip <= vfdsu_ex3_id_srt_skip; + vfdsu_ex3_double <= vfdsu_ex3_double; + vfdsu_ex3_single <= vfdsu_ex3_single; + end +end +assign vfdsu_ex3_rem_zero = ~|srt_remainder[60:0]; +assign srt_ctrl_rem_zero = vfdsu_ex3_rem_zero; +// &Force("output","vfdsu_ex3_potnt_of"); @365 +// &Force("output","vfdsu_ex3_potnt_uf"); @366 +// &Force("output","vfdsu_ex3_rem_sign"); @367 +// &Force("output","vfdsu_ex3_rem_zero"); @368 +// &Force("output","vfdsu_ex3_result_zero"); @369 +// &Force("output","vfdsu_ex3_result_qnan"); @370 +// &Force("output","vfdsu_ex3_result_inf"); @371 +// &Force("output","vfdsu_ex3_result_lfn"); @372 +// &Force("output","vfdsu_ex3_dz"); @373 +// &Force("output","vfdsu_ex3_nv"); @374 +// &Force("output","vfdsu_ex3_of"); @375 +// &Force("output","vfdsu_ex3_uf"); @376 +// &Force("output","vfdsu_ex3_result_sign"); @377 +// &Force("output","vfdsu_ex3_doub_expnt_rst"); @378 +// &Force("output","vfdsu_ex3_sing_expnt_rst"); @379 +// &Force("output","vfdsu_ex3_half_expnt_rst"); @380 +// &Force("output","vfdsu_ex3_qnan_sign"); @381 +// &Force("output","vfdsu_ex3_qnan_f"); 
@382 +// &Force("output","vfdsu_ex3_rm"); @383 +// &Force("output","vfdsu_ex3_result_denorm_round_add_num"); @384 +// &Force("output","vfdsu_ex3_rslt_denorm"); @385 +// &Force("output","vfdsu_ex3_id_srt_skip"); @386 +// &Force("output","vfdsu_ex3_single"); @387 +// &Force("output","vfdsu_ex3_double"); @388 + +//========================================================== +// SRT Remainder & Divisor for Quotient/Root Generate +//========================================================== +// &Instance("ct_vfdsu_srt_radix16_with_sqrt_for_vdsp"); @411 +// &Connect(.srt_sm_on (srt_sm_on_all)); @412 +// &Force("bus","ex1_remainder",59,0); @414 +// &Force("bus","srt_remainder_out",69,0); @415 +// &Force("nonport","srt_remainder_out"); @422 +// &Force("nonport","vdiv_qt_rt"); @423 +assign initial_divisor_in[55:0] = {ex1_divisor[52:0],3'b000}; // 53-bit divisor with three zero LSBs appended (56 bits total) + +assign initial_remainder_in[60:0] = {2'b00,ex1_remainder[59:1]}; // two zero MSBs prepended; ex1_remainder bit 0 is not forwarded + +assign initial_bound_sel_in[6:0] = ex1_div ? initial_divisor_in[55:49]:{7{1'b0}}; // top 7 divisor bits (ex1_divisor[52:46]) when ex1_div is set, otherwise all zeros + +assign initial_srt_en = ex1_pipedown; // SRT unit is loaded when ex1_pipedown is asserted +assign initial_srt_sel_div_in = ex1_div; // operation select forwarded unchanged: divide +assign initial_srt_sel_sqrt_in = ex1_sqrt; // operation select forwarded unchanged: square root + +assign srt_first_round = ex2_srt_first_round; // plain rename of the ex2 first-round flag for the SRT instance port + +// &Instance("ct_vfdsu_srt_radix16_with_sqrt"); @436 +ct_vfdsu_srt_radix16_with_sqrt x_ct_vfdsu_srt_radix16_with_sqrt ( + .cp0_vfpu_icg_en (cp0_vfpu_icg_en ), + .cp0_yy_clk_en (cp0_yy_clk_en ), + .cpurst_b (cpurst_b ), + .forever_cpuclk (forever_cpuclk ), + .initial_bound_sel_in (initial_bound_sel_in ), + .initial_divisor_in (initial_divisor_in ), + .initial_remainder_in (initial_remainder_in ), + .initial_srt_en (initial_srt_en ), + .initial_srt_sel_div_in (initial_srt_sel_div_in ), + .initial_srt_sel_sqrt_in (initial_srt_sel_sqrt_in), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en ), + .srt_first_round (srt_first_round ), + .srt_remainder (srt_remainder ), + .srt_remainder_out (srt_remainder_out ), + .srt_remainder_sign (srt_remainder_sign ), + .srt_secd_round (srt_secd_round ), + .srt_sm_on (srt_sm_on ), + 
.total_qt_rt (total_qt_rt ), + .vdiv_qt_rt (vdiv_qt_rt ) +); + + +// &Force("bus","ex1_remainder",59,0); @438 + +assign total_qt_rt_58[57:0] = {total_qt_rt[57:2],2'b00}; + +// &ModuleEnd; @443 +endmodule + + diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_bound_table.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_bound_table.v new file mode 100644 index 00000000..097562e3 --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_bound_table.v @@ -0,0 +1,1168 @@ +/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// &ModuleBeg; @22 +module ct_vfdsu_srt_radix16_bound_table( + bound_sel, + digit_bound_1, + digit_bound_2, + digit_bound_3, + digit_bound_4, + digit_bound_5, + digit_bound_6, + digit_bound_7, + digit_bound_8, + digit_bound_9, + sqrt_first_round, + sqrt_secd_round, + sqrt_secd_round_sign +); + +// &Ports; @23 +input [6 :0] bound_sel; +input sqrt_first_round; +input sqrt_secd_round; +input sqrt_secd_round_sign; +output [11:0] digit_bound_1; +output [11:0] digit_bound_2; +output [11:0] digit_bound_3; +output [11:0] digit_bound_4; +output [11:0] digit_bound_5; +output [11:0] digit_bound_6; +output [11:0] digit_bound_7; +output [11:0] digit_bound_8; +output [11:0] digit_bound_9; + +// &Regs; @24 +reg [11:0] ori_digit_bound_1; +reg [11:0] ori_digit_bound_2; +reg [11:0] ori_digit_bound_3; +reg [11:0] ori_digit_bound_4; +reg [11:0] ori_digit_bound_5; +reg [11:0] ori_digit_bound_6; +reg [11:0] ori_digit_bound_7; +reg [11:0] ori_digit_bound_8; +reg [11:0] ori_digit_bound_9; +reg [11:0] sqrt_digit_boundm2_1; +reg [11:0] sqrt_digit_boundm2_2; +reg [11:0] sqrt_digit_boundm2_3; +reg [11:0] sqrt_digit_boundm2_4; +reg [11:0] sqrt_digit_boundm2_5; +reg [11:0] sqrt_digit_boundm2_6; +reg [11:0] sqrt_digit_boundm2_7; +reg [11:0] sqrt_digit_boundm2_8; +reg [11:0] sqrt_digit_boundm2_9; +reg [11:0] sqrt_digit_boundp2_1; +reg [11:0] sqrt_digit_boundp2_2; +reg [11:0] sqrt_digit_boundp2_3; +reg [11:0] sqrt_digit_boundp2_4; +reg [11:0] sqrt_digit_boundp2_5; +reg [11:0] sqrt_digit_boundp2_6; +reg [11:0] sqrt_digit_boundp2_7; +reg [11:0] sqrt_digit_boundp2_8; +reg [11:0] sqrt_digit_boundp2_9; + +// &Wires; @25 +wire [6 :0] bound_sel; +wire [11:0] digit_bound_1; +wire [11:0] digit_bound_2; +wire [11:0] digit_bound_3; +wire [11:0] digit_bound_4; +wire [11:0] digit_bound_5; +wire [11:0] digit_bound_6; +wire [11:0] digit_bound_7; +wire [11:0] digit_bound_8; +wire [11:0] digit_bound_9; +wire [11:0] sqrt_digit_bound2_1; +wire [11:0] sqrt_digit_bound2_2; +wire [11:0] sqrt_digit_bound2_3; 
+wire [11:0] sqrt_digit_bound2_4; +wire [11:0] sqrt_digit_bound2_5; +wire [11:0] sqrt_digit_bound2_6; +wire [11:0] sqrt_digit_bound2_7; +wire [11:0] sqrt_digit_bound2_8; +wire [11:0] sqrt_digit_bound2_9; +wire sqrt_first_round; +wire sqrt_secd_round; +wire sqrt_secd_round_sign; + + +// &CombBeg; @27 +always @( bound_sel[6:0]) +begin +case(bound_sel[6:0]) + 7'h40: + begin + ori_digit_bound_1[11:0] = 12'h1b;//27 + ori_digit_bound_2[11:0] = 12'h5c;//92 + ori_digit_bound_3[11:0] = 12'h9d;//157 + ori_digit_bound_4[11:0] = 12'hde;//222 + ori_digit_bound_5[11:0] = 12'h120;//288 + ori_digit_bound_6[11:0] = 12'h161;//353 + ori_digit_bound_7[11:0] = 12'h1a2;//418 + ori_digit_bound_8[11:0] = 12'h1e3;//483 + ori_digit_bound_9[11:0] = 12'h225;//549 + + end + 7'h41: + begin + ori_digit_bound_1[11:0] = 12'h1b;//27 + ori_digit_bound_2[11:0] = 12'h5d;//93 + ori_digit_bound_3[11:0] = 12'ha0;//160 + ori_digit_bound_4[11:0] = 12'he2;//226 + ori_digit_bound_5[11:0] = 12'h124;//292 + ori_digit_bound_6[11:0] = 12'h166;//358 + ori_digit_bound_7[11:0] = 12'h1a9;//425 + ori_digit_bound_8[11:0] = 12'h1eb;//491 + ori_digit_bound_9[11:0] = 12'h22d;//557 + + end + 7'h42: + begin + ori_digit_bound_1[11:0] = 12'h1b;//27 + ori_digit_bound_2[11:0] = 12'h5f;//95 + ori_digit_bound_3[11:0] = 12'ha2;//162 + ori_digit_bound_4[11:0] = 12'he5;//229 + ori_digit_bound_5[11:0] = 12'h128;//296 + ori_digit_bound_6[11:0] = 12'h16c;//364 + ori_digit_bound_7[11:0] = 12'h1af;//431 + ori_digit_bound_8[11:0] = 12'h1f2;//498 + ori_digit_bound_9[11:0] = 12'h236;//566 + + end + 7'h43: + begin + ori_digit_bound_1[11:0] = 12'h1c;//28 + ori_digit_bound_2[11:0] = 12'h60;//96 + ori_digit_bound_3[11:0] = 12'ha4;//164 + ori_digit_bound_4[11:0] = 12'he9;//233 + ori_digit_bound_5[11:0] = 12'h12d;//301 + ori_digit_bound_6[11:0] = 12'h171;//369 + ori_digit_bound_7[11:0] = 12'h1b5;//437 + ori_digit_bound_8[11:0] = 12'h1fa;//506 + ori_digit_bound_9[11:0] = 12'h23e;//574 + + end + 7'h44: + begin + ori_digit_bound_1[11:0] = 
12'h1c;//28 + ori_digit_bound_2[11:0] = 12'h61;//97 + ori_digit_bound_3[11:0] = 12'ha7;//167 + ori_digit_bound_4[11:0] = 12'hec;//236 + ori_digit_bound_5[11:0] = 12'h131;//305 + ori_digit_bound_6[11:0] = 12'h177;//375 + ori_digit_bound_7[11:0] = 12'h1bc;//444 + ori_digit_bound_8[11:0] = 12'h201;//513 + ori_digit_bound_9[11:0] = 12'h246;//582 + + end + 7'h45: + begin + ori_digit_bound_1[11:0] = 12'h1d;//29 + ori_digit_bound_2[11:0] = 12'h63;//99 + ori_digit_bound_3[11:0] = 12'ha9;//169 + ori_digit_bound_4[11:0] = 12'hef;//239 + ori_digit_bound_5[11:0] = 12'h136;//310 + ori_digit_bound_6[11:0] = 12'h17c;//380 + ori_digit_bound_7[11:0] = 12'h1c2;//450 + ori_digit_bound_8[11:0] = 12'h208;//520 + ori_digit_bound_9[11:0] = 12'h24f;//591 + + end + 7'h46: + begin + ori_digit_bound_1[11:0] = 12'h1d;//29 + ori_digit_bound_2[11:0] = 12'h64;//100 + ori_digit_bound_3[11:0] = 12'hac;//172 + ori_digit_bound_4[11:0] = 12'hf3;//243 + ori_digit_bound_5[11:0] = 12'h13a;//314 + ori_digit_bound_6[11:0] = 12'h181;//385 + ori_digit_bound_7[11:0] = 12'h1c9;//457 + ori_digit_bound_8[11:0] = 12'h210;//528 + ori_digit_bound_9[11:0] = 12'h257;//599 + + end + 7'h47: + begin + ori_digit_bound_1[11:0] = 12'h1d;//29 + ori_digit_bound_2[11:0] = 12'h66;//102 + ori_digit_bound_3[11:0] = 12'hae;//174 + ori_digit_bound_4[11:0] = 12'hf6;//246 + ori_digit_bound_5[11:0] = 12'h13e;//318 + ori_digit_bound_6[11:0] = 12'h187;//391 + ori_digit_bound_7[11:0] = 12'h1cf;//463 + ori_digit_bound_8[11:0] = 12'h217;//535 + ori_digit_bound_9[11:0] = 12'h260;//608 + + end + 7'h48: + begin + ori_digit_bound_1[11:0] = 12'h1e;//30 + ori_digit_bound_2[11:0] = 12'h67;//103 + ori_digit_bound_3[11:0] = 12'hb0;//176 + ori_digit_bound_4[11:0] = 12'hfa;//250 + ori_digit_bound_5[11:0] = 12'h143;//323 + ori_digit_bound_6[11:0] = 12'h18c;//396 + ori_digit_bound_7[11:0] = 12'h1d5;//469 + ori_digit_bound_8[11:0] = 12'h21f;//543 + ori_digit_bound_9[11:0] = 12'h268;//616 + + end + 7'h49: + begin + ori_digit_bound_1[11:0] = 12'h1e;//30 
+ ori_digit_bound_2[11:0] = 12'h68;//104 + ori_digit_bound_3[11:0] = 12'hb3;//179 + ori_digit_bound_4[11:0] = 12'hfd;//253 + ori_digit_bound_5[11:0] = 12'h147;//327 + ori_digit_bound_6[11:0] = 12'h192;//402 + ori_digit_bound_7[11:0] = 12'h1dc;//476 + ori_digit_bound_8[11:0] = 12'h226;//550 + ori_digit_bound_9[11:0] = 12'h270;//624 + + end + 7'h4a: + begin + ori_digit_bound_1[11:0] = 12'h1f;//31 + ori_digit_bound_2[11:0] = 12'h6a;//106 + ori_digit_bound_3[11:0] = 12'hb5;//181 + ori_digit_bound_4[11:0] = 12'h100;//256 + ori_digit_bound_5[11:0] = 12'h14c;//332 + ori_digit_bound_6[11:0] = 12'h197;//407 + ori_digit_bound_7[11:0] = 12'h1e2;//482 + ori_digit_bound_8[11:0] = 12'h22d;//557 + ori_digit_bound_9[11:0] = 12'h279;//633 + + end + 7'h4b: + begin + ori_digit_bound_1[11:0] = 12'h1f;//31 + ori_digit_bound_2[11:0] = 12'h6b;//107 + ori_digit_bound_3[11:0] = 12'hb8;//184 + ori_digit_bound_4[11:0] = 12'h104;//260 + ori_digit_bound_5[11:0] = 12'h150;//336 + ori_digit_bound_6[11:0] = 12'h19c;//412 + ori_digit_bound_7[11:0] = 12'h1e9;//489 + ori_digit_bound_8[11:0] = 12'h235;//565 + ori_digit_bound_9[11:0] = 12'h281;//641 + + end + 7'h4c: + begin + ori_digit_bound_1[11:0] = 12'h1f;//31 + ori_digit_bound_2[11:0] = 12'h6d;//109 + ori_digit_bound_3[11:0] = 12'hba;//186 + ori_digit_bound_4[11:0] = 12'h107;//263 + ori_digit_bound_5[11:0] = 12'h154;//340 + ori_digit_bound_6[11:0] = 12'h1a2;//418 + ori_digit_bound_7[11:0] = 12'h1ef;//495 + ori_digit_bound_8[11:0] = 12'h23c;//572 + ori_digit_bound_9[11:0] = 12'h28a;//650 + + end + 7'h4d: + begin + ori_digit_bound_1[11:0] = 12'h20;//32 + ori_digit_bound_2[11:0] = 12'h6e;//110 + ori_digit_bound_3[11:0] = 12'hbc;//188 + ori_digit_bound_4[11:0] = 12'h10b;//267 + ori_digit_bound_5[11:0] = 12'h159;//345 + ori_digit_bound_6[11:0] = 12'h1a7;//423 + ori_digit_bound_7[11:0] = 12'h1f5;//501 + ori_digit_bound_8[11:0] = 12'h244;//580 + ori_digit_bound_9[11:0] = 12'h292;//658 + + end + 7'h4e: + begin + ori_digit_bound_1[11:0] = 12'h20;//32 + 
ori_digit_bound_2[11:0] = 12'h6f;//111 + ori_digit_bound_3[11:0] = 12'hbf;//191 + ori_digit_bound_4[11:0] = 12'h10e;//270 + ori_digit_bound_5[11:0] = 12'h15d;//349 + ori_digit_bound_6[11:0] = 12'h1ad;//429 + ori_digit_bound_7[11:0] = 12'h1fc;//508 + ori_digit_bound_8[11:0] = 12'h24b;//587 + ori_digit_bound_9[11:0] = 12'h29a;//666 + + end + 7'h4f: + begin + ori_digit_bound_1[11:0] = 12'h21;//33 + ori_digit_bound_2[11:0] = 12'h71;//113 + ori_digit_bound_3[11:0] = 12'hc1;//193 + ori_digit_bound_4[11:0] = 12'h111;//273 + ori_digit_bound_5[11:0] = 12'h162;//354 + ori_digit_bound_6[11:0] = 12'h1b2;//434 + ori_digit_bound_7[11:0] = 12'h202;//514 + ori_digit_bound_8[11:0] = 12'h252;//594 + ori_digit_bound_9[11:0] = 12'h2a3;//675 + + end + 7'h50: + begin + ori_digit_bound_1[11:0] = 12'h21;//33 + ori_digit_bound_2[11:0] = 12'h72;//114 + ori_digit_bound_3[11:0] = 12'hc4;//196 + ori_digit_bound_4[11:0] = 12'h115;//277 + ori_digit_bound_5[11:0] = 12'h166;//358 + ori_digit_bound_6[11:0] = 12'h1b7;//439 + ori_digit_bound_7[11:0] = 12'h209;//521 + ori_digit_bound_8[11:0] = 12'h25a;//602 + ori_digit_bound_9[11:0] = 12'h2ab;//683 + + end + 7'h51: + begin + ori_digit_bound_1[11:0] = 12'h21;//33 + ori_digit_bound_2[11:0] = 12'h74;//116 + ori_digit_bound_3[11:0] = 12'hc6;//198 + ori_digit_bound_4[11:0] = 12'h118;//280 + ori_digit_bound_5[11:0] = 12'h16a;//362 + ori_digit_bound_6[11:0] = 12'h1bd;//445 + ori_digit_bound_7[11:0] = 12'h20f;//527 + ori_digit_bound_8[11:0] = 12'h261;//609 + ori_digit_bound_9[11:0] = 12'h2b4;//692 + + end + 7'h52: + begin + ori_digit_bound_1[11:0] = 12'h22;//34 + ori_digit_bound_2[11:0] = 12'h75;//117 + ori_digit_bound_3[11:0] = 12'hc8;//200 + ori_digit_bound_4[11:0] = 12'h11c;//284 + ori_digit_bound_5[11:0] = 12'h16f;//367 + ori_digit_bound_6[11:0] = 12'h1c2;//450 + ori_digit_bound_7[11:0] = 12'h215;//533 + ori_digit_bound_8[11:0] = 12'h269;//617 + ori_digit_bound_9[11:0] = 12'h2bc;//700 + + end + 7'h53: + begin + ori_digit_bound_1[11:0] = 12'h22;//34 + 
ori_digit_bound_2[11:0] = 12'h76;//118 + ori_digit_bound_3[11:0] = 12'hcb;//203 + ori_digit_bound_4[11:0] = 12'h11f;//287 + ori_digit_bound_5[11:0] = 12'h173;//371 + ori_digit_bound_6[11:0] = 12'h1c8;//456 + ori_digit_bound_7[11:0] = 12'h21c;//540 + ori_digit_bound_8[11:0] = 12'h270;//624 + ori_digit_bound_9[11:0] = 12'h2c4;//708 + + end + 7'h54: + begin + ori_digit_bound_1[11:0] = 12'h23;//35 + ori_digit_bound_2[11:0] = 12'h78;//120 + ori_digit_bound_3[11:0] = 12'hcd;//205 + ori_digit_bound_4[11:0] = 12'h122;//290 + ori_digit_bound_5[11:0] = 12'h178;//376 + ori_digit_bound_6[11:0] = 12'h1cd;//461 + ori_digit_bound_7[11:0] = 12'h222;//546 + ori_digit_bound_8[11:0] = 12'h277;//631 + ori_digit_bound_9[11:0] = 12'h2cd;//717 + + end + 7'h55: + begin + ori_digit_bound_1[11:0] = 12'h23;//35 + ori_digit_bound_2[11:0] = 12'h79;//121 + ori_digit_bound_3[11:0] = 12'hd0;//208 + ori_digit_bound_4[11:0] = 12'h126;//294 + ori_digit_bound_5[11:0] = 12'h17c;//380 + ori_digit_bound_6[11:0] = 12'h1d2;//466 + ori_digit_bound_7[11:0] = 12'h229;//553 + ori_digit_bound_8[11:0] = 12'h27f;//639 + ori_digit_bound_9[11:0] = 12'h2d5;//725 + + end + 7'h56: + begin + ori_digit_bound_1[11:0] = 12'h23;//35 + ori_digit_bound_2[11:0] = 12'h7b;//123 + ori_digit_bound_3[11:0] = 12'hd2;//210 + ori_digit_bound_4[11:0] = 12'h129;//297 + ori_digit_bound_5[11:0] = 12'h180;//384 + ori_digit_bound_6[11:0] = 12'h1d8;//472 + ori_digit_bound_7[11:0] = 12'h22f;//559 + ori_digit_bound_8[11:0] = 12'h286;//646 + ori_digit_bound_9[11:0] = 12'h2de;//734 + + end + 7'h57: + begin + ori_digit_bound_1[11:0] = 12'h24;//36 + ori_digit_bound_2[11:0] = 12'h7c;//124 + ori_digit_bound_3[11:0] = 12'hd4;//212 + ori_digit_bound_4[11:0] = 12'h12d;//301 + ori_digit_bound_5[11:0] = 12'h185;//389 + ori_digit_bound_6[11:0] = 12'h1dd;//477 + ori_digit_bound_7[11:0] = 12'h235;//565 + ori_digit_bound_8[11:0] = 12'h28e;//654 + ori_digit_bound_9[11:0] = 12'h2e6;//742 + + end + 7'h58: + begin + ori_digit_bound_1[11:0] = 12'h24;//36 + 
ori_digit_bound_2[11:0] = 12'h7d;//125 + ori_digit_bound_3[11:0] = 12'hd7;//215 + ori_digit_bound_4[11:0] = 12'h130;//304 + ori_digit_bound_5[11:0] = 12'h189;//393 + ori_digit_bound_6[11:0] = 12'h1e3;//483 + ori_digit_bound_7[11:0] = 12'h23c;//572 + ori_digit_bound_8[11:0] = 12'h295;//661 + ori_digit_bound_9[11:0] = 12'h2ee;//750 + + end + 7'h59: + begin + ori_digit_bound_1[11:0] = 12'h25;//37 + ori_digit_bound_2[11:0] = 12'h7f;//127 + ori_digit_bound_3[11:0] = 12'hd9;//217 + ori_digit_bound_4[11:0] = 12'h133;//307 + ori_digit_bound_5[11:0] = 12'h18e;//398 + ori_digit_bound_6[11:0] = 12'h1e8;//488 + ori_digit_bound_7[11:0] = 12'h242;//578 + ori_digit_bound_8[11:0] = 12'h29c;//668 + ori_digit_bound_9[11:0] = 12'h2f7;//759 + + end + 7'h5a: + begin + ori_digit_bound_1[11:0] = 12'h25;//37 + ori_digit_bound_2[11:0] = 12'h80;//128 + ori_digit_bound_3[11:0] = 12'hdc;//220 + ori_digit_bound_4[11:0] = 12'h137;//311 + ori_digit_bound_5[11:0] = 12'h192;//402 + ori_digit_bound_6[11:0] = 12'h1ed;//493 + ori_digit_bound_7[11:0] = 12'h249;//585 + ori_digit_bound_8[11:0] = 12'h2a4;//676 + ori_digit_bound_9[11:0] = 12'h2ff;//767 + + end + 7'h5b: + begin + ori_digit_bound_1[11:0] = 12'h25;//37 + ori_digit_bound_2[11:0] = 12'h82;//130 + ori_digit_bound_3[11:0] = 12'hde;//222 + ori_digit_bound_4[11:0] = 12'h13a;//314 + ori_digit_bound_5[11:0] = 12'h196;//406 + ori_digit_bound_6[11:0] = 12'h1f3;//499 + ori_digit_bound_7[11:0] = 12'h24f;//591 + ori_digit_bound_8[11:0] = 12'h2ab;//683 + ori_digit_bound_9[11:0] = 12'h308;//776 + + end + 7'h5c: + begin + ori_digit_bound_1[11:0] = 12'h26;//38 + ori_digit_bound_2[11:0] = 12'h83;//131 + ori_digit_bound_3[11:0] = 12'he0;//224 + ori_digit_bound_4[11:0] = 12'h13e;//318 + ori_digit_bound_5[11:0] = 12'h19b;//411 + ori_digit_bound_6[11:0] = 12'h1f8;//504 + ori_digit_bound_7[11:0] = 12'h255;//597 + ori_digit_bound_8[11:0] = 12'h2b3;//691 + ori_digit_bound_9[11:0] = 12'h310;//784 + + end + 7'h5d: + begin + ori_digit_bound_1[11:0] = 12'h26;//38 + 
ori_digit_bound_2[11:0] = 12'h84;//132 + ori_digit_bound_3[11:0] = 12'he3;//227 + ori_digit_bound_4[11:0] = 12'h141;//321 + ori_digit_bound_5[11:0] = 12'h19f;//415 + ori_digit_bound_6[11:0] = 12'h1fe;//510 + ori_digit_bound_7[11:0] = 12'h25c;//604 + ori_digit_bound_8[11:0] = 12'h2ba;//698 + ori_digit_bound_9[11:0] = 12'h318;//792 + + end + 7'h5e: + begin + ori_digit_bound_1[11:0] = 12'h27;//39 + ori_digit_bound_2[11:0] = 12'h86;//134 + ori_digit_bound_3[11:0] = 12'he5;//229 + ori_digit_bound_4[11:0] = 12'h144;//324 + ori_digit_bound_5[11:0] = 12'h1a4;//420 + ori_digit_bound_6[11:0] = 12'h203;//515 + ori_digit_bound_7[11:0] = 12'h262;//610 + ori_digit_bound_8[11:0] = 12'h2c1;//705 + ori_digit_bound_9[11:0] = 12'h321;//801 + + end + 7'h5f: + begin + ori_digit_bound_1[11:0] = 12'h27;//39 + ori_digit_bound_2[11:0] = 12'h87;//135 + ori_digit_bound_3[11:0] = 12'he8;//232 + ori_digit_bound_4[11:0] = 12'h148;//328 + ori_digit_bound_5[11:0] = 12'h1a8;//424 + ori_digit_bound_6[11:0] = 12'h208;//520 + ori_digit_bound_7[11:0] = 12'h269;//617 + ori_digit_bound_8[11:0] = 12'h2c9;//713 + ori_digit_bound_9[11:0] = 12'h329;//809 + + end + 7'h60: + begin + ori_digit_bound_1[11:0] = 12'h27;//39 + ori_digit_bound_2[11:0] = 12'h89;//137 + ori_digit_bound_3[11:0] = 12'hea;//234 + ori_digit_bound_4[11:0] = 12'h14b;//331 + ori_digit_bound_5[11:0] = 12'h1ac;//428 + ori_digit_bound_6[11:0] = 12'h20e;//526 + ori_digit_bound_7[11:0] = 12'h26f;//623 + ori_digit_bound_8[11:0] = 12'h2d0;//720 + ori_digit_bound_9[11:0] = 12'h332;//818 + + end + 7'h61: + begin + ori_digit_bound_1[11:0] = 12'h28;//40 + ori_digit_bound_2[11:0] = 12'h8a;//138 + ori_digit_bound_3[11:0] = 12'hec;//236 + ori_digit_bound_4[11:0] = 12'h14f;//335 + ori_digit_bound_5[11:0] = 12'h1b1;//433 + ori_digit_bound_6[11:0] = 12'h213;//531 + ori_digit_bound_7[11:0] = 12'h275;//629 + ori_digit_bound_8[11:0] = 12'h2d8;//728 + ori_digit_bound_9[11:0] = 12'h33a;//826 + + end + 7'h62: + begin + ori_digit_bound_1[11:0] = 12'h28;//40 + 
ori_digit_bound_2[11:0] = 12'h8b;//139 + ori_digit_bound_3[11:0] = 12'hef;//239 + ori_digit_bound_4[11:0] = 12'h152;//338 + ori_digit_bound_5[11:0] = 12'h1b5;//437 + ori_digit_bound_6[11:0] = 12'h219;//537 + ori_digit_bound_7[11:0] = 12'h27c;//636 + ori_digit_bound_8[11:0] = 12'h2df;//735 + ori_digit_bound_9[11:0] = 12'h342;//834 + + end + 7'h63: + begin + ori_digit_bound_1[11:0] = 12'h29;//41 + ori_digit_bound_2[11:0] = 12'h8d;//141 + ori_digit_bound_3[11:0] = 12'hf1;//241 + ori_digit_bound_4[11:0] = 12'h155;//341 + ori_digit_bound_5[11:0] = 12'h1ba;//442 + ori_digit_bound_6[11:0] = 12'h21e;//542 + ori_digit_bound_7[11:0] = 12'h282;//642 + ori_digit_bound_8[11:0] = 12'h2e6;//742 + ori_digit_bound_9[11:0] = 12'h34b;//843 + + end + 7'h64: + begin + ori_digit_bound_1[11:0] = 12'h29;//41 + ori_digit_bound_2[11:0] = 12'h8e;//142 + ori_digit_bound_3[11:0] = 12'hf4;//244 + ori_digit_bound_4[11:0] = 12'h159;//345 + ori_digit_bound_5[11:0] = 12'h1be;//446 + ori_digit_bound_6[11:0] = 12'h223;//547 + ori_digit_bound_7[11:0] = 12'h289;//649 + ori_digit_bound_8[11:0] = 12'h2ee;//750 + ori_digit_bound_9[11:0] = 12'h353;//851 + + end + 7'h65: + begin + ori_digit_bound_1[11:0] = 12'h29;//41 + ori_digit_bound_2[11:0] = 12'h90;//144 + ori_digit_bound_3[11:0] = 12'hf6;//246 + ori_digit_bound_4[11:0] = 12'h15c;//348 + ori_digit_bound_5[11:0] = 12'h1c2;//450 + ori_digit_bound_6[11:0] = 12'h229;//553 + ori_digit_bound_7[11:0] = 12'h28f;//655 + ori_digit_bound_8[11:0] = 12'h2f5;//757 + ori_digit_bound_9[11:0] = 12'h35c;//860 + + end + 7'h66: + begin + ori_digit_bound_1[11:0] = 12'h2a;//42 + ori_digit_bound_2[11:0] = 12'h91;//145 + ori_digit_bound_3[11:0] = 12'hf8;//248 + ori_digit_bound_4[11:0] = 12'h160;//352 + ori_digit_bound_5[11:0] = 12'h1c7;//455 + ori_digit_bound_6[11:0] = 12'h22e;//558 + ori_digit_bound_7[11:0] = 12'h295;//661 + ori_digit_bound_8[11:0] = 12'h2fd;//765 + ori_digit_bound_9[11:0] = 12'h364;//868 + + end + 7'h67: + begin + ori_digit_bound_1[11:0] = 12'h2a;//42 + 
ori_digit_bound_2[11:0] = 12'h92;//146 + ori_digit_bound_3[11:0] = 12'hfb;//251 + ori_digit_bound_4[11:0] = 12'h163;//355 + ori_digit_bound_5[11:0] = 12'h1cb;//459 + ori_digit_bound_6[11:0] = 12'h234;//564 + ori_digit_bound_7[11:0] = 12'h29c;//668 + ori_digit_bound_8[11:0] = 12'h304;//772 + ori_digit_bound_9[11:0] = 12'h36c;//876 + + end + 7'h68: + begin + ori_digit_bound_1[11:0] = 12'h2b;//43 + ori_digit_bound_2[11:0] = 12'h94;//148 + ori_digit_bound_3[11:0] = 12'hfd;//253 + ori_digit_bound_4[11:0] = 12'h166;//358 + ori_digit_bound_5[11:0] = 12'h1d0;//464 + ori_digit_bound_6[11:0] = 12'h239;//569 + ori_digit_bound_7[11:0] = 12'h2a2;//674 + ori_digit_bound_8[11:0] = 12'h30b;//779 + ori_digit_bound_9[11:0] = 12'h375;//885 + + end + 7'h69: + begin + ori_digit_bound_1[11:0] = 12'h2b;//43 + ori_digit_bound_2[11:0] = 12'h95;//149 + ori_digit_bound_3[11:0] = 12'h100;//256 + ori_digit_bound_4[11:0] = 12'h16a;//362 + ori_digit_bound_5[11:0] = 12'h1d4;//468 + ori_digit_bound_6[11:0] = 12'h23e;//574 + ori_digit_bound_7[11:0] = 12'h2a9;//681 + ori_digit_bound_8[11:0] = 12'h313;//787 + ori_digit_bound_9[11:0] = 12'h37d;//893 + + end + 7'h6a: + begin + ori_digit_bound_1[11:0] = 12'h2b;//43 + ori_digit_bound_2[11:0] = 12'h97;//151 + ori_digit_bound_3[11:0] = 12'h102;//258 + ori_digit_bound_4[11:0] = 12'h16d;//365 + ori_digit_bound_5[11:0] = 12'h1d8;//472 + ori_digit_bound_6[11:0] = 12'h244;//580 + ori_digit_bound_7[11:0] = 12'h2af;//687 + ori_digit_bound_8[11:0] = 12'h31a;//794 + ori_digit_bound_9[11:0] = 12'h386;//902 + + end + 7'h6b: + begin + ori_digit_bound_1[11:0] = 12'h2c;//44 + ori_digit_bound_2[11:0] = 12'h98;//152 + ori_digit_bound_3[11:0] = 12'h104;//260 + ori_digit_bound_4[11:0] = 12'h171;//369 + ori_digit_bound_5[11:0] = 12'h1dd;//477 + ori_digit_bound_6[11:0] = 12'h249;//585 + ori_digit_bound_7[11:0] = 12'h2b5;//693 + ori_digit_bound_8[11:0] = 12'h322;//802 + ori_digit_bound_9[11:0] = 12'h38e;//910 + + end + 7'h6c: + begin + ori_digit_bound_1[11:0] = 12'h2c;//44 + 
ori_digit_bound_2[11:0] = 12'h99;//153 + ori_digit_bound_3[11:0] = 12'h107;//263 + ori_digit_bound_4[11:0] = 12'h174;//372 + ori_digit_bound_5[11:0] = 12'h1e1;//481 + ori_digit_bound_6[11:0] = 12'h24f;//591 + ori_digit_bound_7[11:0] = 12'h2bc;//700 + ori_digit_bound_8[11:0] = 12'h329;//809 + ori_digit_bound_9[11:0] = 12'h396;//918 + + end + 7'h6d: + begin + ori_digit_bound_1[11:0] = 12'h2d;//45 + ori_digit_bound_2[11:0] = 12'h9b;//155 + ori_digit_bound_3[11:0] = 12'h109;//265 + ori_digit_bound_4[11:0] = 12'h177;//375 + ori_digit_bound_5[11:0] = 12'h1e6;//486 + ori_digit_bound_6[11:0] = 12'h254;//596 + ori_digit_bound_7[11:0] = 12'h2c2;//706 + ori_digit_bound_8[11:0] = 12'h330;//816 + ori_digit_bound_9[11:0] = 12'h39f;//927 + + end + 7'h6e: + begin + ori_digit_bound_1[11:0] = 12'h2d;//45 + ori_digit_bound_2[11:0] = 12'h9c;//156 + ori_digit_bound_3[11:0] = 12'h10c;//268 + ori_digit_bound_4[11:0] = 12'h17b;//379 + ori_digit_bound_5[11:0] = 12'h1ea;//490 + ori_digit_bound_6[11:0] = 12'h259;//601 + ori_digit_bound_7[11:0] = 12'h2c9;//713 + ori_digit_bound_8[11:0] = 12'h338;//824 + ori_digit_bound_9[11:0] = 12'h3a7;//935 + + end + 7'h6f: + begin + ori_digit_bound_1[11:0] = 12'h2d;//45 + ori_digit_bound_2[11:0] = 12'h9e;//158 + ori_digit_bound_3[11:0] = 12'h10e;//270 + ori_digit_bound_4[11:0] = 12'h17e;//382 + ori_digit_bound_5[11:0] = 12'h1ee;//494 + ori_digit_bound_6[11:0] = 12'h25f;//607 + ori_digit_bound_7[11:0] = 12'h2cf;//719 + ori_digit_bound_8[11:0] = 12'h33f;//831 + ori_digit_bound_9[11:0] = 12'h3b0;//944 + + end + 7'h70: + begin + ori_digit_bound_1[11:0] = 12'h2e;//46 + ori_digit_bound_2[11:0] = 12'h9f;//159 + ori_digit_bound_3[11:0] = 12'h110;//272 + ori_digit_bound_4[11:0] = 12'h182;//386 + ori_digit_bound_5[11:0] = 12'h1f3;//499 + ori_digit_bound_6[11:0] = 12'h264;//612 + ori_digit_bound_7[11:0] = 12'h2d5;//725 + ori_digit_bound_8[11:0] = 12'h347;//839 + ori_digit_bound_9[11:0] = 12'h3b8;//952 + + end + 7'h71: + begin + ori_digit_bound_1[11:0] = 12'h2e;//46 + 
ori_digit_bound_2[11:0] = 12'ha0;//160 + ori_digit_bound_3[11:0] = 12'h113;//275 + ori_digit_bound_4[11:0] = 12'h185;//389 + ori_digit_bound_5[11:0] = 12'h1f7;//503 + ori_digit_bound_6[11:0] = 12'h26a;//618 + ori_digit_bound_7[11:0] = 12'h2dc;//732 + ori_digit_bound_8[11:0] = 12'h34e;//846 + ori_digit_bound_9[11:0] = 12'h3c0;//960 + + end + 7'h72: + begin + ori_digit_bound_1[11:0] = 12'h2f;//47 + ori_digit_bound_2[11:0] = 12'ha2;//162 + ori_digit_bound_3[11:0] = 12'h115;//277 + ori_digit_bound_4[11:0] = 12'h188;//392 + ori_digit_bound_5[11:0] = 12'h1fc;//508 + ori_digit_bound_6[11:0] = 12'h26f;//623 + ori_digit_bound_7[11:0] = 12'h2e2;//738 + ori_digit_bound_8[11:0] = 12'h355;//853 + ori_digit_bound_9[11:0] = 12'h3c9;//969 + + end + 7'h73: + begin + ori_digit_bound_1[11:0] = 12'h2f;//47 + ori_digit_bound_2[11:0] = 12'ha3;//163 + ori_digit_bound_3[11:0] = 12'h118;//280 + ori_digit_bound_4[11:0] = 12'h18c;//396 + ori_digit_bound_5[11:0] = 12'h200;//512 + ori_digit_bound_6[11:0] = 12'h274;//628 + ori_digit_bound_7[11:0] = 12'h2e9;//745 + ori_digit_bound_8[11:0] = 12'h35d;//861 + ori_digit_bound_9[11:0] = 12'h3d1;//977 + + end + 7'h74: + begin + ori_digit_bound_1[11:0] = 12'h2f;//47 + ori_digit_bound_2[11:0] = 12'ha5;//165 + ori_digit_bound_3[11:0] = 12'h11a;//282 + ori_digit_bound_4[11:0] = 12'h18f;//399 + ori_digit_bound_5[11:0] = 12'h204;//516 + ori_digit_bound_6[11:0] = 12'h27a;//634 + ori_digit_bound_7[11:0] = 12'h2ef;//751 + ori_digit_bound_8[11:0] = 12'h364;//868 + ori_digit_bound_9[11:0] = 12'h3da;//986 + + end + 7'h75: + begin + ori_digit_bound_1[11:0] = 12'h30;//48 + ori_digit_bound_2[11:0] = 12'ha6;//166 + ori_digit_bound_3[11:0] = 12'h11c;//284 + ori_digit_bound_4[11:0] = 12'h193;//403 + ori_digit_bound_5[11:0] = 12'h209;//521 + ori_digit_bound_6[11:0] = 12'h27f;//639 + ori_digit_bound_7[11:0] = 12'h2f5;//757 + ori_digit_bound_8[11:0] = 12'h36c;//876 + ori_digit_bound_9[11:0] = 12'h3e2;//994 + + end + 7'h76: + begin + ori_digit_bound_1[11:0] = 12'h30;//48 + 
ori_digit_bound_2[11:0] = 12'ha7;//167 + ori_digit_bound_3[11:0] = 12'h11f;//287 + ori_digit_bound_4[11:0] = 12'h196;//406 + ori_digit_bound_5[11:0] = 12'h20d;//525 + ori_digit_bound_6[11:0] = 12'h285;//645 + ori_digit_bound_7[11:0] = 12'h2fc;//764 + ori_digit_bound_8[11:0] = 12'h373;//883 + ori_digit_bound_9[11:0] = 12'h3ea;//1002 + + end + 7'h77: + begin + ori_digit_bound_1[11:0] = 12'h31;//49 + ori_digit_bound_2[11:0] = 12'ha9;//169 + ori_digit_bound_3[11:0] = 12'h121;//289 + ori_digit_bound_4[11:0] = 12'h199;//409 + ori_digit_bound_5[11:0] = 12'h212;//530 + ori_digit_bound_6[11:0] = 12'h28a;//650 + ori_digit_bound_7[11:0] = 12'h302;//770 + ori_digit_bound_8[11:0] = 12'h37a;//890 + ori_digit_bound_9[11:0] = 12'h3f3;//1011 + + end + 7'h78: + begin + ori_digit_bound_1[11:0] = 12'h31;//49 + ori_digit_bound_2[11:0] = 12'haa;//170 + ori_digit_bound_3[11:0] = 12'h124;//292 + ori_digit_bound_4[11:0] = 12'h19d;//413 + ori_digit_bound_5[11:0] = 12'h216;//534 + ori_digit_bound_6[11:0] = 12'h28f;//655 + ori_digit_bound_7[11:0] = 12'h309;//777 + ori_digit_bound_8[11:0] = 12'h382;//898 + ori_digit_bound_9[11:0] = 12'h3fb;//1019 + + end + 7'h79: + begin + ori_digit_bound_1[11:0] = 12'h31;//49 + ori_digit_bound_2[11:0] = 12'hac;//172 + ori_digit_bound_3[11:0] = 12'h126;//294 + ori_digit_bound_4[11:0] = 12'h1a0;//416 + ori_digit_bound_5[11:0] = 12'h21a;//538 + ori_digit_bound_6[11:0] = 12'h295;//661 + ori_digit_bound_7[11:0] = 12'h30f;//783 + ori_digit_bound_8[11:0] = 12'h389;//905 + ori_digit_bound_9[11:0] = 12'h404;//1028 + + end + 7'h7a: + begin + ori_digit_bound_1[11:0] = 12'h32;//50 + ori_digit_bound_2[11:0] = 12'had;//173 + ori_digit_bound_3[11:0] = 12'h128;//296 + ori_digit_bound_4[11:0] = 12'h1a4;//420 + ori_digit_bound_5[11:0] = 12'h21f;//543 + ori_digit_bound_6[11:0] = 12'h29a;//666 + ori_digit_bound_7[11:0] = 12'h315;//789 + ori_digit_bound_8[11:0] = 12'h391;//913 + ori_digit_bound_9[11:0] = 12'h40c;//1036 + + end + 7'h7b: + begin + ori_digit_bound_1[11:0] = 
12'h32;//50 + ori_digit_bound_2[11:0] = 12'hae;//174 + ori_digit_bound_3[11:0] = 12'h12b;//299 + ori_digit_bound_4[11:0] = 12'h1a7;//423 + ori_digit_bound_5[11:0] = 12'h223;//547 + ori_digit_bound_6[11:0] = 12'h2a0;//672 + ori_digit_bound_7[11:0] = 12'h31c;//796 + ori_digit_bound_8[11:0] = 12'h398;//920 + ori_digit_bound_9[11:0] = 12'h414;//1044 + + end + 7'h7c: + begin + ori_digit_bound_1[11:0] = 12'h33;//51 + ori_digit_bound_2[11:0] = 12'hb0;//176 + ori_digit_bound_3[11:0] = 12'h12d;//301 + ori_digit_bound_4[11:0] = 12'h1aa;//426 + ori_digit_bound_5[11:0] = 12'h228;//552 + ori_digit_bound_6[11:0] = 12'h2a5;//677 + ori_digit_bound_7[11:0] = 12'h322;//802 + ori_digit_bound_8[11:0] = 12'h39f;//927 + ori_digit_bound_9[11:0] = 12'h41d;//1053 + + end + 7'h7d: + begin + ori_digit_bound_1[11:0] = 12'h33;//51 + ori_digit_bound_2[11:0] = 12'hb1;//177 + ori_digit_bound_3[11:0] = 12'h130;//304 + ori_digit_bound_4[11:0] = 12'h1ae;//430 + ori_digit_bound_5[11:0] = 12'h22c;//556 + ori_digit_bound_6[11:0] = 12'h2aa;//682 + ori_digit_bound_7[11:0] = 12'h329;//809 + ori_digit_bound_8[11:0] = 12'h3a7;//935 + ori_digit_bound_9[11:0] = 12'h425;//1061 + + end + 7'h7e: + begin + ori_digit_bound_1[11:0] = 12'h33;//51 + ori_digit_bound_2[11:0] = 12'hb3;//179 + ori_digit_bound_3[11:0] = 12'h132;//306 + ori_digit_bound_4[11:0] = 12'h1b1;//433 + ori_digit_bound_5[11:0] = 12'h230;//560 + ori_digit_bound_6[11:0] = 12'h2b0;//688 + ori_digit_bound_7[11:0] = 12'h32f;//815 + ori_digit_bound_8[11:0] = 12'h3ae;//942 + ori_digit_bound_9[11:0] = 12'h42e;//1070 + + end + 7'h7f: + begin + ori_digit_bound_1[11:0] = 12'h34;//52 + ori_digit_bound_2[11:0] = 12'hb4;//180 + ori_digit_bound_3[11:0] = 12'h134;//308 + ori_digit_bound_4[11:0] = 12'h1b5;//437 + ori_digit_bound_5[11:0] = 12'h235;//565 + ori_digit_bound_6[11:0] = 12'h2b5;//693 + ori_digit_bound_7[11:0] = 12'h335;//821 + ori_digit_bound_8[11:0] = 12'h3b6;//950 + ori_digit_bound_9[11:0] = 12'h436;//1078 + + end + 7'h00: + begin + 
ori_digit_bound_1[11:0] = 12'h34;//52 + ori_digit_bound_2[11:0] = 12'hb5;//181 + ori_digit_bound_3[11:0] = 12'h137;//311 + ori_digit_bound_4[11:0] = 12'h1b8;//440 + ori_digit_bound_5[11:0] = 12'h239;//569 + ori_digit_bound_6[11:0] = 12'h2bb;//699 + ori_digit_bound_7[11:0] = 12'h33c;//828 + ori_digit_bound_8[11:0] = 12'h3bd;//957 + ori_digit_bound_9[11:0] = 12'h43e;//1086 + end + default: + begin + ori_digit_bound_1[11:0] = {12{1'bx}}; + ori_digit_bound_2[11:0] = {12{1'bx}}; + ori_digit_bound_3[11:0] = {12{1'bx}}; + ori_digit_bound_4[11:0] = {12{1'bx}}; + ori_digit_bound_5[11:0] = {12{1'bx}}; + ori_digit_bound_6[11:0] = {12{1'bx}}; + ori_digit_bound_7[11:0] = {12{1'bx}}; + ori_digit_bound_8[11:0] = {12{1'bx}}; + ori_digit_bound_9[11:0] = {12{1'bx}}; + end +endcase +// &CombEnd; @886 +end +// &CombBeg; @887 +always @( bound_sel[6:0]) +begin +case(bound_sel[6:0]) + 7'h40: + begin + sqrt_digit_boundp2_1[11:0] = 12'h21;//33 + sqrt_digit_boundp2_2[11:0] = 12'h62;//98 + sqrt_digit_boundp2_3[11:0] = 12'ha4;//164 + sqrt_digit_boundp2_4[11:0] = 12'he7;//231 + sqrt_digit_boundp2_5[11:0] = 12'h12b;//299 + sqrt_digit_boundp2_6[11:0] = 12'h170;//368 + sqrt_digit_boundp2_7[11:0] = 12'h1b6;//438 + sqrt_digit_boundp2_8[11:0] = 12'h1fd;//509 + sqrt_digit_boundp2_9[11:0] = 12'h245;//581 + end + 7'h50: + begin + sqrt_digit_boundp2_1[11:0] = 12'h29;//41 + sqrt_digit_boundp2_2[11:0] = 12'h7a;//122 + sqrt_digit_boundp2_3[11:0] = 12'hcc;//204 + sqrt_digit_boundp2_4[11:0] = 12'h11f;//287 + sqrt_digit_boundp2_5[11:0] = 12'h173;//371 + sqrt_digit_boundp2_6[11:0] = 12'h1c8;//456 + sqrt_digit_boundp2_7[11:0] = 12'h21e;//542 + sqrt_digit_boundp2_8[11:0] = 12'h275;//629 + sqrt_digit_boundp2_9[11:0] = 12'h2cd;//717 + end + 7'h60: + begin + sqrt_digit_boundp2_1[11:0] = 12'h31;//49 + sqrt_digit_boundp2_2[11:0] = 12'h92;//146 + sqrt_digit_boundp2_3[11:0] = 12'hf4;//244 + sqrt_digit_boundp2_4[11:0] = 12'h157;//343 + sqrt_digit_boundp2_5[11:0] = 12'h1bb;//443 + sqrt_digit_boundp2_6[11:0] = 
12'h220;//544 + sqrt_digit_boundp2_7[11:0] = 12'h286;//646 + sqrt_digit_boundp2_8[11:0] = 12'h2ed;//749 + sqrt_digit_boundp2_9[11:0] = 12'h355;//853 + end + 7'h70: + begin + sqrt_digit_boundp2_1[11:0] = 12'h39;//57 + sqrt_digit_boundp2_2[11:0] = 12'haa;//170 + sqrt_digit_boundp2_3[11:0] = 12'h11c;//284 + sqrt_digit_boundp2_4[11:0] = 12'h18f;//399 + sqrt_digit_boundp2_5[11:0] = 12'h203;//515 + sqrt_digit_boundp2_6[11:0] = 12'h278;//632 + sqrt_digit_boundp2_7[11:0] = 12'h2ee;//750 + sqrt_digit_boundp2_8[11:0] = 12'h365;//869 + sqrt_digit_boundp2_9[11:0] = 12'h3dd;//989 + end + 7'h00: + begin + sqrt_digit_boundp2_1[11:0] = 12'h41;//65 + sqrt_digit_boundp2_2[11:0] = 12'hc2;//194 + sqrt_digit_boundp2_3[11:0] = 12'h144;//324 + sqrt_digit_boundp2_4[11:0] = 12'h1c7;//455 + sqrt_digit_boundp2_5[11:0] = 12'h24b;//587 + sqrt_digit_boundp2_6[11:0] = 12'h2d0;//720 + sqrt_digit_boundp2_7[11:0] = 12'h356;//854 + sqrt_digit_boundp2_8[11:0] = 12'h3dd;//989 + sqrt_digit_boundp2_9[11:0] = 12'h465;//1125 + end + default: + begin + sqrt_digit_boundp2_1[11:0] = {12{1'bx}};//-66 + sqrt_digit_boundp2_2[11:0] = {12{1'bx}};//-190 + sqrt_digit_boundp2_3[11:0] = {12{1'bx}};//-324 + sqrt_digit_boundp2_4[11:0] = {12{1'bx}};//-450 + sqrt_digit_boundp2_5[11:0] = {12{1'bx}};//-588 + sqrt_digit_boundp2_6[11:0] = {12{1'bx}};//-720 + sqrt_digit_boundp2_7[11:0] = {12{1'bx}};//-852 + sqrt_digit_boundp2_8[11:0] = {12{1'bx}};//-988 + sqrt_digit_boundp2_9[11:0] = {12{1'bx}};//-1120 + end + + +endcase +// &CombEnd; @964 +end +// &CombBeg; @965 +always @( bound_sel[6:0]) +begin +case(bound_sel[6:0]) + 7'h40: + begin + sqrt_digit_boundm2_1[11:0] = 12'h20;//32 + sqrt_digit_boundm2_2[11:0] = 12'h5f;//95 + sqrt_digit_boundm2_3[11:0] = 12'h9d;//157 + sqrt_digit_boundm2_4[11:0] = 12'hda;//218 + sqrt_digit_boundm2_5[11:0] = 12'h116;//278 + sqrt_digit_boundm2_6[11:0] = 12'h151;//337 + sqrt_digit_boundm2_7[11:0] = 12'h18b;//395 + sqrt_digit_boundm2_8[11:0] = 12'h1c4;//452 + sqrt_digit_boundm2_9[11:0] = 12'h1fc;//508 
+ end + 7'h50: + begin + sqrt_digit_boundm2_1[11:0] = 12'h28;//40 + sqrt_digit_boundm2_2[11:0] = 12'h77;//119 + sqrt_digit_boundm2_3[11:0] = 12'hc5;//197 + sqrt_digit_boundm2_4[11:0] = 12'h112;//274 + sqrt_digit_boundm2_5[11:0] = 12'h15e;//350 + sqrt_digit_boundm2_6[11:0] = 12'h1a9;//425 + sqrt_digit_boundm2_7[11:0] = 12'h1f3;//499 + sqrt_digit_boundm2_8[11:0] = 12'h23c;//572 + sqrt_digit_boundm2_9[11:0] = 12'h284;//644 + end + 7'h60: + begin + sqrt_digit_boundm2_1[11:0] = 12'h30;//48 + sqrt_digit_boundm2_2[11:0] = 12'h8f;//143 + sqrt_digit_boundm2_3[11:0] = 12'hed;//237 + sqrt_digit_boundm2_4[11:0] = 12'h14a;//330 + sqrt_digit_boundm2_5[11:0] = 12'h1a6;//422 + sqrt_digit_boundm2_6[11:0] = 12'h201;//513 + sqrt_digit_boundm2_7[11:0] = 12'h25b;//603 + sqrt_digit_boundm2_8[11:0] = 12'h2b4;//692 + sqrt_digit_boundm2_9[11:0] = 12'h30c;//780 + end + 7'h70: + begin + sqrt_digit_boundm2_1[11:0] = 12'h38;//56 + sqrt_digit_boundm2_2[11:0] = 12'ha7;//167 + sqrt_digit_boundm2_3[11:0] = 12'h115;//277 + sqrt_digit_boundm2_4[11:0] = 12'h182;//386 + sqrt_digit_boundm2_5[11:0] = 12'h1ee;//494 + sqrt_digit_boundm2_6[11:0] = 12'h259;//601 + sqrt_digit_boundm2_7[11:0] = 12'h2c3;//707 + sqrt_digit_boundm2_8[11:0] = 12'h32c;//812 + sqrt_digit_boundm2_9[11:0] = 12'h394;//916 + end + 7'h00: + begin + sqrt_digit_boundm2_1[11:0] = 12'h40;//64 + sqrt_digit_boundm2_2[11:0] = 12'hbf;//191 + sqrt_digit_boundm2_3[11:0] = 12'h13d;//317 + sqrt_digit_boundm2_4[11:0] = 12'h1ba;//442 + sqrt_digit_boundm2_5[11:0] = 12'h236;//566 + sqrt_digit_boundm2_6[11:0] = 12'h2b1;//689 + sqrt_digit_boundm2_7[11:0] = 12'h32b;//811 + sqrt_digit_boundm2_8[11:0] = 12'h3a4;//932 + sqrt_digit_boundm2_9[11:0] = 12'h41c;//1052 + end + + default: + begin + sqrt_digit_boundm2_1[11:0] = {12{1'bx}};//-66 + sqrt_digit_boundm2_2[11:0] = {12{1'bx}};//-190 + sqrt_digit_boundm2_3[11:0] = {12{1'bx}};//-324 + sqrt_digit_boundm2_4[11:0] = {12{1'bx}};//-450 + sqrt_digit_boundm2_5[11:0] = {12{1'bx}};//-588 + sqrt_digit_boundm2_6[11:0] 
= {12{1'bx}};//-720 + sqrt_digit_boundm2_7[11:0] = {12{1'bx}};//-852 + sqrt_digit_boundm2_8[11:0] = {12{1'bx}};//-988 + sqrt_digit_boundm2_9[11:0] = {12{1'bx}};//-1120 + end +endcase +// &CombEnd; @1041 +end +assign sqrt_digit_bound2_1[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_1[11:0] : sqrt_digit_boundp2_1[11:0]; +assign sqrt_digit_bound2_2[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_2[11:0] : sqrt_digit_boundp2_2[11:0]; +assign sqrt_digit_bound2_3[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_3[11:0] : sqrt_digit_boundp2_3[11:0]; +assign sqrt_digit_bound2_4[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_4[11:0] : sqrt_digit_boundp2_4[11:0]; +assign sqrt_digit_bound2_5[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_5[11:0] : sqrt_digit_boundp2_5[11:0]; +assign sqrt_digit_bound2_6[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_6[11:0] : sqrt_digit_boundp2_6[11:0]; +assign sqrt_digit_bound2_7[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_7[11:0] : sqrt_digit_boundp2_7[11:0]; +assign sqrt_digit_bound2_8[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_8[11:0] : sqrt_digit_boundp2_8[11:0]; +assign sqrt_digit_bound2_9[11:0] = sqrt_secd_round_sign ? sqrt_digit_boundm2_9[11:0] : sqrt_digit_boundp2_9[11:0]; + + +assign digit_bound_1[11:0] = sqrt_first_round ? 12'h2 : + sqrt_secd_round ? sqrt_digit_bound2_1[11:0] + : ori_digit_bound_1[11:0]; //-2 +assign digit_bound_2[11:0] = sqrt_first_round ? 12'h10 : + sqrt_secd_round ? sqrt_digit_bound2_2[11:0] + : ori_digit_bound_2[11:0]; //-16 +assign digit_bound_3[11:0] = sqrt_first_round ? 12'h35 : + sqrt_secd_round ? sqrt_digit_bound2_3[11:0] + : ori_digit_bound_3[11:0]; //-53 +assign digit_bound_4[11:0] = sqrt_first_round ? 12'h5f : + sqrt_secd_round ? sqrt_digit_bound2_4[11:0] + : ori_digit_bound_4[11:0]; //-95 +assign digit_bound_5[11:0] = sqrt_first_round ? 12'ha0 : + sqrt_secd_round ? sqrt_digit_bound2_5[11:0] + : ori_digit_bound_5[11:0]; //-160 +assign digit_bound_6[11:0] = sqrt_first_round ? 
12'hf0 : + sqrt_secd_round ? sqrt_digit_bound2_6[11:0] + : ori_digit_bound_6[11:0]; //-240 +assign digit_bound_7[11:0] = sqrt_first_round ? 12'h14f : + sqrt_secd_round ? sqrt_digit_bound2_7[11:0] + : ori_digit_bound_7[11:0]; //-335 +assign digit_bound_8[11:0] = sqrt_first_round ? 12'h1c2 : + sqrt_secd_round ? sqrt_digit_bound2_8[11:0] + : ori_digit_bound_8[11:0]; //-450 +assign digit_bound_9[11:0] = sqrt_first_round ? 12'h23a : + sqrt_secd_round ? sqrt_digit_bound2_9[11:0] + : ori_digit_bound_9[11:0]; //-570 +// &ModuleEnd; @1080 +endmodule + + + diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v new file mode 100644 index 00000000..77a95ae9 --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v @@ -0,0 +1,1152 @@ +/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// &ModuleBeg; @22 +module ct_vfdsu_srt_radix16_with_sqrt( + cp0_vfpu_icg_en, + cp0_yy_clk_en, + cpurst_b, + forever_cpuclk, + initial_bound_sel_in, + initial_divisor_in, + initial_remainder_in, + initial_srt_en, + initial_srt_sel_div_in, + initial_srt_sel_sqrt_in, + pad_yy_icg_scan_en, + srt_first_round, + srt_remainder, + srt_remainder_out, + srt_remainder_sign, + srt_secd_round, + srt_sm_on, + total_qt_rt, + vdiv_qt_rt +); + +// &Ports; @23 +input cp0_vfpu_icg_en; +input cp0_yy_clk_en; +input cpurst_b; +input forever_cpuclk; +input [6 :0] initial_bound_sel_in; +input [55:0] initial_divisor_in; +input [60:0] initial_remainder_in; +input initial_srt_en; +input initial_srt_sel_div_in; +input initial_srt_sel_sqrt_in; +input pad_yy_icg_scan_en; +input srt_first_round; +input srt_secd_round; +input srt_sm_on; +output [60:0] srt_remainder; +output [59:0] srt_remainder_out; +output srt_remainder_sign; +output [57:0] total_qt_rt; +output [57:0] vdiv_qt_rt; + +// &Regs; @24 +reg [6 :0] bound_sel; +reg [60:0] cur_rem; +reg [57:0] qt_rt_const_shift_std; +reg [55:0] srt_divisor; +reg [60:0] srt_remainder; +reg [60:0] srt_remainder_minus; +reg [60:0] srt_remainder_minus_nxt; +reg srt_sel_div; +reg srt_sel_sqrt; +reg [57:0] total_qt_rt; +reg [57:0] total_qt_rt_minus; +reg [57:0] total_qt_rt_minus_next; +reg [57:0] total_qt_rt_next; + +// &Wires; @25 +wire bound1_cmp_sign; +wire bound2_cmp_sign; +wire bound3_cmp_sign; +wire bound4_cmp_sign; +wire bound5_cmp_sign; +wire bound6_cmp_sign; +wire bound7_cmp_sign; +wire bound8_cmp_sign; +wire bound9_cmp_sign; +wire [8 :0] bound_cmp_sign; +wire cp0_vfpu_icg_en; +wire cp0_yy_clk_en; +wire cpurst_b; +wire [60:0] cur_rem_1; +wire [60:0] cur_rem_2; +wire [60:0] cur_rem_3; +wire [60:0] cur_rem_4; +wire [60:0] cur_rem_5; +wire [60:0] cur_rem_6; +wire [60:0] cur_rem_7; +wire [60:0] cur_rem_8; +wire [60:0] cur_rem_9; +wire [11:0] digit_bound_1; +wire [11:0] digit_bound_2; +wire [11:0] digit_bound_3; +wire [11:0] digit_bound_4; +wire 
[11:0] digit_bound_5; +wire [11:0] digit_bound_6; +wire [11:0] digit_bound_7; +wire [11:0] digit_bound_8; +wire [11:0] digit_bound_9; +wire [60:0] div_qt_1_rem_add_op1; +wire [60:0] div_qt_2_rem_add_op1; +wire [60:0] div_qt_3_rem_add_op1_0; +wire [60:0] div_qt_3_rem_add_op1_1; +wire [60:0] div_qt_4_rem_add_op1; +wire [60:0] div_qt_5_rem_add_op1_0; +wire [60:0] div_qt_5_rem_add_op1_1; +wire [60:0] div_qt_6_rem_add_op1_0; +wire [60:0] div_qt_6_rem_add_op1_1; +wire [60:0] div_qt_7_rem_add_op1_0; +wire [60:0] div_qt_7_rem_add_op1_1; +wire [60:0] div_qt_8_rem_add_op1; +wire [60:0] div_qt_9_rem_add_op1_0; +wire [60:0] div_qt_9_rem_add_op1_1; +wire [60:0] div_qt_r1_rem_add_op1; +wire [60:0] div_qt_r2_rem_add_op1; +wire [60:0] div_qt_r3_rem_add_op1_0; +wire [60:0] div_qt_r3_rem_add_op1_1; +wire [60:0] div_qt_r4_rem_add_op1; +wire [60:0] div_qt_r5_rem_add_op1_0; +wire [60:0] div_qt_r5_rem_add_op1_1; +wire [60:0] div_qt_r6_rem_add_op1_0; +wire [60:0] div_qt_r6_rem_add_op1_1; +wire [60:0] div_qt_r7_rem_add_op1_0; +wire [60:0] div_qt_r7_rem_add_op1_1; +wire [60:0] div_qt_r8_rem_add_op1; +wire [60:0] div_qt_r9_rem_add_op1_0; +wire [60:0] div_qt_r9_rem_add_op1_1; +wire [60:0] div_rem_add1_op1; +wire [60:0] div_rem_add2_op1; +wire [60:0] div_rem_add3_op1_0; +wire [60:0] div_rem_add3_op1_1; +wire [60:0] div_rem_add4_op1; +wire [60:0] div_rem_add5_op1_0; +wire [60:0] div_rem_add5_op1_1; +wire [60:0] div_rem_add6_op1_0; +wire [60:0] div_rem_add6_op1_1; +wire [60:0] div_rem_add7_op1_0; +wire [60:0] div_rem_add7_op1_1; +wire [60:0] div_rem_add8_op1; +wire [60:0] div_rem_add9_op1_0; +wire [60:0] div_rem_add9_op1_1; +wire forever_cpuclk; +wire [6 :0] initial_bound_sel_in; +wire [55:0] initial_divisor_in; +wire [60:0] initial_remainder_in; +wire initial_srt_en; +wire initial_srt_sel_div_in; +wire initial_srt_sel_sqrt_in; +wire pad_yy_icg_scan_en; +wire [11:0] part_rem; +wire [62:0] qt_rt_const_q1; +wire [62:0] qt_rt_const_q10; +wire [62:0] qt_rt_const_q11; +wire [62:0] qt_rt_const_q112; 
+wire [62:0] qt_rt_const_q12; +wire [62:0] qt_rt_const_q128; +wire [62:0] qt_rt_const_q13; +wire [62:0] qt_rt_const_q14; +wire [62:0] qt_rt_const_q15; +wire [62:0] qt_rt_const_q16; +wire [62:0] qt_rt_const_q17; +wire [62:0] qt_rt_const_q192; +wire [62:0] qt_rt_const_q2; +wire [62:0] qt_rt_const_q23; +wire [62:0] qt_rt_const_q24; +wire [62:0] qt_rt_const_q27; +wire [62:0] qt_rt_const_q3; +wire [62:0] qt_rt_const_q31; +wire [62:0] qt_rt_const_q32; +wire [62:0] qt_rt_const_q4; +wire [62:0] qt_rt_const_q44; +wire [62:0] qt_rt_const_q5; +wire [62:0] qt_rt_const_q56; +wire [62:0] qt_rt_const_q6; +wire [62:0] qt_rt_const_q60; +wire [62:0] qt_rt_const_q64; +wire [62:0] qt_rt_const_q7; +wire [62:0] qt_rt_const_q8; +wire [62:0] qt_rt_const_q80; +wire [62:0] qt_rt_const_q9; +wire [57:0] qt_rt_const_shift_std_next; +wire [60:0] rem_add1_op1; +wire [60:0] rem_add2_op1; +wire [60:0] rem_add3_op1_0; +wire [60:0] rem_add3_op1_1; +wire [60:0] rem_add4_op1; +wire [60:0] rem_add5_op1_0; +wire [60:0] rem_add5_op1_1; +wire [60:0] rem_add6_op1_0; +wire [60:0] rem_add6_op1_1; +wire [60:0] rem_add7_op1_0; +wire [60:0] rem_add7_op1_1; +wire [60:0] rem_add8_op1; +wire [60:0] rem_add9_op1_0; +wire [60:0] rem_add9_op1_1; +wire [60:0] rem_minus_minus_6; +wire rem_sign; +wire [60:0] remainder_minus_nor_nxt_0; +wire [60:0] remainder_minus_nor_nxt_1; +wire [60:0] remainder_minus_nor_nxt_2; +wire [60:0] remainder_minus_nor_nxt_3; +wire [60:0] remainder_minus_nor_nxt_4; +wire [60:0] remainder_minus_nor_nxt_5; +wire [60:0] remainder_minus_nor_nxt_6; +wire [60:0] remainder_minus_nor_nxt_7; +wire [60:0] remainder_minus_nor_nxt_8; +wire [60:0] remainder_minus_nor_nxt_9; +wire [60:0] remainder_minus_shift; +wire [60:0] remainder_shift; +wire sqrt_first_round; +wire [60:0] sqrt_qt_1_rem_add_op1; +wire [60:0] sqrt_qt_2_rem_add_op1; +wire [60:0] sqrt_qt_3_rem_add_op1_0; +wire [60:0] sqrt_qt_3_rem_add_op1_1; +wire [60:0] sqrt_qt_4_rem_add_op1; +wire [60:0] sqrt_qt_5_rem_add_op1_0; +wire [60:0] 
sqrt_qt_5_rem_add_op1_1; +wire [60:0] sqrt_qt_6_rem_add_op1_0; +wire [60:0] sqrt_qt_6_rem_add_op1_1; +wire [60:0] sqrt_qt_7_rem_add_op1_0; +wire [60:0] sqrt_qt_7_rem_add_op1_1; +wire [60:0] sqrt_qt_8_rem_add_op1; +wire [60:0] sqrt_qt_9_rem_add_op1_0; +wire [60:0] sqrt_qt_9_rem_add_op1_1; +wire [60:0] sqrt_qt_r1_rem_add_op1; +wire [60:0] sqrt_qt_r2_rem_add_op1; +wire [60:0] sqrt_qt_r3_rem_add_op1_0; +wire [60:0] sqrt_qt_r3_rem_add_op1_1; +wire [60:0] sqrt_qt_r4_rem_add_op1; +wire [60:0] sqrt_qt_r5_rem_add_op1_0; +wire [60:0] sqrt_qt_r5_rem_add_op1_1; +wire [60:0] sqrt_qt_r6_rem_add_op1_0; +wire [60:0] sqrt_qt_r6_rem_add_op1_1; +wire [60:0] sqrt_qt_r7_rem_add_op1_0; +wire [60:0] sqrt_qt_r7_rem_add_op1_1; +wire [60:0] sqrt_qt_r8_rem_add_op1; +wire [60:0] sqrt_qt_r9_rem_add_op1_0; +wire [60:0] sqrt_qt_r9_rem_add_op1_1; +wire [60:0] sqrt_rem_add1_op1; +wire [60:0] sqrt_rem_add2_op1; +wire [60:0] sqrt_rem_add3_op1_0; +wire [60:0] sqrt_rem_add3_op1_1; +wire [60:0] sqrt_rem_add4_op1; +wire [60:0] sqrt_rem_add5_op1_0; +wire [60:0] sqrt_rem_add5_op1_1; +wire [60:0] sqrt_rem_add6_op1_0; +wire [60:0] sqrt_rem_add6_op1_1; +wire [60:0] sqrt_rem_add7_op1_0; +wire [60:0] sqrt_rem_add7_op1_1; +wire [60:0] sqrt_rem_add8_op1; +wire [60:0] sqrt_rem_add9_op1_0; +wire [60:0] sqrt_rem_add9_op1_1; +wire sqrt_secd_round; +wire sqrt_secd_round_sign; +wire srt_div_clk; +wire srt_div_clk_en; +wire srt_first_round; +wire srt_qt_rem_clk; +wire srt_qt_rem_clk_en; +wire [60:0] srt_remainder_nxt; +wire [59:0] srt_remainder_out; +wire srt_remainder_sign; +wire srt_secd_round; +wire srt_sm_on; +wire [57:0] vdiv_qt_rt; + + +parameter DATA_WIDTH = 56; +parameter REM_WIDTH = 61; +parameter QT_WIDTH = 58; +//========================================================== +// SRT Remainder & Divisor for Quotient/Root Generate +//========================================================== +assign srt_qt_rem_clk_en = initial_srt_en || srt_sm_on; + +// &Instance("gated_clk_cell","x_srt_qt_rem_clk"); @35 
+gated_clk_cell x_srt_qt_rem_clk ( + .clk_in (forever_cpuclk ), + .clk_out (srt_qt_rem_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (srt_qt_rem_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @36 +// .clk_out (srt_qt_rem_clk), @37 +// .external_en (1'b0), @38 +// .global_en (cp0_yy_clk_en), @39 +// .local_en (srt_qt_rem_clk_en), @40 +// .module_en (cp0_vfpu_icg_en) @41 +// ); @42 + +assign srt_div_clk_en = initial_srt_en; + +// &Instance("gated_clk_cell","x_srt_div_clk"); @46 +gated_clk_cell x_srt_div_clk ( + .clk_in (forever_cpuclk ), + .clk_out (srt_div_clk ), + .external_en (1'b0 ), + .global_en (cp0_yy_clk_en ), + .local_en (srt_div_clk_en ), + .module_en (cp0_vfpu_icg_en ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en) +); + +// &Connect( .clk_in (forever_cpuclk), @47 +// .clk_out (srt_div_clk), @48 +// .external_en (1'b0), @49 +// .global_en (cp0_yy_clk_en), @50 +// .local_en (srt_div_clk_en), @51 +// .module_en (cp0_vfpu_icg_en) @52 +// ); @53 + +always @(posedge srt_qt_rem_clk or negedge cpurst_b) +begin + if(!cpurst_b) + srt_remainder[REM_WIDTH-1:0] <= {REM_WIDTH{1'b0}}; + else if(initial_srt_en) + srt_remainder[REM_WIDTH-1:0] <= initial_remainder_in[REM_WIDTH-1:0]; + else if(srt_sm_on) + srt_remainder[REM_WIDTH-1:0] <= srt_remainder_nxt[REM_WIDTH-1:0]; + else + srt_remainder[REM_WIDTH-1:0] <= srt_remainder[REM_WIDTH-1:0]; +end +// &Force("output","srt_remainder"); @66 +always @(posedge srt_div_clk or negedge cpurst_b) +begin + if(!cpurst_b) + srt_divisor[DATA_WIDTH-1:0] <= {DATA_WIDTH{1'b0}}; + else if(initial_srt_en) + srt_divisor[DATA_WIDTH-1:0] <= initial_divisor_in[DATA_WIDTH-1:0]; + else + srt_divisor[DATA_WIDTH-1:0] <= srt_divisor[DATA_WIDTH-1:0]; +end + +always @(posedge srt_qt_rem_clk or negedge cpurst_b) +begin + if(!cpurst_b) + bound_sel[6:0] <= {7{1'b0}}; + else if(initial_srt_en) + bound_sel[6:0] <= initial_bound_sel_in[6:0]; + else 
if(srt_sm_on && srt_sel_sqrt) + bound_sel[6:0] <= total_qt_rt_next[QT_WIDTH-2:QT_WIDTH-8]; + else + bound_sel[6:0] <= bound_sel[6:0]; +end + +always @(posedge srt_div_clk or negedge cpurst_b) +begin + if(!cpurst_b) + srt_sel_div <= 1'b0; + else if(initial_srt_en) + srt_sel_div <= initial_srt_sel_div_in; + else + srt_sel_div <= srt_sel_div; +end + +always @(posedge srt_div_clk or negedge cpurst_b) +begin + if(!cpurst_b) + srt_sel_sqrt <= 1'b0; + else if(initial_srt_en) + srt_sel_sqrt <= initial_srt_sel_sqrt_in; + else + srt_sel_sqrt <= srt_sel_sqrt; +end + +always @(posedge srt_qt_rem_clk or negedge cpurst_b) +begin + if(!cpurst_b) + begin + qt_rt_const_shift_std[QT_WIDTH-1:0] <= {QT_WIDTH{1'b0}}; + total_qt_rt[QT_WIDTH-1:0] <= {QT_WIDTH{1'b0}}; + total_qt_rt_minus[QT_WIDTH-1:0] <= {QT_WIDTH{1'b0}}; + end + else if(initial_srt_en) + begin + qt_rt_const_shift_std[QT_WIDTH-1:0] <= {4'b0001,{(QT_WIDTH-4){1'b0}}}; + total_qt_rt[QT_WIDTH-1:0] <= {QT_WIDTH{1'b0}}; + total_qt_rt_minus[QT_WIDTH-1:0] <= {QT_WIDTH{1'b0}}; + end + else if(srt_sm_on) + begin + qt_rt_const_shift_std[QT_WIDTH-1:0] <= qt_rt_const_shift_std_next[QT_WIDTH-1:0]; + total_qt_rt[QT_WIDTH-1:0] <= total_qt_rt_next[QT_WIDTH-1:0]; + total_qt_rt_minus[QT_WIDTH-1:0] <= total_qt_rt_minus_next[QT_WIDTH-1:0]; + end + else + begin + qt_rt_const_shift_std[QT_WIDTH-1:0] <= qt_rt_const_shift_std[QT_WIDTH-1:0]; + total_qt_rt[QT_WIDTH-1:0] <= total_qt_rt[QT_WIDTH-1:0]; + total_qt_rt_minus[QT_WIDTH-1:0] <= total_qt_rt_minus[QT_WIDTH-1:0]; + end +end + +// &Force("output","total_qt_rt"); @137 +// &Force("output","vdiv_qt_rt"); @138 + +assign vdiv_qt_rt[QT_WIDTH-1:0] = srt_remainder[REM_WIDTH-1] + ? 
total_qt_rt_minus[QT_WIDTH-1:0] + : total_qt_rt[QT_WIDTH-1:0]; + +assign qt_rt_const_shift_std_next[QT_WIDTH-1:0] = {4'b0, qt_rt_const_shift_std[QT_WIDTH-1:4]}; + +//==================================================== +// boundary calculation +//==================================================== +//assign bound_sel[6:0] = srt_sel_div ? srt_divisor[DATA_WIDTH-1:DATA_WIDTH-7] +// : total_qt_rt[QT_WIDTH-2:QT_WIDTH-8]; + +// &Instance("ct_vfdsu_srt_radix16_bound_table"); @152 +ct_vfdsu_srt_radix16_bound_table x_ct_vfdsu_srt_radix16_bound_table ( + .bound_sel (bound_sel ), + .digit_bound_1 (digit_bound_1 ), + .digit_bound_2 (digit_bound_2 ), + .digit_bound_3 (digit_bound_3 ), + .digit_bound_4 (digit_bound_4 ), + .digit_bound_5 (digit_bound_5 ), + .digit_bound_6 (digit_bound_6 ), + .digit_bound_7 (digit_bound_7 ), + .digit_bound_8 (digit_bound_8 ), + .digit_bound_9 (digit_bound_9 ), + .sqrt_first_round (sqrt_first_round ), + .sqrt_secd_round (sqrt_secd_round ), + .sqrt_secd_round_sign (sqrt_secd_round_sign) +); + +assign sqrt_first_round = srt_sel_sqrt && srt_first_round; +assign sqrt_secd_round = srt_sel_sqrt && srt_secd_round; +assign sqrt_secd_round_sign = rem_sign; +assign rem_sign = srt_remainder[REM_WIDTH-1]; +assign part_rem[11:0] = rem_sign + ? 
~srt_remainder[REM_WIDTH-5:REM_WIDTH-16] + : srt_remainder[REM_WIDTH-5:REM_WIDTH-16]; +// &Force("nonport","bound1_cmp_result"); @160 +// &Force("nonport","bound2_cmp_result"); @161 +// &Force("nonport","bound3_cmp_result"); @162 +// &Force("nonport","bound4_cmp_result"); @163 +// &Force("nonport","bound5_cmp_result"); @164 +// &Force("nonport","bound6_cmp_result"); @165 +// &Force("nonport","bound7_cmp_result"); @166 +// &Force("nonport","bound8_cmp_result"); @167 +// &Force("nonport","bound9_cmp_result"); @168 +// &Force("bus","bound1_cmp_result",11,0); @169 +// &Force("bus","bound2_cmp_result",11,0); @170 +// &Force("bus","bound3_cmp_result",11,0); @171 +// &Force("bus","bound4_cmp_result",11,0); @172 +// &Force("bus","bound5_cmp_result",11,0); @173 +// &Force("bus","bound6_cmp_result",11,0); @174 +// &Force("bus","bound7_cmp_result",11,0); @175 +// &Force("bus","bound8_cmp_result",11,0); @176 +// &Force("bus","bound9_cmp_result",11,0); @177 +// &Force("nonport","digit_bound_1"); @178 +// &Force("nonport","digit_bound_2"); @179 +// &Force("nonport","digit_bound_3"); @180 +// &Force("nonport","digit_bound_4"); @181 +// &Force("nonport","digit_bound_5"); @182 +// &Force("nonport","digit_bound_6"); @183 +// &Force("nonport","digit_bound_7"); @184 +// &Force("nonport","digit_bound_8"); @185 +// &Force("nonport","digit_bound_9"); @186 +// &Force("nonport","part_rem"); @187 +////csky vperl_off +//assign bound1_cmp_result[11:0] = $unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_1[11:0])); +//assign bound2_cmp_result[11:0] = $unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_2[11:0])); +//assign bound3_cmp_result[11:0] = $unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_3[11:0])); +//assign bound4_cmp_result[11:0] = $unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_4[11:0])); +//assign bound5_cmp_result[11:0] = $unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_5[11:0])); +//assign bound6_cmp_result[11:0] = 
$unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_6[11:0])); +//assign bound7_cmp_result[11:0] = $unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_7[11:0])); +//assign bound8_cmp_result[11:0] = $unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_8[11:0])); +//assign bound9_cmp_result[11:0] = $unsigned($signed(part_rem[11:0]) +// + $signed(digit_bound_9[11:0])); +////csky vperl_on +//assign bound1_cmp_sign = bound1_cmp_result[11]; +//assign bound2_cmp_sign = bound2_cmp_result[11]; +//assign bound3_cmp_sign = bound3_cmp_result[11]; +//assign bound4_cmp_sign = bound4_cmp_result[11]; +//assign bound5_cmp_sign = bound5_cmp_result[11]; +//assign bound6_cmp_sign = bound6_cmp_result[11]; +//assign bound7_cmp_sign = bound7_cmp_result[11]; +//assign bound8_cmp_sign = bound8_cmp_result[11]; +//assign bound9_cmp_sign = bound9_cmp_result[11]; +assign bound1_cmp_sign = part_rem[11:0] < digit_bound_1[11:0]; +assign bound2_cmp_sign = part_rem[11:0] < digit_bound_2[11:0]; +assign bound3_cmp_sign = part_rem[11:0] < digit_bound_3[11:0]; +assign bound4_cmp_sign = part_rem[11:0] < digit_bound_4[11:0]; +assign bound5_cmp_sign = part_rem[11:0] < digit_bound_5[11:0]; +assign bound6_cmp_sign = part_rem[11:0] < digit_bound_6[11:0]; +assign bound7_cmp_sign = part_rem[11:0] < digit_bound_7[11:0]; +assign bound8_cmp_sign = part_rem[11:0] < digit_bound_8[11:0]; +assign bound9_cmp_sign = part_rem[11:0] < digit_bound_9[11:0]; +//==================================================== +// remainder calculation +//==================================================== +// the root preparation +assign qt_rt_const_q1[REM_WIDTH+1:0] = {5'b0,qt_rt_const_shift_std[QT_WIDTH-1:0]}; +assign qt_rt_const_q2[REM_WIDTH+1:0] = {4'b0,qt_rt_const_shift_std[QT_WIDTH-1:0],1'b0}; +assign qt_rt_const_q4[REM_WIDTH+1:0] = {3'b0,qt_rt_const_shift_std[QT_WIDTH-1:0],2'b0}; +assign qt_rt_const_q8[REM_WIDTH+1:0] = {2'b0,qt_rt_const_shift_std[QT_WIDTH-1:0],3'b0}; +assign qt_rt_const_q16[REM_WIDTH+1:0] 
= {1'b0,qt_rt_const_shift_std[QT_WIDTH-1:0],4'b0}; +assign qt_rt_const_q32[REM_WIDTH+1:0] = {qt_rt_const_shift_std[QT_WIDTH-1:0],5'b0}; +assign qt_rt_const_q64[REM_WIDTH+1:0] = {qt_rt_const_shift_std[QT_WIDTH-2:0],6'b0}; +assign qt_rt_const_q128[REM_WIDTH+1:0] = {qt_rt_const_shift_std[QT_WIDTH-3:0],7'b0}; +assign qt_rt_const_q3[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q2[REM_WIDTH+1:0]; +assign qt_rt_const_q5[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q4[REM_WIDTH+1:0]; +assign qt_rt_const_q6[REM_WIDTH+1:0] = qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q4[REM_WIDTH+1:0]; +assign qt_rt_const_q7[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q4[REM_WIDTH+1:0]; +assign qt_rt_const_q9[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0]; +assign qt_rt_const_q10[REM_WIDTH+1:0] = qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0]; +assign qt_rt_const_q11[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0]; +assign qt_rt_const_q12[REM_WIDTH+1:0] = qt_rt_const_q4[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0]; +assign qt_rt_const_q13[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q4[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0]; +assign qt_rt_const_q14[REM_WIDTH+1:0] = qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q4[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0]; +assign qt_rt_const_q15[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q4[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0]; +assign qt_rt_const_q17[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q16[REM_WIDTH+1:0]; +assign qt_rt_const_q23[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q4[REM_WIDTH+1:0] + |qt_rt_const_q16[REM_WIDTH+1:0]; +assign qt_rt_const_q24[REM_WIDTH+1:0] = 
qt_rt_const_q8[REM_WIDTH+1:0] + |qt_rt_const_q16[REM_WIDTH+1:0]; +assign qt_rt_const_q27[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0] + |qt_rt_const_q16[REM_WIDTH+1:0]; +assign qt_rt_const_q31[REM_WIDTH+1:0] = qt_rt_const_q1[REM_WIDTH+1:0] + |qt_rt_const_q2[REM_WIDTH+1:0] + |qt_rt_const_q4[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0] + |qt_rt_const_q16[REM_WIDTH+1:0]; +assign qt_rt_const_q44[REM_WIDTH+1:0] = qt_rt_const_q4[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0] + |qt_rt_const_q32[REM_WIDTH+1:0]; +assign qt_rt_const_q56[REM_WIDTH+1:0] = qt_rt_const_q8[REM_WIDTH+1:0] + |qt_rt_const_q16[REM_WIDTH+1:0] + |qt_rt_const_q32[REM_WIDTH+1:0]; +assign qt_rt_const_q60[REM_WIDTH+1:0] = qt_rt_const_q4[REM_WIDTH+1:0] + |qt_rt_const_q8[REM_WIDTH+1:0] + |qt_rt_const_q16[REM_WIDTH+1:0] + |qt_rt_const_q32[REM_WIDTH+1:0]; +assign qt_rt_const_q80[REM_WIDTH+1:0] = qt_rt_const_q16[REM_WIDTH+1:0] + |qt_rt_const_q64[REM_WIDTH+1:0]; +assign qt_rt_const_q112[REM_WIDTH+1:0] = qt_rt_const_q16[REM_WIDTH+1:0] + |qt_rt_const_q32[REM_WIDTH+1:0] + |qt_rt_const_q64[REM_WIDTH+1:0]; +assign qt_rt_const_q192[REM_WIDTH+1:0] = qt_rt_const_q64[REM_WIDTH+1:0] + |qt_rt_const_q128[REM_WIDTH+1:0]; +//===================================== +// the sqrt current remainder operand b +//===================================== +// the root is negative +// -1 +assign sqrt_qt_r1_rem_add_op1[REM_WIDTH-1:0] = {4'b0,total_qt_rt_minus[QT_WIDTH-1:1]} + |qt_rt_const_q31[REM_WIDTH+1:2]; +//-2 +assign sqrt_qt_r2_rem_add_op1[REM_WIDTH-1:0] = {3'b0,total_qt_rt_minus[QT_WIDTH-1:0]} + |qt_rt_const_q60[REM_WIDTH+1:2]; +//-4 +assign sqrt_qt_r4_rem_add_op1[REM_WIDTH-1:0] = {2'b0,total_qt_rt_minus[QT_WIDTH-1:0],1'b0} + |qt_rt_const_q112[REM_WIDTH+1:2]; +//-8 +assign sqrt_qt_r8_rem_add_op1[REM_WIDTH-1:0] = {1'b0,total_qt_rt_minus[QT_WIDTH-1:0],2'b0} + |qt_rt_const_q192[REM_WIDTH+1:2]; +//-3 +assign sqrt_qt_r3_rem_add_op1_0[REM_WIDTH-1:0] = 
sqrt_qt_r2_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_qt_r3_rem_add_op1_1[REM_WIDTH-1:0] = {4'b0,total_qt_rt_minus[QT_WIDTH-1:1]} + |qt_rt_const_q27[REM_WIDTH+1:2]; +//-5 112+23q-i-1 +assign sqrt_qt_r5_rem_add_op1_0[REM_WIDTH-1:0] = sqrt_qt_r4_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_qt_r5_rem_add_op1_1[REM_WIDTH-1:0] = {4'b0,total_qt_rt_minus[QT_WIDTH-1:1]} + |qt_rt_const_q23[REM_WIDTH+1:2]; +//-6 +assign sqrt_qt_r6_rem_add_op1_0[REM_WIDTH-1:0] = sqrt_qt_r4_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_qt_r6_rem_add_op1_1[REM_WIDTH-1:0] = {3'b0,total_qt_rt_minus[QT_WIDTH-1:0]} + |qt_rt_const_q44[REM_WIDTH+1:2]; +//-7 +assign sqrt_qt_r7_rem_add_op1_0[REM_WIDTH-1:0] = sqrt_qt_r8_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_qt_r7_rem_add_op1_1[REM_WIDTH-1:0] = ~({4'b0,total_qt_rt_minus[QT_WIDTH-1:1]} + |qt_rt_const_q17[REM_WIDTH+1:2]); +//-9 +assign sqrt_qt_r9_rem_add_op1_0[REM_WIDTH-1:0] = sqrt_qt_r8_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_qt_r9_rem_add_op1_1[REM_WIDTH-1:0] = {4'b0,total_qt_rt_minus[QT_WIDTH-1:1]} + | qt_rt_const_q15[REM_WIDTH+1:2]; +// the root is positive +// 1 +assign sqrt_qt_1_rem_add_op1[REM_WIDTH-1:0] = ~({4'b0,total_qt_rt[QT_WIDTH-1:1]} + | qt_rt_const_q1[REM_WIDTH+1:2]); +// 2 +assign sqrt_qt_2_rem_add_op1[REM_WIDTH-1:0] = ~({3'b0,total_qt_rt[QT_WIDTH-1:0]} + | qt_rt_const_q4[REM_WIDTH+1:2]); +// 4 +assign sqrt_qt_4_rem_add_op1[REM_WIDTH-1:0] = ~({2'b0,total_qt_rt[QT_WIDTH-1:0],1'b0} + | qt_rt_const_q16[REM_WIDTH+1:2]); +// 8 +assign sqrt_qt_8_rem_add_op1[REM_WIDTH-1:0] = ~({1'b0,total_qt_rt[QT_WIDTH-1:0],2'b0} + | qt_rt_const_q64[REM_WIDTH+1:2]); +// 3 +assign sqrt_qt_3_rem_add_op1_0[REM_WIDTH-1:0] = ~({3'b0,total_qt_rt[QT_WIDTH-1:0]} + |qt_rt_const_q8[REM_WIDTH+1:2]); +assign sqrt_qt_3_rem_add_op1_1[REM_WIDTH-1:0] = sqrt_qt_1_rem_add_op1[REM_WIDTH-1:0]; +//5 +assign sqrt_qt_5_rem_add_op1_0[REM_WIDTH-1:0] = ~({2'b0,total_qt_rt[QT_WIDTH-1:0],1'b0} + | qt_rt_const_q24[REM_WIDTH+1:2]); +assign sqrt_qt_5_rem_add_op1_1[REM_WIDTH-1:0] = 
sqrt_qt_1_rem_add_op1[REM_WIDTH-1:0]; +//6 +assign sqrt_qt_6_rem_add_op1_0[REM_WIDTH-1:0] = ~({2'b0,total_qt_rt[QT_WIDTH-1:0],1'b0} + | qt_rt_const_q32[REM_WIDTH+1:2]); +assign sqrt_qt_6_rem_add_op1_1[REM_WIDTH-1:0] = sqrt_qt_2_rem_add_op1[REM_WIDTH-1:0]; +//7 +assign sqrt_qt_7_rem_add_op1_0[REM_WIDTH-1:0] = ~({1'b0,total_qt_rt[QT_WIDTH-1:0],2'b0} + | qt_rt_const_q56[REM_WIDTH+1:2]); +assign sqrt_qt_7_rem_add_op1_1[REM_WIDTH-1:0] = {4'b0,total_qt_rt[QT_WIDTH-1:1]} + | qt_rt_const_q7[REM_WIDTH+1:2]; +//9 +assign sqrt_qt_9_rem_add_op1_0[REM_WIDTH-1:0] = ~({1'b0,total_qt_rt[QT_WIDTH-1:0],2'b0} + | qt_rt_const_q80[REM_WIDTH+1:2]); +assign sqrt_qt_9_rem_add_op1_1[REM_WIDTH-1:0] = sqrt_qt_1_rem_add_op1[REM_WIDTH-1:0]; + +assign sqrt_rem_add1_op1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r1_rem_add_op1[REM_WIDTH-1:0] + : sqrt_qt_1_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_rem_add2_op1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r2_rem_add_op1[REM_WIDTH-1:0] + : sqrt_qt_2_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_rem_add4_op1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r4_rem_add_op1[REM_WIDTH-1:0] + : sqrt_qt_4_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_rem_add8_op1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r8_rem_add_op1[REM_WIDTH-1:0] + : sqrt_qt_8_rem_add_op1[REM_WIDTH-1:0]; +assign sqrt_rem_add3_op1_0[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r3_rem_add_op1_0[REM_WIDTH-1:0] + : sqrt_qt_3_rem_add_op1_0[REM_WIDTH-1:0]; +assign sqrt_rem_add3_op1_1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r3_rem_add_op1_1[REM_WIDTH-1:0] + : sqrt_qt_3_rem_add_op1_1[REM_WIDTH-1:0]; +assign sqrt_rem_add5_op1_0[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r5_rem_add_op1_0[REM_WIDTH-1:0] + : sqrt_qt_5_rem_add_op1_0[REM_WIDTH-1:0]; +assign sqrt_rem_add5_op1_1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r5_rem_add_op1_1[REM_WIDTH-1:0] + : sqrt_qt_5_rem_add_op1_1[REM_WIDTH-1:0]; +assign sqrt_rem_add6_op1_0[REM_WIDTH-1:0] = rem_sign + ? 
sqrt_qt_r6_rem_add_op1_0[REM_WIDTH-1:0] + : sqrt_qt_6_rem_add_op1_0[REM_WIDTH-1:0]; +assign sqrt_rem_add6_op1_1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r6_rem_add_op1_1[REM_WIDTH-1:0] + : sqrt_qt_6_rem_add_op1_1[REM_WIDTH-1:0]; +assign sqrt_rem_add7_op1_0[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r7_rem_add_op1_0[REM_WIDTH-1:0] + : sqrt_qt_7_rem_add_op1_0[REM_WIDTH-1:0]; +assign sqrt_rem_add7_op1_1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r7_rem_add_op1_1[REM_WIDTH-1:0] + : sqrt_qt_7_rem_add_op1_1[REM_WIDTH-1:0]; +assign sqrt_rem_add9_op1_0[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r9_rem_add_op1_0[REM_WIDTH-1:0] + : sqrt_qt_9_rem_add_op1_0[REM_WIDTH-1:0]; +assign sqrt_rem_add9_op1_1[REM_WIDTH-1:0] = rem_sign + ? sqrt_qt_r9_rem_add_op1_1[REM_WIDTH-1:0] + : sqrt_qt_9_rem_add_op1_1[REM_WIDTH-1:0]; + +//===================================== +// the div current remainder oprand b +//===================================== +//negative +assign div_qt_r1_rem_add_op1[REM_WIDTH-1:0] = {5'b0,srt_divisor[DATA_WIDTH-1:0]}; +assign div_qt_r2_rem_add_op1[REM_WIDTH-1:0] = {4'b0,srt_divisor[DATA_WIDTH-1:0],1'b0}; +assign div_qt_r4_rem_add_op1[REM_WIDTH-1:0] = {3'b0,srt_divisor[DATA_WIDTH-1:0],2'b0}; +assign div_qt_r8_rem_add_op1[REM_WIDTH-1:0] = {2'b0,srt_divisor[DATA_WIDTH-1:0],3'b0}; +assign div_qt_r3_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_r2_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_r3_rem_add_op1_1[REM_WIDTH-1:0] = div_qt_r1_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_r5_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_r4_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_r5_rem_add_op1_1[REM_WIDTH-1:0] = div_qt_r1_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_r6_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_r4_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_r6_rem_add_op1_1[REM_WIDTH-1:0] = div_qt_r2_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_r7_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_r8_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_r7_rem_add_op1_1[REM_WIDTH-1:0] =~div_qt_r1_rem_add_op1[REM_WIDTH-1:0]; +assign 
div_qt_r9_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_r8_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_r9_rem_add_op1_1[REM_WIDTH-1:0] = div_qt_r1_rem_add_op1[REM_WIDTH-1:0]; +//positive +assign div_qt_1_rem_add_op1[REM_WIDTH-1:0] =~div_qt_r1_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_2_rem_add_op1[REM_WIDTH-1:0] =~div_qt_r2_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_4_rem_add_op1[REM_WIDTH-1:0] =~div_qt_r4_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_8_rem_add_op1[REM_WIDTH-1:0] =~div_qt_r8_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_3_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_2_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_3_rem_add_op1_1[REM_WIDTH-1:0] = div_qt_1_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_5_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_4_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_5_rem_add_op1_1[REM_WIDTH-1:0] = div_qt_1_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_6_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_4_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_6_rem_add_op1_1[REM_WIDTH-1:0] = div_qt_2_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_7_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_8_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_7_rem_add_op1_1[REM_WIDTH-1:0] = ~div_qt_1_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_9_rem_add_op1_0[REM_WIDTH-1:0] = div_qt_8_rem_add_op1[REM_WIDTH-1:0]; +assign div_qt_9_rem_add_op1_1[REM_WIDTH-1:0] = div_qt_1_rem_add_op1[REM_WIDTH-1:0]; +assign div_rem_add1_op1[REM_WIDTH-1:0] = rem_sign ? div_qt_r1_rem_add_op1[REM_WIDTH-1:0] + : div_qt_1_rem_add_op1[REM_WIDTH-1:0]; +assign div_rem_add2_op1[REM_WIDTH-1:0] = rem_sign ? div_qt_r2_rem_add_op1[REM_WIDTH-1:0] + : div_qt_2_rem_add_op1[REM_WIDTH-1:0]; +assign div_rem_add4_op1[REM_WIDTH-1:0] = rem_sign ? div_qt_r4_rem_add_op1[REM_WIDTH-1:0] + : div_qt_4_rem_add_op1[REM_WIDTH-1:0]; +assign div_rem_add8_op1[REM_WIDTH-1:0] = rem_sign ? div_qt_r8_rem_add_op1[REM_WIDTH-1:0] + : div_qt_8_rem_add_op1[REM_WIDTH-1:0]; +assign div_rem_add3_op1_0[REM_WIDTH-1:0] = rem_sign + ? 
div_qt_r3_rem_add_op1_0[REM_WIDTH-1:0] + : div_qt_3_rem_add_op1_0[REM_WIDTH-1:0]; +assign div_rem_add3_op1_1[REM_WIDTH-1:0] = rem_sign + ? div_qt_r3_rem_add_op1_1[REM_WIDTH-1:0] + : div_qt_3_rem_add_op1_1[REM_WIDTH-1:0]; +assign div_rem_add5_op1_0[REM_WIDTH-1:0] = rem_sign + ? div_qt_r5_rem_add_op1_0[REM_WIDTH-1:0] + : div_qt_5_rem_add_op1_0[REM_WIDTH-1:0]; +assign div_rem_add5_op1_1[REM_WIDTH-1:0] = rem_sign + ? div_qt_r5_rem_add_op1_1[REM_WIDTH-1:0] + : div_qt_5_rem_add_op1_1[REM_WIDTH-1:0]; +assign div_rem_add6_op1_0[REM_WIDTH-1:0] = rem_sign + ? div_qt_r6_rem_add_op1_0[REM_WIDTH-1:0] + : div_qt_6_rem_add_op1_0[REM_WIDTH-1:0]; +assign div_rem_add6_op1_1[REM_WIDTH-1:0] = rem_sign + ? div_qt_r6_rem_add_op1_1[REM_WIDTH-1:0] + : div_qt_6_rem_add_op1_1[REM_WIDTH-1:0]; +assign div_rem_add7_op1_0[REM_WIDTH-1:0] = rem_sign + ? div_qt_r7_rem_add_op1_0[REM_WIDTH-1:0] + : div_qt_7_rem_add_op1_0[REM_WIDTH-1:0]; +assign div_rem_add7_op1_1[REM_WIDTH-1:0] = rem_sign + ? div_qt_r7_rem_add_op1_1[REM_WIDTH-1:0] + : div_qt_7_rem_add_op1_1[REM_WIDTH-1:0]; +assign div_rem_add9_op1_0[REM_WIDTH-1:0] = rem_sign + ? div_qt_r9_rem_add_op1_0[REM_WIDTH-1:0] + : div_qt_9_rem_add_op1_0[REM_WIDTH-1:0]; +assign div_rem_add9_op1_1[REM_WIDTH-1:0] = rem_sign + ? 
div_qt_r9_rem_add_op1_1[REM_WIDTH-1:0] + : div_qt_9_rem_add_op1_1[REM_WIDTH-1:0]; +//===================================== +// the remainder calculation +//===================================== +assign rem_add1_op1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add1_op1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add1_op1[REM_WIDTH-1:0]); +assign rem_add2_op1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add2_op1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add2_op1[REM_WIDTH-1:0]); +assign rem_add4_op1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add4_op1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add4_op1[REM_WIDTH-1:0]); +assign rem_add8_op1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add8_op1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add8_op1[REM_WIDTH-1:0]); +assign rem_add3_op1_0[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add3_op1_0[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add3_op1_0[REM_WIDTH-1:0]); +assign rem_add3_op1_1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add3_op1_1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add3_op1_1[REM_WIDTH-1:0]); +assign rem_add5_op1_0[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add5_op1_0[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add5_op1_0[REM_WIDTH-1:0]); +assign rem_add5_op1_1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add5_op1_1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add5_op1_1[REM_WIDTH-1:0]); +assign rem_add6_op1_0[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add6_op1_0[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add6_op1_0[REM_WIDTH-1:0]); +assign rem_add6_op1_1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add6_op1_1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add6_op1_1[REM_WIDTH-1:0]); +assign rem_add7_op1_0[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add7_op1_0[REM_WIDTH-1:0]) + 
|({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add7_op1_0[REM_WIDTH-1:0]); +assign rem_add7_op1_1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add7_op1_1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add7_op1_1[REM_WIDTH-1:0]); +assign rem_add9_op1_0[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add9_op1_0[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add9_op1_0[REM_WIDTH-1:0]); +assign rem_add9_op1_1[REM_WIDTH-1:0] = ({REM_WIDTH{srt_sel_div}} & div_rem_add9_op1_1[REM_WIDTH-1:0]) + |({REM_WIDTH{srt_sel_sqrt}} & sqrt_rem_add9_op1_1[REM_WIDTH-1:0]); +// remainder calculation for all of the remainders +assign remainder_shift[REM_WIDTH-1:0] = {srt_remainder[REM_WIDTH-5:0],4'b0}; +// &Force("nonport","cur_rem_1"); @518 +// &Force("nonport","cur_rem_2"); @519 +// &Force("nonport","cur_rem_3"); @520 +// &Force("nonport","cur_rem_4"); @521 +// &Force("nonport","cur_rem_5"); @522 +// &Force("nonport","cur_rem_6"); @523 +// &Force("nonport","cur_rem_7"); @524 +// &Force("nonport","cur_rem_8"); @525 +// &Force("nonport","cur_rem_9"); @526 +// &Force("nonport","remainder_shift"); @527 +// &Force("nonport","rem_add1_op1"); @528 +// &Force("nonport","rem_add2_op1"); @529 +// &Force("nonport","rem_add3_op1_0"); @530 +// &Force("nonport","rem_add3_op1_1"); @531 +// &Force("nonport","rem_add4_op1"); @532 +// &Force("nonport","rem_add5_op1_0"); @533 +// &Force("nonport","rem_add5_op1_1"); @534 +// &Force("nonport","rem_add6_op1_0"); @535 +// &Force("nonport","rem_add6_op1_1"); @536 +// &Force("nonport","rem_add7_op1_0"); @537 +// &Force("nonport","rem_add7_op1_1"); @538 +// &Force("nonport","rem_add8_op1"); @539 +// &Force("nonport","rem_add9_op1_0"); @540 +// &Force("nonport","rem_add9_op1_1"); @541 +//csky vperl_off +assign cur_rem_1[REM_WIDTH-1:0] = $unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add1_op1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-1{1'b0}},~rem_sign})); +assign cur_rem_2[REM_WIDTH-1:0] = 
$unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add2_op1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-1{1'b0}},~rem_sign})); +assign cur_rem_4[REM_WIDTH-1:0] = $unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add4_op1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-1{1'b0}},~rem_sign})); +assign cur_rem_8[REM_WIDTH-1:0] = $unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add8_op1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-1{1'b0}},~rem_sign})); +assign cur_rem_3[REM_WIDTH-1:0] = $unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add3_op1_0[REM_WIDTH-1:0]) + + $signed(rem_add3_op1_1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-2{1'b0}},~rem_sign,1'b0})); +assign cur_rem_5[REM_WIDTH-1:0] = $unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add5_op1_0[REM_WIDTH-1:0]) + + $signed(rem_add5_op1_1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-2{1'b0}},~rem_sign,1'b0})); +assign cur_rem_6[REM_WIDTH-1:0] = $unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add6_op1_0[REM_WIDTH-1:0]) + + $signed(rem_add6_op1_1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-2{1'b0}},~rem_sign,1'b0})); +assign cur_rem_7[REM_WIDTH-1:0] = $unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add7_op1_0[REM_WIDTH-1:0]) + + $signed(rem_add7_op1_1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-1{1'b0}},1'b1})); +assign cur_rem_9[REM_WIDTH-1:0] = $unsigned($signed(remainder_shift[REM_WIDTH-1:0]) + + $signed(rem_add9_op1_0[REM_WIDTH-1:0]) + + $signed(rem_add9_op1_1[REM_WIDTH-1:0]) + + $signed({{REM_WIDTH-2{1'b0}},~rem_sign,1'b0})); +//csky vperl_on +//==================================================== +// quotient selection +//==================================================== +assign bound_cmp_sign[8:0] = {bound1_cmp_sign,bound2_cmp_sign,bound3_cmp_sign,bound4_cmp_sign, + bound5_cmp_sign,bound6_cmp_sign,bound7_cmp_sign,bound8_cmp_sign,bound9_cmp_sign}; + +// &CombBeg; @582 +always @( rem_sign + or bound_cmp_sign[8:0] + or 
qt_rt_const_q10[57:0] + or qt_rt_const_q6[57:0] + or qt_rt_const_q12[57:0] + or qt_rt_const_q13[57:0] + or qt_rt_const_q5[57:0] + or qt_rt_const_q9[57:0] + or qt_rt_const_q8[57:0] + or total_qt_rt_minus[57:0] + or qt_rt_const_q4[57:0] + or qt_rt_const_q2[57:0] + or qt_rt_const_q15[57:0] + or qt_rt_const_q1[57:0] + or qt_rt_const_q7[57:0] + or qt_rt_const_q14[57:0] + or qt_rt_const_q3[57:0] + or qt_rt_const_q11[57:0] + or total_qt_rt[57:0]) +begin +case({rem_sign,bound_cmp_sign[8:0]}) + 10'b0111111111: //0 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q15[QT_WIDTH-1:0]; + end + 10'b0011111111: //1 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q1[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0]; + end + 10'b0001111111://2 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q2[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q1[QT_WIDTH-1:0]; + end + 10'b0000111111://3 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q3[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q2[QT_WIDTH-1:0]; + end + 10'b0000011111://4 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q4[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q3[QT_WIDTH-1:0]; + end + 10'b0000001111://5 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q5[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q4[QT_WIDTH-1:0]; + end + 10'b0000000111://6 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q6[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + 
|qt_rt_const_q5[QT_WIDTH-1:0]; + end + 10'b0000000011://7 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q7[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q6[QT_WIDTH-1:0]; + end + 10'b0000000001://8 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q8[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q7[QT_WIDTH-1:0]; + end + 10'b0000000000://9 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q9[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0] + |qt_rt_const_q8[QT_WIDTH-1:0]; + end + 10'b1111111111: //0 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q15[QT_WIDTH-1:0]; + end + 10'b1011111111: //-1 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q15[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q14[QT_WIDTH-1:0]; + end + 10'b1001111111://-2 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q14[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q13[QT_WIDTH-1:0]; + end + 10'b1000111111://-3 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q13[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q12[QT_WIDTH-1:0]; + end + 10'b1000011111://-4 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q12[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q11[QT_WIDTH-1:0]; + end + 10'b1000001111://-5 + begin + total_qt_rt_next[QT_WIDTH-1:0] = 
total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q11[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q10[QT_WIDTH-1:0]; + end + 10'b1000000111://-6 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q10[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q9[QT_WIDTH-1:0]; + end + 10'b1000000011://-7 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q9[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q8[QT_WIDTH-1:0]; + end + 10'b1000000001://-8 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q8[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q7[QT_WIDTH-1:0]; + end + 10'b1000000000://-9 + begin + total_qt_rt_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q7[QT_WIDTH-1:0]; + total_qt_rt_minus_next[QT_WIDTH-1:0] = total_qt_rt_minus[QT_WIDTH-1:0] + |qt_rt_const_q6[QT_WIDTH-1:0]; + end + default : + begin + total_qt_rt_next[QT_WIDTH-1:0] = {QT_WIDTH{1'bx}}; + total_qt_rt_minus_next[QT_WIDTH-1:0] = {QT_WIDTH{1'bx}}; + end +endcase +// &CombEnd; @727 +end +//==================================================== +// remainder selection +//==================================================== +// &CombBeg; @731 +always @( cur_rem_2[60:0] + or remainder_shift[60:0] + or bound_cmp_sign[8:0] + or cur_rem_6[60:0] + or cur_rem_8[60:0] + or cur_rem_3[60:0] + or cur_rem_7[60:0] + or cur_rem_4[60:0] + or cur_rem_5[60:0] + or cur_rem_9[60:0] + or cur_rem_1[60:0]) +begin +case(bound_cmp_sign[8:0]) + 9'b111111111: cur_rem[REM_WIDTH-1:0] = remainder_shift[REM_WIDTH-1:0]; //0 + 9'b011111111: cur_rem[REM_WIDTH-1:0] = cur_rem_1[REM_WIDTH-1:0]; //+-1 + 9'b001111111: cur_rem[REM_WIDTH-1:0] = cur_rem_2[REM_WIDTH-1:0]; //+-2 + 9'b000111111: 
cur_rem[REM_WIDTH-1:0] = cur_rem_3[REM_WIDTH-1:0]; //+-3 + 9'b000011111: cur_rem[REM_WIDTH-1:0] = cur_rem_4[REM_WIDTH-1:0]; //+-4 + 9'b000001111: cur_rem[REM_WIDTH-1:0] = cur_rem_5[REM_WIDTH-1:0]; //+-5 + 9'b000000111: cur_rem[REM_WIDTH-1:0] = cur_rem_6[REM_WIDTH-1:0]; //+-6 + 9'b000000011: cur_rem[REM_WIDTH-1:0] = cur_rem_7[REM_WIDTH-1:0]; //+-7 + 9'b000000001: cur_rem[REM_WIDTH-1:0] = cur_rem_8[REM_WIDTH-1:0]; //+-8 + 9'b000000000: cur_rem[REM_WIDTH-1:0] = cur_rem_9[REM_WIDTH-1:0]; //+-9 + default : cur_rem[REM_WIDTH-1:0] = {REM_WIDTH{1'bx}}; +endcase +// &CombEnd; @745 +end +assign srt_remainder_nxt[REM_WIDTH-1:0] = cur_rem[REM_WIDTH-1:0]; +//assign srt_remainder_zero = ~|srt_remainder_nxt[REM_WIDTH-1:0]; +assign srt_remainder_sign = srt_remainder_nxt[REM_WIDTH-1]; + +//==================================================== +// remainder logic for integer VREM/VREMU inst +//==================================================== +always @(posedge srt_qt_rem_clk or negedge cpurst_b) +begin + if(!cpurst_b)begin + srt_remainder_minus[REM_WIDTH-1:0] <= {REM_WIDTH{1'b0}}; + end + else if(srt_sm_on)begin + srt_remainder_minus[REM_WIDTH-1:0] <= srt_remainder_minus_nxt[REM_WIDTH-1:0]; + end + else begin + srt_remainder_minus[REM_WIDTH-1:0] <= srt_remainder_minus[REM_WIDTH-1:0]; + end +end + +assign srt_remainder_out[REM_WIDTH-2:0] = srt_remainder[REM_WIDTH-1] ? 
srt_remainder_minus[REM_WIDTH-2:0] + : srt_remainder[REM_WIDTH-2:0]; + +assign remainder_minus_shift[REM_WIDTH-1:0] = {srt_remainder_minus[REM_WIDTH-5:0],4'b0}; +//csky vperl_off +assign rem_minus_minus_6[REM_WIDTH-1:0] = $unsigned($signed(remainder_minus_shift[REM_WIDTH-1:0]) + - $signed({div_qt_r4_rem_add_op1[REM_WIDTH-1:0]}) + - $signed({div_qt_r2_rem_add_op1[REM_WIDTH-1:0]})); +//assign rem_minus_minus_4[REM_WIDTH-1:0] = $unsigned($signed(remainder_minus_shift[REM_WIDTH-1:0]) +// - $signed({div_qt_r4_rem_add_op1[REM_WIDTH-1:0]})); +//csky vperl_on + +// &Force("nonport","rem_minus_minus_6"); @778 +// //&Force("nonport","rem_minus_minus_4"); @779 + +// here add for positive remainder calculation +assign remainder_minus_nor_nxt_0[REM_WIDTH-1:0] = rem_sign ? cur_rem_1[REM_WIDTH-1:0] + : remainder_minus_shift[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_1[REM_WIDTH-1:0] = rem_sign ? cur_rem_2[REM_WIDTH-1:0] + : remainder_shift[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_2[REM_WIDTH-1:0] = rem_sign ? cur_rem_3[REM_WIDTH-1:0] + : cur_rem_1[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_3[REM_WIDTH-1:0] = rem_sign ? cur_rem_4[REM_WIDTH-1:0] + : cur_rem_2[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_4[REM_WIDTH-1:0] = rem_sign ? cur_rem_5[REM_WIDTH-1:0] + : cur_rem_3[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_5[REM_WIDTH-1:0] = rem_sign ? cur_rem_6[REM_WIDTH-1:0] + : cur_rem_4[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_6[REM_WIDTH-1:0] = rem_sign ? cur_rem_7[REM_WIDTH-1:0] + : cur_rem_5[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_7[REM_WIDTH-1:0] = rem_sign ? cur_rem_8[REM_WIDTH-1:0] + : cur_rem_6[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_8[REM_WIDTH-1:0] = rem_sign ? cur_rem_9[REM_WIDTH-1:0] + : cur_rem_7[REM_WIDTH-1:0]; +assign remainder_minus_nor_nxt_9[REM_WIDTH-1:0] = rem_sign ? 
rem_minus_minus_6[REM_WIDTH-1:0] + : cur_rem_8[REM_WIDTH-1:0]; +// &CombBeg; @802 +always @( bound_cmp_sign[8:0] + or remainder_minus_nor_nxt_7[60:0] + or remainder_minus_nor_nxt_1[60:0] + or remainder_minus_nor_nxt_4[60:0] + or remainder_minus_nor_nxt_5[60:0] + or remainder_minus_nor_nxt_2[60:0] + or remainder_minus_nor_nxt_9[60:0] + or remainder_minus_nor_nxt_8[60:0] + or remainder_minus_nor_nxt_0[60:0] + or remainder_minus_nor_nxt_3[60:0] + or remainder_minus_nor_nxt_6[60:0]) +begin +case({bound_cmp_sign[8:0]}) + 9'b111111111: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_0[REM_WIDTH-1:0];//0 + 9'b011111111: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_1[REM_WIDTH-1:0];//+-1 + 9'b001111111: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_2[REM_WIDTH-1:0];//+-2 + 9'b000111111: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_3[REM_WIDTH-1:0];//+-3 + 9'b000011111: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_4[REM_WIDTH-1:0];//+-4 + 9'b000001111: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_5[REM_WIDTH-1:0];//+-5 + 9'b000000111: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_6[REM_WIDTH-1:0];//+-6 + 9'b000000011: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_7[REM_WIDTH-1:0];//+-7 + 9'b000000001: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_8[REM_WIDTH-1:0];//+-8 + 9'b000000000: srt_remainder_minus_nxt[REM_WIDTH-1:0] = remainder_minus_nor_nxt_9[REM_WIDTH-1:0];//+-9 + default : srt_remainder_minus_nxt[REM_WIDTH-1:0] = {REM_WIDTH{1'bx}}; +endcase +// &CombEnd; @816 +end + +// &ModuleEnd; @818 +endmodule + + diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v new file mode 100644 index 00000000..f8846255 --- /dev/null +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v @@ -0,0 +1,331 @@ 
+/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// &Depend("cpu_cfig.h"); @22 +// &ModuleBeg; @23 +module ct_vfdsu_top( + cp0_vfpu_icg_en, + cp0_yy_clk_en, + cpurst_b, + dp_vfdsu_ex1_pipex_dst_ereg, + dp_vfdsu_ex1_pipex_dst_vreg, + dp_vfdsu_ex1_pipex_iid, + dp_vfdsu_ex1_pipex_imm0, + dp_vfdsu_ex1_pipex_sel, + dp_vfdsu_ex1_pipex_srcf0, + dp_vfdsu_ex1_pipex_srcf1, + dp_vfdsu_fdiv_gateclk_issue, + dp_vfdsu_idu_fdiv_issue, + forever_cpuclk, + idu_vfpu_rf_pipex_func, + idu_vfpu_rf_pipex_gateclk_sel, + pad_yy_icg_scan_en, + pipex_dp_vfdsu_ereg, + pipex_dp_vfdsu_ereg_data, + pipex_dp_vfdsu_freg_data, + pipex_dp_vfdsu_inst_vld, + pipex_dp_vfdsu_vreg, + rtu_yy_xx_flush, + vfdsu_dp_fdiv_busy, + vfdsu_dp_inst_wb_req, + vfdsu_ifu_debug_ex2_wait, + vfdsu_ifu_debug_idle, + vfdsu_ifu_debug_pipe_busy, + vfpu_yy_xx_dqnan, + vfpu_yy_xx_rm +); + +// &Ports; @24 +input cp0_vfpu_icg_en; +input cp0_yy_clk_en; +input cpurst_b; +input [4 :0] dp_vfdsu_ex1_pipex_dst_ereg; +input [6 :0] dp_vfdsu_ex1_pipex_dst_vreg; +input [6 :0] dp_vfdsu_ex1_pipex_iid; +input [2 :0] dp_vfdsu_ex1_pipex_imm0; +input dp_vfdsu_ex1_pipex_sel; +input [63:0] dp_vfdsu_ex1_pipex_srcf0; +input [63:0] dp_vfdsu_ex1_pipex_srcf1; +input dp_vfdsu_fdiv_gateclk_issue; +input dp_vfdsu_idu_fdiv_issue; +input forever_cpuclk; +input [19:0] idu_vfpu_rf_pipex_func; +input idu_vfpu_rf_pipex_gateclk_sel; +input pad_yy_icg_scan_en; +input rtu_yy_xx_flush; +input vfpu_yy_xx_dqnan; +input [2 :0] 
vfpu_yy_xx_rm; +output [4 :0] pipex_dp_vfdsu_ereg; +output [4 :0] pipex_dp_vfdsu_ereg_data; +output [63:0] pipex_dp_vfdsu_freg_data; +output pipex_dp_vfdsu_inst_vld; +output [6 :0] pipex_dp_vfdsu_vreg; +output vfdsu_dp_fdiv_busy; +output vfdsu_dp_inst_wb_req; +output vfdsu_ifu_debug_ex2_wait; +output vfdsu_ifu_debug_idle; +output vfdsu_ifu_debug_pipe_busy; + +// &Regs; @25 + +// &Wires; @26 +wire cp0_vfpu_icg_en; +wire cp0_yy_clk_en; +wire cpurst_b; +wire [4 :0] dp_vfdsu_ex1_pipex_dst_ereg; +wire [6 :0] dp_vfdsu_ex1_pipex_dst_vreg; +wire [6 :0] dp_vfdsu_ex1_pipex_iid; +wire [2 :0] dp_vfdsu_ex1_pipex_imm0; +wire dp_vfdsu_ex1_pipex_sel; +wire [63:0] dp_vfdsu_ex1_pipex_srcf0; +wire [63:0] dp_vfdsu_ex1_pipex_srcf1; +wire dp_vfdsu_fdiv_gateclk_issue; +wire dp_vfdsu_idu_fdiv_issue; +wire ex1_data_clk; +wire ex1_div; +wire ex1_double; +wire ex1_pipedown; +wire ex1_scalar; +wire ex1_single; +wire ex1_sqrt; +wire [63:0] ex1_src0; +wire [63:0] ex1_src1; +wire [2 :0] ex1_static_rm; +wire ex2_data_clk; +wire ex2_pipedown; +wire ex2_srt_first_round; +wire ex3_data_clk; +wire ex3_pipedown; +wire [4 :0] ex4_out_expt; +wire [63:0] ex4_out_result; +wire forever_cpuclk; +wire [19:0] idu_vfpu_rf_pipex_func; +wire idu_vfpu_rf_pipex_gateclk_sel; +wire pad_yy_icg_scan_en; +wire [4 :0] pipex_dp_vfdsu_ereg; +wire [4 :0] pipex_dp_vfdsu_ereg_data; +wire [63:0] pipex_dp_vfdsu_freg_data; +wire pipex_dp_vfdsu_inst_vld; +wire [6 :0] pipex_dp_vfdsu_vreg; +wire rtu_yy_xx_flush; +wire srt_ctrl_rem_zero; +wire srt_ctrl_skip_srt; +wire srt_secd_round; +wire srt_sm_on; +wire vfdsu_dp_fdiv_busy; +wire vfdsu_dp_inst_wb_req; +wire vfdsu_ex2_double; +wire vfdsu_ex2_single; +wire vfdsu_ifu_debug_ex2_wait; +wire vfdsu_ifu_debug_idle; +wire vfdsu_ifu_debug_pipe_busy; +wire vfpu_yy_xx_dqnan; +wire [2 :0] vfpu_yy_xx_rm; + +// &Instance("ct_vfdsu_ctrl"); @28 +// &Instance("ct_vfdsu_dp"); @29 +// &ConnRule(s/ex4_out/set0_doub_ex4/); @30 +// &ConnRule(s/srt_ctrl/set0_doub_srt_ctrl/); @31 +// 
&ConnRule(s/vfdsu_ex2_/dp_set0_double_ex2_/); @32 +// &ConnRule(s/slice_x/slice_0/); @33 +// &ConnRule(s/vfdsu_ex3_/dp_set0_double_ex3_/); @34 +// &ConnRule(s/vfdsu_ex4_/dp_set0_double_ex4_/); @35 +// &Instance("ct_vfdsu_double","x_ct_vfdsu_double_set0"); @36 +// &Connect(.ex1_src0(ex1_src0[63:0])); @37 +// &Connect(.ex1_src1(ex1_src1[63:0])); @38 +// &Connect(.ex1_double(set0_ex1_double)); @39 +// &Connect(.srt_secd_round(srt_secd_round[0])); @40 +// &Connect(.ex2_srt_first_round(ex2_srt_first_round[0])); @41 +// &ConnRule(s/ex4_out/set0_half0_ex4/); @43 +// &ConnRule(s/_pipedown/_half_pipedown/); @44 +// &ConnRule(s/srt_ctrl/set0_half0_srt_ctrl/); @45 +// &ConnRule(s/vfdsu_ex2_/dp_set0_half0_ex2_/); @46 +// &ConnRule(s/vfdsu_ex3_/dp_set0_half0_ex3_/); @47 +// &ConnRule(s/vfdsu_ex4_/dp_set0_half0_ex4_/); @48 +// &Instance("ct_vfdsu_half","x_ct_vfdsu_half0_set0"); @49 +// &Connect(.ex1_src0(ex1_src0[31:16])); @50 +// &Connect(.ex1_src1(ex1_src1[31:16])); @51 +// &Connect(.srt_secd_round(srt_secd_round[1])); @52 +// &Connect(.ex2_srt_first_round(ex2_srt_first_round[1])); @53 +// &ConnRule(s/ex4_out/set0_sing_ex4/); @56 +// &ConnRule(s/_pipedown/_sing_pipedown/); @57 +// &ConnRule(s/srt_ctrl/set0_sing_srt_ctrl/); @58 +// &ConnRule(s/slice_x/slice_0/); @59 +// &Instance("ct_vfdsu_single","x_ct_vfdsu_single_set0"); @60 +// &Connect(.ex1_src0(ex1_src0[63:32])); @61 +// &Connect(.ex1_src1(ex1_src1[63:32])); @62 +// &Connect(.srt_secd_round(srt_secd_round[1])); @63 +// &Connect(.ex2_srt_first_round(ex2_srt_first_round[1])); @64 +// &ConnRule(s/ex4_out/set0_half1_ex4/); @68 +// &ConnRule(s/_pipedown/_half_pipedown/); @69 +// &ConnRule(s/srt_ctrl/set0_half1_srt_ctrl/); @70 +// &ConnRule(s/vfdsu_ex2_/dp_set0_half1_ex2_/); @71 +// &ConnRule(s/vfdsu_ex3_/dp_set0_half1_ex3_/); @72 +// &ConnRule(s/vfdsu_ex4_/dp_set0_half1_ex4_/); @73 +// &Instance("ct_vfdsu_half","x_ct_vfdsu_half1_set0"); @74 +// &Connect(.ex1_src0(ex1_src0[63:48])); @75 +// &Connect(.ex1_src1(ex1_src1[63:48])); 
@76 +// &Connect(.srt_secd_round(srt_secd_round[1])); @77 +// &Connect(.ex2_srt_first_round(ex2_srt_first_round[1])); @78 +// &ConnRule(s/ex4_out/set1_doub_ex4/); @81 +// &ConnRule(s/srt_ctrl/set1_doub_srt_ctrl/); @82 +// &ConnRule(s/vfdsu_ex2_/dp_set1_double_ex2_/); @83 +// &ConnRule(s/slice_x/slice_1/); @84 +// &ConnRule(s/vfdsu_ex3_/dp_set1_double_ex3_/); @85 +// &ConnRule(s/vfdsu_ex4_/dp_set1_double_ex4_/); @86 +// &Instance("ct_vfdsu_double","x_ct_vfdsu_double_set1"); @87 +// &Connect(.ex1_src0(ex1_src0[127:64])); @88 +// &Connect(.ex1_src1(ex1_src1[127:64])); @89 +// &Connect(.ex1_double(set1_ex1_double)); @90 +// &Connect(.srt_secd_round(srt_secd_round[2])); @91 +// &Connect(.ex2_srt_first_round(ex2_srt_first_round[2])); @92 +// &ConnRule(s/ex4_out/set1_half0_ex4/); @95 +// &ConnRule(s/_pipedown/_half_pipedown/); @96 +// &ConnRule(s/srt_ctrl/set1_half0_srt_ctrl/); @97 +// &ConnRule(s/vfdsu_ex2_/dp_set1_half0_ex2_/); @98 +// &ConnRule(s/vfdsu_ex3_/dp_set1_half0_ex3_/); @99 +// &ConnRule(s/vfdsu_ex4_/dp_set1_half0_ex4_/); @100 +// &Instance("ct_vfdsu_half","x_ct_vfdsu_half0_set1"); @101 +// &Connect(.ex1_src0(ex1_src0[95:80])); @102 +// &Connect(.ex1_src1(ex1_src1[95:80])); @103 +// &Connect(.srt_secd_round(srt_secd_round[3])); @104 +// &Connect(.ex2_srt_first_round(ex2_srt_first_round[3])); @105 +// &ConnRule(s/ex4_out/set1_sing_ex4/); @108 +// &ConnRule(s/_pipedown/_sing_pipedown/); @109 +// &ConnRule(s/srt_ctrl/set1_sing_srt_ctrl/); @110 +// &ConnRule(s/slice_x/slice_1/); @111 +// &Instance("ct_vfdsu_single","x_ct_vfdsu_single_set1"); @112 +// &Connect(.ex1_src0(ex1_src0[127:96])); @113 +// &Connect(.ex1_src1(ex1_src1[127:96])); @114 +// &Connect(.srt_secd_round(srt_secd_round[3])); @115 +// &Connect(.ex2_srt_first_round(ex2_srt_first_round[3])); @116 +// &ConnRule(s/ex4_out/set1_half1_ex4/); @119 +// &ConnRule(s/_pipedown/_half_pipedown/); @120 +// &ConnRule(s/srt_ctrl/set1_half1_srt_ctrl/); @121 +// &ConnRule(s/vfdsu_ex2_/dp_set1_half1_ex2_/); @122 +// 
&ConnRule(s/vfdsu_ex3_/dp_set1_half1_ex3_/); @123 +// &ConnRule(s/vfdsu_ex4_/dp_set1_half1_ex4_/); @124 +// &Instance("ct_vfdsu_half","x_ct_vfdsu_half1_set1"); @125 +// &Connect(.ex1_src0(ex1_src0[127:112])); @126 +// &Connect(.ex1_src1(ex1_src1[127:112])); @127 +// &Connect(.srt_secd_round(srt_secd_round[3])); @128 +// &Connect(.ex2_srt_first_round(ex2_srt_first_round[3])); @129 +// &Instance("ct_vfdsu_ctrl"); @132 +ct_vfdsu_ctrl x_ct_vfdsu_ctrl ( + .cp0_vfpu_icg_en (cp0_vfpu_icg_en ), + .cp0_yy_clk_en (cp0_yy_clk_en ), + .cpurst_b (cpurst_b ), + .dp_vfdsu_ex1_pipex_sel (dp_vfdsu_ex1_pipex_sel ), + .dp_vfdsu_fdiv_gateclk_issue (dp_vfdsu_fdiv_gateclk_issue), + .dp_vfdsu_idu_fdiv_issue (dp_vfdsu_idu_fdiv_issue ), + .ex1_data_clk (ex1_data_clk ), + .ex1_double (ex1_double ), + .ex1_pipedown (ex1_pipedown ), + .ex1_single (ex1_single ), + .ex2_data_clk (ex2_data_clk ), + .ex2_pipedown (ex2_pipedown ), + .ex2_srt_first_round (ex2_srt_first_round ), + .ex3_data_clk (ex3_data_clk ), + .ex3_pipedown (ex3_pipedown ), + .forever_cpuclk (forever_cpuclk ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en ), + .pipex_dp_vfdsu_inst_vld (pipex_dp_vfdsu_inst_vld ), + .rtu_yy_xx_flush (rtu_yy_xx_flush ), + .srt_ctrl_rem_zero (srt_ctrl_rem_zero ), + .srt_ctrl_skip_srt (srt_ctrl_skip_srt ), + .srt_secd_round (srt_secd_round ), + .srt_sm_on (srt_sm_on ), + .vfdsu_dp_fdiv_busy (vfdsu_dp_fdiv_busy ), + .vfdsu_dp_inst_wb_req (vfdsu_dp_inst_wb_req ), + .vfdsu_ex2_double (vfdsu_ex2_double ), + .vfdsu_ex2_single (vfdsu_ex2_single ), + .vfdsu_ifu_debug_ex2_wait (vfdsu_ifu_debug_ex2_wait ), + .vfdsu_ifu_debug_idle (vfdsu_ifu_debug_idle ), + .vfdsu_ifu_debug_pipe_busy (vfdsu_ifu_debug_pipe_busy ) +); + +// &Instance("ct_vfdsu_double"); @133 +ct_vfdsu_double x_ct_vfdsu_double ( + .cp0_vfpu_icg_en (cp0_vfpu_icg_en ), + .cp0_yy_clk_en (cp0_yy_clk_en ), + .cpurst_b (cpurst_b ), + .ex1_div (ex1_div ), + .ex1_double (ex1_double ), + .ex1_pipedown (ex1_pipedown ), + .ex1_scalar (ex1_scalar ), + .ex1_single 
(ex1_single ), + .ex1_sqrt (ex1_sqrt ), + .ex1_src0 (ex1_src0 ), + .ex1_src1 (ex1_src1 ), + .ex1_static_rm (ex1_static_rm ), + .ex2_pipedown (ex2_pipedown ), + .ex2_srt_first_round (ex2_srt_first_round), + .ex3_pipedown (ex3_pipedown ), + .ex4_out_expt (ex4_out_expt ), + .ex4_out_result (ex4_out_result ), + .forever_cpuclk (forever_cpuclk ), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en ), + .srt_ctrl_rem_zero (srt_ctrl_rem_zero ), + .srt_ctrl_skip_srt (srt_ctrl_skip_srt ), + .srt_secd_round (srt_secd_round ), + .srt_sm_on (srt_sm_on ), + .vfpu_yy_xx_dqnan (vfpu_yy_xx_dqnan ), + .vfpu_yy_xx_rm (vfpu_yy_xx_rm ) +); + +// &Instance("ct_vfdsu_scalar_dp"); @134 +ct_vfdsu_scalar_dp x_ct_vfdsu_scalar_dp ( + .cp0_vfpu_icg_en (cp0_vfpu_icg_en ), + .cp0_yy_clk_en (cp0_yy_clk_en ), + .cpurst_b (cpurst_b ), + .dp_vfdsu_ex1_pipex_dst_ereg (dp_vfdsu_ex1_pipex_dst_ereg ), + .dp_vfdsu_ex1_pipex_dst_vreg (dp_vfdsu_ex1_pipex_dst_vreg ), + .dp_vfdsu_ex1_pipex_iid (dp_vfdsu_ex1_pipex_iid ), + .dp_vfdsu_ex1_pipex_imm0 (dp_vfdsu_ex1_pipex_imm0 ), + .dp_vfdsu_ex1_pipex_srcf0 (dp_vfdsu_ex1_pipex_srcf0 ), + .dp_vfdsu_ex1_pipex_srcf1 (dp_vfdsu_ex1_pipex_srcf1 ), + .ex1_data_clk (ex1_data_clk ), + .ex1_div (ex1_div ), + .ex1_double (ex1_double ), + .ex1_pipedown (ex1_pipedown ), + .ex1_scalar (ex1_scalar ), + .ex1_single (ex1_single ), + .ex1_sqrt (ex1_sqrt ), + .ex1_src0 (ex1_src0 ), + .ex1_src1 (ex1_src1 ), + .ex1_static_rm (ex1_static_rm ), + .ex2_data_clk (ex2_data_clk ), + .ex2_pipedown (ex2_pipedown ), + .ex3_data_clk (ex3_data_clk ), + .ex3_pipedown (ex3_pipedown ), + .ex4_out_expt (ex4_out_expt ), + .ex4_out_result (ex4_out_result ), + .forever_cpuclk (forever_cpuclk ), + .idu_vfpu_rf_pipex_func (idu_vfpu_rf_pipex_func ), + .idu_vfpu_rf_pipex_gateclk_sel (idu_vfpu_rf_pipex_gateclk_sel), + .pad_yy_icg_scan_en (pad_yy_icg_scan_en ), + .pipex_dp_vfdsu_ereg (pipex_dp_vfdsu_ereg ), + .pipex_dp_vfdsu_ereg_data (pipex_dp_vfdsu_ereg_data ), + .pipex_dp_vfdsu_freg_data (pipex_dp_vfdsu_freg_data 
), + .pipex_dp_vfdsu_vreg (pipex_dp_vfdsu_vreg ), + .vfdsu_ex2_double (vfdsu_ex2_double ), + .vfdsu_ex2_single (vfdsu_ex2_single ) +); + + +// &ModuleEnd; @137 +endmodule + + diff --git a/vendor/openc910/LICENSE b/vendor/openc910/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/vendor/openc910/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/openc910/README.md b/vendor/openc910/README.md new file mode 100644 index 00000000..c4febe77 --- /dev/null +++ b/vendor/openc910/README.md @@ -0,0 +1,74 @@ +# IP Readme + + Welcome to C910! Some key directories are shown below. +``` +|--C910_RTL_FACTORY/ + |--gen_rtl/ ## Verilog source code of C910 + |--setup/ ## Script to set the environment variables +|--smart_run/ ## RTL simulation environment + |--impl/ ## SDC file, scripts and file lists for implementation + |--logical/ ## SoC demo and test bench to run the simulation + |--setup/ ## GNU tool chain setting + |--tests/ ## Test driver and test cases + |--work/ ## Working directory for builds + |--Makefile ## Makefile for building and running sim targets +|--doc/ ## The user and integration manual of C910 +``` + + +## Usage + + Step1: Get Started + +``` +$ cd C910_RTL_FACTORY +$ source setup/setup.csh +$ cd ../smart_run +$ make help +To gain more information about how to use smart testbench. +``` + + Step2: Download and install C/C++ Compiler + +``` +You can download the GNU tool chain compiled by T-HEAD from the url below: +https://occ.t-head.cn/community/download?id=3948120165480468480 + +$ cd ./smart_run +GNU tool chain (specific riscv version) must be installed and specified before +compiling *.c/*.v tests of the smart environment. 
Please refer to the following +setup file about how to specify it: + ./smart_run/setup/example_setup.csh +``` + + +## Notes + +``` +The testbench supports Verilator(version is better newer than 4.215),iverilog, vcs and irun to run simulation and you can use Gtkwave or verdi +to open the waveform under ./smart_run/work/ directory. + +You can get the debugger, IDE and SDK from the url:https://occ.t-head.cn/community/download?id=575997419775328256 +``` + + +## Discussion + If you are interested in participating in discussions or improving the "openXuantie" cores, you can scan the DingDing QR code below to join the discussion group. + + + +/*Copyright 2019-2021 T-Head Semiconductor Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +*/ From 97bfe0095c4e0cb8b6fd117987f04b11ff56eb1b Mon Sep 17 00:00:00 2001 From: Luca Bertaccini <55843305+lucabertaccini@users.noreply.github.com> Date: Fri, 7 Jun 2024 14:43:10 +0200 Subject: [PATCH 3/8] Fix synchronization of THMULTI DivSqrt lanes when FP16ALT, FP8, or FP8ALT are enabled (#9) * Fix synchronization of THMULTI DivSqrt lanes when FP16ALT, FP8 or FP8ALT are enabled * Update CHANGELOG-PULP.md --- docs/CHANGELOG-PULP.md | 5 +++++ src/fpnew_opgroup_multifmt_slice.sv | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/CHANGELOG-PULP.md b/docs/CHANGELOG-PULP.md index cd09eda5..44e1432c 100644 --- a/docs/CHANGELOG-PULP.md +++ b/docs/CHANGELOG-PULP.md @@ -7,6 +7,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a In this sense, we interpret the "Public API" of a hardware module as its port/parameter list. Versions of the IP in the same major relase are "pin-compatible" with each other. Minor relases are permitted to add new parameters as long as their default bindings ensure backwards compatibility. +## [pulp-v0.2.1] - 2024-06-07 + +### Fix +- Fix synchronization of THMULTI DivSqrt lanes when FP16ALT, FP8, or FP8ALT are enabled. + ## [pulp-v0.2.0] - 2024-05-29 ### Added diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 6b5545c5..ff6f1a14 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -628,10 +628,10 @@ or on 16b inputs producing 32b outputs"); if ((DivSqrtSel != fpnew_pkg::TH32) && (OpGroup == fpnew_pkg::DIVSQRT)) begin // Synch lanes if there is more than one - assign simd_synch_rdy = EnableVectors ? &divsqrt_ready : divsqrt_ready[0]; - assign simd_synch_done = EnableVectors ? &divsqrt_done : divsqrt_done[0]; + assign simd_synch_rdy = EnableVectors ? &divsqrt_ready[NUM_DIVSQRT_LANES-1:0] : divsqrt_ready[0]; + assign simd_synch_done = EnableVectors ? 
&divsqrt_done[NUM_DIVSQRT_LANES-1:0] : divsqrt_done[0]; end else begin - // Unused (alternative divider only supported for scalar FP32 divsqrt) + // Unused (TH32 divider only supported for scalar FP32 divsqrt) assign simd_synch_rdy = '0; assign simd_synch_done = '0; end From d30aecff395618cb0608ca8896e19425b77a3420 Mon Sep 17 00:00:00 2001 From: Luca Bertaccini <55843305+lucabertaccini@users.noreply.github.com> Date: Wed, 26 Jun 2024 12:26:43 +0200 Subject: [PATCH 4/8] Add FP16ALT support to THMULTI DivSqrt (#12) * Add FP16ALT support to THMULTI DivSqrt --- docs/CHANGELOG-PULP.md | 5 + docs/README.md | 2 +- src/fpnew_divsqrt_th_64_multi.sv | 39 +- src/fpnew_opgroup_multifmt_slice.sv | 4 +- src/fpnew_pkg.sv | 4 +- vendor/openc910.vendor.hjson | 2 + .../gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v | 21 +- .../gen_rtl/vfdsu/rtl/ct_vfdsu_double.v | 29 + .../gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v | 64 +- .../gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v | 96 +- .../gen_rtl/vfdsu/rtl/ct_vfdsu_round.v | 151 +- .../gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v | 24 +- .../gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v | 86 +- .../gen_rtl/vfdsu/rtl/ct_vfdsu_top.v | 16 +- ...6ALT-support-to-THMULTI-DivSqrt-unit.patch | 1359 +++++++++++++++++ 15 files changed, 1811 insertions(+), 91 deletions(-) create mode 100644 vendor/patches/openc910/0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch diff --git a/docs/CHANGELOG-PULP.md b/docs/CHANGELOG-PULP.md index 44e1432c..94d245be 100644 --- a/docs/CHANGELOG-PULP.md +++ b/docs/CHANGELOG-PULP.md @@ -7,6 +7,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a In this sense, we interpret the "Public API" of a hardware module as its port/parameter list. Versions of the IP in the same major relase are "pin-compatible" with each other. Minor relases are permitted to add new parameters as long as their default bindings ensure backwards compatibility. 
+## [pulp-v0.2.2] - 2024-06-24 + +### Added +- Add FP16ALT support to THMULTI DivSqrt + ## [pulp-v0.2.1] - 2024-06-07 ### Fix diff --git a/docs/README.md b/docs/README.md index dd8a0e9b..f00fb3b5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -366,7 +366,7 @@ It is of type `divsqrt_unit_t`, which is defined as: typedef enum logic[1:0] { PULP, // "PULP" instantiates the PULP DivSqrt unit supports FP64, FP32, FP16, FP16ALT, FP8 and SIMD operations TH32, // "TH32" instantiates the E906 DivSqrt unit supports only FP32 (no SIMD support) - THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16 and SIMD operations + THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16, FP16ALT and SIMD operations } divsqrt_unit_t; ``` diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index eff0620d..a15878af 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -144,31 +144,34 @@ module fpnew_divsqrt_th_64_multi #( // ----------------- // Input processing // ----------------- - logic [1:0] divsqrt_fmt; + logic [3:0] divsqrt_fmt; // Translate fpnew formats into divsqrt formats if(WIDTH == 64) begin : translate_fmt_64_bits always_comb begin : translate_fmt unique case (dst_fmt_q) - fpnew_pkg::FP64: divsqrt_fmt = 2'b10; - fpnew_pkg::FP32: divsqrt_fmt = 2'b01; - fpnew_pkg::FP16: divsqrt_fmt = 2'b00; - default: divsqrt_fmt = 2'b10; // 64 bit max width + fpnew_pkg::FP64: divsqrt_fmt = 4'b1000; + fpnew_pkg::FP32: divsqrt_fmt = 4'b0100; + fpnew_pkg::FP16: divsqrt_fmt = 4'b0010; + fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001; + default: divsqrt_fmt = 4'b1000; // 64 bit max width endcase end end else if(WIDTH == 32) begin : translate_fmt_32_bits always_comb begin : translate_fmt unique case (dst_fmt_q) - fpnew_pkg::FP32: divsqrt_fmt = 2'b01; - fpnew_pkg::FP16: divsqrt_fmt = 2'b00; - default: divsqrt_fmt = 2'b01; // 32 bit max width + fpnew_pkg::FP32: divsqrt_fmt = 4'b0100; + 
fpnew_pkg::FP16: divsqrt_fmt = 4'b0010; + fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001; + default: divsqrt_fmt = 4'b0100; // 32 bit max width endcase end end else if(WIDTH == 16) begin : translate_fmt_16_bits always_comb begin : translate_fmt unique case (dst_fmt_q) - fpnew_pkg::FP16: divsqrt_fmt = 2'b00; - default: divsqrt_fmt = 2'b00; // 16 bit max width + fpnew_pkg::FP16: divsqrt_fmt = 4'b0010; + fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001; + default: divsqrt_fmt = 4'b0010; // 16 bit max width endcase end end else begin @@ -298,7 +301,7 @@ module fpnew_divsqrt_th_64_multi #( // Regs to save current instruction fpnew_pkg::roundmode_e rm_q; - logic[1:0] divsqrt_fmt_q; + logic[3:0] divsqrt_fmt_q; fpnew_pkg::operation_e divsqrt_op_q; logic div_op, sqrt_op; logic [WIDTH-1:0] srcf0_q, srcf1_q; @@ -314,15 +317,15 @@ module fpnew_divsqrt_th_64_multi #( // NaN-box inputs with max WIDTH if(WIDTH == 64) begin : gen_fmt_64_bits always_comb begin : NaN_box_inputs - if(divsqrt_fmt_q == 2'b10) begin // 64-bit + if(divsqrt_fmt_q == 4'b1000) begin // 64-bit srcf0[63:0] = srcf0_q[63:0]; srcf1[63:0] = srcf1_q[63:0]; - end else if(divsqrt_fmt_q == 2'b01) begin // 32-bit + end else if(divsqrt_fmt_q == 4'b0100) begin // 32-bit srcf0[63:32] = '1; srcf1[63:32] = '1; srcf0[31:0] = srcf0_q[31:0]; srcf1[31:0] = srcf1_q[31:0]; - end else if(divsqrt_fmt_q == 2'b00) begin //16-bit + end else if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin //16-bit srcf0[63:16] = '1; srcf1[63:16] = '1; srcf0[15:0] = srcf0_q[15:0]; @@ -334,12 +337,12 @@ module fpnew_divsqrt_th_64_multi #( end end else if (WIDTH == 32) begin : gen_fmt_32_bits always_comb begin : NaN_box_inputs - if(divsqrt_fmt_q == 2'b01) begin // 32-bit + if(divsqrt_fmt_q == 4'b0100) begin // 32-bit srcf0[63:32] = '1; srcf1[63:32] = '1; srcf0[31:0] = srcf0_q[31:0]; srcf1[31:0] = srcf1_q[31:0]; - end else if(divsqrt_fmt_q == 2'b00) begin // 16-bit + end else if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin // 
16-bit srcf0[63:16] = '1; srcf1[63:16] = '1; srcf0[15:0] = srcf0_q[15:0]; @@ -351,7 +354,7 @@ module fpnew_divsqrt_th_64_multi #( end end else if (WIDTH == 16) begin : gen_fmt_16_bits always_comb begin : NaN_box_inputs - if(divsqrt_fmt_q == 2'b00) begin // 16-bit + if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin // 16-bit srcf0[63:16] = '1; srcf1[63:16] = '1; srcf0[15:0] = srcf0_q[15:0]; @@ -390,7 +393,7 @@ module fpnew_divsqrt_th_64_multi #( .dp_vfdsu_fdiv_gateclk_issue ( 1'b1 ), // Local clock enable (same as above) .dp_vfdsu_idu_fdiv_issue ( op_starting ), // 1. Issue fdiv (FSM in ctrl) .forever_cpuclk ( clk_i ), // Clock input - .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 13'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0) + .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 11'b0 ,sqrt_op, div_op} ), // Defines format (bits 16:13) and operation (bits 1,0) .idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. Select func .pad_yy_icg_scan_en ( 1'b0 ), // SE signal for the redundant clock gating module .rtu_yy_xx_flush ( flush_i ), // Flush diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index ff6f1a14..f5991cbd 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -68,9 +68,9 @@ module fpnew_opgroup_multifmt_slice #( if ((DivSqrtSel == fpnew_pkg::TH32) && !((FpFmtConfig[0] == 1) && (FpFmtConfig[1:NUM_FORMATS-1] == '0))) begin $fatal(1, "T-Head-based DivSqrt unit supported only in FP32-only configurations. 
\ Set DivSqrtSel = THMULTI or DivSqrtSel = PULP to use a multi-format divider"); - end else if ((DivSqrtSel == fpnew_pkg::THMULTI) && (FpFmtConfig[3] == 1'b1 || FpFmtConfig[4] == 1'b1 || FpFmtConfig[5] == 1'b1)) begin + end else if ((DivSqrtSel == fpnew_pkg::THMULTI) && (FpFmtConfig[3] == 1'b1 || FpFmtConfig[5] == 1'b1)) begin $warning("The DivSqrt unit of C910 (instantiated by DivSqrtSel = THMULTI) does not support \ -FP16alt, FP8, FP8alt. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP16alt, FP8, FP8alt."); +FP8, FP8alt. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8, FP8alt."); end end diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv index 42d0df6b..1e8ce099 100644 --- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -136,7 +136,7 @@ package fpnew_pkg; typedef enum logic[1:0] { PULP, // "PULP" instantiates the PULP DivSqrt unit supports FP64, FP32, FP16, FP16ALT, FP8 and SIMD operations TH32, // "TH32" instantiates the E906 DivSqrt unit supports only FP32 (no SIMD support) - THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16 and SIMD operations + THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16, FP16ALT and SIMD operations } divsqrt_unit_t; // ------------------- @@ -454,7 +454,7 @@ package fpnew_pkg; // Returns the maximum number of lanes in the FPU according to width, format config and vectors function automatic int unsigned num_divsqrt_lanes(int unsigned width, fmt_logic_t cfg, logic vec, divsqrt_unit_t DivSqrtSel); automatic fmt_logic_t cfg_tmp; - cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 6'b111000 : cfg; + cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 6'b111010 : cfg; return vec ? 
width / min_fp_width(cfg_tmp) : 1; // if no vectors, only one lane endfunction diff --git a/vendor/openc910.vendor.hjson b/vendor/openc910.vendor.hjson index ddaa644f..356121b0 100644 --- a/vendor/openc910.vendor.hjson +++ b/vendor/openc910.vendor.hjson @@ -10,6 +10,8 @@ rev: "e0c4ad8ec7f8c70f649d826ebd6c949086453272" } + patch_dir: "patches/openc910" + exclude_from_upstream: [ "doc", "smart_run", diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v index f7f541f2..0aba4f1c 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v @@ -26,6 +26,8 @@ module ct_vfdsu_ctrl( ex1_double, ex1_pipedown, ex1_single, + ex1_half, + ex1_bfloat, ex2_data_clk, ex2_pipedown, ex2_srt_first_round, @@ -43,6 +45,8 @@ module ct_vfdsu_ctrl( vfdsu_dp_inst_wb_req, vfdsu_ex2_double, vfdsu_ex2_single, + vfdsu_ex2_half, + vfdsu_ex2_bfloat, vfdsu_ifu_debug_ex2_wait, vfdsu_ifu_debug_idle, vfdsu_ifu_debug_pipe_busy @@ -57,6 +61,8 @@ input dp_vfdsu_fdiv_gateclk_issue; input dp_vfdsu_idu_fdiv_issue; input ex1_double; input ex1_single; +input ex1_half; +input ex1_bfloat; input forever_cpuclk; input pad_yy_icg_scan_en; input rtu_yy_xx_flush; @@ -64,6 +70,8 @@ input srt_ctrl_rem_zero; input srt_ctrl_skip_srt; input vfdsu_ex2_double; input vfdsu_ex2_single; +input vfdsu_ex2_half; +input vfdsu_ex2_bfloat; output ex1_data_clk; output ex1_pipedown; output ex2_data_clk; @@ -106,6 +114,8 @@ wire ex1_data_clk_en; wire ex1_double; wire ex1_pipedown; wire ex1_single; +wire ex1_half; +wire ex1_bfloat; wire ex2_data_clk; wire ex2_data_clk_en; wire ex2_pipe_clk; @@ -137,6 +147,8 @@ wire vfdsu_dp_fdiv_busy; wire vfdsu_dp_inst_wb_req; wire vfdsu_ex2_double; wire vfdsu_ex2_single; +wire vfdsu_ex2_half; +wire vfdsu_ex2_bfloat; wire vfdsu_ex2_vld; wire vfdsu_ifu_debug_ex2_wait; wire vfdsu_ifu_debug_idle; @@ -244,8 +256,9 @@ end 
//For Double, initial is 5'b11100('d28), calculate 29 round //For Single, initial is 5'b01110('d14), calculate 15 round assign srt_cnt_ini[4:0] = (ex1_double) ? 5'b01101 : - ex1_single ? 5'b00110 - : 5'b00011; + (ex1_single) ? 5'b00110 : + (ex1_half) ? 5'b00011 + : 5'b00010; //vfdsu ex2 pipedown signal assign ex2_pipedown = srt_last_round && div_st_ex2; @@ -277,7 +290,9 @@ assign srt_secd_round = ex2_srt_secd_round; assign ex2_srt_secd_round_pre = srt_sm_on && srt_secd_round_pre; assign srt_secd_round_pre = vfdsu_ex2_double ? srt_cnt[4:0]==5'b01101 : - vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 : srt_cnt[4:0] == 5'b00011; + vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 : + vfdsu_ex2_half ? srt_cnt[4:0]==5'b00011 + : srt_cnt[4:0]==5'b00010; //========================================================== // EX3 Stage Control Signal diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v index b57e289e..ccd34f9c 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v @@ -24,6 +24,8 @@ module ct_vfdsu_double( ex1_pipedown, ex1_scalar, ex1_single, + ex1_half, + ex1_bfloat, ex1_sqrt, ex1_src0, ex1_src1, @@ -52,6 +54,8 @@ input ex1_double; input ex1_pipedown; input ex1_scalar; input ex1_single; +input ex1_half; +input ex1_bfloat; input ex1_sqrt; input [63:0] ex1_src0; input [63:0] ex1_src1; @@ -83,6 +87,8 @@ wire ex1_pipedown; wire [59:0] ex1_remainder; wire ex1_scalar; wire ex1_single; +wire ex1_half; +wire ex1_bfloat; wire ex1_sqrt; wire [63:0] ex1_src0; wire [63:0] ex1_src1; @@ -116,12 +122,15 @@ wire vfdsu_ex2_result_sign; wire vfdsu_ex2_result_zero; wire [2 :0] vfdsu_ex2_rm; wire vfdsu_ex2_single; +wire vfdsu_ex2_half; +wire vfdsu_ex2_bfloat; wire vfdsu_ex2_sqrt; wire vfdsu_ex2_srt_skip; wire [12:0] vfdsu_ex3_doub_expnt_rst; wire vfdsu_ex3_double; wire vfdsu_ex3_dz; wire [12:0] 
vfdsu_ex3_half_expnt_rst; +wire [12:0] vfdsu_ex3_bfloat_expnt_rst; wire vfdsu_ex3_id_srt_skip; wire vfdsu_ex3_nv; wire vfdsu_ex3_of; @@ -141,6 +150,8 @@ wire [2 :0] vfdsu_ex3_rm; wire vfdsu_ex3_rslt_denorm; wire [8 :0] vfdsu_ex3_sing_expnt_rst; wire vfdsu_ex3_single; +wire vfdsu_ex3_half; +wire vfdsu_ex3_bfloat; wire vfdsu_ex3_uf; wire vfdsu_ex4_denorm_to_tiny_frac; wire vfdsu_ex4_double; @@ -164,6 +175,8 @@ wire vfdsu_ex4_result_sign; wire vfdsu_ex4_result_zero; wire vfdsu_ex4_rslt_denorm; wire vfdsu_ex4_single; +wire vfdsu_ex4_half; +wire vfdsu_ex4_bfloat; wire vfdsu_ex4_uf; wire vfpu_yy_xx_dqnan; wire [2 :0] vfpu_yy_xx_rm; @@ -181,6 +194,8 @@ ct_vfdsu_prepare x_ct_vfdsu_prepare ( .ex1_remainder (ex1_remainder ), .ex1_scalar (ex1_scalar ), .ex1_single (ex1_single ), + .ex1_half (ex1_half ), + .ex1_bfloat (ex1_bfloat ), .ex1_sqrt (ex1_sqrt ), .ex1_src0 (ex1_src0 ), .ex1_src1 (ex1_src1 ), @@ -204,6 +219,8 @@ ct_vfdsu_prepare x_ct_vfdsu_prepare ( .vfdsu_ex2_result_zero (vfdsu_ex2_result_zero), .vfdsu_ex2_rm (vfdsu_ex2_rm ), .vfdsu_ex2_single (vfdsu_ex2_single ), + .vfdsu_ex2_half (vfdsu_ex2_half ), + .vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ), .vfdsu_ex2_sqrt (vfdsu_ex2_sqrt ), .vfdsu_ex2_srt_skip (vfdsu_ex2_srt_skip ), .vfpu_yy_xx_dqnan (vfpu_yy_xx_dqnan ), @@ -246,12 +263,15 @@ ct_vfdsu_srt x_ct_vfdsu_srt ( .vfdsu_ex2_result_zero (vfdsu_ex2_result_zero ), .vfdsu_ex2_rm (vfdsu_ex2_rm ), .vfdsu_ex2_single (vfdsu_ex2_single ), + .vfdsu_ex2_half (vfdsu_ex2_half ), + .vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ), .vfdsu_ex2_sqrt (vfdsu_ex2_sqrt ), .vfdsu_ex2_srt_skip (vfdsu_ex2_srt_skip ), .vfdsu_ex3_doub_expnt_rst (vfdsu_ex3_doub_expnt_rst ), .vfdsu_ex3_double (vfdsu_ex3_double ), .vfdsu_ex3_dz (vfdsu_ex3_dz ), .vfdsu_ex3_half_expnt_rst (vfdsu_ex3_half_expnt_rst ), + .vfdsu_ex3_bfloat_expnt_rst (vfdsu_ex3_bfloat_expnt_rst ), .vfdsu_ex3_id_srt_skip (vfdsu_ex3_id_srt_skip ), .vfdsu_ex3_nv (vfdsu_ex3_nv ), .vfdsu_ex3_of (vfdsu_ex3_of ), @@ -271,6 +291,8 @@ ct_vfdsu_srt x_ct_vfdsu_srt 
( .vfdsu_ex3_rslt_denorm (vfdsu_ex3_rslt_denorm ), .vfdsu_ex3_sing_expnt_rst (vfdsu_ex3_sing_expnt_rst ), .vfdsu_ex3_single (vfdsu_ex3_single ), + .vfdsu_ex3_half (vfdsu_ex3_half ), + .vfdsu_ex3_bfloat (vfdsu_ex3_bfloat ), .vfdsu_ex3_uf (vfdsu_ex3_uf ) ); @@ -288,6 +310,7 @@ ct_vfdsu_round x_ct_vfdsu_round ( .vfdsu_ex3_double (vfdsu_ex3_double ), .vfdsu_ex3_dz (vfdsu_ex3_dz ), .vfdsu_ex3_half_expnt_rst (vfdsu_ex3_half_expnt_rst ), + .vfdsu_ex3_bfloat_expnt_rst (vfdsu_ex3_bfloat_expnt_rst ), .vfdsu_ex3_id_srt_skip (vfdsu_ex3_id_srt_skip ), .vfdsu_ex3_nv (vfdsu_ex3_nv ), .vfdsu_ex3_of (vfdsu_ex3_of ), @@ -307,6 +330,8 @@ ct_vfdsu_round x_ct_vfdsu_round ( .vfdsu_ex3_rslt_denorm (vfdsu_ex3_rslt_denorm ), .vfdsu_ex3_sing_expnt_rst (vfdsu_ex3_sing_expnt_rst ), .vfdsu_ex3_single (vfdsu_ex3_single ), + .vfdsu_ex3_half (vfdsu_ex3_half ), + .vfdsu_ex3_bfloat (vfdsu_ex3_bfloat ), .vfdsu_ex3_uf (vfdsu_ex3_uf ), .vfdsu_ex4_denorm_to_tiny_frac (vfdsu_ex4_denorm_to_tiny_frac ), .vfdsu_ex4_double (vfdsu_ex4_double ), @@ -330,6 +355,8 @@ ct_vfdsu_round x_ct_vfdsu_round ( .vfdsu_ex4_result_zero (vfdsu_ex4_result_zero ), .vfdsu_ex4_rslt_denorm (vfdsu_ex4_rslt_denorm ), .vfdsu_ex4_single (vfdsu_ex4_single ), + .vfdsu_ex4_half (vfdsu_ex4_half ), + .vfdsu_ex4_bfloat (vfdsu_ex4_bfloat ), .vfdsu_ex4_uf (vfdsu_ex4_uf ) ); @@ -359,6 +386,8 @@ ct_vfdsu_pack x_ct_vfdsu_pack ( .vfdsu_ex4_result_zero (vfdsu_ex4_result_zero ), .vfdsu_ex4_rslt_denorm (vfdsu_ex4_rslt_denorm ), .vfdsu_ex4_single (vfdsu_ex4_single ), + .vfdsu_ex4_half (vfdsu_ex4_half ), + .vfdsu_ex4_bfloat (vfdsu_ex4_bfloat ), .vfdsu_ex4_uf (vfdsu_ex4_uf ) ); diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v index e1d2e18a..681b77aa 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v @@ -39,6 +39,8 @@ module ct_vfdsu_pack( 
vfdsu_ex4_result_zero, vfdsu_ex4_rslt_denorm, vfdsu_ex4_single, + vfdsu_ex4_half, + vfdsu_ex4_bfloat, vfdsu_ex4_uf ); @@ -65,6 +67,8 @@ input vfdsu_ex4_result_sign; input vfdsu_ex4_result_zero; input vfdsu_ex4_rslt_denorm; input vfdsu_ex4_single; +input vfdsu_ex4_half; +input vfdsu_ex4_bfloat; input vfdsu_ex4_uf; output [4 :0] ex4_out_expt; output [63:0] ex4_out_result; @@ -73,6 +77,7 @@ output [63:0] ex4_out_result; reg [51:0] ex4_denorm_frac; reg [51:0] ex4_frac_52; reg [51:0] ex4_half_denorm_frac; +reg [51:0] ex4_bfloat_denorm_frac; reg [63:0] ex4_out_result; reg [51:0] ex4_single_denorm_frac; reg [12:0] expnt_add_op1; @@ -95,6 +100,11 @@ wire [63:0] ex4_half_rst0; wire [63:0] ex4_half_rst_inf; wire [63:0] ex4_half_rst_norm; wire [63:0] ex4_half_rst_qnan; +wire [63:0] ex4_bfloat_lfn; +wire [63:0] ex4_bfloat_rst0; +wire [63:0] ex4_bfloat_rst_inf; +wire [63:0] ex4_bfloat_rst_norm; +wire [63:0] ex4_bfloat_rst_qnan; wire ex4_of_plus; wire [4 :0] ex4_out_expt; wire ex4_result_inf; @@ -134,6 +144,8 @@ wire vfdsu_ex4_result_sign; wire vfdsu_ex4_result_zero; wire vfdsu_ex4_rslt_denorm; wire vfdsu_ex4_single; +wire vfdsu_ex4_half; +wire vfdsu_ex4_bfloat; wire vfdsu_ex4_uf; @@ -277,6 +289,23 @@ endcase // &CombEnd; @147 end +always @( vfdsu_ex4_expnt_rst[12:0] + or ex4_frac[54:1] + or vfdsu_ex4_denorm_to_tiny_frac) +begin +case(vfdsu_ex4_expnt_rst[12:0]) + 13'h1: ex4_bfloat_denorm_frac[51:0] = { ex4_frac[52:1]}; //-1022 1 + 13'h0: ex4_bfloat_denorm_frac[51:0] = { ex4_frac[53:2]}; //-1023 0 + 13'h1fff:ex4_bfloat_denorm_frac[51:0] = { ex4_frac[54:3]}; //-1024 -1 + 13'h1ffe:ex4_bfloat_denorm_frac[51:0] = {1'b0, ex4_frac[54:4]}; //-1025 -2 + 13'h1ffd:ex4_bfloat_denorm_frac[51:0] = {2'b0, ex4_frac[54:5]}; //-1026 -3 + 13'h1ffc:ex4_bfloat_denorm_frac[51:0] = {3'b0, ex4_frac[54:6]}; //-1027 -4 + 13'h1ffb:ex4_bfloat_denorm_frac[51:0] = {4'b0, ex4_frac[54:7]}; //-1028 -5 + 13'h1ffa:ex4_bfloat_denorm_frac[51:0] = {5'b0, ex4_frac[54:8]}; //-1029 -6 + default 
:ex4_bfloat_denorm_frac[51:0] = vfdsu_ex4_denorm_to_tiny_frac ?{7'b1,45'b0} : 52'b0; //-1045 +endcase +end + //here when denormal number round to add1, it will become normal number assign ex4_denorm_potnt_norm = (vfdsu_ex4_potnt_norm[1] && ex4_frac[53]) || (vfdsu_ex4_potnt_norm[0] && ex4_frac[54]) ; @@ -286,9 +315,11 @@ assign ex4_rslt_denorm = !vfdsu_ex4_result_qnan assign ex4_denorm_result[63:0] = vfdsu_ex4_double ? {vfdsu_ex4_result_sign,11'h0,ex4_denorm_frac[51:0]} : vfdsu_ex4_single ? {32'hffffffff,vfdsu_ex4_result_sign, - 8'h0,ex4_single_denorm_frac[51:29]} : { - 48'hffffffffffff,vfdsu_ex4_result_sign,5'h0, - ex4_half_denorm_frac[51:42]}; + 8'h0,ex4_single_denorm_frac[51:29]} : + vfdsu_ex4_half ? {48'hffffffffffff,vfdsu_ex4_result_sign,5'h0, + ex4_half_denorm_frac[51:42]} + : {48'hffffffffffff,vfdsu_ex4_result_sign,8'h0, + ex4_bfloat_denorm_frac[51:45]}; @@ -299,6 +330,15 @@ assign ex4_half_rst_norm[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign, ex4_expnt_rst[4:0], ex4_frac_52[51:42]}; assign ex4_half_rst0[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,15'h0}; + +assign ex4_bfloat_lfn[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,8'hfe,{7{1'b1}}}; +assign ex4_bfloat_rst_qnan[63:0] = {48'hffffffffffff,vfdsu_ex4_qnan_sign, 8'hff,1'b1, vfdsu_ex4_qnan_f[5:0]}; +assign ex4_bfloat_rst_inf[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,8'hff,7'b0}; +assign ex4_bfloat_rst_norm[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign, + ex4_expnt_rst[7:0], + ex4_frac_52[51:45]}; +assign ex4_bfloat_rst0[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,15'h0}; + //ex4 overflow/underflow plus assign ex4_rst_nor = vfdsu_ex4_result_nor; assign ex4_of_plus = vfdsu_ex4_potnt_of && @@ -345,21 +385,23 @@ assign ex4_sing_rst_norm[63:0] = {32'hffffffff,vfdsu_ex4_result_sign, ex4_expnt_rst[7:0], ex4_frac_52[51:29]}; assign ex4_rst_lfn[63:0] = (vfdsu_ex4_double) ? ex4_doub_lfn[63:0] : - vfdsu_ex4_single ? ex4_sing_lfn[63:0] : ex4_half_lfn[63:0]; + vfdsu_ex4_single ? 
ex4_sing_lfn[63:0] : + vfdsu_ex4_half ? ex4_half_lfn[63:0] : ex4_bfloat_lfn[63:0]; assign ex4_rst0[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst0[63:0] : - vfdsu_ex4_single ? ex4_sing_rst0[63:0] : ex4_half_rst0[63:0]; + vfdsu_ex4_single ? ex4_sing_rst0[63:0] : + vfdsu_ex4_half ? ex4_half_rst0[63:0] : ex4_bfloat_rst0[63:0]; assign ex4_rst_qnan[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst_qnan[63:0] : - vfdsu_ex4_single ? ex4_sing_rst_qnan[63:0] - : ex4_half_rst_qnan[63:0]; + vfdsu_ex4_single ? ex4_sing_rst_qnan[63:0] : + vfdsu_ex4_half ? ex4_half_rst_qnan[63:0] : ex4_bfloat_rst_qnan[63:0]; assign ex4_rst_norm[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst_norm[63:0] : - vfdsu_ex4_single ? ex4_sing_rst_norm[63:0] - : ex4_half_rst_norm[63:0]; + vfdsu_ex4_single ? ex4_sing_rst_norm[63:0] : + vfdsu_ex4_half ? ex4_half_rst_norm[63:0] : ex4_bfloat_rst_norm[63:0]; assign ex4_rst_inf[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst_inf[63:0] : - vfdsu_ex4_single ? ex4_sing_rst_inf[63:0] - : ex4_half_rst_inf[63:0]; + vfdsu_ex4_single ? ex4_sing_rst_inf[63:0] : + vfdsu_ex4_half ? 
ex4_half_rst_inf[63:0] : ex4_bfloat_rst_inf[63:0]; assign ex4_cor_uf = (vfdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus) diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v index 7c5821c8..0ef958a3 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v @@ -25,6 +25,8 @@ module ct_vfdsu_prepare( ex1_remainder, ex1_scalar, ex1_single, + ex1_half, + ex1_bfloat, ex1_sqrt, ex1_src0, ex1_src1, @@ -48,6 +50,8 @@ module ct_vfdsu_prepare( vfdsu_ex2_result_zero, vfdsu_ex2_rm, vfdsu_ex2_single, + vfdsu_ex2_half, + vfdsu_ex2_bfloat, vfdsu_ex2_sqrt, vfdsu_ex2_srt_skip, vfpu_yy_xx_dqnan, @@ -63,6 +67,8 @@ input ex1_double; input ex1_pipedown; input ex1_scalar; input ex1_single; +input ex1_half; +input ex1_bfloat; input ex1_sqrt; input [63:0] ex1_src0; input [63:0] ex1_src1; @@ -90,6 +96,8 @@ output vfdsu_ex2_result_sign; output vfdsu_ex2_result_zero; output [2 :0] vfdsu_ex2_rm; output vfdsu_ex2_single; +output vfdsu_ex2_half; +output vfdsu_ex2_bfloat; output vfdsu_ex2_sqrt; output vfdsu_ex2_srt_skip; @@ -115,6 +123,8 @@ reg vfdsu_ex2_result_sign; reg vfdsu_ex2_result_zero; reg [2 :0] vfdsu_ex2_rm; reg vfdsu_ex2_single; +reg vfdsu_ex2_half; +reg vfdsu_ex2_bfloat; reg vfdsu_ex2_sqrt; reg vfdsu_ex2_srt_skip; @@ -161,6 +171,12 @@ wire ex1_half_expnt1_max; wire ex1_half_expnt1_zero; wire ex1_half_frac0_all0; wire ex1_half_frac1_all0; +wire ex1_bfloat_expnt0_max; +wire ex1_bfloat_expnt1_max; +wire ex1_bfloat_expnt0_zero; +wire ex1_bfloat_expnt1_zero; +wire ex1_bfloat_frac0_all0; +wire ex1_bfloat_frac1_all0; wire ex1_nv; wire ex1_op0_cnan; wire [51:0] ex1_op0_f; @@ -216,6 +232,8 @@ wire ex1_sing_expnt1_zero; wire ex1_sing_frac0_all0; wire ex1_sing_frac1_all0; wire ex1_single; +wire ex1_half; +wire ex1_bfloat; wire ex1_sqrt; wire ex1_sqrt_expnt_odd; wire 
ex1_sqrt_expnt_result_odd; @@ -246,9 +264,11 @@ assign ex1_oper1[63:0] = ex1_src1[63:0]; //Sign bit prepare assign ex1_op0_sign = ex1_double ? ex1_oper0[63] : - ex1_single ? ex1_oper0[31] : ex1_oper0[15]; + ex1_single ? ex1_oper0[31] : + ex1_half ? ex1_oper0[15] : ex1_oper0[15]; assign ex1_op1_sign = ex1_double ? ex1_oper1[63] : - ex1_single ? ex1_oper1[31] : ex1_oper1[15]; + ex1_single ? ex1_oper1[31] : + ex1_half ? ex1_oper1[15] : ex1_oper1[15]; assign div_sign = ex1_op0_sign ^ ex1_op1_sign; assign sqrt_sign = ex1_op0_sign; assign ex1_result_sign = (ex1_div) @@ -261,10 +281,14 @@ assign ex1_doub_expnt1_max = &ex1_oper1[62:52]; assign ex1_sing_expnt1_max = &ex1_oper1[30:23]; assign ex1_half_expnt0_max = &ex1_oper0[14:10]; assign ex1_half_expnt1_max = &ex1_oper1[14:10]; +assign ex1_bfloat_expnt0_max = &ex1_oper0[14:7]; +assign ex1_bfloat_expnt1_max = &ex1_oper1[14:7]; assign ex1_expnt0_max = ex1_double ? ex1_doub_expnt0_max : - ex1_single ? ex1_sing_expnt0_max : ex1_half_expnt0_max; + ex1_single ? ex1_sing_expnt0_max : + ex1_half ? ex1_half_expnt0_max : ex1_bfloat_expnt0_max; assign ex1_expnt1_max = ex1_double ? ex1_doub_expnt1_max : - ex1_single ? ex1_sing_expnt1_max : ex1_half_expnt1_max; + ex1_single ? ex1_sing_expnt1_max : + ex1_half ? ex1_half_expnt1_max : ex1_bfloat_expnt1_max; //exponent zero assign ex1_doub_expnt0_zero = ~|ex1_oper0[62:52]; @@ -273,10 +297,15 @@ assign ex1_doub_expnt1_zero = ~|ex1_oper1[62:52]; assign ex1_sing_expnt1_zero = ~|ex1_oper1[30:23]; assign ex1_half_expnt0_zero = ~|ex1_oper0[14:10]; assign ex1_half_expnt1_zero = ~|ex1_oper1[14:10]; +assign ex1_bfloat_expnt0_zero = ~|ex1_oper0[14:7]; +assign ex1_bfloat_expnt1_zero = ~|ex1_oper1[14:7]; assign ex1_expnt0_zero = ex1_double ? ex1_doub_expnt0_zero : - ex1_single ? ex1_sing_expnt0_zero : ex1_half_expnt0_zero; + ex1_single ? ex1_sing_expnt0_zero : + ex1_half ? ex1_half_expnt0_zero : ex1_bfloat_expnt0_zero; assign ex1_expnt1_zero = ex1_double ? ex1_doub_expnt1_zero : - ex1_single ? 
ex1_sing_expnt1_zero : ex1_half_expnt1_zero; + ex1_single ? ex1_sing_expnt1_zero : + ex1_half ? ex1_half_expnt1_zero : ex1_bfloat_expnt1_zero; + //fraction zero assign ex1_doub_frac0_all0 = ~|ex1_oper0[51:0]; assign ex1_sing_frac0_all0 = ~|ex1_oper0[22:0]; @@ -284,14 +313,20 @@ assign ex1_doub_frac1_all0 = ~|ex1_oper1[51:0]; assign ex1_sing_frac1_all0 = ~|ex1_oper1[22:0]; assign ex1_half_frac0_all0 = ~|ex1_oper0[9:0]; assign ex1_half_frac1_all0 = ~|ex1_oper1[9:0]; +assign ex1_bfloat_frac0_all0 = ~|ex1_oper0[6:0]; +assign ex1_bfloat_frac1_all0 = ~|ex1_oper1[6:0]; assign ex1_frac0_all0 = ex1_double ? ex1_doub_frac0_all0 : - ex1_single ? ex1_sing_frac0_all0 : ex1_half_frac0_all0; + ex1_single ? ex1_sing_frac0_all0 : + ex1_half ? ex1_half_frac0_all0 : ex1_bfloat_frac0_all0; assign ex1_frac1_all0 = ex1_double ? ex1_doub_frac1_all0 : - ex1_single ? ex1_sing_frac1_all0 : ex1_half_frac1_all0; + ex1_single ? ex1_sing_frac1_all0 : + ex1_half ? ex1_half_frac1_all0 : ex1_bfloat_frac1_all0; assign ex1_frac0_msb = ex1_double ? ex1_oper0[51] : - ex1_single ? ex1_oper0[22] : ex1_oper0[9]; + ex1_single ? ex1_oper0[22] : + ex1_half ? ex1_oper0[9] : ex1_oper0[6]; assign ex1_frac1_msb = ex1_double ? ex1_oper1[51] : - ex1_single ? ex1_oper1[22] : ex1_oper1[9]; + ex1_single ? ex1_oper1[22] : + ex1_half ? ex1_oper1[9] : ex1_oper1[6]; assign ex1_oper0_high_all1 = ex1_single ? &ex1_oper0[63:32] : &ex1_oper0[63:16]; assign ex1_oper1_high_all1 = ex1_single ? &ex1_oper1[63:32] : &ex1_oper1[63:16]; @@ -382,25 +417,30 @@ ct_vfdsu_ff1 x_frac1_expnt ( // &Connect(.frac_bin_val(ex1_oper1_id_expnt[12:0])); @157 // &Connect(.fanc_shift_num(ex1_oper1_id_frac[51:0])); @158 assign ex1_oper0_frac[51:0] = ex1_double ? ex1_oper0[51:0] : - ex1_single ? {ex1_oper0[22:0],29'b0} - : {ex1_oper0[9:0],42'b0}; + ex1_single ? {ex1_oper0[22:0],29'b0} : + ex1_half ? {ex1_oper0[9:0],42'b0} + : {ex1_oper0[6:0],45'b0}; assign ex1_oper1_frac[51:0] = ex1_double ? ex1_oper1[51:0] : - ex1_single ? 
{ex1_oper1[22:0],29'b0} - : {ex1_oper1[9:0],42'b0}; + ex1_single ? {ex1_oper1[22:0],29'b0} : + ex1_half ? {ex1_oper1[9:0],42'b0} + : {ex1_oper1[6:0],45'b0}; //=====================exponent add========================= //exponent number 0 assign ex1_div_op0_expnt[12:0] = ex1_double ? {2'b0,ex1_oper0[62:52]} : - ex1_single ? {5'b0,ex1_oper0[30:23]} - : {8'b0,ex1_oper0[14:10]}; + ex1_single ? {5'b0,ex1_oper0[30:23]} : + ex1_half ? {8'b0,ex1_oper0[14:10]} + : {5'b0,ex1_oper0[14:7]}; assign ex1_expnt_adder_op0[12:0] = ex1_op0_id_nor ? ex1_oper0_id_expnt[12:0] : ex1_div_op0_expnt[12:0]; //exponent number 1 assign ex1_div_op1_expnt[12:0] = ex1_double ? {2'b0,ex1_oper1[62:52]} : - ex1_single ? {5'b0,ex1_oper1[30:23]} - : {8'b0,ex1_oper1[14:10]}; + ex1_single ? {5'b0,ex1_oper1[30:23]} : + ex1_half ? {8'b0,ex1_oper1[14:10]} + : {5'b0,ex1_oper1[14:7]}; assign ex1_sqrt_op1_expnt[12:0] = ex1_double ? {3'b0,{10{1'b1}}} : //'d1023 - ex1_single ? {6'b0,{7{1'b1}}} //'d127 - : {9'b0,{4{1'b1}}}; //'d15 + ex1_single ? {6'b0,{7{1'b1}}} ://'d127 + ex1_half ? {9'b0,{4{1'b1}}} //'d15 + : {6'b0,{7{1'b1}}}; //'d127 // &CombBeg; @180 always @( ex1_oper1_id_expnt[12:0] @@ -569,11 +609,13 @@ assign ex1_div_srt_op0[52:0] = ex1_div_nor_srt_op0[52:0]; assign ex1_div_srt_op1[52:0] = ex1_div_nor_srt_op1[52:0]; //ex1_div_nor_srt_op0 assign ex1_div_noid_nor_srt_op0[52:0] = ex1_double ? {1'b1,ex1_oper0[51:0]} : - ex1_single ? {1'b1,ex1_oper0[22:0],29'b0} - : {1'b1,ex1_oper0[9:0],42'b0}; + ex1_single ? {1'b1,ex1_oper0[22:0],29'b0} : + ex1_half ? {1'b1,ex1_oper0[9:0],42'b0} + : {1'b1,ex1_oper0[6:0],45'b0}; assign ex1_div_noid_nor_srt_op1[52:0] = ex1_double ? {1'b1,ex1_oper1[51:0]} : - ex1_single ? {1'b1,ex1_oper1[22:0],29'b0} - : {1'b1,ex1_oper1[9:0],42'b0}; + ex1_single ? {1'b1,ex1_oper1[22:0],29'b0} : + ex1_half ? {1'b1,ex1_oper1[9:0],42'b0} + : {1'b1,ex1_oper1[6:0],45'b0}; assign ex1_div_nor_srt_op0[52:0] = ex1_op0_id_nor ? 
{ex1_oper0_id_frac[51:0],1'b0} : ex1_div_noid_nor_srt_op0[52:0]; //ex1_div_nor_srt_op1 @@ -699,6 +741,8 @@ begin vfdsu_ex2_sqrt <= 1'b0; vfdsu_ex2_double <= 1'b0; vfdsu_ex2_single <= 1'b0; + vfdsu_ex2_half <= 1'b0; + vfdsu_ex2_bfloat <= 1'b0; end else if(ex1_pipedown) begin @@ -721,6 +765,8 @@ begin vfdsu_ex2_sqrt <= ex1_sqrt; vfdsu_ex2_double <= ex1_double; vfdsu_ex2_single <= ex1_single; + vfdsu_ex2_half <= ex1_half; + vfdsu_ex2_bfloat <= ex1_bfloat; end else begin @@ -743,6 +789,8 @@ begin vfdsu_ex2_sqrt <= vfdsu_ex2_sqrt; vfdsu_ex2_double <= vfdsu_ex2_double; vfdsu_ex2_single <= vfdsu_ex2_single; + vfdsu_ex2_half <= vfdsu_ex2_half; + vfdsu_ex2_bfloat <= vfdsu_ex2_bfloat; end end diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v index 6eece526..cb3dc8e3 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v @@ -27,6 +27,7 @@ module ct_vfdsu_round( vfdsu_ex3_double, vfdsu_ex3_dz, vfdsu_ex3_half_expnt_rst, + vfdsu_ex3_bfloat_expnt_rst, vfdsu_ex3_id_srt_skip, vfdsu_ex3_nv, vfdsu_ex3_of, @@ -46,6 +47,8 @@ module ct_vfdsu_round( vfdsu_ex3_rslt_denorm, vfdsu_ex3_sing_expnt_rst, vfdsu_ex3_single, + vfdsu_ex3_half, + vfdsu_ex3_bfloat, vfdsu_ex3_uf, vfdsu_ex4_denorm_to_tiny_frac, vfdsu_ex4_double, @@ -69,6 +72,8 @@ module ct_vfdsu_round( vfdsu_ex4_result_zero, vfdsu_ex4_rslt_denorm, vfdsu_ex4_single, + vfdsu_ex4_half, + vfdsu_ex4_bfloat, vfdsu_ex4_uf ); @@ -85,6 +90,7 @@ input [12:0] vfdsu_ex3_doub_expnt_rst; input vfdsu_ex3_double; input vfdsu_ex3_dz; input [12:0] vfdsu_ex3_half_expnt_rst; +input [12:0] vfdsu_ex3_bfloat_expnt_rst; input vfdsu_ex3_id_srt_skip; input vfdsu_ex3_nv; input vfdsu_ex3_of; @@ -104,6 +110,8 @@ input [2 :0] vfdsu_ex3_rm; input vfdsu_ex3_rslt_denorm; input [8 :0] vfdsu_ex3_sing_expnt_rst; input vfdsu_ex3_single; +input vfdsu_ex3_half; +input 
vfdsu_ex3_bfloat; input vfdsu_ex3_uf; output vfdsu_ex4_denorm_to_tiny_frac; output vfdsu_ex4_double; @@ -127,6 +135,8 @@ output vfdsu_ex4_result_sign; output vfdsu_ex4_result_zero; output vfdsu_ex4_rslt_denorm; output vfdsu_ex4_single; +output vfdsu_ex4_half; +output vfdsu_ex4_bfloat; output vfdsu_ex4_uf; // &Regs; @24 @@ -138,8 +148,10 @@ reg frac_orig; reg [54:0] frac_sub1_op1; reg frac_sub_1; reg half_denorm_lst_frac; +reg bfloat_denorm_lst_frac; reg [56:0] qt_result_double_denorm_for_round; reg [13:0] qt_result_half_denorm_for_round; +reg [10:0] qt_result_bfloat_denorm_for_round; reg [27:0] qt_result_single_denorm_for_round; reg single_denorm_lst_frac; reg vfdsu_ex4_denorm_to_tiny_frac; @@ -164,6 +176,8 @@ reg vfdsu_ex4_result_sign; reg vfdsu_ex4_result_zero; reg vfdsu_ex4_rslt_denorm; reg vfdsu_ex4_single; +reg vfdsu_ex4_half; +reg vfdsu_ex4_bfloat; reg vfdsu_ex4_uf; // &Wires; @25 @@ -199,6 +213,16 @@ wire ex3_half_gr; wire ex3_half_low_not_zero; wire ex3_half_rst_eq_1; wire ex3_half_zero; +wire ex3_bfloat_denorm_eq; +wire ex3_bfloat_denorm_gr; +wire ex3_bfloat_denorm_plus; +wire ex3_bfloat_denorm_potnt_norm; +wire ex3_bfloat_denorm_zero; +wire ex3_bfloat_eq; +wire ex3_bfloat_gr; +wire ex3_bfloat_low_not_zero; +wire ex3_bfloat_rst_eq_1; +wire ex3_bfloat_zero; wire ex3_nx; wire ex3_pipe_clk; wire ex3_pipe_clk_en; @@ -210,6 +234,8 @@ wire ex3_qt_eq; wire ex3_qt_gr; wire ex3_qt_half_lo2_not0; wire ex3_qt_half_lo3_not0; +wire ex3_qt_bfloat_lo2_not0; +wire ex3_qt_bfloat_lo3_not0; wire ex3_qt_sing_lo3_not0; wire ex3_qt_sing_lo4_not0; wire ex3_qt_zero; @@ -254,6 +280,7 @@ wire vfdsu_ex3_double; wire vfdsu_ex3_dz; wire [12:0] vfdsu_ex3_expnt_rst; wire [12:0] vfdsu_ex3_half_expnt_rst; +wire [12:0] vfdsu_ex3_bfloat_expnt_rst; wire vfdsu_ex3_id_srt_skip; wire vfdsu_ex3_nv; wire vfdsu_ex3_of; @@ -273,6 +300,8 @@ wire [2 :0] vfdsu_ex3_rm; wire vfdsu_ex3_rslt_denorm; wire [8 :0] vfdsu_ex3_sing_expnt_rst; wire vfdsu_ex3_single; +wire vfdsu_ex3_half; +wire vfdsu_ex3_bfloat; 
wire vfdsu_ex3_uf; @@ -302,6 +331,22 @@ assign ex3_half_zero = (total_qt_rt_58[56]) assign ex3_half_rst_eq_1 = total_qt_rt_58[56] && ~|total_qt_rt_58[55:46]; assign ex3_half_denorm_plus = !total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1ff2); assign ex3_half_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1ff1); + +assign ex3_qt_bfloat_lo3_not0 = |total_qt_rt_58[47:45]; +assign ex3_qt_bfloat_lo2_not0 = |total_qt_rt_58[46:45]; +assign ex3_bfloat_gr = total_qt_rt_58[56] + ? total_qt_rt_58[48] && ex3_qt_bfloat_lo3_not0 + : total_qt_rt_58[47] && ex3_qt_bfloat_lo2_not0; +assign ex3_bfloat_eq = (total_qt_rt_58[56]) + ? total_qt_rt_58[48] && !ex3_qt_bfloat_lo3_not0 + : total_qt_rt_58[47] && !ex3_qt_bfloat_lo2_not0; //tie check uses the bfloat low bits, mirroring ex3_bfloat_gr +assign ex3_bfloat_zero = (total_qt_rt_58[56]) + ? ~|total_qt_rt_58[48:45] + : ~|total_qt_rt_58[47:45]; +assign ex3_bfloat_rst_eq_1 = total_qt_rt_58[56] && ~|total_qt_rt_58[55:49]; +assign ex3_bfloat_denorm_plus = !total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1f82); +assign ex3_bfloat_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1f81); + assign vfdsu_ex3_expnt_rst[12:0] = vfdsu_ex3_half_expnt_rst[12:0]; // &Force("bus","total_qt_rt_58",57,0); @54 assign ex3_qt_doub_lo3_not0 = |total_qt_rt_58[2:0]; @@ -343,19 +388,24 @@ assign ex3_doub_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[1 assign ex3_sing_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1f81); assign ex3_rslt_denorm = ex3_denorm_plus || vfdsu_ex3_rslt_denorm; assign ex3_denorm_potnt_norm = vfdsu_ex3_double ? ex3_doub_denorm_potnt_norm : - vfdsu_ex3_single ? ex3_sing_denorm_potnt_norm - : ex3_half_denorm_potnt_norm; + vfdsu_ex3_single ? ex3_sing_denorm_potnt_norm : + vfdsu_ex3_half ? ex3_half_denorm_potnt_norm + : ex3_bfloat_denorm_potnt_norm; assign ex3_rst_eq_1 = (vfdsu_ex3_double)? ex3_doub_rst_eq_1 : - vfdsu_ex3_single ? ex3_sing_rst_eq_1 : ex3_half_rst_eq_1; + vfdsu_ex3_single ?
ex3_sing_rst_eq_1 : + vfdsu_ex3_half ? ex3_half_rst_eq_1 : ex3_bfloat_rst_eq_1; assign ex3_qt_eq = (vfdsu_ex3_double)? ex3_doub_eq : - vfdsu_ex3_single ? ex3_sing_eq : ex3_half_eq; + vfdsu_ex3_single ? ex3_sing_eq : + vfdsu_ex3_half ? ex3_half_eq : ex3_bfloat_eq; assign ex3_qt_gr = (vfdsu_ex3_double)? ex3_doub_gr : - vfdsu_ex3_single ? ex3_sing_gr : ex3_half_gr; + vfdsu_ex3_single ? ex3_sing_gr : + vfdsu_ex3_half ? ex3_half_gr : ex3_bfloat_gr; assign ex3_qt_zero = (vfdsu_ex3_double)? ex3_doub_zero : - vfdsu_ex3_single ? ex3_sing_zero : ex3_half_zero; + vfdsu_ex3_single ? ex3_sing_zero : + vfdsu_ex3_half ? ex3_half_zero : ex3_bfloat_zero; assign ex3_denorm_plus = (vfdsu_ex3_double) ? ex3_doub_denorm_plus : vfdsu_ex3_single ? ex3_sing_denorm_plus - : ex3_half_denorm_plus; + : vfdsu_ex3_half ? ex3_half_denorm_plus : ex3_bfloat_denorm_plus; // &CombBeg; @108 always @( vfdsu_ex3_doub_expnt_rst[12:0] @@ -682,14 +732,63 @@ assign ex3_half_denorm_gr = qt_result_half_denorm_for_round[13] assign ex3_half_denorm_zero = !qt_result_half_denorm_for_round[13] && !ex3_half_low_not_zero; +always @( vfdsu_ex3_bfloat_expnt_rst[8:0] + or total_qt_rt_58[56:45]) +begin +case(vfdsu_ex3_bfloat_expnt_rst[8:0]) + 9'h182:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[48:45],7'b0}; //-126 1 + bfloat_denorm_lst_frac = total_qt_rt_58[49]; + end//-1022 1 + 9'h181:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[49:45],6'b0}; //-127 0 + bfloat_denorm_lst_frac = total_qt_rt_58[50]; + end//-1022 1 + 9'h180:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[50:45],5'b0}; //-128 -1 + bfloat_denorm_lst_frac = total_qt_rt_58[51]; + end//-1022 1 + 9'h17f:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[51:45],4'b0}; //-129 -2 + bfloat_denorm_lst_frac = total_qt_rt_58[52]; + end//-1022 1 + 9'h17e:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[52:45],3'b0}; //-90 -3 + bfloat_denorm_lst_frac = total_qt_rt_58[53]; + end//-1022 1 + 
9'h17d:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[53:45],2'b0}; //-91 -4 + bfloat_denorm_lst_frac = total_qt_rt_58[54]; + end//-1022 1 + 9'h17c:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[54:45],1'b0}; //-92 -5 + bfloat_denorm_lst_frac = total_qt_rt_58[55]; + end//-1022 1 + 9'h17b:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[55:45]}; //-93 -6 + bfloat_denorm_lst_frac = total_qt_rt_58[56]; + end//-1022 1 + 9'h17a:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[56:46]}; //-93 -6 + bfloat_denorm_lst_frac = 1'b0; + end//-1022 1 + default: begin qt_result_bfloat_denorm_for_round[10:0] = '0; + bfloat_denorm_lst_frac = 1'b0; + end//-1022 1 +endcase +end +//rounding evaluation for bfloat denormalize number +assign ex3_bfloat_denorm_eq = qt_result_bfloat_denorm_for_round[10] + && !ex3_bfloat_low_not_zero; +assign ex3_bfloat_low_not_zero = |qt_result_bfloat_denorm_for_round[9:0]; +assign ex3_bfloat_denorm_gr = qt_result_bfloat_denorm_for_round[10] + && ex3_bfloat_low_not_zero; +assign ex3_bfloat_denorm_zero = !qt_result_bfloat_denorm_for_round[10] + && !ex3_bfloat_low_not_zero; + assign ex3_denorm_eq = vfdsu_ex3_double ? ex3_double_denorm_eq : - vfdsu_ex3_single ? ex3_single_denorm_eq : ex3_half_denorm_eq; + vfdsu_ex3_single ? ex3_single_denorm_eq : + vfdsu_ex3_half ? ex3_half_denorm_eq : ex3_bfloat_denorm_eq; assign ex3_denorm_gr = vfdsu_ex3_double ? ex3_double_denorm_gr : - vfdsu_ex3_single ? ex3_single_denorm_gr : ex3_half_denorm_gr; + vfdsu_ex3_single ? ex3_single_denorm_gr : + vfdsu_ex3_half ? ex3_half_denorm_gr : ex3_bfloat_denorm_gr; assign ex3_denorm_zero = vfdsu_ex3_double ? ex3_double_denorm_zero : - vfdsu_ex3_single ? ex3_single_denorm_zero : ex3_half_denorm_zero; + vfdsu_ex3_single ? ex3_single_denorm_zero : + vfdsu_ex3_half ? ex3_half_denorm_zero : ex3_bfloat_denorm_zero; assign ex3_denorm_lst_frac = vfdsu_ex3_double ? double_denorm_lst_frac : - vfdsu_ex3_single ? 
single_denorm_lst_frac : half_denorm_lst_frac; + vfdsu_ex3_single ? single_denorm_lst_frac : + vfdsu_ex3_half ? half_denorm_lst_frac : bfloat_denorm_lst_frac; //Different Round Mode with different rounding rule //Here we call rounding bit as "rb", remainder as "rem" @@ -824,7 +923,9 @@ end // &CombBeg; @540 always @( total_qt_rt_58[56] or vfdsu_ex3_single - or vfdsu_ex3_double) + or vfdsu_ex3_double + or vfdsu_ex3_half + or vfdsu_ex3_bfloat) begin case({total_qt_rt_58[56],vfdsu_ex3_double,vfdsu_ex3_single}) 3'b001: @@ -849,13 +950,23 @@ case({total_qt_rt_58[56],vfdsu_ex3_double,vfdsu_ex3_single}) end 3'b100: begin - frac_add1_op1[54:0] = {12'b1,43'b0}; - frac_sub1_op1[54:0] = {{12{1'b1}},43'b0}; + if (vfdsu_ex3_half) begin + frac_add1_op1[54:0] = {12'b1,43'b0}; + frac_sub1_op1[54:0] = {{12{1'b1}},43'b0}; + end else begin + frac_add1_op1[54:0] = {9'b1,46'b0}; + frac_sub1_op1[54:0] = {{9{1'b1}},46'b0}; + end end 3'b000: begin - frac_add1_op1[54:0] = {13'b1,42'b0}; - frac_sub1_op1[54:0] = {{13{1'b1}},42'b0}; + if (vfdsu_ex3_half) begin + frac_add1_op1[54:0] = {13'b1,42'b0}; + frac_sub1_op1[54:0] = {{13{1'b1}},42'b0}; + end else begin + frac_add1_op1[54:0] = {10'b1,45'b0}; + frac_sub1_op1[54:0] = {{10{1'b1}},45'b0}; + end end default: begin @@ -898,7 +1009,7 @@ assign ex3_nx = ex3_rst_nor && assign ex3_denorm_nx = ex3_rslt_denorm && (!ex3_denorm_zero || !vfdsu_ex3_rem_zero); //Adjust expnt //Div:Actural expnt should plus 1 when op0 is id, sub 1 when op1 id -assign ex3_expnt_adjst[12:0] = vfdsu_ex3_double ? 13'h3ff: vfdsu_ex3_single ? 13'h7f : 13'hf; +assign ex3_expnt_adjst[12:0] = vfdsu_ex3_double ? 13'h3ff: vfdsu_ex3_single ? 13'h7f : vfdsu_ex3_half ? 
13'hf : 13'h7f; assign ex3_expnt_adjust_result[12:0] = vfdsu_ex3_expnt_rst[12:0] + ex3_expnt_adjst[12:0]; //this information is for the packing, which determin the result is normal @@ -954,6 +1065,8 @@ begin vfdsu_ex4_potnt_norm[1:0] <= 2'b0; vfdsu_ex4_double <= 1'b0; vfdsu_ex4_single <= 1'b0; + vfdsu_ex4_half <= 1'b0; + vfdsu_ex4_bfloat <= 1'b0; end else if(ex3_pipedown) @@ -982,6 +1095,8 @@ begin vfdsu_ex4_potnt_norm[1:0] <= ex3_potnt_norm[1:0]; vfdsu_ex4_double <= vfdsu_ex3_double; vfdsu_ex4_single <= vfdsu_ex3_single; + vfdsu_ex4_half <= vfdsu_ex3_half; + vfdsu_ex4_bfloat <= vfdsu_ex3_bfloat; end else begin @@ -1009,6 +1124,8 @@ begin vfdsu_ex4_potnt_norm[1:0] <= vfdsu_ex4_potnt_norm[1:0]; vfdsu_ex4_double <= vfdsu_ex4_double; vfdsu_ex4_single <= vfdsu_ex4_single; + vfdsu_ex4_half <= vfdsu_ex4_half; + vfdsu_ex4_bfloat <= vfdsu_ex4_bfloat; end end diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v index c7a679c1..4d91a2cc 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v @@ -30,6 +30,8 @@ module ct_vfdsu_scalar_dp( ex1_double, ex1_pipedown, ex1_scalar, + ex1_half, + ex1_bfloat, ex1_single, ex1_sqrt, ex1_src0, @@ -50,7 +52,9 @@ module ct_vfdsu_scalar_dp( pipex_dp_vfdsu_freg_data, pipex_dp_vfdsu_vreg, vfdsu_ex2_double, - vfdsu_ex2_single + vfdsu_ex2_single, + vfdsu_ex2_half, + vfdsu_ex2_bfloat ); // &Ports; @24 @@ -79,6 +83,8 @@ output ex1_div; output ex1_double; output ex1_scalar; output ex1_single; +output ex1_half; +output ex1_bfloat; output ex1_sqrt; output [63:0] ex1_src0; output [63:0] ex1_src1; @@ -89,11 +95,15 @@ output [63:0] pipex_dp_vfdsu_freg_data; output [6 :0] pipex_dp_vfdsu_vreg; output vfdsu_ex2_double; output vfdsu_ex2_single; +output vfdsu_ex2_half; +output vfdsu_ex2_bfloat; // &Regs; @25 reg ex1_div; reg ex1_double; reg 
ex1_single; +reg ex1_half; +reg ex1_bfloat; reg ex1_sqrt; reg vfdsu_ex2_div; reg vfdsu_ex2_double; @@ -101,6 +111,8 @@ reg [4 :0] vfdsu_ex2_dst_ereg; reg [6 :0] vfdsu_ex2_dst_vreg; reg [6 :0] vfdsu_ex2_iid; reg vfdsu_ex2_single; +reg vfdsu_ex2_half; +reg vfdsu_ex2_bfloat; reg vfdsu_ex2_sqrt; reg [4 :0] vfdsu_ex3_dst_ereg; reg [6 :0] vfdsu_ex3_dst_vreg; @@ -175,6 +187,8 @@ begin ex1_sqrt <= 1'b0; ex1_double <= 1'b0; ex1_single <= 1'b0; + ex1_half <= 1'b0; + ex1_bfloat <= 1'b0; end else if(idu_vfpu_rf_pipex_gateclk_sel) begin @@ -182,6 +196,8 @@ begin ex1_sqrt <= idu_vfpu_rf_pipex_func[1]; ex1_double <= idu_vfpu_rf_pipex_func[16]; ex1_single <= idu_vfpu_rf_pipex_func[15]; + ex1_half <= idu_vfpu_rf_pipex_func[14]; + ex1_bfloat <= idu_vfpu_rf_pipex_func[13]; end end assign ex1_scalar = 1'b1; @@ -204,6 +220,8 @@ begin vfdsu_ex2_iid[6:0] <= 7'b0; vfdsu_ex2_double <= 1'b0; vfdsu_ex2_single <= 1'b0; + vfdsu_ex2_half <= 1'b0; + vfdsu_ex2_bfloat <= 1'b0; vfdsu_ex2_div <= 1'b0; vfdsu_ex2_sqrt <= 1'b0; end @@ -214,6 +232,8 @@ begin vfdsu_ex2_iid[6:0] <= dp_vfdsu_ex1_pipex_iid[6:0]; vfdsu_ex2_double <= ex1_double; vfdsu_ex2_single <= ex1_single; + vfdsu_ex2_half <= ex1_half; + vfdsu_ex2_bfloat <= ex1_bfloat; vfdsu_ex2_div <= ex1_div; vfdsu_ex2_sqrt <= ex1_sqrt; end @@ -224,6 +244,8 @@ begin vfdsu_ex2_iid[6:0] <= vfdsu_ex2_iid[6:0]; vfdsu_ex2_double <= vfdsu_ex2_double; vfdsu_ex2_single <= vfdsu_ex2_single; + vfdsu_ex2_half <= vfdsu_ex2_half; + vfdsu_ex2_bfloat <= vfdsu_ex2_bfloat; vfdsu_ex2_div <= vfdsu_ex2_div; vfdsu_ex2_sqrt <= vfdsu_ex2_sqrt; end diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v index cdeb3a30..4e2c68b0 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v @@ -49,12 +49,15 @@ module ct_vfdsu_srt( vfdsu_ex2_result_zero, vfdsu_ex2_rm, vfdsu_ex2_single, + vfdsu_ex2_half, + 
vfdsu_ex2_bfloat, vfdsu_ex2_sqrt, vfdsu_ex2_srt_skip, vfdsu_ex3_doub_expnt_rst, vfdsu_ex3_double, vfdsu_ex3_dz, vfdsu_ex3_half_expnt_rst, + vfdsu_ex3_bfloat_expnt_rst, vfdsu_ex3_id_srt_skip, vfdsu_ex3_nv, vfdsu_ex3_of, @@ -74,6 +77,8 @@ module ct_vfdsu_srt( vfdsu_ex3_rslt_denorm, vfdsu_ex3_sing_expnt_rst, vfdsu_ex3_single, + vfdsu_ex3_half, + vfdsu_ex3_bfloat, vfdsu_ex3_uf ); @@ -109,6 +114,8 @@ input vfdsu_ex2_result_sign; input vfdsu_ex2_result_zero; input [2 :0] vfdsu_ex2_rm; input vfdsu_ex2_single; +input vfdsu_ex2_half; +input vfdsu_ex2_bfloat; input vfdsu_ex2_sqrt; input vfdsu_ex2_srt_skip; output srt_ctrl_rem_zero; @@ -118,6 +125,7 @@ output [12:0] vfdsu_ex3_doub_expnt_rst; output vfdsu_ex3_double; output vfdsu_ex3_dz; output [12:0] vfdsu_ex3_half_expnt_rst; +output [12:0] vfdsu_ex3_bfloat_expnt_rst; output vfdsu_ex3_id_srt_skip; output vfdsu_ex3_nv; output vfdsu_ex3_of; @@ -137,16 +145,20 @@ output [2 :0] vfdsu_ex3_rm; output vfdsu_ex3_rslt_denorm; output [8 :0] vfdsu_ex3_sing_expnt_rst; output vfdsu_ex3_single; +output vfdsu_ex3_half; +output vfdsu_ex3_bfloat; output vfdsu_ex3_uf; // &Regs; @24 reg [52:0] ex2_result_double_denorm_round_add_num; reg [52:0] ex2_result_half_denorm_round_add_num; reg [52:0] ex2_result_single_denorm_round_add_num; +reg [52:0] ex2_result_bfloat_denorm_round_add_num; reg [12:0] vfdsu_ex3_doub_expnt_rst; reg vfdsu_ex3_double; reg vfdsu_ex3_dz; reg [12:0] vfdsu_ex3_half_expnt_rst; +reg [12:0] vfdsu_ex3_bfloat_expnt_rst; reg vfdsu_ex3_id_srt_skip; reg vfdsu_ex3_nv; reg vfdsu_ex3_of; @@ -165,6 +177,8 @@ reg [2 :0] vfdsu_ex3_rm; reg vfdsu_ex3_rslt_denorm; reg [8 :0] vfdsu_ex3_sing_expnt_rst; reg vfdsu_ex3_single; +reg vfdsu_ex3_half; +reg vfdsu_ex3_bfloat; reg vfdsu_ex3_uf; // &Wires; @25 @@ -191,6 +205,11 @@ wire ex2_half_expnt_uf; wire ex2_half_id_nor_srt_skip; wire ex2_half_potnt_of; wire ex2_half_potnt_uf; +wire ex2_bfloat_expnt_of; +wire ex2_bfloat_expnt_uf; +wire ex2_bfloat_id_nor_srt_skip; +wire ex2_bfloat_potnt_of; +wire 
ex2_bfloat_potnt_uf; wire ex2_id_nor_srt_skip; wire ex2_of; wire ex2_of_plus; @@ -253,6 +272,8 @@ wire vfdsu_ex2_result_sign; wire vfdsu_ex2_result_zero; wire [2 :0] vfdsu_ex2_rm; wire vfdsu_ex2_single; +wire vfdsu_ex2_half; +wire vfdsu_ex2_bfloat; wire vfdsu_ex2_sqrt; wire vfdsu_ex2_srt_skip; wire vfdsu_ex3_rem_zero; @@ -281,25 +302,33 @@ assign ex2_sing_expnt_of = ~vfdsu_ex2_expnt_rst[9] && (vfdsu_ex2_expnt_rst[8] assign ex2_half_expnt_of = ~vfdsu_ex2_expnt_rst[6] && (vfdsu_ex2_expnt_rst[5] || (vfdsu_ex2_expnt_rst[4] && |vfdsu_ex2_expnt_rst[3:0])); +assign ex2_bfloat_expnt_of = ~vfdsu_ex2_expnt_rst[9] && (vfdsu_ex2_expnt_rst[8] + || (vfdsu_ex2_expnt_rst[7] && + |vfdsu_ex2_expnt_rst[6:0])); assign ex2_expnt_of = vfdsu_ex2_double ? ex2_doub_expnt_of : - vfdsu_ex2_single ? ex2_sing_expnt_of - : ex2_half_expnt_of; + vfdsu_ex2_single ? ex2_sing_expnt_of : + vfdsu_ex2_half ? ex2_half_expnt_of : ex2_bfloat_expnt_of; assign ex2_potnt_of_pre = vfdsu_ex2_double ? ex2_doub_potnt_of : - vfdsu_ex2_single ? ex2_sing_potnt_of : ex2_half_potnt_of; -assign ex2_potnt_uf_pre = vfdsu_ex2_double ? ex2_doub_potnt_uf : - vfdsu_ex2_single ? ex2_sing_potnt_uf : ex2_half_potnt_uf; + vfdsu_ex2_single ? ex2_sing_potnt_of : + vfdsu_ex2_half ? ex2_half_potnt_of : ex2_bfloat_potnt_of; +assign ex2_potnt_uf_pre = vfdsu_ex2_double ? ex2_doub_potnt_uf : + vfdsu_ex2_single ? ex2_sing_potnt_uf : + vfdsu_ex2_half ? ex2_half_potnt_uf : ex2_bfloat_potnt_uf; assign ex2_expnt_uf = vfdsu_ex2_double ? ex2_doub_expnt_uf : - vfdsu_ex2_single ? ex2_sing_expnt_uf : ex2_half_expnt_uf; + vfdsu_ex2_single ? ex2_sing_expnt_uf : + vfdsu_ex2_half ? ex2_half_expnt_uf : ex2_bfloat_expnt_uf; assign ex2_id_nor_srt_skip = vfdsu_ex2_double ? ex2_double_id_nor_srt_skip : - vfdsu_ex2_single ? ex2_single_id_nor_srt_skip - : ex2_half_id_nor_srt_skip; + vfdsu_ex2_single ? ex2_single_id_nor_srt_skip : + vfdsu_ex2_half ? 
ex2_half_id_nor_srt_skip : ex2_bfloat_id_nor_srt_skip; assign ex2_result_denorm_round_add_num[52:0] = vfdsu_ex2_double ? ex2_result_double_denorm_round_add_num[52:0] : vfdsu_ex2_single ? ex2_result_single_denorm_round_add_num[52:0] : - ex2_result_half_denorm_round_add_num[52:0]; - - + vfdsu_ex2_half ? + ex2_result_half_denorm_round_add_num[52:0] : + ex2_result_bfloat_denorm_round_add_num[52:0]; + + //potential overflow when E1-E2 = 128/1024 assign ex2_doub_potnt_of = ~vfdsu_ex2_expnt_rst[12] && ~vfdsu_ex2_expnt_rst[11] && @@ -313,6 +342,10 @@ assign ex2_half_potnt_of = ~vfdsu_ex2_expnt_rst[6] && ~vfdsu_ex2_expnt_rst[5] && vfdsu_ex2_expnt_rst[4] && ~|vfdsu_ex2_expnt_rst[3:0]; +assign ex2_bfloat_potnt_of = ~vfdsu_ex2_expnt_rst[9] && + ~vfdsu_ex2_expnt_rst[8] && + vfdsu_ex2_expnt_rst[7] && + ~|vfdsu_ex2_expnt_rst[6:0]; assign ex2_potnt_of = ex2_potnt_of_pre && vfdsu_ex2_op0_norm && vfdsu_ex2_op1_norm && @@ -321,6 +354,7 @@ assign ex2_potnt_of = ex2_potnt_of_pre && //When input is normal, underflow when E1-E2 <= -127/-1023/-15 assign ex2_doub_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hc01); assign ex2_sing_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hf81); +assign ex2_bfloat_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hf81); assign ex2_half_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hff1); assign ex2_half_potnt_uf = &vfdsu_ex2_expnt_rst[6:4] && ~|vfdsu_ex2_expnt_rst[3:2] && @@ -337,6 +371,10 @@ assign ex2_sing_potnt_uf = &vfdsu_ex2_expnt_rst[9:7] && ~|vfdsu_ex2_expnt_rst[6:2] && vfdsu_ex2_expnt_rst[1] && !vfdsu_ex2_expnt_rst[0]; +assign ex2_bfloat_potnt_uf = &vfdsu_ex2_expnt_rst[9:7] && + ~|vfdsu_ex2_expnt_rst[6:2] && + vfdsu_ex2_expnt_rst[1] && + !vfdsu_ex2_expnt_rst[0]; assign ex2_potnt_uf = (ex2_potnt_uf_pre && vfdsu_ex2_op0_norm && @@ -371,6 +409,8 @@ assign ex2_single_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0]<12'hf6a); assign 
ex2_half_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0]<12'hfe7); +assign ex2_bfloat_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] + && (vfdsu_ex2_expnt_rst[11:0]<12'hf6a); assign ex2_rslt_denorm = ex2_uf; //=======================EX2 skip srt iteration====================== @@ -490,6 +530,21 @@ endcase // &CombEnd; @248 end +always @( vfdsu_ex2_expnt_rst[12:0]) +begin +case(vfdsu_ex2_expnt_rst[12:0]) + 13'h1f82:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h200000000000; //-126 1 + 13'h1f81:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h400000000000; //-127 0 + 13'h1f80:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h800000000000; //-128 -1 + 13'h1f7f:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h1000000000000; //-129 -2 + 13'h1f7e:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h2000000000000; //-130 -3 + 13'h1f7d:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h4000000000000; //-131 -4 + 13'h1f7c:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h8000000000000; //-132 -5 + 13'h1f7b:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h10000000000000; //-133 -6 + default: ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h0; // -23 +endcase +end + //===================special result======================== assign ex2_result_zero = vfdsu_ex2_result_zero; assign ex2_result_qnan = vfdsu_ex2_result_qnan; @@ -541,6 +596,7 @@ begin vfdsu_ex3_doub_expnt_rst[12:0] <= 13'b0; vfdsu_ex3_sing_expnt_rst[8:0] <= 9'b0; vfdsu_ex3_half_expnt_rst[12:0] <= 13'b0; + vfdsu_ex3_bfloat_expnt_rst[12:0] <= 13'b0; vfdsu_ex3_result_sign <= 1'b0; vfdsu_ex3_qnan_sign <= 1'b0; vfdsu_ex3_qnan_f[51:0] <= 52'b0; @@ -551,6 +607,8 @@ begin vfdsu_ex3_id_srt_skip <= 1'b0; vfdsu_ex3_double <= 1'b0; vfdsu_ex3_single <= 1'b0; + vfdsu_ex3_half <= 1'b0; + vfdsu_ex3_bfloat <= 1'b0; end else if(ex2_pipedown) begin @@ -569,6 +627,7 @@ begin vfdsu_ex3_doub_expnt_rst[12:0] <= vfdsu_ex2_expnt_rst[12:0]; vfdsu_ex3_sing_expnt_rst[8:0] <= vfdsu_ex2_expnt_rst[8:0]; 
vfdsu_ex3_half_expnt_rst[12:0] <= vfdsu_ex2_expnt_rst[12:0]; + vfdsu_ex3_bfloat_expnt_rst[12:0] <= vfdsu_ex2_expnt_rst[12:0]; vfdsu_ex3_result_sign <= vfdsu_ex2_result_sign; vfdsu_ex3_qnan_sign <= vfdsu_ex2_qnan_sign; vfdsu_ex3_qnan_f[51:0] <= vfdsu_ex2_qnan_f[51:0]; @@ -579,6 +638,8 @@ begin vfdsu_ex3_id_srt_skip <= ex2_id_nor_srt_skip; vfdsu_ex3_double <= vfdsu_ex2_double; vfdsu_ex3_single <= vfdsu_ex2_single; + vfdsu_ex3_half <= vfdsu_ex2_half; + vfdsu_ex3_bfloat <= vfdsu_ex2_bfloat; end else begin @@ -597,6 +658,7 @@ begin vfdsu_ex3_doub_expnt_rst[12:0] <= vfdsu_ex3_doub_expnt_rst[12:0]; vfdsu_ex3_sing_expnt_rst[8:0] <= vfdsu_ex3_sing_expnt_rst[8:0]; vfdsu_ex3_half_expnt_rst[12:0] <= vfdsu_ex3_half_expnt_rst[12:0]; + vfdsu_ex3_bfloat_expnt_rst[12:0] <= vfdsu_ex3_bfloat_expnt_rst[12:0]; vfdsu_ex3_result_sign <= vfdsu_ex3_result_sign; vfdsu_ex3_qnan_sign <= vfdsu_ex3_qnan_sign; vfdsu_ex3_qnan_f[51:0] <= vfdsu_ex3_qnan_f[51:0]; @@ -607,6 +669,8 @@ begin vfdsu_ex3_id_srt_skip <= vfdsu_ex3_id_srt_skip; vfdsu_ex3_double <= vfdsu_ex3_double; vfdsu_ex3_single <= vfdsu_ex3_single; + vfdsu_ex3_half <= vfdsu_ex3_half; + vfdsu_ex3_bfloat <= vfdsu_ex3_bfloat; end end assign vfdsu_ex3_rem_zero = ~|srt_remainder[60:0]; diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v index f8846255..28ca2595 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v @@ -99,6 +99,8 @@ wire ex1_double; wire ex1_pipedown; wire ex1_scalar; wire ex1_single; +wire ex1_half; +wire ex1_bfloat; wire ex1_sqrt; wire [63:0] ex1_src0; wire [63:0] ex1_src1; @@ -128,6 +130,8 @@ wire vfdsu_dp_fdiv_busy; wire vfdsu_dp_inst_wb_req; wire vfdsu_ex2_double; wire vfdsu_ex2_single; +wire vfdsu_ex2_half; +wire vfdsu_ex2_bfloat; wire vfdsu_ifu_debug_ex2_wait; wire vfdsu_ifu_debug_idle; wire vfdsu_ifu_debug_pipe_busy; @@ -234,6 
+238,8 @@ ct_vfdsu_ctrl x_ct_vfdsu_ctrl ( .ex1_double (ex1_double ), .ex1_pipedown (ex1_pipedown ), .ex1_single (ex1_single ), + .ex1_half (ex1_half ), + .ex1_bfloat (ex1_bfloat ), .ex2_data_clk (ex2_data_clk ), .ex2_pipedown (ex2_pipedown ), .ex2_srt_first_round (ex2_srt_first_round ), @@ -251,6 +257,8 @@ ct_vfdsu_ctrl x_ct_vfdsu_ctrl ( .vfdsu_dp_inst_wb_req (vfdsu_dp_inst_wb_req ), .vfdsu_ex2_double (vfdsu_ex2_double ), .vfdsu_ex2_single (vfdsu_ex2_single ), + .vfdsu_ex2_half (vfdsu_ex2_half ), + .vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ), .vfdsu_ifu_debug_ex2_wait (vfdsu_ifu_debug_ex2_wait ), .vfdsu_ifu_debug_idle (vfdsu_ifu_debug_idle ), .vfdsu_ifu_debug_pipe_busy (vfdsu_ifu_debug_pipe_busy ) @@ -266,6 +274,8 @@ ct_vfdsu_double x_ct_vfdsu_double ( .ex1_pipedown (ex1_pipedown ), .ex1_scalar (ex1_scalar ), .ex1_single (ex1_single ), + .ex1_half (ex1_half ), + .ex1_bfloat (ex1_bfloat ), .ex1_sqrt (ex1_sqrt ), .ex1_src0 (ex1_src0 ), .ex1_src1 (ex1_src1 ), @@ -302,6 +312,8 @@ ct_vfdsu_scalar_dp x_ct_vfdsu_scalar_dp ( .ex1_pipedown (ex1_pipedown ), .ex1_scalar (ex1_scalar ), .ex1_single (ex1_single ), + .ex1_half (ex1_half ), + .ex1_bfloat (ex1_bfloat ), .ex1_sqrt (ex1_sqrt ), .ex1_src0 (ex1_src0 ), .ex1_src1 (ex1_src1 ), @@ -321,7 +333,9 @@ ct_vfdsu_scalar_dp x_ct_vfdsu_scalar_dp ( .pipex_dp_vfdsu_freg_data (pipex_dp_vfdsu_freg_data ), .pipex_dp_vfdsu_vreg (pipex_dp_vfdsu_vreg ), .vfdsu_ex2_double (vfdsu_ex2_double ), - .vfdsu_ex2_single (vfdsu_ex2_single ) + .vfdsu_ex2_single (vfdsu_ex2_single ), + .vfdsu_ex2_half (vfdsu_ex2_half ), + .vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ) ); diff --git a/vendor/patches/openc910/0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch b/vendor/patches/openc910/0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch new file mode 100644 index 00000000..7d1ce903 --- /dev/null +++ b/vendor/patches/openc910/0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch @@ -0,0 +1,1359 @@ +From 032de47f043e3fe1dcb34c52363f7cb837681b33 Mon Sep 17 
00:00:00 2001 +From: Luca Bertaccini +Date: Mon, 24 Jun 2024 17:30:43 +0200 +Subject: [PATCH] Add FP16ALT support to THMULTI DivSqrt unit + +--- + .../gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v | 21 ++- + .../gen_rtl/vfdsu/rtl/ct_vfdsu_double.v | 29 ++++ + .../gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v | 65 +++++++-- + .../gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v | 96 +++++++++---- + .../gen_rtl/vfdsu/rtl/ct_vfdsu_round.v | 152 ++++++++++++++++++--- + .../gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v | 24 +++- + .../gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v | 88 ++++++++++-- + .../gen_rtl/vfdsu/rtl/ct_vfdsu_top.v | 16 ++- + 8 files changed, 423 insertions(+), 68 deletions(-) + +diff --git a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v +index f7f541f..0aba4f1 100644 +--- a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v ++++ b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v +@@ -26,6 +26,8 @@ module ct_vfdsu_ctrl( + ex1_double, + ex1_pipedown, + ex1_single, ++ ex1_half, ++ ex1_bfloat, + ex2_data_clk, + ex2_pipedown, + ex2_srt_first_round, +@@ -43,6 +45,8 @@ module ct_vfdsu_ctrl( + vfdsu_dp_inst_wb_req, + vfdsu_ex2_double, + vfdsu_ex2_single, ++ vfdsu_ex2_half, ++ vfdsu_ex2_bfloat, + vfdsu_ifu_debug_ex2_wait, + vfdsu_ifu_debug_idle, + vfdsu_ifu_debug_pipe_busy +@@ -57,6 +61,8 @@ input dp_vfdsu_fdiv_gateclk_issue; + input dp_vfdsu_idu_fdiv_issue; + input ex1_double; + input ex1_single; ++input ex1_half; ++input ex1_bfloat; + input forever_cpuclk; + input pad_yy_icg_scan_en; + input rtu_yy_xx_flush; +@@ -64,6 +70,8 @@ input srt_ctrl_rem_zero; + input srt_ctrl_skip_srt; + input vfdsu_ex2_double; + input vfdsu_ex2_single; ++input vfdsu_ex2_half; ++input vfdsu_ex2_bfloat; + output ex1_data_clk; + output ex1_pipedown; + output ex2_data_clk; +@@ -106,6 +114,8 @@ wire ex1_data_clk_en; + wire ex1_double; + wire ex1_pipedown; + wire ex1_single; ++wire ex1_half; ++wire ex1_bfloat; + wire ex2_data_clk; + wire ex2_data_clk_en; + wire ex2_pipe_clk; +@@ 
-137,6 +147,8 @@ wire vfdsu_dp_fdiv_busy; + wire vfdsu_dp_inst_wb_req; + wire vfdsu_ex2_double; + wire vfdsu_ex2_single; ++wire vfdsu_ex2_half; ++wire vfdsu_ex2_bfloat; + wire vfdsu_ex2_vld; + wire vfdsu_ifu_debug_ex2_wait; + wire vfdsu_ifu_debug_idle; +@@ -244,8 +256,9 @@ end + //For Double, initial is 5'b11100('d28), calculate 29 round + //For Single, initial is 5'b01110('d14), calculate 15 round + assign srt_cnt_ini[4:0] = (ex1_double) ? 5'b01101 : +- ex1_single ? 5'b00110 +- : 5'b00011; ++ (ex1_single) ? 5'b00110 : ++ (ex1_half) ? 5'b00011 ++ : 5'b00010; + + //vfdsu ex2 pipedown signal + assign ex2_pipedown = srt_last_round && div_st_ex2; +@@ -277,7 +290,9 @@ assign srt_secd_round = ex2_srt_secd_round; + + assign ex2_srt_secd_round_pre = srt_sm_on && srt_secd_round_pre; + assign srt_secd_round_pre = vfdsu_ex2_double ? srt_cnt[4:0]==5'b01101 : +- vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 : srt_cnt[4:0] == 5'b00011; ++ vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 : ++ vfdsu_ex2_half ? srt_cnt[4:0]==5'b00011 ++ : srt_cnt[4:0]==5'b00010; + + //========================================================== + // EX3 Stage Control Signal +diff --git a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v +index b57e289..ccd34f9 100644 +--- a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v ++++ b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v +@@ -24,6 +24,8 @@ module ct_vfdsu_double( + ex1_pipedown, + ex1_scalar, + ex1_single, ++ ex1_half, ++ ex1_bfloat, + ex1_sqrt, + ex1_src0, + ex1_src1, +@@ -52,6 +54,8 @@ input ex1_double; + input ex1_pipedown; + input ex1_scalar; + input ex1_single; ++input ex1_half; ++input ex1_bfloat; + input ex1_sqrt; + input [63:0] ex1_src0; + input [63:0] ex1_src1; +@@ -83,6 +87,8 @@ wire ex1_pipedown; + wire [59:0] ex1_remainder; + wire ex1_scalar; + wire ex1_single; ++wire ex1_half; ++wire ex1_bfloat; + wire ex1_sqrt; + wire [63:0] ex1_src0; + wire [63:0] ex1_src1; +@@ -116,12 
+122,15 @@ wire vfdsu_ex2_result_sign; + wire vfdsu_ex2_result_zero; + wire [2 :0] vfdsu_ex2_rm; + wire vfdsu_ex2_single; ++wire vfdsu_ex2_half; ++wire vfdsu_ex2_bfloat; + wire vfdsu_ex2_sqrt; + wire vfdsu_ex2_srt_skip; + wire [12:0] vfdsu_ex3_doub_expnt_rst; + wire vfdsu_ex3_double; + wire vfdsu_ex3_dz; + wire [12:0] vfdsu_ex3_half_expnt_rst; ++wire [12:0] vfdsu_ex3_bfloat_expnt_rst; + wire vfdsu_ex3_id_srt_skip; + wire vfdsu_ex3_nv; + wire vfdsu_ex3_of; +@@ -141,6 +150,8 @@ wire [2 :0] vfdsu_ex3_rm; + wire vfdsu_ex3_rslt_denorm; + wire [8 :0] vfdsu_ex3_sing_expnt_rst; + wire vfdsu_ex3_single; ++wire vfdsu_ex3_half; ++wire vfdsu_ex3_bfloat; + wire vfdsu_ex3_uf; + wire vfdsu_ex4_denorm_to_tiny_frac; + wire vfdsu_ex4_double; +@@ -164,6 +175,8 @@ wire vfdsu_ex4_result_sign; + wire vfdsu_ex4_result_zero; + wire vfdsu_ex4_rslt_denorm; + wire vfdsu_ex4_single; ++wire vfdsu_ex4_half; ++wire vfdsu_ex4_bfloat; + wire vfdsu_ex4_uf; + wire vfpu_yy_xx_dqnan; + wire [2 :0] vfpu_yy_xx_rm; +@@ -181,6 +194,8 @@ ct_vfdsu_prepare x_ct_vfdsu_prepare ( + .ex1_remainder (ex1_remainder ), + .ex1_scalar (ex1_scalar ), + .ex1_single (ex1_single ), ++ .ex1_half (ex1_half ), ++ .ex1_bfloat (ex1_bfloat ), + .ex1_sqrt (ex1_sqrt ), + .ex1_src0 (ex1_src0 ), + .ex1_src1 (ex1_src1 ), +@@ -204,6 +219,8 @@ ct_vfdsu_prepare x_ct_vfdsu_prepare ( + .vfdsu_ex2_result_zero (vfdsu_ex2_result_zero), + .vfdsu_ex2_rm (vfdsu_ex2_rm ), + .vfdsu_ex2_single (vfdsu_ex2_single ), ++ .vfdsu_ex2_half (vfdsu_ex2_half ), ++ .vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ), + .vfdsu_ex2_sqrt (vfdsu_ex2_sqrt ), + .vfdsu_ex2_srt_skip (vfdsu_ex2_srt_skip ), + .vfpu_yy_xx_dqnan (vfpu_yy_xx_dqnan ), +@@ -246,12 +263,15 @@ ct_vfdsu_srt x_ct_vfdsu_srt ( + .vfdsu_ex2_result_zero (vfdsu_ex2_result_zero ), + .vfdsu_ex2_rm (vfdsu_ex2_rm ), + .vfdsu_ex2_single (vfdsu_ex2_single ), ++ .vfdsu_ex2_half (vfdsu_ex2_half ), ++ .vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ), + .vfdsu_ex2_sqrt (vfdsu_ex2_sqrt ), + .vfdsu_ex2_srt_skip (vfdsu_ex2_srt_skip ), 
+ .vfdsu_ex3_doub_expnt_rst (vfdsu_ex3_doub_expnt_rst ), + .vfdsu_ex3_double (vfdsu_ex3_double ), + .vfdsu_ex3_dz (vfdsu_ex3_dz ), + .vfdsu_ex3_half_expnt_rst (vfdsu_ex3_half_expnt_rst ), ++ .vfdsu_ex3_bfloat_expnt_rst (vfdsu_ex3_bfloat_expnt_rst ), + .vfdsu_ex3_id_srt_skip (vfdsu_ex3_id_srt_skip ), + .vfdsu_ex3_nv (vfdsu_ex3_nv ), + .vfdsu_ex3_of (vfdsu_ex3_of ), +@@ -271,6 +291,8 @@ ct_vfdsu_srt x_ct_vfdsu_srt ( + .vfdsu_ex3_rslt_denorm (vfdsu_ex3_rslt_denorm ), + .vfdsu_ex3_sing_expnt_rst (vfdsu_ex3_sing_expnt_rst ), + .vfdsu_ex3_single (vfdsu_ex3_single ), ++ .vfdsu_ex3_half (vfdsu_ex3_half ), ++ .vfdsu_ex3_bfloat (vfdsu_ex3_bfloat ), + .vfdsu_ex3_uf (vfdsu_ex3_uf ) + ); + +@@ -288,6 +310,7 @@ ct_vfdsu_round x_ct_vfdsu_round ( + .vfdsu_ex3_double (vfdsu_ex3_double ), + .vfdsu_ex3_dz (vfdsu_ex3_dz ), + .vfdsu_ex3_half_expnt_rst (vfdsu_ex3_half_expnt_rst ), ++ .vfdsu_ex3_bfloat_expnt_rst (vfdsu_ex3_bfloat_expnt_rst ), + .vfdsu_ex3_id_srt_skip (vfdsu_ex3_id_srt_skip ), + .vfdsu_ex3_nv (vfdsu_ex3_nv ), + .vfdsu_ex3_of (vfdsu_ex3_of ), +@@ -307,6 +330,8 @@ ct_vfdsu_round x_ct_vfdsu_round ( + .vfdsu_ex3_rslt_denorm (vfdsu_ex3_rslt_denorm ), + .vfdsu_ex3_sing_expnt_rst (vfdsu_ex3_sing_expnt_rst ), + .vfdsu_ex3_single (vfdsu_ex3_single ), ++ .vfdsu_ex3_half (vfdsu_ex3_half ), ++ .vfdsu_ex3_bfloat (vfdsu_ex3_bfloat ), + .vfdsu_ex3_uf (vfdsu_ex3_uf ), + .vfdsu_ex4_denorm_to_tiny_frac (vfdsu_ex4_denorm_to_tiny_frac ), + .vfdsu_ex4_double (vfdsu_ex4_double ), +@@ -330,6 +355,8 @@ ct_vfdsu_round x_ct_vfdsu_round ( + .vfdsu_ex4_result_zero (vfdsu_ex4_result_zero ), + .vfdsu_ex4_rslt_denorm (vfdsu_ex4_rslt_denorm ), + .vfdsu_ex4_single (vfdsu_ex4_single ), ++ .vfdsu_ex4_half (vfdsu_ex4_half ), ++ .vfdsu_ex4_bfloat (vfdsu_ex4_bfloat ), + .vfdsu_ex4_uf (vfdsu_ex4_uf ) + ); + +@@ -359,6 +386,8 @@ ct_vfdsu_pack x_ct_vfdsu_pack ( + .vfdsu_ex4_result_zero (vfdsu_ex4_result_zero ), + .vfdsu_ex4_rslt_denorm (vfdsu_ex4_rslt_denorm ), + .vfdsu_ex4_single (vfdsu_ex4_single ), ++ 
.vfdsu_ex4_half (vfdsu_ex4_half ), ++ .vfdsu_ex4_bfloat (vfdsu_ex4_bfloat ), + .vfdsu_ex4_uf (vfdsu_ex4_uf ) + ); + +diff --git a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v +index e1d2e18..b29c70f 100644 +--- a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v ++++ b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_pack.v +@@ -39,6 +39,8 @@ module ct_vfdsu_pack( + vfdsu_ex4_result_zero, + vfdsu_ex4_rslt_denorm, + vfdsu_ex4_single, ++ vfdsu_ex4_half, ++ vfdsu_ex4_bfloat, + vfdsu_ex4_uf + ); + +@@ -65,6 +67,8 @@ input vfdsu_ex4_result_sign; + input vfdsu_ex4_result_zero; + input vfdsu_ex4_rslt_denorm; + input vfdsu_ex4_single; ++input vfdsu_ex4_half; ++input vfdsu_ex4_bfloat; + input vfdsu_ex4_uf; + output [4 :0] ex4_out_expt; + output [63:0] ex4_out_result; +@@ -73,6 +77,7 @@ output [63:0] ex4_out_result; + reg [51:0] ex4_denorm_frac; + reg [51:0] ex4_frac_52; + reg [51:0] ex4_half_denorm_frac; ++reg [51:0] ex4_bfloat_denorm_frac; + reg [63:0] ex4_out_result; + reg [51:0] ex4_single_denorm_frac; + reg [12:0] expnt_add_op1; +@@ -95,6 +100,11 @@ wire [63:0] ex4_half_rst0; + wire [63:0] ex4_half_rst_inf; + wire [63:0] ex4_half_rst_norm; + wire [63:0] ex4_half_rst_qnan; ++wire [63:0] ex4_bfloat_lfn; ++wire [63:0] ex4_bfloat_rst0; ++wire [63:0] ex4_bfloat_rst_inf; ++wire [63:0] ex4_bfloat_rst_norm; ++wire [63:0] ex4_bfloat_rst_qnan; + wire ex4_of_plus; + wire [4 :0] ex4_out_expt; + wire ex4_result_inf; +@@ -134,6 +144,8 @@ wire vfdsu_ex4_result_sign; + wire vfdsu_ex4_result_zero; + wire vfdsu_ex4_rslt_denorm; + wire vfdsu_ex4_single; ++wire vfdsu_ex4_half; ++wire vfdsu_ex4_bfloat; + wire vfdsu_ex4_uf; + + +@@ -276,6 +288,24 @@ case(vfdsu_ex4_expnt_rst[12:0]) + endcase + // &CombEnd; @147 + end ++// &CombBeg; @132 ++always @( vfdsu_ex4_expnt_rst[12:0] ++ or ex4_frac[54:1] ++ or vfdsu_ex4_denorm_to_tiny_frac) ++begin ++case(vfdsu_ex4_expnt_rst[12:0]) ++ 13'h1: ex4_bfloat_denorm_frac[51:0] = { ex4_frac[52:1]}; //-1022 
1 ++ 13'h0: ex4_bfloat_denorm_frac[51:0] = { ex4_frac[53:2]}; //-1023 0 ++ 13'h1fff:ex4_bfloat_denorm_frac[51:0] = { ex4_frac[54:3]}; //-1024 -1 ++ 13'h1ffe:ex4_bfloat_denorm_frac[51:0] = {1'b0, ex4_frac[54:4]}; //-1025 -2 ++ 13'h1ffd:ex4_bfloat_denorm_frac[51:0] = {2'b0, ex4_frac[54:5]}; //-1026 -3 ++ 13'h1ffc:ex4_bfloat_denorm_frac[51:0] = {3'b0, ex4_frac[54:6]}; //-1027 -4 ++ 13'h1ffb:ex4_bfloat_denorm_frac[51:0] = {4'b0, ex4_frac[54:7]}; //-1028 -5 ++ 13'h1ffa:ex4_bfloat_denorm_frac[51:0] = {5'b0, ex4_frac[54:8]}; //-1029 -6 ++ default :ex4_bfloat_denorm_frac[51:0] = vfdsu_ex4_denorm_to_tiny_frac ?{7'b1,45'b0} : 52'b0; //-1045 ++endcase ++// &CombEnd; @147 ++end + + //here when denormal number round to add1, it will become normal number + assign ex4_denorm_potnt_norm = (vfdsu_ex4_potnt_norm[1] && ex4_frac[53]) || +@@ -286,9 +316,11 @@ assign ex4_rslt_denorm = !vfdsu_ex4_result_qnan + assign ex4_denorm_result[63:0] = vfdsu_ex4_double ? + {vfdsu_ex4_result_sign,11'h0,ex4_denorm_frac[51:0]} : + vfdsu_ex4_single ? {32'hffffffff,vfdsu_ex4_result_sign, +- 8'h0,ex4_single_denorm_frac[51:29]} : { +- 48'hffffffffffff,vfdsu_ex4_result_sign,5'h0, +- ex4_half_denorm_frac[51:42]}; ++ 8'h0,ex4_single_denorm_frac[51:29]} : ++ vfdsu_ex4_half ? 
{48'hffffffffffff,vfdsu_ex4_result_sign,5'h0, ++ ex4_half_denorm_frac[51:42]} ++ : {48'hffffffffffff,vfdsu_ex4_result_sign,8'h0, ++ ex4_bfloat_denorm_frac[51:45]}; + + + +@@ -299,6 +331,15 @@ assign ex4_half_rst_norm[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign, + ex4_expnt_rst[4:0], + ex4_frac_52[51:42]}; + assign ex4_half_rst0[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,15'h0}; ++ ++assign ex4_bfloat_lfn[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,8'hfe,{7{1'b1}}}; ++assign ex4_bfloat_rst_qnan[63:0] = {48'hffffffffffff,vfdsu_ex4_qnan_sign, 8'hff,1'b1, vfdsu_ex4_qnan_f[5:0]}; ++assign ex4_bfloat_rst_inf[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,8'hff,7'b0}; ++assign ex4_bfloat_rst_norm[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign, ++ ex4_expnt_rst[7:0], ++ ex4_frac_52[51:45]}; ++assign ex4_bfloat_rst0[63:0] = {48'hffffffffffff,vfdsu_ex4_result_sign,15'h0}; ++ + //ex4 overflow/underflow plus + assign ex4_rst_nor = vfdsu_ex4_result_nor; + assign ex4_of_plus = vfdsu_ex4_potnt_of && +@@ -345,21 +386,23 @@ assign ex4_sing_rst_norm[63:0] = {32'hffffffff,vfdsu_ex4_result_sign, + ex4_expnt_rst[7:0], + ex4_frac_52[51:29]}; + assign ex4_rst_lfn[63:0] = (vfdsu_ex4_double) ? ex4_doub_lfn[63:0] : +- vfdsu_ex4_single ? ex4_sing_lfn[63:0] : ex4_half_lfn[63:0]; ++ vfdsu_ex4_single ? ex4_sing_lfn[63:0] : ++ vfdsu_ex4_half ? ex4_half_lfn[63:0] : ex4_bfloat_lfn[63:0]; + + assign ex4_rst0[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst0[63:0] : +- vfdsu_ex4_single ? ex4_sing_rst0[63:0] : ex4_half_rst0[63:0]; ++ vfdsu_ex4_single ? ex4_sing_rst0[63:0] : ++ vfdsu_ex4_half ? ex4_half_rst0[63:0] : ex4_bfloat_rst0[63:0]; + + assign ex4_rst_qnan[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst_qnan[63:0] : +- vfdsu_ex4_single ? ex4_sing_rst_qnan[63:0] +- : ex4_half_rst_qnan[63:0]; ++ vfdsu_ex4_single ? ex4_sing_rst_qnan[63:0] : ++ vfdsu_ex4_half ? ex4_half_rst_qnan[63:0] : ex4_bfloat_rst_qnan[63:0]; + + assign ex4_rst_norm[63:0] = (vfdsu_ex4_double) ? 
ex4_doub_rst_norm[63:0] : +- vfdsu_ex4_single ? ex4_sing_rst_norm[63:0] +- : ex4_half_rst_norm[63:0]; ++ vfdsu_ex4_single ? ex4_sing_rst_norm[63:0] : ++ vfdsu_ex4_half ? ex4_half_rst_norm[63:0] : ex4_bfloat_rst_norm[63:0]; + assign ex4_rst_inf[63:0] = (vfdsu_ex4_double) ? ex4_doub_rst_inf[63:0] : +- vfdsu_ex4_single ? ex4_sing_rst_inf[63:0] +- : ex4_half_rst_inf[63:0]; ++ vfdsu_ex4_single ? ex4_sing_rst_inf[63:0] : ++ vfdsu_ex4_half ? ex4_half_rst_inf[63:0] : ex4_bfloat_rst_inf[63:0]; + + + assign ex4_cor_uf = (vfdsu_ex4_uf && !ex4_denorm_potnt_norm || ex4_uf_plus) +diff --git a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v +index 7c5821c..0ef958a 100644 +--- a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v ++++ b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_prepare.v +@@ -25,6 +25,8 @@ module ct_vfdsu_prepare( + ex1_remainder, + ex1_scalar, + ex1_single, ++ ex1_half, ++ ex1_bfloat, + ex1_sqrt, + ex1_src0, + ex1_src1, +@@ -48,6 +50,8 @@ module ct_vfdsu_prepare( + vfdsu_ex2_result_zero, + vfdsu_ex2_rm, + vfdsu_ex2_single, ++ vfdsu_ex2_half, ++ vfdsu_ex2_bfloat, + vfdsu_ex2_sqrt, + vfdsu_ex2_srt_skip, + vfpu_yy_xx_dqnan, +@@ -63,6 +67,8 @@ input ex1_double; + input ex1_pipedown; + input ex1_scalar; + input ex1_single; ++input ex1_half; ++input ex1_bfloat; + input ex1_sqrt; + input [63:0] ex1_src0; + input [63:0] ex1_src1; +@@ -90,6 +96,8 @@ output vfdsu_ex2_result_sign; + output vfdsu_ex2_result_zero; + output [2 :0] vfdsu_ex2_rm; + output vfdsu_ex2_single; ++output vfdsu_ex2_half; ++output vfdsu_ex2_bfloat; + output vfdsu_ex2_sqrt; + output vfdsu_ex2_srt_skip; + +@@ -115,6 +123,8 @@ reg vfdsu_ex2_result_sign; + reg vfdsu_ex2_result_zero; + reg [2 :0] vfdsu_ex2_rm; + reg vfdsu_ex2_single; ++reg vfdsu_ex2_half; ++reg vfdsu_ex2_bfloat; + reg vfdsu_ex2_sqrt; + reg vfdsu_ex2_srt_skip; + +@@ -161,6 +171,12 @@ wire ex1_half_expnt1_max; + wire ex1_half_expnt1_zero; + wire ex1_half_frac0_all0; + wire 
ex1_half_frac1_all0; ++wire ex1_bfloat_expnt0_max; ++wire ex1_bfloat_expnt1_max; ++wire ex1_bfloat_expnt0_zero; ++wire ex1_bfloat_expnt1_zero; ++wire ex1_bfloat_frac0_all0; ++wire ex1_bfloat_frac1_all0; + wire ex1_nv; + wire ex1_op0_cnan; + wire [51:0] ex1_op0_f; +@@ -216,6 +232,8 @@ wire ex1_sing_expnt1_zero; + wire ex1_sing_frac0_all0; + wire ex1_sing_frac1_all0; + wire ex1_single; ++wire ex1_half; ++wire ex1_bfloat; + wire ex1_sqrt; + wire ex1_sqrt_expnt_odd; + wire ex1_sqrt_expnt_result_odd; +@@ -246,9 +264,11 @@ assign ex1_oper1[63:0] = ex1_src1[63:0]; + + //Sign bit prepare + assign ex1_op0_sign = ex1_double ? ex1_oper0[63] : +- ex1_single ? ex1_oper0[31] : ex1_oper0[15]; ++ ex1_single ? ex1_oper0[31] : ++ ex1_half ? ex1_oper0[15] : ex1_oper0[15]; + assign ex1_op1_sign = ex1_double ? ex1_oper1[63] : +- ex1_single ? ex1_oper1[31] : ex1_oper1[15]; ++ ex1_single ? ex1_oper1[31] : ++ ex1_half ? ex1_oper1[15] : ex1_oper1[15]; + assign div_sign = ex1_op0_sign ^ ex1_op1_sign; + assign sqrt_sign = ex1_op0_sign; + assign ex1_result_sign = (ex1_div) +@@ -261,10 +281,14 @@ assign ex1_doub_expnt1_max = &ex1_oper1[62:52]; + assign ex1_sing_expnt1_max = &ex1_oper1[30:23]; + assign ex1_half_expnt0_max = &ex1_oper0[14:10]; + assign ex1_half_expnt1_max = &ex1_oper1[14:10]; ++assign ex1_bfloat_expnt0_max = &ex1_oper0[14:7]; ++assign ex1_bfloat_expnt1_max = &ex1_oper1[14:7]; + assign ex1_expnt0_max = ex1_double ? ex1_doub_expnt0_max : +- ex1_single ? ex1_sing_expnt0_max : ex1_half_expnt0_max; ++ ex1_single ? ex1_sing_expnt0_max : ++ ex1_half ? ex1_half_expnt0_max : ex1_bfloat_expnt0_max; + assign ex1_expnt1_max = ex1_double ? ex1_doub_expnt1_max : +- ex1_single ? ex1_sing_expnt1_max : ex1_half_expnt1_max; ++ ex1_single ? ex1_sing_expnt1_max : ++ ex1_half ? 
ex1_half_expnt1_max : ex1_bfloat_expnt1_max; + + //exponent zero + assign ex1_doub_expnt0_zero = ~|ex1_oper0[62:52]; +@@ -273,10 +297,15 @@ assign ex1_doub_expnt1_zero = ~|ex1_oper1[62:52]; + assign ex1_sing_expnt1_zero = ~|ex1_oper1[30:23]; + assign ex1_half_expnt0_zero = ~|ex1_oper0[14:10]; + assign ex1_half_expnt1_zero = ~|ex1_oper1[14:10]; ++assign ex1_bfloat_expnt0_zero = ~|ex1_oper0[14:7]; ++assign ex1_bfloat_expnt1_zero = ~|ex1_oper1[14:7]; + assign ex1_expnt0_zero = ex1_double ? ex1_doub_expnt0_zero : +- ex1_single ? ex1_sing_expnt0_zero : ex1_half_expnt0_zero; ++ ex1_single ? ex1_sing_expnt0_zero : ++ ex1_half ? ex1_half_expnt0_zero : ex1_bfloat_expnt0_zero; + assign ex1_expnt1_zero = ex1_double ? ex1_doub_expnt1_zero : +- ex1_single ? ex1_sing_expnt1_zero : ex1_half_expnt1_zero; ++ ex1_single ? ex1_sing_expnt1_zero : ++ ex1_half ? ex1_half_expnt1_zero : ex1_bfloat_expnt1_zero; ++ + //fraction zero + assign ex1_doub_frac0_all0 = ~|ex1_oper0[51:0]; + assign ex1_sing_frac0_all0 = ~|ex1_oper0[22:0]; +@@ -284,14 +313,20 @@ assign ex1_doub_frac1_all0 = ~|ex1_oper1[51:0]; + assign ex1_sing_frac1_all0 = ~|ex1_oper1[22:0]; + assign ex1_half_frac0_all0 = ~|ex1_oper0[9:0]; + assign ex1_half_frac1_all0 = ~|ex1_oper1[9:0]; ++assign ex1_bfloat_frac0_all0 = ~|ex1_oper0[6:0]; ++assign ex1_bfloat_frac1_all0 = ~|ex1_oper1[6:0]; + assign ex1_frac0_all0 = ex1_double ? ex1_doub_frac0_all0 : +- ex1_single ? ex1_sing_frac0_all0 : ex1_half_frac0_all0; ++ ex1_single ? ex1_sing_frac0_all0 : ++ ex1_half ? ex1_half_frac0_all0 : ex1_bfloat_frac0_all0; + assign ex1_frac1_all0 = ex1_double ? ex1_doub_frac1_all0 : +- ex1_single ? ex1_sing_frac1_all0 : ex1_half_frac1_all0; ++ ex1_single ? ex1_sing_frac1_all0 : ++ ex1_half ? ex1_half_frac1_all0 : ex1_bfloat_frac1_all0; + assign ex1_frac0_msb = ex1_double ? ex1_oper0[51] : +- ex1_single ? ex1_oper0[22] : ex1_oper0[9]; ++ ex1_single ? ex1_oper0[22] : ++ ex1_half ? ex1_oper0[9] : ex1_oper0[6]; + assign ex1_frac1_msb = ex1_double ? 
ex1_oper1[51] : +- ex1_single ? ex1_oper1[22] : ex1_oper1[9]; ++ ex1_single ? ex1_oper1[22] : ++ ex1_half ? ex1_oper1[9] : ex1_oper1[6]; + assign ex1_oper0_high_all1 = ex1_single ? &ex1_oper0[63:32] : &ex1_oper0[63:16]; + assign ex1_oper1_high_all1 = ex1_single ? &ex1_oper1[63:32] : &ex1_oper1[63:16]; + +@@ -382,25 +417,30 @@ ct_vfdsu_ff1 x_frac1_expnt ( + // &Connect(.frac_bin_val(ex1_oper1_id_expnt[12:0])); @157 + // &Connect(.fanc_shift_num(ex1_oper1_id_frac[51:0])); @158 + assign ex1_oper0_frac[51:0] = ex1_double ? ex1_oper0[51:0] : +- ex1_single ? {ex1_oper0[22:0],29'b0} +- : {ex1_oper0[9:0],42'b0}; ++ ex1_single ? {ex1_oper0[22:0],29'b0} : ++ ex1_half ? {ex1_oper0[9:0],42'b0} ++ : {ex1_oper0[6:0],45'b0}; + assign ex1_oper1_frac[51:0] = ex1_double ? ex1_oper1[51:0] : +- ex1_single ? {ex1_oper1[22:0],29'b0} +- : {ex1_oper1[9:0],42'b0}; ++ ex1_single ? {ex1_oper1[22:0],29'b0} : ++ ex1_half ? {ex1_oper1[9:0],42'b0} ++ : {ex1_oper1[6:0],45'b0}; + //=====================exponent add========================= + //exponent number 0 + assign ex1_div_op0_expnt[12:0] = ex1_double ? {2'b0,ex1_oper0[62:52]} : +- ex1_single ? {5'b0,ex1_oper0[30:23]} +- : {8'b0,ex1_oper0[14:10]}; ++ ex1_single ? {5'b0,ex1_oper0[30:23]} : ++ ex1_half ? {8'b0,ex1_oper0[14:10]} ++ : {5'b0,ex1_oper0[14:7]}; + assign ex1_expnt_adder_op0[12:0] = ex1_op0_id_nor ? ex1_oper0_id_expnt[12:0] + : ex1_div_op0_expnt[12:0]; + //exponent number 1 + assign ex1_div_op1_expnt[12:0] = ex1_double ? {2'b0,ex1_oper1[62:52]} : +- ex1_single ? {5'b0,ex1_oper1[30:23]} +- : {8'b0,ex1_oper1[14:10]}; ++ ex1_single ? {5'b0,ex1_oper1[30:23]} : ++ ex1_half ? {8'b0,ex1_oper1[14:10]} ++ : {5'b0,ex1_oper1[14:7]}; + assign ex1_sqrt_op1_expnt[12:0] = ex1_double ? {3'b0,{10{1'b1}}} : //'d1023 +- ex1_single ? {6'b0,{7{1'b1}}} //'d127 +- : {9'b0,{4{1'b1}}}; //'d15 ++ ex1_single ? {6'b0,{7{1'b1}}} ://'d127 ++ ex1_half ? 
{9'b0,{4{1'b1}}} //'d15 ++ : {6'b0,{7{1'b1}}}; //'d127 + + // &CombBeg; @180 + always @( ex1_oper1_id_expnt[12:0] +@@ -569,11 +609,13 @@ assign ex1_div_srt_op0[52:0] = ex1_div_nor_srt_op0[52:0]; + assign ex1_div_srt_op1[52:0] = ex1_div_nor_srt_op1[52:0]; + //ex1_div_nor_srt_op0 + assign ex1_div_noid_nor_srt_op0[52:0] = ex1_double ? {1'b1,ex1_oper0[51:0]} : +- ex1_single ? {1'b1,ex1_oper0[22:0],29'b0} +- : {1'b1,ex1_oper0[9:0],42'b0}; ++ ex1_single ? {1'b1,ex1_oper0[22:0],29'b0} : ++ ex1_half ? {1'b1,ex1_oper0[9:0],42'b0} ++ : {1'b1,ex1_oper0[6:0],45'b0}; + assign ex1_div_noid_nor_srt_op1[52:0] = ex1_double ? {1'b1,ex1_oper1[51:0]} : +- ex1_single ? {1'b1,ex1_oper1[22:0],29'b0} +- : {1'b1,ex1_oper1[9:0],42'b0}; ++ ex1_single ? {1'b1,ex1_oper1[22:0],29'b0} : ++ ex1_half ? {1'b1,ex1_oper1[9:0],42'b0} ++ : {1'b1,ex1_oper1[6:0],45'b0}; + assign ex1_div_nor_srt_op0[52:0] = ex1_op0_id_nor ? {ex1_oper0_id_frac[51:0],1'b0} + : ex1_div_noid_nor_srt_op0[52:0]; + //ex1_div_nor_srt_op1 +@@ -699,6 +741,8 @@ begin + vfdsu_ex2_sqrt <= 1'b0; + vfdsu_ex2_double <= 1'b0; + vfdsu_ex2_single <= 1'b0; ++ vfdsu_ex2_half <= 1'b0; ++ vfdsu_ex2_bfloat <= 1'b0; + end + else if(ex1_pipedown) + begin +@@ -721,6 +765,8 @@ begin + vfdsu_ex2_sqrt <= ex1_sqrt; + vfdsu_ex2_double <= ex1_double; + vfdsu_ex2_single <= ex1_single; ++ vfdsu_ex2_half <= ex1_half; ++ vfdsu_ex2_bfloat <= ex1_bfloat; + end + else + begin +@@ -743,6 +789,8 @@ begin + vfdsu_ex2_sqrt <= vfdsu_ex2_sqrt; + vfdsu_ex2_double <= vfdsu_ex2_double; + vfdsu_ex2_single <= vfdsu_ex2_single; ++ vfdsu_ex2_half <= vfdsu_ex2_half; ++ vfdsu_ex2_bfloat <= vfdsu_ex2_bfloat; + end + end + +diff --git a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v +index 6eece52..a419289 100644 +--- a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v ++++ b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v +@@ -27,6 +27,7 @@ module ct_vfdsu_round( + vfdsu_ex3_double, + vfdsu_ex3_dz, + 
vfdsu_ex3_half_expnt_rst, ++ vfdsu_ex3_bfloat_expnt_rst, + vfdsu_ex3_id_srt_skip, + vfdsu_ex3_nv, + vfdsu_ex3_of, +@@ -46,6 +47,8 @@ module ct_vfdsu_round( + vfdsu_ex3_rslt_denorm, + vfdsu_ex3_sing_expnt_rst, + vfdsu_ex3_single, ++ vfdsu_ex3_half, ++ vfdsu_ex3_bfloat, + vfdsu_ex3_uf, + vfdsu_ex4_denorm_to_tiny_frac, + vfdsu_ex4_double, +@@ -69,6 +72,8 @@ module ct_vfdsu_round( + vfdsu_ex4_result_zero, + vfdsu_ex4_rslt_denorm, + vfdsu_ex4_single, ++ vfdsu_ex4_half, ++ vfdsu_ex4_bfloat, + vfdsu_ex4_uf + ); + +@@ -85,6 +90,7 @@ input [12:0] vfdsu_ex3_doub_expnt_rst; + input vfdsu_ex3_double; + input vfdsu_ex3_dz; + input [12:0] vfdsu_ex3_half_expnt_rst; ++input [12:0] vfdsu_ex3_bfloat_expnt_rst; + input vfdsu_ex3_id_srt_skip; + input vfdsu_ex3_nv; + input vfdsu_ex3_of; +@@ -104,6 +110,8 @@ input [2 :0] vfdsu_ex3_rm; + input vfdsu_ex3_rslt_denorm; + input [8 :0] vfdsu_ex3_sing_expnt_rst; + input vfdsu_ex3_single; ++input vfdsu_ex3_half; ++input vfdsu_ex3_bfloat; + input vfdsu_ex3_uf; + output vfdsu_ex4_denorm_to_tiny_frac; + output vfdsu_ex4_double; +@@ -127,6 +135,8 @@ output vfdsu_ex4_result_sign; + output vfdsu_ex4_result_zero; + output vfdsu_ex4_rslt_denorm; + output vfdsu_ex4_single; ++output vfdsu_ex4_half; ++output vfdsu_ex4_bfloat; + output vfdsu_ex4_uf; + + // &Regs; @24 +@@ -138,8 +148,10 @@ reg frac_orig; + reg [54:0] frac_sub1_op1; + reg frac_sub_1; + reg half_denorm_lst_frac; ++reg bfloat_denorm_lst_frac; + reg [56:0] qt_result_double_denorm_for_round; + reg [13:0] qt_result_half_denorm_for_round; ++reg [10:0] qt_result_bfloat_denorm_for_round; + reg [27:0] qt_result_single_denorm_for_round; + reg single_denorm_lst_frac; + reg vfdsu_ex4_denorm_to_tiny_frac; +@@ -164,6 +176,8 @@ reg vfdsu_ex4_result_sign; + reg vfdsu_ex4_result_zero; + reg vfdsu_ex4_rslt_denorm; + reg vfdsu_ex4_single; ++reg vfdsu_ex4_half; ++reg vfdsu_ex4_bfloat; + reg vfdsu_ex4_uf; + + // &Wires; @25 +@@ -199,6 +213,16 @@ wire ex3_half_gr; + wire ex3_half_low_not_zero; + wire 
ex3_half_rst_eq_1; + wire ex3_half_zero; ++wire ex3_bfloat_denorm_eq; ++wire ex3_bfloat_denorm_gr; ++wire ex3_bfloat_denorm_plus; ++wire ex3_bfloat_denorm_potnt_norm; ++wire ex3_bfloat_denorm_zero; ++wire ex3_bfloat_eq; ++wire ex3_bfloat_gr; ++wire ex3_bfloat_low_not_zero; ++wire ex3_bfloat_rst_eq_1; ++wire ex3_bfloat_zero; + wire ex3_nx; + wire ex3_pipe_clk; + wire ex3_pipe_clk_en; +@@ -210,6 +234,8 @@ wire ex3_qt_eq; + wire ex3_qt_gr; + wire ex3_qt_half_lo2_not0; + wire ex3_qt_half_lo3_not0; ++wire ex3_qt_bfloat_lo2_not0; ++wire ex3_qt_bfloat_lo3_not0; + wire ex3_qt_sing_lo3_not0; + wire ex3_qt_sing_lo4_not0; + wire ex3_qt_zero; +@@ -254,6 +280,7 @@ wire vfdsu_ex3_double; + wire vfdsu_ex3_dz; + wire [12:0] vfdsu_ex3_expnt_rst; + wire [12:0] vfdsu_ex3_half_expnt_rst; ++wire [12:0] vfdsu_ex3_bfloat_expnt_rst; + wire vfdsu_ex3_id_srt_skip; + wire vfdsu_ex3_nv; + wire vfdsu_ex3_of; +@@ -273,6 +300,8 @@ wire [2 :0] vfdsu_ex3_rm; + wire vfdsu_ex3_rslt_denorm; + wire [8 :0] vfdsu_ex3_sing_expnt_rst; + wire vfdsu_ex3_single; ++wire vfdsu_ex3_half; ++wire vfdsu_ex3_bfloat; + wire vfdsu_ex3_uf; + + +@@ -302,6 +331,22 @@ assign ex3_half_zero = (total_qt_rt_58[56]) + assign ex3_half_rst_eq_1 = total_qt_rt_58[56] && ~|total_qt_rt_58[55:46]; + assign ex3_half_denorm_plus = !total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1ff2); + assign ex3_half_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1ff1); ++ ++assign ex3_qt_bfloat_lo3_not0 = |total_qt_rt_58[47:45]; ++assign ex3_qt_bfloat_lo2_not0 = |total_qt_rt_58[46:45]; ++assign ex3_bfloat_gr = total_qt_rt_58[56] ++ ? total_qt_rt_58[48] && ex3_qt_bfloat_lo3_not0 ++ : total_qt_rt_58[47] && ex3_qt_bfloat_lo2_not0; ++assign ex3_bfloat_eq = (total_qt_rt_58[56]) ++ ? total_qt_rt_58[48] && !ex3_qt_sing_lo4_not0 ++ : total_qt_rt_58[47] && !ex3_qt_sing_lo3_not0; ++assign ex3_bfloat_zero = (total_qt_rt_58[56]) ++ ? 
~|total_qt_rt_58[48:45] ++ : ~|total_qt_rt_58[47:45]; ++assign ex3_bfloat_rst_eq_1 = total_qt_rt_58[56] && ~|total_qt_rt_58[55:49]; ++assign ex3_bfloat_denorm_plus = !total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1f82); ++assign ex3_bfloat_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1f81); ++ + assign vfdsu_ex3_expnt_rst[12:0] = vfdsu_ex3_half_expnt_rst[12:0]; + // &Force("bus","total_qt_rt_58",57,0); @54 + assign ex3_qt_doub_lo3_not0 = |total_qt_rt_58[2:0]; +@@ -343,19 +388,24 @@ assign ex3_doub_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[1 + assign ex3_sing_denorm_potnt_norm = total_qt_rt_58[56] && (vfdsu_ex3_expnt_rst[12:0] == 13'h1f81); + assign ex3_rslt_denorm = ex3_denorm_plus || vfdsu_ex3_rslt_denorm; + assign ex3_denorm_potnt_norm = vfdsu_ex3_double ? ex3_doub_denorm_potnt_norm : +- vfdsu_ex3_single ? ex3_sing_denorm_potnt_norm +- : ex3_half_denorm_potnt_norm; ++ vfdsu_ex3_single ? ex3_sing_denorm_potnt_norm : ++ vfdsu_ex3_half ? ex3_half_denorm_potnt_norm ++ : ex3_bfloat_denorm_potnt_norm; + assign ex3_rst_eq_1 = (vfdsu_ex3_double)? ex3_doub_rst_eq_1 : +- vfdsu_ex3_single ? ex3_sing_rst_eq_1 : ex3_half_rst_eq_1; ++ vfdsu_ex3_single ? ex3_sing_rst_eq_1 : ++ vfdsu_ex3_half ? ex3_half_rst_eq_1 : ex3_bfloat_rst_eq_1; + assign ex3_qt_eq = (vfdsu_ex3_double)? ex3_doub_eq : +- vfdsu_ex3_single ? ex3_sing_eq : ex3_half_eq; ++ vfdsu_ex3_single ? ex3_sing_eq : ++ vfdsu_ex3_half ? ex3_half_eq : ex3_bfloat_eq; + assign ex3_qt_gr = (vfdsu_ex3_double)? ex3_doub_gr : +- vfdsu_ex3_single ? ex3_sing_gr : ex3_half_gr; ++ vfdsu_ex3_single ? ex3_sing_gr : ++ vfdsu_ex3_half ? ex3_half_gr : ex3_bfloat_gr; + assign ex3_qt_zero = (vfdsu_ex3_double)? ex3_doub_zero : +- vfdsu_ex3_single ? ex3_sing_zero : ex3_half_zero; ++ vfdsu_ex3_single ? ex3_sing_zero : ++ vfdsu_ex3_half ? ex3_half_zero : ex3_bfloat_zero; + assign ex3_denorm_plus = (vfdsu_ex3_double) ? ex3_doub_denorm_plus + : vfdsu_ex3_single ? 
ex3_sing_denorm_plus +- : ex3_half_denorm_plus; ++ : vfdsu_ex3_half ? ex3_half_denorm_plus : ex3_bfloat_denorm_plus; + + // &CombBeg; @108 + always @( vfdsu_ex3_doub_expnt_rst[12:0] +@@ -682,14 +732,64 @@ assign ex3_half_denorm_gr = qt_result_half_denorm_for_round[13] + assign ex3_half_denorm_zero = !qt_result_half_denorm_for_round[13] + && !ex3_half_low_not_zero; + ++always @( vfdsu_ex3_bfloat_expnt_rst[8:0] ++ or total_qt_rt_58[56:45]) ++begin ++case(vfdsu_ex3_bfloat_expnt_rst[8:0]) ++ 9'h182:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[48:45],7'b0}; //-126 1 ++ bfloat_denorm_lst_frac = total_qt_rt_58[49]; ++ end//-1022 1 ++ 9'h181:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[49:45],6'b0}; //-127 0 ++ bfloat_denorm_lst_frac = total_qt_rt_58[50]; ++ end//-1022 1 ++ 9'h180:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[50:45],5'b0}; //-128 -1 ++ bfloat_denorm_lst_frac = total_qt_rt_58[51]; ++ end//-1022 1 ++ 9'h17f:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[51:45],4'b0}; //-129 -2 ++ bfloat_denorm_lst_frac = total_qt_rt_58[52]; ++ end//-1022 1 ++ 9'h17e:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[52:45],3'b0}; //-90 -3 ++ bfloat_denorm_lst_frac = total_qt_rt_58[53]; ++ end//-1022 1 ++ 9'h17d:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[53:45],2'b0}; //-91 -4 ++ bfloat_denorm_lst_frac = total_qt_rt_58[54]; ++ end//-1022 1 ++ 9'h17c:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[54:45],1'b0}; //-92 -5 ++ bfloat_denorm_lst_frac = total_qt_rt_58[55]; ++ end//-1022 1 ++ 9'h17b:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[55:45]}; //-93 -6 ++ bfloat_denorm_lst_frac = total_qt_rt_58[56]; ++ end//-1022 1 ++ 9'h17a:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[56:46]}; //-93 -6 ++ bfloat_denorm_lst_frac = 1'b0; ++ end//-1022 1 ++ default: begin qt_result_bfloat_denorm_for_round[10:0] = '0; ++ 
bfloat_denorm_lst_frac = 1'b0; ++ end//-1022 1 ++endcase ++// &CombEnd; @363 ++end ++//rounding evaluation for single denormalize number ++assign ex3_bfloat_denorm_eq = qt_result_bfloat_denorm_for_round[10] ++ && !ex3_bfloat_low_not_zero; ++assign ex3_bfloat_low_not_zero = |qt_result_bfloat_denorm_for_round[9:0]; ++assign ex3_bfloat_denorm_gr = qt_result_bfloat_denorm_for_round[10] ++ && ex3_bfloat_low_not_zero; ++assign ex3_bfloat_denorm_zero = !qt_result_bfloat_denorm_for_round[10] ++ && !ex3_bfloat_low_not_zero; ++ + assign ex3_denorm_eq = vfdsu_ex3_double ? ex3_double_denorm_eq : +- vfdsu_ex3_single ? ex3_single_denorm_eq : ex3_half_denorm_eq; ++ vfdsu_ex3_single ? ex3_single_denorm_eq : ++ vfdsu_ex3_half ? ex3_half_denorm_eq : ex3_bfloat_denorm_eq; + assign ex3_denorm_gr = vfdsu_ex3_double ? ex3_double_denorm_gr : +- vfdsu_ex3_single ? ex3_single_denorm_gr : ex3_half_denorm_gr; ++ vfdsu_ex3_single ? ex3_single_denorm_gr : ++ vfdsu_ex3_half ? ex3_half_denorm_gr : ex3_bfloat_denorm_gr; + assign ex3_denorm_zero = vfdsu_ex3_double ? ex3_double_denorm_zero : +- vfdsu_ex3_single ? ex3_single_denorm_zero : ex3_half_denorm_zero; ++ vfdsu_ex3_single ? ex3_single_denorm_zero : ++ vfdsu_ex3_half ? ex3_half_denorm_zero : ex3_bfloat_denorm_zero; + assign ex3_denorm_lst_frac = vfdsu_ex3_double ? double_denorm_lst_frac : +- vfdsu_ex3_single ? single_denorm_lst_frac : half_denorm_lst_frac; ++ vfdsu_ex3_single ? single_denorm_lst_frac : ++ vfdsu_ex3_half ? 
half_denorm_lst_frac : bfloat_denorm_lst_frac; + + //Different Round Mode with different rounding rule + //Here we call rounding bit as "rb", remainder as "rem" +@@ -824,7 +924,9 @@ end + // &CombBeg; @540 + always @( total_qt_rt_58[56] + or vfdsu_ex3_single +- or vfdsu_ex3_double) ++ or vfdsu_ex3_double ++ or vfdsu_ex3_half ++ or vfdsu_ex3_bfloat) + begin + case({total_qt_rt_58[56],vfdsu_ex3_double,vfdsu_ex3_single}) + 3'b001: +@@ -849,13 +951,23 @@ case({total_qt_rt_58[56],vfdsu_ex3_double,vfdsu_ex3_single}) + end + 3'b100: + begin +- frac_add1_op1[54:0] = {12'b1,43'b0}; +- frac_sub1_op1[54:0] = {{12{1'b1}},43'b0}; ++ if (vfdsu_ex3_half) begin ++ frac_add1_op1[54:0] = {12'b1,43'b0}; ++ frac_sub1_op1[54:0] = {{12{1'b1}},43'b0}; ++ end else begin ++ frac_add1_op1[54:0] = {9'b1,46'b0}; ++ frac_sub1_op1[54:0] = {{9{1'b1}},46'b0}; ++ end + end + 3'b000: + begin +- frac_add1_op1[54:0] = {13'b1,42'b0}; +- frac_sub1_op1[54:0] = {{13{1'b1}},42'b0}; ++ if (vfdsu_ex3_half) begin ++ frac_add1_op1[54:0] = {13'b1,42'b0}; ++ frac_sub1_op1[54:0] = {{13{1'b1}},42'b0}; ++ end else begin ++ frac_add1_op1[54:0] = {10'b1,45'b0}; ++ frac_sub1_op1[54:0] = {{10{1'b1}},45'b0}; ++ end + end + default: + begin +@@ -898,7 +1010,7 @@ assign ex3_nx = ex3_rst_nor && + assign ex3_denorm_nx = ex3_rslt_denorm && (!ex3_denorm_zero || !vfdsu_ex3_rem_zero); + //Adjust expnt + //Div:Actural expnt should plus 1 when op0 is id, sub 1 when op1 id +-assign ex3_expnt_adjst[12:0] = vfdsu_ex3_double ? 13'h3ff: vfdsu_ex3_single ? 13'h7f : 13'hf; ++assign ex3_expnt_adjst[12:0] = vfdsu_ex3_double ? 13'h3ff: vfdsu_ex3_single ? 13'h7f : vfdsu_ex3_half ? 
13'hf : 13'h7f; + assign ex3_expnt_adjust_result[12:0] = vfdsu_ex3_expnt_rst[12:0] + + ex3_expnt_adjst[12:0]; + //this information is for the packing, which determin the result is normal +@@ -954,6 +1066,8 @@ begin + vfdsu_ex4_potnt_norm[1:0] <= 2'b0; + vfdsu_ex4_double <= 1'b0; + vfdsu_ex4_single <= 1'b0; ++ vfdsu_ex4_half <= 1'b0; ++ vfdsu_ex4_bfloat <= 1'b0; + + end + else if(ex3_pipedown) +@@ -982,6 +1096,8 @@ begin + vfdsu_ex4_potnt_norm[1:0] <= ex3_potnt_norm[1:0]; + vfdsu_ex4_double <= vfdsu_ex3_double; + vfdsu_ex4_single <= vfdsu_ex3_single; ++ vfdsu_ex4_half <= vfdsu_ex3_half; ++ vfdsu_ex4_bfloat <= vfdsu_ex3_bfloat; + end + else + begin +@@ -1009,6 +1125,8 @@ begin + vfdsu_ex4_potnt_norm[1:0] <= vfdsu_ex4_potnt_norm[1:0]; + vfdsu_ex4_double <= vfdsu_ex4_double; + vfdsu_ex4_single <= vfdsu_ex4_single; ++ vfdsu_ex4_half <= vfdsu_ex4_half; ++ vfdsu_ex4_bfloat <= vfdsu_ex4_bfloat; + end + end + +diff --git a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v +index c7a679c..4d91a2c 100644 +--- a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v ++++ b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_scalar_dp.v +@@ -30,6 +30,8 @@ module ct_vfdsu_scalar_dp( + ex1_double, + ex1_pipedown, + ex1_scalar, ++ ex1_half, ++ ex1_bfloat, + ex1_single, + ex1_sqrt, + ex1_src0, +@@ -50,7 +52,9 @@ module ct_vfdsu_scalar_dp( + pipex_dp_vfdsu_freg_data, + pipex_dp_vfdsu_vreg, + vfdsu_ex2_double, +- vfdsu_ex2_single ++ vfdsu_ex2_single, ++ vfdsu_ex2_half, ++ vfdsu_ex2_bfloat + ); + + // &Ports; @24 +@@ -79,6 +83,8 @@ output ex1_div; + output ex1_double; + output ex1_scalar; + output ex1_single; ++output ex1_half; ++output ex1_bfloat; + output ex1_sqrt; + output [63:0] ex1_src0; + output [63:0] ex1_src1; +@@ -89,11 +95,15 @@ output [63:0] pipex_dp_vfdsu_freg_data; + output [6 :0] pipex_dp_vfdsu_vreg; + output vfdsu_ex2_double; + output vfdsu_ex2_single; ++output vfdsu_ex2_half; ++output vfdsu_ex2_bfloat; + + // 
&Regs; @25 + reg ex1_div; + reg ex1_double; + reg ex1_single; ++reg ex1_half; ++reg ex1_bfloat; + reg ex1_sqrt; + reg vfdsu_ex2_div; + reg vfdsu_ex2_double; +@@ -101,6 +111,8 @@ reg [4 :0] vfdsu_ex2_dst_ereg; + reg [6 :0] vfdsu_ex2_dst_vreg; + reg [6 :0] vfdsu_ex2_iid; + reg vfdsu_ex2_single; ++reg vfdsu_ex2_half; ++reg vfdsu_ex2_bfloat; + reg vfdsu_ex2_sqrt; + reg [4 :0] vfdsu_ex3_dst_ereg; + reg [6 :0] vfdsu_ex3_dst_vreg; +@@ -175,6 +187,8 @@ begin + ex1_sqrt <= 1'b0; + ex1_double <= 1'b0; + ex1_single <= 1'b0; ++ ex1_half <= 1'b0; ++ ex1_bfloat <= 1'b0; + end + else if(idu_vfpu_rf_pipex_gateclk_sel) + begin +@@ -182,6 +196,8 @@ begin + ex1_sqrt <= idu_vfpu_rf_pipex_func[1]; + ex1_double <= idu_vfpu_rf_pipex_func[16]; + ex1_single <= idu_vfpu_rf_pipex_func[15]; ++ ex1_half <= idu_vfpu_rf_pipex_func[14]; ++ ex1_bfloat <= idu_vfpu_rf_pipex_func[13]; + end + end + assign ex1_scalar = 1'b1; +@@ -204,6 +220,8 @@ begin + vfdsu_ex2_iid[6:0] <= 7'b0; + vfdsu_ex2_double <= 1'b0; + vfdsu_ex2_single <= 1'b0; ++ vfdsu_ex2_half <= 1'b0; ++ vfdsu_ex2_bfloat <= 1'b0; + vfdsu_ex2_div <= 1'b0; + vfdsu_ex2_sqrt <= 1'b0; + end +@@ -214,6 +232,8 @@ begin + vfdsu_ex2_iid[6:0] <= dp_vfdsu_ex1_pipex_iid[6:0]; + vfdsu_ex2_double <= ex1_double; + vfdsu_ex2_single <= ex1_single; ++ vfdsu_ex2_half <= ex1_half; ++ vfdsu_ex2_bfloat <= ex1_bfloat; + vfdsu_ex2_div <= ex1_div; + vfdsu_ex2_sqrt <= ex1_sqrt; + end +@@ -224,6 +244,8 @@ begin + vfdsu_ex2_iid[6:0] <= vfdsu_ex2_iid[6:0]; + vfdsu_ex2_double <= vfdsu_ex2_double; + vfdsu_ex2_single <= vfdsu_ex2_single; ++ vfdsu_ex2_half <= ex1_half; ++ vfdsu_ex2_bfloat <= ex1_bfloat; + vfdsu_ex2_div <= vfdsu_ex2_div; + vfdsu_ex2_sqrt <= vfdsu_ex2_sqrt; + end +diff --git a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v +index cdeb3a3..8e8d66b 100644 +--- a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v ++++ b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v +@@ -49,12 +49,15 @@ module 
ct_vfdsu_srt( + vfdsu_ex2_result_zero, + vfdsu_ex2_rm, + vfdsu_ex2_single, ++ vfdsu_ex2_half, ++ vfdsu_ex2_bfloat, + vfdsu_ex2_sqrt, + vfdsu_ex2_srt_skip, + vfdsu_ex3_doub_expnt_rst, + vfdsu_ex3_double, + vfdsu_ex3_dz, + vfdsu_ex3_half_expnt_rst, ++ vfdsu_ex3_bfloat_expnt_rst, + vfdsu_ex3_id_srt_skip, + vfdsu_ex3_nv, + vfdsu_ex3_of, +@@ -74,6 +77,8 @@ module ct_vfdsu_srt( + vfdsu_ex3_rslt_denorm, + vfdsu_ex3_sing_expnt_rst, + vfdsu_ex3_single, ++ vfdsu_ex3_half, ++ vfdsu_ex3_bfloat, + vfdsu_ex3_uf + ); + +@@ -109,6 +114,8 @@ input vfdsu_ex2_result_sign; + input vfdsu_ex2_result_zero; + input [2 :0] vfdsu_ex2_rm; + input vfdsu_ex2_single; ++input vfdsu_ex2_half; ++input vfdsu_ex2_bfloat; + input vfdsu_ex2_sqrt; + input vfdsu_ex2_srt_skip; + output srt_ctrl_rem_zero; +@@ -118,6 +125,7 @@ output [12:0] vfdsu_ex3_doub_expnt_rst; + output vfdsu_ex3_double; + output vfdsu_ex3_dz; + output [12:0] vfdsu_ex3_half_expnt_rst; ++output [12:0] vfdsu_ex3_bfloat_expnt_rst; + output vfdsu_ex3_id_srt_skip; + output vfdsu_ex3_nv; + output vfdsu_ex3_of; +@@ -137,16 +145,20 @@ output [2 :0] vfdsu_ex3_rm; + output vfdsu_ex3_rslt_denorm; + output [8 :0] vfdsu_ex3_sing_expnt_rst; + output vfdsu_ex3_single; ++output vfdsu_ex3_half; ++output vfdsu_ex3_bfloat; + output vfdsu_ex3_uf; + + // &Regs; @24 + reg [52:0] ex2_result_double_denorm_round_add_num; + reg [52:0] ex2_result_half_denorm_round_add_num; + reg [52:0] ex2_result_single_denorm_round_add_num; ++reg [52:0] ex2_result_bfloat_denorm_round_add_num; + reg [12:0] vfdsu_ex3_doub_expnt_rst; + reg vfdsu_ex3_double; + reg vfdsu_ex3_dz; + reg [12:0] vfdsu_ex3_half_expnt_rst; ++reg [12:0] vfdsu_ex3_bfloat_expnt_rst; + reg vfdsu_ex3_id_srt_skip; + reg vfdsu_ex3_nv; + reg vfdsu_ex3_of; +@@ -165,6 +177,8 @@ reg [2 :0] vfdsu_ex3_rm; + reg vfdsu_ex3_rslt_denorm; + reg [8 :0] vfdsu_ex3_sing_expnt_rst; + reg vfdsu_ex3_single; ++reg vfdsu_ex3_half; ++reg vfdsu_ex3_bfloat; + reg vfdsu_ex3_uf; + + // &Wires; @25 +@@ -191,6 +205,11 @@ wire 
ex2_half_expnt_uf; + wire ex2_half_id_nor_srt_skip; + wire ex2_half_potnt_of; + wire ex2_half_potnt_uf; ++wire ex2_bfloat_expnt_of; ++wire ex2_bfloat_expnt_uf; ++wire ex2_bfloat_id_nor_srt_skip; ++wire ex2_bfloat_potnt_of; ++wire ex2_bfloat_potnt_uf; + wire ex2_id_nor_srt_skip; + wire ex2_of; + wire ex2_of_plus; +@@ -253,6 +272,8 @@ wire vfdsu_ex2_result_sign; + wire vfdsu_ex2_result_zero; + wire [2 :0] vfdsu_ex2_rm; + wire vfdsu_ex2_single; ++wire vfdsu_ex2_half; ++wire vfdsu_ex2_bfloat; + wire vfdsu_ex2_sqrt; + wire vfdsu_ex2_srt_skip; + wire vfdsu_ex3_rem_zero; +@@ -281,25 +302,33 @@ assign ex2_sing_expnt_of = ~vfdsu_ex2_expnt_rst[9] && (vfdsu_ex2_expnt_rst[8] + assign ex2_half_expnt_of = ~vfdsu_ex2_expnt_rst[6] && (vfdsu_ex2_expnt_rst[5] + || (vfdsu_ex2_expnt_rst[4] && + |vfdsu_ex2_expnt_rst[3:0])); ++assign ex2_bfloat_expnt_of = ~vfdsu_ex2_expnt_rst[9] && (vfdsu_ex2_expnt_rst[8] ++ || (vfdsu_ex2_expnt_rst[7] && ++ |vfdsu_ex2_expnt_rst[6:0])); + assign ex2_expnt_of = vfdsu_ex2_double ? ex2_doub_expnt_of : +- vfdsu_ex2_single ? ex2_sing_expnt_of +- : ex2_half_expnt_of; ++ vfdsu_ex2_single ? ex2_sing_expnt_of : ++ vfdsu_ex2_half ? ex2_half_expnt_of : ex2_bfloat_expnt_of; + assign ex2_potnt_of_pre = vfdsu_ex2_double ? ex2_doub_potnt_of : +- vfdsu_ex2_single ? ex2_sing_potnt_of : ex2_half_potnt_of; +-assign ex2_potnt_uf_pre = vfdsu_ex2_double ? ex2_doub_potnt_uf : +- vfdsu_ex2_single ? ex2_sing_potnt_uf : ex2_half_potnt_uf; ++ vfdsu_ex2_single ? ex2_sing_potnt_of : ++ vfdsu_ex2_half ? ex2_half_potnt_of : ex2_bfloat_potnt_of; ++assign ex2_potnt_uf_pre = vfdsu_ex2_double ? ex2_doub_potnt_uf : ++ vfdsu_ex2_single ? ex2_sing_potnt_uf : ++ vfdsu_ex2_half ? ex2_half_potnt_uf : ex2_bfloat_potnt_uf; + assign ex2_expnt_uf = vfdsu_ex2_double ? ex2_doub_expnt_uf : +- vfdsu_ex2_single ? ex2_sing_expnt_uf : ex2_half_expnt_uf; ++ vfdsu_ex2_single ? ex2_sing_expnt_uf : ++ vfdsu_ex2_half ? ex2_half_expnt_uf : ex2_bfloat_expnt_uf; + assign ex2_id_nor_srt_skip = vfdsu_ex2_double ? 
ex2_double_id_nor_srt_skip : +- vfdsu_ex2_single ? ex2_single_id_nor_srt_skip +- : ex2_half_id_nor_srt_skip; ++ vfdsu_ex2_single ? ex2_single_id_nor_srt_skip : ++ vfdsu_ex2_half ? ex2_half_id_nor_srt_skip : ex2_bfloat_id_nor_srt_skip; + assign ex2_result_denorm_round_add_num[52:0] = vfdsu_ex2_double ? + ex2_result_double_denorm_round_add_num[52:0] : + vfdsu_ex2_single ? + ex2_result_single_denorm_round_add_num[52:0] : +- ex2_result_half_denorm_round_add_num[52:0]; +- +- ++ vfdsu_ex2_half ? ++ ex2_result_half_denorm_round_add_num[52:0] : ++ ex2_result_bfloat_denorm_round_add_num[52:0]; ++ ++ + //potential overflow when E1-E2 = 128/1024 + assign ex2_doub_potnt_of = ~vfdsu_ex2_expnt_rst[12] && + ~vfdsu_ex2_expnt_rst[11] && +@@ -313,6 +342,10 @@ assign ex2_half_potnt_of = ~vfdsu_ex2_expnt_rst[6] && + ~vfdsu_ex2_expnt_rst[5] && + vfdsu_ex2_expnt_rst[4] && + ~|vfdsu_ex2_expnt_rst[3:0]; ++assign ex2_bfloat_potnt_of = ~vfdsu_ex2_expnt_rst[9] && ++ ~vfdsu_ex2_expnt_rst[8] && ++ vfdsu_ex2_expnt_rst[7] && ++ ~|vfdsu_ex2_expnt_rst[6:0]; + assign ex2_potnt_of = ex2_potnt_of_pre && + vfdsu_ex2_op0_norm && + vfdsu_ex2_op1_norm && +@@ -321,6 +354,7 @@ assign ex2_potnt_of = ex2_potnt_of_pre && + //When input is normal, underflow when E1-E2 <= -127/-1023/-15 + assign ex2_doub_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hc01); + assign ex2_sing_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hf81); ++assign ex2_bfloat_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hf81); + assign ex2_half_expnt_uf = vfdsu_ex2_expnt_rst[12] && (vfdsu_ex2_expnt_rst[11:0] <= 12'hff1); + assign ex2_half_potnt_uf = &vfdsu_ex2_expnt_rst[6:4] && + ~|vfdsu_ex2_expnt_rst[3:2] && +@@ -337,6 +371,10 @@ assign ex2_sing_potnt_uf = &vfdsu_ex2_expnt_rst[9:7] && + ~|vfdsu_ex2_expnt_rst[6:2] && + vfdsu_ex2_expnt_rst[1] && + !vfdsu_ex2_expnt_rst[0]; ++assign ex2_bfloat_potnt_uf = &vfdsu_ex2_expnt_rst[9:7] && ++ ~|vfdsu_ex2_expnt_rst[6:2] && ++ 
vfdsu_ex2_expnt_rst[1] && ++ !vfdsu_ex2_expnt_rst[0]; + + assign ex2_potnt_uf = (ex2_potnt_uf_pre && + vfdsu_ex2_op0_norm && +@@ -371,6 +409,8 @@ assign ex2_single_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] + && (vfdsu_ex2_expnt_rst[11:0]<12'hf6a); + assign ex2_half_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] + && (vfdsu_ex2_expnt_rst[11:0]<12'hfe7); ++assign ex2_bfloat_id_nor_srt_skip = vfdsu_ex2_expnt_rst[12] ++ && (vfdsu_ex2_expnt_rst[11:0]<12'hf6a); + assign ex2_rslt_denorm = ex2_uf; + + //=======================EX2 skip srt iteration====================== +@@ -490,6 +530,23 @@ endcase + // &CombEnd; @248 + end + ++// &CombBeg; @204 ++always @( vfdsu_ex2_expnt_rst[12:0]) ++begin ++case(vfdsu_ex2_expnt_rst[12:0]) ++ 13'h1f82:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h200000000000; //-126 1 ++ 13'h1f81:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h400000000000; //-127 0 ++ 13'h1f80:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h800000000000; //-128 -1 ++ 13'h1f7f:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h1000000000000; //-129 -2 ++ 13'h1f7e:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h2000000000000; //-130 -3 ++ 13'h1f7d:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h4000000000000; //-131 -4 ++ 13'h1f7c:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h8000000000000; //-132 -5 ++ 13'h1f7b:ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h10000000000000; //-133 -6 ++ default: ex2_result_bfloat_denorm_round_add_num[52:0] = 53'h0; // -23 ++endcase ++// &CombEnd; @232 ++end ++ + //===================special result======================== + assign ex2_result_zero = vfdsu_ex2_result_zero; + assign ex2_result_qnan = vfdsu_ex2_result_qnan; +@@ -541,6 +598,7 @@ begin + vfdsu_ex3_doub_expnt_rst[12:0] <= 13'b0; + vfdsu_ex3_sing_expnt_rst[8:0] <= 9'b0; + vfdsu_ex3_half_expnt_rst[12:0] <= 13'b0; ++ vfdsu_ex3_bfloat_expnt_rst[12:0] <= 13'b0; + vfdsu_ex3_result_sign <= 1'b0; + vfdsu_ex3_qnan_sign <= 1'b0; + vfdsu_ex3_qnan_f[51:0] <= 52'b0; +@@ 
-551,6 +609,8 @@ begin + vfdsu_ex3_id_srt_skip <= 1'b0; + vfdsu_ex3_double <= 1'b0; + vfdsu_ex3_single <= 1'b0; ++ vfdsu_ex3_half <= 1'b0; ++ vfdsu_ex3_bfloat <= 1'b0; + end + else if(ex2_pipedown) + begin +@@ -569,6 +629,7 @@ begin + vfdsu_ex3_doub_expnt_rst[12:0] <= vfdsu_ex2_expnt_rst[12:0]; + vfdsu_ex3_sing_expnt_rst[8:0] <= vfdsu_ex2_expnt_rst[8:0]; + vfdsu_ex3_half_expnt_rst[12:0] <= vfdsu_ex2_expnt_rst[12:0]; ++ vfdsu_ex3_bfloat_expnt_rst[12:0] <= vfdsu_ex2_expnt_rst[12:0]; + vfdsu_ex3_result_sign <= vfdsu_ex2_result_sign; + vfdsu_ex3_qnan_sign <= vfdsu_ex2_qnan_sign; + vfdsu_ex3_qnan_f[51:0] <= vfdsu_ex2_qnan_f[51:0]; +@@ -579,6 +640,8 @@ begin + vfdsu_ex3_id_srt_skip <= ex2_id_nor_srt_skip; + vfdsu_ex3_double <= vfdsu_ex2_double; + vfdsu_ex3_single <= vfdsu_ex2_single; ++ vfdsu_ex3_half <= vfdsu_ex2_half; ++ vfdsu_ex3_bfloat <= vfdsu_ex2_bfloat; + end + else + begin +@@ -597,6 +660,7 @@ begin + vfdsu_ex3_doub_expnt_rst[12:0] <= vfdsu_ex3_doub_expnt_rst[12:0]; + vfdsu_ex3_sing_expnt_rst[8:0] <= vfdsu_ex3_sing_expnt_rst[8:0]; + vfdsu_ex3_half_expnt_rst[12:0] <= vfdsu_ex3_half_expnt_rst[12:0]; ++ vfdsu_ex3_bfloat_expnt_rst[12:0] <= vfdsu_ex3_bfloat_expnt_rst[12:0]; + vfdsu_ex3_result_sign <= vfdsu_ex3_result_sign; + vfdsu_ex3_qnan_sign <= vfdsu_ex3_qnan_sign; + vfdsu_ex3_qnan_f[51:0] <= vfdsu_ex3_qnan_f[51:0]; +@@ -607,6 +671,8 @@ begin + vfdsu_ex3_id_srt_skip <= vfdsu_ex3_id_srt_skip; + vfdsu_ex3_double <= vfdsu_ex3_double; + vfdsu_ex3_single <= vfdsu_ex3_single; ++ vfdsu_ex3_half <= vfdsu_ex3_half; ++ vfdsu_ex3_bfloat <= vfdsu_ex3_bfloat; + end + end + assign vfdsu_ex3_rem_zero = ~|srt_remainder[60:0]; +diff --git a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v +index f884625..28ca259 100644 +--- a/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v ++++ b/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v +@@ -99,6 +99,8 @@ wire ex1_double; + wire ex1_pipedown; + wire ex1_scalar; + wire ex1_single; 
++wire ex1_half; ++wire ex1_bfloat; + wire ex1_sqrt; + wire [63:0] ex1_src0; + wire [63:0] ex1_src1; +@@ -128,6 +130,8 @@ wire vfdsu_dp_fdiv_busy; + wire vfdsu_dp_inst_wb_req; + wire vfdsu_ex2_double; + wire vfdsu_ex2_single; ++wire vfdsu_ex2_half; ++wire vfdsu_ex2_bfloat; + wire vfdsu_ifu_debug_ex2_wait; + wire vfdsu_ifu_debug_idle; + wire vfdsu_ifu_debug_pipe_busy; +@@ -234,6 +238,8 @@ ct_vfdsu_ctrl x_ct_vfdsu_ctrl ( + .ex1_double (ex1_double ), + .ex1_pipedown (ex1_pipedown ), + .ex1_single (ex1_single ), ++ .ex1_half (ex1_half ), ++ .ex1_bfloat (ex1_bfloat ), + .ex2_data_clk (ex2_data_clk ), + .ex2_pipedown (ex2_pipedown ), + .ex2_srt_first_round (ex2_srt_first_round ), +@@ -251,6 +257,8 @@ ct_vfdsu_ctrl x_ct_vfdsu_ctrl ( + .vfdsu_dp_inst_wb_req (vfdsu_dp_inst_wb_req ), + .vfdsu_ex2_double (vfdsu_ex2_double ), + .vfdsu_ex2_single (vfdsu_ex2_single ), ++ .vfdsu_ex2_half (vfdsu_ex2_half ), ++ .vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ), + .vfdsu_ifu_debug_ex2_wait (vfdsu_ifu_debug_ex2_wait ), + .vfdsu_ifu_debug_idle (vfdsu_ifu_debug_idle ), + .vfdsu_ifu_debug_pipe_busy (vfdsu_ifu_debug_pipe_busy ) +@@ -266,6 +274,8 @@ ct_vfdsu_double x_ct_vfdsu_double ( + .ex1_pipedown (ex1_pipedown ), + .ex1_scalar (ex1_scalar ), + .ex1_single (ex1_single ), ++ .ex1_half (ex1_half ), ++ .ex1_bfloat (ex1_bfloat ), + .ex1_sqrt (ex1_sqrt ), + .ex1_src0 (ex1_src0 ), + .ex1_src1 (ex1_src1 ), +@@ -302,6 +312,8 @@ ct_vfdsu_scalar_dp x_ct_vfdsu_scalar_dp ( + .ex1_pipedown (ex1_pipedown ), + .ex1_scalar (ex1_scalar ), + .ex1_single (ex1_single ), ++ .ex1_half (ex1_half ), ++ .ex1_bfloat (ex1_bfloat ), + .ex1_sqrt (ex1_sqrt ), + .ex1_src0 (ex1_src0 ), + .ex1_src1 (ex1_src1 ), +@@ -321,7 +333,9 @@ ct_vfdsu_scalar_dp x_ct_vfdsu_scalar_dp ( + .pipex_dp_vfdsu_freg_data (pipex_dp_vfdsu_freg_data ), + .pipex_dp_vfdsu_vreg (pipex_dp_vfdsu_vreg ), + .vfdsu_ex2_double (vfdsu_ex2_double ), +- .vfdsu_ex2_single (vfdsu_ex2_single ) ++ .vfdsu_ex2_single (vfdsu_ex2_single ), ++ .vfdsu_ex2_half 
(vfdsu_ex2_half ), ++ .vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ) + ); + + +-- +2.16.5 + From 0a9568e7a37f105283ff906a371de3b5dc479b1f Mon Sep 17 00:00:00 2001 From: Luca Bertaccini <55843305+lucabertaccini@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:52:27 +0200 Subject: [PATCH 5/8] Fix illegal Verilog assignment (#15) --- docs/CHANGELOG-PULP.md | 5 +++++ .../C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v | 2 +- .../0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG-PULP.md b/docs/CHANGELOG-PULP.md index 94d245be..17ef11f5 100644 --- a/docs/CHANGELOG-PULP.md +++ b/docs/CHANGELOG-PULP.md @@ -7,6 +7,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a In this sense, we interpret the "Public API" of a hardware module as its port/parameter list. Versions of the IP in the same major relase are "pin-compatible" with each other. Minor relases are permitted to add new parameters as long as their default bindings ensure backwards compatibility. 
+## [pulp-v0.2.3] - 2024-09-27 + +### Fix +- Fix illegal Verilog `'0` + ## [pulp-v0.2.2] - 2024-06-24 ### Added diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v index cb3dc8e3..69462eb9 100644 --- a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v +++ b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_round.v @@ -763,7 +763,7 @@ case(vfdsu_ex3_bfloat_expnt_rst[8:0]) 9'h17a:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[56:46]}; //-93 -6 bfloat_denorm_lst_frac = 1'b0; end//-1022 1 - default: begin qt_result_bfloat_denorm_for_round[10:0] = '0; + default: begin qt_result_bfloat_denorm_for_round[10:0] = 11'b0; bfloat_denorm_lst_frac = 1'b0; end//-1022 1 endcase diff --git a/vendor/patches/openc910/0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch b/vendor/patches/openc910/0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch index 7d1ce903..fab95f9d 100644 --- a/vendor/patches/openc910/0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch +++ b/vendor/patches/openc910/0001-Add-FP16ALT-support-to-THMULTI-DivSqrt-unit.patch @@ -817,7 +817,7 @@ index 6eece52..a419289 100644 + 9'h17a:begin qt_result_bfloat_denorm_for_round[10:0] = {total_qt_rt_58[56:46]}; //-93 -6 + bfloat_denorm_lst_frac = 1'b0; + end//-1022 1 -+ default: begin qt_result_bfloat_denorm_for_round[10:0] = '0; ++ default: begin qt_result_bfloat_denorm_for_round[10:0] = 11'b0; + bfloat_denorm_lst_frac = 1'b0; + end//-1022 1 +endcase From 5098afdffd8a48319eedca6fb2fdb7d53fab6172 Mon Sep 17 00:00:00 2001 From: Luca Bertaccini <55843305+lucabertaccini@users.noreply.github.com> Date: Tue, 5 Aug 2025 18:09:03 +0200 Subject: [PATCH 6/8] Update maintainer in README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b13f00d1..e261e7b4 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,9 @@ Parametric 
floating-point unit with support for standard RISC-V formats and operations as well as transprecision formats, written in SystemVerilog. -Maintainer: Luca Bertaccini
-Principal Author: Stefan Mach +Current Maintainer: Gamze İslamoğlu
+Past Maintainer: Luca Bertaccini
+Main Author: Stefan Mach ## Features From 29d8a981295a1fb7b6b6c1544843b618d09717ad Mon Sep 17 00:00:00 2001 From: Luca Bertaccini <55843305+lucabertaccini@users.noreply.github.com> Date: Tue, 5 Aug 2025 18:09:43 +0200 Subject: [PATCH 7/8] Update CODEOWNERS --- docs/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CODEOWNERS b/docs/CODEOWNERS index 6b8f7762..21c23e3d 100644 --- a/docs/CODEOWNERS +++ b/docs/CODEOWNERS @@ -1,2 +1,2 @@ # Global owners -* @lucabertaccini +* @gamzeisl From 8edb8754ef06ae80c59c452b883fdae457e9827e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gamze=20=C4=B0slamo=C4=9Flu?= <54476562+gamzeisl@users.noreply.github.com> Date: Wed, 25 Feb 2026 13:12:48 +0100 Subject: [PATCH 8/8] Add MXDOTP operation group with FP4/FP6/FP8 source format support (#17) * Add FP4, FP6, FP6ALT formats and MXDOTP operation support to fpnew_pkg Extended fpnew_pkg.sv with new floating-point formats and MXDOTP operation group for MX dot product operations: - New formats: FP6(E3M2), FP6ALT(E2M3), FP4(E2M1) - Increased NUM_FP_FORMATS from 6 to 9 - Added MXDOTP operation group (6th group) - New operations: MXDOTPF (FP), MXDOTPI (INT) - Updated all format masks from 6-bit to 9-bit - Added bias_constant() helper function for MXDOTP - Updated FPU configurations (DEFAULT_NOREGS, DEFAULT_SNITCH) * Add MXDOTP multi-format package definitions Introduces fpnew_mxdotp_multi_pkg.sv with parameterized configuration for MXDOTP operations supporting mixed-precision arithmetic with low precision formats. Configuration: - Source formats: FP4, FP6, FP6ALT, FP8, FP8ALT, INT8 - Destination formats: FP32, FP16ALT * Add MXDOTP multi-format core implementation Add core MXDOTP implementation supporting very low-precision floating-point formats (FP4, FP6, FP8) and INT8. 
New files: - fpnew_mxdotp_multi_modules.sv: 14 modules implementing the MXDOTP datapath (classification, multiplication, shifting, accumulation, normalization, rounding) - fpnew_mxdotp_multi.sv: Top-level MXDOTP unit integrating all modules * Add MXDOTP wrapper New file: - fpnew_mxdotp_multi_wrapper.sv: Wrapper handling operand unpacking, FP6 extended operand processing (3-step with unroll factor), NaN-boxing, and scale extraction Changes to core module: - Add NumPipeRegs and PipeConfig as module parameters - Compute NUM_INP_REGS, NUM_MID_REGS, NUM_OUT_REGS from parameters * Extend classifier for MX floating-point formats Add MX parameter and format-specific classification logic to support low-precision formats used in MXDOTP operations. Changes: - Add MX parameter (default 1) to enable MX-specific classification - FP8ALT (E4M3): No infinity, NaN when exp=all1s and man=all1s - FP6/FP6ALT/FP4 (E3M2/E2M3/E2M1): No infinity or NaN - Other formats: Standard IEEE-754 classification * Add configurable format parameters to MXDOTP wrapper and pkg * Integrate MXDOTP into opgroup multifmt slice - Add elaboration-time checks: fatal for Width!=64, missing FP32, missing FP8/INT8; warnings for inactive FP6/FP6ALT/FP4 - Add NUM_MX_LANES localparam and lane generation for MXDOTP - Instantiate fpnew_mxdotp_multi_wrapper with FpFmtConfig and IntFmtConfig * Update SDOTP wrapper format masks for extended format support - Widen FpSrcFmtConfig bitmasks from 6b to 9b to match the extended NUM_FP_FORMATS (FP6, FP6ALT, FP4 added but masked off for SDOTP) * Add MXDOTP sources to Bender and src_files * Update documentation for MXDOTP * Parameterize MXDOTP format configuration and rename package constants * Make INT8 optional and unify FP8/INT8 product width Relax format validation in fpnew_opgroup_multifmt_slice to require only FP8 and FP8ALT as mandatory base formats, allowing INT8 to be disabled. 
* Use bias constant function instead of fixed constant * Fix default mxdotp operation * Fix classifier consistency for fp4 * Remove the warning message for MXDOTP about enabled formats --- Bender.yml | 4 + docs/CHANGELOG-PULP.md | 18 + docs/README.md | 37 +- src/fpnew_classifier.sv | 28 +- src/fpnew_mxdotp_multi.sv | 804 ++++++++++++++++++ src/fpnew_mxdotp_multi_wrapper.sv | 245 ++++++ src/fpnew_opgroup_block.sv | 22 +- src/fpnew_opgroup_multifmt_slice.sv | 84 +- src/fpnew_pkg.sv | 131 ++- src/fpnew_sdotp_multi_wrapper.sv | 2 +- src/fpnew_top.sv | 2 + src/mxdotp/fpnew_mxdotp_multi_modules.sv | 987 +++++++++++++++++++++++ src/mxdotp/fpnew_mxdotp_multi_pkg.sv | 148 ++++ src_files.yml | 4 + 14 files changed, 2447 insertions(+), 69 deletions(-) create mode 100644 src/fpnew_mxdotp_multi.sv create mode 100644 src/fpnew_mxdotp_multi_wrapper.sv create mode 100644 src/mxdotp/fpnew_mxdotp_multi_modules.sv create mode 100644 src/mxdotp/fpnew_mxdotp_multi_pkg.sv diff --git a/Bender.yml b/Bender.yml index b635aa07..6b47a7f1 100644 --- a/Bender.yml +++ b/Bender.yml @@ -45,6 +45,10 @@ sources: - src/fpnew_sdotp_multi.sv - src/fpnew_sdotp_multi_wrapper.sv - src/fpnew_noncomp.sv + - src/mxdotp/fpnew_mxdotp_multi_pkg.sv + - src/mxdotp/fpnew_mxdotp_multi_modules.sv + - src/fpnew_mxdotp_multi.sv + - src/fpnew_mxdotp_multi_wrapper.sv - src/fpnew_opgroup_block.sv - src/fpnew_opgroup_fmt_slice.sv - src/fpnew_opgroup_multifmt_slice.sv diff --git a/docs/CHANGELOG-PULP.md b/docs/CHANGELOG-PULP.md index 17ef11f5..2c7c544a 100644 --- a/docs/CHANGELOG-PULP.md +++ b/docs/CHANGELOG-PULP.md @@ -7,6 +7,24 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a In this sense, we interpret the "Public API" of a hardware module as its port/parameter list. Versions of the IP in the same major relase are "pin-compatible" with each other. Minor relases are permitted to add new parameters as long as their default bindings ensure backwards compatibility. 
+## [Unreleased] + +### Added +- Add FP6(E3M2), FP6ALT(E2M3), and FP4(E2M1) floating-point formats +- Add MXDOTP Microscaling dot product multi-format operation group + - Supports source formats: FP8, FP8ALT, FP6, FP6ALT, FP4, INT8 + - Supports destination formats: FP32, FP16ALT + - Scaled dot-product and accumulation support with two 8-bit exponent scale factors + +### Changed +- Extend classifier to support MX-specific special cases for FP6, FP6ALT, FP4 formats +- Increase number of supported FP formats from 6 to 9 +- Increase number of opgroups from 5 to 6 + +### Notes +- MXDOTP implementation tested with all element formats enabled, but not yet exhaustively tested with all possible combinations of enabled formats. +- Known limitations documented in TODO comments (see source files for details) + ## [pulp-v0.2.3] - 2024-09-27 ### Fix diff --git a/docs/README.md b/docs/README.md index f00fb3b5..9def7b1f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -109,8 +109,10 @@ Unless noted otherwise, the first operand `op[0]` is used for the operation. | `ADD` | `0` | Addition (`op[1] + op[2]`) *note the operand indices* | | `ADD` | `1` | Subtraction (`op[1] - op[2]`) *note the operand indices* | | `MUL` | `0` | Multiplication (`op[0] * op[1]`) | -| `SDOTP` | `0` | Sum of dot product ) | -| `VSUM` | `0` | Vector Inner Sum ) | +| `SDOTP` | `0` | Sum of dot product | +| `VSUM` | `0` | Vector Inner Sum | +| `MXDOTPF` | `0` | Microscaling FP scaled dot product and accumulate | +| `MXDOTPI` | `0` | Microscaling INT scaled dot product and accumulate | | `DIV` | `0` | Division (`op[0] / op[1]`) | | `SQRT` | `0` | Square root | | `SGNJ` | `0` | Sign injection, operation encoded in rounding mode
`RNE`: `op[0]` with `sign(op[1])`
`RTZ`: `op[0]` with `~sign(op[1])`
`RDN`: `op[0]` with `sign(op[0]) ^ sign(op[1])`
`RUP`: `op[0]` (passthrough) | @@ -130,7 +132,7 @@ Unless noted otherwise, the first operand `op[0]` is used for the operation. ##### `fp_format_e` - FP Formats -Enumeration of type `logic [2:0]` holding the supported FP formats. +Enumeration of type `logic [3:0]` holding the supported FP formats. | Enumerator | Format | Width | Exp. Bits | Man. Bits | | ---------- | ------------- | -----: | :-------: | :-------: | @@ -140,10 +142,13 @@ Enumeration of type `logic [2:0]` holding the supported FP formats. | `FP8` | binary8 | 8 bit | 5 | 2 | | `FP16ALT` | binary16alt | 16 bit | 8 | 7 | | `FP8ALT` | binary8alt | 8 bit | 4 | 3 | +| `FP6` | binary6 | 6 bit | 3 | 2 | +| `FP6ALT` | binary6alt | 6 bit | 2 | 3 | +| `FP4` | binary4 | 4 bit | 2 | 1 | The following global parameters associated with FP formats are set in `fpnew_pkg`: ```SystemVerilog -localparam int unsigned NUM_FP_FORMATS = 6; +localparam int unsigned NUM_FP_FORMATS = 9; localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS); ``` @@ -286,7 +291,7 @@ Otherwise, synthesis tools can optimize away any logic associated with this form #### `Implementation` - Implementation Options -The FPU is divided into five operation groups, `ADDMUL`, `DIVSQRT`, `NONDOMP`, `CONV`, and `DOTP` (see [Architecture: Top-Level](#top-level)). +The FPU is divided into six operation groups: `ADDMUL`, `DIVSQRT`, `NONCOMP`, `CONV`, `DOTP`, and `MXDOTP` (see [Architecture: Top-Level](#top-level)). The `Implementation` parameter controls the implementation of these operation groups. It is of type `fpu_implementation_t` which is defined as: ```SystemVerilog @@ -328,18 +333,19 @@ The unit type `unit_type_t` is an enumeration of type `logic [1:0]` holding the The `UnitTypes` parameter allows to control resources used for the FPU by either removing operation units for certain formats and operations, or merging multiple formats into one. 
Currently, the follwoing unit types are available for the FPU operation groups: -| | `ADDMUL` | `DIVSQRT` | `NONCOMP` | `CONV` | `DOTP` | -|------------|--------------------|--------------------|--------------------|--------------------|--------------------| -| `PARALLEL` | :heavy_check_mark: | | :heavy_check_mark: | | | -| `MERGED` | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | :heavy_check_mark: | +| | `ADDMUL` | `DIVSQRT` | `NONCOMP` | `CONV` | `DOTP` | `MXDOTP` | +|------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------| +| `PARALLEL` | :heavy_check_mark: | | :heavy_check_mark: | | | | +| `MERGED` | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | *Default*: ```SystemVerilog -'{'{default: PARALLEL}, // ADDMUL - '{default: MERGED}, // DIVSQRT - '{default: PARALLEL}, // NONCOMP - '{default: MERGED}, // CONV` - '{default: DISABLED}} // DOTP` +'{'{default: PARALLEL}, // ADDMUL + '{default: MERGED}, // DIVSQRT + '{default: PARALLEL}, // NONCOMP + '{default: MERGED}, // CONV + '{default: DISABLED}, // DOTP + '{default: DISABLED}} // MXDOTP ``` (all formats within operation group use same type) @@ -437,7 +443,7 @@ The *operation group* is the highest level of grouping within FPnew and signifie ![FPnew](fig/top_block.png) -There are currently five operation groups in FPnew which are enumerated in `opgroup_e` as outlined in the following table: +There are currently six operation groups in FPnew which are enumerated in `opgroup_e` as outlined in the following table: | Enumerator | Description | Associated Operations | |------------|-----------------------------------------------|---------------------------------------| @@ -446,6 +452,7 @@ There are currently five operation groups in FPnew which are enumerated in `opgr | `NONCOMP` | Non-Computational Operations like Comparisons | `SGNJ`, `MINMAX`, `CMP`, `CLASS` | 
| `CONV` | Conversions | `F2I`, `I2F`, `F2F`, `CPKAB`, `CPKCD` | | `DOTP` | Dot Products | `SDOTP`, `EXVSUM`, `VSUM` | +| `MXDOTP` | Microscaling Dot Products | `MXDOTPF`, `MXDOTPI` | Most architectural decisions for FPnew are made at very fine granularity. The big exception to this is the generation of vectorial hardware which is decided at top level through the `EnableVectors` parameter. diff --git a/src/fpnew_classifier.sv b/src/fpnew_classifier.sv index a322946d..632416db 100644 --- a/src/fpnew_classifier.sv +++ b/src/fpnew_classifier.sv @@ -16,6 +16,7 @@ module fpnew_classifier #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumOperands = 1, + parameter int unsigned MX = 0, // Do not change localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) ) ( @@ -51,13 +52,30 @@ module fpnew_classifier #( // Classify Input // --------------- always_comb begin : classify_input - value = operands_i[op]; - is_boxed = is_boxed_i[op]; - is_normal = is_boxed && (value.exponent != '0) && (value.exponent != '1); + value = operands_i[op]; + is_boxed = is_boxed_i[op]; + + if (MX == 1 && FpFormat == fpnew_pkg::fp_format_e'(fpnew_pkg::FP8ALT)) begin + // E4M3: No infinity, NaN when exp=all1s and man=all1s + is_inf = 1'b0; + is_nan = !is_boxed || ((value.exponent == '1) && (value.mantissa == '1)); + is_normal = is_boxed && (value.exponent != '0) && !is_nan; + end else if (MX == 1 && (FpFormat == fpnew_pkg::fp_format_e'(fpnew_pkg::FP6) || + FpFormat == fpnew_pkg::fp_format_e'(fpnew_pkg::FP6ALT) || + FpFormat == fpnew_pkg::fp_format_e'(fpnew_pkg::FP4))) begin + // E3M2, E2M3, E2M1: No infinity or NaN + is_inf = 1'b0; + is_nan = 1'b0; + is_normal = is_boxed && (value.exponent != '0); + end else begin + // Standard IEEE-754 classification (for all other formats and MX=0) + is_inf = is_boxed && ((value.exponent == '1) && (value.mantissa == '0)); + is_nan = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0)); + is_normal 
= is_boxed && (value.exponent != '0) && (value.exponent != '1); + end + is_zero = is_boxed && (value.exponent == '0) && (value.mantissa == '0); is_subnormal = is_boxed && (value.exponent == '0) && !is_zero; - is_inf = is_boxed && ((value.exponent == '1) && (value.mantissa == '0)); - is_nan = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0)); is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0); is_quiet = is_nan && !is_signalling; // Assign output for current input diff --git a/src/fpnew_mxdotp_multi.sv b/src/fpnew_mxdotp_multi.sv new file mode 100644 index 00000000..6038bf99 --- /dev/null +++ b/src/fpnew_mxdotp_multi.sv @@ -0,0 +1,804 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Gamze Islamoglu + +`include "common_cells/registers.svh" + +module fpnew_mxdotp_multi + import fpnew_mxdotp_multi_pkg::*; +#( + // By default, all MX formats are enabled for the source and FP32 and FP16ALT are enabled for the destination. 
+ parameter fpnew_pkg::fmt_logic_t FpSrcFmtConfig = MxdotpSrcFpFmtConfig, + parameter fpnew_pkg::ifmt_logic_t IntSrcFmtConfig = MxdotpSrcIntFmtConfig, + parameter fpnew_pkg::fmt_logic_t FpDstFmtConfig = MxdotpDstFpFmtConfig, + parameter int unsigned NumPipeRegs = 4, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input logic [VectorSize-1:0][SRC_WIDTH-1:0] operands_a_i, + input logic [VectorSize-1:0][SRC_WIDTH-1:0] operands_b_i, + input logic [1:0] operands_a_fp6_rem_i, + input logic [1:0] operands_b_fp6_rem_i, + input logic [1:0][SCALE_WIDTH-1:0] operands_c_i, // 2 operands + input logic [DST_WIDTH-1:0] operand_d_i, // 1 operand, accumulator + input logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed_i, + input fpnew_pkg::roundmode_e rnd_mode_i, + input fpnew_pkg::operation_e op_i, + input logic op_mod_i, + input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands + input fpnew_pkg::int_format_e int_fmt_i, // format of the multiplicands if they are integers + input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result + input TagType tag_i, + input logic mask_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output logic [DST_WIDTH-1:0] result_o, + output fpnew_pkg::status_t status_o, + output logic extension_bit_o, + output TagType tag_o, + output logic mask_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------------- + // Pipeline stages + // ---------------- + localparam int unsigned NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
((NumPipeRegs + 1) / 3) + : 0); + localparam int unsigned NUM_MID_REGS = PipeConfig == fpnew_pkg::INSIDE + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 2) / 3) + : 0); + localparam int unsigned NUM_OUT_REGS = PipeConfig == fpnew_pkg::AFTER + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 3) + : 0); + + // ----------------------------------------- + // Config-dependent derived localparams + // ----------------------------------------- + // Computed from module parameters instead of package constants + localparam int unsigned FP6_VECTOR_SIZE = ((FpSrcFmtConfig[fpnew_pkg::FP6] || FpSrcFmtConfig[fpnew_pkg::FP6ALT]) == 1) ? + (((FpSrcFmtConfig[fpnew_pkg::FP8] || FpSrcFmtConfig[fpnew_pkg::FP8ALT]) == 1) ? 3 : 11) : 0; + localparam int unsigned FP4_VECTOR_SIZE = (FpSrcFmtConfig[fpnew_pkg::FP4] == 1) ? + (((FpSrcFmtConfig[fpnew_pkg::FP8] || FpSrcFmtConfig[fpnew_pkg::FP8ALT]) == 1) ? + (((FpSrcFmtConfig[fpnew_pkg::FP6] || FpSrcFmtConfig[fpnew_pkg::FP6ALT]) == 1) ? 
5 : 8) : 16) : 0; + + localparam int unsigned INT_SUPER_BITS = fpnew_pkg::max_int_width(IntSrcFmtConfig); + + // FP8/INT8 Lane configuration + localparam int unsigned PROD_BITS = fpnew_pkg::maximum(2*INT_SUPER_BITS, 2*PRECISION_BITS+1); // +1 for the sign bit in FP8 product + + localparam int unsigned FP6_SUM_WIDTH = $clog2(FP6_VECTOR_SIZE) + FP6_PROD_SHIFT_WIDTH; + localparam int unsigned FP4_SUM_WIDTH = $clog2(FP4_VECTOR_SIZE) + FP4_PROD_SHIFT_WIDTH; + + // --------------- + // Input pipeline + // --------------- + // Selected pipeline output signals as non-arrays + logic [VectorSize-1:0][SRC_WIDTH-1:0] operands_a_q; + logic [VectorSize-1:0][SRC_WIDTH-1:0] operands_b_q; + logic [1:0] operands_a_fp6_rem_q; + logic [1:0] operands_b_fp6_rem_q; + logic [1:0][SCALE_WIDTH-1:0] operands_c_q; + logic [DST_WIDTH-1:0] operand_d_q; + fpnew_pkg::fp_format_e src_fmt_q; + fpnew_pkg::int_format_e int_fmt_q; + fpnew_pkg::fp_format_e dst_fmt_q; + + // Input pipeline signals, index i holds signal after i register stages + logic [0:NUM_INP_REGS][VectorSize-1:0][SRC_WIDTH-1:0] inp_pipe_operands_a_q; + logic [0:NUM_INP_REGS][VectorSize-1:0][SRC_WIDTH-1:0] inp_pipe_operands_b_q; + logic [0:NUM_INP_REGS][1:0] inp_pipe_operands_a_fp6_rem_q; + logic [0:NUM_INP_REGS][1:0] inp_pipe_operands_b_fp6_rem_q; + logic [0:NUM_INP_REGS][1:0][SCALE_WIDTH-1:0] inp_pipe_operands_c_q; + logic [0:NUM_INP_REGS][DST_WIDTH-1:0] inp_pipe_operand_d_q; + logic [0:NUM_INP_REGS][NUM_FORMATS-1:0][NUM_OPERANDS-1:0] inp_pipe_is_boxed_q; + fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; + fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; + logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; + fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; + fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; + TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; + AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; + logic 
[0:NUM_INP_REGS] inp_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] inp_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign inp_pipe_operands_a_q[0] = operands_a_i; + assign inp_pipe_operands_b_q[0] = operands_b_i; + assign inp_pipe_operands_a_fp6_rem_q[0] = operands_a_fp6_rem_i; + assign inp_pipe_operands_b_fp6_rem_q[0] = operands_b_fp6_rem_i; + assign inp_pipe_operands_c_q[0] = operands_c_i; + assign inp_pipe_operand_d_q[0] = operand_d_i; + assign inp_pipe_is_boxed_q[0] = is_boxed_i; + assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; + assign inp_pipe_op_q[0] = op_i; + assign inp_pipe_op_mod_q[0] = op_mod_i; + assign inp_pipe_src_fmt_q[0] = src_fmt_i; + assign inp_pipe_int_fmt_q[0] = int_fmt_i; + assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; + assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; + assign inp_pipe_aux_q[0] = aux_i; + assign inp_pipe_valid_q[0] = in_valid_i; + // Input stage: Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(inp_pipe_operands_a_q[i+1], inp_pipe_operands_a_q[i], reg_ena, '0) + `FFL(inp_pipe_operands_b_q[i+1], inp_pipe_operands_b_q[i], reg_ena, '0) + `FFL(inp_pipe_operands_a_fp6_rem_q[i+1], inp_pipe_operands_a_fp6_rem_q[i], reg_ena, '0) + `FFL(inp_pipe_operands_b_fp6_rem_q[i+1], inp_pipe_operands_b_fp6_rem_q[i], reg_ena, '0) + `FFL(inp_pipe_operands_c_q[i+1], inp_pipe_operands_c_q[i], reg_ena, '0) + `FFL(inp_pipe_operand_d_q[i+1], inp_pipe_operand_d_q[i], reg_ena, '0) + `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) + `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::MXDOTPF) + `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) + `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) + `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) + `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign operands_a_q = inp_pipe_operands_a_q[NUM_INP_REGS]; + assign operands_b_q = inp_pipe_operands_b_q[NUM_INP_REGS]; + assign 
operands_a_fp6_rem_q = inp_pipe_operands_a_fp6_rem_q[NUM_INP_REGS]; + assign operands_b_fp6_rem_q = inp_pipe_operands_b_fp6_rem_q[NUM_INP_REGS]; + assign operands_c_q = inp_pipe_operands_c_q[NUM_INP_REGS]; + assign operand_d_q = inp_pipe_operand_d_q[NUM_INP_REGS]; + assign src_fmt_q = inp_pipe_src_fmt_q[NUM_INP_REGS]; + assign int_fmt_q = inp_pipe_int_fmt_q[NUM_INP_REGS]; + assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; + + logic [2*VectorSize-1:0][SRC_WIDTH-1:0] operands_post_inp_pipe; + logic [2*FP6_VECTOR_SIZE-1:0][SRC_WIDTH-1:0] fp6_operands_post_inp_pipe; + logic [2*FP4_VECTOR_SIZE-1:0][SRC_WIDTH-1:0] fp4_operands_post_inp_pipe; + + logic [VectorSize*SRC_WIDTH-1:0] flat_operands_a_q; + logic [VectorSize*SRC_WIDTH-1:0] flat_operands_b_q; + + always_comb begin + fp6_operands_post_inp_pipe = '0; + fp4_operands_post_inp_pipe = '0; + operands_post_inp_pipe = {operands_b_q, operands_a_q}; + flat_operands_a_q = operands_a_q; + flat_operands_b_q = operands_b_q; + // TODO: FP6 and FP4 without FP8 + if (src_fmt_q == fpnew_pkg::FP6 || src_fmt_q == fpnew_pkg::FP6ALT) begin + for (int i = 0; i < FP6_VECTOR_SIZE; i++) begin // Last 3 elements use FP6 datapath + fp6_operands_post_inp_pipe[i] = {{(SRC_WIDTH-6){1'b0}}, flat_operands_a_q[(48+i*6) +: 6]}; + fp6_operands_post_inp_pipe[i+FP6_VECTOR_SIZE] = {{(SRC_WIDTH-6){1'b0}}, flat_operands_b_q[(48+i*6) +: 6]}; + if (i == FP6_VECTOR_SIZE-1) begin // Last element of the FP6 remainder extends to 66 bits + fp6_operands_post_inp_pipe[i][5:4] = operands_a_fp6_rem_q; + fp6_operands_post_inp_pipe[i+FP6_VECTOR_SIZE][5:4] = operands_b_fp6_rem_q; + end + end + for (int i = 0; i < VectorSize; i++) begin // Top 8 elements use FP8 datapath + operands_post_inp_pipe[i] = {{(SRC_WIDTH-6){1'b0}}, flat_operands_a_q[(i*6) +: 6]}; + operands_post_inp_pipe[i+VectorSize] = {{(SRC_WIDTH-6){1'b0}}, flat_operands_b_q[(i*6) +: 6]}; + end + end else if (src_fmt_q == fpnew_pkg::FP4) begin + for (int i = 0; i < VectorSize; i++) begin + if (i < 
FP6_VECTOR_SIZE) begin // First 3 elements use FP6 datapath + fp6_operands_post_inp_pipe[i] = {{(SRC_WIDTH-4){1'b0}}, operands_a_q[i][7:4]}; + fp6_operands_post_inp_pipe[i+FP6_VECTOR_SIZE] = {{(SRC_WIDTH-4){1'b0}}, operands_b_q[i][7:4]}; + end else begin // Last 5 elements use FP4 datapath, remaining elements already use FP8 datapath via operands_post_inp_pipe + fp4_operands_post_inp_pipe[i-FP6_VECTOR_SIZE] = {{(SRC_WIDTH-4){1'b0}}, operands_a_q[i][7:4]}; + fp4_operands_post_inp_pipe[i-FP6_VECTOR_SIZE+FP4_VECTOR_SIZE] = {{(SRC_WIDTH-4){1'b0}}, operands_b_q[i][7:4]}; + end + end + end + end + + // ----------------- + // Input processing + // ----------------- + logic src_is_int; // if 0, it's a float + + assign src_is_int = (inp_pipe_op_q[NUM_INP_REGS] == fpnew_pkg::MXDOTPI); + + fp_src_t [VectorSize-1:0] operands_a, operands_b; + logic signed [1:0][SCALE_WIDTH-1:0] operands_c; + fp_dst_t operand_d; + fpnew_pkg::fp_info_t [VectorSize-1:0] info_a, info_b; + fpnew_pkg::fp_info_t [1:0] info_c; + fpnew_pkg::fp_info_t info_d; + + fp6_src_t [FP6_VECTOR_SIZE-1:0] fp6_operands_a, fp6_operands_b; + fpnew_pkg::fp_info_t [FP6_VECTOR_SIZE-1:0] fp6_info_a, fp6_info_b; + + fp4_src_t [FP4_VECTOR_SIZE-1:0] fp4_operands_a, fp4_operands_b; + fpnew_pkg::fp_info_t [FP4_VECTOR_SIZE-1:0] fp4_info_a, fp4_info_b; + + fpnew_mxdotp_classifier #( + .FpSrcFmtConfig ( FpSrcFmtConfig ), + .FpDstFmtConfig ( FpDstFmtConfig ), + .FP6VectorSize ( FP6_VECTOR_SIZE ), + .FP4VectorSize ( FP4_VECTOR_SIZE ), + .NumInpRegs ( NUM_INP_REGS ) + ) i_classifier ( + .operands_post_inp_pipe(operands_post_inp_pipe), + .fp6_operands_post_inp_pipe(fp6_operands_post_inp_pipe), + .fp4_operands_post_inp_pipe(fp4_operands_post_inp_pipe), + .operands_c_q(operands_c_q), + .operand_d_q(operand_d_q), + .inp_pipe_is_boxed_q(inp_pipe_is_boxed_q), + .src_fmt_q(src_fmt_q), + .src_is_int(src_is_int), + .dst_fmt_q(dst_fmt_q), + .inp_pipe_op_mod_q(inp_pipe_op_mod_q), + .info_a(info_a), + .fp6_info_a(fp6_info_a), + 
.fp4_info_a(fp4_info_a), + .info_b(info_b), + .fp6_info_b(fp6_info_b), + .fp4_info_b(fp4_info_b), + .info_c(info_c), + .info_d(info_d), + .operands_a(operands_a), + .fp6_operands_a(fp6_operands_a), + .fp4_operands_a(fp4_operands_a), + .operands_b(operands_b), + .fp6_operands_b(fp6_operands_b), + .fp4_operands_b(fp4_operands_b), + .operands_c(operands_c), + .operand_d(operand_d) + ); + + // --------------------- + // Special case handling + // --------------------- + + logic [DST_WIDTH-1:0] special_result; + fpnew_pkg::status_t special_status; + logic result_is_special; + + // Inf and NaN do not exist in FP6 and FP4 formats + if (FpSrcFmtConfig[fpnew_pkg::FP8] || FpSrcFmtConfig[fpnew_pkg::FP8ALT]) begin : special_case_handling + fpnew_mxdotp_special_cases #( + .FpDstFmtConfig ( FpDstFmtConfig ) + ) i_special_cases ( + .operands_a(operands_a), + .operands_b(operands_b), + .operands_c(operands_c), + .operand_d(operand_d), + .info_a(info_a), + .info_b(info_b), + .info_c(info_c), + .info_d(info_d), + .dst_fmt_q(dst_fmt_q), + .special_result(special_result), + .special_status(special_status), + .result_is_special(result_is_special) + ); + end else begin : no_special_case_handling + assign special_result = '0; + assign special_status = fpnew_pkg::status_t'(0); + assign result_is_special = 1'b0; + end + + // ------------------ + // Scale data path + // ------------------ + logic signed [SCALE_WIDTH:0] scale; // +1 for addition + + fpnew_mxdotp_scale_adder #( + ) i_scale_adder ( + .operands_c(operands_c), + .scale(scale) + ); + + // ------------------ + // Product data path + // ------------------ + logic signed [VectorSize-1:0][PROD_BITS-1:0] product_signed; // two's complement product, already signed + logic signed [FP6_VECTOR_SIZE-1:0][2*FP6_PREC_BITS:0] fp6_product_signed; // two's complement product, +1 for sign bit + logic signed [FP4_VECTOR_SIZE-1:0][2*FP4_PREC_BITS:0] fp4_product_signed; // two's complement product, +1 for sign bit + + if
(IntSrcFmtConfig[fpnew_pkg::INT8]) begin : int8_multiplier + fpnew_mxdotp_signed_vector_multiplier #( + .SrcType(fp_src_t), + .LocalVectorSize(VectorSize), + .PrecisionBits(INT_SUPER_BITS) + ) i_vector_multiplier_int8 ( + .operands_a(operands_a), + .operands_b(operands_b), + .src_fmt_q(src_fmt_q), + .int_fmt_q(int_fmt_q), + .src_is_int(src_is_int), + .info_a(info_a), + .info_b(info_b), + .product_signed(product_signed) + ); + end else if (FpSrcFmtConfig[fpnew_pkg::FP8] || FpSrcFmtConfig[fpnew_pkg::FP8ALT]) begin : fp8_multiplier + fpnew_mxdotp_vector_multiplier #( + .SrcType(fp_src_t), + .LocalVectorSize(VectorSize), + .PrecisionBits(PRECISION_BITS) + ) i_vector_multiplier_fp8 ( + .operands_a(operands_a), + .operands_b(operands_b), + .info_a(info_a), + .info_b(info_b), + .product_signed(product_signed) + ); + end else begin : no_fp8_multiplier + assign product_signed = '0; + end + + if (FpSrcFmtConfig[fpnew_pkg::FP6] || FpSrcFmtConfig[fpnew_pkg::FP6ALT]) begin : fp6_multiplier + fpnew_mxdotp_vector_multiplier #( + .SrcType(fp6_src_t), + .LocalVectorSize(FP6_VECTOR_SIZE), + .PrecisionBits(FP6_PREC_BITS) + ) i_vector_multiplier_fp6 ( + .operands_a(fp6_operands_a), + .operands_b(fp6_operands_b), + .info_a(fp6_info_a), + .info_b(fp6_info_b), + .product_signed(fp6_product_signed) + ); + end else begin : no_fp6_multiplier + assign fp6_product_signed = '0; + end + if (FpSrcFmtConfig[fpnew_pkg::FP4]) begin : fp4_multiplier + fpnew_mxdotp_vector_multiplier #( + .SrcType(fp4_src_t), + .LocalVectorSize(FP4_VECTOR_SIZE), + .PrecisionBits(FP4_PREC_BITS) + ) i_vector_multiplier_fp4 ( + .operands_a(fp4_operands_a), + .operands_b(fp4_operands_b), + .info_a(fp4_info_a), + .info_b(fp4_info_b), + .product_signed(fp4_product_signed) + ); + end else begin : no_fp4_multiplier + assign fp4_product_signed = '0; + end + + // ------------------ + // Shift data path + // ------------------ + logic signed [VectorSize-1:0][PROD_SHIFT_WIDTH-1:0] shifted_product; + logic signed 
[FP6_VECTOR_SIZE-1:0][FP6_PROD_SHIFT_WIDTH-1:0] fp6_shifted_product; + logic signed [FP4_VECTOR_SIZE-1:0][FP4_PROD_SHIFT_WIDTH-1:0] fp4_shifted_product; + + if (FpSrcFmtConfig[fpnew_pkg::FP8] || FpSrcFmtConfig[fpnew_pkg::FP8ALT]) begin : fp8_product_shifter + fpnew_mxdotp_product_shifter #( + .SrcType(fp_src_t), + .LocalVectorSize(VectorSize), + .SrcFmt(fpnew_pkg::FP8), // TODO: For now, we assume that FP8 and FP8ALT are always enabled together + .ProductBits(PROD_BITS), + .ExpWidth(EXP_WIDTH), + .OutputWidth(PROD_SHIFT_WIDTH) + ) i_product_shifter_fp8 ( + .operands_a(operands_a), + .operands_b(operands_b), + .info_a(info_a), + .info_b(info_b), + .product_signed(product_signed), + .src_fmt_q(src_fmt_q), + .int_fmt_q(int_fmt_q), + .src_is_int(src_is_int), + .shifted_product(shifted_product) + ); + end else begin : no_fp8_product_shifter + assign shifted_product = '0; + end + + if (FpSrcFmtConfig[fpnew_pkg::FP6] || FpSrcFmtConfig[fpnew_pkg::FP6ALT]) begin : fp6_product_shifter + fpnew_mxdotp_product_shifter #( + .SrcType(fp6_src_t), + .LocalVectorSize(FP6_VECTOR_SIZE), + .SrcFmt(fpnew_pkg::FP6), // TODO: For now, we assume that FP6 and FP6ALT are always enabled together + .ProductBits(2*FP6_PREC_BITS+1), + .ExpWidth(5), + .OutputWidth(FP6_PROD_SHIFT_WIDTH) + ) i_product_shifter_fp6 ( + .operands_a(fp6_operands_a), + .operands_b(fp6_operands_b), + .info_a(fp6_info_a), + .info_b(fp6_info_b), + .product_signed(fp6_product_signed), + .src_fmt_q(src_fmt_q), + .int_fmt_q(int_fmt_q), + .src_is_int(src_is_int), + .shifted_product(fp6_shifted_product) + ); + end else begin : no_fp6_product_shifter + assign fp6_shifted_product = '0; + end + if (FpSrcFmtConfig[fpnew_pkg::FP4]) begin : fp4_product_shifter + fpnew_mxdotp_product_shifter #( + .SrcType(fp4_src_t), + .LocalVectorSize(FP4_VECTOR_SIZE), + .SrcFmt(fpnew_pkg::FP4), + .ProductBits(2*FP4_PREC_BITS+1), + .ExpWidth(3), + .OutputWidth(FP4_PROD_SHIFT_WIDTH) + ) i_product_shifter_fp4 ( + .operands_a(fp4_operands_a), + 
.operands_b(fp4_operands_b), + .info_a(fp4_info_a), + .info_b(fp4_info_b), + .product_signed(fp4_product_signed), + .src_fmt_q(src_fmt_q), + .int_fmt_q(int_fmt_q), + .src_is_int(src_is_int), + .shifted_product(fp4_shifted_product) + ); + end else begin : no_fp4_product_shifter + assign fp4_shifted_product = '0; + end + + // ------------------ + // Adder data path + // ------------------ + logic signed [SOP_FIXED_WIDTH-1:0] sum_product_fp8; + logic signed [FP6_SUM_WIDTH-1:0] sum_product_fp6; + logic signed [FP4_SUM_WIDTH-1:0] sum_product_fp4; + logic signed [FIXED_SUM_WIDTH-1:0] sum_product; + + if (FpSrcFmtConfig[fpnew_pkg::FP8] || FpSrcFmtConfig[fpnew_pkg::FP8ALT] || IntSrcFmtConfig[fpnew_pkg::INT8]) begin : fp8_adder_tree + fpnew_mxdotp_adder_tree #( + .LocalVectorSize(VectorSize), + .InputWidth(PROD_SHIFT_WIDTH), + .OutputWidth(SOP_FIXED_WIDTH) + ) i_adder_tree_fp8 ( + .shifted_product(shifted_product), + .sum_product(sum_product_fp8) + ); + end else begin : no_fp8_adder_tree + assign sum_product_fp8 = '0; + end + + if (FpSrcFmtConfig[fpnew_pkg::FP6] || FpSrcFmtConfig[fpnew_pkg::FP6ALT]) begin : fp6_adder_tree + fpnew_mxdotp_adder_tree #( + .LocalVectorSize(FP6_VECTOR_SIZE), + .InputWidth(FP6_PROD_SHIFT_WIDTH), + .OutputWidth(FP6_SUM_WIDTH) + ) i_adder_tree_fp6 ( + .shifted_product(fp6_shifted_product), + .sum_product(sum_product_fp6) + ); + end else begin : no_fp6_adder_tree + assign sum_product_fp6 = '0; + end + if (FpSrcFmtConfig[fpnew_pkg::FP4]) begin : fp4_adder_tree + fpnew_mxdotp_adder_tree #( + .LocalVectorSize(FP4_VECTOR_SIZE), + .InputWidth(FP4_PROD_SHIFT_WIDTH), + .OutputWidth(FP4_SUM_WIDTH) + ) i_adder_tree_fp4 ( + .shifted_product(fp4_shifted_product), + .sum_product(sum_product_fp4) + ); + end else begin : no_fp4_adder_tree + assign sum_product_fp4 = '0; + end + + // Unified format adder: handles FP8 + FP6 + FP4 (FP6/FP4 are zero when disabled) + fpnew_mxdotp_format_adder #( + .Fp6SumWidth ( FP6_SUM_WIDTH ), + .Fp4SumWidth ( FP4_SUM_WIDTH ) + ) 
i_format_adder ( + .sum_product_fp8(sum_product_fp8), + .sum_product_fp6(sum_product_fp6), + .sum_product_fp4(sum_product_fp4), + .sum_product(sum_product) + ); + + // --------------- + // Internal pipeline + // --------------- + // Pipeline output signals as non-arrays + logic signed [FIXED_SUM_WIDTH-1:0] sum_product_q; + logic [SCALE_WIDTH:0] scale_q2; + fp_dst_t operand_d_q2; + fpnew_pkg::fp_info_t info_d_q; + fpnew_pkg::fp_format_e dst_fmt_q2; + fpnew_pkg::roundmode_e rnd_mode_q; + logic result_is_special_q; + logic [DST_WIDTH-1:0] special_result_q; + fpnew_pkg::status_t special_status_q; + // Internal pipeline signals, index i holds signal after i register stages + logic signed [0:NUM_MID_REGS][FIXED_SUM_WIDTH-1:0] mid_pipe_sum_product_q; + logic [0:NUM_MID_REGS][SCALE_WIDTH:0] mid_pipe_scale_q; + fp_dst_t [0:NUM_MID_REGS] mid_pipe_operand_d_q; + fpnew_pkg::fp_info_t [0:NUM_MID_REGS] mid_pipe_info_d_q; + fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; + fpnew_pkg::roundmode_e [0:NUM_MID_REGS] mid_pipe_rnd_mode_q; + logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; + logic [0:NUM_MID_REGS][DST_WIDTH-1:0] mid_pipe_spec_res_q; + fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; + TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; + AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; + logic [0:NUM_MID_REGS] mid_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_MID_REGS] mid_pipe_ready; + + // Input stage: First element of pipeline is taken from upstream logic + assign mid_pipe_sum_product_q[0] = sum_product; + assign mid_pipe_scale_q[0] = scale; + assign mid_pipe_operand_d_q[0] = operand_d; + assign mid_pipe_info_d_q[0] = info_d; + assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; + assign mid_pipe_rnd_mode_q[0] = inp_pipe_rnd_mode_q[NUM_INP_REGS]; + assign mid_pipe_res_is_spec_q[0] = result_is_special; + assign mid_pipe_spec_res_q[0] = special_result; + assign mid_pipe_spec_stat_q[0] = special_status; + 
assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; + assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; + assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; + // Input stage: Propagate pipeline ready signal to input pipe + assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(mid_pipe_sum_product_q[i+1], mid_pipe_sum_product_q[i], reg_ena, '0) + `FFL(mid_pipe_scale_q[i+1], mid_pipe_scale_q[i], reg_ena, '0) + `FFL(mid_pipe_operand_d_q[i+1], mid_pipe_operand_d_q[i], reg_ena, '0) + `FFL(mid_pipe_info_d_q[i+1], mid_pipe_info_d_q[i], reg_ena, '0) + `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) + `FFL(mid_pipe_rnd_mode_q[i+1], mid_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) + `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) + `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) + `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) 
+ `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: assign selected pipe outputs to signals for later use + assign sum_product_q = mid_pipe_sum_product_q[NUM_MID_REGS]; + assign scale_q2 = mid_pipe_scale_q[NUM_MID_REGS]; + assign operand_d_q2 = mid_pipe_operand_d_q[NUM_MID_REGS]; + assign info_d_q = mid_pipe_info_d_q[NUM_MID_REGS]; + assign dst_fmt_q2 = mid_pipe_dst_fmt_q[NUM_MID_REGS]; + assign rnd_mode_q = mid_pipe_rnd_mode_q[NUM_MID_REGS]; + assign result_is_special_q = mid_pipe_res_is_spec_q[NUM_MID_REGS]; + assign special_result_q = mid_pipe_spec_res_q[NUM_MID_REGS]; + assign special_status_q = mid_pipe_spec_stat_q[NUM_MID_REGS]; + + // ----------------------------- + // Accumulator shift data path + // ----------------------------- + logic result_is_accumulator; + logic accumulator_is_right_shifted; + + logic signed [9:0] accumulator_right_shift_amount; + logic signed [FIXED_SUM_WIDTH-1:0] accumulator_shifted; + logic signed [DST_PRECISION_BITS :0] signed_mantissa_d; + logic accumulator_sticky; + logic signed [DST_PRECISION_BITS-1:0] accumulator_remaining; + + fpnew_mxdotp_accumulator_shift #( + ) i_accumulator_shift ( + .sum_product_q(sum_product_q), + .scale_q2(scale_q2), + .operand_d_q2(operand_d_q2), + .info_d_q(info_d_q), + .dst_fmt_q2(dst_fmt_q2), + .accumulator_is_right_shifted(accumulator_is_right_shifted), + .accumulator_right_shift_amount(accumulator_right_shift_amount), + .accumulator_shifted(accumulator_shifted), + .result_is_accumulator(result_is_accumulator), + .accumulator_sticky(accumulator_sticky), + .signed_mantissa_d(signed_mantissa_d), + .accumulator_remaining(accumulator_remaining) + ); + + // ----------------- + // Accumulator + SoP + // ----------------- + logic signed [LZC_SUM_WIDTH-1:0] sum_product_accumulator_extended; + + fpnew_mxdotp_add_accumulator_sop #( + ) i_add_accumulator_sop ( + .sum_product_q(sum_product_q), + .accumulator_shifted(accumulator_shifted), + 
.accumulator_remaining(accumulator_remaining), + .sum_product_accumulator_extended(sum_product_accumulator_extended) + ); + + // -------------- + // Normalization + // -------------- + logic [LZC_SUM_WIDTH-1:0] sum_magnitude; + logic final_sign; + logic [DST_PRECISION_BITS-1:0] final_mantissa; + logic sticky_after_norm; + logic signed [DST_EXP_WIDTH-1:0] final_exponent; + + fpnew_mxdotp_normalizer #( + ) i_normalizer ( + .sum_product_accumulator_extended(sum_product_accumulator_extended), + .accumulator_sticky(accumulator_sticky), + .accumulator_is_right_shifted(accumulator_is_right_shifted), + .accumulator_right_shift_amount(accumulator_right_shift_amount), + .signed_mantissa_d(signed_mantissa_d), + .scale_q2(scale_q2), + .dst_fmt_q2(dst_fmt_q2), + .final_sign(final_sign), + .final_mantissa(final_mantissa), + .sticky_after_norm(sticky_after_norm), + .final_exponent(final_exponent), + .sum_magnitude(sum_magnitude) + ); + + + // ---------------------------- + // Rounding and classification + // ---------------------------- + logic [1:0] round_sticky_bits; + logic [NUM_FORMATS-1:0][DST_WIDTH-1:0] fmt_result; + + logic of_before_round, of_after_round; // overflow + logic uf_after_round; // underflow + + fpnew_mxdotp_rounder #( + .FpDstFmtConfig ( FpDstFmtConfig ) + ) i_rounder ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .final_sign(final_sign), + .final_mantissa(final_mantissa), + .final_exponent(final_exponent), + .sticky_after_norm(sticky_after_norm), + .sum_magnitude(sum_magnitude), + .dst_fmt_q2(dst_fmt_q2), + .rnd_mode_q(rnd_mode_q), + .round_sticky_bits(round_sticky_bits), + .fmt_result(fmt_result), + .of_before_round(of_before_round), + .of_after_round(of_after_round), + .uf_after_round(uf_after_round) + ); + + // ----------------- + // Result selection + // ----------------- + logic [DST_WIDTH-1:0] regular_result; + logic [DST_WIDTH-1:0] accumulator_result; + fpnew_pkg::status_t regular_status; + fpnew_pkg::status_t accumulator_status; + + // Assemble regular 
result + assign regular_result = fmt_result[dst_fmt_q2]; + assign regular_status.NV = 1'b0; // only valid cases are handled in regular path + assign regular_status.DZ = 1'b0; // no divisions + assign regular_status.OF = of_before_round | of_after_round; // rounding can introduce overflow + assign regular_status.UF = uf_after_round & regular_status.NX; // only inexact results raise UF + assign regular_status.NX = (| round_sticky_bits) | of_before_round | of_after_round; + + // Accumulator dominates: NX if SoP was non-zero + assign accumulator_status.NV = 1'b0; + assign accumulator_status.DZ = 1'b0; + assign accumulator_status.OF = 1'b0; + assign accumulator_status.UF = 1'b0; + assign accumulator_status.NX = (sum_product_q != '0); + + assign accumulator_result = (dst_fmt_q2 == fpnew_pkg::FP16ALT) ? {16'hFFFF, operand_d_q2[31:16]} : + operand_d_q2; + + // Final results for output pipeline + logic [DST_WIDTH-1:0] result_d; + fpnew_pkg::status_t status_d; + + // Select output depending on special case detection + assign result_d = result_is_special_q ? special_result_q : (result_is_accumulator ? accumulator_result : regular_result); + assign status_d = result_is_special_q ? special_status_q : (result_is_accumulator ? 
accumulator_status : regular_status); + + // ---------------- + // Output Pipeline + // ---------------- + // Output pipeline signals, index i holds signal after i register stages + logic [0:NUM_OUT_REGS][DST_WIDTH-1:0] out_pipe_result_q; + fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; + TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; + AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; + logic [0:NUM_OUT_REGS] out_pipe_valid_q; + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_pipe_ready; + + // Input stage: First element of pipeline is taken from inputs + assign out_pipe_result_q[0] = result_d; + assign out_pipe_status_q[0] = status_d; + assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; + assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; + assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; + // Input stage: Propagate pipeline ready signal to inside pipe + assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Generate the pipeline registers within the stages, use enable-registers + `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) + `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) + `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) + `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) + end + // Output stage: Ready travels backwards from output side, driven by downstream circuitry + assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs + assign result_o = out_pipe_result_q[NUM_OUT_REGS]; + assign status_o = out_pipe_status_q[NUM_OUT_REGS]; + assign extension_bit_o = 1'b1; // always NaN-Box result + assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; + assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; + assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; + assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); +endmodule diff --git a/src/fpnew_mxdotp_multi_wrapper.sv b/src/fpnew_mxdotp_multi_wrapper.sv new file mode 100644 index 00000000..2efccc60 --- /dev/null +++ b/src/fpnew_mxdotp_multi_wrapper.sv @@ -0,0 +1,245 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. 
You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
+
+// Author: Gamze Islamoglu
+// Wrapper around fpnew_mxdotp_multi: unpacks operands_i into A/B vectors, two scales and an accumulator, and pads the result.
+module fpnew_mxdotp_multi_wrapper
+  import fpnew_mxdotp_multi_pkg::*; // NOTE(review): VectorSize, SRC_WIDTH, SCALE_WIDTH, DST_WIDTH, NUM_FORMATS, NUM_OPERANDS are not declared here - presumably from this package
+#(
+  parameter int unsigned             LaneWidth                   = 64, // Width of each operands_i entry and of result_o; the FP6 path below slices bits up to [63]
+  parameter fpnew_pkg::fmt_logic_t   FpSrcFmtConfig              = '1, // Supported FP source formats (FP8, FP8ALT, FP6, FP6ALT, FP4)
+  parameter fpnew_pkg::ifmt_logic_t  IntSrcFmtConfig             = '1, // Supported INT formats (INT8)
+  parameter fpnew_pkg::fmt_logic_t   FpDstFmtConfig              = '1, // Supported FP destination formats (FP32, FP16ALT)
+  parameter int unsigned             Unroll                      = 8, // Unroll factor for FP6 extended operands, possible values: 1, 2, 4, 8
+  parameter int unsigned             NumPipeRegs                 = 4, // Forwarded unchanged to fpnew_mxdotp_multi
+  parameter fpnew_pkg::pipe_config_t PipeConfig                  = fpnew_pkg::BEFORE, // Forwarded unchanged to fpnew_mxdotp_multi
+  parameter type                     TagType                     = logic, // Type of tag_i / tag_o
+  parameter type                     AuxType                     = logic, // Type of aux_i / aux_o
+  parameter fpnew_pkg::rsr_impl_t    StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // NOTE(review): never referenced inside this wrapper - confirm it is intentionally unused
+  // Do not change
+  localparam int OPERAND_WIDTH    = LaneWidth,
+  localparam int UNROLL_IDX_WIDTH = (Unroll > 1) ? $clog2(Unroll) : 1 // at least 1 bit so unroll_index is never zero-width
+) (
+  input  logic                                clk_i,
+  input  logic                                rst_ni,
+  // Input signals
+  input  logic [2:0][OPERAND_WIDTH-1:0]       operands_i, // [0]: A vector, [1]: B vector, [2]: accumulator in the low bits with the two scales above it
+  input  logic [NUM_FORMATS-1:0][2:0]         is_boxed_i, // per-format boxed flag for each of the 3 operands
+  input  fpnew_pkg::roundmode_e               rnd_mode_i,
+  input  fpnew_pkg::operation_e               op_i,
+  input  logic                                op_mod_i,
+  input  fpnew_pkg::fp_format_e               src_fmt_i,
+  input  fpnew_pkg::int_format_e              int_fmt_i,
+  input  fpnew_pkg::fp_format_e               dst_fmt_i,
+  input  TagType                              tag_i,
+  input  logic                                mask_i,
+  input  AuxType                              aux_i,
+  // Input Handshake
+  input  logic                                in_valid_i,
+  output logic                                in_ready_o,
+  input  logic                                flush_i,
+  // Output signals
+  output logic [OPERAND_WIDTH-1:0]            result_o,
+  output fpnew_pkg::status_t                  status_o,
+  output logic                                extension_bit_o,
+  output TagType                              tag_o,
+  output logic                                mask_o,
+  output AuxType                              aux_o,
+  // Output handshake
+  output logic                                out_valid_o,
+  input  logic                                out_ready_i,
+  // Indication of valid data in flight
+  output logic                                busy_o
+);
+
+  // -----------------
+  // Input processing: signals handed to (or received from) i_fpnew_mxdotp_multi
+  // -----------------
+  logic [VectorSize-1:0][SRC_WIDTH-1:0]       local_src_fmt_operand_a;     // A vector, taken from operands_i[0]
+  logic [VectorSize-1:0][SRC_WIDTH-1:0]       local_src_fmt_operand_b;     // B vector, taken from operands_i[1]
+  logic [1:0]                                 local_src_fmt_operand_a_rem; // 2 additional A bits, only non-zero in FP6 STEP1/STEP2
+  logic [1:0]                                 local_src_fmt_operand_b_rem; // 2 additional B bits, only non-zero in FP6 STEP1/STEP2
+  logic [1:0][SCALE_WIDTH-1:0]                local_src_fmt_operand_c;     // the two scale fields sliced out of operands_i[2]
+  logic [NUM_FORMATS-1:0][DST_WIDTH-1:0]      local_src_fmt_operand_d;     // accumulator candidate per format; dst_fmt_i selects one
+  logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0]   local_is_boxed;              // is_boxed_i flags replicated per vector element
+  logic [OPERAND_WIDTH-1:0]                   local_result;                // result before it is driven onto result_o
+
+  // -------------------------
+  // Extended operands for FP6: three consecutive 64-bit inputs are repacked as 60 + 66 + 66 = 192 bits
+  // -------------------------
+
+  if (FpSrcFmtConfig[fpnew_pkg::FP6] || FpSrcFmtConfig[fpnew_pkg::FP6ALT]) begin : gen_fp6_operands
+
+    typedef enum logic [1:0] {
+      STEP0 = 2'b00, // forward input bits [59:0], stash bits [63:60]
+      STEP1 = 2'b01, // forward 4 stashed bits + input bits [61:0], stash bits [63:62]
+      STEP2 = 2'b10  // forward 2 stashed bits + all 64 input bits
+    } fp6_step_e;
+
+    fp6_step_e step;
+
+    // Counts accepted FP6 inputs; runs from 0 to 3*Unroll-1 and then wraps
+    // Each 192b/6b = 32 FP6 operands are processed in 3 steps
+    logic [$clog2(3*Unroll)-1:0] count_q, count_d;
+    logic [UNROLL_IDX_WIDTH-1:0] unroll_index; // stash slot in use: low bits of count_q, or 0 when Unroll == 1
+
+    // Leftover operand bits carried to the next step: [operand A=0 / B=1][unroll slot][4 bits]
+    logic [1:0][Unroll-1:0][3:0] local_fp6_stores_d, local_fp6_stores_q;
+    logic [1:0][3:0]             local_fp6_stores; // bits to stash from the current input, per operand
+
+    if (Unroll > 1) begin : gen_unroll_idx
+      assign unroll_index = count_q[$clog2(Unroll)-1:0];
+    end else begin : gen_no_unroll
+      assign unroll_index = '0;
+    end
+
+    assign step = fp6_step_e'(count_q >> $clog2(Unroll)); // upper counter bits: each step lasts Unroll accepted inputs
+
+    always_comb begin
+      count_d            = count_q;            // default: hold the counter
+      local_fp6_stores_d = local_fp6_stores_q; // default: hold the stash
+
+      local_fp6_stores = '0;
+
+      local_src_fmt_operand_a     = '0;
+      local_src_fmt_operand_b     = '0;
+      local_src_fmt_operand_a_rem = '0;
+      local_src_fmt_operand_b_rem = '0;
+
+      if (src_fmt_i == fpnew_pkg::FP6 || src_fmt_i == fpnew_pkg::FP6ALT) begin
+        if (step == STEP0) begin
+          local_src_fmt_operand_a = {4'b0000, operands_i[0][59:0]}; // low 60 bits, zero-padded at the top
+          local_fp6_stores[0]     = operands_i[0][63:60];           // top 4 bits are consumed in STEP1
+          local_src_fmt_operand_b = {4'b0000, operands_i[1][59:0]};
+          local_fp6_stores[1]     = operands_i[1][63:60];
+        end else if (step == STEP1) begin
+          local_src_fmt_operand_a     = {operands_i[0][59:0], local_fp6_stores_q[0][unroll_index][3:0]}; // stashed 4 bits become the LSBs
+          local_src_fmt_operand_a_rem = operands_i[0][61:60];                                            // 2 bits that do not fit the 64-bit vector
+          local_fp6_stores[0]         = {2'b00, operands_i[0][63:62]};                                   // top 2 bits are consumed in STEP2
+          local_src_fmt_operand_b     = {operands_i[1][59:0], local_fp6_stores_q[1][unroll_index][3:0]};
+          local_src_fmt_operand_b_rem = operands_i[1][61:60];
+          local_fp6_stores[1]         = {2'b00, operands_i[1][63:62]};
+        end else if (step == STEP2) begin
+          local_src_fmt_operand_a     = {operands_i[0][61:0], local_fp6_stores_q[0][unroll_index][1:0]}; // stashed 2 bits become the LSBs
+          local_src_fmt_operand_a_rem = operands_i[0][63:62];                                            // nothing is left to stash after this step
+          local_src_fmt_operand_b     = {operands_i[1][61:0], local_fp6_stores_q[1][unroll_index][1:0]};
+          local_src_fmt_operand_b_rem = operands_i[1][63:62];
+        end
+
+        if (in_valid_i && in_ready_o) begin
+          // Commit the stash and advance the counter only when an FP6 input is actually accepted
+          local_fp6_stores_d[0][unroll_index] = local_fp6_stores[0];
+          local_fp6_stores_d[1][unroll_index] = local_fp6_stores[1];
+          count_d = count_q + 1;
+          if (count_d == 3 * Unroll) begin // all 3 steps done for every unroll slot
+            count_d = '0;
+          end
+        end
+      end else begin // any other source format: operands pass through unchanged, counter and stash hold
+        local_src_fmt_operand_a = operands_i[0];
+        local_src_fmt_operand_b = operands_i[1];
+      end
+    end
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin // NOTE(review): cleared by rst_ni only; flush_i does not restart the FP6 step sequence - confirm intended
+      if (!rst_ni) begin
+        count_q            <= '0;
+        local_fp6_stores_q <= '0;
+      end else begin
+        count_q            <= count_d;
+        local_fp6_stores_q <= local_fp6_stores_d;
+      end
+    end
+
+  end else begin : gen_no_fp6_operands // no FP6 format configured: plain pass-through, no repacking state
+    assign local_src_fmt_operand_a     = operands_i[0];
+    assign local_src_fmt_operand_b     = operands_i[1];
+    assign local_src_fmt_operand_a_rem = '0;
+    assign local_src_fmt_operand_b_rem = '0;
+  end
+
+  // ----------------------------------
+  // assign scale operands: two SCALE_WIDTH fields of operands_i[2], stacked directly above its low DST_WIDTH bits
+  // ----------------------------------
+  assign local_src_fmt_operand_c[1] = operands_i[2][(DST_WIDTH+SCALE_WIDTH)+:SCALE_WIDTH];
+  assign local_src_fmt_operand_c[0] = operands_i[2][DST_WIDTH+:SCALE_WIDTH];
+
+  // ----------------------------------
+  // assign operands with src format: one accumulator candidate and one set of boxed flags per FP format
+  // ----------------------------------
+  // NaN-boxing check
+  for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox
+
+    localparam int unsigned FP_WIDTH         = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt));
+    localparam int unsigned FP_WIDTH_DST_MIN = fpnew_pkg::minimum(DST_WIDTH, FP_WIDTH); // never copy more than DST_WIDTH bits
+
+    always_comb begin : nanbox
+      local_src_fmt_operand_d[fmt] = '1; // start from all ones so bits above the format width stay 1
+      local_src_fmt_operand_d[fmt][FP_WIDTH_DST_MIN-1:0] = operands_i[2][FP_WIDTH_DST_MIN-1:0];
+
+      for (int i = 0; i < VectorSize; i++) begin
+        local_is_boxed[fmt][i]            = is_boxed_i[fmt][0]; // every A element shares operand 0's flag
+        local_is_boxed[fmt][i+VectorSize] = is_boxed_i[fmt][1]; // every B element shares operand 1's flag
+      end
+
+      local_is_boxed[fmt][2*VectorSize] = is_boxed_i[fmt][2]; // operand 2's flag; NOTE(review): assumes NUM_OPERANDS == 2*VectorSize+1, otherwise higher bits are never assigned here
+    end
+  end
+
+  fpnew_mxdotp_multi #(
+    .FpSrcFmtConfig  ( FpSrcFmtConfig  ),
+    .IntSrcFmtConfig ( IntSrcFmtConfig ),
+    .FpDstFmtConfig  ( FpDstFmtConfig  ),
+    .NumPipeRegs     ( NumPipeRegs     ),
+    .PipeConfig      ( PipeConfig      ),
+    .TagType         ( TagType         ),
+    .AuxType         ( AuxType         )
+  ) i_fpnew_mxdotp_multi (
+    .clk_i,
+    .rst_ni,
+    .operands_a_i         ( local_src_fmt_operand_a            ),
+    .operands_b_i         ( local_src_fmt_operand_b            ),
+    .operands_a_fp6_rem_i ( local_src_fmt_operand_a_rem        ),
+    .operands_b_fp6_rem_i ( local_src_fmt_operand_b_rem        ),
+    .operands_c_i         ( local_src_fmt_operand_c            ),
+    .operand_d_i          ( local_src_fmt_operand_d[dst_fmt_i] ), // accumulator candidate selected by the destination format
+    .is_boxed_i           ( local_is_boxed                     ),
+    .rnd_mode_i,
+    .op_i,
+    .op_mod_i,
+    .src_fmt_i, // format of the multiplicands
+    .int_fmt_i, // format of the multiplicands if they are integers
+    .dst_fmt_i, // format of the addend and result
+    .tag_i,
+    .mask_i,
+    .aux_i,
+    .in_valid_i,
+    .in_ready_o,
+    .flush_i,
+    .result_o             ( local_result[DST_WIDTH-1:0]        ), // the unit drives only the low DST_WIDTH bits
+    .status_o,
+    .extension_bit_o,
+    .tag_o,
+    .mask_o,
+    .aux_o,
+    .out_valid_o,
+    .out_ready_i,
+    .busy_o
+  );
+
+  if (OPERAND_WIDTH > DST_WIDTH) begin // fill every result bit above DST_WIDTH with 1
+    assign local_result[OPERAND_WIDTH-1:DST_WIDTH] = '1;
+  end
+  assign result_o = local_result;
+
+endmodule
diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index db2c3032..f03119aa 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -21,6 +21,8 @@ module fpnew_opgroup_block #( parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, parameter fpnew_pkg::fmt_logic_t FpFmtMask = '1, parameter fpnew_pkg::ifmt_logic_t IntFmtMask = '1, + parameter fpnew_pkg::fmt_logic_t MxFpFmtMask = '0, // MX-specific FP formats + parameter fpnew_pkg::ifmt_logic_t MxIntFmtMask = '0, // MX-specific INT formats parameter fpnew_pkg::fmt_unsigned_t FmtPipeRegs = '{default: 0}, parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes = '{default: fpnew_pkg::PARALLEL}, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, @@ -178,15 +180,17 @@ module fpnew_opgroup_block #( assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED); fpnew_opgroup_multifmt_slice #( - .OpGroup ( OpGroup ), - .Width ( Width ), - .FpFmtConfig ( FpFmtMask ), - .IntFmtConfig ( IntFmtMask ), - .EnableVectors ( EnableVectors ), - .DivSqrtSel ( DivSqrtSel ), - .NumPipeRegs ( REG ), - .PipeConfig ( PipeConfig 
), - .TagType ( TagType ), + .OpGroup ( OpGroup ), + .Width ( Width ), + .FpFmtConfig ( FpFmtMask ), + .IntFmtConfig ( IntFmtMask ), + .MxFpFmtConfig ( MxFpFmtMask ), + .MxIntFmtConfig ( MxIntFmtMask ), + .EnableVectors ( EnableVectors ), + .DivSqrtSel ( DivSqrtSel ), + .NumPipeRegs ( REG ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), .StochasticRndImplementation ( StochasticRndImplementation ) ) i_multifmt_slice ( .clk_i, diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index f5991cbd..5409b34a 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -16,16 +16,18 @@ `include "common_cells/registers.svh" module fpnew_opgroup_multifmt_slice #( - parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, - parameter int unsigned Width = 64, + parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::CONV, + parameter int unsigned Width = 64, // FPU configuration - parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, - parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, - parameter logic EnableVectors = 1'b1, - parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, - parameter int unsigned NumPipeRegs = 0, - parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = '1, + parameter fpnew_pkg::fmt_logic_t MxFpFmtConfig = '0, // MX-specific FP formats + parameter fpnew_pkg::ifmt_logic_t MxIntFmtConfig = '0, // MX-specific INT formats + parameter logic EnableVectors = 1'b1, + parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change localparam int unsigned NUM_OPERANDS = 
fpnew_pkg::num_operands(OpGroup), @@ -82,11 +84,22 @@ The SDOTP operations compute on 8b inputs producing 16b outputs \ or on 16b inputs producing 32b outputs"); end + if (OpGroup == fpnew_pkg::MXDOTP) begin + if (Width != 64) begin + $fatal(1, "MXDOTP only supported on 64b CVFPU instances, got Width=%0d", Width); + end else if (!FpFmtConfig[fpnew_pkg::FP32]) begin + $fatal(1, "MXDOTP requires FP32 to be enabled as a destination format. Please enable FP32 in FpFmtConfig"); + end else if (!MxFpFmtConfig[fpnew_pkg::FP8]) begin + $fatal(1, "MXDOTP requires FP8 to be enabled as a source format. Please enable FP8 in MxFpFmtConfig."); + end + end + localparam int unsigned MAX_FP_WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig); localparam int unsigned MAX_INT_WIDTH = fpnew_pkg::max_int_width(IntFmtConfig); localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, 1'b1); localparam int unsigned NUM_DIVSQRT_LANES = fpnew_pkg::num_divsqrt_lanes(Width, FpFmtConfig, 1'b1, DivSqrtSel); localparam int unsigned NUM_DOTP_LANES = fpnew_pkg::num_dotp_lanes(Width, FpFmtConfig); + localparam int unsigned NUM_MX_LANES = fpnew_pkg::num_mxdotp_lanes(Width, MxFpFmtConfig, MxIntFmtConfig); localparam int unsigned NUM_INT_FORMATS = fpnew_pkg::NUM_INT_FORMATS; // We will send the format information along with the data localparam int unsigned FMT_BITS = @@ -195,6 +208,16 @@ or on 16b inputs producing 32b outputs"); localparam int unsigned DOTP_MAX_FMT_WIDTH = fpnew_pkg::max_fp_width(DOTP_FORMATS); localparam int unsigned DOTP_WIDTH = fpnew_pkg::minimum(2*DOTP_MAX_FMT_WIDTH, Width); + // MXDOTP-specific parameters + localparam fpnew_pkg::lane_formats_t MXDOTP_FORMATS = + fpnew_pkg::get_mxdotp_formats(Width, FpFmtConfig, MxFpFmtConfig, MxIntFmtConfig, LANE); + localparam fpnew_pkg::fmt_logic_t MXDOTP_FP_FORMATS = + MXDOTP_FORMATS.src_fp_formats; + localparam fpnew_pkg::ifmt_logic_t MXDOTP_INT_FORMATS = + MXDOTP_FORMATS.src_int_formats; + localparam fpnew_pkg::fmt_logic_t 
MXDOTP_DST_FORMATS = + MXDOTP_FORMATS.dst_fp_formats; + // Lane parameters from Opgroup localparam fpnew_pkg::fmt_logic_t LANE_FORMATS = (OpGroup == fpnew_pkg::CONV) ? CONV_FORMATS : (OpGroup == fpnew_pkg::DOTP) ? DOTP_FORMATS : @@ -206,7 +229,9 @@ or on 16b inputs producing 32b outputs"); // Generate instances only if needed, lane 0 always generated if ((lane == 0) || (EnableVectors & (!(OpGroup == fpnew_pkg::DOTP && (lane >= NUM_DOTP_LANES)) - && !(OpGroup == fpnew_pkg::DIVSQRT && (lane >= NUM_DIVSQRT_LANES))))) begin : active_lane + && !(OpGroup == fpnew_pkg::DIVSQRT && (lane >= NUM_DIVSQRT_LANES)) + && !(OpGroup == fpnew_pkg::MXDOTP && (lane >= NUM_MX_LANES)) + ))) begin : active_lane logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local oprands @@ -215,7 +240,8 @@ or on 16b inputs producing 32b outputs"); logic lane_is_used; assign lane_is_used = (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | - (LANE_FORMATS[dst_fmt_i] & is_up_cast) | (OpGroup == fpnew_pkg::DIVSQRT); + (LANE_FORMATS[dst_fmt_i] & is_up_cast) | + (OpGroup == fpnew_pkg::DIVSQRT) | (OpGroup == fpnew_pkg::MXDOTP); assign in_valid = in_valid_i & ((lane == 0) | vectorial_op) & lane_is_used; // upper lanes only for vectors // Slice out the operands for this lane, upper bits are ignored in the unit @@ -462,6 +488,42 @@ or on 16b inputs producing 32b outputs"); .out_ready_i ( out_ready ), .busy_o ( lane_busy[lane] ) ); + end else if (OpGroup == fpnew_pkg::MXDOTP) begin : lane_instance + fpnew_mxdotp_multi_wrapper #( + .FpSrcFmtConfig ( MXDOTP_FP_FORMATS ), + .IntSrcFmtConfig ( MXDOTP_INT_FORMATS ), + .FpDstFmtConfig ( MXDOTP_DST_FORMATS ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_fpnew_mxdotp_multi_wrapper ( + .clk_i, + .rst_ni, + .operands_i ( local_operands[2:0] ), + .is_boxed_i, + .rnd_mode_i, + .op_i, + .op_mod_i, + .src_fmt_i, + .int_fmt_i, 
+ .dst_fmt_i, + .tag_i, + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), + .flush_i, + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ) + ); end // ADD OTHER OPTIONS HERE // Handshakes are only done if the lane is actually used diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv index 1e8ce099..637b2e6e 100644 --- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -26,6 +26,9 @@ package fpnew_pkg; // | FP8 | binary8 | 8 bit | 5 | 2 // | FP16ALT | binary16alt | 16 bit | 8 | 7 // | FP8ALT | binary8alt | 8 bit | 4 | 3 + // | FP6 | binary6 | 6 bit | 3 | 2 + // | FP6ALT | binary6alt | 6 bit | 2 | 3 + // | FP4 | binary4 | 4 bit | 2 | 1 // *NOTE:* Add new formats only at the end of the enumeration for backwards compatibilty! 
// Encoding for a format @@ -34,7 +37,7 @@ package fpnew_pkg; int unsigned man_bits; } fp_encoding_t; - localparam int unsigned NUM_FP_FORMATS = 6; // change me to add formats + localparam int unsigned NUM_FP_FORMATS = 9; // change me to add formats localparam int unsigned FP_FORMAT_BITS = $clog2(NUM_FP_FORMATS); // FP formats @@ -44,7 +47,10 @@ package fpnew_pkg; FP16 = 'd2, FP8 = 'd3, FP16ALT = 'd4, - FP8ALT = 'd5 + FP8ALT = 'd5, + FP6 = 'd6, + FP6ALT = 'd7, + FP4 = 'd8 // add new formats here } fp_format_e; @@ -55,17 +61,20 @@ package fpnew_pkg; '{5, 10}, // IEEE binary16 (half) '{5, 2}, // custom binary8 '{8, 7}, // custom binary16alt - '{4, 3} // custom binary8alt + '{4, 3}, // custom binary8alt + '{3, 2}, // custom binary6 + '{2, 3}, // custom binary6alt + '{2, 1} // custom binary4 // add new formats here }; typedef logic [0:NUM_FP_FORMATS-1] fmt_logic_t; // Logic indexed by FP format (for masks) typedef logic [0:NUM_FP_FORMATS-1][31:0] fmt_unsigned_t; // Unsigned indexed by FP format - localparam fmt_logic_t CPK_FORMATS = 6'b110000; // FP32 and FP64 can provide CPK only + localparam fmt_logic_t CPK_FORMATS = 9'b110000000; // FP32 and FP64 can provide CPK only // FP32, FP64 cannot be provided for DOTP // Small hack: FP32 only enabled for wide enough wrapper input widths for vsum.s instruction - localparam fmt_logic_t DOTP_FORMATS = 6'b101111; + localparam fmt_logic_t DOTP_FORMATS = 9'b101111000; // --------- // INT TYPES @@ -110,14 +119,28 @@ package fpnew_pkg; typedef logic [0:NUM_INT_FORMATS-1] ifmt_logic_t; // Logic indexed by INT format (for masks) + // Combined format struct for operations that need FP, INT, and destination formats + typedef struct packed { + fmt_logic_t src_fp_formats; + ifmt_logic_t src_int_formats; + fmt_logic_t dst_fp_formats; + } lane_formats_t; + + // MXDOTP format masks + localparam lane_formats_t MXDOTP_FORMATS_MASK = '{ + src_fp_formats: 9'b000101111, // FP8, FP8ALT, FP6, FP6ALT, FP4 + src_int_formats: 4'b1000, // INT8 + 
dst_fp_formats: 9'b100010000 // FP32, FP16ALT + }; + // -------------- // FP OPERATIONS // -------------- - localparam int unsigned NUM_OPGROUPS = 5; + localparam int unsigned NUM_OPGROUPS = 6; // Each FP operation belongs to an operation group typedef enum logic [2:0] { - ADDMUL, DIVSQRT, NONCOMP, CONV, DOTP + ADDMUL, DIVSQRT, NONCOMP, CONV, DOTP, MXDOTP } opgroup_e; localparam int unsigned OP_BITS = 5; @@ -127,7 +150,8 @@ package fpnew_pkg; DIV, SQRT, // DIVSQRT operation group SGNJ, MINMAX, CMP, CLASSIFY, // NONCOMP operation group F2F, F2I, I2F, CPKAB, CPKCD, // CONV operation group - SDOTP, EXVSUM, VSUM // DOTP operation group + SDOTP, EXVSUM, VSUM, // DOTP operation group + MXDOTPF, MXDOTPI // MXDOTP operation group } operation_e; // ------------- @@ -226,56 +250,70 @@ package fpnew_pkg; int unsigned Width; logic EnableVectors; logic EnableNanBox; - fmt_logic_t FpFmtMask; - ifmt_logic_t IntFmtMask; + fmt_logic_t FpFmtMask; // Standard FP formats for all opgroups + ifmt_logic_t IntFmtMask; // Standard INT formats for all opgroups + fmt_logic_t MxFpFmtMask; // MX-specific FP formats (FP6, FP6ALT, FP4, plus FP8/FP8ALT) + ifmt_logic_t MxIntFmtMask; // MX-specific INT formats (INT8) } fpu_features_t; localparam fpu_features_t RV64D = '{ Width: 64, EnableVectors: 1'b0, EnableNanBox: 1'b1, - FpFmtMask: 6'b110000, - IntFmtMask: 4'b0011 + FpFmtMask: 9'b110000000, + IntFmtMask: 4'b0011, + MxFpFmtMask: 9'b0, // No MX support + MxIntFmtMask: 4'b0 }; localparam fpu_features_t RV32D = '{ Width: 64, EnableVectors: 1'b1, EnableNanBox: 1'b1, - FpFmtMask: 6'b110000, - IntFmtMask: 4'b0010 + FpFmtMask: 9'b110000000, + IntFmtMask: 4'b0010, + MxFpFmtMask: 9'b0, // No MX support + MxIntFmtMask: 4'b0 }; localparam fpu_features_t RV32F = '{ Width: 32, EnableVectors: 1'b0, EnableNanBox: 1'b1, - FpFmtMask: 6'b100000, - IntFmtMask: 4'b0010 + FpFmtMask: 9'b100000000, + IntFmtMask: 4'b0010, + MxFpFmtMask: 9'b0, // No MX support + MxIntFmtMask: 4'b0 }; localparam fpu_features_t RV64D_Xsflt 
= '{ Width: 64, EnableVectors: 1'b1, EnableNanBox: 1'b1, - FpFmtMask: 6'b111111, - IntFmtMask: 4'b1111 + FpFmtMask: 9'b111111000, // Standard formats (not including FP6, FP6ALT, FP4) + IntFmtMask: 4'b1111, + MxFpFmtMask: 9'b000101111, // MX formats: FP8, FP8ALT, FP6, FP6ALT, FP4 + MxIntFmtMask: 4'b1000 // INT8 for MX operations }; localparam fpu_features_t RV32F_Xsflt = '{ Width: 32, EnableVectors: 1'b1, EnableNanBox: 1'b1, - FpFmtMask: 6'b101111, - IntFmtMask: 4'b1110 + FpFmtMask: 9'b101111000, + IntFmtMask: 4'b1110, + MxFpFmtMask: 9'b0, // No MX support (32-bit width insufficient) + MxIntFmtMask: 4'b0 }; localparam fpu_features_t RV32F_Xf16alt_Xfvec = '{ Width: 32, EnableVectors: 1'b1, EnableNanBox: 1'b1, - FpFmtMask: 6'b100010, - IntFmtMask: 4'b0110 + FpFmtMask: 9'b100010000, + IntFmtMask: 4'b0110, + MxFpFmtMask: 9'b0, // No MX support + MxIntFmtMask: 4'b0 }; @@ -292,7 +330,8 @@ package fpnew_pkg; '{default: MERGED}, // DIVSQRT '{default: PARALLEL}, // NONCOMP '{default: MERGED}, // CONV - '{default: DISABLED}}, // DOTP + '{default: DISABLED}, // DOTP + '{default: DISABLED}}, // MXDOTP PipeConfig: BEFORE }; @@ -302,7 +341,8 @@ package fpnew_pkg; '{default: DISABLED}, // DIVSQRT '{default: PARALLEL}, // NONCOMP '{default: MERGED}, // CONV - '{default: MERGED}}, // DOTP + '{default: MERGED}, // DOTP + '{default: MERGED}}, // MXDOTP PipeConfig: BEFORE }; @@ -425,6 +465,7 @@ package fpnew_pkg; SGNJ, MINMAX, CMP, CLASSIFY: return NONCOMP; F2F, F2I, I2F, CPKAB, CPKCD: return CONV; SDOTP, EXVSUM, VSUM: return DOTP; + MXDOTPF, MXDOTPI: return MXDOTP; default: return NONCOMP; endcase endfunction @@ -437,6 +478,7 @@ package fpnew_pkg; NONCOMP: return 2; CONV: return 3; // vectorial casts use 3 operands DOTP: return 3; // splitting into 5 operands done in wrapper + MXDOTP: return 3; // splitting into 4 operands done in wrapper default: return 0; endcase endfunction @@ -454,7 +496,7 @@ package fpnew_pkg; // Returns the maximum number of lanes in the FPU according to width, 
format config and vectors function automatic int unsigned num_divsqrt_lanes(int unsigned width, fmt_logic_t cfg, logic vec, divsqrt_unit_t DivSqrtSel); automatic fmt_logic_t cfg_tmp; - cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 6'b111010 : cfg; + cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 9'b111010000 : cfg; return vec ? width / min_fp_width(cfg_tmp) : 1; // if no vectors, only one lane endfunction @@ -514,13 +556,43 @@ package fpnew_pkg; automatic fmt_logic_t mask; int unsigned nr_16to32bit_lanes = (cfg[FP32]) ? (width / 32) : 0; if (lane_no < nr_16to32bit_lanes) - mask = 6'b101111; //lane should be 16-bit -> 32-bit + mask = 9'b101111000; //lane should be 16-bit -> 32-bit else - mask = 6'b001111; //lane should be 8-bit -> 16-bit + mask = 9'b001111000; //lane should be 8-bit -> 16-bit res = cfg & mask; return res; endfunction + // Returns how many MXDOTP lanes should be generated + function automatic int num_mxdotp_lanes(int unsigned width, + fmt_logic_t mx_fp_cfg, + ifmt_logic_t mx_int_cfg); + // MXDOTP is single-lane, non-vectorial + // Check if any MX source format is enabled (FP8, FP8ALT, FP6, FP6ALT, FP4) or INT8 + return (width == 64 && (|(mx_fp_cfg & MXDOTP_FORMATS_MASK.src_fp_formats) || + |(mx_int_cfg & MXDOTP_FORMATS_MASK.src_int_formats))) ? 
1 : 0; + endfunction + + // Returns all format masks for MXDOTP operations + // Note: Assumes width == 64 (validated at instantiation) + function automatic lane_formats_t get_mxdotp_formats(int unsigned width, + fmt_logic_t fp_cfg, + fmt_logic_t mx_fp_cfg, + ifmt_logic_t mx_int_cfg, + int unsigned lane_no); + automatic lane_formats_t res; + + // Source FP formats from MX config: FP8, FP8ALT, FP6, FP6ALT, FP4 + res.src_fp_formats = mx_fp_cfg & MXDOTP_FORMATS_MASK.src_fp_formats; + + // Source INT formats from MX config: INT8 only + res.src_int_formats = mx_int_cfg & MXDOTP_FORMATS_MASK.src_int_formats; + + // Destination formats from standard FP config: FP32 and FP16ALT + res.dst_fp_formats = fp_cfg & MXDOTP_FORMATS_MASK.dst_fp_formats; + return res; + endfunction + // Returns the dotp dest FP format string function automatic fmt_logic_t get_dotp_dst_fmts(fmt_logic_t cfg, fmt_logic_t src_cfg); automatic fmt_logic_t res; @@ -529,7 +601,10 @@ package fpnew_pkg; cfg[FP16] && (src_cfg[FP8] || src_cfg[FP8ALT]), cfg[FP8], // FP8 supported as dstFmt for VSUM cfg[FP16ALT] && (src_cfg[FP8] || src_cfg[FP8ALT]), - cfg[FP8ALT] // FP8ALT supported as dstFmt for VSUM + cfg[FP8ALT], // FP8ALT supported as dstFmt for VSUM + 1'b0, // FP6 not supported as dstFmt + 1'b0, // FP6ALT not supported as dstFmt + 1'b0 // FP4 not supported as dstFmt }; return res; endfunction diff --git a/src/fpnew_sdotp_multi_wrapper.sv b/src/fpnew_sdotp_multi_wrapper.sv index d402b67a..edbbea9e 100644 --- a/src/fpnew_sdotp_multi_wrapper.sv +++ b/src/fpnew_sdotp_multi_wrapper.sv @@ -26,7 +26,7 @@ module fpnew_sdotp_multi_wrapper #( parameter type AuxType = logic, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change - localparam fpnew_pkg::fmt_logic_t FpSrcFmtConfig = FpFmtConfig[0] ? (FpFmtConfig & 6'b001111) : (FpFmtConfig & 6'b000101), + localparam fpnew_pkg::fmt_logic_t FpSrcFmtConfig = FpFmtConfig[0] ? 
(FpFmtConfig & 9'b001111000) : (FpFmtConfig & 9'b000101000), localparam fpnew_pkg::fmt_logic_t FpDstFmtConfig = fpnew_pkg::get_dotp_dst_fmts(FpFmtConfig, FpSrcFmtConfig), localparam int SRC_WIDTH = fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpSrcFmtConfig), 1), localparam int DST_WIDTH = fpnew_pkg::maximum(2*fpnew_pkg::max_fp_width(FpSrcFmtConfig), 1), // do not change, current assumption of sdotpex_multi diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index b564286d..a483df72 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -125,6 +125,8 @@ module fpnew_top #( .DivSqrtSel ( DivSqrtSel ), .FpFmtMask ( Features.FpFmtMask ), .IntFmtMask ( Features.IntFmtMask ), + .MxFpFmtMask ( Features.MxFpFmtMask ), + .MxIntFmtMask ( Features.MxIntFmtMask ), .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), .PipeConfig ( Implementation.PipeConfig ), diff --git a/src/mxdotp/fpnew_mxdotp_multi_modules.sv b/src/mxdotp/fpnew_mxdotp_multi_modules.sv new file mode 100644 index 00000000..5b558e2b --- /dev/null +++ b/src/mxdotp/fpnew_mxdotp_multi_modules.sv @@ -0,0 +1,987 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Gamze Islamoglu + +// Classifies and unpacks input operands (FP8/FP6/FP4 vectors, scales, accumulator) into sign/exponent/mantissa +// fields and fp_info structs. Converts unsigned scales (0-255) to signed offsets by subtracting 127; the all-ones scale is flagged as NaN. +module fpnew_mxdotp_classifier + import fpnew_mxdotp_multi_pkg::*; +#( + parameter fpnew_pkg::fmt_logic_t FpSrcFmtConfig = MxdotpSrcFpFmtConfig, + parameter fpnew_pkg::fmt_logic_t FpDstFmtConfig = MxdotpDstFpFmtConfig, + parameter int unsigned FP6VectorSize = 3, + parameter int unsigned FP4VectorSize = 5, + parameter int unsigned NumInpRegs = 0 +) ( + // Input signals + input logic [2*VectorSize-1:0][SRC_WIDTH-1:0] operands_post_inp_pipe, + input logic [2*FP6VectorSize-1:0][SRC_WIDTH-1:0] fp6_operands_post_inp_pipe, + input logic [2*FP4VectorSize-1:0][SRC_WIDTH-1:0] fp4_operands_post_inp_pipe, + input logic signed [1:0][SCALE_WIDTH-1:0] operands_c_q, + input logic [DST_WIDTH-1:0] operand_d_q, + input logic [0:NumInpRegs][NUM_FORMATS-1:0][NUM_OPERANDS-1:0] inp_pipe_is_boxed_q, + input fpnew_pkg::fp_format_e src_fmt_q, + input logic src_is_int, + input fpnew_pkg::fp_format_e dst_fmt_q, + input logic [0:NumInpRegs] inp_pipe_op_mod_q, + // Output signals + output fpnew_pkg::fp_info_t [VectorSize-1:0] info_a, + output fpnew_pkg::fp_info_t [FP6VectorSize-1:0] fp6_info_a, + output fpnew_pkg::fp_info_t [FP4VectorSize-1:0] fp4_info_a, + output fpnew_pkg::fp_info_t [VectorSize-1:0] info_b, + output fpnew_pkg::fp_info_t [FP6VectorSize-1:0] fp6_info_b, + output fpnew_pkg::fp_info_t [FP4VectorSize-1:0] fp4_info_b, + output fpnew_pkg::fp_info_t [1:0] info_c, + output fpnew_pkg::fp_info_t info_d, + output fp_src_t [VectorSize-1:0] operands_a, + output fp6_src_t [FP6VectorSize-1:0] fp6_operands_a, + output fp4_src_t [FP4VectorSize-1:0] fp4_operands_a, + output fp_src_t [VectorSize-1:0] operands_b, + output fp6_src_t [FP6VectorSize-1:0] fp6_operands_b, + output fp4_src_t [FP4VectorSize-1:0] 
fp4_operands_b, + output logic signed [1:0][SCALE_WIDTH-1:0] operands_c, + output fp_dst_t operand_d +); + + // ----------------- + // Source operands + // ----------------- + logic [NUM_FORMATS-1:0][2*VectorSize-1:0] fmt_sign; + logic signed [NUM_FORMATS-1:0][2*VectorSize-1:0][SUPER_EXP_BITS-1:0] fmt_exponent; + logic [NUM_FORMATS-1:0][2*VectorSize-1:0][SUPER_MAN_BITS-1:0] fmt_mantissa; + + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] info_q; + + // FP6 + logic [NUM_FORMATS-1:0][2*FP6VectorSize-1:0] fp6_fmt_sign; + logic signed [NUM_FORMATS-1:0][2*FP6VectorSize-1:0][FP6_EXP_BITS-1:0] fp6_fmt_exponent; + logic [NUM_FORMATS-1:0][2*FP6VectorSize-1:0][FP6_MAN_BITS-1:0] fp6_fmt_mantissa; + + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2*FP6VectorSize-1:0] fp6_info_q; + + // FP4 + logic [NUM_FORMATS-1:0][2*FP4VectorSize-1:0] fp4_fmt_sign; + logic signed [NUM_FORMATS-1:0][2*FP4VectorSize-1:0][FP4_EXP_BITS-1:0] fp4_fmt_exponent; + logic [NUM_FORMATS-1:0][2*FP4VectorSize-1:0][FP4_MAN_BITS-1:0] fp4_fmt_mantissa; + + fpnew_pkg::fp_info_t [NUM_FORMATS-1:0][2*FP4VectorSize-1:0] fp4_info_q; + + // FP Input initialization (Src) + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_src_init_inputs + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpSrcFmtConfig[fmt]) begin : active_src_format + logic [2*VectorSize-1:0][FP_WIDTH-1:0] trimmed_ops; + + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 2*VectorSize ), + .MX ( 1 ) + ) i_fpnew_classifier ( + .operands_i ( trimmed_ops ), + .is_boxed_i ( inp_pipe_is_boxed_q[NumInpRegs][fmt][2*VectorSize-1:0] ), + .info_o ( info_q[fmt][2*VectorSize-1:0] ) + ); + for (genvar op = 0; op < 2*VectorSize; op++) begin : 
gen_operands + assign trimmed_ops[op] = operands_post_inp_pipe[op][FP_WIDTH-1:0]; + assign fmt_sign[fmt][op] = operands_post_inp_pipe[op][FP_WIDTH-1]; + assign fmt_exponent[fmt][op] = signed'({1'b0, operands_post_inp_pipe[op][MAN_BITS+:EXP_BITS]}); + assign fmt_mantissa[fmt][op] = operands_post_inp_pipe[op][MAN_BITS-1:0] << + (SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa + end + end else begin : inactive_src_format + assign info_q[fmt][2*VectorSize-1:0] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled + assign fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + + if (FP6VectorSize != 0) begin : fp6_classifier + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fp6_fmt_src_init_inputs + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpSrcFmtConfig[fmt]) begin : active_src_format + logic [2*FP6VectorSize-1:0][FP_WIDTH-1:0] trimmed_ops; + + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 2*FP6VectorSize ), + .MX ( 1 ) + ) i_fpnew_classifier ( + .operands_i ( trimmed_ops ), + .is_boxed_i ( inp_pipe_is_boxed_q[NumInpRegs][fmt][2*FP6VectorSize-1:0] ), + .info_o ( fp6_info_q[fmt][2*FP6VectorSize-1:0] ) + ); + for (genvar op = 0; op < 2*FP6VectorSize; op++) begin : gen_operands + assign trimmed_ops[op] = fp6_operands_post_inp_pipe[op][FP_WIDTH-1:0]; + assign fp6_fmt_sign[fmt][op] = fp6_operands_post_inp_pipe[op][FP_WIDTH-1]; + assign fp6_fmt_exponent[fmt][op] = fp6_operands_post_inp_pipe[op][MAN_BITS+:EXP_BITS]; + assign fp6_fmt_mantissa[fmt][op] = 
fp6_operands_post_inp_pipe[op][MAN_BITS-1:0] << + (SUPER_MAN_BITS - MAN_BITS); // move to left of mantissa + end + end else begin : inactive_src_format + assign fp6_info_q[fmt][2*FP6VectorSize-1:0] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fp6_fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled + assign fp6_fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fp6_fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + end + + if (FP4VectorSize != 0) begin : fp4_classifier + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fp4_fmt_src_init_inputs + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpSrcFmtConfig[fmt]) begin : active_src_format + logic [2*FP4VectorSize-1:0][FP_WIDTH-1:0] trimmed_ops; + + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 2*FP4VectorSize ), + .MX ( 1 ) + ) i_fpnew_classifier ( + .operands_i ( trimmed_ops ), + .is_boxed_i ( inp_pipe_is_boxed_q[NumInpRegs][fmt][2*FP4VectorSize-1:0] ), + .info_o ( fp4_info_q[fmt][2*FP4VectorSize-1:0] ) + ); + for (genvar op = 0; op < 2*FP4VectorSize; op++) begin : gen_operands + assign trimmed_ops[op] = fp4_operands_post_inp_pipe[op][FP_WIDTH-1:0]; + assign fp4_fmt_sign[fmt][op] = fp4_operands_post_inp_pipe[op][FP_WIDTH-1]; + assign fp4_fmt_exponent[fmt][op] = fp4_operands_post_inp_pipe[op][MAN_BITS+:EXP_BITS]; + assign fp4_fmt_mantissa[fmt][op] = fp4_operands_post_inp_pipe[op][MAN_BITS-1:0]; + end + end else begin : inactive_src_format + assign fp4_info_q[fmt][2*FP4VectorSize-1:0] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fp4_fmt_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled + assign 
fp4_fmt_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fp4_fmt_mantissa[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + end + end + end + + // ---------------------------- + // Destination operand + // ---------------------------- + logic [NUM_FORMATS-1:0] fmt_dst_sign; + logic signed [NUM_FORMATS-1:0][SUPER_DST_EXP_BITS-1:0] fmt_dst_exponent; + logic [NUM_FORMATS-1:0][SUPER_DST_MAN_BITS-1:0] fmt_dst_mantissa; + + // FP Input initialization (Dst) + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : fmt_dst_init_inputs + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpDstFmtConfig[fmt]) begin : active_dst_format + logic [FP_WIDTH-1:0] trimmed_dst_ops; + logic dst_ops_is_boxed; + + assign dst_ops_is_boxed = inp_pipe_is_boxed_q[NumInpRegs][fmt][NUM_OPERANDS-1]; + + // Classify input + fpnew_classifier #( + .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), + .NumOperands ( 1 ) + ) i_fpnew_classifier ( + .operands_i ( trimmed_dst_ops ), + .is_boxed_i ( dst_ops_is_boxed ), + .info_o ( info_q[fmt][NUM_OPERANDS-1] ) + ); + assign trimmed_dst_ops = operand_d_q[FP_WIDTH-1:0]; + assign fmt_dst_sign[fmt] = operand_d_q[FP_WIDTH-1]; + assign fmt_dst_exponent[fmt] = signed'({1'b0, operand_d_q[MAN_BITS+:EXP_BITS]}); + assign fmt_dst_mantissa[fmt] = {info_q[fmt][NUM_OPERANDS-1].is_normal, operand_d_q[MAN_BITS-1:0]} + << (SUPER_DST_MAN_BITS - MAN_BITS); + end else begin : inactive_dst_format + assign info_q[fmt][NUM_OPERANDS-1] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_dst_sign[fmt] = fpnew_pkg::DONT_CARE; // format disabled + assign fmt_dst_exponent[fmt] = '{default: fpnew_pkg::DONT_CARE}; // format disabled + assign fmt_dst_mantissa[fmt] = '{default: 
fpnew_pkg::DONT_CARE}; // format disabled + end + end + + // ------------------------------------------- + // Operation selection and operand adjustment + // ------------------------------------------- + + always_comb begin : op_select + // Default assignments - packing-order-agnostic + if (src_is_int) begin : gen_int_default_assignments + // Integer operands + for (int i = 0; i < VectorSize; i++) begin : gen_default_assignments_int + operands_a[i] = operands_post_inp_pipe[i]; + operands_b[i] = operands_post_inp_pipe[i+VectorSize]; + // set to zero + info_a[i] = fpnew_pkg::fp_info_t'(0); + info_b[i] = fpnew_pkg::fp_info_t'(0); + end + for (int i = 0; i < FP6VectorSize; i++) begin : gen_default_assignments_fp6_int + // FP6 + fp6_operands_a[i] = fp6_operands_post_inp_pipe[i]; + fp6_operands_b[i] = fp6_operands_post_inp_pipe[i+FP6VectorSize]; + // set to zero + fp6_info_a[i] = fpnew_pkg::fp_info_t'(0); + fp6_info_b[i] = fpnew_pkg::fp_info_t'(0); + end + for (int i = 0; i < FP4VectorSize; i++) begin : gen_default_assignments_fp4_int + // FP4 + fp4_operands_a[i] = fp4_operands_post_inp_pipe[i]; + fp4_operands_b[i] = fp4_operands_post_inp_pipe[i+FP4VectorSize]; + // set to zero + fp4_info_a[i] = fpnew_pkg::fp_info_t'(0); + fp4_info_b[i] = fpnew_pkg::fp_info_t'(0); + end + end else begin : gen_fp_default_assignments + // Floating-point operands + for (int i = 0; i < VectorSize; i++) begin : gen_default_assignments_fp + operands_a[i] = {fmt_sign[src_fmt_q][i], fmt_exponent[src_fmt_q][i], fmt_mantissa[src_fmt_q][i]}; + operands_b[i] = {fmt_sign[src_fmt_q][i+VectorSize], fmt_exponent[src_fmt_q][i+VectorSize], fmt_mantissa[src_fmt_q][i+VectorSize]}; + info_a[i] = info_q[src_fmt_q][i]; + info_b[i] = info_q[src_fmt_q][i+VectorSize]; + end + for (int i = 0; i < FP6VectorSize; i++) begin : gen_default_assignments_fp6 + // FP6 + fp6_operands_a[i] = {fp6_fmt_sign[src_fmt_q][i], fp6_fmt_exponent[src_fmt_q][i], fp6_fmt_mantissa[src_fmt_q][i]}; + fp6_operands_b[i] = 
{fp6_fmt_sign[src_fmt_q][i+FP6VectorSize], fp6_fmt_exponent[src_fmt_q][i+FP6VectorSize], fp6_fmt_mantissa[src_fmt_q][i+FP6VectorSize]}; + fp6_info_a[i] = fp6_info_q[src_fmt_q][i]; + fp6_info_b[i] = fp6_info_q[src_fmt_q][i+FP6VectorSize]; + end + for (int i = 0; i < FP4VectorSize; i++) begin : gen_default_assignments_fp4 + // FP4 + fp4_operands_a[i] = {fp4_fmt_sign[src_fmt_q][i], fp4_fmt_exponent[src_fmt_q][i], fp4_fmt_mantissa[src_fmt_q][i]}; + fp4_operands_b[i] = {fp4_fmt_sign[src_fmt_q][i+FP4VectorSize], fp4_fmt_exponent[src_fmt_q][i+FP4VectorSize], fp4_fmt_mantissa[src_fmt_q][i+FP4VectorSize]}; + fp4_info_a[i] = fp4_info_q[src_fmt_q][i]; + fp4_info_b[i] = fp4_info_q[src_fmt_q][i+FP4VectorSize]; + end + end + for (int i = 0; i < 2; i++) begin : gen_default_assignments_c + operands_c[i] = signed'(operands_c_q[i]) - 127; // signed scale, 127 = signed'(2**(SCALE_WIDTH-1)-1) + info_c[i] = '{is_normal: 1'b1, is_nan: operands_c_q[i] == 2**SCALE_WIDTH-1, is_boxed: 1'b1, default: 1'b0}; // normal, boxed value, scale can be NaN + end + operand_d = {fmt_dst_sign[dst_fmt_q], fmt_dst_exponent[dst_fmt_q], fmt_dst_mantissa[dst_fmt_q]}; + info_d = info_q[dst_fmt_q][NUM_OPERANDS-1]; + end +endmodule + +// Detects special cases (NaN, infinity, invalid operations like 0×inf) and generates canonical results. +// Only FP8 sources can have inf/nan; FP6 and FP4 have limited exponent ranges. 
+module fpnew_mxdotp_special_cases + import fpnew_mxdotp_multi_pkg::*; +#( + parameter fpnew_pkg::fmt_logic_t FpDstFmtConfig = MxdotpDstFpFmtConfig +) ( + // Input signals + input fp_src_t [VectorSize-1:0] operands_a, + input fp_src_t [VectorSize-1:0] operands_b, + input logic signed [1:0][SCALE_WIDTH-1:0] operands_c, + input fp_dst_t operand_d, + input fpnew_pkg::fp_info_t [VectorSize-1:0] info_a, + input fpnew_pkg::fp_info_t [VectorSize-1:0] info_b, + input fpnew_pkg::fp_info_t [1:0] info_c, + input fpnew_pkg::fp_info_t info_d, + input fpnew_pkg::fp_format_e dst_fmt_q, + // Output signals: special_result, special_status, result_is_special + output logic [DST_WIDTH-1:0] special_result, + output fpnew_pkg::status_t special_status, + output logic result_is_special +); + + // --------------------- + // Input classification + // --------------------- + logic any_operand_inf; + logic any_operand_nan; + logic signalling_nan; + logic any_produced_nan; + logic any_pos_inf; + logic any_neg_inf; + + // Intermediate signals for each condition + logic [VectorSize-1:0] operand_inf_conditions; + logic [VectorSize-1:0] operand_nan_conditions; + logic [VectorSize-1:0] signalling_nan_conditions; + logic [VectorSize-1:0] nan_conditions; + logic [VectorSize-1:0] pos_inf_conditions; + logic [VectorSize-1:0] neg_inf_conditions; + + // Single generate block for all conditions + generate + for (genvar i = 0; i < VectorSize; i = i + 1) begin : gen_conditions + // Check if any operand is infinite + assign operand_inf_conditions[i] = info_a[i].is_inf || info_b[i].is_inf; + + // Check if any operand is NaN + assign operand_nan_conditions[i] = info_a[i].is_nan || info_b[i].is_nan; + + // Check for signalling NaN + assign signalling_nan_conditions[i] = info_a[i].is_signalling || info_b[i].is_signalling; + + // Check for produced NaN (0 * inf or inf * 0) + assign nan_conditions[i] = (info_a[i].is_inf && info_b[i].is_zero) || + (info_b[i].is_inf && info_a[i].is_zero); + + // Check for positive 
infinity (inf with same sign) + assign pos_inf_conditions[i] = (info_a[i].is_inf && ~(operands_a[i].sign ^ operands_b[i].sign)) || + (info_b[i].is_inf && ~(operands_a[i].sign ^ operands_b[i].sign)); + + // Check for negative infinity (inf with opposite sign) + assign neg_inf_conditions[i] = (info_a[i].is_inf && (operands_a[i].sign ^ operands_b[i].sign)) || + (info_b[i].is_inf && (operands_a[i].sign ^ operands_b[i].sign)); + end + endgenerate + + // Reduction for final results + assign any_operand_inf = |operand_inf_conditions || info_d.is_inf; + assign any_operand_nan = |operand_nan_conditions || info_c[0].is_nan || info_c[1].is_nan || info_d.is_nan; + assign signalling_nan = |signalling_nan_conditions || info_c[0].is_signalling || info_c[1].is_signalling || info_d.is_signalling; + assign any_produced_nan = |nan_conditions; + assign any_pos_inf = |pos_inf_conditions || (info_d.is_inf && ~operand_d.sign); + assign any_neg_inf = |neg_inf_conditions || (info_d.is_inf && operand_d.sign); + + // ---------------------- + // Special case handling + // ---------------------- + logic [NUM_FORMATS-1:0][DST_WIDTH-1:0] fmt_special_result; + fpnew_pkg::status_t [NUM_FORMATS-1:0] fmt_special_status; + logic [NUM_FORMATS-1:0] fmt_result_is_special; + + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_special_results + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + localparam logic [EXP_BITS-1:0] QNAN_EXPONENT = '1; + localparam logic [MAN_BITS-1:0] QNAN_MANTISSA = 2**(MAN_BITS-1); + localparam logic [MAN_BITS-1:0] ZERO_MANTISSA = '0; + + if (FpDstFmtConfig[fmt]) begin : active_format + always_comb begin : special_cases + logic [FP_WIDTH-1:0] special_res; + + // Default assignment + special_res = {1'b0, QNAN_EXPONENT, 
QNAN_MANTISSA}; // qNaN + fmt_special_status[fmt] = '0; + fmt_result_is_special[fmt] = 1'b0; + + // Handle potentially mixed nan & infinity input => important for the case where infinity and + // zero are multiplied and added to a qNaN. + // RISC-V mandates raising the NV exception in these cases: + // (inf * 0) + c or (0 * inf) + c INVALID, no matter c (even quiet NaNs) + if (any_produced_nan) begin + fmt_result_is_special[fmt] = 1'b1; // bypass OP, output is the canonical qNaN + fmt_special_status[fmt].NV = 1'b1; // invalid operation + // NaN Inputs cause canonical quiet NaN at the output and maybe invalid OP + end else if (any_operand_nan) begin + fmt_result_is_special[fmt] = 1'b1; // bypass OP, output is the canonical qNaN + fmt_special_status[fmt].NV = signalling_nan; // raise the invalid operation flag if signalling + // Special cases involving infinity + end else if (any_operand_inf) begin + fmt_result_is_special[fmt] = 1'b1; // bypass OP + // Effective addition of opposite infinities (±inf - ±inf) is invalid! 
+ if (any_pos_inf && any_neg_inf) begin + fmt_special_status[fmt].NV = 1'b1; // invalid operation + // Handle cases where output will be inf because of inf product input + end else if (any_pos_inf) begin + // Result is infinity with the positive sign + special_res = {1'b0, QNAN_EXPONENT, ZERO_MANTISSA}; + // Handle cases where only negative infinities (product or accumulator) are present + end else if (any_neg_inf) begin + // Result is infinity with the negative sign + special_res = {1'b1, QNAN_EXPONENT, ZERO_MANTISSA}; + end + end + // Initialize special result with ones (NaN-box) + fmt_special_result[fmt] = '1; + fmt_special_result[fmt][FP_WIDTH-1:0] = special_res; + end + end else begin : inactive_format + assign fmt_special_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_special_status[fmt] = '0; + assign fmt_result_is_special[fmt] = 1'b0; + end + end + + // Detect special case from destination format + assign result_is_special = fmt_result_is_special[dst_fmt_q]; + // Signalling input NaNs raise invalid flag, otherwise no flags set + assign special_status = fmt_special_status[dst_fmt_q]; + // Assemble result according to destination format + assign special_result = fmt_special_result[dst_fmt_q]; +endmodule + +// Adds two signed 8-bit scale values to produce a 9-bit combined scale. +module fpnew_mxdotp_scale_adder + import fpnew_mxdotp_multi_pkg::*; +( + // Input signals + input logic signed [1:0][SCALE_WIDTH-1:0] operands_c, + output logic signed [SCALE_WIDTH:0] scale // +1 for addition +); + // ------------------ + // Scale data path + // ------------------ + assign scale = signed'(operands_c[0]) + signed'(operands_c[1]); +endmodule + +// Multiplies two vectors of mantissas (with implicit bit prepended) element-wise, applying sign logic. +// Produces signed products (2p+1 bits) based on XOR of input signs. 
+module fpnew_mxdotp_vector_multiplier + import fpnew_mxdotp_multi_pkg::*; +#( + parameter type SrcType = logic, + parameter int unsigned LocalVectorSize = 8, + parameter int unsigned PrecisionBits = 4 +) ( + // Input signals + input SrcType [LocalVectorSize-1:0] operands_a, + input SrcType [LocalVectorSize-1:0] operands_b, + input fpnew_pkg::fp_info_t [LocalVectorSize-1:0] info_a, + input fpnew_pkg::fp_info_t [LocalVectorSize-1:0] info_b, + output logic signed [LocalVectorSize-1:0][2*PrecisionBits :0] product_signed +); + // ------------------ + // Product data path + // ------------------ + logic [LocalVectorSize-1:0][ PrecisionBits-1:0] mantissa_a, mantissa_b; + logic [LocalVectorSize-1:0][2*PrecisionBits-1:0] product; // the p*p product is 2p-bit wide + + // Add implicit bits to mantissae, multiply, and negate the product when the operand signs differ + for (genvar i = 0; i < LocalVectorSize; i++) begin : gen_mantissa + assign mantissa_a[i] = {info_a[i].is_normal, operands_a[i].mantissa}; + assign mantissa_b[i] = {info_b[i].is_normal, operands_b[i].mantissa}; + assign product[i] = mantissa_a[i] * mantissa_b[i]; + assign product_signed[i] = (operands_a[i].sign ^ operands_b[i].sign) ? -product[i] : product[i]; + end +endmodule + +// Multiplies vectors of signed integers (INT8) or floating-point mantissas (FP8) with sign handling. +// For FP8: adds implicit bit and applies sign via negation. For INT8: uses full 8-bit signed values. 
+module fpnew_mxdotp_signed_vector_multiplier + import fpnew_mxdotp_multi_pkg::*; +#( + parameter type SrcType = logic, + parameter int unsigned LocalVectorSize = 8, + parameter int unsigned PrecisionBits = 8 +) ( + // Input signals + input SrcType [LocalVectorSize-1:0] operands_a, + input SrcType [LocalVectorSize-1:0] operands_b, + input fpnew_pkg::fp_format_e src_fmt_q, + input fpnew_pkg::int_format_e int_fmt_q, + input logic src_is_int, + input fpnew_pkg::fp_info_t [LocalVectorSize-1:0] info_a, + input fpnew_pkg::fp_info_t [LocalVectorSize-1:0] info_b, + output logic signed [LocalVectorSize-1:0][2*PrecisionBits-1:0] product_signed +); + // ------------------ + // Product data path + // ------------------ + logic signed [LocalVectorSize-1:0][ PrecisionBits-1:0] mantissa_a, mantissa_b; + + for (genvar i = 0; i < LocalVectorSize; i++) begin : gen_mantissa_fp8 + always_comb begin + if (src_is_int && int_fmt_q == fpnew_pkg::INT8) begin : int8 + // For INT8, bits [7:0] of each operand are used directly as the signed value + mantissa_a[i] = operands_a[i][7:0]; + mantissa_b[i] = operands_b[i][7:0]; + end else begin : fp8 + // Add implicit bits (is_normal) to mantissae and pad with four zero bits on the MSB side + mantissa_a[i] = {4'b0, info_a[i].is_normal, operands_a[i].mantissa}; + mantissa_b[i] = {4'b0, info_b[i].is_normal, operands_b[i].mantissa}; + if (operands_a[i].sign ^ operands_b[i].sign) begin + // If the signs are different, negate mantissa_a so that the signed product is negative + mantissa_a[i] = -signed'(mantissa_a[i]); + end + end + end + end + + for (genvar i = 0; i < LocalVectorSize; i++) begin : gen_mantissa + assign product_signed[i] = signed'(mantissa_a[i]) * signed'(mantissa_b[i]); + end +endmodule + +// Shifts products left by (exp_a + exp_b - 2×bias + SOP_SHIFT) to align to fixed-point anchor. +// Handles FP8/FP6/FP4 with format-specific offsets; INT8 shifts directly to anchor position. 
+module fpnew_mxdotp_product_shifter + import fpnew_mxdotp_multi_pkg::*; +#( + parameter type SrcType = logic, + parameter int unsigned LocalVectorSize = 8, + parameter fpnew_pkg::fp_format_e SrcFmt = fpnew_pkg::FP8, + parameter int unsigned ProductBits = 4, + parameter int unsigned ExpWidth = 8, + parameter int unsigned OutputWidth = 70 +) ( + // Input signals + input SrcType [LocalVectorSize-1:0] operands_a, + input SrcType [LocalVectorSize-1:0] operands_b, + input logic [LocalVectorSize-1:0][ProductBits-1:0] product_signed, + input fpnew_pkg::fp_info_t [LocalVectorSize-1:0] info_a, + input fpnew_pkg::fp_info_t [LocalVectorSize-1:0] info_b, + input fpnew_pkg::fp_format_e src_fmt_q, + input fpnew_pkg::int_format_e int_fmt_q, + input logic src_is_int, + output logic signed [LocalVectorSize-1:0][OutputWidth-1:0] shifted_product +); + // ------------------ + // Shift data path + // ------------------ + logic signed [LocalVectorSize-1:0][ExpWidth-1:0] exponent_product; + + // Calculate the non-biased exponent of the product (subnormal operands contribute exponent + 1) + for (genvar i = 0; i < LocalVectorSize; i++) begin : gen_exponent_adjustment + assign exponent_product[i] = operands_a[i].exponent + info_a[i].is_subnormal + + operands_b[i].exponent + info_b[i].is_subnormal + - 2*signed'(bias_constant(src_fmt_q)); + if (SrcFmt == fpnew_pkg::FP8) begin + always_comb begin // TODO: Generate only for INT8 vs FP8 + if (src_is_int && int_fmt_q == fpnew_pkg::INT8) begin + // INT8: shift to integer position + shifted_product[i] = signed'(product_signed[i]) << ANCHOR; + end else begin + // Left shift the signed product by SOP_SHIFT + unbiased exponent sum to align it to the anchor point + // sum of four 9-bit numbers can be at most 11 bits, for 69 bits output we need to shift by 69 - 11 = 58 + // 58-30=28 plus inherit 6 fractional bits from the multiplication -> point moves to 28+6=34 + // max shift can be 58 (28 + exp-max(30)), min shift is 0 (28 + exp-min(-28)) + shifted_product[i] = signed'(product_signed[i]) << (signed'(SOP_SHIFT) + 
signed'(exponent_product[i])); + end + end + end else if (SrcFmt == fpnew_pkg::FP6) begin + // E3 exponent_product is in range [-4, 8], requires 5b for signed representation + // To keep the shift amount non-negative, a constant offset of 4 is added to it + assign shifted_product[i] = signed'(product_signed[i]) << (signed'(4) + signed'(exponent_product[i])); + end else begin + // exponent_product is negative only for zero inputs for FP4 + assign shifted_product[i] = signed'(product_signed[i]) << exponent_product[i]; + end + end +endmodule + +// Sums all shifted products in the vector. +module fpnew_mxdotp_adder_tree + import fpnew_mxdotp_multi_pkg::*; +#( + parameter int unsigned LocalVectorSize = 8, + parameter int unsigned InputWidth = 4, + parameter int unsigned OutputWidth = 70 +) ( + // Input signals + input logic signed [LocalVectorSize-1:0][InputWidth-1:0] shifted_product, + output logic signed [OutputWidth-1:0] sum_product +); + // ------------------ + // Adder data path + // ------------------ + // Accumulate every element of shifted_product, cast to signed, into sum_product + always_comb begin : sum_products + sum_product = '0; + for (int i = 0; i < LocalVectorSize; i++) begin : gen_sum_products + sum_product += signed'(shifted_product[i]); + end + end +endmodule + +// Adds FP8, FP6, and FP4 sum-of-products; shifts FP6 and FP4 sums to align before adding. +// When FP6 is disabled, sum_product_fp6 is zero and optimized away by synthesis. 
+module fpnew_mxdotp_format_adder + import fpnew_mxdotp_multi_pkg::*; +#( + parameter int unsigned Fp6SumWidth = FP6_PROD_SHIFT_WIDTH, + parameter int unsigned Fp4SumWidth = FP4_PROD_SHIFT_WIDTH +) ( + input logic signed [SOP_FIXED_WIDTH-1:0] sum_product_fp8, + input logic signed [Fp6SumWidth-1:0] sum_product_fp6, + input logic signed [Fp4SumWidth-1:0] sum_product_fp4, + output logic signed [FIXED_SUM_WIDTH-1:0] sum_product +); + // ------------------ + // Adder data path + // ------------------ + logic signed [FIXED_SUM_WIDTH-1:0] sum_product_fp4_shifted; + logic signed [FIXED_SUM_WIDTH-1:0] sum_product_fp6_shifted; + + assign sum_product_fp4_shifted = signed'(sum_product_fp4) << (SOP_SHIFT+2*(SUPER_MAN_BITS-FP4_MAN_BITS)); + assign sum_product_fp6_shifted = signed'(sum_product_fp6) << (SOP_SHIFT-4+2*(SUPER_MAN_BITS-FP6_MAN_BITS)); // 4 is subtracted to compensate for the constant offset of 4 added to the FP6 shift amount in the product shifter + assign sum_product = sum_product_fp8 + sum_product_fp4_shifted + sum_product_fp6_shifted; +endmodule + +// Shifts the accumulator mantissa left or right to align with sum-of-products based on scale and accumulator exponent. +// Computes shift amount, handles sticky bits, and detects if accumulator dominates the result. 
+module fpnew_mxdotp_accumulator_shift + import fpnew_mxdotp_multi_pkg::*; +( + // Input signals + input logic signed [FIXED_SUM_WIDTH-1:0] sum_product_q, + input logic [SCALE_WIDTH:0] scale_q2, + input fp_dst_t operand_d_q2, + input fpnew_pkg::fp_info_t info_d_q, + input fpnew_pkg::fp_format_e dst_fmt_q2, + output logic result_is_accumulator, + output logic accumulator_is_right_shifted, + output logic signed [9:0] accumulator_right_shift_amount, + output logic signed [DST_PRECISION_BITS-1:0] accumulator_remaining, + output logic accumulator_sticky, + output logic signed [DST_PRECISION_BITS :0] signed_mantissa_d, + output logic signed [FIXED_SUM_WIDTH-1:0] accumulator_shifted +); + + // ----------------------------- + // Accumulator shift data path + // ----------------------------- + logic signed [9:0] accumulator_shift_amount; + logic signed [DST_EXP_WIDTH-1:0] exponent_d; + logic [DST_PRECISION_BITS-1:0] mantissa_d; + + // Zero-extend the accumulator exponent into a signed container - implicit width extension + assign exponent_d = {1'b0, operand_d_q2.exponent}; + assign mantissa_d = {info_d_q.is_normal, operand_d_q2.mantissa}; + assign signed_mantissa_d = operand_d_q2.sign ? 
-mantissa_d : mantissa_d; + + // Calculate the shift amount for the accumulator, range = [-370, 394]: 9b magnitude -> signed 10b + assign accumulator_shift_amount = signed'(ANCHOR - SUPER_DST_MAN_BITS) - signed'(scale_q2) + + signed'(exponent_d + info_d_q.is_subnormal) + - signed'(bias_constant(dst_fmt_q2)); + + always_comb begin : accumulator_shift + result_is_accumulator = 1'b0; + accumulator_is_right_shifted = 1'b0; + accumulator_right_shift_amount = '0; + accumulator_remaining = '0; + accumulator_sticky = 1'b0; + if (accumulator_shift_amount > MAX_ACC_SHIFT_AMOUNT) begin + // SoP is too small to change the accumulator, result is the accumulator + accumulator_shifted = '0; + result_is_accumulator = 1'b1; + end else if (accumulator_shift_amount >= 0) begin + accumulator_shifted = signed'(signed_mantissa_d) <<< accumulator_shift_amount; + end else begin + accumulator_is_right_shifted = 1'b1; // negative shift amount: shift right by its magnitude + accumulator_right_shift_amount = -accumulator_shift_amount; + accumulator_shifted = signed'(signed_mantissa_d) >>> accumulator_right_shift_amount; + if (accumulator_right_shift_amount > DST_PRECISION_BITS) begin + result_is_accumulator = (sum_product_q == '0) ? 1'b1 : 1'b0; + accumulator_remaining = signed'(signed_mantissa_d) >>> (accumulator_right_shift_amount - DST_PRECISION_BITS); + accumulator_sticky = |(signed'(signed_mantissa_d) & ((1 << (accumulator_right_shift_amount - DST_PRECISION_BITS)) - 1)); + end else begin + accumulator_remaining = signed'(signed_mantissa_d) << (DST_PRECISION_BITS - accumulator_right_shift_amount); + accumulator_sticky = 1'b0; + end + end + end +endmodule + +// Adds aligned accumulator to sum-of-products, extending with accumulator remainder bits. 
+module fpnew_mxdotp_add_accumulator_sop + import fpnew_mxdotp_multi_pkg::*; +( + // Input signals + input logic signed [FIXED_SUM_WIDTH-1:0] sum_product_q, + input logic signed [FIXED_SUM_WIDTH-1:0] accumulator_shifted, + input logic signed [DST_PRECISION_BITS-1:0] accumulator_remaining, + output logic signed [LZC_SUM_WIDTH-1:0] sum_product_accumulator_extended +); + + logic signed [FIXED_SUM_WIDTH-1:0] sum_product_accumulator; + + assign sum_product_accumulator = sum_product_q + accumulator_shifted; + assign sum_product_accumulator_extended = {sum_product_accumulator, accumulator_remaining}; +endmodule + +// Converts the result to sign-magnitude form: a negative sum is two's-complemented, except that only the bitwise inversion (no +1) is applied when a non-zero accumulator was right-shifted by more than DST_PRECISION_BITS. +module fpnew_mxdotp_twos_compl + import fpnew_mxdotp_multi_pkg::*; +( + // Input signals + input logic [LZC_SUM_WIDTH-1:0] sum_product_accumulator_extended, + input logic signed [DST_PRECISION_BITS :0] signed_mantissa_d, + input logic accumulator_is_right_shifted, + input logic signed [9:0] accumulator_right_shift_amount, + input logic final_sign, + // Output signals + output logic [LZC_SUM_WIDTH-1:0] sum_magnitude +); + // ------------------ + // Two's complement + // ------------------ + + always_comb begin : get_twos_complement + if (final_sign) begin + sum_magnitude = ~sum_product_accumulator_extended + 1; + if (accumulator_is_right_shifted && accumulator_right_shift_amount > DST_PRECISION_BITS && signed_mantissa_d != 0) begin + sum_magnitude = ~sum_product_accumulator_extended; + end + end else begin + sum_magnitude = sum_product_accumulator_extended; + end + end +endmodule + +// Shifts magnitude left by normalization amount to align leading 1 to implicit bit position. 
+module fpnew_mxdotp_norm_shift + import fpnew_mxdotp_multi_pkg::*; +( + // Input signals + input logic [LZC_SUM_WIDTH-1:0] sum_magnitude, + input logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt, + // Output signals + output logic [LZC_SUM_WIDTH-1:0] sum_shifted +); + // ------------------ + // Normalization shift + // ------------------ + + // Shift the sum to normalize it + assign sum_shifted = sum_magnitude << norm_shamt; +endmodule + +// Normalizes fixed-point sum to floating-point: computes LZC, determines shift amount, calculates +// biased exponent (127 - anchor + scale + shift), and extracts mantissa. Handles subnormals. +module fpnew_mxdotp_normalizer + import fpnew_mxdotp_multi_pkg::*; +( + // Input signals + input logic signed [LZC_SUM_WIDTH-1:0] sum_product_accumulator_extended, + input logic accumulator_is_right_shifted, + input logic signed [9:0] accumulator_right_shift_amount, + input logic signed [DST_PRECISION_BITS :0] signed_mantissa_d, + input logic accumulator_sticky, + input logic [SCALE_WIDTH:0] scale_q2, + input fpnew_pkg::fp_format_e dst_fmt_q2, + // Output signals + output logic final_sign, + output logic signed [DST_EXP_WIDTH-1:0] final_exponent, + output logic [DST_PRECISION_BITS-1:0] final_mantissa, + output logic sticky_after_norm, + output logic [LZC_SUM_WIDTH-1:0] sum_magnitude +); + + // -------------- + // Normalization + // -------------- + logic [LZC_SUM_WIDTH-1:0] sum_shifted; + logic [LZC_RESULT_WIDTH-1:0] leading_zero_count; // the number of leading zeroes + logic signed [LZC_RESULT_WIDTH:0] leading_zero_count_sgn; // signed leading-zero count + logic lzc_zeroes; // in case only zeroes found + + logic signed [DST_EXP_WIDTH-1:0] final_tentative_exponent; + + logic [SHIFT_AMOUNT_WIDTH-1:0] norm_shamt; // Normalization shift amount + logic signed [DST_EXP_WIDTH-1:0] normalized_exponent; + + logic [LZC_SUM_WIDTH-DST_PRECISION_BITS-1:0] sum_sticky_bits; + + // Sign and magnitude extraction (the sign is the MSB of the extended sum) + // If sum is negative, complement to feed into leading 
zero counter + assign final_sign = sum_product_accumulator_extended[LZC_SUM_WIDTH-1]; + + fpnew_mxdotp_twos_compl #( + ) i_twos_compl ( + .sum_product_accumulator_extended ( sum_product_accumulator_extended ), + .final_sign ( final_sign ), + .signed_mantissa_d ( signed_mantissa_d ), + .accumulator_is_right_shifted ( accumulator_is_right_shifted ), + .accumulator_right_shift_amount ( accumulator_right_shift_amount ), + .sum_magnitude( sum_magnitude ) + ); + + // Leading zero counter on the magnitude + lzc #( + .WIDTH ( LZC_SUM_WIDTH ), + .MODE ( 1 ) // MODE = 1 counts leading zeroes + ) i_lzc ( + .in_i ( sum_magnitude ), + .cnt_o ( leading_zero_count ), + .empty_o ( lzc_zeroes ) + ); + + assign leading_zero_count_sgn = signed'({1'b0, leading_zero_count}); + + // Calculate the biased exponent (excess-127 form) + // The exponent-major is -scaled_anchor + // exponent = 127 - scaled_anchor + (FIXED_SUM_WIDTH - count - 1); range previously quoted as [-195, 315] (9b -> 10b signed) - TODO confirm against current constants + assign final_tentative_exponent = 127 - (signed'(ANCHOR)-signed'(scale_q2)) + (signed'(FIXED_SUM_WIDTH) - leading_zero_count_sgn - 1); // 127 is hard-coded in place of signed'(fpnew_pkg::bias(dst_fmt_q2)) + + // Normalization shift amount based on exponents and LZC (unsigned as only left shifts) + always_comb begin : norm_shift_amount + // Normal numbers: positive tentative exponent and a non-zero magnitude + if (final_tentative_exponent > 0 && !lzc_zeroes) begin + norm_shamt = leading_zero_count_sgn + 1; + normalized_exponent = final_tentative_exponent; + end else begin // Subnormals and zero + norm_shamt = leading_zero_count_sgn + final_tentative_exponent; + normalized_exponent = '0; // subnormals encoded as 0 + end + end + + fpnew_mxdotp_norm_shift #( + ) i_norm_shift ( + .sum_shifted ( sum_shifted ), + .sum_magnitude ( sum_magnitude ), + .norm_shamt ( norm_shamt ) + ); + + // LSB of final mantissa is the rounding bit + assign {final_mantissa, sum_sticky_bits} = sum_shifted; + assign final_exponent = normalized_exponent; + assign sticky_after_norm = (|sum_sticky_bits) | accumulator_sticky; +endmodule + +// Rounds 
normalized result to destination format with IEEE rounding modes (RNE/RTZ/RDN/RUP/RMM). +// Detects overflow before rounding and overflow/underflow after rounding, generates round/sticky bits. +module fpnew_mxdotp_rounder + import fpnew_mxdotp_multi_pkg::*; +#( + parameter fpnew_pkg::fmt_logic_t FpDstFmtConfig = MxdotpDstFpFmtConfig +) ( + // Input signals + input logic clk_i, + input logic rst_ni, + input logic final_sign, + input logic [DST_EXP_WIDTH-1:0] final_exponent, + input logic [DST_PRECISION_BITS-1:0] final_mantissa, + input logic [LZC_SUM_WIDTH-1:0] sum_magnitude, + input logic sticky_after_norm, + input fpnew_pkg::fp_format_e dst_fmt_q2, + input fpnew_pkg::roundmode_e rnd_mode_q, + // Output signals + output logic [NUM_FORMATS-1:0][DST_WIDTH-1:0] fmt_result, + output logic [1:0] round_sticky_bits, + output logic of_before_round, + output logic of_after_round, + output logic uf_after_round +); + + // ---------------------------- + // Rounding and classification + // ---------------------------- + logic pre_round_sign; + logic [SUPER_DST_EXP_BITS+SUPER_DST_MAN_BITS-1:0] pre_round_abs; // absolute value of result before rounding + + logic [NUM_FORMATS-1:0][SUPER_DST_EXP_BITS+SUPER_DST_MAN_BITS-1:0] fmt_pre_round_abs; // per format + logic [NUM_FORMATS-1:0][1:0] fmt_round_sticky_bits; + + logic [NUM_FORMATS-1:0] fmt_of_after_round; + logic [NUM_FORMATS-1:0] fmt_uf_after_round; + + logic rounded_sign; + logic [SUPER_DST_EXP_BITS+SUPER_DST_MAN_BITS-1:0] rounded_abs; // absolute value of result after rounding + logic result_zero; + + // Classification before round. 
RISC-V mandates checking underflow AFTER rounding + assign of_before_round = final_exponent >= 2**(fpnew_pkg::exp_bits(dst_fmt_q2))-1; // infinity exponent is all ones + + // Pack exponent and mantissa into proper rounding form + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_res_assemble + // Set up some constants + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned ALL_EXTRA_BITS = fpnew_pkg::maximum(SUPER_DST_MAN_BITS-MAN_BITS+1+DST_PRECISION_BITS+PRECISION_BITS+2+1, 1); + + logic [EXP_BITS-1:0] pre_round_exponent; + logic [MAN_BITS-1:0] pre_round_mantissa; + + if (FpDstFmtConfig[fmt]) begin : active_dst_format + + assign pre_round_exponent = (of_before_round) ? 2**EXP_BITS-2 : final_exponent[EXP_BITS-1:0]; + assign pre_round_mantissa = (of_before_round) ? '1 : final_mantissa[SUPER_DST_MAN_BITS-:MAN_BITS]; + // Assemble result before rounding. In case of overflow, the largest normal value is set. + assign fmt_pre_round_abs[fmt] = {pre_round_exponent, pre_round_mantissa}; // 0-extend + + // Round bit is after mantissa (1 in case of overflow for rounding) + assign fmt_round_sticky_bits[fmt][1] = final_mantissa[SUPER_DST_MAN_BITS-MAN_BITS] | + of_before_round; + + // Remaining mantissa bits below the round bit are ORed into the sticky bit (1 in case of overflow for rounding) + if (MAN_BITS < SUPER_DST_MAN_BITS) begin : narrow_sticky + assign fmt_round_sticky_bits[fmt][0] = (| final_mantissa[SUPER_DST_MAN_BITS-MAN_BITS-1:0]) | + sticky_after_norm | of_before_round; + end else begin : normal_sticky + assign fmt_round_sticky_bits[fmt][0] = sticky_after_norm | of_before_round; + end + end else begin : inactive_format + assign fmt_pre_round_abs[fmt] = '{default: fpnew_pkg::DONT_CARE}; + assign fmt_round_sticky_bits[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Assemble result before rounding. 
In case of overflow, the largest normal value is set. + assign pre_round_abs = fmt_pre_round_abs[dst_fmt_q2]; + + // In case of overflow, the round and sticky bits are set for proper rounding + assign round_sticky_bits = fmt_round_sticky_bits[dst_fmt_q2]; + assign pre_round_sign = final_sign; + + // Perform the rounding + fpnew_rounding #( + .AbsWidth ( SUPER_DST_EXP_BITS + SUPER_DST_MAN_BITS ) + ) i_fpnew_rounding ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .id_i ( '0 ), + .abs_value_i ( pre_round_abs ), + .en_rsr_i ( 1'b0 ), + .sign_i ( pre_round_sign ), + .round_sticky_bits_i ( round_sticky_bits ), + .stochastic_rounding_bits_i ( '0 ), + .rnd_mode_i ( rnd_mode_q ), + .effective_subtraction_i ( 1'b0 ), // Effective subtraction is not implemented as RNE is used + .abs_rounded_o ( rounded_abs ), + .sign_o ( rounded_sign ), + .exact_zero_o ( result_zero ) + ); + + + for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_sign_inject + // Set up some constants + localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::fp_format_e'(fmt)); + localparam int unsigned MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::fp_format_e'(fmt)); + + if (FpDstFmtConfig[fmt]) begin : active_dst_format + always_comb begin : post_process + // Detect overflow / underflow from the exponent field of the rounded value + fmt_uf_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0; // exponent field all zeros (denormal) + fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. + + // Assemble regular result, nan box short ones. 
+ fmt_result[fmt] = '1; + fmt_result[fmt][FP_WIDTH-1:0] = {rounded_sign, rounded_abs[EXP_BITS+MAN_BITS-1:0]}; + end + end else begin : inactive_format + assign fmt_uf_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_of_after_round[fmt] = fpnew_pkg::DONT_CARE; + assign fmt_result[fmt] = '{default: fpnew_pkg::DONT_CARE}; + end + end + + // Select the post-rounding classification of the destination format + assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; + assign of_after_round = fmt_of_after_round[dst_fmt_q2]; +endmodule diff --git a/src/mxdotp/fpnew_mxdotp_multi_pkg.sv b/src/mxdotp/fpnew_mxdotp_multi_pkg.sv new file mode 100644 index 00000000..c3ce3404 --- /dev/null +++ b/src/mxdotp/fpnew_mxdotp_multi_pkg.sv @@ -0,0 +1,148 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Gamze Islamoglu + +package fpnew_mxdotp_multi_pkg; + // Configuration + // Format enable mask, one bit per format (several bits may be set): | FP32 | FP64 | FP16 | FP8 | FP16ALT | FP8ALT | FP6 | FP6ALT | FP4 + + // Default format configuration (all MX formats enabled) + // These define the maximum-width types and serve as defaults when not overridden by module parameters. 
+ localparam fpnew_pkg::fmt_logic_t MxdotpSrcFpFmtConfig = 9'b000101111; // FP8, FP8ALT, FP6, FP6ALT, FP4 + localparam fpnew_pkg::ifmt_logic_t MxdotpSrcIntFmtConfig = 4'b1000; // INT8 + localparam fpnew_pkg::fmt_logic_t MxdotpDstFpFmtConfig = 9'b100010000; // FP32, FP16ALT + localparam int unsigned VectorSize = 8; + + // Do not change + localparam int unsigned SRC_WIDTH = fpnew_pkg::max_fp_width(MxdotpSrcFpFmtConfig); + localparam int unsigned DST_WIDTH = fpnew_pkg::max_fp_width(MxdotpDstFpFmtConfig); + localparam int unsigned SCALE_WIDTH = 8; + localparam int unsigned NUM_OPERANDS = 2*VectorSize+1; // Two input vectors + accumulator (scale handled separately) + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS; + // ---------- + // Constants + // ---------- + // The super-formats that can hold all enabled source formats and all enabled destination formats + localparam fpnew_pkg::fp_encoding_t SUPER_FORMAT = fpnew_pkg::super_format(MxdotpSrcFpFmtConfig); + localparam fpnew_pkg::fp_encoding_t SUPER_DST_FORMAT = fpnew_pkg::super_format(MxdotpDstFpFmtConfig); + + localparam int unsigned SUPER_EXP_BITS = SUPER_FORMAT.exp_bits; + localparam int unsigned SUPER_MAN_BITS = SUPER_FORMAT.man_bits; + localparam int unsigned SUPER_DST_EXP_BITS = SUPER_DST_FORMAT.exp_bits; + localparam int unsigned SUPER_DST_MAN_BITS = SUPER_DST_FORMAT.man_bits; + + // FP6 super format specific + localparam fpnew_pkg::fp_encoding_t FP6_SUPER_FORMAT = fpnew_pkg::super_format(9'b000000110); // FP6 & FP6ALT + localparam int unsigned FP6_EXP_BITS = FP6_SUPER_FORMAT.exp_bits; + localparam int unsigned FP6_MAN_BITS = FP6_SUPER_FORMAT.man_bits; + localparam int unsigned FP6_PREC_BITS = FP6_MAN_BITS + 1; + + // FP4 specific + localparam int unsigned FP4_EXP_BITS = fpnew_pkg::exp_bits(fpnew_pkg::FP4); + localparam int unsigned FP4_MAN_BITS = fpnew_pkg::man_bits(fpnew_pkg::FP4); + localparam int unsigned FP4_PREC_BITS = FP4_MAN_BITS + 1; + + // Precision bits 'p' include the implicit bit + localparam int unsigned PRECISION_BITS = 
SUPER_MAN_BITS + 1; + // Destination precision bits 'p_dst' include the implicit bit + localparam int unsigned DST_PRECISION_BITS = SUPER_DST_MAN_BITS + 1; + + // Algorithm constants + localparam int unsigned ANCHOR = 34; // Fractional point position + localparam int unsigned INT_BITS = 32; + localparam int unsigned VECTOR_BITS = $clog2(VectorSize); + localparam int unsigned PROD_SHIFT_WIDTH = 1 + INT_BITS + ANCHOR; + localparam int unsigned SOP_FIXED_WIDTH = VECTOR_BITS + PROD_SHIFT_WIDTH; + localparam int unsigned FIXED_SUM_WIDTH = 1 + DST_PRECISION_BITS + 1 + (SOP_FIXED_WIDTH - 1); // |s|-Acc: DST_PRECISION_BITS-|R|-unsigned SoP: SOP_FIXED_WIDTH-1-| + localparam int unsigned LZC_SUM_WIDTH = FIXED_SUM_WIDTH + DST_PRECISION_BITS; + localparam int unsigned LZC_RESULT_WIDTH = $clog2(LZC_SUM_WIDTH); + localparam int signed MAX_ACC_SHIFT_AMOUNT = FIXED_SUM_WIDTH - DST_PRECISION_BITS - 1; // Maximum allowable shift, -1 for the sign bit + localparam int unsigned SOP_SHIFT = ANCHOR - 2*SUPER_MAN_BITS; // Constant left shift amount for the SOP to align the fractional point + + // FP6 specific + localparam int unsigned FP6_PROD_WIDTH = 2*FP6_PREC_BITS + 1; // 2p+1 for the product + localparam int unsigned FP6_PROD_SHIFT_WIDTH = 2*(2**FP6_EXP_BITS-1-fpnew_pkg::bias(fpnew_pkg::FP6)) + FP6_PROD_WIDTH + 4; // 2*(2^e-1-bias) + 2p+1 + 4, (2^e-1-bias): max shift amount; +4 is due to the minimum value of the sum of exponents for FP6 (-4) + + // FP4 specific + localparam int unsigned FP4_PROD_WIDTH = 2*FP4_PREC_BITS + 1; // 2p+1 for the product + localparam int unsigned FP4_PROD_SHIFT_WIDTH = 2*(2**FP4_EXP_BITS-1-fpnew_pkg::bias(fpnew_pkg::FP4)) + FP4_PROD_WIDTH; // 2*(2^e-1-bias) + 2p+1, (2^e-1-bias): max shift amount + + // Internal exponent width of FMA must accommodate all meaningful exponent values in order to avoid + // datapath leakage. This is either given by the exponent bits or the width of the LZC result. + // In most reasonable FP formats the internal exponent will be wider than the LZC result. 
+ localparam int unsigned EXP_WIDTH = SUPER_EXP_BITS + 1; + localparam int unsigned DST_EXP_WIDTH = SUPER_DST_EXP_BITS + 2; // +2 for overflow handling + // Shift amount width: $clog2(DST_BIAS - ANCHOR + (scale_a+scale_b) + FIXED_SUM_WIDTH - 1), with 2**SCALE_WIDTH - 1 used for the scale sum and the FP32 bias for DST_BIAS + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(fpnew_pkg::bias(fpnew_pkg::FP32) - ANCHOR + 2**(SCALE_WIDTH) - 1 + FIXED_SUM_WIDTH - 1); + + // ---------------- + // Type definitions + // ---------------- + typedef struct packed { + logic sign; + logic [SUPER_EXP_BITS-1:0] exponent; + logic [SUPER_MAN_BITS-1:0] mantissa; + } fp_src_t; + typedef struct packed { + logic sign; + logic [FP6_EXP_BITS-1:0] exponent; + logic [FP6_MAN_BITS-1:0] mantissa; + } fp6_src_t; + typedef struct packed { + logic sign; + logic [FP4_EXP_BITS-1:0] exponent; + logic [FP4_MAN_BITS-1:0] mantissa; + } fp4_src_t; + typedef struct packed { + logic sign; + logic [SUPER_DST_EXP_BITS-1:0] exponent; + logic [SUPER_DST_MAN_BITS-1:0] mantissa; + } fp_dst_t; + + // ---------- + // Functions + // ---------- + + // Returns the MXDOTP destination format config from the global FpFmtConfig. + // Only FP32 and FP16ALT are valid destination formats for MXDOTP. 
+ function automatic fpnew_pkg::fmt_logic_t get_mxdotp_dst_fmts(fpnew_pkg::fmt_logic_t cfg); + automatic fpnew_pkg::fmt_logic_t res; + res = { cfg[fpnew_pkg::FP32], // FP32 + 1'b0, // FP64 + 1'b0, // FP16 + 1'b0, // FP8 + cfg[fpnew_pkg::FP16ALT], // FP16ALT + 1'b0, // FP8ALT + 1'b0, // FP6 + 1'b0, // FP6ALT + 1'b0 // FP4 + }; + return res; + endfunction + + function automatic int unsigned bias_constant(fpnew_pkg::fp_format_e fmt); + unique case (fmt) + fpnew_pkg::FP32: return 127; // 2^(8-1) - 1 + fpnew_pkg::FP16: return 15; // 2^(5-1) - 1 + fpnew_pkg::FP16ALT: return 127; // 2^(8-1) - 1 + fpnew_pkg::FP8: return 15; // 2^(5-1) - 1 + fpnew_pkg::FP8ALT: return 7; // 2^(4-1) - 1 + fpnew_pkg::FP6: return 3; // 2^(3-1) - 1 + fpnew_pkg::FP6ALT: return 1; // 2^(2-1) - 1 + fpnew_pkg::FP4: return 1; // 2^(2-1) - 1 + default: return fpnew_pkg::bias(fmt); // any format not listed above defers to fpnew_pkg::bias + endcase + endfunction + +endpackage diff --git a/src_files.yml b/src_files.yml index 84348a98..8ba39f50 100644 --- a/src_files.yml +++ b/src_files.yml @@ -41,6 +41,10 @@ fpnew: src/fpnew_sdotp_multi.sv, src/fpnew_sdotp_multi_wrapper.sv, src/fpnew_noncomp.sv, + src/mxdotp/fpnew_mxdotp_multi_pkg.sv, + src/mxdotp/fpnew_mxdotp_multi_modules.sv, + src/fpnew_mxdotp_multi.sv, + src/fpnew_mxdotp_multi_wrapper.sv, src/fpnew_opgroup_block.sv, src/fpnew_opgroup_fmt_slice.sv, src/fpnew_opgroup_multifmt_slice.sv,