From 29eba93bfaf6f69c924a17fe24eccdea281fe4dc Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 17:26:46 -0800 Subject: [PATCH 01/20] Path to new Questa --- setup.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.sh b/setup.sh index b1ecbd84f..e1d4e6cd3 100755 --- a/setup.sh +++ b/setup.sh @@ -16,8 +16,7 @@ echo \$WALLY set to ${WALLY} # Must edit these based on your local environment. Ask your sysadmin. export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu # Change this to your Siemens license server export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Synopsys license server -export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_2/questasim # Change this for your path to Questa, excluding bin -#export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_3/questasim # Change this for your path to Questa, excluding bin +export QUESTA_HOME=/cad/mentor/questa_sim-2023.4/questasim # Change this for your path to Questa, excluding bin export SNPS_HOME=/cad/synopsys/SYN # Change this for your path to Design Compiler, excluding bin # Path to RISC-V Tools From 0588d611ead1deb5379be81a93b1f7ebb5859dfa Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 17:27:40 -0800 Subject: [PATCH 02/20] Zfa fli support working for F and D --- src/fpu/fctrl.sv | 7 +++++-- src/fpu/fpu.sv | 11 ++++++++++- testbench/testbench.sv | 4 +++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 999837889..d4cc60e87 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -143,14 +143,16 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register 7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg + else if (P.ZFA_SUPPORTED & Funct3D == 3'b000 & Rs2D == 5'b00001) + ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0; // fli 7'b0100000: if (Rs2D[4:2] == 
3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.(s/h/q) // coverage off - // Not covered in testing because rv64gc does not support half or quad precision 7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10) ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0; // fcvt.h.(s/d/q) + // Not covered in testing because rv64gc does not support quad precision 7'b0100011: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b11) ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0; // fcvt.q.(s/h/d) // coverage on @@ -179,7 +181,6 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d d->lu endcase // coverage off - // Not covered in testing because rv64gc does not support half or quad precision 7'b1101010: case(Rs2D) 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.h.w w->h 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.h.wu wu->h @@ -192,6 +193,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.h h->l 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.h h->lu endcase + // Not covered in testing because rv64gc does not support quad precision 7'b1101011: case(Rs2D) 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.q.w w->q 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.q.wu wu->q @@ -274,6 +276,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( // 011 - mv to fp 01 // 110 - min 10 // 101 - max 10 + // 111 - fli 11 // OpCtrl: // Fma: {not multiply-add?, negate prod?, negate Z?} diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 45af38c0c..8be0e4488 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -160,6 +160,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic StallUnpackedM; // Stall 
unpacker outputs during multicycle fdivsqrt logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer logic mvsgn; // sign bit for extending move + logic [P.FLEN-1:0] FliResE; // Floating-point load immediate value ////////////////////////////////////////////////////////////////////////////////////////// // Decode Stage: fctrl decoder, read register file @@ -263,6 +264,14 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); + // floating-point load immediate: fli + if (P.ZFA_SUPPORTED) begin + logic [4:0] Rs1E; + + flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E); + fli #(P) fli(.Rs1(Rs1E), .Fmt(FmtE), .Imm(FliResE)); + end else assign FliResE = '0; + // NaN Box SrcA to convert integer to requested FP size for fmv.*.x if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; else if(P.FPSIZES == 2) @@ -276,7 +285,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( end // select a result that may be written to the FP register - mux3 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + mux4 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register with fmv.x.* diff --git a/testbench/testbench.sv b/testbench/testbench.sv index efd4ea637..87b603288 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -128,7 +128,8 @@ module testbench; "arch64zicboz": if (P.ZICBOZ_SUPPORTED) tests = arch64zicboz; "arch64zcb": if (P.ZCB_SUPPORTED) tests = arch64zcb; "arch64zfh": if (P.ZFH_SUPPORTED) tests = arch64zfh; -// "arch64zfa": if (P.ZFA_SUPPORTED) tests = arch64zfa; + "arch64zfaf": if (P.ZFA_SUPPORTED) tests = arch64zfaf; + "arch64zfad": if 
(P.ZFA_SUPPORTED & P.D_SUPPORTED) tests = arch64zfad; endcase end else begin // RV32 case (TEST) @@ -165,6 +166,7 @@ module testbench; "arch32zcb": if (P.ZCB_SUPPORTED) tests = arch32zcb; "arch32zfh": if (P.ZFH_SUPPORTED) tests = arch32zfh; "arch32zfaf": if (P.ZFA_SUPPORTED) tests = arch32zfaf; + "arch32zfad": if (P.ZFA_SUPPORTED & P.D_SUPPORTED) tests = arch32zfad; endcase end if (tests.size() == 0) begin From 9d57002c070461ec9518fa811936d4a975846fc6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 17:27:59 -0800 Subject: [PATCH 03/20] Zfa fli support working for F and D (add fli.sv module) --- src/fpu/fli.sv | 219 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 src/fpu/fli.sv diff --git a/src/fpu/fli.sv b/src/fpu/fli.sv new file mode 100644 index 000000000..e61415388 --- /dev/null +++ b/src/fpu/fli.sv @@ -0,0 +1,219 @@ +/////////////////////////////////////////// +// fli.sv +// +// Written: David_Harris@hmc.edu +// Modified: 1/16/2024 +// +// Purpose: Floating-point float immediate +// +// Documentation: RISC-V System on Chip Design Chapter 16 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module fli import cvw::*; #(parameter cvw_t P) (
+  input  logic [4:0]        Rs1, // Index of immediate to select
+  input  logic [1:0]        Fmt, // 00 = single, 01 = double, 10 = half, 11 = quad
+  output logic [P.FLEN-1:0] Imm  // Immediate output
+);
+
+  logic [P.FLEN-1:0] HImmBox, SImmBox, DImmBox, QImmBox; // per-format constants, NaN-boxed to FLEN bits
+
+  // Each format has a 32-entry table indexed by Rs1; the selected constant is NaN-boxed (upper bits
+  // filled with 1s) to FLEN bits. Formats that are not supported drive 0. Fmt picks the final result.
+  ////////////////////////////
+  // half
+  ////////////////////////////
+
+  if (P.ZFH_SUPPORTED) begin
+    logic [15:0] HImm;
+    always_comb begin
+      case(Rs1)
+        0:  HImm = 16'hBC00;
+        1:  HImm = 16'h0400; // minimum positive normal (2^-14)
+        2:  HImm = 16'h0100; // 2^-16 is subnormal in half precision
+        3:  HImm = 16'h0200; // 2^-15 is subnormal in half precision
+        4:  HImm = 16'h1C00;
+        5:  HImm = 16'h2000;
+        6:  HImm = 16'h2C00;
+        7:  HImm = 16'h3000;
+        8:  HImm = 16'h3400;
+        9:  HImm = 16'h3500;
+        10: HImm = 16'h3600;
+        11: HImm = 16'h3700;
+        12: HImm = 16'h3800;
+        13: HImm = 16'h3900;
+        14: HImm = 16'h3A00;
+        15: HImm = 16'h3B00;
+        16: HImm = 16'h3C00;
+        17: HImm = 16'h3D00;
+        18: HImm = 16'h3E00;
+        19: HImm = 16'h3F00;
+        20: HImm = 16'h4000;
+        21: HImm = 16'h4100;
+        22: HImm = 16'h4200;
+        23: HImm = 16'h4400;
+        24: HImm = 16'h4800;
+        25: HImm = 16'h4C00;
+        26: HImm = 16'h5800;
+        27: HImm = 16'h5C00;
+        28: HImm = 16'h7800; // 2^15
+        29: HImm = 16'h7C00; // 2^16 is not representable in half precision: encoded as +infinity
+        30: HImm = 16'h7C00; // +infinity
+        31: HImm = 16'h7E00; // quiet NaN
+      endcase
+    end
+    assign HImmBox = {{(P.FLEN-16){1'b1}}, HImm}; // NaN-box HImm
+  end else assign HImmBox = '0;
+
+  ////////////////////////////
+  // single
+  ////////////////////////////
+
+  logic [31:0] SImm;
+  always_comb begin
+    case(Rs1)
+      0:  SImm = 32'hBF800000; // -1.0
+      1:  SImm = 32'h00800000; // minimum positive normal (2^-126)
+      2:  SImm = 32'h37800000; // 2^-16
+      3:  SImm = 32'h38000000; // 2^-15
+      4:  SImm = 32'h3B800000; // 2^-8
+      5:  SImm = 32'h3C000000; // 2^-7
+      6:  SImm = 32'h3D800000; // 0.0625
+      7:  SImm = 32'h3E000000; // 0.125
+      8:  SImm = 32'h3E800000; // 0.25
+      9:  SImm = 32'h3EA00000; // 0.3125
+      10: SImm = 32'h3EC00000; // 0.375
+      11: SImm = 32'h3EE00000; // 0.4375
+      12: SImm = 32'h3F000000; // 0.5
+      13: SImm = 32'h3F200000; // 0.625
+      14: SImm = 32'h3F400000; // 0.75
+      15: SImm = 32'h3F600000; // 0.875
+      16: SImm = 32'h3F800000; // 1.0
+      17: SImm = 32'h3FA00000; // 1.25
+      18: SImm = 32'h3FC00000; // 1.5
+      19: SImm = 32'h3FE00000; // 1.75
+      20: SImm = 32'h40000000; // 2.0
+      21: SImm = 32'h40200000; // 2.5
+      22: SImm = 32'h40400000; // 3.0
+      23: SImm = 32'h40800000; // 4.0
+      24: SImm = 32'h41000000; // 8.0
+      25: SImm = 32'h41800000; // 16.0
+      26: SImm = 32'h43000000; // 128.0
+      27: SImm = 32'h43800000; // 256.0
+      28: SImm = 32'h47000000; // 2^15
+      29: SImm = 32'h47800000; // 2^16
+      30: SImm = 32'h7F800000; // +infinity
+      31: SImm = 32'h7FC00000; // quiet NaN
+    endcase
+  end
+  assign SImmBox = {{(P.FLEN-32){1'b1}}, SImm}; // NaN-box SImm
+
+  ////////////////////////////
+  // double (same values as the single table, encoded as 64-bit doubles)
+  ////////////////////////////
+
+  if (P.D_SUPPORTED) begin
+    logic [63:0] DImm;
+    always_comb begin
+      case(Rs1)
+        0:  DImm = 64'hBFF0000000000000;
+        1:  DImm = 64'h0010000000000000; // minimum positive normal (2^-1022)
+        2:  DImm = 64'h3EF0000000000000;
+        3:  DImm = 64'h3F00000000000000;
+        4:  DImm = 64'h3F70000000000000;
+        5:  DImm = 64'h3F80000000000000;
+        6:  DImm = 64'h3FB0000000000000;
+        7:  DImm = 64'h3FC0000000000000;
+        8:  DImm = 64'h3FD0000000000000;
+        9:  DImm = 64'h3FD4000000000000;
+        10: DImm = 64'h3FD8000000000000;
+        11: DImm = 64'h3FDC000000000000;
+        12: DImm = 64'h3FE0000000000000;
+        13: DImm = 64'h3FE4000000000000;
+        14: DImm = 64'h3FE8000000000000;
+        15: DImm = 64'h3FEC000000000000;
+        16: DImm = 64'h3FF0000000000000;
+        17: DImm = 64'h3FF4000000000000;
+        18: DImm = 64'h3FF8000000000000;
+        19: DImm = 64'h3FFC000000000000;
+        20: DImm = 64'h4000000000000000;
+        21: DImm = 64'h4004000000000000;
+        22: DImm = 64'h4008000000000000;
+        23: DImm = 64'h4010000000000000;
+        24: DImm = 64'h4020000000000000;
+        25: DImm = 64'h4030000000000000;
+        26: DImm = 64'h4060000000000000;
+        27: DImm = 64'h4070000000000000;
+        28: DImm = 64'h40E0000000000000;
+        29: DImm = 64'h40F0000000000000;
+        30: DImm = 64'h7FF0000000000000; // +infinity
+        31: DImm = 64'h7FF8000000000000; // quiet NaN
+      endcase
+    end
+    assign DImmBox = {{(P.FLEN-64){1'b1}}, DImm}; // NaN-box DImm
+  end else assign DImmBox = '0;
+
+  ////////////////////////////
+  // quad (same values as the single table, encoded as 128-bit quads)
+  ////////////////////////////
+
+  if (P.Q_SUPPORTED) begin
+    logic [127:0] QImm; // must be 128 bits wide: a narrower vector would truncate the sign/exponent of the constants below
+    always_comb begin
+      case(Rs1)
+        0:  QImm = 128'hBFFF0000000000000000000000000000;
+        1:  QImm = 128'h00010000000000000000000000000000; // minimum positive normal (2^-16382)
+        2:  QImm = 128'h3FEF0000000000000000000000000000;
+        3:  QImm = 128'h3FF00000000000000000000000000000;
+        4:  QImm = 128'h3FF70000000000000000000000000000;
+        5:  QImm = 128'h3FF80000000000000000000000000000;
+        6:  QImm = 128'h3FFB0000000000000000000000000000;
+        7:  QImm = 128'h3FFC0000000000000000000000000000;
+        8:  QImm = 128'h3FFD0000000000000000000000000000;
+        9:  QImm = 128'h3FFD4000000000000000000000000000;
+        10: QImm = 128'h3FFD8000000000000000000000000000;
+        11: QImm = 128'h3FFDC000000000000000000000000000;
+        12: QImm = 128'h3FFE0000000000000000000000000000;
+        13: QImm = 128'h3FFE4000000000000000000000000000;
+        14: QImm = 128'h3FFE8000000000000000000000000000;
+        15: QImm = 128'h3FFEC000000000000000000000000000;
+        16: QImm = 128'h3FFF0000000000000000000000000000;
+        17: QImm = 128'h3FFF4000000000000000000000000000;
+        18: QImm = 128'h3FFF8000000000000000000000000000;
+        19: QImm = 128'h3FFFC000000000000000000000000000;
+        20: QImm = 128'h40000000000000000000000000000000;
+        21: QImm = 128'h40004000000000000000000000000000;
+        22: QImm = 128'h40008000000000000000000000000000;
+        23: QImm = 128'h40010000000000000000000000000000;
+        24: QImm = 128'h40020000000000000000000000000000;
+        25: QImm = 128'h40030000000000000000000000000000;
+        26: QImm = 128'h40060000000000000000000000000000;
+        27: QImm = 128'h40070000000000000000000000000000;
+        28: QImm = 128'h400E0000000000000000000000000000;
+        29: QImm = 128'h400F0000000000000000000000000000;
+        30: QImm = 128'h7FFF0000000000000000000000000000; // +infinity
+        31: QImm = 128'h7FFF8000000000000000000000000000; // quiet NaN
+      endcase
+    end
+    assign QImmBox = QImm; // NaN-box QImm trivial because Q is longest format
+  end else assign QImmBox = '0;
+
+  mux4 #(P.FLEN) flimux(SImmBox, DImmBox, HImmBox, QImmBox, Fmt, Imm); // select immediate based on format: 00 = S, 01 = D, 10 = H, 11 = Q
+
+endmodule
From
8654375f26504d4efe1b772421f00e87d49a444b Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 20:03:54 -0800 Subject: [PATCH 04/20] Zfa fminm/fmaxm/fltq/fleq implemented and tested --- src/fpu/fcmp.sv | 49 +++++++----- src/fpu/fctrl.sv | 191 +++++++++++++++++++++++++++------------------ src/fpu/fpu.sv | 5 +- testbench/tests.vh | 58 ++++++++++++-- 4 files changed, 198 insertions(+), 105 deletions(-) diff --git a/src/fpu/fcmp.sv b/src/fpu/fcmp.sv index 9d0d582b5..e330f1fda 100755 --- a/src/fpu/fcmp.sv +++ b/src/fpu/fcmp.sv @@ -36,6 +36,7 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] Fmt, // format of fp number input logic [2:0] OpCtrl, // see above table + input logic Zfa, // Zfa variants: fminm, fmaxm, fleq, fltq input logic Xs, Ys, // input signs input logic [P.NE-1:0] Xe, Ye, // input exponents input logic [P.NF:0] Xm, Ym, // input mantissa @@ -70,8 +71,8 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( 3'b110: CmpNV = EitherSNaN; //min 3'b101: CmpNV = EitherSNaN; //max 3'b010: CmpNV = EitherSNaN; //equal - 3'b001: CmpNV = EitherNaN; //less than - 3'b011: CmpNV = EitherNaN; //less than or equal + 3'b001: CmpNV = Zfa ? EitherSNaN : EitherNaN; // fltq / flt perform CompareQuietLess / CompareSignalingLess differing on when to set invalid + 3'b011: CmpNV = Zfa ? 
EitherSNaN : EitherNaN; // fleq / fle differ on when to set invalid default: CmpNV = 1'bx; endcase end @@ -128,23 +129,35 @@ module fcmp import cvw::*; #(parameter cvw_t P) ( // - if one is a NaN output the non-NaN always_comb if(OpCtrl[0]) // MAX - if(XNaN) - if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN - else CmpFpRes = Y; // X = NaN Y != NaN - else - if(YNaN) CmpFpRes = X; // X != NaN Y = NaN - else // X,Y != NaN - if(LT) CmpFpRes = Y; // X < Y - else CmpFpRes = X; // X > Y + if (Zfa & P.ZFA_SUPPORTED) // fmaxm perform IEEE754 maxNum that produce NaN if either input is NaN + if (XNaN | YNaN) CmpFpRes = NaNRes; // either input is NaN + else + if (LT) CmpFpRes = Y; // X < Y + else CmpFpRes = X; // X > Y + else // fmax performs IEEE754 maxNumber that produces NaN if both inputs are NaN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = Y; // X < Y + else CmpFpRes = X; // X > Y else // MIN - if(XNaN) - if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN - else CmpFpRes = Y; // X = NaN Y != NaN - else - if(YNaN) CmpFpRes = X; // X != NaN Y = NaN - else // X,Y != NaN - if(LT) CmpFpRes = X; // X < Y - else CmpFpRes = Y; // X > Y + if (Zfa & P.ZFA_SUPPORTED) // fminm perform IEEE754 minNum that produce NaN if either input is NaN + if (XNaN | YNaN) CmpFpRes = NaNRes; // either input is NaN + else + if (LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y + else // fmin performs IEEE754 minNumber that produces NaN if both inputs are NaN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y // LT/LE/EQ // - -0 = 0 diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index d4cc60e87..9f60a692f 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -54,6 
+54,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage output logic FPUActiveE, // FP instruction being executed + output logic ZfaE, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod) // register control signals output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable output logic FWriteIntE, FWriteIntM, // Write to integer register @@ -64,7 +65,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic FDivStartE, IDivStartE // Start division or squareroot ); - `define FCTRLW 12 + `define FCTRLW 13 logic [`FCTRLW-1:0] ControlsD; // control signals logic FRegWriteD; // FP register write enable @@ -79,6 +80,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( logic SupportedFmt; // is the format supported logic SupportedFmt2; // is the source format supported for fp -> fp logic FCvtIntD, FCvtIntM; // convert to integer opperation + logic ZfaD; // Zfa variants of instructions // FPU Instruction Decoder assign Fmt = Funct7D[1:0]; @@ -91,129 +93,164 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( (Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED)); // decode the instruction - // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt + // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa always_comb if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; // for anything other than loads and stores, check for supported format + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // for 
anything other than loads and stores, check for supported format else begin - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0; // default: non-implemented instruction + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // default: non-implemented instruction /* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed case(OpD) 7'b0000111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flw - 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // fld - 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flq - 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0; // flh + 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flw + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // fld + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flh endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsw - 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsd - 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsq - 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0; // fsh + 3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsw + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsd + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsh endcase - 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd - 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub - 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0; // fnmsub - 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0; // fnmadd + 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0; // fmadd + 7'b1000111: ControlsD = 
`FCTRLW'b1_0_01_10_001_0_0_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0; // fnmadd 7'b1010011: casez(Funct7D) - 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0; // fadd - 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub - 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul - 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv - 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt + 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0; // fdiv + 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0; // fsqrt 7'b00100??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0; // fsgnj - 3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0; // fsgnjn - 3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0; // fsgnjx + 3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0; // fsgnjx endcase 7'b00101??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0; // fmin - 3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0; // fmax + 3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0; // fmax + 3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1; // fminm (Zfa) + 3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1; // fmaxm (Zfa) endcase 7'b10100??: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0; // feq - 3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0; // flt - 3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0; // fle + 3'b000: ControlsD = 
`FCTRLW'b0_1_00_00_011_0_0_0_0; // fle + 3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0; // flt + 3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0; // feq + 3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1; // fleq (Zfa) + 3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1; // fltq (Zfa) endcase 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass + ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0; // fclass else if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register + else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001) + ControlsD = '0; // fmvh.x.d (Zfa) *** needs values for all moves + // coverage off Q not supported in RV64GC + else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & Rs2D == 5'b00001) + ControlsD = '0; // fmvh.x.q (Zfa) + // coverage on 7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg + ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0; // fmv.w/d/h/q.x int to fp reg else if (P.ZFA_SUPPORTED & Funct3D == 3'b000 & Rs2D == 5'b00001) - ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0; // fli + ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1; // fli (Zfa) 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0; // fcvt.s.(d/q/h) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.s (Zfa) *** needs ctrl for all rounds + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.s (Zfa) *** needs ctrl for all rounds 
7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) - ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.(s/h/q) - // coverage off + ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0; // fcvt.d.(s/h/q) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.d (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.d (Zfa) 7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10) - ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0; // fcvt.h.(s/d/q) + ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0; // fcvt.h.(s/d/q) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.h (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.h (Zfa) + // coverage off // Not covered in testing because rv64gc does not support quad precision 7'b0100011: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b11) - ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0; // fcvt.q.(s/h/d) + ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0; // fcvt.q.(s/h/d) + else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.q (Zfa) + else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.q (Zfa) // coverage on 7'b1101000: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w w->s - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.s.l l->s - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.s.lu lu->s + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.s.w w->s + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.s.wu wu->s + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.s.l l->s + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.s.lu 
lu->s endcase 7'b1100000: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.s s->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.s s->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.s s->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.s s->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.s s->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.s s->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.s s->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.s s->lu endcase 7'b1101001: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.d.w w->d - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.d.wu wu->d - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.d.l l->d - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.d.lu lu->d + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.d.w w->d + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.d.wu wu->d + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.d.l l->d + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.d.lu lu->d endcase 7'b1100001: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.d d->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.d d->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.d d->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d d->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.d d->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.d d->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.d d->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.d d->lu + 5'b01000: if (P.ZFA_SUPPORTED & P.D_SUPPORTED & Funct3D == 3'b001) + ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1; // fcvtmod.w.d (Zfa) endcase - // 
coverage off 7'b1101010: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.h.w w->h - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.h.wu wu->h - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.h.l l->h - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.h.lu lu->h + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.h.w w->h + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.h.wu wu->h + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.h.l l->h + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.h.lu lu->h endcase 7'b1100010: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.h h->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.h h->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.h h->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.h h->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.h h->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.h h->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.h h->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.h h->lu endcase // Not covered in testing because rv64gc does not support quad precision + // coverage off 7'b1101011: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.q.w w->q - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.q.wu wu->q - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.q.l l->q - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.q.lu lu->q + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.q.w w->q + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.q.wu wu->q + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.q.l l->q + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.q.lu lu->q endcase 7'b1100011: case(Rs2D) - 5'b00000: ControlsD = 
`FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.q q->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.q q->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.q q->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.q q->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.q q->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.q q->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.q q->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.q q->lu endcase // coverage on - endcase + 7'b1011001: if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct3D == 3'b000) + ControlsD = '0; // fmvp.d.x (Zfa) + // Not covered in testing because rv64gc does not support quad precision + // coverage off + 7'b1011011: if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct3D == 3'b000) + ControlsD = '0; // fmvp.q.x (Zfa) + // coverage on + endcase endcase end /* verilator lint_on CASEINCOMPLETE */ // unswizzle control bits - assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD} = ControlsD; + assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD} = ControlsD; // rounding modes: // 000 - round to nearest, ties to even @@ -313,9 +350,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( assign Adr3D = InstrD[31:27]; // D/E pipleine register - flopenrc #(14+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ~IllegalFPUInstrD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, FPUActiveE}); + flopenrc #(15+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ~IllegalFPUInstrD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, 
FPUActiveE}); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE}); diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 8be0e4488..bd387f5d7 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -83,6 +83,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation logic FRegWriteE; // Write floating-point register logic FPUActiveE; // FP instruction being executed + logic ZfaE; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d) // regfile signals logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -170,7 +171,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .IntDivE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, + .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .FrmM, .FmtE, .FmtM, .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, .IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW, @@ -247,7 +248,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .UmM, .FIntDivResultM); // compare: fmin/fmax, flt/fle/feq - fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), + fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Zfa(ZfaE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), 
.YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); diff --git a/testbench/tests.vh b/testbench/tests.vh index 86f65eb14..43cbc91a0 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1999,16 +1999,58 @@ string arch64zbs[] = '{ string arch32zfaf[] = '{ `RISCVARCHTEST, - "rv32i_m/F_Zfa/src/fle_b1-01.S", - "rv32i_m/F_Zfa/src/fle_b19-01.S", - "rv32i_m/F_Zfa/src/fli_b1-01.S", + "rv32i_m/F_Zfa/src/fleq_b1-01.S", + "rv32i_m/F_Zfa/src/fleq_b19-01.S", + "rv32i_m/F_Zfa/src/fli.s-01.S", "rv32i_m/F_Zfa/src/fltq_b1-01.S", "rv32i_m/F_Zfa/src/fltq_b19-01.S", - "rv32i_m/F_Zfa/src/fmin_b1-01.S", - "rv32i_m/F_Zfa/src/fmin_b19-01.S", - "rv32i_m/F_Zfa/src/fmax_b1-01.S", - "rv32i_m/F_Zfa/src/fmax_b19-01.S", - "rv32i_m/F_Zfa/src/fround_b1-01.S" + "rv32i_m/F_Zfa/src/fminm_b1-01.S", + "rv32i_m/F_Zfa/src/fminm_b19-01.S", + "rv32i_m/F_Zfa/src/fmaxm_b1-01.S", + "rv32i_m/F_Zfa/src/fmaxm_b19-01.S" +/* "rv32i_m/F_Zfa/src/fround_b1-01.S" */ + }; + + string arch32zfad[] = '{ + `RISCVARCHTEST, + "rv32i_m/D_Zfa/src/fleq_b1-01.S", + "rv32i_m/D_Zfa/src/fleq_b19-01.S", + "rv32i_m/D_Zfa/src/fli.d-01.S", + "rv32i_m/D_Zfa/src/fltq_b1-01.S", + "rv32i_m/D_Zfa/src/fltq_b19-01.S", + "rv32i_m/D_Zfa/src/fminm_b1-01.S", + "rv32i_m/D_Zfa/src/fminm_b19-01.S", + "rv32i_m/D_Zfa/src/fmaxm_b1-01.S", + "rv32i_m/D_Zfa/src/fmaxm_b19-01.S" +/* "rv32i_m/D_Zfa/src/fround_b1-01.S" */ + }; + + string arch64zfaf[] = '{ + `RISCVARCHTEST, + "rv64i_m/F_Zfa/src/fleq_b1-01.S", + "rv64i_m/F_Zfa/src/fleq_b19-01.S", + "rv64i_m/F_Zfa/src/fli.s-01.S", + "rv64i_m/F_Zfa/src/fltq_b1-01.S", + "rv64i_m/F_Zfa/src/fltq_b19-01.S", + "rv64i_m/F_Zfa/src/fminm_b1-01.S", + "rv64i_m/F_Zfa/src/fminm_b19-01.S", + "rv64i_m/F_Zfa/src/fmaxm_b1-01.S", + "rv64i_m/F_Zfa/src/fmaxm_b19-01.S" +/* "rv64i_m/F_Zfa/src/fround_b1-01.S" */ + }; + + string arch64zfad[] = '{ + `RISCVARCHTEST, + "rv64i_m/D_Zfa/src/fleq_b1-01.S", + "rv64i_m/D_Zfa/src/fleq_b19-01.S", + "rv64i_m/D_Zfa/src/fli.d-01.S", + 
"rv64i_m/D_Zfa/src/fltq_b1-01.S", + "rv64i_m/D_Zfa/src/fltq_b19-01.S", + "rv64i_m/D_Zfa/src/fminm_b1-01.S", + "rv64i_m/D_Zfa/src/fminm_b19-01.S", + "rv64i_m/D_Zfa/src/fmaxm_b1-01.S", + "rv64i_m/D_Zfa/src/fmaxm_b19-01.S" +/* "rv64i_m/D_Zfa/src/fround_b1-01.S" */ }; string arch32d_fma[] = '{ From 07e7e022415a66a9cc89c5c53b4f5fe72f71558d Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 16 Jan 2024 21:26:42 -0800 Subject: [PATCH 05/20] Coded Zfa fmvp but no tests exist --- src/fpu/fpu.sv | 18 +++++++++++------- testbench/tests.vh | 12 +++++++++++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index bd387f5d7..430750c71 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -155,7 +155,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register // other signals - logic [P.FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv + logic [P.FLEN-1:0] PreIntSrcE, IntSrcE; // align SrcA from IEU to the floating point format for fmv / fmvp logic [P.FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [P.FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt @@ -273,23 +273,27 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fli #(P) fli(.Rs1(Rs1E), .Fmt(FmtE), .Imm(FliResE)); end else assign FliResE = '0; - // NaN Box SrcA to convert integer to requested FP size for fmv.*.x - if(P.FPSIZES == 1) assign AlignedSrcAE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; + // fmv.*.x: NaN Box SrcA to extend integer to requested FP size + if(P.FPSIZES == 1) assign PreIntSrcE = {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}; else if(P.FPSIZES == 2) - mux2 #(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); + mux2 
#(P.FLEN) SrcAMux ({{P.FLEN-P.LEN1{1'b1}}, ForwardedSrcAE[P.LEN1-1:0]}, {{P.FLEN-P.XLEN{1'b1}}, ForwardedSrcAE}, FmtE, PreIntSrcE); else if(P.FPSIZES == 3 | P.FPSIZES == 4) begin localparam XD_LEN = P.D_LEN < P.XLEN ? P.D_LEN : P.XLEN; // shorter of D_LEN and XLEN mux3 #(P.FLEN) SrcAMux ({{P.FLEN-P.S_LEN{1'b1}}, ForwardedSrcAE[P.S_LEN-1:0]}, {{P.FLEN-XD_LEN{1'b1}}, ForwardedSrcAE[XD_LEN-1:0]}, {{P.FLEN-P.H_LEN{1'b1}}, ForwardedSrcAE[P.H_LEN-1:0]}, - FmtE, AlignedSrcAE); // NaN boxing zeroes + FmtE, PreIntSrcE); // NaN boxing zeroes end + // fmvp.*.x: Select pair of registers + if (P.ZFA_SUPPORTED & (P.XLEN==32 & P.D_SUPPORTED) | (P.XLEN==64 & P.Q_SUPPORTED)) + assign IntSrcE = ZfaE ? {ForwardedSrcBE, ForwardedSrcAE} : PreIntSrcE; // choose pair of integer registers for fmvp.d.x / fmvp.q.x + else assign IntSrcE = PreIntSrcE; // select a result that may be written to the FP register - mux4 #(P.FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); - // select the result that may be written to the integer register with fmv.x.* + // fmv.x.*: select the result that may be written to the integer register if(P.FPSIZES == 1) begin assign mvsgn = XE[P.FLEN-1]; assign SgnExtXE = XE; diff --git a/testbench/tests.vh b/testbench/tests.vh index 43cbc91a0..0c60228dd 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -2004,6 +2004,8 @@ string arch64zbs[] = '{ "rv32i_m/F_Zfa/src/fli.s-01.S", "rv32i_m/F_Zfa/src/fltq_b1-01.S", "rv32i_m/F_Zfa/src/fltq_b19-01.S", + "rv32i_m/D_Zfa/src/fltq_b1-01.S", // these D tests are more comprehensive and seem they should replace the F tests. 
Applies to all F tests duplicated in D + "rv32i_m/D_Zfa/src/fltq_b19-01.S", "rv32i_m/F_Zfa/src/fminm_b1-01.S", "rv32i_m/F_Zfa/src/fminm_b19-01.S", "rv32i_m/F_Zfa/src/fmaxm_b1-01.S", @@ -2015,13 +2017,21 @@ string arch64zbs[] = '{ `RISCVARCHTEST, "rv32i_m/D_Zfa/src/fleq_b1-01.S", "rv32i_m/D_Zfa/src/fleq_b19-01.S", + "rv32i_m/D_Zfa/src/fleq.d_b1-01.S", + "rv32i_m/D_Zfa/src/fleq.d_b19-01.S", "rv32i_m/D_Zfa/src/fli.d-01.S", "rv32i_m/D_Zfa/src/fltq_b1-01.S", "rv32i_m/D_Zfa/src/fltq_b19-01.S", + "rv32i_m/D_Zfa/src/fltq.d_b1-01.S", + "rv32i_m/D_Zfa/src/fltq.d_b19-01.S", "rv32i_m/D_Zfa/src/fminm_b1-01.S", "rv32i_m/D_Zfa/src/fminm_b19-01.S", + "rv32i_m/D_Zfa/src/fminm.d_b1-01.S", + "rv32i_m/D_Zfa/src/fminm.d_b19-01.S", "rv32i_m/D_Zfa/src/fmaxm_b1-01.S", - "rv32i_m/D_Zfa/src/fmaxm_b19-01.S" + "rv32i_m/D_Zfa/src/fmaxm_b19-01.S", + "rv32i_m/D_Zfa/src/fmaxm.d_b1-01.S", + "rv32i_m/D_Zfa/src/fmaxm.d_b19-01.S" /* "rv32i_m/D_Zfa/src/fround_b1-01.S" */ }; From 4cfc86140c5c31740d5fd2a1272c59ebe5d36a53 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Jan 2024 06:18:00 -0800 Subject: [PATCH 06/20] Zfa fmvh complete and passing tests: --- src/fpu/fctrl.sv | 4 ++-- src/fpu/fpu.sv | 3 ++- testbench/tests.vh | 9 ++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 9f60a692f..8dae34f89 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -148,10 +148,10 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( else if (Funct3D == 3'b000 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001) - ControlsD = '0; // fmvh.x.d (Zfa) *** needs values for all moves + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.d (Zfa) // coverage off Q not supported in RV64GC else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & 
Rs2D == 5'b00001) - ControlsD = '0; // fmvh.x.q (Zfa) + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.q (Zfa) // coverage on 7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0; // fmv.w/d/h/q.x int to fp reg diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 430750c71..85ea9dba6 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -310,7 +310,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // sign extend to XLEN if necessary if (P.FLEN>P.XLEN) - assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; + if (P.ZFA_SUPPORTED) assign IntSrcXE = ZfaE ? XE[P.FLEN-1:P.FLEN/2] : SgnExtXE[P.XLEN-1:0]; // either fmvh.x.* or fmv.x.* + else assign IntSrcXE = SgnExtXE[P.XLEN-1:0]; else assign IntSrcXE = {{P.XLEN-P.FLEN{mvsgn}}, SgnExtXE}; mux3 #(P.XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); diff --git a/testbench/tests.vh b/testbench/tests.vh index 0c60228dd..7d2d320a8 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -2031,7 +2031,14 @@ string arch64zbs[] = '{ "rv32i_m/D_Zfa/src/fmaxm_b1-01.S", "rv32i_m/D_Zfa/src/fmaxm_b19-01.S", "rv32i_m/D_Zfa/src/fmaxm.d_b1-01.S", - "rv32i_m/D_Zfa/src/fmaxm.d_b19-01.S" + "rv32i_m/D_Zfa/src/fmaxm.d_b19-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b1-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b22-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b23-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b24-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b27-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b28-01.S", + "rv32i_m/D_Zfa/src/fmvh.x.d_b29-01.S" /* "rv32i_m/D_Zfa/src/fround_b1-01.S" */ }; From 74b242ce5ca85b4f19587094ad4739c85372ed0a Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 17 Jan 2024 12:25:06 -0800 Subject: [PATCH 07/20] Partial implementation of fcvtmod.w.d; flags disagree in one case where Sail might be wrong, and result 134 is wrong because of overflow --- src/fpu/fctrl.sv | 15 ++++++++------- src/fpu/fpu.sv | 6 +++--- src/fpu/postproc/postprocess.sv | 5 +++-- 
src/fpu/postproc/specialcase.sv | 30 ++++++++++++++++++++++++++---- testbench/common/instrNameDecTB.sv | 12 ++++++++++++ testbench/tests.vh | 7 +++++++ 6 files changed, 59 insertions(+), 16 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 8dae34f89..6d5a91aa6 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -54,7 +54,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage output logic FPUActiveE, // FP instruction being executed - output logic ZfaE, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod) + output logic ZfaE, ZfaM, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod) // register control signals output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable output logic FWriteIntE, FWriteIntM, // Write to integer register @@ -149,7 +149,8 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001) ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.d (Zfa) - // coverage off Q not supported in RV64GC + // Q not supported in RV64GC + // coverage off else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & Rs2D == 5'b00001) ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.q (Zfa) // coverage on @@ -238,11 +239,11 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( endcase // coverage on 7'b1011001: if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct3D == 3'b000) - ControlsD = '0; // fmvp.d.x (Zfa) + ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.d.x (Zfa) *** 
untested, controls could be wrong // Not covered in testing because rv64gc does not support quad precision // coverage off 7'b1011011: if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct3D == 3'b000) - ControlsD = '0; // fmvp.q.x (Zfa) + ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.q.x (Zfa) // coverage on endcase endcase @@ -362,9 +363,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( else assign IDivStartE = 0; // E/M pipleine register - flopenrc #(13+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE}, - {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM}); + flopenrc #(14+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE}, + {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM, ZfaM}); // renameing for readability assign FpLoadStoreM = FResSelM[1]; diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 85ea9dba6..c304219aa 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -83,7 +83,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation logic FRegWriteE; // Write floating-point register logic FPUActiveE; // FP instruction being executed - logic ZfaE; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d) + logic ZfaE, ZfaM; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d) // regfile signals logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -171,7 +171,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .IntDivE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, 
.STATUS_FS, .FDivBusyE, - .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .FrmM, .FmtE, .FmtM, + .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .FrmM, .FmtE, .FmtM, .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, .IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW, @@ -348,7 +348,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), - .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), + .ToInt(FWriteIntM), .Zfa(ZfaM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index c2de8644e..516752a78 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -56,6 +56,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic CvtResSubnormUf, // the convert result is subnormal or underflows input logic [P.LOGCVTLEN-1:0] CvtShiftAmt, // how much to shift by input logic ToInt, // is fp->int (since it's writting to the integer register) + input logic Zfa, // Zfa operation (fcvtmod.w.d) input logic [P.CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) input logic IntZero, // is the integer input zero // final results @@ -216,9 +217,9 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( negateintres #(P) negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, 
.CvtNegRes); - specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + specialcase #(P) specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, - .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, + .NaNIn, .IntToFp, .Int64, .Signed, .Zfa, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/src/fpu/postproc/specialcase.sv b/src/fpu/postproc/specialcase.sv index 677ccce16..76784e4a1 100644 --- a/src/fpu/postproc/specialcase.sv +++ b/src/fpu/postproc/specialcase.sv @@ -53,6 +53,7 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( input logic IntToFp, // is cvt int -> fp opperation input logic Int64, // is the integer 64 bits input logic Signed, // is the integer signed + input logic Zfa, // Zfa conversion operation: fcvtmod.w.d input logic [P.NE:0] CvtCe, // the calculated expoent for cvt input logic IntInvalid, // integer invalid flag to choose the result input logic CvtResUf, // does the convert result underflow @@ -70,10 +71,12 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( logic [P.FLEN-1:0] OfRes; // overflowed result result logic [P.FLEN-1:0] NormRes; // normal result logic [P.XLEN-1:0] OfIntRes; // the overflow result for integer output + logic [P.XLEN-1:0] OfIntRes2; // the overflow result for integer output after accounting for fcvtmod.w.d + logic [P.XLEN-1:0] Int64Res; // Result for conversion to 64-bit int after accounting for fcvtmod.w.d logic OfResMax; // does the of result output maximum norm fp number logic KillRes; // kill the result for underflow - logic SelOfRes; // should the overflow result be selected - + logic SelOfRes; // should the overflow result be selected (excluding convert) + logic SelCvtOfRes; // select overflow result for convert instruction // does the overflow result 
output the maximum normalized floating point number // output infinity if the input is infinity @@ -329,6 +332,25 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( else OfIntRes = {P.XLEN{1'b1}}; // unsigned positive end + // fcvtmod.w.d logic + // fcvtmod.w.d is like fcvt.w.d excep thtat it takes bits [31:0] and sign extends the rest, + // and converts +/-inf and NaN to zero. + + if (P.ZFA_SUPPORTED & P.D_SUPPORTED) // fcvtmod.w.d support + always_comb begin + if (Zfa) OfIntRes2 = '0; + else OfIntRes2 = OfIntRes; + if (Zfa) Int64Res = {{(P.XLEN-32){CvtNegRes[P.XLEN-1]}}, CvtNegRes[31:0]}; + else Int64Res = CvtNegRes[P.XLEN-1:0]; + if (Zfa) SelCvtOfRes = InfIn | NaNIn; // fcvtmod.w.d only overflows to 0 on NaN or Infinity + else SelCvtOfRes = IntInvalid; // regular fcvt gives an overflow if out of range + end + else + always_comb begin // no fcvtmod.w.d support + OfIntRes2 = OfIntRes; + Int64Res = CvtNegRes[P.XLEN-1:0]; + SelCvtOfRes = IntInvalid; + end // select the integer output // - if the input is invalid (out of bounds NaN or Inf) then output overflow res @@ -337,10 +359,10 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( // - otherwise output a rounded 0 // - otherwise output the normal res (trmined and sign extended if nessisary) always_comb - if(IntInvalid) FCvtIntRes = OfIntRes; + if(SelCvtOfRes) FCvtIntRes = OfIntRes2; else if(CvtCe[P.NE]) if(Xs&Signed&Plus1) FCvtIntRes = {{P.XLEN{1'b1}}}; else FCvtIntRes = {{P.XLEN-1{1'b0}}, Plus1}; - else if(Int64) FCvtIntRes = CvtNegRes[P.XLEN-1:0]; + else if(Int64) FCvtIntRes = Int64Res; else FCvtIntRes = {{P.XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; endmodule diff --git a/testbench/common/instrNameDecTB.sv b/testbench/common/instrNameDecTB.sv index a3b5ef58e..ee6cd6900 100644 --- a/testbench/common/instrNameDecTB.sv +++ b/testbench/common/instrNameDecTB.sv @@ -298,6 +298,18 @@ module instrNameDecTB( else if (funct7[6:2] == 5'b11100 & funct3 == 3'b001) name = "FCLASS"; else if (funct7[6:2] == 
5'b00100 & funct3 == 3'b010) name = "FSGNJX"; else if (funct7[6:2] == 5'b10100 & funct3 == 3'b010) name = "FEQ"; + else if (funct7[6:2] == 5'b11110 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FLI"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b010) name = "FMINM"; + else if (funct7[6:2] == 5'b00101 & funct3 == 3'b011) name = "FMAXM"; + else if (funct7[6:2] == 5'b01000 & rs2 == 5'b00100) name = "FROUND"; + else if (funct7[6:2] == 5'b01000 & rs2 == 5'b00101) name = "FROUNDNX"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b100) name = "FLEQ"; + else if (funct7[6:2] == 5'b10100 & funct3 == 3'b101) name = "FLTQ"; + else if (funct7 == 7'b1110001 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FMVH.X.D"; + else if (funct7 == 7'b1110011 & funct3 == 3'b000 & rs2 == 5'b00001) name = "FMVH.X.Q"; + else if (funct7 == 7'b1011001 & funct3 == 3'b000) name = "FMVP.D.X"; + else if (funct7 == 7'b1011011 & funct3 == 3'b000) name = "FMVP.Q.X"; + else if (funct7 == 7'b1100001 & funct3 == 3'b001 & rs2 == 5'b01000) name = "FCVTMOD.W.D"; else name = "ILLEGAL"; 10'b0000111_010: name = "FLW"; 10'b0100111_010: name = "FSW"; diff --git a/testbench/tests.vh b/testbench/tests.vh index 7d2d320a8..fecf4ebc9 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -2015,6 +2015,13 @@ string arch64zbs[] = '{ string arch32zfad[] = '{ `RISCVARCHTEST, + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b24-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b27-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b28-01.S", + "rv32i_m/D_Zfa/src/fcvtmod.w.d_b29-01.S", "rv32i_m/D_Zfa/src/fleq_b1-01.S", "rv32i_m/D_Zfa/src/fleq_b19-01.S", "rv32i_m/D_Zfa/src/fleq.d_b1-01.S", From 8b60992e72c629add0b2d090b83ecf4711b5a3ab Mon Sep 17 00:00:00 2001 From: naichewa Date: Wed, 17 Jan 2024 14:38:11 -0800 Subject: [PATCH 08/20] fixed SPI tests failing when no icache --- 
.../riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S | 1 + .../riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S index b9c82c92d..3d0abc6a0 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-spi-01.S @@ -607,6 +607,7 @@ SETUP_PLIC .4byte delay1, 0x0000001, write32_test # reset delay1 register .4byte cs_mode, 0x00000000, write32_test # reset cs_mode .4byte tx_mark, 0x00000001, write32_test # set transmit watermark to 1 (any entry turns mark off) +.4byte sck_div, 0x00000100, write32_test # lower SPI clock rate so read32_tests trigger at correct times #.4byte ie, 0x00000000, write32_test # enable transmit interrupt .4byte ip, 0x00000001, read32_test # tx watermark interupt should be pending .4byte 0x0, 0x00000000, readmip_test diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S index 266b0e74f..11aebe333 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-spi-01.S @@ -608,6 +608,7 @@ SETUP_PLIC .8byte delay1, 0x0000001, write32_test # reset delay1 register .8byte cs_mode, 0x00000000, write32_test # reset cs_mode +.8byte sck_div, 0x00000100, write32_test # lower SPI clock rate so reads are done at correct time when ICACHE not supported .8byte tx_mark, 0x00000001, write32_test # set transmit watermark to 1 (any entry turns mark off) #.8byte ie, 0x00000000, write32_test # enable transmit interrupt .8byte ip, 0x00000001, read32_test # tx watermark interupt should be pending From 
911b400af2cfd4ce47e6d48a748d7983684ba184 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 13:13:56 -0800 Subject: [PATCH 09/20] Fault on misaligned AMO --- src/mmu/mmu.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index c26ee2a44..dffa7be76 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -140,7 +140,7 @@ module mmu import cvw::*; #(parameter cvw_t P, 2'b11: DataMisalignedM = |VAdr[2:0]; // ld, sd, fld, fsd endcase assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); - assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); + assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM & (~(P.ZICCLSM_SUPPORTED & Cacheable) | ReadAccessM); // Misaligned AMO faults even if ZICCLSM supported // Specify which type of page fault is occurring assign InstrPageFaultF = TLBPageFault & ExecuteAccessF; From 12b2baff827707f6934e71b6f1623f423fe08e1d Mon Sep 17 00:00:00 2001 From: Jordan Carlin Date: Thu, 18 Jan 2024 17:33:59 -0800 Subject: [PATCH 10/20] add coverage of sfence.inval.ir instruction and fix sret coverage --- tests/coverage/priv.S | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index aa9c8b50b..dcf56e14a 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -297,6 +297,16 @@ sretdone: wfi + + + # Test uncovered privdec instructions + # exercise sfence.inval.ir instruction + .word 0x18100073 + + # exercise sret with rs1 not 0 + .word 0x102F8073 + + j done From 82d9467eeaf10135548b491e4224b5d5e0b638ec Mon Sep 17 00:00:00 2001 From: Jordan Carlin Date: Thu, 18 Jan 2024 19:29:16 -0800 Subject: [PATCH 11/20] Add coverage of FIOM in different privelege modes --- tests/coverage/csrwrites.S | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/coverage/csrwrites.S b/tests/coverage/csrwrites.S index 
63ee00c38..ce5639bd7 100644 --- a/tests/coverage/csrwrites.S +++ b/tests/coverage/csrwrites.S @@ -37,4 +37,31 @@ main: csrrw t1, menvcfg, t0 csrrw t2, senvcfg, t0 + # testing FIOM with different privelege modes + # setting environment config (to both 1 and 0) in each privelege mode + csrsi menvcfg, 1 + li a0, 1 + ecall # enter supervisor mode + + li a0, 0 + ecall # enter user mode + + li a0, 1 + ecall # enter supervisor mode + + csrsi senvcfg, 1 + li a0, 0 + ecall # enter user mode + + li a0, 3 + ecall # enter machine mode + csrci menvcfg, 1 + + li a0, 1 + ecall # enter supervisor mode + + li a0, 0 + ecall # enter user mode + + j done From f06f681dbd9491fb876261e8d2cd96382ae6628f Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 21:30:39 -0800 Subject: [PATCH 12/20] CoreMark displays StoreStalls --- benchmarks/coremark/Makefile | 22 ++----------------- .../coremark/riscv64-baremetal/syscalls.c | 1 + 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index db8a6e1d6..6e466291e 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -11,8 +11,8 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc_zbs -#ARCH := rv$(XLEN)gc_zba_zbb_zbc_zbs +ARCH := rv$(XLEN)gc_zba_zbb_zbc +#ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc #ARCH := rv$(XLEN)gc #ARCH := rv$(XLEN)imc_zicsr #ARCH := rv$(XLEN)im_zicsr @@ -25,24 +25,6 @@ PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) -# Black Parrott -#PORT_CFLAGS = -O2 -fno-common 
-funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 \ - -DITERATIONS=10 -DPERFORMANCE_RUN=1 -#OPTIMIZE := -O2 -fno-common -funroll-loops -finline-functions --param max-inline-insns-auto=20 -falign-functions=4 -falign-jumps=4 -falign-loops=4 -#override CFLAGS += $(OPTIMIZE) -DFLAGS_STR=\""$(OPTIMIZE)"\" -#override CFLAGS += -DITERATIONS=10 -DPERFORMANCE_RUN=1 - -# try adding the new fields from muntjac coremark build -#PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ - -fno-common -flto -funswitch-loops -mcmodel=medany \ - -falign-functions=4 -falign-jumps=4 -falign-loops=4 \ - -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -finline-functions --param max-inline-insns-auto=20 -falign-jumps=4 \ - -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ - -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ - -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) - - all: $(work_dir)/coremark.bare.riscv.elf.memfile run: diff --git a/benchmarks/coremark/riscv64-baremetal/syscalls.c b/benchmarks/coremark/riscv64-baremetal/syscalls.c index 29cd5f24a..25c47b797 100644 --- a/benchmarks/coremark/riscv64-baremetal/syscalls.c +++ b/benchmarks/coremark/riscv64-baremetal/syscalls.c @@ -177,6 +177,7 @@ void _init(int cid, int nc) counters[17] = read_csr(mhpmcounter17) - counters[17]; ee_printf("Load Stalls %d\n", counters[11]); + ee_printf("Store Stalls %d\n", counters[12]); ee_printf("D-Cache Accesses %d\n", counters[13]); ee_printf("D-Cache Misses %d\n", counters[14]); ee_printf("I-Cache Accesses %d\n", counters[16]); From eb8ab3fae252211d1e600c9a641a04f1e71ab464 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 21:30:59 -0800 Subject: [PATCH 13/20] EBU coverage exclusion --- 
sim/coverage-exclusions-rv64gc.do | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 76e18e30e..b9c20eead 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -253,3 +253,10 @@ coverage exclude -srcfile priorityonehot.sv # Excluding pmpadrdecs[0] coverage case for PAgePMPAdrIn being hardwired to 1 coverage exclude -scope /dut/core/ifu/immu/immu/pmp/pmpchecker/pmp/pmpadrdecs[0] -linerange [GetLineNum ../src/mmu/pmpadrdec.sv "exclusion-tag: PAgePMPAdrIn"] -item e 1 -fecexprrow 1 coverage exclude -scope /dut/core/lsu/dmmu/dmmu/pmp/pmpchecker/pmp/pmpadrdecs[0] -linerange [GetLineNum ../src/mmu/pmpadrdec.sv "exclusion-tag: PAgePMPAdrIn"] -item e 1 -fecexprrow 1 + +#################### +# EBU +#################### + +# Exclude EBU Beat Counter because it is only idle when bus has multicycle latency, but rv64gc has single cycle latency +coverage exclude -scope /core/ebu/ebu/ebufsmarb/BeatCounter From 17c9be7695b6e7a65541c7e727c63d202c40bf65 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 21:36:52 -0800 Subject: [PATCH 14/20] Cleanup typos, remove Zicond from riscof until it is working --- config/buildroot/config.vh | 12 ++++++------ src/fpu/postproc/flags.sv | 2 +- src/fpu/postproc/postprocess.sv | 2 +- testbench/testbench.sv | 4 ++-- tests/riscof/spike/riscof_spike.py | 4 ++++ tests/riscof/spike/spike_rv32gc_isa.yaml | 3 ++- tests/riscof/spike/spike_rv64gc_isa.yaml | 3 ++- 7 files changed, 18 insertions(+), 12 deletions(-) diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index d36fcf6e3..de6e4800d 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -40,7 +40,7 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; localparam COUNTERS = 12'd32; -localparam ZFH_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; localparam ZFA_SUPPORTED = 0; localparam 
SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; @@ -57,7 +57,7 @@ localparam BUS_SUPPORTED = 1; localparam DCACHE_SUPPORTED = 1; localparam ICACHE_SUPPORTED = 1; localparam VIRTMEM_SUPPORTED = 1; -localparam VECTORED_INTERRUPTS_SUPPORTED = 1 ; +localparam VECTORED_INTERRUPTS_SUPPORTED = 1; localparam BIGENDIAN_SUPPORTED = 1; // TLB configuration. Entries should be a power of 2 @@ -163,10 +163,10 @@ localparam RADIX = 32'h4; localparam DIVCOPIES = 32'h4; // bit manipulation -localparam ZBA_SUPPORTED = 0; -localparam ZBB_SUPPORTED = 0; -localparam ZBC_SUPPORTED = 0; -localparam ZBS_SUPPORTED = 0; +localparam ZBA_SUPPORTED = 1; +localparam ZBB_SUPPORTED = 1; +localparam ZBC_SUPPORTED = 1; +localparam ZBS_SUPPORTED = 1; // New compressed instructions localparam ZCB_SUPPORTED = 1; diff --git a/src/fpu/postproc/flags.sv b/src/fpu/postproc/flags.sv index 98ed0a34d..50d9bf229 100644 --- a/src/fpu/postproc/flags.sv +++ b/src/fpu/postproc/flags.sv @@ -70,7 +70,7 @@ module flags import cvw::*; #(parameter cvw_t P) ( logic DivInvalid; // integer invalid flag logic Underflow; // Underflow flag logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent - logic ShiftGtIntSz; // is the shift greater than the the integer size (use Re to account for possible roundning "shift") + logic ShiftGtIntSz; // is the shift greater than the the integer size (use Re to account for possible rounding "shift") /////////////////////////////////////////////////////////////////////////////// // Overflow diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index 516752a78..1d51fdf85 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -89,7 +89,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( logic [P.NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA 
corection logic [$clog2(3*P.NF+5)-1:0] FmaShiftAmt; // normalization shift amount for fma - // division singals + // division signals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 87b603288..b20c6a993 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -632,8 +632,8 @@ task automatic updateProgramAddrLabelArray; end end - if(ProgramAddrLabelArray["begin_signature"] == 0) $display("Couldn't find begin_signature in %s", ProgramLabelMapFile); - if(ProgramAddrLabelArray["sig_end_canary"] == 0) $display("Couldn't find sig_end_canary in %s", ProgramLabelMapFile); +// if(ProgramAddrLabelArray["begin_signature"] == 0) $display("Couldn't find begin_signature in %s", ProgramLabelMapFile); +// if(ProgramAddrLabelArray["sig_end_canary"] == 0) $display("Couldn't find sig_end_canary in %s", ProgramLabelMapFile); $fclose(ProgramLabelMapFP); $fclose(ProgramAddrMapFP); diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index 61b556932..5450f64df 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -115,6 +115,10 @@ class spike(pluginTemplate): self.isa += '_Zicond' if "Zicboz" in ispec["ISA"]: self.isa += '_Zicboz' + if "Zfa" in ispec["ISA"]: + self.isa += '_Zfa' + if "Zfh" in ispec["ISA"]: + self.isa += '_Zfh' if "Zca" in ispec["ISA"]: self.isa += '_Zca' if "Zcb" in ispec["ISA"]: diff --git a/tests/riscof/spike/spike_rv32gc_isa.yaml b/tests/riscof/spike/spike_rv32gc_isa.yaml index ae314fa76..7d97edb6a 100644 --- a/tests/riscof/spike/spike_rv32gc_isa.yaml +++ b/tests/riscof/spike/spike_rv32gc_isa.yaml @@ -1,6 +1,7 @@ hart_ids: [0] hart0: - ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zba_Zbb_Zbc_Zbs + ISA: RV32IMAFDCZicsr_Zifencei_Zba_Zbb_Zbc_Zbs +# ISA: 
RV32IMAFDCZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # ISA: RV32IMAFDCZicsr_Zicboz_Zifencei_Zca_Zba_Zbb_Zbc_Zbs # _Zbkb_Zcb physical_addr_sz: 32 User_Spec_Version: '2.3' diff --git a/tests/riscof/spike/spike_rv64gc_isa.yaml b/tests/riscof/spike/spike_rv64gc_isa.yaml index df5e7cb2b..471fbbb13 100644 --- a/tests/riscof/spike/spike_rv64gc_isa.yaml +++ b/tests/riscof/spike/spike_rv64gc_isa.yaml @@ -2,7 +2,8 @@ hart_ids: [0] hart0: # ISA: RV64IMAFDCSUZicsr_Zicboz_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb # ISA: RV64IMAFDCSUZicsr_Zifencei_Zca_Zcb_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb - ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb +# ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb + ISA: RV64IMAFDCSUZicsr_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb physical_addr_sz: 56 User_Spec_Version: '2.3' supported_xlen: [64] From 9614913e8f54285096d2c162b2cc9e130e914e9a Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 22:10:20 -0800 Subject: [PATCH 15/20] Changed CoreMark Makefile to rv64im --- benchmarks/coremark/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 6e466291e..a73dc6eea 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -11,8 +11,8 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)gc_zba_zbb_zbc -#ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc +#ARCH := rv$(XLEN)gc_zba_zbb_zbc +ARCH := rv$(XLEN)im_zicsr_zba_zbb_zbc #ARCH := rv$(XLEN)gc #ARCH := rv$(XLEN)imc_zicsr #ARCH := rv$(XLEN)im_zicsr From 9260d3c424092d3cd660fb5f3055f600b6ee0f6e Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 18 Jan 2024 22:46:07 -0800 Subject: [PATCH 16/20] Add Zfh 
support to imperas.ic, use Zicond in riscof now that it is fixed in riscv-arch-test --- config/rv32gc/config.vh | 4 ++-- sim/imperas.ic | 1 + tests/riscof/spike/spike_rv32gc_isa.yaml | 3 +-- tests/riscof/spike/spike_rv64gc_isa.yaml | 3 +-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index a59bb1ab3..4baef0075 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -41,8 +41,8 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 0; -localparam ZFA_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; +localparam ZFA_SUPPORTED = 1; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/sim/imperas.ic b/sim/imperas.ic index f3c620b96..5de5935c6 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -20,6 +20,7 @@ # More extensions --override cpu/Zcb=T --override cpu/Zicond=T +--override cpu/Zfh=T # Cache block operations --override cpu/Zicbom=T diff --git a/tests/riscof/spike/spike_rv32gc_isa.yaml b/tests/riscof/spike/spike_rv32gc_isa.yaml index 7d97edb6a..c2c95fbf4 100644 --- a/tests/riscof/spike/spike_rv32gc_isa.yaml +++ b/tests/riscof/spike/spike_rv32gc_isa.yaml @@ -1,7 +1,6 @@ hart_ids: [0] hart0: - ISA: RV32IMAFDCZicsr_Zifencei_Zba_Zbb_Zbc_Zbs -# ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs + ISA: RV32IMAFDCZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # ISA: RV32IMAFDCZicsr_Zicboz_Zifencei_Zca_Zba_Zbb_Zbc_Zbs # _Zbkb_Zcb physical_addr_sz: 32 User_Spec_Version: '2.3' diff --git a/tests/riscof/spike/spike_rv64gc_isa.yaml b/tests/riscof/spike/spike_rv64gc_isa.yaml index 471fbbb13..4374ad07c 100644 --- a/tests/riscof/spike/spike_rv64gc_isa.yaml +++ b/tests/riscof/spike/spike_rv64gc_isa.yaml @@ -2,8 +2,7 @@ hart_ids: [0] hart0: # ISA: RV64IMAFDCSUZicsr_Zicboz_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb # ISA: 
RV64IMAFDCSUZicsr_Zifencei_Zca_Zcb_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb -# ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb - ISA: RV64IMAFDCSUZicsr_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb + ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb physical_addr_sz: 56 User_Spec_Version: '2.3' supported_xlen: [64] From 324180244178e85510cbecd953cfe181c20a9893 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 21 Jan 2024 08:25:17 -0800 Subject: [PATCH 17/20] fixed bug in CORRSHIFTSZ param --- config/shared/config-shared.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 86f9a0a9e..ba215785c 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -111,7 +111,7 @@ localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($uns localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6))); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); -localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVMINb+1+NF) > (3*NF+4) ? (DIVMINb+1+NF) : (3*NF+4))); +localparam CORRSHIFTSZ = (((DIVMINb+1+NF) > (3*NF+4) ? 
(DIVMINb+1+NF) : (3*NF+4))); // max(DIVMINb+NF+1, 3*NF+4) // Disable spurious Verilator warnings From 1459943a7537ff184ab00d27a476129d2c1d2c91 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Sun, 21 Jan 2024 10:08:48 -0800 Subject: [PATCH 18/20] more shiftcorrection bug fixes --- config/shared/config-shared.vh | 2 +- src/fpu/postproc/shiftcorrection.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index ba215785c..5dfb4b1ba 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -111,7 +111,7 @@ localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($uns localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6))); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); -localparam CORRSHIFTSZ = (((DIVMINb+1+NF) > (3*NF+4) ? 
(DIVMINb+1+NF) : (3*NF+4))); // max(DIVMINb+NF+1, 3*NF+4) +localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Disable spurious Verilator warnings diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index f5860b42d..1da3556d8 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -44,7 +44,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( output logic [P.NE+1:0] Ue // corrected exponent for divider ); - logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction + logic [P.CORRSHIFTSZ-1:0] CorrSumShifted; // the shifted sum after LZA correction logic [P.CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted logic [P.CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift logic ResSubnorm; // is the result Subnormal @@ -68,7 +68,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits always_comb - if(FmaOp) Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}}; + if(FmaOp) Mf = {CorrSumShifted}; else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted; else Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ]; From 4936496bb9852f08db33fe3904a7d258be73d0f7 Mon Sep 17 00:00:00 2001 From: Jordan Carlin Date: Mon, 22 Jan 2024 08:58:31 -0800 Subject: [PATCH 19/20] fix sfence.inval.ir and sret coverage from previous PR --- tests/coverage/priv.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index dcf56e14a..6b5260259 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -300,6 +300,8 @@ sretdone: # Test uncovered privdec instructions + li a0, 3 + ecall # exercise sfence.inval.ir instruction .word 0x18100073 From 0c13e14bbf922db0306d9b72285620c565554a34 Mon Sep 17 00:00:00 2001 From: Jordan Carlin Date: Mon, 22 Jan 
2024 09:52:58 -0800 Subject: [PATCH 20/20] coverage improvements for mret when mpp = 3; update imperas config --- sim/imperas.ic | 2 ++ tests/coverage/priv.S | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sim/imperas.ic b/sim/imperas.ic index 5de5935c6..b35166429 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -37,6 +37,8 @@ # SV39 and SV48 supported --override cpu/Sv_modes=768 +--override cpu/Svinval=T + # clarify #--override refRoot/cpu/mtvec_sext=F diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index 6b5260259..39b3a8aeb 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -309,7 +309,20 @@ sretdone: .word 0x102F8073 - j done + # cover mret when mpp = 3 and mprv = 1 + li a0, 3 + ecall # enter machine mode + bseti t0, zero, 17 + csrs mstatus, t0 # set MPRV + li t1, 0x00001800 + csrs mstatus, t1 # set MPP=3 + la t1, finished + csrr t0, mepc + csrw mepc, t1 # set mepc for mret to jump to + mret + + +finished: j done