From 5cfd0577d1db1643514055d71fd4ceb34d4c4c0c Mon Sep 17 00:00:00 2001 From: Miles Cook Date: Mon, 17 Apr 2023 18:35:03 -0700 Subject: [PATCH 01/35] Increase of TLB coverage in IFU --- tests/coverage/ifuCamlineWrite.S | 146 +++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 tests/coverage/ifuCamlineWrite.S diff --git a/tests/coverage/ifuCamlineWrite.S b/tests/coverage/ifuCamlineWrite.S new file mode 100644 index 00000000..4c11bf18 --- /dev/null +++ b/tests/coverage/ifuCamlineWrite.S @@ -0,0 +1,146 @@ +/////////////////////////////////////////// +// ifuCamlineWrite.S +// +// Written: Miles Cook and Kevin Box 4/17 +// +// Acknowledgements: The pagetable and outline for this test was written by Manuel Mendoza +// and Noah Limpert. +// +// Purpose: Test coverage for TLBCamlines in IFU +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initialize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80010000 + li t5, 0x9000000000080010 + csrw satp, t5 + + # switch to supervisor mode + li a0, 1 + ecall + + li t0, 0x80015000 # base addr + + li t2, 0 # i = 0 + li t3, 33 # Max amount of Loops = 33 (i = 0..32) + +loop: bge t2, t3, finished # exit loop if i >= loops + li t4, 0x1000 + li t1, 0x00008067 # load in jalr + sw t1, 0 (t0) + fence.I + jalr t0 + add t0, t0, t4 + addi t2, t2, 1 + j loop + +finished: + j done + +.data + +.align 16 +# Page table situated at 0x80010000 +pagetable: + .8byte 0x200044C1 // old page table was 200040 which just pointed to itself! wrong + +.align 12 + .8byte 0x0000000000000000 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + + +.align 12 + .8byte 0x0000000020004CC1 + //.8byte 0x00000200800CF// ADD IN THE MEGAPAGE should 3 nibbles of zeros be removed? 
+ +.align 12 + #80000000 + .8byte 0x200000CF + .8byte 0x200004CF + .8byte 0x200008CF + .8byte 0x20000CCF + + .8byte 0x200010CF + .8byte 0x200014CF + .8byte 0x200018CF + .8byte 0x20001CCF + + .8byte 0x200020CF + .8byte 0x200024CF + .8byte 0x200028CF + .8byte 0x20002CCF + + .8byte 0x200030CF + .8byte 0x200034CF + .8byte 0x200038CF + .8byte 0x20003CCF + + .8byte 0x200040CF + .8byte 0x200044CF + .8byte 0x200048CF + .8byte 0x20004CCF + + .8byte 0x200050CF + .8byte 0x200054CF + .8byte 0x200058CF + .8byte 0x20005CCF + + .8byte 0x200060CF + .8byte 0x200064CF + .8byte 0x200068CF + .8byte 0x20006CCF + + .8byte 0x200070CF + .8byte 0x200074CF + .8byte 0x200078CF + .8byte 0x20007CCF + + .8byte 0x200080CF + .8byte 0x200084CF + .8byte 0x200088CF + .8byte 0x20008CCF + + .8byte 0x200090CF + .8byte 0x200094CF + .8byte 0x200098CF + .8byte 0x20009CCF + + .8byte 0x200100CF + .8byte 0x200104CF + .8byte 0x200108CF + .8byte 0x20010CCF + + .8byte 0x200110CF + .8byte 0x200114CF + .8byte 0x200118CF + .8byte 0x20011CCF + + .8byte 0x200120CF + .8byte 0x200124CF + .8byte 0x200128CF + .8byte 0x20012CCF + + .8byte 0x200130CF + .8byte 0x200134CF From 054c8d638c9a53ccb99ba6705b9fcdd4669a1f65 Mon Sep 17 00:00:00 2001 From: Cedar Turek Date: Tue, 18 Apr 2023 15:14:17 -0700 Subject: [PATCH 02/35] moved D flop to preproc --- src/fpu/fdivsqrt/fdivsqrt.sv | 9 ++++----- src/fpu/fdivsqrt/fdivsqrtiter.sv | 6 +----- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 7 +++++-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 92f64cbd..e8708c6c 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -57,7 +57,7 @@ module fdivsqrt( logic [`DIVb+3:0] WS, WC; // Partial remainder components logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) - logic [`DIVb-1:0] DPreproc, D; // Iterator Divisor + logic [`DIVb-1:0] D; // Iterator Divisor logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values logic 
[`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection @@ -75,8 +75,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, - .QeM, .X, .DPreproc, + .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, .QeM, .X, .D, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .nE, .BZeroM, .nM, .mM, .AM, @@ -90,8 +89,8 @@ module fdivsqrt( .IDivStartE, .ISpecialCaseE, .nE, .IntDivE); fdivsqrtiter fdivsqrtiter( // CSA Iterator - .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .DPreproc, - .D, .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); + .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, + .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index ec15423e..f3048c8b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -34,8 +34,7 @@ module fdivsqrtiter( input logic FDivBusyE, input logic SqrtE, input logic [`DIVb+3:0] X, - input logic [`DIVb-1:0] DPreproc, - output logic [`DIVb-1:0] D, + input logic [`DIVb-1:0] D, output logic [`DIVb:0] FirstU, FirstUM, output logic [`DIVb+1:0] FirstC, output logic Firstun, @@ -95,9 +94,6 @@ module fdivsqrtiter( mux2 #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC); flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]); - // Divisior register - flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); - // Divisor Selections // - choose the negitive version of what's being selected // - D is a 0.b mantissa diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index cf8a055e..4af1d786 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -39,7 +39,7 @@ 
module fdivsqrtpreproc ( input logic [2:0] Funct3E, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, - output logic [`DIVb-1:0] DPreproc, + output logic [`DIVb-1:0] D, // Int-specific input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, @@ -50,7 +50,7 @@ module fdivsqrtpreproc ( output logic [`XLEN-1:0] AM ); - logic [`DIVb-1:0] XPreproc; + logic [`DIVb-1:0] XPreproc, DPreproc; logic [`DIVb:0] PreSqrtX; logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) @@ -173,5 +173,8 @@ module fdivsqrtpreproc ( // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + + // Divisior register + flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); endmodule From 871d495ca1fba51574b604c82ec9dd3e2881586f Mon Sep 17 00:00:00 2001 From: Cedar Turek Date: Tue, 18 Apr 2023 15:41:04 -0700 Subject: [PATCH 03/35] gave integer bits to D instead of adding manually everywhere --- src/fpu/fdivsqrt/fdivsqrt.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtiter.sv | 11 ++++------- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 9 ++++----- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 10 ++++------ src/fpu/fdivsqrt/fdivsqrtstage2.sv | 5 ++--- src/fpu/fdivsqrt/fdivsqrtstage4.sv | 5 ++--- 6 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index e8708c6c..1e05aee1 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -57,7 +57,7 @@ module fdivsqrt( logic [`DIVb+3:0] WS, WC; // Partial remainder components logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) - logic [`DIVb-1:0] D; // Iterator Divisor + logic [`DIVb+3:0] D; // Iterator Divisor logic [`DIVb:0] FirstU, FirstUM; // Intermediate 
result values logic [`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index f3048c8b..aeb4bcc4 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -33,8 +33,7 @@ module fdivsqrtiter( input logic IFDivStartE, input logic FDivBusyE, input logic SqrtE, - input logic [`DIVb+3:0] X, - input logic [`DIVb-1:0] D, + input logic [`DIVb+3:0] X, D, output logic [`DIVb:0] FirstU, FirstUM, output logic [`DIVb+1:0] FirstC, output logic Firstun, @@ -95,12 +94,10 @@ module fdivsqrtiter( flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]); // Divisor Selections - // - choose the negitive version of what's being selected - // - D is a 0.b mantissa - assign DBar = {3'b111, 1'b0, ~D}; + assign DBar = ~D; // for -D if(`RADIX == 4) begin : d2 - assign DBar2 = {2'b11, 1'b0, ~D, 1'b1}; - assign D2 = {2'b0, 1'b1, D, 1'b0}; + assign D2 = D << 1; // for 2D, only used in R4 + assign DBar2 = ~D2; // for -2D, only used in R4 end // k=DIVCOPIES of the recurrence logic diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index b8575f7f..1009cd22 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -32,7 +32,7 @@ module fdivsqrtpostproc( input logic clk, reset, input logic StallM, input logic [`DIVb+3:0] WS, WC, - input logic [`DIVb-1:0] D, + input logic [`DIVb+3:0] D, input logic [`DIVb:0] FirstU, FirstUM, input logic [`DIVb+1:0] FirstC, input logic SqrtE, @@ -46,7 +46,7 @@ module fdivsqrtpostproc( output logic [`XLEN-1:0] FIntDivResultM ); - logic [`DIVb+3:0] W, Sum, DM; + logic [`DIVb+3:0] W, Sum; logic [`DIVb:0] PreQmM; logic NegStickyM; logic weq0E, WZeroM; @@ -67,7 +67,7 @@ module fdivsqrtpostproc( assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1)); assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root - assign FZeroDivE = 
{3'b001,D,1'b0}; // F for divide + assign FZeroDivE = D << 1; // F for divide mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE); csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero}; aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E); @@ -102,11 +102,10 @@ module fdivsqrtpostproc( logic signed [`DIVb+3:0] PreResultM, PreIntResultM; assign W = $signed(Sum) >>> `LOGR; - assign DM = {4'b0001, D}; assign UnsignedQuotM = {3'b000, PreQmM}; // Integer remainder: sticky and sign correction muxes - mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM); + mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM); mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM); diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 4af1d786..9d375a26 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -38,8 +38,7 @@ module fdivsqrtpreproc ( input logic XZeroE, input logic [2:0] Funct3E, output logic [`NE+1:0] QeM, - output logic [`DIVb+3:0] X, - output logic [`DIVb-1:0] D, + output logic [`DIVb+3:0] X, D, // Int-specific input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, @@ -111,7 +110,9 @@ module fdivsqrtpreproc ( // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. 
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); assign DivX = {3'b000, ~NumerZeroE, XPreproc}; - // *** CT 4/13/23 Create D output here with leading 1 appended as well, use in the other modules + + // Divisior register + flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); // ***CT: factor out fdivsqrtcycles if (`IDIV_ON_FPU) begin:intrightshift // Int Supported @@ -173,8 +174,5 @@ module fdivsqrtpreproc ( // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); - - // Divisior register - flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 53c1711c..be62f8aa 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -30,8 +30,7 @@ /* verilator lint_off UNOPTFLAT */ module fdivsqrtstage2 ( - input logic [`DIVb-1:0] D, - input logic [`DIVb+3:0] DBar, + input logic [`DIVb+3:0] D, DBar, input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, @@ -66,7 +65,7 @@ module fdivsqrtstage2 ( always_comb if (up) Dsel = DBar; else if (uz) Dsel = '0; - else Dsel = {4'b0001, D}; // un + else Dsel = D; // un // Partial Product Generation // WSA, WCA = WS + WC - qD diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index f2ff3734..9464e6a8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -29,8 +29,7 @@ `include "wally-config.vh" module fdivsqrtstage4 ( - input logic [`DIVb-1:0] D, - input logic [`DIVb+3:0] DBar, D2, DBar2, + input logic [`DIVb+3:0] D, DBar, D2, DBar2, input logic [`DIVb:0] U,UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, @@ -75,7 +74,7 @@ module fdivsqrtstage4 ( 4'b1000: Dsel = DBar2; 4'b0100: Dsel = DBar; 4'b0000: Dsel = '0; - 
4'b0010: Dsel = {3'b0, 1'b1, D}; + 4'b0010: Dsel = D; 4'b0001: Dsel = D2; default: Dsel = 'x; endcase From 385564fe4c4d7b1c3346a97044dcdb1ebbdcf08c Mon Sep 17 00:00:00 2001 From: Kevin Thomas Date: Tue, 18 Apr 2023 17:57:56 -0500 Subject: [PATCH 04/35] Add PR#252 test file to coverage --- testbench/tests.vh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testbench/tests.vh b/testbench/tests.vh index 6a0f8027..19adb818 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -53,7 +53,8 @@ string tvpaths[] = '{ "lsu", "vm64check", "pmp", - "tlbKP" + "tlbKP", + "ifuCamlineWrite" }; string coremark[] = '{ From 30bd1e2a33860c470e9a9a409ddcafea20aef763 Mon Sep 17 00:00:00 2001 From: Cedar Turek Date: Tue, 18 Apr 2023 16:14:45 -0700 Subject: [PATCH 05/35] created fdivsqrtcycles, moved cycles calculation from FSM to preproc --- src/fpu/fdivsqrt/fdivsqrt.sv | 25 +++++----- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 76 +++++++++++++++++++++++++++++ src/fpu/fdivsqrt/fdivsqrtfsm.sv | 76 ++++++----------------------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 16 +++--- 4 files changed, 114 insertions(+), 79 deletions(-) create mode 100644 src/fpu/fdivsqrt/fdivsqrtcycles.sv diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 1e05aee1..f4d46501 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -57,42 +57,43 @@ module fdivsqrt( logic [`DIVb+3:0] WS, WC; // Partial remainder components logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) - logic [`DIVb+3:0] D; // Iterator Divisor + logic [`DIVb+3:0] D; // Iterator Divisor logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values logic [`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection logic WZeroE; // Early termination flag + logic [`DURLEN-1:0] cycles; // FSM cycles logic SpecialCaseM; // Divide by zero, square root of negative, etc. 
logic DivStartE; // Enable signal for flops during stall // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts + logic [`DIVBLEN:0] nM, mM; // Shift amounts logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor logic [`XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases - fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor - .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, .QeM, .X, .D, + fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor + .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), + .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, - .nE, .BZeroM, .nM, .mM, .AM, + .BZeroM, .nM, .mM, .AM, .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM); - fdivsqrtfsm fdivsqrtfsm( // FSM - .clk, .reset, .FmtE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, + fdivsqrtfsm fdivsqrtfsm( // FSM + .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, - .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, + .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles, // Int-specific - .IDivStartE, .ISpecialCaseE, .nE, .IntDivE); + .IDivStartE, .ISpecialCaseE, .IntDivE); - fdivsqrtiter fdivsqrtiter( // CSA Iterator + fdivsqrtiter fdivsqrtiter( // CSA Iterator .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); - fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor + fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZeroE, .DivStickyM, diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv new 
file mode 100644 index 00000000..f1ad32cd --- /dev/null +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -0,0 +1,76 @@ +/////////////////////////////////////////// +// fdivsqrtcycles.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu +// Modified: 18 April 2022 +// +// Purpose: Determine the number of FSM cycles for the combined divide and square root unit +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fdivsqrtcycles( + input logic [`FMTBITS-1:0] FmtE, + input logic SqrtE, + input logic IntDivE, + input logic [`DIVBLEN:0] nE, + output logic [`DURLEN-1:0] cycles +); + logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits + // DIVN = `NF+3 + // NS = NF + 1 + // N = NS or NS+2 for div/sqrt. 
+ + /* verilator lint_off WIDTH */ + if (`FPSIZES == 1) + assign Nf = `NF; + else if (`FPSIZES == 2) + always_comb + case (FmtE) + 1'b0: Nf = `NF1; + 1'b1: Nf = `NF; + endcase + else if (`FPSIZES == 3) + always_comb + case (FmtE) + `FMT: Nf = `NF; + `FMT1: Nf = `NF1; + `FMT2: Nf = `NF2; + endcase + else if (`FPSIZES == 4) + always_comb + case(FmtE) + `S_FMT: Nf = `S_NF; + `D_FMT: Nf = `D_NF; + `H_FMT: Nf = `H_NF; + `Q_FMT: Nf = `Q_NF; + endcase + + always_comb begin + if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 + else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs + if (`IDIV_ON_FPU) cycles = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + end + /* verilator lint_on WIDTH */ + +endmodule \ No newline at end of file diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 0793346b..5332087a 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -29,32 +29,27 @@ `include "wally-config.vh" module fdivsqrtfsm( - input logic clk, - input logic reset, - input logic [`FMTBITS-1:0] FmtE, - input logic XInfE, YInfE, - input logic XZeroE, YZeroE, - input logic XNaNE, YNaNE, - input logic FDivStartE, IDivStartE, - input logic XsE, - input logic SqrtE, - input logic StallM, - input logic FlushE, - input logic WZeroE, - input logic IntDivE, - input logic [`DIVBLEN:0] nE, - input logic ISpecialCaseE, - output logic IFDivStartE, - output logic FDivBusyE, FDivDoneE, - output logic SpecialCaseM + input logic clk, reset, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, + input logic FDivStartE, IDivStartE, + input logic XsE, WZeroE, + input logic SqrtE, + input logic StallM, FlushE, + input logic IntDivE, + input logic ISpecialCaseE, + input logic 
[`DURLEN-1:0] cycles, + output logic IFDivStartE, + output logic FDivBusyE, FDivDoneE, + output logic SpecialCaseM ); typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; statetype state; - logic [`DURLEN-1:0] step; - logic [`DURLEN-1:0] cycles; logic SpecialCaseE, FSpecialCaseE; + logic [`DURLEN-1:0] step; // FDivStartE and IDivStartE come from fctrl, reflecitng the start of floating-point and possibly integer division assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM; @@ -67,47 +62,6 @@ module fdivsqrtfsm( else assign SpecialCaseE = FSpecialCaseE; flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc -// DIVN = `NF+3 -// NS = NF + 1 -// N = NS or NS+2 for div/sqrt. - -// *** CT 4/13/23 move cycles calculation back to preprocesor -/* verilator lint_off WIDTH */ - logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits - if (`FPSIZES == 1) - assign Nf = `NF; - else if (`FPSIZES == 2) - always_comb - case (FmtE) - 1'b0: Nf = `NF1; - 1'b1: Nf = `NF; - endcase - else if (`FPSIZES == 3) - always_comb - case (FmtE) - `FMT: Nf = `NF; - `FMT1: Nf = `NF1; - `FMT2: Nf = `NF2; - endcase - else if (`FPSIZES == 4) - always_comb - case(FmtE) - `S_FMT: Nf = `S_NF; - `D_FMT: Nf = `D_NF; - `H_FMT: Nf = `H_NF; - `Q_FMT: Nf = `Q_NF; - endcase - - - always_comb begin - if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 - else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (`IDIV_ON_FPU) cycles = IntDivE ? 
((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - end - - /* verilator lint_on WIDTH */ - always_ff @(posedge clk) begin if (reset | FlushE) begin state <= #1 IDLE; diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 9d375a26..43a5e42b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -33,8 +33,8 @@ module fdivsqrtpreproc ( input logic IFDivStartE, input logic [`NF:0] Xm, Ym, input logic [`NE-1:0] Xe, Ye, - input logic [`FMTBITS-1:0] Fmt, - input logic Sqrt, + input logic [`FMTBITS-1:0] FmtE, + input logic SqrtE, input logic XZeroE, input logic [2:0] Funct3E, output logic [`NE+1:0] QeM, @@ -43,7 +43,8 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, output logic ISpecialCaseE, - output logic [`DIVBLEN:0] nE, nM, mM, + output logic [`DURLEN-1:0] cycles, + output logic [`DIVBLEN:0] nM, mM, output logic NegQuotM, ALTBM, IntDivM, W64M, output logic AsM, BZeroM, output logic [`XLEN-1:0] AM @@ -54,7 +55,7 @@ module fdivsqrtpreproc ( logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs + logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic signedDiv; // signed division @@ -169,10 +170,13 @@ module fdivsqrtpreproc ( // Sqrt is initialized on step one as R(X-1), so depends on Radix if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; - 
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, Sqrt, PreShiftX); + mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); // Floating-point exponent - fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); + fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + + // Number of FSM cycles (to FSM) + fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles); endmodule From 3ef81f4e6a51bedd8ad26953fd2885b029d5d89b Mon Sep 17 00:00:00 2001 From: Kevin Wan Date: Tue, 18 Apr 2023 18:43:50 -0700 Subject: [PATCH 06/35] PMPCFG_ARRAY_REGW cases --- testbench/tests.vh | 2 ++ tests/coverage/pmpcfg.S | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tests/coverage/pmpcfg.S diff --git a/testbench/tests.vh b/testbench/tests.vh index 6a0f8027..8e327caf 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -53,7 +53,9 @@ string tvpaths[] = '{ "lsu", "vm64check", "pmp", + "pmpcfg", "tlbKP" + }; string coremark[] = '{ diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S new file mode 100644 index 00000000..bb696152 --- /dev/null +++ b/tests/coverage/pmpcfg.S @@ -0,0 +1,33 @@ +// pmpcfg supplemental + +#include "WALLY-init-lib.h" +main: + li t0, 0x8800000000000000 + csrw pmpcfg2, t0 + li t0, 0x88000000000000 + csrw pmpcfg2, t0 + li t0, 0x880000000000 + csrw pmpcfg2, t0 + li t0, 0x8800000000 + csrw pmpcfg2, t0 + li t0, 0x88000000 + csrw pmpcfg2, t0 + li t0, 0x880000 + csrw pmpcfg2, t0 + li t0, 0x8800 + csrw pmpcfg2, t0 + li t0, 0x8800000000000000 + csrw pmpcfg0, t0 + li t0, 0x88000000000000 + csrw pmpcfg0, t0 + li t0, 0x880000000000 + csrw pmpcfg0, t0 + li t0, 0x8800000000 + csrw pmpcfg0, t0 + li t0, 0x88000000 + csrw pmpcfg0, t0 + li t0, 0x880000 + csrw pmpcfg0, t0 + li t0, 0x8800 + csrw pmpcfg0, t0 + j done From 20a0803f46e221201061d5c348e4750bd7dc00bf Mon Sep 17 00:00:00 2001 From: Kevin Wan Date: 
Tue, 18 Apr 2023 21:50:48 -0700 Subject: [PATCH 07/35] Completely covers all PMPCFG_ARRAY_REGW cases --- testbench/tests.vh | 4 +++- tests/coverage/pmpcfg.S | 8 ++++++- tests/coverage/pmpcfg1.S | 48 ++++++++++++++++++++++++++++++++++++++++ tests/coverage/pmpcfg2.S | 12 ++++++++++ tests/coverage/priv.S | 1 + 5 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 tests/coverage/pmpcfg1.S create mode 100644 tests/coverage/pmpcfg2.S diff --git a/testbench/tests.vh b/testbench/tests.vh index 8e327caf..49c94680 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -54,8 +54,10 @@ string tvpaths[] = '{ "vm64check", "pmp", "pmpcfg", + "pmpcfg1", + "pmpcfg2", "tlbKP" - + }; string coremark[] = '{ diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S index bb696152..d65f810e 100644 --- a/tests/coverage/pmpcfg.S +++ b/tests/coverage/pmpcfg.S @@ -1,4 +1,7 @@ -// pmpcfg supplemental +// pmpcfg part 1 +// Kevin Wan, kewan@hmc.edu, 4/18/2023 +// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. +// See the next part in pmpcfg1.S #include "WALLY-init-lib.h" main: @@ -16,6 +19,8 @@ main: csrw pmpcfg2, t0 li t0, 0x8800 csrw pmpcfg2, t0 + li t0, 0x88 + csrw pmpcfg2, t0 li t0, 0x8800000000000000 csrw pmpcfg0, t0 li t0, 0x88000000000000 @@ -30,4 +35,5 @@ main: csrw pmpcfg0, t0 li t0, 0x8800 csrw pmpcfg0, t0 + j done diff --git a/tests/coverage/pmpcfg1.S b/tests/coverage/pmpcfg1.S new file mode 100644 index 00000000..96264c55 --- /dev/null +++ b/tests/coverage/pmpcfg1.S @@ -0,0 +1,48 @@ +// another set of pmpcfg tests. A new file is made because pmpcfg register fields are +// locked forever after writing 1 to the lock bit for the first time. + +// Kevin Wan, kewan@hmc.edu, 4/13/2023 +// This set tests locking the pmpXcfg fields in descending order again, without setting the TOR bits. 
+// for the other part of the tests, see pmpcfg.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x800 + csrw pmpcfg0, t0 + li t0, 0x8000000 + csrw pmpcfg0, t0 + + li t0, 0x8000000000000000 + csrw pmpcfg2, t0 + li t0, 0x80000000000000 + csrw pmpcfg2, t0 + li t0, 0x800000000000 + csrw pmpcfg2, t0 + li t0, 0x8000000000 + csrw pmpcfg2, t0 + li t0, 0x80000000 + csrw pmpcfg2, t0 + li t0, 0x800000 + csrw pmpcfg2, t0 + li t0, 0x8000 + csrw pmpcfg2, t0 + li t0, 0x80 + csrw pmpcfg2, t0 + li t0, 0x8000000000000000 + csrw pmpcfg0, t0 + li t0, 0x80000000000000 + csrw pmpcfg0, t0 + li t0, 0x800000000000 + csrw pmpcfg0, t0 + li t0, 0x8000000000 + csrw pmpcfg0, t0 + li t0, 0x80000000 + csrw pmpcfg0, t0 + li t0, 0x800000 + csrw pmpcfg0, t0 + li t0, 0x8000 + csrw pmpcfg0, t0 + + + + j done \ No newline at end of file diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S new file mode 100644 index 00000000..5966e3cd --- /dev/null +++ b/tests/coverage/pmpcfg2.S @@ -0,0 +1,12 @@ +// pmpcfg part 3 +// Kevin Wan, kewan@hmc.edu, 4/18/2023 +// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x80 + csrw pmpcfg0, t0 + + + j done \ No newline at end of file diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index 94b7cd0e..5e187866 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -189,6 +189,7 @@ main: li t1, -1 csrw mcounteren, t1 + # Go to supervisor mode li a0, 1 ecall From fe5110874023af1269e5640d61da00b309b94609 Mon Sep 17 00:00:00 2001 From: Kevin Wan Date: Tue, 18 Apr 2023 22:09:50 -0700 Subject: [PATCH 08/35] a --- tests/coverage/pmpcfg2.S | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 tests/coverage/pmpcfg2.S diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S deleted file mode 100644 index 5966e3cd..00000000 --- a/tests/coverage/pmpcfg2.S +++ /dev/null @@ -1,12 +0,0 @@ -// pmpcfg part 3 -// Kevin Wan, kewan@hmc.edu, 4/18/2023 -// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. -// See the next part in pmpcfg1.S - -#include "WALLY-init-lib.h" -main: - li t0, 0x80 - csrw pmpcfg0, t0 - - - j done \ No newline at end of file From 777028e43bf254584ca0e8a72289175a03936b3d Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 18 Apr 2023 23:06:52 -0700 Subject: [PATCH 09/35] Add test cases for pmpcfg.S --- tests/coverage/pmpcfg.S | 39 ++++++++++++++++++++++++++++++++ tests/coverage/pmpcfg1.S | 48 ++++++++++++++++++++++++++++++++++++++++ tests/coverage/pmpcfg2.S | 12 ++++++++++ 3 files changed, 99 insertions(+) create mode 100644 tests/coverage/pmpcfg.S create mode 100644 tests/coverage/pmpcfg1.S create mode 100644 tests/coverage/pmpcfg2.S diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S new file mode 100644 index 00000000..387a8a72 --- /dev/null +++ b/tests/coverage/pmpcfg.S @@ -0,0 +1,39 @@ +// pmpcfg part 1 +// Kevin Wan, kewan@hmc.edu, 4/18/2023 +// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x8800000000000000 + csrw pmpcfg2, t0 + li t0, 0x88000000000000 + csrw pmpcfg2, t0 + li t0, 0x880000000000 + csrw pmpcfg2, t0 + li t0, 0x8800000000 + csrw pmpcfg2, t0 + li t0, 0x88000000 + csrw pmpcfg2, t0 + li t0, 0x880000 + csrw pmpcfg2, t0 + li t0, 0x8800 + csrw pmpcfg2, t0 + li t0, 0x88 + csrw pmpcfg2, t0 + li t0, 0x8800000000000000 + csrw pmpcfg0, t0 + li t0, 0x88000000000000 + csrw pmpcfg0, t0 + li t0, 0x880000000000 + csrw pmpcfg0, t0 + li t0, 0x8800000000 + csrw pmpcfg0, t0 + li t0, 0x88000000 + csrw pmpcfg0, t0 + li t0, 0x880000 + csrw pmpcfg0, t0 + li t0, 0x8800 + csrw pmpcfg0, t0 + + j done \ No newline at end of file diff --git a/tests/coverage/pmpcfg1.S b/tests/coverage/pmpcfg1.S new file mode 100644 index 00000000..96264c55 --- /dev/null +++ b/tests/coverage/pmpcfg1.S @@ -0,0 +1,48 @@ +// another set of pmpcfg tests. A new file is made because pmpcfg register fields are +// locked forever after writing 1 to the lock bit for the first time. + +// Kevin Wan, kewan@hmc.edu, 4/13/2023 +// This set tests locking the pmpXcfg fields in descending order again, without setting the TOR bits. 
+// for the other part of the tests, see pmpcfg.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x800 + csrw pmpcfg0, t0 + li t0, 0x8000000 + csrw pmpcfg0, t0 + + li t0, 0x8000000000000000 + csrw pmpcfg2, t0 + li t0, 0x80000000000000 + csrw pmpcfg2, t0 + li t0, 0x800000000000 + csrw pmpcfg2, t0 + li t0, 0x8000000000 + csrw pmpcfg2, t0 + li t0, 0x80000000 + csrw pmpcfg2, t0 + li t0, 0x800000 + csrw pmpcfg2, t0 + li t0, 0x8000 + csrw pmpcfg2, t0 + li t0, 0x80 + csrw pmpcfg2, t0 + li t0, 0x8000000000000000 + csrw pmpcfg0, t0 + li t0, 0x80000000000000 + csrw pmpcfg0, t0 + li t0, 0x800000000000 + csrw pmpcfg0, t0 + li t0, 0x8000000000 + csrw pmpcfg0, t0 + li t0, 0x80000000 + csrw pmpcfg0, t0 + li t0, 0x800000 + csrw pmpcfg0, t0 + li t0, 0x8000 + csrw pmpcfg0, t0 + + + + j done \ No newline at end of file diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S new file mode 100644 index 00000000..5966e3cd --- /dev/null +++ b/tests/coverage/pmpcfg2.S @@ -0,0 +1,12 @@ +// pmpcfg part 3 +// Kevin Wan, kewan@hmc.edu, 4/18/2023 +// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x80 + csrw pmpcfg0, t0 + + + j done \ No newline at end of file From 2a4bc01944a07d16bb1ca510e9ec1e9ed0e7c8cc Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 18 Apr 2023 23:15:47 -0700 Subject: [PATCH 10/35] Update tests.vh --- testbench/tests.vh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/testbench/tests.vh b/testbench/tests.vh index 6a0f8027..f777dbf1 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -53,7 +53,11 @@ string tvpaths[] = '{ "lsu", "vm64check", "pmp", - "tlbKP" + "tlbKP", + "pmpcfg", + "pmpcfg1", + "pmpcfg2" + }; string coremark[] = '{ From e3593800d989d736e79dd705225e74caff2a8d08 Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Mon, 17 Apr 2023 14:12:58 -0700 Subject: [PATCH 11/35] fix unhit exclusion in fdivsqrtfsm --- sim/coverage-exclusions-rv64gc.do | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 754d57db..41345e6e 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -35,7 +35,7 @@ do GetLineNum.do coverage exclude -srcfile lzc.sv # FDIVSQRT has -coverage exclude -scope /core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY +coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY ### Exclude D$ states and logic for the I$ instance # This is cleaner than trying to set an I$-specific pragma in cachefsm.sv (which would exclude it for the D$ instance too) From cd9feb02603c7de65c2c9b7dc6ef6f375b6e9a5a Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Wed, 19 Apr 2023 01:19:25 -0700 Subject: [PATCH 12/35] Cover CacheWay edge case: CacheDataMem we=1 while ce=0. This test basically triggers an i$ miss during a d$ (hit) store operation. It requires some tricky timing (e.g. a flushD right before the relevant store). I use a script to generate the test. 
--- testbench/tests.vh | 3 +- tests/coverage/dcache1.S | 83 +++++++++++++++++++++++++++++++++++++ tests/coverage/dcache1.py | 86 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 tests/coverage/dcache1.S create mode 100644 tests/coverage/dcache1.py diff --git a/testbench/tests.vh b/testbench/tests.vh index 6a0f8027..d2b8a934 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -53,7 +53,8 @@ string tvpaths[] = '{ "lsu", "vm64check", "pmp", - "tlbKP" + "tlbKP", + "dcache1", }; string coremark[] = '{ diff --git a/tests/coverage/dcache1.S b/tests/coverage/dcache1.S new file mode 100644 index 00000000..4a9b3de1 --- /dev/null +++ b/tests/coverage/dcache1.S @@ -0,0 +1,83 @@ + #include "WALLY-init-lib.h" +main: + // start way test #1 + li t0, 0x80100000 +.align 6 + // i$ boundary, way test #1 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + sd zero, 0(t0) + sd zero, 0(t0) + .word 0x00000013 + .word 0x00000013 + // start way test #2 + li t0, 0x80101000 +.align 6 + // i$ boundary, way test #2 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + sd zero, 0(t0) + sd zero, 0(t0) + .word 0x00000013 + .word 0x00000013 + // start way test #3 + li t0, 0x80102000 +.align 6 + // i$ boundary, way test #3 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + sd zero, 0(t0) + sd zero, 0(t0) + .word 0x00000013 + .word 0x00000013 + // start way test #4 + li t0, 0x80103000 +.align 6 + // i$ boundary, 
way test #4 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + sd zero, 0(t0) + sd zero, 0(t0) + .word 0x00000013 + .word 0x00000013 + j done diff --git a/tests/coverage/dcache1.py b/tests/coverage/dcache1.py new file mode 100644 index 00000000..59259567 --- /dev/null +++ b/tests/coverage/dcache1.py @@ -0,0 +1,86 @@ +#################### +# dcache1.py +# +# Written: avercruysse@hmc.edu 18 April 2023 +# +# Purpose: Test Coverage for D$ +# (For each way, trigger a CacheDataMem write enable while chip enable is low) +# +# A component of the CORE-V-WALLY configurable RISC-V project. +# +# Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +# +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +# +# Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +# except in compliance with the License, or, at your option, the Apache License version 2.0. You +# may obtain a copy of the License at +# +# https://solderpad.org/licenses/SHL-2.1/ +# +# Unless required by applicable law or agreed to in writing, any work distributed under the +# License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific language governing permissions +# and limitations under the License. +################################################ + +import os + +test_name = "dcache1.S" +dcache_num_ways = 4 +dcache_way_size_in_bytes = 4096 +# warning i$ line size is not currently parameterized. + +# arbitrary start location of where I send stores to. 
+mem_start_addr = 0x80100000 + +# pointer to the start of unused memory (strictly increasing) +mem_addr = mem_start_addr + + +def wl(line="", comment=None, fname=test_name): + with open(fname, "a") as f: + instr = False if (":" in line or + ".align" in line or + "# include" in line) else True + indent = 6 if instr else 0 + comment = "// " + comment if comment is not None else "" + to_write = " " * indent + line + comment + "\n" + f.write(to_write) + + +def write_repro_instrs(): + """ + Assumes that the store location has been fetched to d$, and is in t0. + """ + for i in range(16): # write a whole cache set. + if i == 12: + wl('sd zero, 0(t0)') # D$ write to set PCM = PCF + 8 for proper alignment (stallD will happen). + elif i == 13: + # the store in question happens here, at adresses 0x34, 0x74 + wl('sd zero, 0(t0)') # it should hit this time + else: + # can't be a NOP or anything else that is encoded as compressed. + # this is because the branch predictor will use the wrong address + # so the IFU cache miss will come late. + wl('.word 0x00000013') # addi x0, x0, 0 (canonical NOP, uncompressed). + +if __name__ == "__main__": + if os.path.exists(test_name): + os.remove(test_name) + # os.rename(test_name, test_name + ".old") + wl(comment="This file is generated by dcache1.py (run that script manually)") + wl('#include "WALLY-init-lib.h"') + wl('main:') + + # excercise all 4 D$ ways. If they're not all full, it uses the first empty. + # So we are sure all 4 ways are exercised. + for i in range(dcache_num_ways): + wl(comment=f"start way test #{i+1}") + wl(f'li t0, {hex(mem_addr)}') + wl(f'.align 6') # start at i$ set boundary. 6 lsb bits are zero. + wl(comment=f"i$ boundary, way test #{i+1}") + write_repro_instrs() + mem_addr += dcache_way_size_in_bytes # so that we excercise a new D$ way. 
+ + wl("j done") From 3de03abd9df709e7f8e1cd6dce197ccdaa22f01e Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Wed, 19 Apr 2023 01:21:57 -0700 Subject: [PATCH 13/35] add D$ test case to trigger a FlushStage while SetDirtyWay=1 This hits some conditional coverage in each cacheway. A cache store hit happens at the same time as a StoreAmoMisalignedFault. --- testbench/tests.vh | 1 + tests/coverage/dcache2.S | 49 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 tests/coverage/dcache2.S diff --git a/testbench/tests.vh b/testbench/tests.vh index d2b8a934..fd48d6dc 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -55,6 +55,7 @@ string tvpaths[] = '{ "pmp", "tlbKP", "dcache1", + "dcache2" }; string coremark[] = '{ diff --git a/tests/coverage/dcache2.S b/tests/coverage/dcache2.S new file mode 100644 index 00000000..58f97a2e --- /dev/null +++ b/tests/coverage/dcache2.S @@ -0,0 +1,49 @@ +/////////////////////////////////////////// +// dcache2.S +// +// Written: avercruysse@hmc.edu 18 April 2023 +// +// Purpose: Test Coverage for D$ +// (for all 4 cache ways, trigger a FlushStage while SetDirtyWay=1) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +#include "WALLY-init-lib.h" +main: + // way 0 + li t0, 0x80100770 + sd zero, 0(t0) + sd zero, 1(t0) + + // way 1 + li t0, 0x80101770 + sd zero, 0(t0) + sd zero, 1(t0) + + // way 2 + li t0, 0x80102770 + sd zero, 0(t0) + sd zero, 1(t0) + + // way 3 + li t0, 0x80103770 + sd zero, 0(t0) + sd zero, 1(t0) + + j done From b52512b1ae0815dbb8c72cd4a485d6dc9107df00 Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Wed, 19 Apr 2023 01:28:45 -0700 Subject: [PATCH 14/35] D$ scope-specific coverage exclusions (I$ logic that never fires) The InvalidateCache signal in the D$ is for I$ only, which causes some coverage issues that need exclusion. Another manual exclusion is due to the fact that D$ writeback, flush, write_line, or flush_writeback states can't be cancelled by a flush, so those transitions are excluded. There is some other small stuff to review (logic simplification, or an exclusion pragma if removing the redundant logic would make it harder to understand the code, as is the case in the FlushAdrCntEn assign statement, in my opinion). 
--- sim/coverage-exclusions-rv64gc.do | 15 ++++++++++++++- src/cache/cachefsm.sv | 8 +++++--- src/cache/cacheway.sv | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 41345e6e..38c04231 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -52,7 +52,7 @@ set end [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag-end: icache case"] coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange $start-$end coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache WRITEBACKStatement"] # exclude Atomic Operation logic -coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache storeAMO"] -item e 1 -fecexprrow 6 +coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: cache AnyMiss"] -item e 1 -fecexprrow 6 coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache storeAMO1"] -item e 1 -fecexprrow 2-4 coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache AnyUpdateHit"] -item e 1 -fecexprrow 2 # cache write logic @@ -77,6 +77,19 @@ for {set i 0} {$i < $numcacheways} {incr i} { coverage exclude -scope /dut/core/ifu/bus/icache/icache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: icache SetValidEN"] -item e 1 -fecexprrow 4 } +## D$ Exclusions. 
+# InvalidateCache is I$ only: +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache InvalidateCheck"] -item b 2 +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache InvalidateCheck"] -item s 1 +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache CacheEn"] -item e 1 -fecexprrow 12 +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: cache AnyMiss"] -item e 1 -fecexprrow 4 +set numcacheways 4 +for {set i 0} {$i < $numcacheways} {incr i} { + coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: dcache invalidateway"] -item be 1 -fecexprrow 4 +} +# D$ writeback, flush, write_line, or flush_writeback states can't be cancelled by a flush +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -ftrans CurrState STATE_WRITEBACK->STATE_READY STATE_FLUSH->STATE_READY STATE_WRITE_LINE->STATE_READY STATE_FLUSH_WRITEBACK->STATE_READY + # Excluding peripherals as sources of instructions for the ifu coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker/adrdecs/clintdec diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 90d8eaad..7cd8240c 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -110,10 +110,10 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( always_comb begin NextState = STATE_READY; case (CurrState) // exclusion-tag: icache state-case - STATE_READY: if(InvalidateCache) NextState = STATE_READY; + STATE_READY: if(InvalidateCache) NextState = STATE_READY; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = 
STATE_FETCH; // exclusion-tag: icache FETCHStatement - else if(AnyMiss & LineDirty) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement + else if(AnyMiss) /* & LineDirty */ NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement else NextState = STATE_READY; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else NextState = STATE_FETCH; @@ -160,6 +160,8 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( assign SelFlush = (CurrState == STATE_READY & FlushCache) | (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_WRITEBACK); + // coverage off -item e -fecexprrow 1 + // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty); assign FlushWayCntEn = (CurrState == STATE_FLUSH & ~LineDirty) | @@ -181,6 +183,6 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( (CurrState == STATE_WRITE_LINE) | resetDelay; assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_READ_HOLD; - assign CacheEn = (~Stall | FlushCache | AnyMiss) | (CurrState != STATE_READY) | reset | InvalidateCache; + assign CacheEn = (~Stall | FlushCache | AnyMiss) | (CurrState != STATE_READY) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn endmodule // cachefsm diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 79ec65e6..368c7b58 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -155,7 +155,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, if (reset) ValidBits <= #1 '0; if(CacheEn) begin ValidWay <= #1 ValidBits[CacheSet]; - if(InvalidateCache) ValidBits <= #1 '0; + if(InvalidateCache) ValidBits <= #1 '0; // exclusion-tag: dcache invalidateway else if (SetValidEN) ValidBits[CacheSet] <= #1 SetValidWay; end end From 7ba2bfd4b62dde1b22ee98caa462718d27052799 Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Wed, 19 
Apr 2023 01:32:43 -0700 Subject: [PATCH 15/35] CacheFSM logic simplification for AMO operations Ran this by Ross. --- src/cache/cachefsm.sv | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 7cd8240c..34f1778f 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -69,7 +69,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( ); logic resetDelay; - logic AMO, StoreAMO; + logic StoreAMO; logic AnyUpdateHit, AnyHit; logic AnyMiss; logic FlushFlag; @@ -86,16 +86,15 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( statetype CurrState, NextState; - assign AMO = CacheAtomic[1] & (&CacheRW); - assign StoreAMO = AMO | CacheRW[0]; + assign StoreAMO = CacheRW[0]; // AMO operations assert CacheRW[0] - assign AnyMiss = (StoreAMO | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: icache storeAMO + assign AnyMiss = (StoreAMO | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign AnyUpdateHit = (StoreAMO) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. - assign CacheAccess = (AMO | CacheRW[1] | CacheRW[0]) & CurrState == STATE_READY; // exclusion-tag: icache CacheW + assign CacheAccess = (|CacheRW) & CurrState == STATE_READY; // exclusion-tag: icache CacheW assign CacheMiss = CacheAccess & ~CacheHit; // special case on reset. 
When the fsm first exists reset the From 68295bd750cef56dbe53e0c84f38ea93876685cf Mon Sep 17 00:00:00 2001 From: David Harris <74973295+davidharrishmc@users.noreply.github.com> Date: Wed, 19 Apr 2023 06:23:05 -0700 Subject: [PATCH 16/35] Update tests.vh Missing comma from merge --- testbench/tests.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testbench/tests.vh b/testbench/tests.vh index 93a40610..e2d4e5ad 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -54,7 +54,7 @@ string tvpaths[] = '{ "vm64check", "pmp", "dcache1", - "dcache2" + "dcache2", "pmpcfg", "pmpcfg1", "pmpcfg2", From 2684a81754e30fb4fe57f86bf84f1a62e5956b92 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 19 Apr 2023 11:58:22 -0700 Subject: [PATCH 17/35] Add pmpcfg test cases increasing IFU coverage --- tests/coverage/pmpcfg.S | 46 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S index 74181ab6..5b3e37b5 100644 --- a/tests/coverage/pmpcfg.S +++ b/tests/coverage/pmpcfg.S @@ -1,10 +1,52 @@ // pmpcfg part 1 // Kevin Wan, kewan@hmc.edu, 4/18/2023 +// Liam Chalk, lchalk@hmc.edu, 4/19/2023 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
// See the next part in pmpcfg1.S #include "WALLY-init-lib.h" main: + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00000017 + csrw pmpcfg0, t0 + + li t0, 0x90000000 + csrw pmpaddr2, t0 + li t0, 0x00000017 + csrw pmpcfg2, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00000017 + csrw pmpcfg1, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00000017 + csrw pmpcfg2, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00000017 + csrw pmpcfg3, t0 + + li t0, 0x90000000 + csrw pmpaddr1, t0 + li t0, 0x00000017 + csrw pmpcfg1, t0 + + li t0, 0x90000000 + csrw pmpaddr1, t0 + li t0, 0x00000017 + csrw pmpcfg2, t0 + + li t0, 0x90000000 + csrw pmpaddr1, t0 + li t0, 0x00000017 + csrw pmpcfg3, t0 + li t0, 0x8800000000000000 csrw pmpcfg2, t0 li t0, 0x88000000000000 @@ -36,6 +78,4 @@ main: li t0, 0x8800 csrw pmpcfg0, t0 - - j done - + j done \ No newline at end of file From a3f3967f59dcb2c606ffd7e16efbc583b3780a48 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 19 Apr 2023 13:07:07 -0700 Subject: [PATCH 18/35] Added -fp flag to run arch64d/f tests in coverage --- sim/regression-wally | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sim/regression-wally b/sim/regression-wally index c7017720..fa112731 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -28,6 +28,7 @@ regressionDir = os.path.dirname(os.path.abspath(__file__)) os.chdir(regressionDir) coverage = '-coverage' in sys.argv +fp = '-fp' in sys.argv TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr']) # name: the name of this test configuration (used in printing human-readable @@ -140,6 +141,9 @@ if (coverage): # delete all but 64gc tests when running coverage "arch64zi", "wally64a", "wally64periph", "wally64priv", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs", "imperas64f", "imperas64d", "imperas64c", "imperas64i"] + if (fp): + tests64gc.append("arch64f") + tests64gc.append("arch64d") coverStr = '-coverage' else: coverStr = '' From 
5f14dfe7488520be9c6863731273ab08eeabe5da Mon Sep 17 00:00:00 2001 From: David Harris <74973295+davidharrishmc@users.noreply.github.com> Date: Thu, 20 Apr 2023 14:09:32 -0700 Subject: [PATCH 19/35] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9cb56de0..ff76f72f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # core-v-wally Configurable RISC-V Processor -Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, and M extensions, FENCE.I, and the various privileged modes and CSRs. It is written in SystemVerilog. It passes the RISC-V Arch Tests and boots Linux on an FPGA. +Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, Q, M, and Zb* extensions, FENCE.I, and the various privileged modes and CSRs. It is written in SystemVerilog. It passes the RISC-V Arch Tests and boots Linux on an FPGA. ![Wally block diagram](wallyriscvTopAll.png) From 870c15c4f53cbba132b44c36d92df3aa87a46d62 Mon Sep 17 00:00:00 2001 From: David Harris <74973295+davidharrishmc@users.noreply.github.com> Date: Thu, 20 Apr 2023 14:15:34 -0700 Subject: [PATCH 20/35] Update README.md --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff76f72f..b73aecdb 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # core-v-wally -Configurable RISC-V Processor -Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, Q, M, and Zb* extensions, FENCE.I, and the various privileged modes and CSRs. It is written in SystemVerilog. It passes the RISC-V Arch Tests and boots Linux on an FPGA. 
+Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, Q, M, and Zb* extensions, virtual memory, PMP, and the various privileged modes and CSRs. It provides optional caches, branch prediction, and standard RISC-V peripherals (CLINT, PLIC, UART, GPIO). Wally is written in SystemVerilog. It passes the RISC-V Arch Tests and boots Linux on an FPGA. Configurations range from a minimal RV32E core to a fully featured RV64GC application processor. ![Wally block diagram](wallyriscvTopAll.png) Wally is described in an upcoming textbook, *RISC-V System-on-Chip Design*, by Harris, Stine, Thompson, and Harris. Users should follow the setup instructions below. A system administrator must install CAD tools using the directions further down. +# Verification + +Wally is presently at Technology Readiness Level 4, passing the RISC-V compatibility test suite and custom tests, and booting Linux in simulation and on an FPGA. See the [Test Plan](docs/testplan.md) for details. + # New User Setup New users may wish to do the following setup to access the server via a GUI and use a text editor. From 73cca666bfeb701cd517a099895f45e8bcc84ea2 Mon Sep 17 00:00:00 2001 From: Noah Limpert Date: Thu, 20 Apr 2023 14:38:13 -0700 Subject: [PATCH 21/35] Committing changes to add coverage to ASID, Global, Megapage size checks. 
--- testbench/tests.vh | 3 + tests/coverage/tlbASID.S | 133 ++++++++++++++++++++++++++++++++ tests/coverage/tlbGLB.S | 134 ++++++++++++++++++++++++++++++++ tests/coverage/tlbMP.S | 163 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 433 insertions(+) create mode 100644 tests/coverage/tlbASID.S create mode 100644 tests/coverage/tlbGLB.S create mode 100644 tests/coverage/tlbMP.S diff --git a/testbench/tests.vh b/testbench/tests.vh index e2d4e5ad..b86756b4 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -59,6 +59,9 @@ string tvpaths[] = '{ "pmpcfg1", "pmpcfg2", "tlbKP", + "tlbMP", + "tlbASID", + "tlbGLB", "ifuCamlineWrite" }; diff --git a/tests/coverage/tlbASID.S b/tests/coverage/tlbASID.S new file mode 100644 index 00000000..bf71c049 --- /dev/null +++ b/tests/coverage/tlbASID.S @@ -0,0 +1,133 @@ +/////////////////////////////////////////// +// tlbASID.S +// +// Written: mmendozamanriquez@hmc.edu 4 April 2023 +// nlimpert@hmc.edu +// +// Purpose: Test coverage for LSU +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initalize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80010000 + li t5, 0x9000000000080080 // try making asid = 0. + csrw satp, t5 + + # sfence.vma x0, x0 + + # switch to supervisor mode + li a0, 1 + ecall + + li t0, 0xC0000000 + + li t2, 0 # i = 0 + li t5, 0 # j = 0 // now use as a counter for new asid loop + li t3, 32 # Max amount of Loops = 32 + +loop: bge t2, t3, nASID # exit loop if i >= loops + lw t1, 0(t0) + li t4, 0x1000 + add t0, t0, t4 + addi t2, t2, 1 + j loop + +nASID: bne t5, zero, finished + li a0, 3 // go + ecall + li t5, 0x9000100000080080 // try making asid = 1 + csrw satp, t5 + li a0, 1 + ecall + li t2, 0 + li t0, 0xC0000000 + li t5, 1 // make this not zero. + j loop + + +finished: + j done + +.data +.align 19 +# level 3 Page table situated at 0x8008 0000, should point to 8008,1000 +pagetable: + .8byte 0x200204C1 + +.align 12 // level 2 page table, contains direction to a gigapageg + .8byte 0x0 + .8byte 0x0 + .8byte 0x200000CF // gigapage that starts at 8000 0000 goes to C000 0000 + .8byte 0x200208C1 // pointer to next page table entry at 8008 2000 + +.align 12 // level 1 page table, points to level 0 page table + .8byte 0x20020CC1 + +.align 12 // level 0 page table, points to address C000 0000 // FOR NOW ALL OF THESE GO TO 8 instead of C cause they start with 2 + .8byte 0x200000CF // access xC000 0000 + .8byte 0x200004CF // access xC000 1000 + .8byte 0x200008CF // access xC000 2000 + .8byte 0x20000CCF // access xC000 3000 + + .8byte 0x200010CF // access xC000 4000 + .8byte 0x200014CF + .8byte 0x200018CF + .8byte 0x20001CCF + + .8byte 0x200020CF // access xC000 8000 + .8byte 0x200024CF + .8byte 0x200028CF + .8byte 0x20002CCF + + .8byte 0x200030CF // access xC000 C000 + .8byte 0x200034CF + .8byte 0x200038CF + .8byte 
0x20003CCF + + .8byte 0x200040CF // access xC001 0000 + .8byte 0x200044CF + .8byte 0x200048CF + .8byte 0x20004CCF + + .8byte 0x200050CF // access xC001 4000 + .8byte 0x200054CF + .8byte 0x200058CF + .8byte 0x20005CCF + + .8byte 0x200060CF // access xC001 8000 + .8byte 0x200064CF + .8byte 0x200068CF + .8byte 0x20006CCF + + .8byte 0x200070CF // access xC001 C000 + .8byte 0x200074CF + .8byte 0x200078CF + .8byte 0x20007CCF + + .8byte 0x200080CF // access xC002 0000 + .8byte 0x200084CF + .8byte 0x200088CF + .8byte 0x20008CCF + + \ No newline at end of file diff --git a/tests/coverage/tlbGLB.S b/tests/coverage/tlbGLB.S new file mode 100644 index 00000000..77e3a31c --- /dev/null +++ b/tests/coverage/tlbGLB.S @@ -0,0 +1,134 @@ +/////////////////////////////////////////// +// tlbGLB.S +// +// Written: mmendozamanriquez@hmc.edu 4 April 2023 +// nlimpert@hmc.edu +// +// Purpose: coverage for the global check. +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + + +// load code to initialize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80080000 + li t5, 0x9000000000080080 // try making asid = 0. + csrw satp, t5 + + # sfence.vma x0, x0 + + # switch to supervisor mode + li a0, 1 + ecall + + li t0, 0xC0000000 + + li t2, 0 # i = 0 + li t5, 0 # j = 0 // now use as a counter for new asid loop + li t3, 32 # Max amount of Loops = 32 + +loop: bge t2, t3, nASID # exit loop if i >= loops + lw t1, 0(t0) + li t4, 0x1000 + add t0, t0, t4 + addi t2, t2, 1 + j loop + +nASID: bne t5, zero, finished + li a0, 3 // go + ecall + li t5, 0x9000100000080080 // try making asid = 1 + csrw satp, t5 + li a0, 1 + ecall + li t2, 0 + li t0, 0xC0000000 + li t5, 1 // make this not zero. + j loop + + +finished: + j done + +.data +.align 19 +# level 3 Page table situated at 0x8008 0000, should point to 8008,1000 +pagetable: + .8byte 0x200204C1 + +.align 12 // level 2 page table, contains direction to a gigapage + .8byte 0x0 + .8byte 0x0 + .8byte 0x200000CF // gigapage that starts at 8000 0000 goes to C000 0000 + .8byte 0x200208C1 // pointer to next page table entry at 8008 2000 + +.align 12 // level 1 page table, points to level 0 page table + .8byte 0x20020CE1 + +.align 12 // level 0 page table, points to address C000 0000 // FOR NOW ALL OF THESE GO TO 8 instead of C cause they start with 2 + .8byte 0x200000CF // access xC000 0000 + .8byte 0x200004CF // access xC000 1000 + .8byte 0x200008CF // access xC000 2000 + .8byte 0x20000CCF // access xC000 3000 + + .8byte 0x200010EF // access xC000 4000 + .8byte 0x200014EF + .8byte 0x200018EF + .8byte 0x20001CEF + + .8byte 0x200020EF // access xC000 8000 + .8byte 0x200024EF + .8byte 0x200028EF + .8byte 0x20002CEF + + .8byte 0x200030EF // access xC000 C000 + .8byte 0x200034EF + .8byte 0x200038EF + .8byte
0x20003CEF + + .8byte 0x200040EF // access xC001 0000 + .8byte 0x200044EF + .8byte 0x200048EF + .8byte 0x20004CEF + + .8byte 0x200050EF // access xC001 4000 + .8byte 0x200054EF + .8byte 0x200058EF + .8byte 0x20005CEF + + .8byte 0x200060EF // access xC001 8000 + .8byte 0x200064EF + .8byte 0x200068EF + .8byte 0x20006CEF + + .8byte 0x200070EF // access xC001 C000 + .8byte 0x200074eF + .8byte 0x200078EF + .8byte 0x20007CEF + + .8byte 0x200080EF // access xC002 0000 + .8byte 0x200084EF + .8byte 0x200088EF + .8byte 0x20008CEF + + \ No newline at end of file diff --git a/tests/coverage/tlbMP.S b/tests/coverage/tlbMP.S new file mode 100644 index 00000000..6981d1f3 --- /dev/null +++ b/tests/coverage/tlbMP.S @@ -0,0 +1,163 @@ +/////////////////////////////////////////// +// tlbMP.S +// +// Written: mmendozamanriquez@hmc.edu 4 April 2023 +// nlimpert@hmc.edu +// +// Purpose: Test coverage for LSU +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initialize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80010000 + li t5, 0x9000000000080010 + csrw satp, t5 + + # sfence.vma x0, x0 + + # switch to supervisor mode + li a0, 1 + ecall + li t5, 0 + li t0, 0x84000000 // go to first megapage + li t4, 0x1000 // put this outside the loop. + li t2, 0 # i = 0 + li t3, 32 # Max amount of Loops = 32 + +loop: bge t2, t3, lKP # exit loop if i >= loops + lw t1, 0(t0) + add t0, t0, t4 + addi t2, t2, 1 + j loop + +lKP: bne t5, zero, finished + li t0, 0x80000000 + slli t4, t4, 9 + addi t5, t5, 1 + li t2, 0 + j loop + +finished: + j done + +.data + +.align 16 +# Page table situated at 0x80010000 +pagetable: + .8byte 0x200044C1 + +.align 12 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + + +.align 12 // megapages starting at 8000 0000 going to 8480 0000 (32*2 MiB beyond that) + + .8byte 0x200000CF // access 8000,0000 + .8byte 0x200800CF // access 8020,0000 + .8byte 0x201000CF // access 8040,0000 + .8byte 0x201800CF // access 8060,0000 + + .8byte 0x202000CF // access 8080,0000 + .8byte 0x202800CF // access 80A0,0000 + .8byte 0x203000CF // access 80C0,0000 + .8byte 0x203800CF // access 80E0,0000 + + .8byte 0x204000CF // access 8100,0000 + .8byte 0x204800CF + .8byte 0x205000CF + .8byte 0x205800CF + + .8byte 0x206000CF // access 8180,0000 + .8byte 0x206800CF + .8byte 0x207000CF + .8byte 0x207800CF + + .8byte 0x208000CF // access 8200,0000 + .8byte 0x208800CF + .8byte 0x209000CF + .8byte 0x209800CF + + .8byte 0x20A000CF // access 8280,0000 + .8byte 0x20A800CF + .8byte 0x20B000CF + .8byte 0x20B800CF + + .8byte 0x20C000CF // access 8300,0000 + .8byte 0x20C800CF + .8byte 0x20D000CF + .8byte 0x20D800CF + + .8byte 0x20E000CF // access 8380,0000 + .8byte 0x20E800CF + .8byte
0x20F000CF + .8byte 0x20F800CF + + .8byte 0x20004CC1 + // Kilopage entry, for addresses from 8400, 0000 to 841F, FFFF + // point to ... + +.align 12 // should start at 84000000 + .8byte 0x210000CF + .8byte 0x210004CF + .8byte 0x210008CF + .8byte 0x21000CCF + + .8byte 0x210010CF + .8byte 0x210014CF + .8byte 0x210018CF + .8byte 0x21001CCF + + .8byte 0x210020CF + .8byte 0x210024CF + .8byte 0x210028CF + .8byte 0x21002CCF + + .8byte 0x210030CF + .8byte 0x210034CF + .8byte 0x210038CF + .8byte 0x21003CCF + + .8byte 0x210040CF + .8byte 0x210044CF + .8byte 0x210048CF + .8byte 0x21004CCF + + .8byte 0x210050CF + .8byte 0x210054CF + .8byte 0x210058CF + .8byte 0x21005CCF + + .8byte 0x210060CF + .8byte 0x210064CF + .8byte 0x210068CF + .8byte 0x21006CCF + + .8byte 0x210070CF + .8byte 0x210074CF + .8byte 0x210078CF + .8byte 0x21007CCF + From cf150a2ea9b798ad5a8632f018acf60765bf8d34 Mon Sep 17 00:00:00 2001 From: Noah Limpert Date: Thu, 20 Apr 2023 14:50:06 -0700 Subject: [PATCH 22/35] Add in a test that makes match 3 = 0 for all tlb lines --- testbench/tests.vh | 1 + tests/coverage/tlbM3.S | 155 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 tests/coverage/tlbM3.S diff --git a/testbench/tests.vh b/testbench/tests.vh index b86756b4..a450b057 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -60,6 +60,7 @@ string tvpaths[] = '{ "pmpcfg2", "tlbKP", "tlbMP", + "tlbM3", "tlbASID", "tlbGLB", "ifuCamlineWrite" diff --git a/tests/coverage/tlbM3.S b/tests/coverage/tlbM3.S new file mode 100644 index 00000000..ececa1f3 --- /dev/null +++ b/tests/coverage/tlbM3.S @@ -0,0 +1,155 @@ +/////////////////////////////////////////// +// tlbKP.S +// +// Written: mmendozamanriquez@hmc.edu 4 April 2023 +// nlimpert@hmc.edu +// +// Purpose: Test coverage for LSU +// +// A component of the CORE-V-WALLY configurable RISC-V project. 
+// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initialize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80010000 + li t5, 0x9000000000080010 + csrw satp, t5 + + # sfence.vma x0, x0 + + # switch to supervisor mode + li a0, 1 + ecall + + li t0, 0x1000 + + li t2, 0 # i = 0 + li t3, 64 # Max amount of Loops = 64 + li t4, 0x1000 + +loop: bge t2, t3, interim # exit loop if i >= loops + lw t1, 0(t0) + # sfence.vma x0, x0 + add t0, t0, t4 + addi t2, t2, 1 + j loop + +interim: + li t0, 0xFFFFFFFF000 + li t2, 0 # i = 0 + + +loop2:bge t2, t3, finished # exit loop if i >= loops + lw t1, 0(t0) + add t0, t0, t4 + addi t2, t2, 1 + j loop2 + +finished: + j done + +.data + +.align 16 +# Page table situated at 0x80010000 +pagetable: + .8byte 0x200044C1 // old page table was 200040 which just pointed to itself! wrong + +.align 12 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + + +.align 12 + .8byte 0x0000000020004CC1 + //.8byte 0x00000200800CF// ADD IN THE MEGAPAGE should 3 nibbles of zeros be removed?
+ +.align 12 + #80000000 + .8byte 0x200000CF + .8byte 0x200004CF + .8byte 0x200008CF + .8byte 0x20000CCF + + .8byte 0x200010CF + .8byte 0x200014CF + .8byte 0x200018CF + .8byte 0x20001CCF + + .8byte 0x200020CF + .8byte 0x200024CF + .8byte 0x200028CF + .8byte 0x20002CCF + + .8byte 0x200030CF + .8byte 0x200034CF + .8byte 0x200038CF + .8byte 0x20003CCF + + .8byte 0x200040CF + .8byte 0x200044CF + .8byte 0x200048CF + .8byte 0x20004CCF + + .8byte 0x200050CF + .8byte 0x200054CF + .8byte 0x200058CF + .8byte 0x20005CCF + + .8byte 0x200060CF + .8byte 0x200064CF + .8byte 0x200068CF + .8byte 0x20006CCF + + .8byte 0x200070CF + .8byte 0x200074CF + .8byte 0x200078CF + .8byte 0x20007CCF + + .8byte 0x200080CF + .8byte 0x200084CF + .8byte 0x200088CF + .8byte 0x20008CCF + + .8byte 0x200090CF + .8byte 0x200094CF + .8byte 0x200098CF + .8byte 0x20009CCF + + .8byte 0x2000A0CF + .8byte 0x2000A4CF + .8byte 0x2000A8CF + .8byte 0x2000ACCF + + .8byte 0x2000B0CF + .8byte 0x2000B4CF + .8byte 0x2000B8CF + .8byte 0x2000BCCF + + .8byte 0x2000C0CF + .8byte 0x2000C4CF + .8byte 0x2000C8CF + .8byte 0x2000CCCF + + .8byte 0x2000D0CF + .8byte 0x2000D4CF From c431278fe65593df593e958ef745603d0f0d360c Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:24:58 -0700 Subject: [PATCH 23/35] Fmv h/q comments in controller --- src/fpu/fctrl.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index b9584bc9..206cefbb 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -138,10 +138,10 @@ module fctrl ( endcase 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass - else if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w / fmv.x.d to int register - 7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w.x / fmv.d.x to fp reg + else if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) + 
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register + 7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) + ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) From ca0269c0945a222480e1ae8a1e00ef11ff8b5d3c Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:25:19 -0700 Subject: [PATCH 24/35] Started fdivsqrtpreproc flow organization --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 4 +- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 57 ++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index f1ad32cd..4025a30c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrt.sv +// fdivsqrtcycles.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu // Modified: 18 April 2022 // -// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// Purpose: Determine number of cycles for divsqrt // // Documentation: RISC-V System on Chip Design Chapter 13 // diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 43a5e42b..04739ee8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -63,6 +63,10 @@ module fdivsqrtpreproc ( logic AsE, BsE; // Signs of integer inputs logic [`XLEN-1:0] AE; // input A after W64 adjustment + ////////////////////////////////////////////////////// + // Integer Preprocessing + ////////////////////////////////////////////////////// + if (`IDIV_ON_FPU) begin:intpreproc // Int Supported logic [`XLEN-1:0] BE, PosA, PosB; @@ -90,13 +94,17 
@@ module fdivsqrtpreproc ( // Select integer or floating point inputs mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX); mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD); - - + mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); end else begin // Int not supported assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}}; assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}}; + assign NumerZeroE = XZeroE; end + ////////////////////////////////////////////////////// + // Integer & FP leading zero and normalization shift + ////////////////////////////////////////////////////// + // count leading zeros for Subnorm FP and to normalize integer inputs lzc #(`DIVb) lzcX (IFX, ell); lzc #(`DIVb) lzcY (IFD, mE); @@ -105,17 +113,10 @@ module fdivsqrtpreproc ( assign XPreproc = (IFX << ell) << 1; assign DPreproc = (IFD << mE) << 1; - // append leading 1 (for nonzero inputs) - // shift square root to be in range [1/4, 1) - // Normalized numbers are shifted right by 1 if the exponent is odd - // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. 
- mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); - assign DivX = {3'b000, ~NumerZeroE, XPreproc}; + ////////////////////////////////////////////////////// + // Integer Right Shift to digit boundary + ////////////////////////////////////////////////////// - // Divisior register - flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); - - // ***CT: factor out fdivsqrtcycles if (`IDIV_ON_FPU) begin:intrightshift // Int Supported logic [`DIVBLEN:0] ZeroDiff, p; logic ALTBE; @@ -146,11 +147,6 @@ module fdivsqrtpreproc ( assign DivXShifted = DivX; end /* verilator lint_on WIDTH */ - - // Selet integer or floating-point operands - mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); - mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); - // pipeline registers flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); @@ -163,14 +159,39 @@ module fdivsqrtpreproc ( if (`XLEN==64) flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end else begin - assign NumerZeroE = XZeroE; assign X = PreShiftX; + assign ISpecialCaseE = 0; end + ////////////////////////////////////////////////////// + // Floating-Point Preprocessing + // append leading 1 (for nonzero inputs) + // shift square root to be in range [1/4, 1) + // Normalized numbers are shifted right by 1 if the exponent is odd + // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. 
+ ////////////////////////////////////////////////////// + + mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + assign DivX = {3'b000, ~NumerZeroE, XPreproc}; + // Sqrt is initialized on step one as R(X-1), so depends on Radix if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); + + ////////////////////////////////////////////////////// + // Selet integer or floating-point operands + ////////////////////////////////////////////////////// + + mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + + // Divisior register + flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); + + + + + // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); From ea7c50e0ee68743b53997a655a4d74874e1ce4a4 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:38:47 -0700 Subject: [PATCH 25/35] Reordered fdivsqrtpreproc to follow logic --- src/fpu/fdivsqrt/fdivsqrt.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtfsm.sv | 4 +-- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 53 ++++++++++++++++------------- 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index f4d46501..f7a44363 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -62,7 +62,7 @@ module fdivsqrt( logic [`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection logic WZeroE; // Early termination flag - logic [`DURLEN-1:0] cycles; // FSM cycles + logic [`DURLEN-1:0] CyclesE; // FSM cycles logic SpecialCaseM; // Divide by zero, square root of negative, etc. 
logic DivStartE; // Enable signal for flops during stall @@ -76,7 +76,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles, + .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .BZeroM, .nM, .mM, .AM, @@ -85,7 +85,7 @@ module fdivsqrt( fdivsqrtfsm fdivsqrtfsm( // FSM .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, - .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles, + .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE, // Int-specific .IDivStartE, .ISpecialCaseE, .IntDivE); diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 4025a30c..2e17cc25 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -33,7 +33,7 @@ module fdivsqrtcycles( input logic SqrtE, input logic IntDivE, input logic [`DIVBLEN:0] nE, - output logic [`DURLEN-1:0] cycles + output logic [`DURLEN-1:0] CyclesE ); logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits // DIVN = `NF+3 @@ -68,8 +68,8 @@ module fdivsqrtcycles( always_comb begin if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (`IDIV_ON_FPU) cycles = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + if (`IDIV_ON_FPU) CyclesE = IntDivE ? 
((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 5332087a..75010f74 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -39,7 +39,7 @@ module fdivsqrtfsm( input logic StallM, FlushE, input logic IntDivE, input logic ISpecialCaseE, - input logic [`DURLEN-1:0] cycles, + input logic [`DURLEN-1:0] CyclesE, output logic IFDivStartE, output logic FDivBusyE, FDivDoneE, output logic SpecialCaseM @@ -67,7 +67,7 @@ module fdivsqrtfsm( state <= #1 IDLE; end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE // end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE - step <= cycles; + step <= CyclesE; if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 04739ee8..a63fad82 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -43,7 +43,7 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, output logic ISpecialCaseE, - output logic [`DURLEN-1:0] cycles, + output logic [`DURLEN-1:0] CyclesE, output logic [`DIVBLEN:0] nM, mM, output logic NegQuotM, ALTBM, IntDivM, W64M, output logic AsM, BZeroM, @@ -62,6 +62,7 @@ module fdivsqrtpreproc ( logic NegQuotE; // Integer quotient is negative logic AsE, BsE; // Signs of integer inputs logic [`XLEN-1:0] AE; // input A after W64 adjustment + logic ALTBE; ////////////////////////////////////////////////////// // Integer Preprocessing @@ -113,13 +114,16 @@ module fdivsqrtpreproc ( assign XPreproc = (IFX << ell) << 1; assign DPreproc = 
(IFD << mE) << 1; + // *** CT: move to fdivsqrtintpreshift + ////////////////////////////////////////////////////// // Integer Right Shift to digit boundary + // Determine DivXShifted (X shifted to digit boundary) + // and nE (number of fractional digits) ////////////////////////////////////////////////////// if (`IDIV_ON_FPU) begin:intrightshift // Int Supported logic [`DIVBLEN:0] ZeroDiff, p; - logic ALTBE; // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros @@ -129,37 +133,24 @@ module fdivsqrtpreproc ( // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; - /* verilator lint_off WIDTH */ // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps if (`LOGRK > 0) begin // more than 1 bit per cycle logic [`LOGRK-1:0] IntTrunc, RightShiftX; logic [`DIVBLEN:0] TotalIntBits, IntSteps; - + /* verilator lint_off WIDTH */ assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps + /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting assign nE = p; assign DivXShifted = DivX; end - /* verilator lint_on WIDTH */ - // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); - flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); - flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); - flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(`DIVBLEN+1) nreg(clk, 
IFDivStartE, nE, nM); - flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); - flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); - if (`XLEN==64) - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end else begin - assign X = PreShiftX; assign ISpecialCaseE = 0; end @@ -183,21 +174,35 @@ module fdivsqrtpreproc ( // Selet integer or floating-point operands ////////////////////////////////////////////////////// - mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + if (`IDIV_ON_FPU) begin + mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + end else begin + assign X = PreShiftX; + end // Divisior register flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); - - - - - // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles); + fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); + + if (`IDIV_ON_FPU) begin:intpipelineregs + // pipeline registers + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); + flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); + flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); + flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); + flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); + flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); + if (`XLEN==64) + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); + end + endmodule From f9ca280e01a65cfa5246b36bee08751d2f568f54 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:48:23 -0700 Subject: [PATCH 26/35] continued cleanup --- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv 
b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index a63fad82..b3c97c27 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -55,10 +55,10 @@ module fdivsqrtpreproc ( logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division - logic signedDiv; // signed division + logic SignedDivE; // signed division logic NegQuotE; // Integer quotient is negative logic AsE, BsE; // Signs of integer inputs logic [`XLEN-1:0] AE; // input A after W64 adjustment @@ -72,20 +72,20 @@ module fdivsqrtpreproc ( logic [`XLEN-1:0] BE, PosA, PosB; // Extract inputs, signs, zero, depending on W64 mode if applicable - assign signedDiv = ~Funct3E[0]; + assign SignedDivE = ~Funct3E[0]; // Source handling if (`XLEN==64) begin // 64-bit, supports W64 - mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & signedDiv}}, ForwardedSrcAE[31:0]}, W64E, AE); - mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & signedDiv}}, ForwardedSrcBE[31:0]}, W64E, BE); + mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE); + mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE); end else begin // 32 bits only assign AE = ForwardedSrcAE; assign BE = ForwardedSrcBE; end assign AZeroE = ~(|AE); assign BZeroE = ~(|BE); - assign AsE = AE[`XLEN-1] & signedDiv; - assign BsE = BE[`XLEN-1] & signedDiv; + assign AsE = AE[`XLEN-1] & SignedDivE; + assign BsE = BE[`XLEN-1] & SignedDivE; assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative // Force integer inputs to be 
postiive @@ -162,10 +162,10 @@ module fdivsqrtpreproc ( // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. ////////////////////////////////////////////////////// - mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); assign DivX = {3'b000, ~NumerZeroE, XPreproc}; // Sqrt is initialized on step one as R(X-1), so depends on Radix + mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); @@ -192,7 +192,7 @@ module fdivsqrtpreproc ( if (`IDIV_ON_FPU) begin:intpipelineregs // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); From e11212598fbc5b651d73d92545480749148812fa Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 17:35:01 -0700 Subject: [PATCH 27/35] fdivsqrt cleanup --- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index b3c97c27..3de4b252 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -50,7 +50,7 @@ module fdivsqrtpreproc ( output logic [`XLEN-1:0] AM ); - logic [`DIVb-1:0] XPreproc, DPreproc; + logic [`DIVb-1:0] Xfract, Dfract; logic [`DIVb:0] PreSqrtX; logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) @@ -111,8 +111,8 @@ module fdivsqrtpreproc ( lzc #(`DIVb) lzcY 
(IFD, mE); // Normalization shift: shift off leading one - assign XPreproc = (IFX << ell) << 1; - assign DPreproc = (IFD << mE) << 1; + assign Xfract = (IFX << ell) << 1; + assign Dfract = (IFD << mE) << 1; // *** CT: move to fdivsqrtintpreshift @@ -154,6 +154,8 @@ module fdivsqrtpreproc ( assign ISpecialCaseE = 0; end + // CT *** fdivsqrtfplead1 + ////////////////////////////////////////////////////// // Floating-Point Preprocessing // append leading 1 (for nonzero inputs) @@ -162,10 +164,10 @@ module fdivsqrtpreproc ( // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. ////////////////////////////////////////////////////// - assign DivX = {3'b000, ~NumerZeroE, XPreproc}; + assign DivX = {3'b000, ~NumerZeroE, Xfract}; // Sqrt is initialized on step one as R(X-1), so depends on Radix - mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); @@ -181,7 +183,7 @@ module fdivsqrtpreproc ( end // Divisior register - flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); + flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); From 2ed9384238f48468220064c12f5587bb216f531c Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 21 Apr 2023 20:43:37 -0700 Subject: [PATCH 28/35] pmpcfg test cases Increased IFU coverage from 83.37% to 83.53% and LSU coverage from 93.14% to 93.28%. 
--- tests/coverage/pmpcfg.S | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S index 5b3e37b5..fd838041 100644 --- a/tests/coverage/pmpcfg.S +++ b/tests/coverage/pmpcfg.S @@ -1,6 +1,6 @@ // pmpcfg part 1 // Kevin Wan, kewan@hmc.edu, 4/18/2023 -// Liam Chalk, lchalk@hmc.edu, 4/19/2023 +// Liam Chalk, lchalk@hmc.edu, 4/21/2023 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. // See the next part in pmpcfg1.S @@ -19,32 +19,37 @@ main: li t0, 0x90000000 csrw pmpaddr0, t0 - li t0, 0x00000017 + li t0, 0x00001700 + csrw pmpcfg0, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00001700 csrw pmpcfg1, t0 li t0, 0x90000000 csrw pmpaddr0, t0 - li t0, 0x00000017 + li t0, 0x00001700 csrw pmpcfg2, t0 li t0, 0x90000000 csrw pmpaddr0, t0 - li t0, 0x00000017 + li t0, 0x00001700 csrw pmpcfg3, t0 li t0, 0x90000000 csrw pmpaddr1, t0 - li t0, 0x00000017 + li t0, 0x00001700 csrw pmpcfg1, t0 li t0, 0x90000000 - csrw pmpaddr1, t0 - li t0, 0x00000017 + csrw pmpaddr2, t0 + li t0, 0x00001700 csrw pmpcfg2, t0 li t0, 0x90000000 - csrw pmpaddr1, t0 - li t0, 0x00000017 + csrw pmpaddr3, t0 + li t0, 0x00001700 csrw pmpcfg3, t0 li t0, 0x8800000000000000 From 87aff3dcc7e0cb5f9e6092a7f8ad20b490ec2ecb Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 09:38:14 -0700 Subject: [PATCH 29/35] test plan update --- docs/testplans/testplan.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/testplans/testplan.md b/docs/testplans/testplan.md index a25b3a18..37390a63 100644 --- a/docs/testplans/testplan.md +++ b/docs/testplans/testplan.md @@ -1,6 +1,29 @@ -# CORE-V Wally Test Plan +# CORE-V Wally Design Verification Test Plan + +CORE-V Wally is functionally tested in the following ways. Each test is run in lock-step against ImperasDV to ensure all architectural state is correct after each instruction. 
+ +| Functions | Coverage Method | Status | +| ----------- | ----------- |----| +| Instructions | riscv-arch-test | Pass | +| Privileged Unit | wally-riscv-arch-test | Pass | +| Virtual Memory | wally-riscv-arch-test | Pass | +| PMP | wally-riscv-arch-test | Pass | +| Peripherals | wally-riscv-arch-test | Pass | +| Floating-Point | TestFloat | Pass | +| General | Code Coverage | 91% | +| General | Boot Linux in Sim | Pass | +| General | Boot Linux on FPGA | Pass | + + +The following performance validation is also run: +| Function | Method | Status | +| --- | --- | --- | +| Overall Performance | embench | Pass| +| Overall Performance | coremark | Pass | +| Branch Predictor | *** | Pass | +| Cache Miss Rate | *** | Pass | + + -CORE-V Wally is tested in the following ways: * Run [RISC-V Architecture Compatibility Tests](https://github.com/riscv-non-isa/riscv-arch-test) in lock-step against the ImperasDV reference model. * Run custom tests to cover virtual memory, PMP, privileged unit, and peripherals in lock step against ImperasDV. From 0871bbe8f20143bcf3de3010bc8162b44905a80c Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 10:07:48 -0700 Subject: [PATCH 30/35] Fixed syntax error in exclusion. 
Arbitrarily picked -e 1; fix if this isn't right --- src/cache/cachefsm.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 34f1778f..544e3454 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -159,7 +159,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( assign SelFlush = (CurrState == STATE_READY & FlushCache) | (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_WRITEBACK); - // coverage off -item e -fecexprrow 1 + // coverage off -item e 1 -fecexprrow 1 // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty); From 8be5ed9b675d11f985a21361b1d256941b3abf92 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 12:22:45 -0700 Subject: [PATCH 31/35] Attempted to cause interrupt during fdivsqrt. Fixed enabling fpu in fpu.S. Fdivsqrt exclusions for coverage. --- sim/coverage-exclusions-rv64gc.do | 7 +++++-- src/fpu/fdivsqrt/fdivsqrtfsm.sv | 3 ++- tests/coverage/WALLY-init-lib.h | 3 +++ tests/coverage/fpu.S | 24 ++++++++++++++++++++++-- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 4f90333a..45d98a72 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -31,11 +31,14 @@ do GetLineNum.do # LZA (i<64) statement confuses coverage tool -# This is ugly to exlcude the whole file - is there a better option? // coverage off isn't working +# DH 4/22/23: Exclude all LZAs coverage exclude -srcfile lzc.sv -# FDIVSQRT has +# DH 4/22/23: FDIVSQRT can't go directly from done to busy again coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY +# DH 4/22/23: The busy->idle transition only occurs if a FlushE occurs while the divider is busy. 
The flush is caused by a trap or return, +# which won't happen while the divider is busy. +coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state BUSY->IDLE ### Exclude D$ states and logic for the I$ instance # This is cleaner than trying to set an I$-specific pragma in cachefsm.sv (which would exclude it for the D$ instance too) diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 75010f74..d1d9dda1 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -63,10 +63,11 @@ module fdivsqrtfsm( flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc always_ff @(posedge clk) begin + // coverage off: dh 4/22/23 FlushE doesn't seem to happen while fdivsqrt is busy if (reset | FlushE) begin + // coverage on state <= #1 IDLE; end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE -// end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE step <= CyclesE; if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; diff --git a/tests/coverage/WALLY-init-lib.h b/tests/coverage/WALLY-init-lib.h index 6b6dd6dd..ec179a0d 100644 --- a/tests/coverage/WALLY-init-lib.h +++ b/tests/coverage/WALLY-init-lib.h @@ -63,6 +63,9 @@ trap_handler: bgez t0, exception # if msb is clear, it is an exception interrupt: # must be a timer interrupt + li t0, -1 # set mtimecmp to biggest number so it doesnt interrupt again + li t1, 0x02004000 # MTIMECMP in CLINT + sd t0, 0(t1) j trap_return # clean up and return exception: diff --git a/tests/coverage/fpu.S b/tests/coverage/fpu.S index b2a52be0..87998089 100644 --- a/tests/coverage/fpu.S +++ b/tests/coverage/fpu.S @@ -28,7 +28,7 @@ main: - #bseti t0, zero, 14 # turn on FPU + bseti t0, zero, 14 # turn on FPU csrs mstatus, t0 #Pull denormalized FP number from memory and pass it to fclass.S for coverage @@ -105,6 +105,25 @@ main: # fcvt.w.q a0, ft0 # 
fcvt.q.d ft3, ft0 + // fdivsqrt: test busy->idle transition caused by a FlushE while divider is busy (when interrupt arrives) + // This code doesn't actually trigger a busy->idle transition because the pending timer interrupt doesn't occur until the division finishes. + li t0, 0x3F812345 # random value slightly bigger than 1 + li t1, 0x3F823456 + fmv.w.x ft0, t0 # move int to fp register + fmv.w.x ft1, t1 + li t0, -1 # set mtimecmp to biggest number so it doesnt interrupt again + li t1, 0x02004000 # MTIMECMP in CLINT + sd t0, 0(t1) + csrsi mstatus, 0b1000 # enable interrupts with mstatus.MIE + li t1, 0x0200bff8 # read MTIME in CLINT + ld t0, 0(t1) + addi t0, t0, 11 + li t1, 0x02004000 # MTIMECMP in CLINT + sd t0, 0(t1) # write mtime+11 to cause interrupt soon. This is very touchy timing and is sensitive to cache line fetch latency + nop + fdiv.s ft2, ft1, ft0 # should get interrupted, triggering a flush + csrci mstatus, 0b1000 # disable interrupts with mstatus.MIE + # Completing branch coverage in fctrl.sv .word 0x38007553 // Testing the all False case for 119 - funct7 under, op = 101 0011 .word 0x40000053 // Line 145 All False Test case - illegal instruction? 
@@ -145,4 +164,5 @@ TestData2: .word 0x7f800000 #INF .int 0xbf800000 #FP -1.0 .int 0x7fa00000 #SNaN -.int 0x3fffffff #OverFlow Test \ No newline at end of file +.int 0x3fffffff #OverFlow Test +DivTestData: From a5b80bc4402c4bedf7a5891535d3e3a5b43c9cea Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 15:27:05 -0700 Subject: [PATCH 32/35] Removed unproven fdivsqrt exclusion --- src/fpu/fdivsqrt/fdivsqrtfsm.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index d1d9dda1..ba0758ee 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -63,9 +63,7 @@ module fdivsqrtfsm( flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc always_ff @(posedge clk) begin - // coverage off: dh 4/22/23 FlushE doesn't seem to happen while fdivsqrt is busy if (reset | FlushE) begin - // coverage on state <= #1 IDLE; end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE step <= CyclesE; From 1d532dfcfc9802f24f41368920be24ba3c3679aa Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 15:32:39 -0700 Subject: [PATCH 33/35] Fault on writes to odd-numbered PMPCFG in RV64 --- src/privileged/csrm.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/privileged/csrm.sv b/src/privileged/csrm.sv index f0e5f00d..fb519be3 100644 --- a/src/privileged/csrm.sv +++ b/src/privileged/csrm.sv @@ -171,7 +171,8 @@ module csrm #(parameter IllegalCSRMAccessM = !(`S_SUPPORTED) & (CSRAdrM == MEDELEG | CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode if (CSRAdrM >= PMPADDR0 & CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry CSRMReadValM = {{(`XLEN-(`PA_BITS-2)){1'b0}}, PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]}; - else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin + else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + 
`PMP_ENTRIES/4 & (`XLEN==32 | CSRAdrM[0] == 0)) begin + // only even-numbered PMPCFG entries exist in RV64 if (`XLEN==64) begin entry = ({CSRAdrM[11:1], 1'b0} - PMPCFG0)*4; // disregard odd entries in RV64 CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+7],PMPCFG_ARRAY_REGW[entry+6],PMPCFG_ARRAY_REGW[entry+5],PMPCFG_ARRAY_REGW[entry+4], From c6817892968c9484c9f98e38bfab2e664473b35d Mon Sep 17 00:00:00 2001 From: Diego Herrera Vicioso Date: Mon, 24 Apr 2023 02:06:53 -0700 Subject: [PATCH 34/35] Excluded coverage for impossible cases in wficountreg and status.MPRV --- sim/coverage-exclusions-rv64gc.do | 6 +++++- src/privileged/csrsr.sv | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 45d98a72..b4441e5b 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -127,4 +127,8 @@ coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$lin set line [GetLineNum ../src/mmu/pmachecker.sv "WriteAccessM \\| ExecuteAccessF"] coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-5 set line [GetLineNum ../src/mmu/pmachecker.sv "ReadAccessM \\| ExecuteAccessF"] -coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-3 \ No newline at end of file +coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-3 + +# Excluding reset and clear for impossible case in the wficountreg in privdec +set line [GetLineNum ../src/generic/flop/floprc.sv "reset \\| clear"] +coverage exclude -scope /dut/core/priv/priv/pmd/wfi/wficountreg -linerange $line-$line -item c 1 -feccondrow 2 diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv index 60968a68..61a6f324 100644 --- a/src/privileged/csrsr.sv +++ b/src/privileged/csrsr.sv @@ -122,7 +122,10 @@ module csrsr ( logic [1:0] EndiannessPrivMode; always_comb 
begin if (SelHPTW) EndiannessPrivMode = `S_MODE; + //coverage off -item c 1 -feccondrow 1 + // status.MPRV always gets reset upon leaving machine mode, so MPRV will never be high when out of machine mode else if (PrivilegeModeW == `M_MODE & STATUS_MPRV) EndiannessPrivMode = STATUS_MPP; + //coverage on else EndiannessPrivMode = PrivilegeModeW; case (EndiannessPrivMode) From 309a56b8f85c46eb802d74b47c8b19f8f592c9b8 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 25 Apr 2023 15:37:04 -0700 Subject: [PATCH 35/35] pmpaddr0 and pmpaddr2 test cases Writing 0x00170000 and 0x17000000 to pmpaddr0 and pmpaddr2. Increased IFU coverage from 83.53% to 83.68% and LSU coverage from 93.29% to 93.45%. --- tests/coverage/pmpcfg.S | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S index fd838041..bcc8f395 100644 --- a/tests/coverage/pmpcfg.S +++ b/tests/coverage/pmpcfg.S @@ -1,6 +1,6 @@ // pmpcfg part 1 // Kevin Wan, kewan@hmc.edu, 4/18/2023 -// Liam Chalk, lchalk@hmc.edu, 4/21/2023 +// Liam Chalk, lchalk@hmc.edu, 4/25/2023 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. // See the next part in pmpcfg1.S @@ -52,6 +52,26 @@ main: li t0, 0x00001700 csrw pmpcfg3, t0 + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00170000 + csrw pmpcfg0, t0 + + li t0, 0x90000000 + csrw pmpaddr2, t0 + li t0, 0x00170000 + csrw pmpcfg2, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x17000000 + csrw pmpcfg0, t0 + + li t0, 0x90000000 + csrw pmpaddr2, t0 + li t0, 0x17000000 + csrw pmpcfg2, t0 + li t0, 0x8800000000000000 csrw pmpcfg2, t0 li t0, 0x88000000000000