Added FLEN, NE, NF to config and started using these in FMA1

2025-02-11 06:05:49 +00:00 · 2021-07-18 17:28:25 -04:00 · 2021-07-18 17:28:25 -04:00 · f22b6e7397
commit f22b6e7397
parent e31d2ef9f5
3 changed files with 52 additions and 31 deletions
--- a/wally-pipelined/config/shared/wally-shared.vh
+++ b/wally-pipelined/config/shared/wally-shared.vh
@ -26,12 +26,14 @@
 `include "wally-constants.vh"

 // macros to define supported modes
+// NOTE: No hardware support for Q yet

 `define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
 `define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
 `define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
 `define F_SUPPORTED ((`MISA >> 5) % 2 == 1)
 `define M_SUPPORTED ((`MISA >> 12) % 2 == 1)
+`define Q_SUPPORTED ((`MISA >> 16) % 2 == 1)
 `define S_SUPPORTED ((`MISA >> 18) % 2 == 1)
 `define U_SUPPORTED ((`MISA >> 20) % 2 == 1)

@ -44,8 +46,12 @@
 `define LOG_XLEN (`XLEN == 32 ? 5 : 6)

 // Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
-`define PMPCFG_ENTRIES (`PMP_ENTRIES\8)
+`define PMPCFG_ENTRIES (`PMP_ENTRIES/8)

+// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
+`define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
+`define NE   (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
+`define NF   (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)

 // Disable spurious Verilator warnings

--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@ -1,3 +1,29 @@
+///////////////////////////////////////////
+//
+// Written: Katherine Parry, David Harris
+// Modified: 6/23/2021
+//
+// Purpose: Floating point multiply-accumulate of configurable size
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
 module fma(
    input logic             clk,
    input logic             reset,
@ -54,25 +80,24 @@ endmodule

 module fma1(
    // input logic        XSgnE, YSgnE, ZSgnE,
-    input logic [10:0] XExpE, YExpE, ZExpE,
-    input logic [51:0] XFracE, YFracE, ZFracE,
+    input logic [`NE-1:0] XExpE, YExpE, ZExpE,      // biased exponents in B(NE.0) format
+    input logic [`NF-1:0] XFracE, YFracE, ZFracE,   // fractions in U(0.NF) format
    input logic        XAssumed1E, YAssumed1E, ZAssumed1E,
    input logic        XDenormE, YDenormE, ZDenormE,
    input logic XZeroE, YZeroE, ZZeroE,
-    input logic [10:0] BiasE,
+    input logic [`NE-1:0] BiasE,
    input logic     [2:0]       FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic                 FmtE,       // precision 1 = double 0 = single
-    output logic    [105:0]     ProdManE,   // 1.X frac * 1.Y frac
-    output logic    [161:0]     AlignedAddendE, // Z aligned for addition
-    output logic    [12:0]      ProdExpE,       // X exponent + Y exponent - bias
+    output logic    [2*`NF+1:0]     ProdManE,   // 1.X frac * 1.Y frac in U(2.2NF) format
+    output logic    [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition in *** format
+    output logic    [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
    output logic                KillProdE      // set the product to zero before addition if the product is too small to matter
    );

-    logic [12:0]    AlignCnt;           // how far to shift the addend to align with the product
-    logic [213:0]   ZManShifted;                // output of the alignment shifter including sticky bit
-    logic [213:0]   ZManPreShifted;     // input to the alignment shifter
-    
+    logic [`NE+1:0]    AlignCnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits?
+    logic [4*`NF+5:0]   ZManShifted;                // output of the alignment shifter including sticky bit
+    logic [4*`NF+5:0]   ZManPreShifted;     // input to the alignment shifter
    
    ///////////////////////////////////////////////////////////////////////////////
    // Calculate the product
@ -83,20 +108,13 @@ module fma1(
    ///////////////////////////////////////////////////////////////////////////////
   
    // verilator lint_off WIDTH
-    assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
+    assign ProdExpE = (XZeroE|YZeroE) ? 0 :
                 XExpE + YExpE - BiasE + XDenormE + YDenormE;
+    // verilator lint_on WIDTH

    // Calculate the product's mantissa
    //      - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
    assign ProdManE =  {XAssumed1E, XFracE} * {YAssumed1E, YFracE};
-
-
-
-
-
-
-
-
   
    ///////////////////////////////////////////////////////////////////////////////
    // Alignment shifter
@ -108,8 +126,6 @@ module fma1(
    //      - Denormal numbers have an exponent value of 1; however, they are
    //        represented with an exponent of 0. Add one to the exponent if it is a denormal number
    assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
-    // verilator lint_on WIDTH
-

    // Default addition without shifting
    //          |   55'b0    |  106'b(product)  | 2'b0 |
@ -160,10 +176,7 @@ module fma1(

        end
    end
-
-   
    assign AlignedAddendE = ZManShifted[213:52];
-
 endmodule


--- a/wally-pipelined/src/mmu/hptw.sv
+++ b/wally-pipelined/src/mmu/hptw.sv
@ -57,7 +57,7 @@ module hptw
      logic [`PPN_BITS-1:0]	    CurrentPPN;
      logic			    MemWrite;
      logic			    Executable, Writable, Readable, Valid;
-	  logic 			MegapageMisaligned, GigapageMisaligned, TerapageMisaligned;
+	  logic 			Misaligned, MegapageMisaligned;
      logic			    ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
      logic			    StartWalk;
 	  logic     		TLBMiss;
@ -144,20 +144,22 @@ module hptw
 	  // Initial state and misalignment for RV32/64
 	  if (`XLEN == 32) begin
 		assign InitialWalkerState = LEVEL1_SET_ADR;
-		assign TerapageMisaligned = 0; // not applicable
-		assign GigapageMisaligned = 0; // not applicable
 		assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
+		assign Misaligned = ((WalkerState == LEVEL1) & MegapageMisaligned);
 	  end else begin
+		logic  GigapageMisaligned, TerapageMisaligned;
 		assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR;
 		assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
 		assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
 		assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0		  
+		assign Misaligned = ((WalkerState == LEVEL3) & TerapageMisaligned) | ((WalkerState == LEVEL2) & GigapageMisaligned) | ((WalkerState == LEVEL1) & MegapageMisaligned);
 	  end

    // Page Table Walker FSM
 	// If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states
 	// to decrease the latency of the HPTW.  However, if the D$ is a cycle limiter, it's better to leave the
 	// HPTW as shown below to keep the D$ setup time out of the critical path.
+	// *** Is this really true?  Talk with Ross.  Seems like it's the next state logic on critical path instead.
 	flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); 
 	always_comb 
 	  case (WalkerState)
@ -166,19 +168,19 @@ module hptw
 	    LEVEL3_SET_ADR: 			NextWalkerState = LEVEL3_READ;
 	    LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ;
 	                else 			NextWalkerState = LEVEL3;
-	    LEVEL3: if (ValidLeafPTE && ~TerapageMisaligned) NextWalkerState = LEAF;
+	    LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
 		  		else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR;
 		 		else 				NextWalkerState = FAULT;
 	    LEVEL2_SET_ADR: 			NextWalkerState = LEVEL2_READ;
 	    LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ;
 	      			else 			NextWalkerState = LEVEL2;
-	    LEVEL2: if (ValidLeafPTE && ~GigapageMisaligned) NextWalkerState = LEAF;
+	    LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
 				else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR;
 				else 				NextWalkerState = FAULT;
 	    LEVEL1_SET_ADR: 			NextWalkerState = LEVEL1_READ;
 	    LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ;
 	      			else 			NextWalkerState = LEVEL1;
-	    LEVEL1: if (ValidLeafPTE && ~MegapageMisaligned) NextWalkerState = LEAF;
+	    LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
 	      		else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR;
 				else 				NextWalkerState = FAULT;
 	    LEVEL0_SET_ADR: 			NextWalkerState = LEVEL0_READ;