From f22b6e739724db43bc69ffee4f474b8862403925 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 18 Jul 2021 17:28:25 -0400 Subject: [PATCH] Added FLEN, NE, NF to config and started using these in FMA1 --- wally-pipelined/config/shared/wally-shared.vh | 8 ++- wally-pipelined/src/fpu/fma.sv | 61 +++++++++++-------- wally-pipelined/src/mmu/hptw.sv | 14 +++-- 3 files changed, 52 insertions(+), 31 deletions(-) diff --git a/wally-pipelined/config/shared/wally-shared.vh b/wally-pipelined/config/shared/wally-shared.vh index 67727f223..c3709a563 100644 --- a/wally-pipelined/config/shared/wally-shared.vh +++ b/wally-pipelined/config/shared/wally-shared.vh @@ -26,12 +26,14 @@ `include "wally-constants.vh" // macros to define supported modes +// NOTE: No hardware support for Q yet `define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1) `define F_SUPPORTED ((`MISA >> 5) % 2 == 1) `define M_SUPPORTED ((`MISA >> 12) % 2 == 1) +`define Q_SUPPORTED ((`MISA >> 16) % 2 == 1) `define S_SUPPORTED ((`MISA >> 18) % 2 == 1) `define U_SUPPORTED ((`MISA >> 20) % 2 == 1) @@ -44,8 +46,12 @@ `define LOG_XLEN (`XLEN == 32 ? 5 : 6) // Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries) -`define PMPCFG_ENTRIES (`PMP_ENTRIES\8) +`define PMPCFG_ENTRIES (`PMP_ENTRIES/8) +// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits +`define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32) +`define NE (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) +`define NF (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 
52 : 23) // Disable spurious Verilator warnings diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index 5bf7785e1..b73d95d42 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -1,3 +1,29 @@ +/////////////////////////////////////////// +// +// Written: Katherine Parry, David Harris +// Modified: 6/23/2021 +// +// Purpose: Floating point multiply-accumulate of configurable size +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + module fma( input logic clk, input logic reset, @@ -54,25 +80,24 @@ endmodule module fma1( // input logic XSgnE, YSgnE, ZSgnE, - input logic [10:0] XExpE, YExpE, ZExpE, - input logic [51:0] XFracE, YFracE, ZFracE, + input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format + input logic [`NF-1:0] XFracE, YFracE, ZFracE, // fractions in U(0.NF) format input logic XAssumed1E, YAssumed1E, ZAssumed1E, input logic XDenormE, YDenormE, ZDenormE, input logic XZeroE, YZeroE, ZZeroE, - input logic [10:0] BiasE, + input logic [`NE-1:0] BiasE, input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic FmtE, // precision 1 = double 0 = single - output logic [105:0] ProdManE, // 1.X frac * 1.Y frac - output logic [161:0] AlignedAddendE, // Z aligned for addition - output logic [12:0] ProdExpE, // X exponent + Y exponent - bias + output logic [2*`NF+1:0] ProdManE, // 1.X frac * 1.Y frac in U(2.2NF) format + output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in *** format + output logic [`NE+1:0] ProdExpE, // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign output logic AddendStickyE, // sticky bit that is calculated during alignment output logic KillProdE // set the product to zero before addition if the product is too small to matter ); - logic [12:0] AlignCnt; // how far to shift the addend to align with the product - logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit - logic [213:0] ZManPreShifted; // input to the alignment shifter - + logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits? 
+ logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bit + logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter /////////////////////////////////////////////////////////////////////////////// // Calculate the product @@ -83,20 +108,13 @@ module fma1( /////////////////////////////////////////////////////////////////////////////// // verilator lint_off WIDTH - assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : + assign ProdExpE = (XZeroE|YZeroE) ? 0 : XExpE + YExpE - BiasE + XDenormE + YDenormE; + // verilator lint_on WIDTH // Calculate the product's mantissa // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE}; - - - - - - - - /////////////////////////////////////////////////////////////////////////////// // Alignment shifter @@ -108,8 +126,6 @@ module fma1( // - Denormal numbers have an an exponent value of 1, however they are // represented with an exponent of 0. 
add one to the exponent if it is a denormal number assign AlignCnt = ProdExpE - ZExpE - ZDenormE; - // verilator lint_on WIDTH - // Defualt Addition without shifting // | 55'b0 | 106'b(product) | 2'b0 | @@ -160,10 +176,7 @@ module fma1( end end - - assign AlignedAddendE = ZManShifted[213:52]; - endmodule diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index ec8b050ea..e2be1dc1a 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -57,7 +57,7 @@ module hptw logic [`PPN_BITS-1:0] CurrentPPN; logic MemWrite; logic Executable, Writable, Readable, Valid; - logic MegapageMisaligned, GigapageMisaligned, TerapageMisaligned; + logic Misaligned, MegapageMisaligned; logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE; logic StartWalk; logic TLBMiss; @@ -144,20 +144,22 @@ module hptw // Initial state and misalignment for RV32/64 if (`XLEN == 32) begin assign InitialWalkerState = LEVEL1_SET_ADR; - assign TerapageMisaligned = 0; // not applicable - assign GigapageMisaligned = 0; // not applicable assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0 + assign Misaligned = ((WalkerState == LEVEL1) & MegapageMisaligned); end else begin + logic GigapageMisaligned, TerapageMisaligned; assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR; assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0 assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0 assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0 + assign Misaligned = ((WalkerState == LEVEL3) & TerapageMisaligned) | ((WalkerState == LEVEL2) & GigapageMisaligned) | ((WalkerState == LEVEL1) & MegapageMisaligned); end // Page Table Walker FSM // If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states // to decrease the latency of the HPTW. 
However, if the D$ is a cycle limiter, it's better to leave the // HPTW as shown below to keep the D$ setup time out of the critical path. + // *** Is this really true? Talk with Ross. Seems like it's the next state logic on critical path instead. flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); always_comb case (WalkerState) @@ -166,19 +168,19 @@ module hptw LEVEL3_SET_ADR: NextWalkerState = LEVEL3_READ; LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ; else NextWalkerState = LEVEL3; - LEVEL3: if (ValidLeafPTE && ~TerapageMisaligned) NextWalkerState = LEAF; + LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR; else NextWalkerState = FAULT; LEVEL2_SET_ADR: NextWalkerState = LEVEL2_READ; LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ; else NextWalkerState = LEVEL2; - LEVEL2: if (ValidLeafPTE && ~GigapageMisaligned) NextWalkerState = LEAF; + LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR; else NextWalkerState = FAULT; LEVEL1_SET_ADR: NextWalkerState = LEVEL1_READ; LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ; else NextWalkerState = LEVEL1; - LEVEL1: if (ValidLeafPTE && ~MegapageMisaligned) NextWalkerState = LEAF; + LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF; else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR; else NextWalkerState = FAULT; LEVEL0_SET_ADR: NextWalkerState = LEVEL0_READ;