Added FLEN, NE, NF to config and started using these in FMA1

This commit is contained in:
David Harris 2021-07-18 17:28:25 -04:00
parent e31d2ef9f5
commit f22b6e7397
3 changed files with 52 additions and 31 deletions

View File

@ -26,12 +26,14 @@
`include "wally-constants.vh" `include "wally-constants.vh"
// macros to define supported modes // macros to define supported modes
// NOTE: No hardware support for Q yet
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
`define F_SUPPORTED ((`MISA >> 5) % 2 == 1) `define F_SUPPORTED ((`MISA >> 5) % 2 == 1)
`define M_SUPPORTED ((`MISA >> 12) % 2 == 1) `define M_SUPPORTED ((`MISA >> 12) % 2 == 1)
`define Q_SUPPORTED ((`MISA >> 16) % 2 == 1)
`define S_SUPPORTED ((`MISA >> 18) % 2 == 1) `define S_SUPPORTED ((`MISA >> 18) % 2 == 1)
`define U_SUPPORTED ((`MISA >> 20) % 2 == 1) `define U_SUPPORTED ((`MISA >> 20) % 2 == 1)
@ -44,8 +46,12 @@
`define LOG_XLEN (`XLEN == 32 ? 5 : 6) `define LOG_XLEN (`XLEN == 32 ? 5 : 6)
// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries) // Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
`define PMPCFG_ENTRIES (`PMP_ENTRIES\8) `define PMPCFG_ENTRIES (`PMP_ENTRIES/8)
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
`define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
`define NE (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
`define NF (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
// Disable spurious Verilator warnings // Disable spurious Verilator warnings

View File

@ -1,3 +1,29 @@
///////////////////////////////////////////
//
// Written: Katherine Parry, David Harris
// Modified: 6/23/2021
//
// Purpose: Floating point multiply-accumulate of configurable size
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
module fma( module fma(
input logic clk, input logic clk,
input logic reset, input logic reset,
@ -54,25 +80,24 @@ endmodule
module fma1( module fma1(
// input logic XSgnE, YSgnE, ZSgnE, // input logic XSgnE, YSgnE, ZSgnE,
input logic [10:0] XExpE, YExpE, ZExpE, input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
input logic [51:0] XFracE, YFracE, ZFracE, input logic [`NF-1:0] XFracE, YFracE, ZFracE, // fractions in U(0.NF) format]
input logic XAssumed1E, YAssumed1E, ZAssumed1E, input logic XAssumed1E, YAssumed1E, ZAssumed1E,
input logic XDenormE, YDenormE, ZDenormE, input logic XDenormE, YDenormE, ZDenormE,
input logic XZeroE, YZeroE, ZZeroE, input logic XZeroE, YZeroE, ZZeroE,
input logic [10:0] BiasE, input logic [`NE-1:0] BiasE,
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single input logic FmtE, // precision 1 = double 0 = single
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac output logic [2*`NF+1:0] ProdManE, // 1.X frac * 1.Y frac in U(2.2Nf) format
output logic [161:0] AlignedAddendE, // Z aligned for addition output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in *** format
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias output logic [`NE+1:0] ProdExpE, // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
output logic AddendStickyE, // sticky bit that is calculated during alignment output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE // set the product to zero before addition if the product is too small to matter output logic KillProdE // set the product to zero before addition if the product is too small to matter
); );
logic [12:0] AlignCnt; // how far to shift the addend to align with the product logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits?
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [213:0] ZManPreShifted; // input to the alignment shifter logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Calculate the product // Calculate the product
@ -83,21 +108,14 @@ module fma1(
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// verilator lint_off WIDTH // verilator lint_off WIDTH
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : assign ProdExpE = (XZeroE|YZeroE) ? 0 :
XExpE + YExpE - BiasE + XDenormE + YDenormE; XExpE + YExpE - BiasE + XDenormE + YDenormE;
// verilator lint_on WIDTH
// Calculate the product's mantissa // Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE}; assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE};
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Alignment shifter // Alignment shifter
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -108,8 +126,6 @@ module fma1(
// - Denormal numbers have an exponent value of 1, however they are // - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one to the exponent if it is a denormal number // represented with an exponent of 0. add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - ZExpE - ZDenormE; assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
// verilator lint_on WIDTH
// Default Addition without shifting // Default Addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 | // | 55'b0 | 106'b(product) | 2'b0 |
@ -160,10 +176,7 @@ module fma1(
end end
end end
assign AlignedAddendE = ZManShifted[213:52]; assign AlignedAddendE = ZManShifted[213:52];
endmodule endmodule

View File

@ -57,7 +57,7 @@ module hptw
logic [`PPN_BITS-1:0] CurrentPPN; logic [`PPN_BITS-1:0] CurrentPPN;
logic MemWrite; logic MemWrite;
logic Executable, Writable, Readable, Valid; logic Executable, Writable, Readable, Valid;
logic MegapageMisaligned, GigapageMisaligned, TerapageMisaligned; logic Misaligned, MegapageMisaligned;
logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE; logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
logic StartWalk; logic StartWalk;
logic TLBMiss; logic TLBMiss;
@ -144,20 +144,22 @@ module hptw
// Initial state and misalignment for RV32/64 // Initial state and misalignment for RV32/64
if (`XLEN == 32) begin if (`XLEN == 32) begin
assign InitialWalkerState = LEVEL1_SET_ADR; assign InitialWalkerState = LEVEL1_SET_ADR;
assign TerapageMisaligned = 0; // not applicable
assign GigapageMisaligned = 0; // not applicable
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0 assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
assign Misaligned = ((WalkerState == LEVEL1) & MegapageMisaligned);
end else begin end else begin
logic GigapageMisaligned, TerapageMisaligned;
assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR; assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR;
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0 assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0 assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0 assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
assign Misaligned = ((WalkerState == LEVEL3) & TerapageMisaligned) | ((WalkerState == LEVEL2) & GigapageMisaligned) | ((WalkerState == LEVEL1) & MegapageMisaligned);
end end
// Page Table Walker FSM // Page Table Walker FSM
// If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states // If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states
// to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the // to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the
// HPTW as shown below to keep the D$ setup time out of the critical path. // HPTW as shown below to keep the D$ setup time out of the critical path.
// *** Is this really true? Talk with Ross. Seems like it's the next state logic on critical path instead.
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState); flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
always_comb always_comb
case (WalkerState) case (WalkerState)
@ -166,19 +168,19 @@ module hptw
LEVEL3_SET_ADR: NextWalkerState = LEVEL3_READ; LEVEL3_SET_ADR: NextWalkerState = LEVEL3_READ;
LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ; LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ;
else NextWalkerState = LEVEL3; else NextWalkerState = LEVEL3;
LEVEL3: if (ValidLeafPTE && ~TerapageMisaligned) NextWalkerState = LEAF; LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR; else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR;
else NextWalkerState = FAULT; else NextWalkerState = FAULT;
LEVEL2_SET_ADR: NextWalkerState = LEVEL2_READ; LEVEL2_SET_ADR: NextWalkerState = LEVEL2_READ;
LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ; LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ;
else NextWalkerState = LEVEL2; else NextWalkerState = LEVEL2;
LEVEL2: if (ValidLeafPTE && ~GigapageMisaligned) NextWalkerState = LEAF; LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR; else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR;
else NextWalkerState = FAULT; else NextWalkerState = FAULT;
LEVEL1_SET_ADR: NextWalkerState = LEVEL1_READ; LEVEL1_SET_ADR: NextWalkerState = LEVEL1_READ;
LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ; LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ;
else NextWalkerState = LEVEL1; else NextWalkerState = LEVEL1;
LEVEL1: if (ValidLeafPTE && ~MegapageMisaligned) NextWalkerState = LEAF; LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR; else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR;
else NextWalkerState = FAULT; else NextWalkerState = FAULT;
LEVEL0_SET_ADR: NextWalkerState = LEVEL0_READ; LEVEL0_SET_ADR: NextWalkerState = LEVEL0_READ;