Added FLEN, NE, NF to config and started using these in FMA1

This commit is contained in:
David Harris 2021-07-18 17:28:25 -04:00
parent e31d2ef9f5
commit f22b6e7397
3 changed files with 52 additions and 31 deletions

View File

@ -26,12 +26,14 @@
`include "wally-constants.vh"
// macros to define supported modes
// NOTE: No hardware support for Q yet
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
`define F_SUPPORTED ((`MISA >> 5) % 2 == 1)
`define M_SUPPORTED ((`MISA >> 12) % 2 == 1)
`define Q_SUPPORTED ((`MISA >> 16) % 2 == 1)
`define S_SUPPORTED ((`MISA >> 18) % 2 == 1)
`define U_SUPPORTED ((`MISA >> 20) % 2 == 1)
@ -44,8 +46,12 @@
`define LOG_XLEN (`XLEN == 32 ? 5 : 6)
// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
`define PMPCFG_ENTRIES (`PMP_ENTRIES\8)
`define PMPCFG_ENTRIES (`PMP_ENTRIES/8)
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
`define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
`define NE (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
`define NF (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
// Disable spurious Verilator warnings

View File

@ -1,3 +1,29 @@
///////////////////////////////////////////
//
// Written: Katherine Parry, David Harris
// Modified: 6/23/2021
//
// Purpose: Floating point multiply-accumulate of configurable size
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
module fma(
input logic clk,
input logic reset,
@ -54,25 +80,24 @@ endmodule
module fma1(
// input logic XSgnE, YSgnE, ZSgnE,
input logic [10:0] XExpE, YExpE, ZExpE,
input logic [51:0] XFracE, YFracE, ZFracE,
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
input logic [`NF-1:0] XFracE, YFracE, ZFracE, // fractions in U(0.NF) format
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
input logic XDenormE, YDenormE, ZDenormE,
input logic XZeroE, YZeroE, ZZeroE,
input logic [10:0] BiasE,
input logic [`NE-1:0] BiasE,
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
output logic [161:0] AlignedAddendE, // Z aligned for addition
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
output logic [2*`NF+1:0] ProdManE, // 1.X frac * 1.Y frac in U(2.2Nf) format
output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in *** format
output logic [`NE+1:0] ProdExpE, // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE // set the product to zero before addition if the product is too small to matter
);
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [213:0] ZManPreShifted; // input to the alignment shifter
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits?
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
@ -83,20 +108,13 @@ module fma1(
///////////////////////////////////////////////////////////////////////////////
// verilator lint_off WIDTH
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
assign ProdExpE = (XZeroE|YZeroE) ? 0 :
XExpE + YExpE - BiasE + XDenormE + YDenormE;
// verilator lint_on WIDTH
// Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE};
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
@ -108,8 +126,6 @@ module fma1(
// - Denormal numbers have an exponent value of 1; however, they are
// represented with an exponent of 0. Add one to the exponent if it is a denormal number.
assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
// verilator lint_on WIDTH
// Default Addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 |
@ -160,10 +176,7 @@ module fma1(
end
end
assign AlignedAddendE = ZManShifted[213:52];
endmodule

View File

@ -57,7 +57,7 @@ module hptw
logic [`PPN_BITS-1:0] CurrentPPN;
logic MemWrite;
logic Executable, Writable, Readable, Valid;
logic MegapageMisaligned, GigapageMisaligned, TerapageMisaligned;
logic Misaligned, MegapageMisaligned;
logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
logic StartWalk;
logic TLBMiss;
@ -144,20 +144,22 @@ module hptw
// Initial state and misalignment for RV32/64
if (`XLEN == 32) begin
assign InitialWalkerState = LEVEL1_SET_ADR;
assign TerapageMisaligned = 0; // not applicable
assign GigapageMisaligned = 0; // not applicable
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
assign Misaligned = ((WalkerState == LEVEL1) & MegapageMisaligned);
end else begin
logic GigapageMisaligned, TerapageMisaligned;
assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR;
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
assign Misaligned = ((WalkerState == LEVEL3) & TerapageMisaligned) | ((WalkerState == LEVEL2) & GigapageMisaligned) | ((WalkerState == LEVEL1) & MegapageMisaligned);
end
// Page Table Walker FSM
// If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states
// to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the
// HPTW as shown below to keep the D$ setup time out of the critical path.
// *** Is this really true? Talk with Ross. It seems like the next-state logic is on the critical path instead.
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
always_comb
case (WalkerState)
@ -166,19 +168,19 @@ module hptw
LEVEL3_SET_ADR: NextWalkerState = LEVEL3_READ;
LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ;
else NextWalkerState = LEVEL3;
LEVEL3: if (ValidLeafPTE && ~TerapageMisaligned) NextWalkerState = LEAF;
LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR;
else NextWalkerState = FAULT;
LEVEL2_SET_ADR: NextWalkerState = LEVEL2_READ;
LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ;
else NextWalkerState = LEVEL2;
LEVEL2: if (ValidLeafPTE && ~GigapageMisaligned) NextWalkerState = LEAF;
LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR;
else NextWalkerState = FAULT;
LEVEL1_SET_ADR: NextWalkerState = LEVEL1_READ;
LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ;
else NextWalkerState = LEVEL1;
LEVEL1: if (ValidLeafPTE && ~MegapageMisaligned) NextWalkerState = LEAF;
LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR;
else NextWalkerState = FAULT;
LEVEL0_SET_ADR: NextWalkerState = LEVEL0_READ;