mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Added FLEN, NE, NF to config and started using these in FMA1
This commit is contained in:
parent
e31d2ef9f5
commit
f22b6e7397
@ -26,12 +26,14 @@
|
||||
`include "wally-constants.vh"
|
||||
|
||||
// macros to define supported modes
|
||||
// NOTE: No hardware support for Q yet
|
||||
|
||||
`define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
|
||||
`define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
|
||||
`define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
|
||||
`define F_SUPPORTED ((`MISA >> 5) % 2 == 1)
|
||||
`define M_SUPPORTED ((`MISA >> 12) % 2 == 1)
|
||||
`define Q_SUPPORTED ((`MISA >> 16) % 2 == 1)
|
||||
`define S_SUPPORTED ((`MISA >> 18) % 2 == 1)
|
||||
`define U_SUPPORTED ((`MISA >> 20) % 2 == 1)
|
||||
|
||||
@ -44,8 +46,12 @@
|
||||
`define LOG_XLEN (`XLEN == 32 ? 5 : 6)
|
||||
|
||||
// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
|
||||
`define PMPCFG_ENTRIES (`PMP_ENTRIES\8)
|
||||
`define PMPCFG_ENTRIES (`PMP_ENTRIES/8)
|
||||
|
||||
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
|
||||
`define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
|
||||
`define NE (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
|
||||
`define NF (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
|
@ -1,3 +1,29 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// Written: Katherine Parry, David Harris
|
||||
// Modified: 6/23/2021
|
||||
//
|
||||
// Purpose: Floating point multiply-accumulate of configurable size
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fma(
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
@ -54,25 +80,24 @@ endmodule
|
||||
|
||||
module fma1(
|
||||
// input logic XSgnE, YSgnE, ZSgnE,
|
||||
input logic [10:0] XExpE, YExpE, ZExpE,
|
||||
input logic [51:0] XFracE, YFracE, ZFracE,
|
||||
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
|
||||
input logic [`NF-1:0] XFracE, YFracE, ZFracE, // fractions in U(0.NF) format]
|
||||
input logic XAssumed1E, YAssumed1E, ZAssumed1E,
|
||||
input logic XDenormE, YDenormE, ZDenormE,
|
||||
input logic XZeroE, YZeroE, ZZeroE,
|
||||
input logic [10:0] BiasE,
|
||||
input logic [`NE-1:0] BiasE,
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
|
||||
output logic [161:0] AlignedAddendE, // Z aligned for addition
|
||||
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
|
||||
output logic [2*`NF+1:0] ProdManE, // 1.X frac * 1.Y frac in U(2.2Nf) format
|
||||
output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in *** format
|
||||
output logic [`NE+1:0] ProdExpE, // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
|
||||
output logic AddendStickyE, // sticky bit that is calculated during alignment
|
||||
output logic KillProdE // set the product to zero before addition if the product is too small to matter
|
||||
);
|
||||
|
||||
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
|
||||
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
|
||||
logic [213:0] ZManPreShifted; // input to the alignment shifter
|
||||
|
||||
logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format *** is this enough bits?
|
||||
logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bit
|
||||
logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
@ -83,20 +108,13 @@ module fma1(
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// verilator lint_off WIDTH
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 0 :
|
||||
XExpE + YExpE - BiasE + XDenormE + YDenormE;
|
||||
// verilator lint_on WIDTH
|
||||
|
||||
// Calculate the product's mantissa
|
||||
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
|
||||
assign ProdManE = {XAssumed1E, XFracE} * {YAssumed1E, YFracE};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
@ -108,8 +126,6 @@ module fma1(
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one to the exponent if it is a denormal number
|
||||
assign AlignCnt = ProdExpE - ZExpE - ZDenormE;
|
||||
// verilator lint_on WIDTH
|
||||
|
||||
|
||||
// Default Addition without shifting
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
@ -160,10 +176,7 @@ module fma1(
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign AlignedAddendE = ZManShifted[213:52];
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
@ -57,7 +57,7 @@ module hptw
|
||||
logic [`PPN_BITS-1:0] CurrentPPN;
|
||||
logic MemWrite;
|
||||
logic Executable, Writable, Readable, Valid;
|
||||
logic MegapageMisaligned, GigapageMisaligned, TerapageMisaligned;
|
||||
logic Misaligned, MegapageMisaligned;
|
||||
logic ValidPTE, LeafPTE, ValidLeafPTE, ValidNonLeafPTE;
|
||||
logic StartWalk;
|
||||
logic TLBMiss;
|
||||
@ -144,20 +144,22 @@ module hptw
|
||||
// Initial state and misalignment for RV32/64
|
||||
if (`XLEN == 32) begin
|
||||
assign InitialWalkerState = LEVEL1_SET_ADR;
|
||||
assign TerapageMisaligned = 0; // not applicable
|
||||
assign GigapageMisaligned = 0; // not applicable
|
||||
assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
|
||||
assign Misaligned = ((WalkerState == LEVEL1) & MegapageMisaligned);
|
||||
end else begin
|
||||
logic GigapageMisaligned, TerapageMisaligned;
|
||||
assign InitialWalkerState = (SvMode == `SV48) ? LEVEL3_SET_ADR : LEVEL2_SET_ADR;
|
||||
assign TerapageMisaligned = |(CurrentPPN[26:0]); // must have zero PPN2, PPN1, PPN0
|
||||
assign GigapageMisaligned = |(CurrentPPN[17:0]); // must have zero PPN1 and PPN0
|
||||
assign MegapageMisaligned = |(CurrentPPN[8:0]); // must have zero PPN0
|
||||
assign Misaligned = ((WalkerState == LEVEL3) & TerapageMisaligned) | ((WalkerState == LEVEL2) & GigapageMisaligned) | ((WalkerState == LEVEL1) & MegapageMisaligned);
|
||||
end
|
||||
|
||||
// Page Table Walker FSM
|
||||
// If the setup time on the D$ RAM is short, it should be possible to merge the LEVELx_READ and LEVELx states
|
||||
// to decrease the latency of the HPTW. However, if the D$ is a cycle limiter, it's better to leave the
|
||||
// HPTW as shown below to keep the D$ setup time out of the critical path.
|
||||
// *** Is this really true? Talk with Ross. Seems like it's the next state logic on critical path instead.
|
||||
flopenl #(.TYPE(statetype)) WalkerStateReg(clk, reset, 1'b1, NextWalkerState, IDLE, WalkerState);
|
||||
always_comb
|
||||
case (WalkerState)
|
||||
@ -166,19 +168,19 @@ module hptw
|
||||
LEVEL3_SET_ADR: NextWalkerState = LEVEL3_READ;
|
||||
LEVEL3_READ: if (HPTWStall) NextWalkerState = LEVEL3_READ;
|
||||
else NextWalkerState = LEVEL3;
|
||||
LEVEL3: if (ValidLeafPTE && ~TerapageMisaligned) NextWalkerState = LEAF;
|
||||
LEVEL3: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
else if (ValidNonLeafPTE) NextWalkerState = LEVEL2_SET_ADR;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL2_SET_ADR: NextWalkerState = LEVEL2_READ;
|
||||
LEVEL2_READ: if (HPTWStall) NextWalkerState = LEVEL2_READ;
|
||||
else NextWalkerState = LEVEL2;
|
||||
LEVEL2: if (ValidLeafPTE && ~GigapageMisaligned) NextWalkerState = LEAF;
|
||||
LEVEL2: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
else if (ValidNonLeafPTE) NextWalkerState = LEVEL1_SET_ADR;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL1_SET_ADR: NextWalkerState = LEVEL1_READ;
|
||||
LEVEL1_READ: if (HPTWStall) NextWalkerState = LEVEL1_READ;
|
||||
else NextWalkerState = LEVEL1;
|
||||
LEVEL1: if (ValidLeafPTE && ~MegapageMisaligned) NextWalkerState = LEAF;
|
||||
LEVEL1: if (ValidLeafPTE && ~Misaligned) NextWalkerState = LEAF;
|
||||
else if (ValidNonLeafPTE) NextWalkerState = LEVEL0_SET_ADR;
|
||||
else NextWalkerState = FAULT;
|
||||
LEVEL0_SET_ADR: NextWalkerState = LEVEL0_READ;
|
||||
|
Loading…
Reference in New Issue
Block a user