Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
Ross Thompson 2022-03-22 21:28:50 -05:00
commit b2487f4b72
62 changed files with 10046 additions and 671 deletions

@ -1 +1 @@
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230

View File

@ -49,6 +49,8 @@
`define UARCH_SINGLECYCLE 0
`define DMEM `MEM_CACHE
`define IMEM `MEM_CACHE
`define DBUS 1
`define IBUS 1
`define VIRTMEM_SUPPORTED 1
`define VECTORED_INTERRUPTS_SUPPORTED 1

View File

@ -49,6 +49,8 @@
`define UARCH_SINGLECYCLE 0
`define DMEM `MEM_CACHE
`define IMEM `MEM_CACHE
`define DBUS 1
`define IBUS 1
`define VIRTMEM_SUPPORTED 1
`define VECTORED_INTERRUPTS_SUPPORTED 1

View File

@ -49,8 +49,10 @@
`define UARCH_SUPERSCALR 0
`define UARCH_SINGLECYCLE 0
// *** replace with MEM_BUS
`define DMEM `MEM_BUS
`define IMEM `MEM_BUS
`define DMEM `MEM_NONE
`define IMEM `MEM_NONE
`define DBUS 1
`define IBUS 1
`define VIRTMEM_SUPPORTED 0
`define VECTORED_INTERRUPTS_SUPPORTED 0

View File

@ -49,6 +49,8 @@
`define UARCH_SINGLECYCLE 0
`define DMEM `MEM_CACHE
`define IMEM `MEM_CACHE
`define DBUS 1
`define IBUS 1
`define VIRTMEM_SUPPORTED 1
`define VECTORED_INTERRUPTS_SUPPORTED 1

View File

@ -49,6 +49,8 @@
`define UARCH_SINGLECYCLE 0
`define DMEM `MEM_TIM
`define IMEM `MEM_TIM
`define DBUS 0
`define IBUS 0
`define VIRTMEM_SUPPORTED 0
`define VECTORED_INTERRUPTS_SUPPORTED 1

View File

@ -51,6 +51,8 @@
`define UARCH_SINGLECYCLE 0
`define DMEM `MEM_CACHE
`define IMEM `MEM_CACHE
`define DBUS 1
`define IBUS 1
`define VIRTMEM_SUPPORTED 1
`define VECTORED_INTERRUPTS_SUPPORTED 1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,134 @@
//////////////////////////////////////////
// wally-config.vh
//
// Written: David_Harris@hmc.edu 4 January 2021
// Modified:
//
// Purpose: Specify which features are configured
// Macros to determine which modes are supported based on MISA
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
// include shared configuration
`include "wally-shared.vh"
`define FPGA 0
`define QEMU 0
`define DESIGN_COMPILER 0
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
// IEEE 754 compliance
`define IEEE754 1
// MISA RISC-V configuration per specification
`define MISA (32'h00000104 | 1 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
`define ZICSR_SUPPORTED 1
`define ZIFENCEI_SUPPORTED 1
`define COUNTERS 32
`define ZICOUNTERS_SUPPORTED 1
/// Microarchitectural Features
`define UARCH_PIPELINED 1
`define UARCH_SUPERSCALR 0
`define UARCH_SINGLECYCLE 0
`define DMEM `MEM_CACHE
`define IMEM `MEM_CACHE
`define VIRTMEM_SUPPORTED 1
`define VECTORED_INTERRUPTS_SUPPORTED 1
// TLB configuration. Entries should be a power of 2
`define ITLB_ENTRIES 32
`define DTLB_ENTRIES 32
// Cache configuration. Sizes should be a power of two
// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
`define DCACHE_NUMWAYS 4
`define DCACHE_WAYSIZEINBYTES 4096
`define DCACHE_LINELENINBITS 256
`define ICACHE_NUMWAYS 4
`define ICACHE_WAYSIZEINBYTES 4096
`define ICACHE_LINELENINBITS 256
// Integer Divider Configuration
// DIV_BITSPERCYCLE must be 1, 2, or 4
`define DIV_BITSPERCYCLE 4
// Legal number of PMP entries are 0, 16, or 64
`define PMP_ENTRIES 64
// Address space
`define RESET_VECTOR 64'h0000000080000000
// Bus Interface width
`define AHBW 64
// Peripheral Physiccal Addresses
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file?
`define BOOTROM_SUPPORTED 1'b1
`define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTROM_RANGE 56'h00000FFF
`define RAM_SUPPORTED 1'b1
`define RAM_BASE 56'h80000000
`define RAM_RANGE 56'h7FFFFFFF
`define EXT_MEM_SUPPORTED 1'b0
`define EXT_MEM_BASE 56'h80000000
`define EXT_MEM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 56'h02000000
`define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 56'h10060000
`define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 56'h10000000
`define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
`define SDC_SUPPORTED 1'b0
`define SDC_BASE 56'h00012100
`define SDC_RANGE 56'h0000001F
// Test modes
// Tie GPIO outputs back to inputs
`define GPIO_LOOPBACK_TEST 1
// Hardware configuration
`define UART_PRESCALE 1
// Interrupt configuration
`define PLIC_NUM_SRC 10
// comment out the following if >=32 sources
`define PLIC_NUM_SRC_LT_32
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0
`define REPLAY 0
`define HPTW_WRITES_SUPPORTED 0

View File

@ -50,6 +50,8 @@
`define UARCH_SINGLECYCLE 0
`define DMEM `MEM_CACHE
`define IMEM `MEM_CACHE
`define DBUS 1
`define IBUS 1
`define VIRTMEM_SUPPORTED 1
`define VECTORED_INTERRUPTS_SUPPORTED 1

View File

@ -50,6 +50,8 @@
`define UARCH_SINGLECYCLE 0
`define DMEM `MEM_TIM
`define IMEM `MEM_TIM
`define DBUS 0
`define IBUS 0
`define VIRTMEM_SUPPORTED 0
`define VECTORED_INTERRUPTS_SUPPORTED 1

View File

@ -50,7 +50,7 @@
`define SV39 8
`define SV48 9
`define MEM_BUS 1
`define MEM_NONE 1
`define MEM_TIM 2
`define MEM_CACHE 3

View File

@ -50,10 +50,47 @@
// Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
`define PMPCFG_ENTRIES (`PMP_ENTRIES/8)
// Floating-point half-precision
`define ZFH_SUPPORTED 0
// Floating point constants for Quad, Double, Single, and Half precisions
`define Q_LEN 128
`define Q_NE 15
`define Q_NF 112
`define Q_BIAS 16383
`define D_LEN 64
`define D_NE 11
`define D_NF 52
`define D_BIAS 1023
`define S_LEN 32
`define S_NE 8
`define S_NF 23
`define S_BIAS 127
`define H_LEN 16
`define H_NE 5
`define H_NF 10
`define H_BIAS 15
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
`define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
`define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN)
`define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `F_SUPPORTED ? `S_NE : `H_NE)
`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF)
`define FMT (`Q_SUPPORTED ? 3 : `D_SUPPORTED ? 1 : `F_SUPPORTED ? 0 : 2)
`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)
// Floating point constants needed for FPU paramerterization
`define FPSIZES (`Q_SUPPORTED+`D_SUPPORTED+`F_SUPPORTED+`ZFH_SUPPORTED)
`define LEN1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN : `H_LEN)
`define NE1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE : `H_NE)
`define NF1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF)
`define FMT1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 1 : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 0 : 2)
`define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS)
`define LEN2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN : `H_LEN)
`define NE2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE : `H_NE)
`define NF2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF : `H_NF)
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 0 : 2)
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
// Disable spurious Verilator warnings

View File

@ -1,10 +1,33 @@
//`include "../../../config/old/rv64icfd/wally-config.vh"
`include "../../../config/old/rv64icfd/wally-config.vh"
`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
`define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
`define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
`define XLEN 64
// `define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : `F_SUPPORTED ? 32 : 16)
// `define NE (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : `F_SUPPORTED ? 8 : 5)
// `define NF (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : `F_SUPPORTED ? 23 : 10)
// `define FMT (`Q_SUPPORTED ? 3 : `D_SUPPORTED ? 1 : `F_SUPPORTED ? 0 : 2)
// `define BIAS (`Q_SUPPORTED ? 16383 : `D_SUPPORTED ? 1023 : `F_SUPPORTED ? 127 : 15)
// `define XLEN 64
// `define IEEE754 1
`define Q_SUPPORTED 1
// `define D_SUPPORTED 0
// `define F_SUPPORTED 0
`define H_SUPPORTED 0
`define FPSIZES ((`Q_SUPPORTED&`D_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) ? 4 : (`Q_SUPPORTED&`D_SUPPORTED&`F_SUPPORTED) | (`Q_SUPPORTED&`D_SUPPORTED&`H_SUPPORTED) | (`Q_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) | (`D_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) ? 3 : (`Q_SUPPORTED&`D_SUPPORTED) | (`Q_SUPPORTED&`F_SUPPORTED) | (`Q_SUPPORTED&`H_SUPPORTED) | (`D_SUPPORTED&`F_SUPPORTED) | (`D_SUPPORTED&`H_SUPPORTED) | (`F_SUPPORTED&`H_SUPPORTED) ? 2 : 1)
`define LEN1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 64 : (`F_SUPPORTED & (`FLEN !== 32)) ? 32 : 16)
`define NE1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 11 : (`F_SUPPORTED & (`FLEN !== 32)) ? 8 : 5)
`define NF1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 52 : (`F_SUPPORTED & (`FLEN !== 32)) ? 23 : 10)
`define FMT1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 1 : (`F_SUPPORTED & (`FLEN !== 32)) ? 0 : 2)
`define BIAS1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 1023 : (`F_SUPPORTED & (`FLEN !== 32)) ? 127 : 15)
`define LEN2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 32 : 16)
`define NE2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 8 : 5)
`define NF2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 23 : 10)
`define FMT2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 0 : 2)
`define BIAS2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 127 : 15)
`define LEN3 16
`define NE3 5//make constants for the constants ie 11/8/5 ect
`define NF3 10 // always support less hten max - maybe halfs
`define FMT3 2
`define BIAS3 15
module testbench3();
logic [31:0] errors=0;
@ -15,33 +38,17 @@ module testbench3();
logic [`FLEN-1:0] ans;
logic [7:0] flags;
logic [2:0] FrmE;
logic FmtE;
logic [`FPSIZES/3:0] FmtE;
logic [`FLEN-1:0] FMAResM;
logic [4:0] FMAFlgM;
integer fp;
logic [2:0] FOpCtrlE;
logic [2*`NF+1:0] ProdManE;
logic [3*`NF+5:0] AlignedAddendE;
logic [`NE+1:0] ProdExpE;
logic AddendStickyE;
logic KillProdE;
// logic XZeroE;
// logic YZeroE;
// logic ZZeroE;
// logic XDenormE;
// logic YDenormE;
// logic ZDenormE;
// logic XInfE;
// logic YInfE;
// logic ZInfE;
// logic XNaNE;
// logic YNaNE;
// logic ZNaNE;
logic wnan;
// logic XNaNE;
// logic YNaNE;
// logic ZNaNE;
logic ansnan, clk;
@ -52,88 +59,86 @@ assign FOpCtrlE = 3'b0;
// down - 010
// up - 011
// nearest max mag - 100
assign FrmE = 3'b000;
assign FmtE = 1'b1;
assign FrmE = 3'b010;
assign FmtE = (`FPSIZES/3+1)'(1);
logic [`FLEN-1:0] X, Y, Z;
// logic FmtE;
// logic [2:0] FOpCtrlE;
logic XSgnE, YSgnE, ZSgnE;
logic [`NE-1:0] XExpE, YExpE, ZExpE;
logic [`NF-1:0] XFracE, YFracE, ZFracE;
logic XAssumed1E, YAssumed1E, ZAssumed1E;
logic [`NF:0] XManE, YManE, ZManE;
logic XNormE;
logic XExpMaxE;
logic XNaNE, YNaNE, ZNaNE;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XDenormE, YDenormE, ZDenormE;
logic XZeroE, YZeroE, ZZeroE;
logic [`NE-1:0] BiasE;
logic XInfE, YInfE, ZInfE;
logic XExpMaxE;
//***rename to make significand = 1.frac m = significand
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
logic [`FLEN-1:0] Addend; // value to add (Z or zero)
logic YExpMaxE, ZExpMaxE; // input exponent all 1s
logic YExpMaxE, ZExpMaxE, Mult; // input exponent all 1s
assign Addend = FOpCtrlE[2] ? (`FLEN)'(0) : Z; // Z is only used in the FMA, and is set to Zero if a multiply opperation
assign XSgnE = FmtE ? X[`FLEN-1] : X[31];
assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31];
assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31];
assign Mult = 1'b0;
unpacking unpacking(.*);
assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]};
assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]};
assign ZExpE = FmtE ? Addend[62:52] : {Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]};
// assign wnan = XNaNE|YNaNE|ZNaNE;
// assign ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[22:0];
if (`FPSIZES === 1) begin
assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
end else if (`FPSIZES === 2) begin
assign ansnan = FmtE ? &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]) : &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
assign wnan = FmtE ? &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]) : &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
end else if (`FPSIZES === 3) begin
always_comb begin
case (FmtE)
`FMT: begin
assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0};
assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0};
assign ZFracE = FmtE ? Addend[`NF-1:0] : {Addend[22:0], 29'b0};
end
`FMT1: begin
assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
assign XAssumed1E = FmtE ? |X[62:52] : |X[30:23];
assign YAssumed1E = FmtE ? |Y[62:52] : |Y[30:23];
assign ZAssumed1E = FmtE ? |Z[62:52] : |Z[30:23];
end
`FMT2: begin
assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
end
default: begin
assign ansnan = 0;
assign wnan = 0;
end
endcase
end
assign XExpZero = ~XAssumed1E;
assign YExpZero = ~YAssumed1E;
assign ZExpZero = ~ZAssumed1E;
assign XFracZero = ~|XFracE;
assign YFracZero = ~|YFracE;
assign ZFracZero = ~|ZFracE;
end else begin
always_comb begin
case (FmtE)
`FMT: begin
assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
assign XExpMaxE = FmtE ? &X[62:52] : &X[30:23];
assign YExpMaxE = FmtE ? &Y[62:52] : &Y[30:23];
assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23];
assign XNormE = ~(XExpMaxE|XExpZero);
assign XNaNE = XExpMaxE & ~XFracZero;
assign YNaNE = YExpMaxE & ~YFracZero;
assign ZNaNE = ZExpMaxE & ~ZFracZero;
end
`FMT1: begin
assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
assign XSNaNE = XNaNE&~XFracE[`NF-1];
assign YSNaNE = YNaNE&~YFracE[`NF-1];
assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
assign XDenormE = XExpZero & ~XFracZero;
assign YDenormE = YExpZero & ~YFracZero;
assign ZDenormE = ZExpZero & ~ZFracZero;
assign XInfE = XExpMaxE & XFracZero;
assign YInfE = YExpMaxE & YFracZero;
assign ZInfE = ZExpMaxE & ZFracZero;
assign XZeroE = XExpZero & XFracZero;
assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero;
assign BiasE = 13'h3ff;
assign wnan = FmtE ? &FMAResM[`FLEN-2:`NF] & |FMAResM[`NF-1:0] : &FMAResM[30:23] & |FMAResM[22:0];
// assign XNaNE = FmtE ? &X[62:52] & |X[51:0] : &X[62:55] & |X[54:32];
// assign YNaNE = FmtE ? &Y[62:52] & |Y[51:0] : &Y[62:55] & |Y[54:32];
// assign ZNaNE = FmtE ? &Z[62:52] & |Z[51:0] : &Z[62:55] & |Z[54:32];
assign ansnan = FmtE ? &ans[`FLEN-2:`NF] & |ans[`NF-1:0] : &ans[30:23] & |ans[22:0];
end
`FMT2: begin
assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
end
`FMT3: begin
assign ansnan = &ans[`LEN3-2:`NF3]&(|ans[`NF3-1:0]);
assign wnan = &FMAResM[`LEN3-2:`NF3]&(|FMAResM[`NF3-1:0]);
end
endcase
end
end
// instantiate device under test
logic [3*`NF+5:0] SumE, SumM;
@ -141,16 +146,16 @@ assign ansnan = FmtE ? &ans[`FLEN-2:`NF] & |ans[`NF-1:0] : &ans[30:23] & |ans[22
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [8:0] NormCntE, NormCntM;
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}),
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM(XManE), .YManM(YManE), .ZManM(ZManE), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
// .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM,
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
.FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);
.FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM, .Mult);
// produce clock
@ -168,61 +173,156 @@ fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZEx
always @(posedge clk)
begin
#1;
if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum];
else begin X = {{32{1'b1}}, testvectors[vectornum][135:104]};
Y = {{32{1'b1}}, testvectors[vectornum][103:72]};
Z = {{32{1'b1}}, testvectors[vectornum][71:40]};
ans = {{32{1'b1}}, testvectors[vectornum][39:8]};
flags = testvectors[vectornum][7:0];
if (`FPSIZES === 3 | `FPSIZES === 4) begin
if (FmtE==2'b11) {X, Y, Z, ans, flags} = testvectors[vectornum];
else if (FmtE==2'b01) begin
X = {{`FLEN-64{1'b1}}, testvectors[vectornum][263:200]};
Y = {{`FLEN-64{1'b1}}, testvectors[vectornum][199:136]};
Z = {{`FLEN-64{1'b1}}, testvectors[vectornum][135:72]};
ans = {{`FLEN-64{1'b1}}, testvectors[vectornum][71:8]};
flags = testvectors[vectornum][7:0];
end
else if (FmtE==2'b00) begin
X = {{`FLEN-32{1'b1}}, testvectors[vectornum][135:104]};
Y = {{`FLEN-32{1'b1}}, testvectors[vectornum][103:72]};
Z = {{`FLEN-32{1'b1}}, testvectors[vectornum][71:40]};
ans = {{`FLEN-32{1'b1}}, testvectors[vectornum][39:8]};
flags = testvectors[vectornum][7:0];
end
else begin
X = {{`FLEN-16{1'b1}}, testvectors[vectornum][71:56]};
Y = {{`FLEN-16{1'b1}}, testvectors[vectornum][55:40]};
Z = {{`FLEN-16{1'b1}}, testvectors[vectornum][39:24]};
ans = {{`FLEN-16{1'b1}}, testvectors[vectornum][23:8]};
flags = testvectors[vectornum][7:0];
end
end
else begin
if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum];
else if (FmtE==1'b0) begin
X = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+4*(`LEN1)-1:8+3*(`LEN1)]};
Y = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+3*(`LEN1)-1:8+2*(`LEN1)]};
Z = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+2*(`LEN1)-1:8+(`LEN1)]};
ans = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+(`LEN1-1):8]};
flags = testvectors[vectornum][7:0];
end
end
end
// check results on falling edge of clk
always @(negedge clk) begin
if((FmtE==1'b1) & (FMAFlgM != flags[4:0] | (!wnan & (FMAResM != ans)) | (wnan & ansnan & ~((XNaNE & (FMAResM[`FLEN-2:0] == {XExpE,1'b1,X[`NF-2:0]})) | (YNaNE & (FMAResM[`FLEN-2:0] == {YExpE,1'b1,Y[`NF-2:0]})) | (ZNaNE & (FMAResM[`FLEN-2:0] == {ZExpE,1'b1,Z[`NF-2:0]})) | (FMAResM[`FLEN-2:0] == ans[`FLEN-2:0]))))) begin
// fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
// if((FmtE==1'b1) & (FMAFlgM != flags[4:0] | (FMAResM != ans))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
if(XDenormE) $display( "xdenorm ");
if(YDenormE) $display( "ydenorm ");
if(ZDenormE) $display( "zdenorm ");
if(FMAFlgM[4] != 0) $display( "invld ");
if(FMAFlgM[2] != 0) $display( "ovrflw ");
if(FMAFlgM[1] != 0) $display( "unflw ");
if(FMAResM[`FLEN] & FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] == 0) $display( "FMAResM=-inf ");
if(~FMAResM[`FLEN] & FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] == 0) $display( "FMAResM=+inf ");
if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] != 0 & ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] != 0 & FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
if(ans[`FLEN] & ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] == 0) $display( "ans=-inf ");
if(~ans[`FLEN] & ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] == 0) $display( "ans=+inf ");
if(ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] != 0 & ~ans[`NF-1]) $display( "ans=sigNaN ");
if(ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] != 0 & ans[`NF-1]) $display( "ans=qutNaN ");
errors = errors + 1;
//if (errors == 10)
$stop;
end
if((FmtE==1'b0)&(FMAFlgM != flags[4:0] | (!wnan & (FMAResM != ans)) | (wnan & ansnan & ~(((XNaNE & (FMAResM[30:0] == {X[30:23],1'b1,X[21:0]})) | (YNaNE & (FMAResM[30:0] == {Y[30:23],1'b1,Y[21:0]})) | (ZNaNE & (FMAResM[30:0] == {Z[30:23],1'b1,Z[21:0]})) | (FMAResM[30:0] == ans[30:0]))) ))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
if(~(|X[30:23]) & |X[22:0]) $display( "xdenorm ");
if(~(|Y[30:23]) & |Y[22:0]) $display( "ydenorm ");
if(~(|Z[30:23]) & |Z[22:0]) $display( "zdenorm ");
if(FMAFlgM[4] != 0) $display( "invld ");
if(FMAFlgM[2] != 0) $display( "ovrflw ");
if(FMAFlgM[1] != 0) $display( "unflw ");
if(FMAResM == 64'hFF80000000000000) $display( "FMAResM=-inf ");
if(FMAResM == 64'h7F80000000000000) $display( "FMAResM=+inf ");
if(&FMAResM[30:23] & |FMAResM[22:0] & ~FMAResM[22]) $display( "FMAResM=sigNaN ");
if(&FMAResM[30:23] & |FMAResM[22:0] & FMAResM[22] ) $display( "FMAResM=qutNaN ");
if(ans == 64'hFF80000000000000) $display( "ans=-inf ");
if(ans == 64'h7F80000000000000) $display( "ans=+inf ");
if(&ans[30:23] & |ans[22:0] & ~ans[22] ) $display( "ans=sigNaN ");
if(&ans[30:23] & |ans[22:0] & ans[22]) $display( "ans=qutNaN ");
errors = errors + 1;
if (errors == 10)
$stop;
end
if (`FPSIZES === 1 | `FPSIZES === 2) begin
if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]})) || (ZNaNE && (FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0]))))) begin
// fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
// if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (FMAResM !== ans))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(XDenormE) $display( "xdenorm ");
if(YDenormE) $display( "ydenorm ");
if(ZDenormE) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf ");
if(~FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
if(ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
if(~ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
errors = errors + 1;
//if (errors === 10)
$stop;
end
if((FmtE==1'b0)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[`LEN1-2:0] === {X[`LEN1-2:`NF1],1'b1,X[`NF1-2:0]})) || (YNaNE && (FMAResM[`LEN1-2:0] === {Y[`LEN1-2:`NF1],1'b1,Y[`NF1-2:0]})) || (ZNaNE && (FMAResM[`LEN1-2:0] === {Z[`LEN1-2:`NF1],1'b1,Z[`NF1-2:0]})) || (FMAResM[`LEN1-2:0] === ans[`LEN1-2:0]))) ))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
errors = errors + 1;
// if (errors === 9)
$stop;
end
end else begin
if((FmtE==2'b11) & (FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]})) || (ZNaNE && (FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0]))))) begin
// fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
// if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (FMAResM !== ans))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(XDenormE) $display( "xdenorm ");
if(YDenormE) $display( "ydenorm ");
if(ZDenormE) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf ");
if(~FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
if(ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
if(~ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
errors = errors + 1;
//if (errors === 10)
$stop;
end
if((FmtE==1'b01)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[64-2:0] === {X[64-2:52],1'b1,X[52-2:0]})) || (YNaNE && (FMAResM[64-2:0] === {Y[64-2:52],1'b1,Y[52-2:0]})) || (ZNaNE && (FMAResM[64-2:0] === {Z[64-2:52],1'b1,Z[52-2:0]})) || (FMAResM[62:0] === ans[62:0]))) ))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
errors = errors + 1;
// if (errors === 9)
$stop;
end
if((FmtE==2'b00)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[32-2:0] === {X[32-2:23],1'b1,X[23-2:0]})) || (YNaNE && (FMAResM[32-2:0] === {Y[32-2:23],1'b1,Y[23-2:0]})) || (ZNaNE && (FMAResM[32-2:0] === {Z[32-2:23],1'b1,Z[23-2:0]})) || (FMAResM[30:0] === ans[30:0]))) ))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
errors = errors + 1;
// if (errors === 9)
$stop;
end
if((FmtE==2'b10)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[16-2:0] === {X[16-2:10],1'b1,X[10-2:0]})) || (YNaNE && (FMAResM[16-2:0] === {Y[16-2:10],1'b1,Y[10-2:0]})) || (ZNaNE && (FMAResM[16-2:0] === {Z[16-2:10],1'b1,Z[10-2:0]})) || (FMAResM[14:0] === ans[14:0]))) ))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
errors = errors + 1;
// if (errors === 9)
$stop;
end
end
vectornum = vectornum + 1;
if (testvectors[vectornum] === 194'bx) begin
$display("%d tests completed with %d errors", vectornum, errors);

View File

@ -1,3 +1,3 @@
testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat
testfloat_gen f128_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat
tr -d ' ' < testFloat > testFloatNoSpace

View File

@ -30,7 +30,7 @@
`include "wally-config.vh"
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) (
module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTERVAL, DCACHE) (
input logic clk,
input logic reset,
// cpu side
@ -41,24 +41,26 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) (
input logic InvalidateCacheM,
input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits.
input logic [`PA_BITS-1:0] PAdr, // physical address
input logic [(`XLEN-1)/8:0] ByteMask,
input logic [`XLEN-1:0] FinalWriteData,
output logic CacheCommitted,
output logic CacheStall,
// to performance counters to cpu
output logic CacheMiss,
output logic CacheAccess,
output logic save, restore,
// lsu control
input logic IgnoreRequestTLB,
input logic IgnoreRequestTrapM,
input logic IgnoreRequestTrapM,
input logic Cacheable,
// Bus fsm interface
output logic CacheFetchLine,
output logic CacheWriteLine,
input logic CacheBusAck,
input logic [LOGWPL-1:0] WordCount,
input logic LSUBusWriteCrit,
output logic [`PA_BITS-1:0] CacheBusAdr,
input logic [LINELEN-1:0] CacheBusWriteData,
output logic [LINELEN-1:0] ReadDataLine);
output logic [WORDLEN-1:0] ReadDataWord);
// Cache parameters
localparam LINEBYTELEN = LINELEN/8;
@ -101,6 +103,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) (
logic [NUMWAYS-1:0] SelectedWay;
logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay;
logic [1:0] CacheRW, CacheAtomic;
logic [LINELEN-1:0] ReadDataLine;
logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;
logic save, restore;
/////////////////////////////////////////////////////////////////////////////////////////////
// Read Path
@ -114,7 +119,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) (
// Array of cache ways, along with victim, hit, dirty, and read merging logic
cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) CacheWays[NUMWAYS-1:0](
.clk, .reset, .RAdr, .PAdr, .CacheWriteData,
.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask,
.SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
.FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay,
.Invalidate(InvalidateCacheM));
@ -138,6 +143,17 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) (
flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, HitWay, HitWaySaved);
mux2 #(NUMWAYS) saverestoremux(HitWay, HitWaySaved, restore, HitWayFinal);
end else assign HitWayFinal = HitWay;
// like to fix this.
if(DCACHE)
mux2 #(LOGWPL) WordAdrrMux(.d0(PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]),
.d1(WordCount), .s(LSUBusWriteCrit),
.y(WordOffsetAddr));
else assign WordOffsetAddr = PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)];
subcachelineread #(LINELEN, WORDLEN, MUXINTERVAL, LOGWPL) subcachelineread(
.clk, .reset, .PAdr(WordOffsetAddr), .save, .restore,
.ReadDataLine, .ReadDataWord);
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Path: Write data and address. Muxes between writes from bus and writes from CPU.

View File

@ -47,7 +47,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
input logic VictimWay,
input logic FlushWay,
input logic Invalidate,
input logic [(`XLEN-1)/8:0] ByteMask,
output logic [LINELEN-1:0] ReadDataLineWay,
output logic HitWay,
@ -69,6 +69,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
logic [$clog2(NUMLINES)-1:0] RAdrD;
logic [2**LOGWPL-1:0] MemPAdrDecoded;
logic [LINELEN/`XLEN-1:0] SelectedWriteWordEn;
logic [(`XLEN-1)/8:0] FinalByteMask;
/////////////////////////////////////////////////////////////////////////////////////////////
// Write Enable demux
@ -77,13 +78,14 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
.bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded));
// If writing the whole line set all write enables to 1, else only set the correct word.
assign SelectedWriteWordEn = SetValidWay ? '1 : SetDirtyWay ? MemPAdrDecoded : '0; // OR-AND
assign FinalByteMask = SetValidWay ? '1 : ByteMask; // OR
/////////////////////////////////////////////////////////////////////////////////////////////
// Tag Array
/////////////////////////////////////////////////////////////////////////////////////////////
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk,
.Adr(RAdr), .ReadData(ReadTag),
.Adr(RAdr), .ReadData(ReadTag), .ByteMask('1),
.CacheWriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(SetValidWay));
// AND portion of distributed tag multiplexer
@ -102,7 +104,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26,
sram1p1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk, .Adr(RAdr),
.ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ),
.CacheWriteData(CacheWriteData[(words+1)*`XLEN-1:words*`XLEN]),
.WriteEnable(SelectedWriteWordEn[words]));
.WriteEnable(SelectedWriteWordEn[words]), .ByteMask(FinalByteMask));
end
// AND portion of distributed read multiplexers

View File

@ -38,18 +38,31 @@ module sram1p1rw #(parameter DEPTH=128, WIDTH=256) (
input logic [$clog2(DEPTH)-1:0] Adr,
input logic [WIDTH-1:0] CacheWriteData,
input logic WriteEnable,
input logic [(WIDTH-1)/8:0] ByteMask,
output logic [WIDTH-1:0] ReadData);
logic [WIDTH-1:0] StoredData[DEPTH-1:0];
logic [$clog2(DEPTH)-1:0] AdrD;
logic WriteEnableD;
always_ff @(posedge clk) AdrD <= Adr;
genvar index;
for(index = 0; index < WIDTH/8; index++) begin
always_ff @(posedge clk) begin
AdrD <= Adr;
if (WriteEnable) begin
StoredData[Adr] <= #1 CacheWriteData;
if (WriteEnable & ByteMask[index]) begin
StoredData[Adr][8*(index+1)-1:8*index] <= #1 CacheWriteData[8*(index+1)-1:8*index];
end
end
end
// if not a multiple of 8, MSByte is not 8 bits long.
if(WIDTH%8 != 0) begin
always_ff @(posedge clk) begin
if (WriteEnable & ByteMask[WIDTH/8]) begin
StoredData[Adr][WIDTH-1:WIDTH-WIDTH%8] <= #1 CacheWriteData[WIDTH-1:WIDTH-WIDTH%8];
end
end
end
assign ReadData = StoredData[AdrD];
endmodule

View File

@ -30,20 +30,17 @@
`include "wally-config.vh"
module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)(
module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL, LOGWPL)(
input logic clk,
input logic reset,
input logic [`PA_BITS-1:0] PAdr,
input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr,
input logic save, restore,
input logic [LINELEN-1:0] ReadDataLine,
output logic [WORDLEN-1:0] ReadDataWord);
localparam WORDSPERLINE = LINELEN/MUXINTERVAL;
// pad is for icache. Muxing extends over the cacheline boundary.
localparam PADLEN = WORDLEN-MUXINTERVAL;
// Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can
// easily build a variable input mux.
// *** move this to LSU and IFU, also remove mux from busdp into LSU.
// *** give this a module name to match block diagram
logic [LINELEN+(WORDLEN-MUXINTERVAL)-1:0] ReadDataLinePad;
logic [WORDLEN-1:0] ReadDataLineSets [(LINELEN/MUXINTERVAL)-1:0];
logic [WORDLEN-1:0] ReadDataWordRaw, ReadDataWordSaved;
@ -60,7 +57,7 @@ module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)(
end
// variable input mux
// *** maybe remove REPLAY config later after deciding which way is best
assign ReadDataWordRaw = ReadDataLineSets[PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]];
assign ReadDataWordRaw = ReadDataLineSets[PAdr];
if(!`REPLAY) begin
flopen #(WORDLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved);
mux2 #(WORDLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved,

View File

@ -42,6 +42,7 @@ module fcmp (
// - if negitive - no
// - if positive - yes
// note: LT does -0 < 0
//*** compare Exp and Man together
assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
assign EQ = (FSrcXE == FSrcYE);

View File

@ -103,7 +103,7 @@ module cvtfp (
assign LSBFrac = DSFrac[3];
always_comb begin
always_comb begin // ***remove guard bit
// Determine if you add 1
case (FrmE)
3'b000: CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac));//round to nearest even
@ -166,6 +166,7 @@ module cvtfp (
{XSgnE, DSResExp, DSResFrac};
// select the final result based on the opperation
//*** in al units before putting into : ? put in a seperate signal
assign CvtFpResE = FmtE ? {{32{1'b1}},DSRes} : {XSgnE, SDExp, SDFrac[51]|XNaNE, SDFrac[50:0]};
end else begin
// select the double to single precision result

View File

@ -10,7 +10,6 @@ module fcvt (
input logic XNaNE, // is X NaN
input logic XInfE, // is X infinity
input logic XDenormE, // is X denormalized
input logic [10:0] BiasE, // bias - depends on precision (max exponent/2)
input logic [`XLEN-1:0] ForwardedSrcAE, // integer input
input logic [2:0] FOpCtrlE, // chooses which instruction is done (full list below)
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
@ -70,7 +69,7 @@ module fcvt (
assign Bits = Res64 ? 8'd64 : 8'd32;
// calulate the unbiased exponent
assign ExpVal = {1'b0,XExpE} - {1'b0,BiasE} + {12'b0, XDenormE};
assign ExpVal = {1'b0,XExpE} - {1'b0, (11)'(`BIAS)} + {12'b0, XDenormE};
////////////////////////////////////////////////////////
@ -121,7 +120,7 @@ module fcvt (
assign Round = FOpCtrlE[0] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
assign LSB = FOpCtrlE[0] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];
always_comb begin
always_comb begin//*** remove guard bit
// Determine if you add 1
case (FrmE)
3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even

View File

@ -29,17 +29,12 @@
`include "wally-config.vh"
// `define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
// `define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
// `define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
// `define XLEN 64
// `define IEEE754 1
module fma(
input logic clk,
input logic reset,
input logic FlushM, // flush the memory stage
input logic StallM, // stall memory stage
input logic FmtE, FmtM, // precision 1 = double 0 = single
input logic [`FPSIZES/3:0] FmtE, FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
@ -75,7 +70,7 @@ module fma(
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [8:0] NormCntE, NormCntM;
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
logic Mult;
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
@ -86,7 +81,7 @@ module fma(
// E/M pipeline registers
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(16) EMRegFma4(clk, reset, FlushM, ~StallM,
flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0]},
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult});
@ -98,6 +93,7 @@ module fma(
endmodule
//*** in al units before putting into : ? put in a seperate signal
module fma1(
input logic XSgnE, YSgnE, ZSgnE, // input's signs
@ -106,7 +102,7 @@ module fma1(
input logic XDenormE, YDenormE, ZDenormE, // is the input denormal
input logic XZeroE, YZeroE, ZZeroE, // is the input zero
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single
input logic [`FPSIZES/3:0] FmtE, // precision 1 = double 0 = single
output logic [`NE+1:0] ProdExpE, // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
@ -115,7 +111,7 @@ module fma1(
output logic InvZE, // intert Z
output logic ZSgnEffE, // the modified Z sign
output logic PSgnE, // the product's sign
output logic [8:0] NormCntE // normalization shift cnt
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
);
logic [`NE-1:0] Denorm; // value of a denormaized number based on precision
@ -157,37 +153,63 @@ module fma1(
add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
loa loa(.A(AlignedAddendInv+{162'b0,InvZE}), .P(ProdManKilled), .NormCntE);
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
// Choose the positive sum and accompanying LZA result.
assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
// assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
endmodule
module expadd(
input logic FmtE, // precision
input logic [`NE-1:0] XExpE, YExpE, // input exponents
input logic XDenormE, YDenormE, // are the inputs denormalized
input logic XZeroE, YZeroE, // are the inputs zero
output logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals
output logic [`NE-1:0] Denorm, // value of denormalized exponent
output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2
input logic [`FPSIZES/3:0] FmtE, // precision
input logic [`NE-1:0] XExpE, YExpE, // input exponents
input logic XDenormE, YDenormE, // are the inputs denormalized
input logic XZeroE, YZeroE, // are the inputs zero
output logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals
output logic [`NE-1:0] Denorm, // value of denormalized exponent
output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2
);
// denormalized numbers have diffrent values depending on which precison it is.
// double - 1
// single - 1023-127+1 = 897
assign Denorm = FmtE ? 1 : 897;
// FLEN - 1
// Other - BIAS - other bias + 1
if (`FPSIZES == 1) begin
assign Denorm = 1;
end else if (`FPSIZES == 2) begin
assign Denorm = FmtE ? (`NE)'(1) : (`NE)'(`BIAS)-(`NE)'(`BIAS1)+(`NE)'(1);
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtE)
`FMT: assign Denorm = 1;
`FMT1: assign Denorm = `BIAS-`BIAS1+1;
`FMT2: assign Denorm = `BIAS-`BIAS2+1;
default: assign Denorm = 1'bx;
endcase
end
end else begin
always_comb begin
case (FmtE)
2'h3: assign Denorm = 1;
2'h1: assign Denorm = `BIAS-`D_BIAS+1;
2'h0: assign Denorm = `BIAS-`S_BIAS+1;
2'h2: assign Denorm = `BIAS-`H_BIAS+1;
endcase
end
end
// pick denormalized value or exponent
assign XExpVal = XDenormE ? Denorm : XExpE;
assign YExpVal = YDenormE ? Denorm : YExpE;
// kill the exponent if the product is zero - either X or Y is 0
assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, `NE'h3ff})&{`NE+2{~(XZeroE|YZeroE)}};
assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
endmodule
@ -261,7 +283,7 @@ module align(
// - Denormal numbers have a diffrent exponent value depending on the precision
assign ZExpVal = ZDenormE ? Denorm : ZExpE;
// assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3);
assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - 1020+`NF - {2'b0, ZExpVal};
assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpVal};
// Defualt Addition without shifting
// | 54'b0 | 106'b(product) | 2'b0 |
@ -276,7 +298,7 @@ module align(
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
if ($signed(AlignCnt) < $signed(13'b0)) begin
if ($signed(AlignCnt) < $signed((`NE+2)'(0))) begin
KillProdE = 1;
ZManShifted = ZManPreShifted;
AddendStickyE = ~(XZeroE|YZeroE);
@ -284,7 +306,7 @@ module align(
// If the Addend is shifted right
// | 54'b0 | 106'b(product) | 2'b0 |
// | addnend |
end else if ($signed(AlignCnt)<=$signed(13'd3*13'd`NF+13'd4)) begin
end else if ($signed(AlignCnt)<=$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5))) begin
KillProdE = 0;
ZManShifted = ZManPreShifted >> AlignCnt;
AddendStickyE = |(ZManShifted[`NF-1:0]);
@ -356,7 +378,7 @@ endmodule
module loa( //https://ieeexplore.ieee.org/abstract/document/930098
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+1:0] P, // product
output logic [8:0] NormCntE // normalization shift count for the positive result
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift count for the positive result
);
logic [3*`NF+6:0] T;
@ -389,14 +411,14 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
endmodule
module lzc(
input logic [3*`NF+6:0] f,
output logic [8:0] NormCntE // normalization shift
input logic [3*`NF+6:0] f,
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift
);
logic [8:0] i;
logic [$clog2(3*`NF+7)-1:0] i;
always_comb begin
i = 0;
while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1; // search for leading one
while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1; // search for leading one
NormCntE = i;
end
endmodule
@ -410,27 +432,27 @@ endmodule
module fma2(
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic Mult, // multiply opperation
input logic [8:0] NormCntM, // the normalization shift count
output logic [`FLEN-1:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic Mult, // multiply opperation
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // the normalization shift count
output logic [`FLEN-1:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
@ -548,28 +570,27 @@ endmodule
module normalize(
input logic [3*`NF+5:0] SumM, // the positive sum
input logic [`NE-1:0] ZExpM, // exponent of Z
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic [8:0] NormCntM, // normalization shift count
input logic FmtM, // precision 1 = double 0 = single
input logic KillProdM, // is the product set to zero
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
input logic NegSumM, // was the sum negitive
output logic [`NF+2:0] NormSum, // normalized sum
output logic SumZero, // is the sum zero
output logic NormSumSticky, UfSticky, // sticky bits
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
output logic ResultDenorm // is the result denormalized
input logic [3*`NF+5:0] SumM, // the positive sum
input logic [`NE-1:0] ZExpM, // exponent of Z
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
input logic KillProdM, // is the product set to zero
input logic AddendStickyM, // the sticky bit caclulated from the aligned addend
input logic NegSumM, // was the sum negitive
output logic [`NF+2:0] NormSum, // normalized sum
output logic SumZero, // is the sum zero
output logic NormSumSticky, UfSticky, // sticky bits
output logic [`NE+1:0] SumExp, // exponent of the normalized sum
output logic ResultDenorm // is the result denormalized
);
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction
logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic PreResultDenorm2; // is the result denormalized - calculated before LZA corection
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later
logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction
logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction
logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
///////////////////////////////////////////////////////////////////////////////
// Normalization
@ -580,14 +601,89 @@ module normalize(
// calculate the sum's exponent
assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
//convert the sum's exponent into the propper percision
if (`FPSIZES == 1) begin
assign SumExpTmp = SumExpTmpTmp;
end else if (`FPSIZES == 2) begin
assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: assign SumExpTmp = SumExpTmpTmp;
`FMT1: assign SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS1)&{`NE+2{|SumExpTmpTmp}};
`FMT2: assign SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS2)&{`NE+2{|SumExpTmpTmp}};
default: assign SumExpTmp = `NE+2'bx;
endcase
end
end else begin
always_comb begin
case (FmtM)
2'h3: assign SumExpTmp = SumExpTmpTmp;
2'h1: assign SumExpTmp = (SumExpTmpTmp-`BIAS+`D_BIAS)&{`NE+2{|SumExpTmpTmp}};
2'h0: assign SumExpTmp = (SumExpTmpTmp-`BIAS+`S_BIAS)&{`NE+2{|SumExpTmpTmp}};
2'h2: assign SumExpTmp = (SumExpTmpTmp-`BIAS+`H_BIAS)&{`NE+2{|SumExpTmpTmp}};
endcase
end
end
logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL;
assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd2)));
assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127);
assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd25+13'd1023-13'd127)) | ~|SumExpTmpTmp;
assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero;
// determine if the result is denormalized
if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
always_comb begin
case (FmtM)
`FMT: assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
`FMT1: assign PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
`FMT2: assign PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
default: assign PreResultDenorm = 1'bx;
endcase
end
end else begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
assign Sum3LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
always_comb begin
case (FmtM)
2'h3: assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
2'h1: assign PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
2'h0: assign PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
2'h2: assign PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
endcase
end
end
// 010. when should be 001.
// - shift left one
@ -599,45 +695,66 @@ module normalize(
// Determine the shift needed for denormal results
// - if not denorm add 1 to shift out the leading 1
assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1;
assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
// Normalize the sum
assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
// LZA correction
assign LZAPlus1 = SumShifted[3*`NF+7];
assign LZAPlus2 = SumShifted[3*`NF+8];
// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
assign CorrSumShifted = LZAPlus1&~KillProdM ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
// Calculate the sticky bit
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM);
if (`FPSIZES == 1) begin
assign NormSumSticky = |CorrSumShifted[2*`NF+2:0];
end else if (`FPSIZES == 2) begin
// 3*NF+5 - NF1 - 3
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
(|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM);
end else if (`FPSIZES == 3) begin
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
(|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
(|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2));
end else begin
assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) |
(|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) |
(|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2));
end
assign UfSticky = AddendStickyM | NormSumSticky;
// Determine sum's exponent
// if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2
assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm2&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
// recalculate if the result is denormalized
assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
endmodule
module fmaround(
input logic FmtM, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic UfSticky, // sticky bit for underlow calculation
input logic [`NF+2:0] NormSum, // normalized sum
input logic AddendStickyM, // addend's sticky bit
input logic NormSumSticky, // normalized sum's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ResultSgnTmp, // the result's sign
output logic CalcPlus1, UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
output logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`NE-1:0] ResultExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic UfSticky, // sticky bit for underlow calculation
input logic [`NF+2:0] NormSum, // normalized sum
input logic AddendStickyM, // addend's sticky bit
input logic NormSumSticky, // normalized sum's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ResultSgnTmp, // the result's sign
output logic CalcPlus1, UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
output logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`NE-1:0] ResultExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding
);
logic LSBNormSum; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
@ -676,18 +793,146 @@ module fmaround(
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
// determine guard, round, and least significant bit of the result
assign Guard = FmtM ? NormSum[2] : NormSum[31];
assign Round = FmtM ? NormSum[1] : NormSum[30];
assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32];
if (`FPSIZES == 1) begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[2];
assign Round = NormSum[1];
assign LSBNormSum = NormSum[3];
// used to determine underflow flag
assign UfGuard = NormSum[1];
assign UfRound = NormSum[0];
assign UfLSBNormSum = NormSum[2];
// determine sticky
assign Sticky = UfSticky | NormSum[0];
end else if (`FPSIZES == 2) begin
// \/-------------NF---------------,
// | NF1 | 3 | |
// '-------NF1------^
// determine guard, round, and least significant bit of the result
assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3];
// used to determine underflow flag
assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
// determine sticky
assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]);
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[2];
assign Round = NormSum[1];
assign LSBNormSum = NormSum[3];
// used to determine underflow flag
assign UfGuard = NormSum[1];
assign UfRound = NormSum[0];
assign UfLSBNormSum = NormSum[2];
// determine sticky
assign Sticky = UfSticky | NormSum[0];
end
`FMT1: begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[`NF-`NF1+2];
assign Round = NormSum[`NF-`NF1+1];
assign LSBNormSum = NormSum[`NF-`NF1+3];
// used to determine underflow flag
assign UfGuard = NormSum[`NF-`NF1+1];
assign UfRound = NormSum[`NF-`NF1];
assign UfLSBNormSum = NormSum[`NF-`NF1+2];
// determine sticky
assign Sticky = UfSticky | NormSum[`NF-`NF1];
end
`FMT2: begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[`NF-`NF2+2];
assign Round = NormSum[`NF-`NF2+1];
assign LSBNormSum = NormSum[`NF-`NF2+3];
// used to determine underflow flag
assign UfGuard = NormSum[`NF-`NF2+1];
assign UfRound = NormSum[`NF-`NF2];
assign UfLSBNormSum = NormSum[`NF-`NF2+2];
// determine sticky
assign Sticky = UfSticky | NormSum[`NF-`NF2];
end
default: begin
assign Guard = 1'bx;
assign Round = 1'bx;
assign LSBNormSum = 1'bx;
assign UfGuard = 1'bx;
assign UfRound = 1'bx;
assign UfLSBNormSum = 1'bx;
assign Sticky = 1'bx;
end
endcase
end
end else begin
always_comb begin
case (FmtM)
2'h3: begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[2];
assign Round = NormSum[1];
assign LSBNormSum = NormSum[3];
// used to determine underflow flag
assign UfGuard = NormSum[1];
assign UfRound = NormSum[0];
assign UfLSBNormSum = NormSum[2];
// determine sticky
assign Sticky = UfSticky | NormSum[0];
end
2'h1: begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[`NF-`D_NF+2];
assign Round = NormSum[`NF-`D_NF+1];
assign LSBNormSum = NormSum[`NF-`D_NF+3];
// used to determine underflow flag
assign UfGuard = NormSum[`NF-`D_NF+1];
assign UfRound = NormSum[`NF-`D_NF];
assign UfLSBNormSum = NormSum[`NF-`D_NF+2];
// determine sticky
assign Sticky = UfSticky | NormSum[`NF-`D_NF];
end
2'h0: begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[`NF-`S_NF+2];
assign Round = NormSum[`NF-`S_NF+1];
assign LSBNormSum = NormSum[`NF-`S_NF+3];
// used to determine underflow flag
assign UfGuard = NormSum[`NF-`S_NF+1];
assign UfRound = NormSum[`NF-`S_NF];
assign UfLSBNormSum = NormSum[`NF-`S_NF+2];
// determine sticky
assign Sticky = UfSticky | NormSum[`NF-`S_NF];
end
2'h2: begin
// determine guard, round, and least significant bit of the result
assign Guard = NormSum[`NF-`H_NF+2];
assign Round = NormSum[`NF-`H_NF+1];
assign LSBNormSum = NormSum[`NF-`H_NF+3];
// used to determine underflow flag
assign UfGuard = NormSum[`NF-`H_NF+1];
assign UfRound = NormSum[`NF-`H_NF];
assign UfLSBNormSum = NormSum[`NF-`H_NF+2];
// determine sticky
assign Sticky = UfSticky | NormSum[`NF-`H_NF];
end
endcase
end
end
// used to determine underflow flag
assign UfGuard = FmtM ? NormSum[1] : NormSum[30];
assign UfRound = FmtM ? NormSum[0] : NormSum[29];
assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31];
// determine sticky
assign Sticky = UfSticky | NormSum[0];
// Deterimine if a small number was supposed to be subtrated
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
@ -729,10 +974,40 @@ module fmaround(
assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);
// Compute rounded result
assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
assign NormSumTruncated = {NormSum[`NF+2:32], NormSum[31:3]&{29{FmtM}}};
if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
`FMT1: assign RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: assign RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
default: assign RoundAdd = (`FLEN+1)'(0);
endcase
end
end else begin
always_comb begin
case (FmtM)
2'h3: assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
2'h1: assign RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: assign RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: assign RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
assign NormSumTruncated = NormSum[`NF+2:3];
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
assign ResultExp = FullResultExp[`NE-1:0];
@ -748,7 +1023,7 @@ module fmaflags(
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ZSgnEffM, PSgnM, // the product and modified Z signs
input logic Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
input logic FmtM, // precision 1 = double 0 = single
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
output logic Invalid, Overflow, Underflow, // flags used to select the result
output logic [4:0] FMAFlgM // FMA flags
);
@ -771,8 +1046,34 @@ module fmaflags(
assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't outputed
assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[7:0] | FullResultExp[8];
// - Don't set the overflow flag if an overflowed result isn't outputed
if (`FPSIZES == 1) begin
assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
end else if (`FPSIZES == 2) begin
assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
`FMT1: assign GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
`FMT2: assign GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
default: assign GtMaxExp = 1'bx;
endcase
end
end else begin
always_comb begin
case (FmtM)
2'h3: assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
2'h1: assign GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
2'h0: assign GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
2'h2: assign GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
endcase
end
end
assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Underflow flag if the number is too small to be represented in normal numbers
@ -793,57 +1094,227 @@ endmodule
module resultselect(
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic FmtM, // precision 1 = double 0 = single
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic ResultSgn, // the result's sign
input logic CalcPlus1, // rounding bits
input logic [`FLEN:0] RoundAdd, // how much to add to the result
input logic Invalid, Overflow, Underflow, // flags
input logic ResultDenorm, // is the result denormalized
input logic [`NE-1:0] ResultExp, // Result exponent
input logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`FLEN-1:0] FMAResM // FMA final result
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic ResultSgn, // the result's sign
input logic CalcPlus1, // rounding bits
input logic [`FLEN:0] RoundAdd, // how much to add to the result
input logic Invalid, Overflow, Underflow, // flags
input logic ResultDenorm, // is the result denormalized
input logic [`NE-1:0] ResultExp, // Result exponent
input logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`FLEN-1:0] FMAResM // FMA final result
);
logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
logic InfSgn;
logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
if (`FPSIZES == 1) begin
if(`IEEE754) begin
assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
if(`IEEE754) begin
assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: begin
if(`IEEE754) begin
assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
end
`FMT1: begin
if(`IEEE754) begin
assign XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign InvalidResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
if(`IEEE754) begin
assign XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
assign YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
assign ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
assign InvalidResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
assign XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
assign KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:0], ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
assign UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
assign NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
end
default: begin
if(`IEEE754) begin
assign XNaNResult = (`FLEN)'(0);
assign YNaNResult = (`FLEN)'(0);
assign ZNaNResult = (`FLEN)'(0);
assign InvalidResult = (`FLEN)'(0);
end else begin
assign XNaNResult = (`FLEN)'(0);
end
assign OverflowResult = (`FLEN)'(0);
assign KillProdResult = (`FLEN)'(0);
assign UnderflowResult = (`FLEN)'(0);
assign InfResult = (`FLEN)'(0);
assign NormResult = (`FLEN)'(0);
end
endcase
end
end else begin
always_comb begin
case (FmtM)
2'h3: begin
if(`IEEE754) begin
assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
end
2'h1: begin
if(`IEEE754) begin
assign XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
assign YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
assign ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
assign InvalidResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
assign XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
assign KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:0], ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
assign UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
assign NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
end
2'h0: begin
if(`IEEE754) begin
assign XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
assign YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
assign ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
assign InvalidResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
assign XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
assign KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:0], ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
assign UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
assign NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
end
2'h2: begin
if(`IEEE754) begin
assign XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
assign YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
assign ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
assign InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
assign XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
assign KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:0], ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
assign UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
assign NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
end
endcase
end
if(`IEEE754) begin
assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
end else begin
assign XNaNResult = FmtM ? {1'b0, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpM[7:0], 1'b1, 22'b0};
assign YNaNResult = FmtM ? {1'b0, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, YExpM[7:0], 1'b1, 22'b0};
assign ZNaNResult = FmtM ? {1'b0, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, ZExpM[7:0], 1'b1, 22'b0};
assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, 1'b0, 8'hff, 1'b1, 22'b0};
end
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
{{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} + (RoundAdd[59:29]&{31{AddendStickyM}})};
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult :
XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM, XExpM[7:0], XManM[51:29]} :
YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM, YExpM[7:0], YManM[51:29]} :
ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
FmtM ? {ResultSgn, ResultExp, ResultFrac} :
{{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
if(`IEEE754) begin
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult :
XInfM|YInfM|ZInfM ? InfResult :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
NormResult;
end else begin
assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
XInfM|YInfM|ZInfM ? InfResult :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
NormResult;
end
endmodule

View File

@ -89,7 +89,6 @@ module fpu (
logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
logic [10:0] BiasE; // bias based on precision (single=7f double=3ff)
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XNaNQ, YNaNQ; // is the input a NaN - divide
@ -176,10 +175,10 @@ module fpu (
// unpack unit
// - splits FP inputs into their various parts
// - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
// FMA
// - two stage FMA
@ -231,7 +230,7 @@ module fpu (
.XSNaNE, .ClassResE);
// Convert
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
.CvtResE, .CvtFlgE);
// data to be stored in memory - to IEU

473
pipelined/src/fpu/unpack.sv Normal file
View File

@ -0,0 +1,473 @@
`include "wally-config.vh"
module unpack (
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [`FPSIZES/3:0] FmtE, // format signal 00 - single 10 - double 11 - quad 10 - half
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
output logic XNormE, // is X a normalized number
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic XExpMaxE // does X have the maximum exponent (NaN or Inf)
);
logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
logic XExpNonzero, YExpNonzero, ZExpNonzero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic XExpZero, YExpZero, ZExpZero; // is the exponent zero
logic YExpMaxE, ZExpMaxE; // is the exponent all 1s
if (`FPSIZES == 1) begin // if there is only one floating point format supported
// sign bit
assign XSgnE = X[`FLEN-1];
assign YSgnE = Y[`FLEN-1];
assign ZSgnE = Z[`FLEN-1];
// exponent
assign XExpE = X[`FLEN-2:`NF];
assign YExpE = Y[`FLEN-2:`NF];
assign ZExpE = Z[`FLEN-2:`NF];
// fraction (no assumed 1)
assign XFracE = X[`NF-1:0];
assign YFracE = Y[`NF-1:0];
assign ZFracE = Z[`NF-1:0];
// is the exponent non-zero
assign XExpNonzero = |XExpE;
assign YExpNonzero = |YExpE;
assign ZExpNonzero = |ZExpE;
// is the exponent all 1's
assign XExpMaxE = &XExpE;
assign YExpMaxE = &YExpE;
assign ZExpMaxE = &ZExpE;
end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported
//***need better names for these constants
// largest format | smaller format
//----------------------------------
// `FLEN | `LEN1 length of floating point number
// `NE | `NE1 length of exponent
// `NF | `NF1 length of fraction
// `BIAS | `BIAS1 exponent's bias value
// `FMT | `FMT1 precision's format value - Q=11 D=01 S=00 H=10
// Possible combinantions specified by spec:
// double and single
// single and half
// Not needed but can also handle:
// quad and double
// quad and single
// quad and half
// double and half
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN
assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
// choose sign bit depending on format - 1=larger precsion 0=smaller precision
assign XSgnE = FmtE ? X[`FLEN-1] : XLen1[`LEN1-1];
assign YSgnE = FmtE ? Y[`FLEN-1] : YLen1[`LEN1-1];
assign ZSgnE = FmtE ? Z[`FLEN-1] : ZLen1[`LEN1-1];
// example double to single conversion:
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/denorm/inf/NaN values
// extract the exponent, converting the smaller exponent into the larger precision if nessisary
assign XExpE = FmtE ? X[`FLEN-2:`NF] : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]};
assign YExpE = FmtE ? Y[`FLEN-2:`NF] : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]};
assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]};
// extract the fraction, add trailing zeroes to the mantissa if nessisary
assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
assign YFracE = FmtE ? Y[`NF-1:0] : {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
assign ZFracE = FmtE ? Z[`NF-1:0] : {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};
// is the exponent non-zero
assign XExpNonzero = FmtE ? |X[`FLEN-2:`NF] : |XLen1[`LEN1-2:`NF1];
assign YExpNonzero = FmtE ? |Y[`FLEN-2:`NF] : |YLen1[`LEN1-2:`NF1];
assign ZExpNonzero = FmtE ? |Z[`FLEN-2:`NF] : |ZLen1[`LEN1-2:`NF1];
// is the exponent all 1's
assign XExpMaxE = FmtE ? &X[`FLEN-2:`NF] : &XLen1[`LEN1-2:`NF1];
assign YExpMaxE = FmtE ? &Y[`FLEN-2:`NF] : &YLen1[`LEN1-2:`NF1];
assign ZExpMaxE = FmtE ? &Z[`FLEN-2:`NF] : &ZLen1[`LEN1-2:`NF1];
end else if (`FPSIZES == 3) begin // three floating point precsions supported
//***need better names for these constants
// largest format | larger format | smallest format
//---------------------------------------------------
// `FLEN | `LEN1 | `LEN2 length of floating point number
// `NE | `NE1 | `NE2 length of exponent
// `NF | `NF1 | `NF2 length of fraction
// `BIAS | `BIAS1 | `BIAS2 exponent's bias value
// `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10
// Possible combinantions specified by spec:
// quad and double and single
// double and single and half
// Not needed but can also handle:
// quad and double and half
// quad and single and half
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision
logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision
assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision
assign XLen2 = &X[`FLEN-1:`LEN2] ? X[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
assign YLen2 = &Y[`FLEN-1:`LEN2] ? Y[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
assign ZLen2 = &Z[`FLEN-1:`LEN2] ? Z[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
always_comb begin
case (FmtE)
`FMT: begin // if input is largest precision (`FLEN - ie quad or double)
// extract the sign bit
XSgnE = X[`FLEN-1];
YSgnE = Y[`FLEN-1];
ZSgnE = Z[`FLEN-1];
// extract the exponent
XExpE = X[`FLEN-2:`NF];
YExpE = Y[`FLEN-2:`NF];
ZExpE = Z[`FLEN-2:`NF];
// extract the fraction
XFracE = X[`NF-1:0];
YFracE = Y[`NF-1:0];
ZFracE = Z[`NF-1:0];
// is the exponent non-zero
XExpNonzero = |X[`FLEN-2:`NF];
YExpNonzero = |Y[`FLEN-2:`NF];
ZExpNonzero = |Z[`FLEN-2:`NF];
// is the exponent all 1's
XExpMaxE = &X[`FLEN-2:`NF];
YExpMaxE = &Y[`FLEN-2:`NF];
ZExpMaxE = &Z[`FLEN-2:`NF];
end
`FMT1: begin // if input is larger precsion (`LEN1 - double or single)
// extract the sign bit
XSgnE = XLen1[`LEN1-1];
YSgnE = YLen1[`LEN1-1];
ZSgnE = ZLen1[`LEN1-1];
// example double to single conversion:
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/denorm/inf/NaN values
// convert the larger precision's exponent to use the largest precision's bias
XExpE = {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]};
YExpE = {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]};
ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
YFracE = {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
ZFracE = {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};
// is the exponent non-zero
XExpNonzero = |XLen1[`LEN1-2:`NF1];
YExpNonzero = |YLen1[`LEN1-2:`NF1];
ZExpNonzero = |ZLen1[`LEN1-2:`NF1];
// is the exponent all 1's
XExpMaxE = &XLen1[`LEN1-2:`NF1];
YExpMaxE = &YLen1[`LEN1-2:`NF1];
ZExpMaxE = &ZLen1[`LEN1-2:`NF1];
end
`FMT2: begin // if input is smallest precsion (`LEN2 - single or half)
// exctract the sign bit
XSgnE = XLen2[`LEN2-1];
YSgnE = YLen2[`LEN2-1];
ZSgnE = ZLen2[`LEN2-1];
// example double to single conversion:
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/denorm/inf/NaN values
// convert the smallest precision's exponent to use the largest precision's bias
XExpE = {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]};
YExpE = {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]};
ZExpE = {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)};
YFracE = {YLen2[`NF2-1:0], (`NF-`NF2)'(0)};
ZFracE = {ZLen2[`NF2-1:0], (`NF-`NF2)'(0)};
// is the exponent non-zero
XExpNonzero = |XLen2[`LEN2-2:`NF2];
YExpNonzero = |YLen2[`LEN2-2:`NF2];
ZExpNonzero = |ZLen2[`LEN2-2:`NF2];
// is the exponent all 1's
XExpMaxE = &XLen2[`LEN2-2:`NF2];
YExpMaxE = &YLen2[`LEN2-2:`NF2];
ZExpMaxE = &ZLen2[`LEN2-2:`NF2];
end
default: begin
XSgnE = 0;
YSgnE = 0;
ZSgnE = 0;
XExpE = 0;
YExpE = 0;
ZExpE = 0;
XFracE = 0;
YFracE = 0;
ZFracE = 0;
XExpNonzero = 0;
YExpNonzero = 0;
ZExpNonzero = 0;
XExpMaxE = 0;
YExpMaxE = 0;
ZExpMaxE = 0;
end
endcase
end
end else begin // if all precsisons are supported - quad, double, single, and half
// quad | double | single | half
//-------------------------------------------------------------------
// `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number
// `Q_NE | `D_NE | `S_NE | `H_NE length of exponent
// `Q_NF | `D_NF | `S_NF | `H_NF length of fraction
// `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value
// `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10
logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision
logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision
logic [`LEN2-1:0] XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision
assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
assign YLen1 = &Y[`Q_LEN-1:`D_LEN] ? Y[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
assign ZLen1 = &Z[`Q_LEN-1:`D_LEN] ? Z[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision
assign XLen2 = &X[`Q_LEN-1:`S_LEN] ? X[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
assign YLen2 = &Y[`Q_LEN-1:`S_LEN] ? Y[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
assign ZLen2 = &Z[`Q_LEN-1:`S_LEN] ? Z[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
// Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision
assign XLen3 = &X[`Q_LEN-1:`H_LEN] ? X[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
assign YLen3 = &Y[`Q_LEN-1:`H_LEN] ? Y[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
assign ZLen3 = &Z[`Q_LEN-1:`H_LEN] ? Z[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
always_comb begin
case (FmtE)
`Q_BIAS: begin // if input is quad percision
// extract sign bit
XSgnE = X[`Q_LEN-1];
YSgnE = Y[`Q_LEN-1];
ZSgnE = Z[`Q_LEN-1];
// extract the exponent
XExpE = X[`Q_LEN-2:`Q_NF];
YExpE = Y[`Q_LEN-2:`Q_NF];
ZExpE = Z[`Q_LEN-2:`Q_NF];
// extract the fraction
XFracE = X[`Q_NF-1:0];
YFracE = Y[`Q_NF-1:0];
ZFracE = Z[`Q_NF-1:0];
// is the exponent non-zero
XExpNonzero = |X[`Q_LEN-2:`Q_NF];
YExpNonzero = |Y[`Q_LEN-2:`Q_NF];
ZExpNonzero = |Z[`Q_LEN-2:`Q_NF];
// is the exponent all 1's
XExpMaxE = &X[`Q_LEN-2:`Q_NF];
YExpMaxE = &Y[`Q_LEN-2:`Q_NF];
ZExpMaxE = &Z[`Q_LEN-2:`Q_NF];
end
`D_BIAS: begin // if input is double percision
// extract sign bit
XSgnE = XLen1[`D_LEN-1];
YSgnE = YLen1[`D_LEN-1];
ZSgnE = ZLen1[`D_LEN-1];
// example double to single conversion:
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/denorm/inf/NaN values
// convert the double precsion exponent into quad precsion
XExpE = {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]};
YExpE = {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]};
ZExpE = {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)};
YFracE = {YLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)};
ZFracE = {ZLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)};
// is the exponent non-zero
XExpNonzero = |XLen1[`D_LEN-2:`D_NE];
YExpNonzero = |YLen1[`D_LEN-2:`D_NE];
ZExpNonzero = |ZLen1[`D_LEN-2:`D_NE];
// is the exponent all 1's
XExpMaxE = &XLen1[`D_LEN-2:`D_NE];
YExpMaxE = &YLen1[`D_LEN-2:`D_NE];
ZExpMaxE = &ZLen1[`D_LEN-2:`D_NE];
end
`S_BIAS: begin // if input is single percision
// extract sign bit
XSgnE = XLen2[`S_LEN-1];
YSgnE = YLen2[`S_LEN-1];
ZSgnE = ZLen2[`S_LEN-1];
// example double to single conversion:
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/denorm/inf/NaN values
// convert the single precsion exponent into quad precsion
XExpE = {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]};
YExpE = {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]};
ZExpE = {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
YFracE = {YLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
ZFracE = {ZLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
// is the exponent non-zero
XExpNonzero = |XLen2[`S_LEN-2:`S_NF];
YExpNonzero = |YLen2[`S_LEN-2:`S_NF];
ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF];
// is the exponent all 1's
XExpMaxE = &XLen2[`S_LEN-2:`S_NF];
YExpMaxE = &YLen2[`S_LEN-2:`S_NF];
ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF];
end
`H_BIAS: begin // if input is half percision
// extract sign bit
XSgnE = XLen3[`H_LEN-1];
YSgnE = YLen3[`H_LEN-1];
ZSgnE = ZLen3[`H_LEN-1];
// example double to single conversion:
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/denorm/inf/NaN values
// convert the half precsion exponent into quad precsion
XExpE = {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]};
YExpE = {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]};
ZExpE = {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]};
// extract the fraction and add the nessesary trailing zeros
XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
YFracE = {YLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
ZFracE = {ZLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
// is the exponent non-zero
XExpNonzero = |XLen3[`H_LEN-2:`H_NF];
YExpNonzero = |YLen3[`H_LEN-2:`H_NF];
ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF];
// is the exponent all 1's
XExpMaxE = &XLen3[`H_LEN-2:`H_NF];
YExpMaxE = &YLen3[`H_LEN-2:`H_NF];
ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF];
end
endcase
end
end
// is the exponent all 0's
assign XExpZero = ~XExpNonzero;
assign YExpZero = ~YExpNonzero;
assign ZExpZero = ~ZExpNonzero;
// is the fraction zero
assign XFracZero = ~|XFracE;
assign YFracZero = ~|YFracE;
assign ZFracZero = ~|ZFracE;
// add the assumed one (or zero if denormal or zero) to create the mantissa
assign XManE = {XExpNonzero, XFracE};
assign YManE = {YExpNonzero, YFracE};
assign ZManE = {ZExpNonzero, ZFracE};
// is X normalized
assign XNormE = ~(XExpMaxE|XExpZero);
// is the input a NaN
// - force to be a NaN if it isn't properly Nan Boxed
assign XNaNE = XExpMaxE & ~XFracZero;
assign YNaNE = YExpMaxE & ~YFracZero;
assign ZNaNE = ZExpMaxE & ~ZFracZero;
// is the input a singnaling NaN
assign XSNaNE = XNaNE&~XFracE[`NF-1];
assign YSNaNE = YNaNE&~YFracE[`NF-1];
assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
// is the input denormalized
assign XDenormE = XExpZero & ~XFracZero;
assign YDenormE = YExpZero & ~YFracZero;
assign ZDenormE = ZExpZero & ~ZFracZero;
// is the input infinity
assign XInfE = XExpMaxE & XFracZero;
assign YInfE = YExpMaxE & YFracZero;
assign ZInfE = ZExpMaxE & ZFracZero;
// is the input zero
assign XZeroE = XExpZero & XFracZero;
assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero;
endmodule

View File

@ -1,95 +0,0 @@
`include "wally-config.vh"
module unpack (
input logic [63:0] X, Y, Z,
input logic FmtE,
input logic [2:0] FOpCtrlE,
output logic XSgnE, YSgnE, ZSgnE,
output logic [10:0] XExpE, YExpE, ZExpE,
output logic [52:0] XManE, YManE, ZManE,
output logic XNormE,
output logic XNaNE, YNaNE, ZNaNE,
output logic XSNaNE, YSNaNE, ZSNaNE,
output logic XDenormE, YDenormE, ZDenormE,
output logic XZeroE, YZeroE, ZZeroE,
output logic [10:0] BiasE,
output logic XInfE, YInfE, ZInfE,
output logic XExpMaxE
);
logic [51:0] XFracE, YFracE, ZFracE;
logic XExpNonzero, YExpNonzero, ZExpNonzero;
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
logic YExpMaxE, ZExpMaxE; // input exponent all 1s
logic [31:0] XFloat, YFloat, ZFloat; // Bottom half or NaN, if RV64 and not properly NaN boxed
// Determine if number is NaN as double precision to check single precision NaN boxing
if (`F_SUPPORTED & ~`D_SUPPORTED) begin // eventually this should change to FLEN when FLEN isn't hardwared to 64
assign XFloat = X[31:0];
assign YFloat = Y[31:0];
assign ZFloat = Z[31:0];
end else begin
assign XFloat = &X[`FLEN-1:32] ? X[31:0] : 32'h7fc00000;
assign YFloat = &Y[`FLEN-1:32] ? Y[31:0] : 32'h7fc00000;
assign ZFloat = &Z[`FLEN-1:32] ? Z[31:0] : 32'h7fc00000;
end
assign XSgnE = FmtE ? X[63] : XFloat[31];
assign YSgnE = FmtE ? Y[63] : YFloat[31];
assign ZSgnE = FmtE ? Z[63] : ZFloat[31];
assign XExpE = FmtE ? X[62:52] : {XFloat[30], {3{~XFloat[30]&~XExpZero|XExpMaxE}}, XFloat[29:23]};
assign YExpE = FmtE ? Y[62:52] : {YFloat[30], {3{~YFloat[30]&~YExpZero|YExpMaxE}}, YFloat[29:23]};
assign ZExpE = FmtE ? Z[62:52] : {ZFloat[30], {3{~ZFloat[30]&~ZExpZero|ZExpMaxE}}, ZFloat[29:23]};
assign XFracE = FmtE ? X[51:0] : {XFloat[22:0], 29'b0};
assign YFracE = FmtE ? Y[51:0] : {YFloat[22:0], 29'b0};
assign ZFracE = FmtE ? Z[51:0] : {ZFloat[22:0], 29'b0};
assign XExpNonzero = FmtE ? |X[62:52] : |XFloat[30:23];
assign YExpNonzero = FmtE ? |Y[62:52] : |YFloat[30:23];
assign ZExpNonzero = FmtE ? |Z[62:52] : |ZFloat[30:23];
assign XExpZero = ~XExpNonzero;
assign YExpZero = ~YExpNonzero;
assign ZExpZero = ~ZExpNonzero;
assign XFracZero = ~|XFracE;
assign YFracZero = ~|YFracE;
assign ZFracZero = ~|ZFracE;
assign XManE = {XExpNonzero, XFracE};
assign YManE = {YExpNonzero, YFracE};
assign ZManE = {ZExpNonzero, ZFracE};
assign XExpMaxE = FmtE ? &X[62:52] : &XFloat[30:23];
assign YExpMaxE = FmtE ? &Y[62:52] : &YFloat[30:23];
assign ZExpMaxE = FmtE ? &Z[62:52] : &ZFloat[30:23];
assign XNormE = ~(XExpMaxE|XExpZero);
// force single precision input to be a NaN if it isn't properly Nan Boxed
assign XNaNE = XExpMaxE & ~XFracZero;
assign YNaNE = YExpMaxE & ~YFracZero;
assign ZNaNE = ZExpMaxE & ~ZFracZero;
assign XSNaNE = XNaNE&~XFracE[51];
assign YSNaNE = YNaNE&~YFracE[51];
assign ZSNaNE = ZNaNE&~ZFracE[51];
assign XDenormE = XExpZero & ~XFracZero;
assign YDenormE = YExpZero & ~YFracZero;
assign ZDenormE = ZExpZero & ~ZFracZero;
assign XInfE = XExpMaxE & XFracZero;
assign YInfE = YExpMaxE & YFracZero;
assign ZInfE = ZExpMaxE & ZFracZero;
assign XZeroE = XExpZero & XFracZero;
assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero;
assign BiasE = 11'h3ff; // always use 1023 because exponents are unpacked to double precision
endmodule

View File

@ -34,6 +34,7 @@ module simpleram #(parameter BASE=0, RANGE = 65535) (
input logic clk,
input logic [31:0] a,
input logic we,
input logic [`XLEN/8-1:0] ByteMask,
input logic [`XLEN-1:0] wd,
output logic [`XLEN-1:0] rd
);
@ -45,9 +46,14 @@ module simpleram #(parameter BASE=0, RANGE = 65535) (
logic [31:adrlsb] adrmsbs;
assign adrmsbs = a[31:adrlsb];
always_ff @(posedge clk) begin
always_ff @(posedge clk)
rd <= RAM[adrmsbs];
if (we) RAM[adrmsbs] <= #1 wd;
genvar index;
for(index = 0; index < `XLEN/8; index++) begin
always_ff @(posedge clk) begin
if (we & ByteMask[index]) RAM[adrmsbs][8*(index+1)-1:8*index] <= #1 wd[8*(index+1)-1:8*index];
end
end
endmodule

View File

@ -101,17 +101,8 @@ module SRAM2P1R1W
// write port
assign bwe = {WIDTH{WEN1Q}} & BitWEN1;
always_ff @(posedge clk) begin
always_ff @(posedge clk)
mem[WA1Q] <= WD1Q & bwe | mem[WA1Q] & ~bwe;
/*
genvar index;
for (index = 0; index < WIDTH; index = index + 1) begin:bitwrite
always_ff @(posedge clk) begin
if (WEN1Q & BitWEN1[index]) begin
mem[WA1Q][index] <= WD1Q[index];
end
end*/
end
endmodule

View File

@ -175,12 +175,13 @@ module ifu (
if (`IMEM == `MEM_TIM) begin : irom // *** fix up dtim taking PA_BITS rather than XLEN, *** IEUAdr is a bad name. Probably use a ROM rather than DTIM
dtim irom(.clk, .reset, .CPUBusy, .LSURWM(2'b10), .IEUAdrM(PCPF[31:0]), .IEUAdrE(PCNextFSpill),
.TrapM(1'b0), .FinalWriteDataM(),
.ReadDataWordM(AllInstrRawF), .BusStall, .LSUBusWrite(), .LSUBusRead(IFUBusRead),
.BusCommittedM(), .ReadDataWordMuxM(), .DCacheStallM(ICacheStallF),
.TrapM(1'b0), .FinalWriteDataM(), .ByteMaskM('0),
.ReadDataWordM(FinalInstrRawF), .BusStall, .LSUBusWrite(), .LSUBusRead(IFUBusRead),
.BusCommittedM(), .DCacheStallM(ICacheStallF),
.DCacheCommittedM(), .DCacheMiss(ICacheMiss), .DCacheAccess(ICacheAccess));
end else begin : bus
end
if (`IBUS) begin : bus
localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS/`XLEN : 1;
localparam integer LINELEN = (CACHE_ENABLED) ? `ICACHE_LINELENINBITS : `XLEN;
localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1;
@ -188,7 +189,6 @@ module ifu (
logic [LINELEN-1:0] ICacheBusWriteData;
logic [`PA_BITS-1:0] ICacheBusAdr;
logic ICacheBusAck;
logic save,restore;
logic [31:0] temp;
logic SelUncachedAdr;
@ -212,14 +212,15 @@ module ifu (
if(CACHE_ENABLED) begin : icache
cache #(.LINELEN(`ICACHE_LINELENINBITS),
.NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS),
.NUMWAYS(`ICACHE_NUMWAYS), .DCACHE(0))
.NUMWAYS(`ICACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0))
icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .IgnoreRequestTrapM('0),
.CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
.CacheFetchLine(ICacheFetchLine),
.CacheWriteLine(), .ReadDataLine(ReadDataLine),
.save, .restore, .Cacheable(CacheableF),
.CacheWriteLine(), .ReadDataWord(FinalInstrRawF),
.Cacheable(CacheableF),
.CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess),
.ByteMask('0), .WordCount('0), .LSUBusWriteCrit('0),
.FinalWriteData('0),
.RW(2'b10),
.Atomic('0), .FlushCache('0),
@ -227,15 +228,13 @@ module ifu (
.PAdr(PCPF),
.CacheCommitted(), .InvalidateCacheM(InvalidateICacheM));
subcachelineread #(LINELEN, 32, 16) subcachelineread(
.clk, .reset, .PAdr(PCPF), .save, .restore,
.ReadDataLine, .ReadDataWord(FinalInstrRawF));
end else begin : passthrough
assign {ICacheFetchLine, ICacheBusAdr, ICacheStallF, FinalInstrRawF} = '0;
assign ICacheAccess = CacheableF; assign ICacheMiss = CacheableF;
end
end
end else begin : nobus // block: bus
assign AllInstrRawF = FinalInstrRawF;
end
assign IFUCacheBusStallF = ICacheStallF | BusStall;
assign IFUStallF = IFUCacheBusStallF | SelNextSpillF;

View File

@ -41,7 +41,6 @@ module atomic (
input logic [1:0] LSUAtomicM,
input logic [1:0] PreLSURWM,
input logic IgnoreRequest,
input logic DTLBMissM,
output logic [`XLEN-1:0] FinalAMOWriteDataM,
output logic SquashSCW,
output logic [1:0] LSURWM);
@ -52,7 +51,7 @@ module atomic (
amoalu amoalu(.srca(ReadDataM), .srcb(LSUWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]),
.result(AMOResult));
mux2 #(`XLEN) wdmux(LSUWriteDataM, AMOResult, LSUAtomicM[1], FinalAMOWriteDataM);
assign MemReadM = PreLSURWM[1] & ~(IgnoreRequest) & ~DTLBMissM; // *** is DTLBMiss needed; might be par tof ignorerequest
assign MemReadM = PreLSURWM[1] & ~IgnoreRequest;
lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .PreLSURWM, .LSUAtomicM, .LSUPAdrM,
.SquashSCW, .LSURWM);

View File

@ -37,19 +37,19 @@ module dtim(
input logic [`XLEN-1:0] IEUAdrE,
input logic TrapM,
input logic [`XLEN-1:0] FinalWriteDataM,
input logic [`XLEN/8-1:0] ByteMaskM,
output logic [`XLEN-1:0] ReadDataWordM,
output logic BusStall,
output logic LSUBusWrite,
output logic LSUBusRead,
output logic BusCommittedM,
output logic [`XLEN-1:0] ReadDataWordMuxM,
output logic DCacheStallM,
output logic DCacheCommittedM,
output logic DCacheMiss,
output logic DCacheAccess);
simpleram #(.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram (
.clk,
.clk, .ByteMask(ByteMaskM),
.a(CPUBusy | LSURWM[0] | reset ? IEUAdrM[31:0] : IEUAdrE[31:0]), // move mux out; this shouldn't be needed when stails are handled differently ***
.we(LSURWM[0] & ~TrapM), // have to ignore write if Trap.
.wd(FinalWriteDataM), .rd(ReadDataWordM));
@ -57,7 +57,6 @@ module dtim(
// since we have a local memory the bus connections are all disabled.
// There are no peripherals supported.
assign {BusStall, LSUBusWrite, LSUBusRead, BusCommittedM} = '0;
assign ReadDataWordMuxM = ReadDataWordM;
assign {DCacheStallM, DCacheCommittedM} = '0;
assign {DCacheMiss, DCacheAccess} = '0;

View File

@ -56,7 +56,7 @@ module interlockfsm(
logic AnyCPUReqM;
typedef enum logic[2:0] {STATE_T0_READY,
STATE_T0_REPLAY,
STATE_T1_REPLAY,
STATE_T3_DTLB_MISS,
STATE_T4_ITLB_MISS,
STATE_T5_ITLB_MISS,
@ -82,13 +82,13 @@ module interlockfsm(
else if(ToITLBMiss) InterlockNextState = STATE_T5_ITLB_MISS;
else if(ToBoth) InterlockNextState = STATE_T7_DITLB_MISS;
else InterlockNextState = STATE_T0_READY;
STATE_T0_REPLAY: if(DCacheStallM) InterlockNextState = STATE_T0_REPLAY;
STATE_T1_REPLAY: if(DCacheStallM) InterlockNextState = STATE_T1_REPLAY;
else InterlockNextState = STATE_T0_READY;
STATE_T3_DTLB_MISS: if(DTLBWriteM) InterlockNextState = STATE_T0_REPLAY;
STATE_T3_DTLB_MISS: if(DTLBWriteM) InterlockNextState = STATE_T1_REPLAY;
else InterlockNextState = STATE_T3_DTLB_MISS;
STATE_T4_ITLB_MISS: if(ITLBWriteF) InterlockNextState = STATE_T0_READY;
else InterlockNextState = STATE_T4_ITLB_MISS;
STATE_T5_ITLB_MISS: if(ITLBWriteF) InterlockNextState = STATE_T0_REPLAY;
STATE_T5_ITLB_MISS: if(ITLBWriteF) InterlockNextState = STATE_T1_REPLAY;
else InterlockNextState = STATE_T5_ITLB_MISS;
STATE_T7_DITLB_MISS: if(DTLBWriteM) InterlockNextState = STATE_T5_ITLB_MISS;
else InterlockNextState = STATE_T7_DITLB_MISS;
@ -122,12 +122,12 @@ module interlockfsm(
endcase
end
assign SelReplayMemE = (InterlockCurrState == STATE_T0_REPLAY & DCacheStallM) |
assign SelReplayMemE = (InterlockCurrState == STATE_T1_REPLAY & DCacheStallM) |
(InterlockCurrState == STATE_T3_DTLB_MISS & DTLBWriteM) |
(InterlockCurrState == STATE_T5_ITLB_MISS & ITLBWriteF);
assign SelHPTW = (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) |
(InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS);
assign IgnoreRequestTLB = (InterlockCurrState == STATE_T0_READY & (ITLBMissOrDAFaultF | DTLBMissOrDAFaultM));
assign IgnoreRequestTrapM = (InterlockCurrState == STATE_T0_READY & (TrapM)) |
((InterlockCurrState == STATE_T0_REPLAY) & (TrapM));
((InterlockCurrState == STATE_T1_REPLAY) & (TrapM));
endmodule

View File

@ -82,7 +82,6 @@ module lsu (
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // *** this one especially has a large note attached to it in pmpchecker.
);
localparam CACHE_ENABLED = `DMEM == `MEM_CACHE;
logic [`XLEN+1:0] IEUAdrExtM;
logic [`PA_BITS-1:0] LSUPAdrM;
logic DTLBMissM;
@ -105,7 +104,8 @@ module lsu (
logic LSUBusWriteCrit;
logic DataDAPageFaultM;
logic [`XLEN-1:0] LSUWriteDataM;
logic [(`XLEN-1)/8:0] ByteMaskM;
// *** TO DO: Burst mode, byte write enables to DTIM, cache, exeternal memory, remove subword write from uncore,
flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM);
@ -193,10 +193,11 @@ module lsu (
// Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM,
.ReadDataWordM, .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
.ReadDataWordMuxM, .DCacheStallM, .DCacheCommittedM,
.DCacheStallM, .DCacheCommittedM, .ByteMaskM,
.DCacheMiss, .DCacheAccess);
assign SelUncachedAdr = '0; // value does not matter.
end else begin : bus
end
if (`DBUS) begin : bus
localparam CACHE_ENABLED = `DMEM == `MEM_CACHE;
localparam integer WORDSPERLINE = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS/`XLEN : 1;
localparam integer LINELEN = (CACHE_ENABLED) ? `DCACHE_LINELENINBITS : `XLEN;
localparam integer LOGWPL = (CACHE_ENABLED) ? $clog2(WORDSPERLINE) : 1;
@ -206,8 +207,6 @@ module lsu (
logic DCacheWriteLine;
logic DCacheFetchLine;
logic DCacheBusAck;
logic save, restore;
logic [`PA_BITS-1:0] WordOffsetAddr;
logic SelBus;
logic [LOGWPL-1:0] WordCount;
@ -224,58 +223,45 @@ module lsu (
.s(SelUncachedAdr), .y(ReadDataWordMuxM));
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM), .d1(FinalWriteDataM),
.s(SelUncachedAdr), .y(LSUBusHWDATA));
mux2 #(`PA_BITS) WordAdrrMux(.d0(LSUPAdrM),
.d1({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)), .s(LSUBusWriteCrit),
.y(WordOffsetAddr)); // *** can reduce width of mux. only need the offset.
if(CACHE_ENABLED) begin : dcache
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .save, .restore, .RW(LSURWM), .Atomic(LSUAtomicM),
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`XLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount,
.FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM),
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.IgnoreRequestTLB, .IgnoreRequestTrapM, .CacheCommitted(DCacheCommittedM),
.CacheBusAdr(DCacheBusAdr), .ReadDataLine(ReadDataLineM),
.CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM),
.CacheBusWriteData(DCacheBusWriteData), .CacheFetchLine(DCacheFetchLine),
.CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0));
subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( // *** merge into cache
.clk, .reset, .PAdr(WordOffsetAddr), .save, .restore,
.ReadDataLine(ReadDataLineM), .ReadDataWord(ReadDataWordM));
end else begin : passthrough
assign {ReadDataWordM, DCacheStallM, DCacheCommittedM, DCacheFetchLine, DCacheWriteLine} = '0;
assign DCacheMiss = CacheableM; assign DCacheAccess = CacheableM;
end
end else begin: nobus // block: bus
assign {LSUBusHWDATA, SelUncachedAdr} = '0;
assign ReadDataWordMuxM = ReadDataWordM;
end
if(`DMEM != `MEM_BUS) begin // *** always, not just with no MEM_BUS. Only produces byte write enable
logic [`XLEN-1:0] ReadDataWordMaskedM;
// ** there is definitely a sww bug with memory mapped i/o. check wally64priv.
assign ReadDataWordMaskedM = SelUncachedAdr ? '0 : ReadDataWordM; // AND-gate
// *** consider moving this AND gate into the sww.
//assign ReadDataWordMaskedM = ReadDataWordM; // *** this change only works because the i/o devices dont' write bytes other than the ones specific to their address.
subwordwrite subwordwrite(.HRDATA(ReadDataWordMaskedM), .HADDRD(LSUPAdrM[2:0]),
.HSIZED({LSUFunct3M[2], 1'b0, LSUFunct3M[1:0]}),
.HWDATAIN(FinalAMOWriteDataM), .HWDATA(FinalWriteDataM));
end else
assign FinalWriteDataM = FinalAMOWriteDataM;
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
.Funct3M(LSUFunct3M), .ReadDataM);
/////////////////////////////////////////////////////////////////////////////////////////////
// Atomic operations
/////////////////////////////////////////////////////////////////////////////////////////////
// *** why does this need DTLBMissM?
if (`A_SUPPORTED) begin:atomic
atomic atomic(.clk, .reset, .FlushW, .CPUBusy, .ReadDataM, .LSUWriteDataM, .LSUPAdrM,
.LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest,
.DTLBMissM, .FinalAMOWriteDataM, .SquashSCW, .LSURWM);
.FinalAMOWriteDataM, .SquashSCW, .LSURWM);
end else begin:lrsc
assign SquashSCW = 0; assign LSURWM = PreLSURWM; assign FinalAMOWriteDataM = LSUWriteDataM;
end
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
.LSUFunct3M, .FinalAMOWriteDataM, .FinalWriteDataM, .ByteMaskM);
endmodule

View File

@ -0,0 +1,87 @@
///////////////////////////////////////////
// subwordwrite.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Masking and muxing for subword writes
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module subwordwrite (
input logic [2:0] LSUPAdrM,
input logic [2:0] LSUFunct3M,
input logic [`XLEN-1:0] FinalAMOWriteDataM,
output logic [`XLEN-1:0] FinalWriteDataM,
output logic [`XLEN/8-1:0] ByteMaskM
);
logic [`XLEN-1:0] WriteDataSubwordDuplicated;
swbytemask swbytemask(.HSIZED({LSUFunct3M[2], 1'b0, LSUFunct3M[1:0]}), .HADDRD(LSUPAdrM),
.ByteMask(ByteMaskM));
if (`XLEN == 64) begin:sww
// Handle subword writes
always_comb
case(LSUFunct3M[1:0])
2'b00: WriteDataSubwordDuplicated = {8{FinalAMOWriteDataM[7:0]}}; // sb
2'b01: WriteDataSubwordDuplicated = {4{FinalAMOWriteDataM[15:0]}}; // sh
2'b10: WriteDataSubwordDuplicated = {2{FinalAMOWriteDataM[31:0]}}; // sw
2'b11: WriteDataSubwordDuplicated = FinalAMOWriteDataM; // sw
endcase
always_comb begin
FinalWriteDataM='0;
if (ByteMaskM[0]) FinalWriteDataM[7:0] = WriteDataSubwordDuplicated[7:0];
if (ByteMaskM[1]) FinalWriteDataM[15:8] = WriteDataSubwordDuplicated[15:8];
if (ByteMaskM[2]) FinalWriteDataM[23:16] = WriteDataSubwordDuplicated[23:16];
if (ByteMaskM[3]) FinalWriteDataM[31:24] = WriteDataSubwordDuplicated[31:24];
if (ByteMaskM[4]) FinalWriteDataM[39:32] = WriteDataSubwordDuplicated[39:32];
if (ByteMaskM[5]) FinalWriteDataM[47:40] = WriteDataSubwordDuplicated[47:40];
if (ByteMaskM[6]) FinalWriteDataM[55:48] = WriteDataSubwordDuplicated[55:48];
if (ByteMaskM[7]) FinalWriteDataM[63:56] = WriteDataSubwordDuplicated[63:56];
end
end else begin:sww // 32-bit
// Handle subword writes
always_comb
case(LSUFunct3M[1:0])
2'b00: WriteDataSubwordDuplicated = {4{FinalAMOWriteDataM[7:0]}}; // sb
2'b01: WriteDataSubwordDuplicated = {2{FinalAMOWriteDataM[15:0]}}; // sh
2'b10: WriteDataSubwordDuplicated = FinalAMOWriteDataM; // sw
default: WriteDataSubwordDuplicated = FinalAMOWriteDataM; // shouldn't happen
endcase
always_comb begin
FinalWriteDataM='0;
if (ByteMaskM[0]) FinalWriteDataM[7:0] = WriteDataSubwordDuplicated[7:0];
if (ByteMaskM[1]) FinalWriteDataM[15:8] = WriteDataSubwordDuplicated[15:8];
if (ByteMaskM[2]) FinalWriteDataM[23:16] = WriteDataSubwordDuplicated[23:16];
if (ByteMaskM[3]) FinalWriteDataM[31:24] = WriteDataSubwordDuplicated[31:24];
end
end
endmodule

View File

@ -0,0 +1,66 @@
///////////////////////////////////////////
// ram.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: On-chip RAM, external to core
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module swbytemask (
input logic [3:0] HSIZED,
input logic [2:0] HADDRD,
output logic [`XLEN/8-1:0] ByteMask);
if(`XLEN == 64) begin
always_comb begin
case(HSIZED[1:0])
2'b00: begin ByteMask = 8'b00000000; ByteMask[HADDRD[2:0]] = 1; end // sb
2'b01: case (HADDRD[2:1])
2'b00: ByteMask = 8'b0000_0011;
2'b01: ByteMask = 8'b0000_1100;
2'b10: ByteMask = 8'b0011_0000;
2'b11: ByteMask = 8'b1100_0000;
endcase
2'b10: if (HADDRD[2]) ByteMask = 8'b11110000;
else ByteMask = 8'b00001111;
2'b11: ByteMask = 8'b1111_1111;
endcase
end
end else begin
always_comb begin
case(HSIZED[1:0])
2'b00: begin ByteMask = 4'b0000; ByteMask[HADDRD[1:0]] = 1; end // sb
2'b01: if (HADDRD[1]) ByteMask = 4'b1100;
else ByteMask = 4'b0011;
2'b10: ByteMask = 4'b1111;
default: ByteMask = 4'b1111;
endcase
end
end
endmodule

View File

@ -38,6 +38,7 @@ module ram #(parameter BASE=0, RANGE = 65535) (
input logic HREADY,
input logic [1:0] HTRANS,
input logic [`XLEN-1:0] HWDATA,
input logic [3:0] HSIZED,
output logic [`XLEN-1:0] HREADRam,
output logic HRESPRam, HREADYRam
);
@ -53,6 +54,7 @@ module ram #(parameter BASE=0, RANGE = 65535) (
logic initTrans;
logic memwrite;
logic [3:0] busycount;
logic [`XLEN/8-1:0] ByteMaskM;
if(`FPGA) begin:ram
initial begin
@ -104,6 +106,8 @@ module ram #(parameter BASE=0, RANGE = 65535) (
end // initial begin
end // if (FPGA)
swbytemask swbytemask(.HSIZED, .HADDRD(A[2:0]), .ByteMask(ByteMaskM));
assign initTrans = HREADY & HSELRam & (HTRANS != 2'b00);
// *** this seems like a weird way to use reset
@ -148,17 +152,24 @@ module ram #(parameter BASE=0, RANGE = 65535) (
-----/\----- EXCLUDED -----/\----- */
/* verilator lint_off WIDTH */
genvar index;
always_ff @(posedge HCLK)
HWADDR <= #1 A;
if (`XLEN == 64) begin:ramrw
always_ff @(posedge HCLK) begin
HWADDR <= #1 A;
always_ff @(posedge HCLK)
HREADRam0 <= #1 RAM[A[31:3]];
if (memwrite & risingHREADYRam) RAM[HWADDR[31:3]] <= #1 HWDATA;
for(index = 0; index < `XLEN/8; index++) begin
always_ff @(posedge HCLK) begin
if (memwrite & risingHREADYRam & ByteMaskM[index]) RAM[HWADDR[31:3]][8*(index+1)-1:8*index] <= #1 HWDATA[8*(index+1)-1:8*index];
end
end
end else begin
always_ff @(posedge HCLK) begin:ramrw
HWADDR <= #1 A;
always_ff @(posedge HCLK)
HREADRam0 <= #1 RAM[A[31:2]];
if (memwrite & risingHREADYRam) RAM[HWADDR[31:2]] <= #1 HWDATA;
for(index = 0; index < `XLEN/8; index++) begin
always_ff @(posedge HCLK) begin:ramrw
if (memwrite & risingHREADYRam & ByteMaskM[index]) RAM[HWADDR[31:2]][8*(index+1)-1:8*index] <= #1 HWDATA[8*(index+1)-1:8*index];
end
end
end
/* verilator lint_on WIDTH */

View File

@ -1,111 +0,0 @@
///////////////////////////////////////////
// subwordwrite.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Masking and muxing for subword writes
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module subwordwrite (
input logic [`XLEN-1:0] HRDATA,
input logic [2:0] HADDRD,
input logic [3:0] HSIZED,
input logic [`XLEN-1:0] HWDATAIN,
output logic [`XLEN-1:0] HWDATA
);
logic [`XLEN-1:0] WriteDataSubwordDuplicated;
if (`XLEN == 64) begin:sww
logic [7:0] ByteMaskM;
// Compute write mask
always_comb
case(HSIZED[1:0])
2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDRD[2:0]] = 1; end // sb
2'b01: case (HADDRD[2:1])
2'b00: ByteMaskM = 8'b00000011;
2'b01: ByteMaskM = 8'b00001100;
2'b10: ByteMaskM = 8'b00110000;
2'b11: ByteMaskM = 8'b11000000;
endcase
2'b10: if (HADDRD[2]) ByteMaskM = 8'b11110000;
else ByteMaskM = 8'b00001111;
2'b11: ByteMaskM = 8'b11111111;
endcase
// Handle subword writes
always_comb
case(HSIZED[1:0])
2'b00: WriteDataSubwordDuplicated = {8{HWDATAIN[7:0]}}; // sb
2'b01: WriteDataSubwordDuplicated = {4{HWDATAIN[15:0]}}; // sh
2'b10: WriteDataSubwordDuplicated = {2{HWDATAIN[31:0]}}; // sw
2'b11: WriteDataSubwordDuplicated = HWDATAIN; // sw
endcase
always_comb begin
HWDATA=HRDATA;
if (ByteMaskM[0]) HWDATA[7:0] = WriteDataSubwordDuplicated[7:0];
if (ByteMaskM[1]) HWDATA[15:8] = WriteDataSubwordDuplicated[15:8];
if (ByteMaskM[2]) HWDATA[23:16] = WriteDataSubwordDuplicated[23:16];
if (ByteMaskM[3]) HWDATA[31:24] = WriteDataSubwordDuplicated[31:24];
if (ByteMaskM[4]) HWDATA[39:32] = WriteDataSubwordDuplicated[39:32];
if (ByteMaskM[5]) HWDATA[47:40] = WriteDataSubwordDuplicated[47:40];
if (ByteMaskM[6]) HWDATA[55:48] = WriteDataSubwordDuplicated[55:48];
if (ByteMaskM[7]) HWDATA[63:56] = WriteDataSubwordDuplicated[63:56];
end
end else begin:sww // 32-bit
logic [3:0] ByteMaskM;
// Compute write mask
always_comb
case(HSIZED[1:0])
2'b00: begin ByteMaskM = 4'b0000; ByteMaskM[HADDRD[1:0]] = 1; end // sb
2'b01: if (HADDRD[1]) ByteMaskM = 4'b1100;
else ByteMaskM = 4'b0011;
2'b10: ByteMaskM = 4'b1111;
default: ByteMaskM = 4'b111; // shouldn't happen
endcase
// Handle subword writes
always_comb
case(HSIZED[1:0])
2'b00: WriteDataSubwordDuplicated = {4{HWDATAIN[7:0]}}; // sb
2'b01: WriteDataSubwordDuplicated = {2{HWDATAIN[15:0]}}; // sh
2'b10: WriteDataSubwordDuplicated = HWDATAIN; // sw
default: WriteDataSubwordDuplicated = HWDATAIN; // shouldn't happen
endcase
always_comb begin
HWDATA=HRDATA;
if (ByteMaskM[0]) HWDATA[7:0] = WriteDataSubwordDuplicated[7:0];
if (ByteMaskM[1]) HWDATA[15:8] = WriteDataSubwordDuplicated[15:8];
if (ByteMaskM[2]) HWDATA[23:16] = WriteDataSubwordDuplicated[23:16];
if (ByteMaskM[3]) HWDATA[31:24] = WriteDataSubwordDuplicated[31:24];
end
end
endmodule

View File

@ -38,7 +38,7 @@ module uncore (
input logic HCLK, HRESETn,
input logic TIMECLK,
input logic [31:0] HADDR,
input logic [`AHBW-1:0] HWDATAIN,
input logic [`AHBW-1:0] HWDATA,
input logic HWRITE,
input logic [2:0] HSIZE,
input logic [2:0] HBURST,
@ -68,7 +68,6 @@ module uncore (
output logic [63:0] MTIME_CLINT
);
logic [`XLEN-1:0] HWDATA;
logic [`XLEN-1:0] HREADRam, HREADCLINT, HREADPLIC, HREADGPIO, HREADUART, HREADSDC;
logic [8:0] HSELRegions;
@ -90,15 +89,6 @@ module uncore (
// unswizzle HSEL signals
assign {HSELEXT, HSELBootRom, HSELRam, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELSDC} = HSELRegions[7:0];
// subword accesses: converts HWDATAIN to HWDATA only if no dtim or cache.
if(`DMEM == `MEM_BUS)
subwordwrite sww(
.HRDATA,
.HADDRD, .HSIZED,
.HWDATAIN, .HWDATA);
else assign HWDATA = HWDATAIN;
// generate
// on-chip RAM
if (`RAM_SUPPORTED) begin : ram
@ -106,7 +96,7 @@ module uncore (
.BASE(`RAM_BASE), .RANGE(`RAM_RANGE)) ram (
.HCLK, .HRESETn,
.HSELRam, .HADDR,
.HWRITE, .HREADY,
.HWRITE, .HREADY, .HSIZED,
.HTRANS, .HWDATA, .HREADRam,
.HRESPRam, .HREADYRam);
end
@ -116,7 +106,7 @@ module uncore (
bootrom(
.HCLK, .HRESETn,
.HSELRam(HSELBootRom), .HADDR,
.HWRITE, .HREADY, .HTRANS,
.HWRITE, .HREADY, .HTRANS, .HSIZED,
.HWDATA,
.HREADRam(HREADBootRom), .HRESPRam(HRESPBootRom), .HREADYRam(HREADYBootRom));
end

View File

@ -92,7 +92,7 @@ module wallypipelinedsoc (
);
uncore uncore(.HCLK, .HRESETn, .TIMECLK,
.HADDR, .HWDATAIN(HWDATA), .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HRDATAEXT,
.HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT, .HTRANS, .HMASTLOCK, .HRDATAEXT,
.HREADYEXT, .HRESPEXT, .HRDATA, .HREADY, .HRESP, .HADDRD, .HSIZED, .HWRITED,
.TimerIntM, .SwIntM, .ExtIntM, .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, .UARTSin, .UARTSout, .MTIME_CLINT,
.HSELEXT,

View File

@ -0,0 +1,279 @@
`include "wally-config.vh"
`define PATH "../../../../tests/fp/vectors/"
string tests[] = '{
"f16_mulAdd_rne.tv",
"f16_mulAdd_rz.tv",
"f16_mulAdd_ru.tv",
"f16_mulAdd_rd.tv",
"f16_mulAdd_rnm.tv",
"f32_mulAdd_rne.tv",
"f32_mulAdd_rz.tv",
"f32_mulAdd_ru.tv",
"f32_mulAdd_rd.tv",
"f32_mulAdd_rnm.tv",
"f64_mulAdd_rne.tv",
"f64_mulAdd_rz.tv",
"f64_mulAdd_ru.tv",
"f64_mulAdd_rd.tv",
"f64_mulAdd_rnm.tv",
"f128_mulAdd_rne.tv",
"f128_mulAdd_rz.tv",
"f128_mulAdd_ru.tv",
"f128_mulAdd_rd.tv",
"f128_mulAdd_rnm.tv"
};
// steps to run FMA tests
// 1) create test vectors in riscv-wally/tests/fp with: ./run-all.sh
// 2) go to riscv-wally/pipelined/testbench/fp/tests
// 3) run ./sim-wally-batch
module fmatestbench();
logic clk;
logic [31:0] errors=0;
logic [31:0] vectornum=0;
logic [`FLEN*4+7+4+4:0] testvectors[6133248:0];
int i = `ZFH_SUPPORTED ? 0 : `F_SUPPORTED ? 5 : `D_SUPPORTED ? 10 : 15; // set i to the first test that is run
logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat
logic [`FLEN-1:0] ans; // result from TestFloat
logic [7:0] flags; // flags read form testfloat
logic [2:0] FrmE; // rounding mode
logic [`FPSIZES/3:0] FmtE; // format - 10 = half, 00 = single, 01 = double, 11 = quad
logic [3:0] FrmRead; // rounding mode read from testfloat
logic [3:0] FmtRead; // format read from testfloat
logic [`FLEN-1:0] FMAResM; // FMA's outputed result
logic [4:0] FMAFlgM; // FMA's outputed flags
logic [2:0] FOpCtrlE; // which opperation
logic wnan; // is the outputed result NaN
logic ansnan; // is the correct answer NaN
// signals needed to connect modules
logic [`NE+1:0] ProdExpE;
logic AddendStickyE;
logic KillProdE;
logic XSgnE, YSgnE, ZSgnE;
logic [`NE-1:0] XExpE, YExpE, ZExpE;
logic [`NF:0] XManE, YManE, ZManE;
logic XNormE;
logic XExpMaxE;
logic XNaNE, YNaNE, ZNaNE;
logic XSNaNE, YSNaNE, ZSNaNE;
logic XDenormE, YDenormE, ZDenormE;
logic XInfE, YInfE, ZInfE;
logic XZeroE, YZeroE, ZZeroE;
logic YExpMaxE, ZExpMaxE, Mult;
logic [3*`NF+5:0] SumE;
logic InvZE;
logic NegSumE;
logic ZSgnEffE;
logic PSgnE;
logic [$clog2(3*`NF+7)-1:0] NormCntE;
assign FOpCtrlE = 3'b0; // set to 0 because test float only tests fMADD
assign Mult = 1'b0; // set to zero because not testing multiplication
// check if the calculated result or correct answer is NaN
always_comb begin
case (FmtRead)
4'b11: begin // quad
assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
end
4'b01: begin // double
assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
end
4'b00: begin // single
assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
end
4'b10: begin // half
assign ansnan = &ans[`H_LEN-2:`H_NF]&(|ans[`H_NF-1:0]);
assign wnan = &FMAResM[`H_LEN-2:`H_NF]&(|FMAResM[`H_NF-1:0]);
end
endcase
end
// instantiate devices under test
unpack unpack(.X, .Y, .Z, .FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
.XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XExpMaxE);
fma1 fma1(.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
fma2 fma2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM(XManE), .YManM(YManE), .ZManM(ZManE),
.XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE),
.XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(FmtE), .FrmM(FrmE),
.FMAFlgM, .FMAResM, .Mult);
// produce clock
always begin
clk = 1; #5; clk = 0; #5;
end
// Read first test
initial begin
$display("\n\nRunning %s vectors", tests[i]);
$readmemh({`PATH, tests[i]}, testvectors);
end
// apply test vectors on rising edge of clk
always @(posedge clk) begin
#1;
flags = testvectors[vectornum][15:8];
FrmRead = testvectors[vectornum][7:4];
FmtRead = testvectors[vectornum][3:0];
if (FmtRead==4'b11 & `Q_SUPPORTED) begin // quad
X = testvectors[vectornum][16+4*(`Q_LEN)-1:16+3*(`Q_LEN)];
Y = testvectors[vectornum][16+3*(`Q_LEN)-1:16+2*(`Q_LEN)];
Z = testvectors[vectornum][16+2*(`Q_LEN)-1:16+`Q_LEN];
ans = testvectors[vectornum][16+(`Q_LEN-1):16];
end
else if (FmtRead==4'b01 & `D_SUPPORTED) begin // double
X = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+4*(`D_LEN)-1:16+3*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+3*(`D_LEN)-1:16+2*(`D_LEN)]};
Z = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+2*(`D_LEN)-1:16+`D_LEN]};
ans = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+(`D_LEN-1):16]};
end
else if (FmtRead==4'b00 & `F_SUPPORTED) begin // single
X = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+4*(`S_LEN)-1:16+3*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+3*(`S_LEN)-1:16+2*(`S_LEN)]};
Z = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+2*(`S_LEN)-1:16+`S_LEN]};
ans = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+(`S_LEN-1):16]};
end
else if (FmtRead==4'b10 & `ZFH_SUPPORTED) begin // half
X = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+4*(`H_LEN)-1:16+3*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+3*(`H_LEN)-1:16+2*(`H_LEN)]};
Z = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+2*(`H_LEN)-1:16+`H_LEN]};
ans = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+(`H_LEN-1):16]};
end
else begin
X = {`FLEN{1'bx}};
Y = {`FLEN{1'bx}};
Z = {`FLEN{1'bx}};
ans = {`FLEN{1'bx}};
end
// trim format and rounding mode to appropriate size
if (`FPSIZES <= 2) FmtE = FmtRead === `FMT; // rewrite format if 2 or less floating formats are supported
else FmtE = FmtRead[1:0];
FrmE = FrmRead[2:0];
end
// check results on falling edge of clk
always @(negedge clk) begin
// quad
if((FmtRead==4'b11) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0] | (XNaNE&(FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) | (YNaNE&(FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]})) | (ZNaNE&(FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})))))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(XDenormE) $display( "xdenorm ");
if(YDenormE) $display( "ydenorm ");
if(ZDenormE) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf ");
if(~FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
if(ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
if(~ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
errors = errors + 1;
if (errors === 1) $stop;
end
// double
if((FmtRead==4'b01) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`D_LEN-2:0] === ans[`D_LEN-2:0] | (XNaNE&(FMAResM[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | (YNaNE&(FMAResM[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})) | (ZNaNE&(FMAResM[`D_LEN-2:0] === {Z[`D_LEN-2:`D_NF],1'b1,Z[`D_NF-2:0]})))))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
errors = errors + 1;
if (errors === 1) $stop;
end
// single
if((FmtRead==4'b00) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`S_LEN-2:0] === ans[`S_LEN-2:0] | (XNaNE&(FMAResM[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | (YNaNE&(FMAResM[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})) | (ZNaNE&(FMAResM[`S_LEN-2:0] === {Z[`S_LEN-2:`S_NF],1'b1,Z[`S_NF-2:0]})))))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
errors = errors + 1;
if (errors === 1) $stop;
end
// half
if((FmtRead==4'b01) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`H_LEN-2:0] === ans[`H_LEN-2:0] | (XNaNE&(FMAResM[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) | (YNaNE&(FMAResM[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})) | (ZNaNE&(FMAResM[`H_LEN-2:0] === {Z[`H_LEN-2:`H_NF],1'b1,Z[`H_NF-2:0]})))))) begin
$display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
if(FMAFlgM[4] !== 0) $display( "invld ");
if(FMAFlgM[2] !== 0) $display( "ovrflw ");
if(FMAFlgM[1] !== 0) $display( "unflw ");
if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
errors = errors + 1;
if (errors === 1) $stop;
end
// if ( vectornum === 3165862) $stop; // uncomment for specific test
vectornum = vectornum + 1; // increment test
if (testvectors[vectornum][0] === 1'bx) begin // if reached the end of file
if (errors) begin // if there were errors
$display("%s completed with %d tests and %d errors", tests[i], vectornum, errors);
$stop;
end
else begin // if no errors
if(tests[i] === "") begin // if no more tests
$display("\nAll tests completed with %d errors\n", errors);
$stop;
end
$display("%s completed successfully with %d tests and %d errors (across all tests)\n", tests[i], vectornum, errors);
// increment tests - skip some precisions if needed
if ((i === 4 & ~`F_SUPPORTED) | (i === 9 & ~`D_SUPPORTED) | (i === 14 & ~`Q_SUPPORTED)) i = i+5;
if ((i === 9 & ~`D_SUPPORTED) | (i === 14 & ~`Q_SUPPORTED)) i = i+5;
if ((i === 14 & ~`Q_SUPPORTED)) i = i+5;
i = i+1;
// if no more tests - finish
if(tests[i] === "") begin
$display("\nAll tests completed with %d errors\n", errors);
$stop;
end
// read next files
$display("Running %s vectors", tests[i]);
$readmemh({`PATH, tests[i]}, testvectors);
vectornum = 0;
end
end
end
endmodule

View File

@ -0,0 +1,50 @@
# wally-pipelined.do
#
# Modification by Oklahoma State University & Harvey Mudd College
# Use with Testbench
# James Stine, 2008; David Harris 2021
# Go Cowboys!!!!!!
#
# Takes 1:10 to run RV64IC tests using gui
# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m"
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
# compile source files
# suppress spurious warnngs about
# "Extra checking for conflicts with always_comb done at vopt time"
# because vsim will run vopt
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# $num = the added words after the call
vlog +incdir+../../../config/$1 +incdir+../../../config/shared fma-testbench.sv ../../../src/fpu/fma.sv ../../../src/fpu/unpack.sv -suppress 2583 -suppress 7063
vsim -voptargs=+acc work.fmatestbench
view wave
#-- display input and output signals as hexidecimal values
#do ./wave-dos/peripheral-waves.do
#add log -recursive /*
#do wave.do deal with when ready
#-- Run the Simulation
#run 3600
run -all
noview fma-testbench.sv
view wave

View File

@ -0,0 +1 @@
vsim -do "do fma.do rv64fp"

View File

@ -0,0 +1 @@
vsim -c -do "do fma.do rv64fp"

View File

@ -359,6 +359,8 @@ module riscvassertions;
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
assert (`DMEM == `MEM_CACHE | `DBUS ==0) else $error("Dcache rquires DBUS.");
assert (`IMEM == `MEM_CACHE | `IBUS ==0) else $error("Icache rquires IBUS.");
end
endmodule

View File

@ -0,0 +1,31 @@
#!/bin/sh
BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
OUTPUT="./vectors"
$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
# format: X_Y_Z_answer_flags_Frm_Fmt
sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rne.tv
sed -ie 's/$/_0/' $OUTPUT/f128_mulAdd_rne.tv
sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_rne.tv
sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rz.tv
sed -ie 's/$/_1/' $OUTPUT/f128_mulAdd_rz.tv
sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_rz.tv
sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_ru.tv
sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_ru.tv
sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_ru.tv
sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rd.tv
sed -ie 's/$/_2/' $OUTPUT/f128_mulAdd_rd.tv
sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_rd.tv
sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rnm.tv
sed -ie 's/$/_4/' $OUTPUT/f128_mulAdd_rnm.tv
sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_rnm.tv

31
tests/fp/create_vectors16fma.sh Executable file
View File

@ -0,0 +1,31 @@
#!/bin/sh
BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
OUTPUT="./vectors"
$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
# format: X_Y_Z_answer_flags_Frm_Fmt
sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rne.tv
sed -ie 's/$/_0/' $OUTPUT/f16_mulAdd_rne.tv
sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rne.tv
sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rz.tv
sed -ie 's/$/_1/' $OUTPUT/f16_mulAdd_rz.tv
sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rz.tv
sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_ru.tv
sed -ie 's/$/_3/' $OUTPUT/f16_mulAdd_ru.tv
sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_ru.tv
sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rd.tv
sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rd.tv
sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rd.tv
sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rnm.tv
sed -ie 's/$/_4/' $OUTPUT/f16_mulAdd_rnm.tv
sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rnm.tv

31
tests/fp/create_vectors32fma.sh Executable file
View File

@ -0,0 +1,31 @@
#!/bin/sh
BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
OUTPUT="./vectors"
$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
# format: X_Y_Z_answer_flags_Frm_Fmt
sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rne.tv
sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rne.tv
sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rne.tv
sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rz.tv
sed -ie 's/$/_1/' $OUTPUT/f32_mulAdd_rz.tv
sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rz.tv
sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_ru.tv
sed -ie 's/$/_3/' $OUTPUT/f32_mulAdd_ru.tv
sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_ru.tv
sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rd.tv
sed -ie 's/$/_2/' $OUTPUT/f32_mulAdd_rd.tv
sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rd.tv
sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rnm.tv
sed -ie 's/$/_4/' $OUTPUT/f32_mulAdd_rnm.tv
sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rnm.tv

31
tests/fp/create_vectors64fma.sh Executable file
View File

@ -0,0 +1,31 @@
#!/bin/sh
BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
OUTPUT="./vectors"
$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
# format: X_Y_Z_answer_flags_Frm_Fmt
sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rne.tv
sed -ie 's/$/_0/' $OUTPUT/f64_mulAdd_rne.tv
sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rne.tv
sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rz.tv
sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rz.tv
sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rz.tv
sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_ru.tv
sed -ie 's/$/_3/' $OUTPUT/f64_mulAdd_ru.tv
sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_ru.tv
sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rd.tv
sed -ie 's/$/_2/' $OUTPUT/f64_mulAdd_rd.tv
sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rd.tv
sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rnm.tv
sed -ie 's/$/_4/' $OUTPUT/f64_mulAdd_rnm.tv
sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rnm.tv

View File

@ -8,3 +8,7 @@
./create_vectors64cmp.sh
./create_vectors64.sh
./create_vectorsi.sh
./create_vectors16fma.sh
./create_vectors32fma.sh
./create_vectors64fma.sh
./create_vectors128fma.sh

View File

@ -1,3 +1,5 @@
include ../../Makefile.include
RVTEST_DEFINES += -march=rv$(XLEN)ia # KMG: removed compressed instructions from privileged tests
$(eval $(call compile_template,-march=rv64iac -mabi=lp64 -Drvtest_mtrap_routine=True -DXLEN=$(XLEN)))

View File

@ -55,12 +55,17 @@ RVTEST_CODE_BEGIN
.endm
.macro TRAP_HANDLER MODE, VECTORED=1 // default to vectored tests
// Set up the exception Handler, keeping the original handler in x4.
.macro TRAP_HANDLER MODE, VECTORED=1, DEBUG=0
// MODE decides which mode this trap handler will be taken in (M or S mode)
// Vectored decides whether interrumpts are handled with the vector table at trap_handler_MODE (1)
// vs Using the non-vector approach the rest of the trap handler takes (0)
// DEBUG decides whether we will print mtval a string with status.mpie, status.mie, and status.mpp to the signature (1)
// vs not saving that info to the signature (0)
// trap handler setup
// Set up the exception Handler, keeping the original handler in x4.
la x1, trap_handler_\MODE\()
.if (\VECTORED == 1)
.if (\VECTORED\() == 1)
ori x1, x1, 0x1 // set mode field of tvec to 1, forcing vectored interrupts
.endif
@ -115,17 +120,18 @@ trap_handler_\MODE\():
j trap_unvectored_\MODE\() // for the unvectored implimentation: jump past this table of addresses into the actual handler
// *** ASSUMES that a cause value of 0 for an interrupt is unimplemented
// otherwise, a vectored interrupt handler should jump to trap_handler_\MODE\() + 4 * Interrupt cause code
.4byte s_soft_interrupt_\MODE\() // 1: instruction access fault // the zero spot is taken up by the instruction to skip this table.
.4byte segfault_\MODE\() // 2: reserved
.4byte m_soft_interrupt_\MODE\() // 3: breakpoint
.4byte segfault_\MODE\() // 4: reserved
.4byte s_time_interrupt_\MODE\() // 5: load access fault
.4byte segfault_\MODE\() // 6: reserved
.4byte m_time_interrupt_\MODE\() // 7: store access fault
.4byte segfault_\MODE\() // 8: reserved
.4byte s_ext_interrupt_\MODE\() // 9: ecall from S-mode
.4byte segfault_\MODE\() // 10: reserved
.4byte m_ext_interrupt_\MODE\() // 11: ecall from M-mode
// No matter the value of VECTORED, exceptions (not interrupts) are handled in an unvecotred way
j s_soft_interrupt_\MODE\() // 1: instruction access fault // the zero spot is taken up by the instruction to skip this table.
j segfault_\MODE\() // 2: reserved
j m_soft_interrupt_\MODE\() // 3: breakpoint
j segfault_\MODE\() // 4: reserved
j s_time_interrupt_\MODE\() // 5: load access fault
j segfault_\MODE\() // 6: reserved
j m_time_interrupt_\MODE\() // 7: store access fault
j segfault_\MODE\() // 8: reserved
j s_ext_interrupt_\MODE\() // 9: ecall from S-mode
j segfault_\MODE\() // 10: reserved
j m_ext_interrupt_\MODE\() // 11: ecall from M-mode
// 12 through >=16 are reserved or designated for platform use
trap_unvectored_\MODE\():
@ -139,12 +145,34 @@ trap_unvectored_\MODE\():
addi x6, x6, 8
addi x16, x16, 8 // update pointers for logging results
.if (\DEBUG\() == 1) // record extra information (MTVAL, some status bits) about traps
csrr x1, \MODE\()tval
sd x1, 0(x16)
addi x6, x6, 8
addi x16, x16, 8
csrr x1, \MODE\()status
.if (\MODE\() == m) // Taking traps in different modes means we want to get different bits from the status register.
li x5, 0x1888 // mask bits to select MPP, MPIE, and MIE.
.else
li x5, 0x122 // mask bits to select SPP, SPIE, and SIE.
.endif
and x5, x5, x1
sd x5, 0(x16) // store masked out status bits to the output
addi x6, x6, 8
addi x16, x16, 8
.endif
// Respond to trap based on cause
// All interrupts should return after being logged
csrr x1, \MODE\()cause
li x5, 0x8000000000000000 // if msb is set, it is an interrupt
and x5, x5, x1
bnez x5, trapreturn_\MODE\() // return from interrupt
// Other trap handling is specified in the vector Table
csrr x1, \MODE\()cause
slli x1, x1, 3 // multiply cause by 8 to get offset in vector Table
la x5, exception_vector_table_\MODE\()
add x5, x5, x1 // compute address of vector in Table
@ -171,16 +199,16 @@ trapreturn_\MODE\():
// lw x5, 0(x1) // read the faulting instruction
// li x1, 3 // check bottom 2 bits of instruction to see if compressed
// and x5, x5, x1 // mask the other bits
// beq x5, x1, trapreturn_uncompressed // if 11, the instruction is return_uncompressed
// beq x5, x1, trapreturn_uncompressed_\MODE\() // if 11, the instruction is return_uncompressed
// trapreturn_compressed:
// trapreturn_compressed_\MODE\():
// csrr x1, mepc // get the mepc again
// addi x1, x1, 2 // add 2 to find the next instruction
// j trapreturn_specified // and return
// j trapreturn_specified_\MODE\() // and return
// trapreturn_uncompressed:
// csrr x1, mepc // get the mepc again
// addi x1, x1, 4 // add 4 to find the next instruction
// trapreturn_uncompressed_\MODE\():
// csrr x1, mepc // get the mepc again
// addi x1, x1, 4 // add 4 to find the next instruction
trapreturn_specified_\MODE\():
// reset the necessary pointers and registers (x1, x5, x6, and the return address going to mepc)
@ -224,6 +252,7 @@ trapreturn_finished_\MODE\():
csrw \MODE\()epc, x1 // update the epc with address of next instruction
ld x5, -16(sp) // restore registers from stack before returning
ld x1, -8(sp)
csrw \MODE\()ip, 0x0 // clear interrupt pending register to indicate interrupt has been handled
\MODE\()ret // return from trap
ecallhandler_\MODE\():
@ -257,10 +286,14 @@ ecallhandler_changetousermode_\MODE\():
csrc mstatus, x1
j trapreturn_\MODE\()
instrfault_\MODE\():
ld x1, -8(sp) // load return address int x1 (the address AFTER the jal into faulting page)
instrpagefault_\MODE\():
ld x1, -8(sp) // load return address int x1 (the address AFTER the jal to the faulting address)
j trapreturn_finished_\MODE\() // puts x1 into mepc, restores stack and returns to program (outside of faulting page)
instrfault_\MODE\():
ld x1, -8(sp) // load return address int x1 (the address AFTER the jal to the faulting address)
j trapreturn_finished_\MODE\() // return to the code after recording the mcause
illegalinstr_\MODE\():
j trapreturn_\MODE\() // return to the code after recording the mcause
@ -268,23 +301,63 @@ accessfault_\MODE\():
// *** What do I have to do here?
j trapreturn_\MODE\()
s_soft_interrupt_\MODE\(): // these labels are here to make sure the code compiles, but don't actually do anything yet
addr_misaligned_\MODE\():
j trapreturn_\MODE\()
breakpt_\MODE\():
j trapreturn_\MODE\()
s_soft_interrupt_\MODE\(): // these labels are here to make sure the code compiles, but don't actually do anything yet
li x5, 0x7EC // write 0x7EC (looks like VEC) to the output before the mcause and extras to indicate that this trap was handled with a vector table.
sd x5, 0(x16)
addi x6, x6, 8
addi x16, x16, 8
la x28, 0x02000000 // Reset by clearing MSIP interrupt from CLINT
sw x0, 0(x28)
j trap_unvectored_\MODE\()
m_soft_interrupt_\MODE\():
j trapreturn_\MODE\()
li x5, 0x7EC
sd x5, 0(x16)
addi x6, x6, 8
addi x16, x16, 8
la x28, 0x02000000 // Reset by clearing MSIP interrupt from CLINT
sw x0, 0(x28)
j trap_unvectored_\MODE\()
s_time_interrupt_\MODE\():
j trapreturn_\MODE\()
li x5, 0x7EC
sd x5, 0(x16)
addi x6, x6, 8
addi x16, x16, 8
j trap_unvectored_\MODE\()
m_time_interrupt_\MODE\():
j trapreturn_\MODE\()
li x5, 0x7EC
sd x5, 0(x16)
addi x6, x6, 8
addi x16, x16, 8
j trap_unvectored_\MODE\()
s_ext_interrupt_\MODE\():
j trapreturn_\MODE\()
li x5, 0x7EC
sd x5, 0(x16)
addi x6, x6, 8
addi x16, x16, 8
li x28, 0x10060000 // reset interrupt by clearing all the GPIO bits
sw x0, 8(x28) // disable the first pin as an output
sw x0, 40(x28) // write a 0 to the first output pin (reset interrupt)
j trap_unvectored_\MODE\()
m_ext_interrupt_\MODE\():
j trapreturn_\MODE\()
li x5, 0x7EC
sd x5, 0(x16)
addi x6, x6, 8
addi x16, x16, 8
li x28, 0x10060000 // reset interrupt by clearing all the GPIO bits
sw x0, 8(x28) // disable the first pin as an output
sw x0, 40(x28) // write a 0 to the first output pin (reset interrupt)
j trap_unvectored_\MODE\()
// Table of trap behavior
@ -294,19 +367,19 @@ m_ext_interrupt_\MODE\():
.align 3 // aligns this data table to an 8 byte boundary
exception_vector_table_\MODE\():
.8byte segfault_\MODE\() // 0: instruction address misaligned
.8byte addr_misaligned_\MODE\() // 0: instruction address misaligned
.8byte instrfault_\MODE\() // 1: instruction access fault
.8byte illegalinstr_\MODE\() // 2: illegal instruction
.8byte segfault_\MODE\() // 3: breakpoint
.8byte segfault_\MODE\() // 4: load address misaligned
.8byte breakpt_\MODE\() // 3: breakpoint
.8byte addr_misaligned_\MODE\() // 4: load address misaligned
.8byte accessfault_\MODE\() // 5: load access fault
.8byte segfault_\MODE\() // 6: store address misaligned
.8byte addr_misaligned_\MODE\() // 6: store address misaligned
.8byte accessfault_\MODE\() // 7: store access fault
.8byte ecallhandler_\MODE\() // 8: ecall from U-mode
.8byte ecallhandler_\MODE\() // 9: ecall from S-mode
.8byte segfault_\MODE\() // 10: reserved
.8byte ecallhandler_\MODE\() // 11: ecall from M-mode
.8byte instrfault_\MODE\() // 12: instruction page fault
.8byte instrpagefault_\MODE\() // 12: instruction page fault
.8byte trapreturn_\MODE\() // 13: load page fault
.8byte segfault_\MODE\() // 14: reserved
.8byte trapreturn_\MODE\() // 15: store page fault
@ -438,7 +511,7 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
// they generally do not fault or cause issues as long as these modes are enabled
// *** add functionality to check if modes are enabled before jumping? maybe cause a fault if not?
.macro GOTO_M_MODE RETURN_VPN RETURN_PAGETYPE
.macro GOTO_M_MODE RETURN_VPN=0x0 RETURN_PAGETYPE=0x0
li a0, 2 // determine trap handler behavior (go to machine mode)
li a1, \RETURN_VPN // return VPN
li a2, \RETURN_PAGETYPE // return page types
@ -446,7 +519,7 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
// now in S mode
.endm
.macro GOTO_S_MODE RETURN_VPN RETURN_PAGETYPE
.macro GOTO_S_MODE RETURN_VPN=0x0 RETURN_PAGETYPE=0x0
li a0, 3 // determine trap handler behavior (go to supervisor mode)
li a1, \RETURN_VPN // return VPN
li a2, \RETURN_PAGETYPE // return page types
@ -454,7 +527,7 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
// now in S mode
.endm
.macro GOTO_U_MODE RETURN_VPN RETURN_PAGETYPE
.macro GOTO_U_MODE RETURN_VPN=0x0 RETURN_PAGETYPE=0x0
li a0, 4 // determine trap handler behavior (go to user mode)
li a1, \RETURN_VPN // return VPN
li a2, \RETURN_PAGETYPE // return page types
@ -554,6 +627,87 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
addi x16, x16, 8
.endm
// The following tests involve causing many of the interrupts and exceptions that are easily done in a few lines
// This effectively includes everything that isn't to do with page faults (virtual memory)
.macro CAUSE_INSTR_ADDR_MISALIGNED
// cause a misaligned address trap
auipc x28, 0 // get current PC, which is aligned
addi x28, x28, 0x1 // add 1 to pc to create misaligned address
jalr x28 // cause instruction address midaligned trap
.endm
.macro CAUSE_INSTR_ACCESS
la x28, 0x0 // address zero is an address with no memory
jalr x28 // cause instruction access trap
.endm
.macro CAUSE_ILLEGAL_INSTR
.word 0x00000000 // a 32 bit zros is an illegal instruction
.endm
.macro CAUSE_BREAKPNT // ****
ebreak
.endm
.macro CAUSE_LOAD_ADDR_MISALIGNED
auipc x28, 0 // get current PC, which is aligned
addi x28, x28, 1
lw x29, 0(x28) // load from a misaligned address
.endm
.macro CAUSE_LOAD_ACC
la x28, 0 // 0 is an address with no memory
lw x29, 0(x28) // load from unimplemented address
.endm
.macro CAUSE_STORE_ADDR_MISALIGNED
auipc x28, 0 // get current PC, which is aligned
addi x28, x28, 1
sw x29, 0(x28) // store to a misaligned address
.endm
.macro CAUSE_STORE_ACC
la x28, 0 // 0 is an address with no memory
sw x29, 0(x28) // store to unimplemented address
.endm
.macro CAUSE_ECALL
// *** ASSUMES you have already gone to the mode you need to call this from.
ecall
.endm
.macro CAUSE_TIME_INTERRUPT
// The following code works for both RV32 and RV64.
// RV64 alone would be easier using double-word adds and stores
li x28, 0x100 // Desired offset from the present time
la x29, 0x02004000 // MTIMECMP register in CLINT
la x30, 0x0200BFF8 // MTIME register in CLINT
lw x7, 0(x30) // low word of MTIME
lw x31, 4(x30) // high word of MTIME
add x28, x7, x28 // add desired offset to the current time
bgtu x28, x7, nowrap // check new time exceeds current time (no wraparound)
addi x31, x31, 1 // if wrap, increment most significant word
sw x31,4(x29) // store into most significant word of MTIMECMP
nowrap:
sw x28, 0(x29) // store into least significant word of MTIMECMP
loop: j loop // wait until interrupt occurs
.endm
.macro CAUSE_SOFT_INTERRUPT
la x28, 0x02000000 // MSIP register in CLINT
li x29, 1 // 1 in the lsb
sw x29, 0(x28) // Write MSIP bit
.endm
.macro CAUSE_EXT_INTERRUPT
li x28, 0x10060000 // load base GPIO memory location
li x29, 0x1
sw x29, 8(x28) // enable the first pin as an output
sw x29, 28(x28) // set first pin to high interrupt enable
sw x29, 40(x28) // write a 1 to the first output pin (cause interrupt)
.endm
.macro END_TESTS
// invokes one final ecall to return to machine mode then terminates this program, so the output is
// 0x8: termination called from U mode

View File

@ -0,0 +1,45 @@
///////////////////////////////////////////
//
// WALLY-unvectored-interrupt
//
// Author: Kip Macsai-Goren <kmacsaigoren@g.hmc.edu>
//
// Created 2022-03-11
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
#include "WALLY-TEST-LIB-64.h"
INIT_TESTS
// test 5.3.1.5 Unvectored interrupt tests
TRAP_HANDLER m, VECTORED=0, DEBUG=1 // turn off vectored interrupts, while turning on recording of mstatus bits.
li x28, 0x8
csrs sstatus, x28 // set sstatus.MIE bit to 1 // *** might be unneccessary for s mode
// WRITE_READ_CSR mie, 0xFFFF *** commented out until I can get the trap handler (and spike for time interrupts) to work correctly with interrupts
// cause traps, ensuring that we DONT go through the vectored part of the trap handler
// *** this assumes that interrupt code 0 remains reserved
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
END_TESTS
TEST_STACK_AND_DATA

View File

@ -0,0 +1,55 @@
///////////////////////////////////////////
//
// WALLY-unvectored-interrupt
//
// Author: Kip Macsai-Goren <kmacsaigoren@g.hmc.edu>
//
// Created 2022-03-11
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
#include "WALLY-TEST-LIB-64.h"
INIT_TESTS
// test 5.3.1.5 Unvectored interrupt tests
TRAP_HANDLER s, VECTORED=0, DEBUG=1 // turn off vectored interrupts, while turning on recording of mstatus bits.
// li x28, 0x8
// csrs sstatus, x28 // set sstatus.MIE bit to 1 // *** might be unneccessary for s mode
// WRITE_READ_CSR mie, 0xFFFF *** commented out until I can get the trap handler (and spike for time interrupts) to work correctly with interrupts
WRITE_READ_CSR mideleg, 0xFFFFFFFFFFFFFFFF
GOTO_S_MODE
// cause traps, ensuring that we DONT go through the vectored part of the trap handler
// *** this assumes that interrupt code 0 remains reserved
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
GOTO_U_MODE
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
END_TESTS
TEST_STACK_AND_DATA

View File

@ -0,0 +1,76 @@
///////////////////////////////////////////
//
// WALLY-trap
//
// Author: Kip Macsai-Goren <kmacsaigoren@g.hmc.edu>
//
// Created 2022-02-20
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
#include "WALLY-TEST-LIB-64.h"
INIT_TESTS
TRAP_HANDLER m, DEBUG=1 // turn on recording mtval and status bits on traps
li x28, 0x8
csrs mstatus, x28 // set mstatus.MIE bit to 1
// WRITE_READ_CSR mie, 0xFFFF *** commented out until I can get the trap handler (and spike for time interrupts) to work correctly with interrupts
// test 5.3.1.4 Basic trap tests
// CAUSE_INSTR_ADDR_MISALIGNED //skipped becuase this exception may be impossible when compressed instructions are enabled)
CAUSE_INSTR_ACCESS
CAUSE_ILLEGAL_INSTR
CAUSE_BREAKPNT
CAUSE_LOAD_ADDR_MISALIGNED
CAUSE_LOAD_ACC
CAUSE_STORE_ADDR_MISALIGNED
CAUSE_STORE_ACC
GOTO_U_MODE // Causes M mode ecall
GOTO_S_MODE // Causes U mode ecall
GOTO_M_MODE // Causes S mode ecall
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
// try the traps again with mideleg = medeleg = all 1's to ensure traps still go to M mode from M mode
WRITE_READ_CSR medeleg, 0xFFFFFFFFFFFFFFFF
WRITE_READ_CSR mideleg, 0xFFFFFFFFFFFFFFFF
// CAUSE_INSTR_ADDR_MISALIGNED //skipped becuase this exception may be impossible when compressed instructions are enabled)
CAUSE_INSTR_ACCESS
CAUSE_ILLEGAL_INSTR
CAUSE_BREAKPNT
CAUSE_LOAD_ADDR_MISALIGNED
CAUSE_LOAD_ACC
CAUSE_STORE_ADDR_MISALIGNED
CAUSE_STORE_ACC
CAUSE_ECALL // M mode ecall
// GOTO_U_MODE // leave these untested since we only need to ensure that from M mode are not delegated
// GOTO_S_MODE
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
END_TESTS
TEST_STACK_AND_DATA

View File

@ -0,0 +1,85 @@
///////////////////////////////////////////
//
// WALLY-trap-s
//
// Author: Kip Macsai-Goren <kmacsaigoren@g.hmc.edu>
//
// Created 2022-03-11
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
#include "WALLY-TEST-LIB-64.h"
INIT_TESTS
// test 5.3.1.4 Basic trap tests
TRAP_HANDLER m, DEBUG=1 // turn on recording mtval and status bits on traps
TRAP_HANDLER s, DEBUG=1 // have S mode trap handler as well
// Like WALLY-trap, cause all the same traps from S mode and make sure they go to machine mode with zeroed mideleg, medeleg
GOTO_S_MODE
li x28, 0x8
csrs sstatus, x28 // set sstatus.MIE bit to 1 // *** might be unneccessary for s mode
// WRITE_READ_CSR mie, 0xFFFF *** commented out until I can get the trap handler (and spike for time interrupts) to work correctly with interrupts
// CAUSE_INSTR_ADDR_MISALIGNED //skipped becuase this exception may be impossible when compressed instructions are enabled)
CAUSE_INSTR_ACCESS
CAUSE_ILLEGAL_INSTR
CAUSE_BREAKPNT
CAUSE_LOAD_ADDR_MISALIGNED
CAUSE_LOAD_ACC
CAUSE_STORE_ADDR_MISALIGNED
CAUSE_STORE_ACC
CAUSE_ECALL
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
// Now delegate all traps to S mode and attempt them again, ensuring they now go to the S mode trap handler
// We can tell which one becuase the different trap handler modes write different bits of the status register
// to the output when debug is on.
GOTO_M_MODE // so we can write the delegate registers
WRITE_READ_CSR medeleg, 0xFFFFFFFFFFFFFFFF
WRITE_READ_CSR mideleg, 0xFFFFFFFFFFFFFFFF
GOTO_S_MODE
// CAUSE_INSTR_ADDR_MISALIGNED //skipped becuase this exception may be impossible when compressed instructions are enabled)
CAUSE_INSTR_ACCESS
CAUSE_ILLEGAL_INSTR
CAUSE_BREAKPNT
CAUSE_LOAD_ADDR_MISALIGNED
CAUSE_LOAD_ACC
CAUSE_STORE_ADDR_MISALIGNED
CAUSE_STORE_ACC
CAUSE_ECALL
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
END_TESTS
TEST_STACK_AND_DATA

View File

@ -0,0 +1,84 @@
///////////////////////////////////////////
//
// WALLY-trap-u
//
// Author: Kip Macsai-Goren <kmacsaigoren@g.hmc.edu>
//
// Created 2022-03-11
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
#include "WALLY-TEST-LIB-64.h"
INIT_TESTS
// test 5.3.1.4 Basic trap tests
TRAP_HANDLER m, DEBUG=1 // turn on recording mtval and status bits on traps
TRAP_HANDLER s, DEBUG=1 // have S mode trap handler as well
// Like WALLY-trap, cause all the same traps from U mode and make sure they go to machine mode with zeroed mideleg, medeleg
GOTO_U_MODE
// li x28, 0x8
// csrs sstatus, x28 // set sstatus.MIE bit to 1 // *** might be unneccessary for s mode
// WRITE_READ_CSR mie, 0xFFFF *** commented out until I can get the trap handler (and spike for time interrupts) to work correctly with interrupts
// CAUSE_INSTR_ADDR_MISALIGNED //skipped becuase this exception may be impossible when compressed instructions are enabled)
CAUSE_INSTR_ACCESS
CAUSE_ILLEGAL_INSTR
CAUSE_BREAKPNT
CAUSE_LOAD_ADDR_MISALIGNED
CAUSE_LOAD_ACC
CAUSE_STORE_ADDR_MISALIGNED
CAUSE_STORE_ACC
CAUSE_ECALL
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
// Now delegate all traps to S mode and attempt them again, ensuring they now go to the S mode trap handler
// We can tell which one becuase the different trap handler modes write different bits of the status register
// to the output when debug is on.
GOTO_M_MODE // so we can write the delegate registers
WRITE_READ_CSR medeleg, 0xFFFFFFFFFFFFFFFF
WRITE_READ_CSR mideleg, 0xFFFFFFFFFFFFFFFF
GOTO_U_MODE
// CAUSE_INSTR_ADDR_MISALIGNED //skipped becuase this exception may be impossible when compressed instructions are enabled)
CAUSE_INSTR_ACCESS
CAUSE_ILLEGAL_INSTR
CAUSE_BREAKPNT
CAUSE_LOAD_ADDR_MISALIGNED
CAUSE_LOAD_ACC
CAUSE_STORE_ADDR_MISALIGNED
CAUSE_STORE_ACC
CAUSE_ECALL
// CAUSE_TIME_INTERRUPT *** intentionally causing this trap seems difficult in spike. although it is possible for it to accidentally happen.
// CAUSE_SOFT_INTERRUPT *** exiting out of the trap handler after these is current;y broken
// CAUSE_EXT_INTERRUPT
END_TESTS
TEST_STACK_AND_DATA