diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b26..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/linux/testvector-generation/parseGDBtoTrace.py b/linux/testvector-generation/parseGDBtoTrace.py index db444f696..412db5bb5 100755 --- a/linux/testvector-generation/parseGDBtoTrace.py +++ b/linux/testvector-generation/parseGDBtoTrace.py @@ -138,9 +138,9 @@ if len(sys.argv) != 2: sys.exit('Error parseGDBtoTrace.py expects 1 arg:\n >') interruptFname = sys.argv[1] # reg number -RegNumber = {'zero': 0, 'ra': 1, 'sp': 2, 'gp': 3, 'tp': 4, 't0': 5, 't1': 6, 't2': 7, 's0': 8, 's1': 9, 'a0': 10, 'a1': 11, 'a2': 12, 'a3': 13, 'a4': 14, 'a5': 15, 'a6': 16, 'a7': 17, 's2': 18, 's3': 19, 's4': 20, 's5': 21, 's6': 22, 's7': 23, 's8': 24, 's9': 25, 's10': 26, 's11': 27, 't3': 28, 't4': 29, 't5': 30, 't6': 31, 'mhartid': 32, 'mstatus': 33, 'mip': 34, 'mie': 35, 'mideleg': 36, 'medeleg': 37, 'mtvec': 38, 'stvec': 39, 'mepc': 40, 'sepc': 41, 'mcause': 42, 'scause': 43, 'mtval': 44, 'stval': 45} +RegNumber = {'zero': 0, 'ra': 1, 'sp': 2, 'gp': 3, 'tp': 4, 't0': 5, 't1': 6, 't2': 7, 's0': 8, 's1': 9, 'a0': 10, 'a1': 11, 'a2': 12, 'a3': 13, 'a4': 14, 'a5': 15, 'a6': 16, 'a7': 17, 's2': 18, 's3': 19, 's4': 20, 's5': 21, 's6': 22, 's7': 23, 's8': 24, 's9': 25, 's10': 26, 's11': 27, 't3': 28, 't4': 29, 't5': 30, 't6': 31, 'mhartid': 32, 'mstatus': 33, 'mip': 34, 'mie': 35, 'mideleg': 36, 'medeleg': 37, 'mtvec': 38, 'stvec': 39, 'mepc': 40, 'sepc': 41, 'mcause': 42, 'scause': 43, 'mtval': 44, 'stval': 45, 'sstatus': 46, 'sip': 47, 'sie': 48} # initial state -CurrentInstr = ['0', '0', None, 'other', {'zero': 0, 'ra': 0, 'sp': 0, 'gp': 0, 'tp': 0, 't0': 0, 't1': 0, 't2': 0, 's0': 0, 's1': 0, 'a0': 0, 'a1': 0, 'a2': 0, 'a3': 0, 'a4': 0, 'a5': 0, 'a6': 0, 'a7': 0, 's2': 0, 's3': 0, 's4': 0, 's5': 0, 
's6': 0, 's7': 0, 's8': 0, 's9': 0, 's10': 0, 's11': 0, 't3': 0, 't4': 0, 't5': 0, 't6': 0, 'mhartid': 0, 'mstatus': 0, 'mip': 0, 'mie': 0, 'mideleg': 0, 'medeleg': 0, 'mtvec': 0, 'stvec': 0, 'mepc': 0, 'sepc': 0, 'mcause': 0, 'scause': 0, 'mtval': 0, 'stval': 0}, {}, None, None, None] +CurrentInstr = ['0', '0', None, 'other', {'zero': 0, 'ra': 0, 'sp': 0, 'gp': 0, 'tp': 0, 't0': 0, 't1': 0, 't2': 0, 's0': 0, 's1': 0, 'a0': 0, 'a1': 0, 'a2': 0, 'a3': 0, 'a4': 0, 'a5': 0, 'a6': 0, 'a7': 0, 's2': 0, 's3': 0, 's4': 0, 's5': 0, 's6': 0, 's7': 0, 's8': 0, 's9': 0, 's10': 0, 's11': 0, 't3': 0, 't4': 0, 't5': 0, 't6': 0, 'mhartid': 0, 'mstatus': 0, 'mip': 0, 'mie': 0, 'mideleg': 0, 'medeleg': 0, 'mtvec': 0, 'stvec': 0, 'mepc': 0, 'sepc': 0, 'mcause': 0, 'scause': 0, 'mtval': 0, 'stval': 0, 'sstatus': 0, 'sip': 0, 'sie': 0}, {}, None, None, None] #with open (InputFile, 'r') as InputFileFP: #lines = InputFileFP.readlines() diff --git a/linux/testvector-generation/parseState.py b/linux/testvector-generation/parseState.py index abc36fb2f..1f7e93c09 100755 --- a/linux/testvector-generation/parseState.py +++ b/linux/testvector-generation/parseState.py @@ -34,7 +34,7 @@ stateGDBpath = outDir+'stateGDB.txt' if not os.path.exists(stateGDBpath): sys.exit('Error input file '+stateGDBpath+'not found') -singleCSRs = ['pc','mip','mie','mscratch','mcause','mepc','mtvec','medeleg','mideleg','sscratch','scause','sepc','stvec','sedeleg','sideleg','satp','mstatus','priv'] +singleCSRs = ['pc','mip','mie','mscratch','mcause','mepc','mtvec','medeleg','mideleg','sscratch','scause','sepc','stvec','sedeleg','sideleg','satp','mstatus','priv','sie','sip','sstatus'] # priv (current privilege mode) isn't technically a CSR but we can log it with the same machinery thirtyTwoBitCSRs = ['mcounteren','scounteren'] listCSRs = ['hpmcounter','pmpaddr'] diff --git a/pipelined/config/rv64fp/BTBPredictor.txt b/pipelined/config/rv64fp/BTBPredictor.txt new file mode 100644 index 000000000..b761147c6 --- /dev/null 
+++ b/pipelined/config/rv64fp/BTBPredictor.txtdiff --git a/pipelined/config/rv64fp/twoBitPredictor.txt b/pipelined/config/rv64fp/twoBitPredictor.txt new file mode 100644 index 000000000..ff57bd473 --- /dev/null +++ b/pipelined/config/rv64fp/twoBitPredictor.txt @@ -0,0 +1,1024 @@ +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 
+00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 
+00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 +00 diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh new file mode 100644 index 000000000..249af8485 --- /dev/null +++ b/pipelined/config/rv64fp/wally-config.vh @@ -0,0 +1,134 @@ +////////////////////////////////////////// +// wally-config.vh +// +// Written: David_Harris@hmc.edu 4 January 2021 +// Modified: +// +// Purpose: Specify which features are configured +// Macros to determine which modes are supported based on MISA +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +// include shared configuration +`include "wally-shared.vh" + +`define FPGA 0 +`define QEMU 0 +`define DESIGN_COMPILER 0 + +// RV32 or RV64: XLEN = 32 or 64 +`define XLEN 64 + +// IEEE 754 compliance +`define IEEE754 1 + +// MISA RISC-V configuration per specification +`define MISA (32'h00000104 | 1 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 ) +`define ZICSR_SUPPORTED 1 +`define ZIFENCEI_SUPPORTED 1 +`define COUNTERS 32 +`define ZICOUNTERS_SUPPORTED 1 + +/// Microarchitectural Features +`define UARCH_PIPELINED 1 +`define UARCH_SUPERSCALR 0 +`define UARCH_SINGLECYCLE 0 +`define DMEM `MEM_CACHE +`define IMEM `MEM_CACHE +`define VIRTMEM_SUPPORTED 1 +`define VECTORED_INTERRUPTS_SUPPORTED 1 + +// TLB configuration. Entries should be a power of 2 +`define ITLB_ENTRIES 32 +`define DTLB_ENTRIES 32 + +// Cache configuration. Sizes should be a power of two +// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines +`define DCACHE_NUMWAYS 4 +`define DCACHE_WAYSIZEINBYTES 4096 +`define DCACHE_LINELENINBITS 256 +`define ICACHE_NUMWAYS 4 +`define ICACHE_WAYSIZEINBYTES 4096 +`define ICACHE_LINELENINBITS 256 + +// Integer Divider Configuration +// DIV_BITSPERCYCLE must be 1, 2, or 4 +`define DIV_BITSPERCYCLE 4 + +// Legal number of PMP entries are 0, 16, or 64 +`define PMP_ENTRIES 64 + +// Address space +`define RESET_VECTOR 64'h0000000080000000 + +// Bus Interface width +`define AHBW 64 + +// Peripheral Physiccal Addresses +// Peripheral memory space extends from BASE to BASE+RANGE +// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits + +// *** each of these is `PA_BITS wide. 
is this paramaterizable INSIDE the config file? +`define BOOTROM_SUPPORTED 1'b1 +`define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder +`define BOOTROM_RANGE 56'h00000FFF +`define RAM_SUPPORTED 1'b1 +`define RAM_BASE 56'h80000000 +`define RAM_RANGE 56'h7FFFFFFF +`define EXT_MEM_SUPPORTED 1'b0 +`define EXT_MEM_BASE 56'h80000000 +`define EXT_MEM_RANGE 56'h07FFFFFF +`define CLINT_SUPPORTED 1'b1 +`define CLINT_BASE 56'h02000000 +`define CLINT_RANGE 56'h0000FFFF +`define GPIO_SUPPORTED 1'b1 +`define GPIO_BASE 56'h10060000 +`define GPIO_RANGE 56'h000000FF +`define UART_SUPPORTED 1'b1 +`define UART_BASE 56'h10000000 +`define UART_RANGE 56'h00000007 +`define PLIC_SUPPORTED 1'b1 +`define PLIC_BASE 56'h0C000000 +`define PLIC_RANGE 56'h03FFFFFF +`define SDC_SUPPORTED 1'b0 +`define SDC_BASE 56'h00012100 +`define SDC_RANGE 56'h0000001F + +// Test modes + +// Tie GPIO outputs back to inputs +`define GPIO_LOOPBACK_TEST 1 + +// Hardware configuration +`define UART_PRESCALE 1 + +// Interrupt configuration +`define PLIC_NUM_SRC 10 +// comment out the following if >=32 sources +`define PLIC_NUM_SRC_LT_32 +`define PLIC_GPIO_ID 3 +`define PLIC_UART_ID 10 + +`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt" +`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt" +`define BPRED_ENABLED 1 +`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE +`define TESTSBP 0 + +`define REPLAY 0 +`define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 277814f80..198a4ab2e 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -50,10 +50,47 @@ // Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries) `define PMPCFG_ENTRIES (`PMP_ENTRIES/8) + +// Floating-point half-precision +`define ZFH_SUPPORTED 0 
+ +// Floating point constants for Quad, Double, Single, and Half precisions +`define Q_LEN 128 +`define Q_NE 15 +`define Q_NF 112 +`define Q_BIAS 16383 +`define D_LEN 64 +`define D_NE 11 +`define D_NF 52 +`define D_BIAS 1023 +`define S_LEN 32 +`define S_NE 8 +`define S_NF 23 +`define S_BIAS 127 +`define H_LEN 16 +`define H_NE 5 +`define H_NF 10 +`define H_BIAS 15 + // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits -`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32) -`define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) -`define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23) +`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN) +`define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `F_SUPPORTED ? `S_NE : `H_NE) +`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF) +`define FMT (`Q_SUPPORTED ? 3 : `D_SUPPORTED ? 1 : `F_SUPPORTED ? 0 : 2) +`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS) + +// Floating point constants needed for FPU paramerterization +`define FPSIZES (`Q_SUPPORTED+`D_SUPPORTED+`F_SUPPORTED+`ZFH_SUPPORTED) +`define LEN1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN : `H_LEN) +`define NE1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE : `H_NE) +`define NF1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF) +`define FMT1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 1 : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 0 : 2) +`define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS) +`define LEN2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN : `H_LEN) +`define NE2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE : `H_NE) +`define NF2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 
`S_NF : `H_NF) +`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 0 : 2) +`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS) // Disable spurious Verilator warnings diff --git a/pipelined/fpu-testfloat/FMA/tbgen/tb.sv b/pipelined/fpu-testfloat/FMA/tbgen/tb.sv index 57c4e2ff0..5532aa634 100644 --- a/pipelined/fpu-testfloat/FMA/tbgen/tb.sv +++ b/pipelined/fpu-testfloat/FMA/tbgen/tb.sv @@ -1,10 +1,33 @@ -//`include "../../../config/old/rv64icfd/wally-config.vh" +`include "../../../config/old/rv64icfd/wally-config.vh" -`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32) -`define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) -`define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23) -`define XLEN 64 +// `define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : `F_SUPPORTED ? 32 : 16) +// `define NE (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : `F_SUPPORTED ? 8 : 5) +// `define NF (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : `F_SUPPORTED ? 23 : 10) +// `define FMT (`Q_SUPPORTED ? 3 : `D_SUPPORTED ? 1 : `F_SUPPORTED ? 0 : 2) +// `define BIAS (`Q_SUPPORTED ? 16383 : `D_SUPPORTED ? 1023 : `F_SUPPORTED ? 127 : 15) +// `define XLEN 64 +// `define IEEE754 1 +`define Q_SUPPORTED 1 +// `define D_SUPPORTED 0 +// `define F_SUPPORTED 0 +`define H_SUPPORTED 0 +`define FPSIZES ((`Q_SUPPORTED&`D_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) ? 4 : (`Q_SUPPORTED&`D_SUPPORTED&`F_SUPPORTED) | (`Q_SUPPORTED&`D_SUPPORTED&`H_SUPPORTED) | (`Q_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) | (`D_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) ? 3 : (`Q_SUPPORTED&`D_SUPPORTED) | (`Q_SUPPORTED&`F_SUPPORTED) | (`Q_SUPPORTED&`H_SUPPORTED) | (`D_SUPPORTED&`F_SUPPORTED) | (`D_SUPPORTED&`H_SUPPORTED) | (`F_SUPPORTED&`H_SUPPORTED) ? 2 : 1) +`define LEN1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 64 : (`F_SUPPORTED & (`FLEN !== 32)) ? 32 : 16) +`define NE1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 11 : (`F_SUPPORTED & (`FLEN !== 32)) ? 8 : 5) +`define NF1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 
52 : (`F_SUPPORTED & (`FLEN !== 32)) ? 23 : 10) +`define FMT1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 1 : (`F_SUPPORTED & (`FLEN !== 32)) ? 0 : 2) +`define BIAS1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 1023 : (`F_SUPPORTED & (`FLEN !== 32)) ? 127 : 15) +`define LEN2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 32 : 16) +`define NE2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 8 : 5) +`define NF2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 23 : 10) +`define FMT2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 0 : 2) +`define BIAS2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 127 : 15) +`define LEN3 16 +`define NE3 5//make constants for the constants ie 11/8/5 ect +`define NF3 10 // always support less hten max - maybe halfs +`define FMT3 2 +`define BIAS3 15 module testbench3(); logic [31:0] errors=0; @@ -15,33 +38,17 @@ module testbench3(); logic [`FLEN-1:0] ans; logic [7:0] flags; logic [2:0] FrmE; - logic FmtE; + logic [`FPSIZES/3:0] FmtE; logic [`FLEN-1:0] FMAResM; logic [4:0] FMAFlgM; -integer fp; logic [2:0] FOpCtrlE; logic [2*`NF+1:0] ProdManE; logic [3*`NF+5:0] AlignedAddendE; logic [`NE+1:0] ProdExpE; logic AddendStickyE; logic KillProdE; -// logic XZeroE; -// logic YZeroE; -// logic ZZeroE; -// logic XDenormE; -// logic YDenormE; -// logic ZDenormE; -// logic XInfE; -// logic YInfE; -// logic ZInfE; -// logic XNaNE; -// logic YNaNE; -// logic ZNaNE; logic wnan; -// logic XNaNE; -// logic YNaNE; -// logic ZNaNE; logic ansnan, clk; @@ -52,88 +59,86 @@ assign FOpCtrlE = 3'b0; // down - 010 // up - 011 // nearest max mag - 100 -assign FrmE = 3'b000; -assign FmtE = 1'b1; +assign FrmE = 3'b010; +assign FmtE = (`FPSIZES/3+1)'(1); logic [`FLEN-1:0] X, Y, Z; // logic FmtE; // logic [2:0] FOpCtrlE; logic XSgnE, YSgnE, ZSgnE; logic [`NE-1:0] XExpE, YExpE, ZExpE; - logic [`NF-1:0] XFracE, YFracE, ZFracE; - logic XAssumed1E, YAssumed1E, ZAssumed1E; + logic [`NF:0] XManE, YManE, ZManE; logic XNormE; + logic XExpMaxE; logic XNaNE, YNaNE, ZNaNE; logic XSNaNE, YSNaNE, ZSNaNE; logic XDenormE, YDenormE, ZDenormE; logic XZeroE, YZeroE, 
ZZeroE; logic [`NE-1:0] BiasE; logic XInfE, YInfE, ZInfE; - logic XExpMaxE; - //***rename to make significand = 1.frac m = significand - logic XFracZero, YFracZero, ZFracZero; // input fraction zero - logic XExpZero, YExpZero, ZExpZero; // input exponent zero logic [`FLEN-1:0] Addend; // value to add (Z or zero) - logic YExpMaxE, ZExpMaxE; // input exponent all 1s + logic YExpMaxE, ZExpMaxE, Mult; // input exponent all 1s - assign Addend = FOpCtrlE[2] ? (`FLEN)'(0) : Z; // Z is only used in the FMA, and is set to Zero if a multiply opperation - assign XSgnE = FmtE ? X[`FLEN-1] : X[31]; - assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31]; - assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31]; + assign Mult = 1'b0; + unpacking unpacking(.*); - assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; - assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; - assign ZExpE = FmtE ? Addend[62:52] : {Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; +// assign wnan = XNaNE|YNaNE|ZNaNE; +// assign ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[22:0]; + + if (`FPSIZES === 1) begin + assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]); + assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]); + end else if (`FPSIZES === 2) begin + assign ansnan = FmtE ? &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]) : &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]); + assign wnan = FmtE ? &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]) : &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]); + end else if (`FPSIZES === 3) begin + always_comb begin + case (FmtE) + `FMT: begin + assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]); + assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]); - assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0}; - assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0}; - assign ZFracE = FmtE ? 
Addend[`NF-1:0] : {Addend[22:0], 29'b0}; + end + `FMT1: begin + assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]); + assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]); - assign XAssumed1E = FmtE ? |X[62:52] : |X[30:23]; - assign YAssumed1E = FmtE ? |Y[62:52] : |Y[30:23]; - assign ZAssumed1E = FmtE ? |Z[62:52] : |Z[30:23]; + end + `FMT2: begin + assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]); + assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]); + end + default: begin + assign ansnan = 0; + assign wnan = 0; + end + endcase + end - assign XExpZero = ~XAssumed1E; - assign YExpZero = ~YAssumed1E; - assign ZExpZero = ~ZAssumed1E; - - assign XFracZero = ~|XFracE; - assign YFracZero = ~|YFracE; - assign ZFracZero = ~|ZFracE; + end else begin + always_comb begin + case (FmtE) + `FMT: begin + assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]); + assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]); - assign XExpMaxE = FmtE ? &X[62:52] : &X[30:23]; - assign YExpMaxE = FmtE ? &Y[62:52] : &Y[30:23]; - assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23]; - - assign XNormE = ~(XExpMaxE|XExpZero); - - assign XNaNE = XExpMaxE & ~XFracZero; - assign YNaNE = YExpMaxE & ~YFracZero; - assign ZNaNE = ZExpMaxE & ~ZFracZero; + end + `FMT1: begin + assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]); + assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]); - assign XSNaNE = XNaNE&~XFracE[`NF-1]; - assign YSNaNE = YNaNE&~YFracE[`NF-1]; - assign ZSNaNE = ZNaNE&~ZFracE[`NF-1]; - - assign XDenormE = XExpZero & ~XFracZero; - assign YDenormE = YExpZero & ~YFracZero; - assign ZDenormE = ZExpZero & ~ZFracZero; - - assign XInfE = XExpMaxE & XFracZero; - assign YInfE = YExpMaxE & YFracZero; - assign ZInfE = ZExpMaxE & ZFracZero; - - assign XZeroE = XExpZero & XFracZero; - assign YZeroE = YExpZero & YFracZero; - assign ZZeroE = ZExpZero & ZFracZero; - - assign BiasE = 13'h3ff; - -assign wnan = FmtE ? 
&FMAResM[`FLEN-2:`NF] & |FMAResM[`NF-1:0] : &FMAResM[30:23] & |FMAResM[22:0]; -// assign XNaNE = FmtE ? &X[62:52] & |X[51:0] : &X[62:55] & |X[54:32]; -// assign YNaNE = FmtE ? &Y[62:52] & |Y[51:0] : &Y[62:55] & |Y[54:32]; -// assign ZNaNE = FmtE ? &Z[62:52] & |Z[51:0] : &Z[62:55] & |Z[54:32]; -assign ansnan = FmtE ? &ans[`FLEN-2:`NF] & |ans[`NF-1:0] : &ans[30:23] & |ans[22:0]; + end + `FMT2: begin + assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]); + assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]); + end + `FMT3: begin + assign ansnan = &ans[`LEN3-2:`NF3]&(|ans[`NF3-1:0]); + assign wnan = &FMAResM[`LEN3-2:`NF3]&(|FMAResM[`NF3-1:0]); + end + endcase + end + end // instantiate device under test logic [3*`NF+5:0] SumE, SumM; @@ -141,16 +146,16 @@ assign ansnan = FmtE ? &ans[`FLEN-2:`NF] & |ans[`NF-1:0] : &ans[30:23] & |ans[22 logic NegSumE, NegSumM; logic ZSgnEffE, ZSgnEffM; logic PSgnE, PSgnM; - logic [8:0] NormCntE, NormCntM; + logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM; - fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}), + fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); -fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE), +fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM(XManE), .YManM(YManE), .ZManM(ZManE), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), 
.XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE), // .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), - .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM); + .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM, .Mult); // produce clock @@ -168,61 +173,156 @@ fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZEx always @(posedge clk) begin #1; - if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum]; - else begin X = {{32{1'b1}}, testvectors[vectornum][135:104]}; - Y = {{32{1'b1}}, testvectors[vectornum][103:72]}; - Z = {{32{1'b1}}, testvectors[vectornum][71:40]}; - ans = {{32{1'b1}}, testvectors[vectornum][39:8]}; - flags = testvectors[vectornum][7:0]; + if (`FPSIZES === 3 | `FPSIZES === 4) begin + if (FmtE==2'b11) {X, Y, Z, ans, flags} = testvectors[vectornum]; + else if (FmtE==2'b01) begin + X = {{`FLEN-64{1'b1}}, testvectors[vectornum][263:200]}; + Y = {{`FLEN-64{1'b1}}, testvectors[vectornum][199:136]}; + Z = {{`FLEN-64{1'b1}}, testvectors[vectornum][135:72]}; + ans = {{`FLEN-64{1'b1}}, testvectors[vectornum][71:8]}; + flags = testvectors[vectornum][7:0]; + end + else if (FmtE==2'b00) begin + X = {{`FLEN-32{1'b1}}, testvectors[vectornum][135:104]}; + Y = {{`FLEN-32{1'b1}}, testvectors[vectornum][103:72]}; + Z = {{`FLEN-32{1'b1}}, testvectors[vectornum][71:40]}; + ans = {{`FLEN-32{1'b1}}, testvectors[vectornum][39:8]}; + flags = testvectors[vectornum][7:0]; + end + else begin + X = {{`FLEN-16{1'b1}}, testvectors[vectornum][71:56]}; + Y = {{`FLEN-16{1'b1}}, testvectors[vectornum][55:40]}; + Z = {{`FLEN-16{1'b1}}, testvectors[vectornum][39:24]}; + ans = {{`FLEN-16{1'b1}}, testvectors[vectornum][23:8]}; + flags = testvectors[vectornum][7:0]; + end + end + else begin + if (FmtE==1'b1) {X, Y, Z, ans, flags} = 
testvectors[vectornum]; + else if (FmtE==1'b0) begin + X = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+4*(`LEN1)-1:8+3*(`LEN1)]}; + Y = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+3*(`LEN1)-1:8+2*(`LEN1)]}; + Z = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+2*(`LEN1)-1:8+(`LEN1)]}; + ans = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+(`LEN1-1):8]}; + flags = testvectors[vectornum][7:0]; + end end end // check results on falling edge of clk always @(negedge clk) begin - - if((FmtE==1'b1) & (FMAFlgM != flags[4:0] | (!wnan & (FMAResM != ans)) | (wnan & ansnan & ~((XNaNE & (FMAResM[`FLEN-2:0] == {XExpE,1'b1,X[`NF-2:0]})) | (YNaNE & (FMAResM[`FLEN-2:0] == {YExpE,1'b1,Y[`NF-2:0]})) | (ZNaNE & (FMAResM[`FLEN-2:0] == {ZExpE,1'b1,Z[`NF-2:0]})) | (FMAResM[`FLEN-2:0] == ans[`FLEN-2:0]))))) begin - // fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w"); - // if((FmtE==1'b1) & (FMAFlgM != flags[4:0] | (FMAResM != ans))) begin - $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); - if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero "); - if(XDenormE) $display( "xdenorm "); - if(YDenormE) $display( "ydenorm "); - if(ZDenormE) $display( "zdenorm "); - if(FMAFlgM[4] != 0) $display( "invld "); - if(FMAFlgM[2] != 0) $display( "ovrflw "); - if(FMAFlgM[1] != 0) $display( "unflw "); - if(FMAResM[`FLEN] & FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] == 0) $display( "FMAResM=-inf "); - if(~FMAResM[`FLEN] & FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] == 0) $display( "FMAResM=+inf "); - if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] != 0 & ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN "); - if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] != 0 & FMAResM[`NF-1]) $display( "FMAResM=qutNaN "); - if(ans[`FLEN] & ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] == 0) $display( "ans=-inf "); - if(~ans[`FLEN] & ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] == 0) $display( "ans=+inf 
"); - if(ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] != 0 & ~ans[`NF-1]) $display( "ans=sigNaN "); - if(ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] != 0 & ans[`NF-1]) $display( "ans=qutNaN "); - errors = errors + 1; - //if (errors == 10) - $stop; - end - if((FmtE==1'b0)&(FMAFlgM != flags[4:0] | (!wnan & (FMAResM != ans)) | (wnan & ansnan & ~(((XNaNE & (FMAResM[30:0] == {X[30:23],1'b1,X[21:0]})) | (YNaNE & (FMAResM[30:0] == {Y[30:23],1'b1,Y[21:0]})) | (ZNaNE & (FMAResM[30:0] == {Z[30:23],1'b1,Z[21:0]})) | (FMAResM[30:0] == ans[30:0]))) ))) begin - $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); - if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero "); - if(~(|X[30:23]) & |X[22:0]) $display( "xdenorm "); - if(~(|Y[30:23]) & |Y[22:0]) $display( "ydenorm "); - if(~(|Z[30:23]) & |Z[22:0]) $display( "zdenorm "); - if(FMAFlgM[4] != 0) $display( "invld "); - if(FMAFlgM[2] != 0) $display( "ovrflw "); - if(FMAFlgM[1] != 0) $display( "unflw "); - if(FMAResM == 64'hFF80000000000000) $display( "FMAResM=-inf "); - if(FMAResM == 64'h7F80000000000000) $display( "FMAResM=+inf "); - if(&FMAResM[30:23] & |FMAResM[22:0] & ~FMAResM[22]) $display( "FMAResM=sigNaN "); - if(&FMAResM[30:23] & |FMAResM[22:0] & FMAResM[22] ) $display( "FMAResM=qutNaN "); - if(ans == 64'hFF80000000000000) $display( "ans=-inf "); - if(ans == 64'h7F80000000000000) $display( "ans=+inf "); - if(&ans[30:23] & |ans[22:0] & ~ans[22] ) $display( "ans=sigNaN "); - if(&ans[30:23] & |ans[22:0] & ans[22]) $display( "ans=qutNaN "); - errors = errors + 1; - if (errors == 10) - $stop; - end + if (`FPSIZES === 1 | `FPSIZES === 2) begin + if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]})) || (ZNaNE && (FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] === 
ans[`FLEN-2:0]))))) begin + // fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w"); + // if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (FMAResM !== ans))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(XDenormE) $display( "xdenorm "); + if(YDenormE) $display( "ydenorm "); + if(ZDenormE) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf "); + if(~FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf "); + if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN "); + if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN "); + if(ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf "); + if(~ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf "); + if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN "); + if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN "); + errors = errors + 1; + //if (errors === 10) + $stop; + end + if((FmtE==1'b0)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[`LEN1-2:0] === {X[`LEN1-2:`NF1],1'b1,X[`NF1-2:0]})) || (YNaNE && (FMAResM[`LEN1-2:0] === {Y[`LEN1-2:`NF1],1'b1,Y[`NF1-2:0]})) || (ZNaNE && (FMAResM[`LEN1-2:0] === {Z[`LEN1-2:`NF1],1'b1,Z[`NF1-2:0]})) || (FMAResM[`LEN1-2:0] === ans[`LEN1-2:0]))) ))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm "); + if(~(|Y[30:23]) && |Y[22:0]) $display( 
"ydenorm "); + if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN "); + if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN "); + if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN "); + if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN "); + errors = errors + 1; + // if (errors === 9) + $stop; + end + end else begin + + if((FmtE==2'b11) & (FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]})) || (ZNaNE && (FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0]))))) begin + // fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w"); + // if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (FMAResM !== ans))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(XDenormE) $display( "xdenorm "); + if(YDenormE) $display( "ydenorm "); + if(ZDenormE) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf "); + if(~FMAResM[`FLEN] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf "); + if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN "); + if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN "); + if(ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) 
$display( "ans=-inf "); + if(~ans[`FLEN] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf "); + if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN "); + if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN "); + errors = errors + 1; + //if (errors === 10) + $stop; + end + if((FmtE==1'b01)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[64-2:0] === {X[64-2:52],1'b1,X[52-2:0]})) || (YNaNE && (FMAResM[64-2:0] === {Y[64-2:52],1'b1,Y[52-2:0]})) || (ZNaNE && (FMAResM[64-2:0] === {Z[64-2:52],1'b1,Z[52-2:0]})) || (FMAResM[62:0] === ans[62:0]))) ))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm "); + if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm "); + if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN "); + if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN "); + if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN "); + if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN "); + errors = errors + 1; + // if (errors === 9) + $stop; + end + if((FmtE==2'b00)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[32-2:0] === {X[32-2:23],1'b1,X[23-2:0]})) || (YNaNE && (FMAResM[32-2:0] === {Y[32-2:23],1'b1,Y[23-2:0]})) || (ZNaNE && (FMAResM[32-2:0] === {Z[32-2:23],1'b1,Z[23-2:0]})) || (FMAResM[30:0] === ans[30:0]))) ))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm "); + if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm "); + 
if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN "); + if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN "); + if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN "); + if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN "); + errors = errors + 1; + // if (errors === 9) + $stop; + end + if((FmtE==2'b10)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[16-2:0] === {X[16-2:10],1'b1,X[10-2:0]})) || (YNaNE && (FMAResM[16-2:0] === {Y[16-2:10],1'b1,Y[10-2:0]})) || (ZNaNE && (FMAResM[16-2:0] === {Z[16-2:10],1'b1,Z[10-2:0]})) || (FMAResM[14:0] === ans[14:0]))) ))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm "); + if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm "); + if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN "); + if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN "); + if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN "); + if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN "); + errors = errors + 1; + // if (errors === 9) + $stop; + end + end + vectornum = vectornum + 1; if (testvectors[vectornum] === 194'bx) begin $display("%d tests completed with %d errors", vectornum, errors); diff --git a/pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh b/pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh index 0741e9d6d..8620f3b03 100755 --- a/pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh +++ 
b/pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh @@ -1,3 +1,3 @@ -testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even -seed 113355 -level 1 > testFloat +testfloat_gen f128_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat tr -d ' ' < testFloat > testFloatNoSpace diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv index 3b058772c..1fbcae5e2 100755 --- a/pipelined/src/fpu/fcmp.sv +++ b/pipelined/src/fpu/fcmp.sv @@ -42,6 +42,7 @@ module fcmp ( // - if negitive - no // - if positive - yes // note: LT does -0 < 0 + //*** compare Exp and Man together assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE> AlignCnt; AddendStickyE = |(ZManShifted[`NF-1:0]); @@ -356,7 +378,7 @@ endmodule module loa( //https://ieeexplore.ieee.org/abstract/document/930098 input logic [3*`NF+6:0] A, // addend input logic [2*`NF+1:0] P, // product - output logic [8:0] NormCntE // normalization shift count for the positive result + output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift count for the positive result ); logic [3*`NF+6:0] T; @@ -389,14 +411,14 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098 endmodule module lzc( - input logic [3*`NF+6:0] f, - output logic [8:0] NormCntE // normalization shift + input logic [3*`NF+6:0] f, + output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift ); - logic [8:0] i; + logic [$clog2(3*`NF+7)-1:0] i; always_comb begin i = 0; - while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1; // search for leading one + while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1; // search for leading one NormCntE = i; end endmodule @@ -410,27 +432,27 @@ endmodule module fma2( - input logic XSgnM, YSgnM, // input signs - input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents - input logic [`NF:0] XManM, YManM, ZManM, // input mantissas - input logic [2:0] FrmM, // rounding mode 
000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic FmtM, // precision 1 = double 0 = single - input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias - input logic AddendStickyM, // sticky bit that is calculated during alignment - input logic KillProdM, // set the product to zero before addition if the product is too small to matter - input logic XZeroM, YZeroM, ZZeroM, // inputs are zero - input logic XInfM, YInfM, ZInfM, // inputs are infinity - input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs - input logic [3*`NF+5:0] SumM, // the positive sum - input logic NegSumM, // was the sum negitive - input logic InvZM, // do you invert Z - input logic ZSgnEffM, // the modified Z sign - depends on instruction - input logic PSgnM, // the product's sign - input logic Mult, // multiply opperation - input logic [8:0] NormCntM, // the normalization shift count - output logic [`FLEN-1:0] FMAResM, // FMA final result - output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} + input logic XSgnM, YSgnM, // input signs + input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents + input logic [`NF:0] XManM, YManM, ZManM, // input mantissas + input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single + input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias + input logic AddendStickyM, // sticky bit that is calculated during alignment + input logic KillProdM, // set the product to zero before addition if the product is too small to matter + input logic XZeroM, YZeroM, ZZeroM, // inputs are zero + input logic XInfM, YInfM, ZInfM, // inputs are infinity + input 
logic XNaNM, YNaNM, ZNaNM, // inputs are NaN + input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs + input logic [3*`NF+5:0] SumM, // the positive sum + input logic NegSumM, // was the sum negitive + input logic InvZM, // do you invert Z + input logic ZSgnEffM, // the modified Z sign - depends on instruction + input logic PSgnM, // the product's sign + input logic Mult, // multiply opperation + input logic [$clog2(3*`NF+7)-1:0] NormCntM, // the normalization shift count + output logic [`FLEN-1:0] FMAResM, // FMA final result + output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} @@ -548,28 +570,27 @@ endmodule module normalize( - input logic [3*`NF+5:0] SumM, // the positive sum - input logic [`NE-1:0] ZExpM, // exponent of Z - input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias - input logic [8:0] NormCntM, // normalization shift count - input logic FmtM, // precision 1 = double 0 = single - input logic KillProdM, // is the product set to zero - input logic AddendStickyM, // the sticky bit caclulated from the aligned addend - input logic NegSumM, // was the sum negitive - output logic [`NF+2:0] NormSum, // normalized sum - output logic SumZero, // is the sum zero - output logic NormSumSticky, UfSticky, // sticky bits - output logic [`NE+1:0] SumExp, // exponent of the normalized sum - output logic ResultDenorm // is the result denormalized + input logic [3*`NF+5:0] SumM, // the positive sum + input logic [`NE-1:0] ZExpM, // exponent of Z + input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias + input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count + input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single + input logic KillProdM, // is the product set to zero + input logic AddendStickyM, // the sticky bit caclulated from the aligned addend + input logic NegSumM, // was the sum negitive + output logic [`NF+2:0] NormSum, // normalized sum + output logic 
SumZero, // is the sum zero + output logic NormSumSticky, UfSticky, // sticky bits + output logic [`NE+1:0] SumExp, // exponent of the normalized sum + output logic ResultDenorm // is the result denormalized ); - logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results - logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later - logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction - logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction - logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias - logic PreResultDenorm; // is the result denormalized - calculated before LZA corection - logic PreResultDenorm2; // is the result denormalized - calculated before LZA corection - logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction + logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results + logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later + logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction + logic [3*`NF+8:0] SumShifted; // the shifted sum before LZA correction + logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias + logic PreResultDenorm; // is the result denormalized - calculated before LZA corection + logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -580,14 +601,89 @@ module normalize( // calculate the sum's exponent assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); - assign SumExpTmp = FmtM ? 
SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; + + //convert the sum's exponent into the propper percision + if (`FPSIZES == 1) begin + assign SumExpTmp = SumExpTmpTmp; + + end else if (`FPSIZES == 2) begin + assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}}; + + end else if (`FPSIZES == 3) begin + always_comb begin + case (FmtM) + `FMT: SumExpTmp = SumExpTmpTmp; + `FMT1: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS1)&{`NE+2{|SumExpTmpTmp}}; + `FMT2: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS2)&{`NE+2{|SumExpTmpTmp}}; + default: SumExpTmp = `NE+2'bx; + endcase + end + + end else begin + always_comb begin + case (FmtM) + 2'h3: SumExpTmp = SumExpTmpTmp; + 2'h1: SumExpTmp = (SumExpTmpTmp-`BIAS+`D_BIAS)&{`NE+2{|SumExpTmpTmp}}; + 2'h0: SumExpTmp = (SumExpTmpTmp-`BIAS+`S_BIAS)&{`NE+2{|SumExpTmpTmp}}; + 2'h2: SumExpTmp = (SumExpTmpTmp-`BIAS+`H_BIAS)&{`NE+2{|SumExpTmpTmp}}; + endcase + end + + end - logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL; - assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; - assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd2))); - assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127); - assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd25+13'd1023-13'd127)) | ~|SumExpTmpTmp; - assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? 
SumDGEFL : SumSGEFL) & ~SumZero; + // determine if the result is denormalized + + if (`FPSIZES == 1) begin + logic Sum0LEZ, Sum0GEFL; + assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; + assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); + assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero; + + end else if (`FPSIZES == 2) begin + logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; + assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; + assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); + assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); + assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp; + assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero; + + end else if (`FPSIZES == 3) begin + logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; + assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; + assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); + assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); + assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp; + assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)); + assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp; + always_comb begin + case (FmtM) + `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero; + `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero; + `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero; + default: PreResultDenorm = 1'bx; + endcase + end + + end else begin + logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; + assign Sum0LEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; + assign Sum0GEFL = $signed(SumExpTmpTmp) >= 
$signed(-(`NE+2)'(`NF )-(`NE+2)'(2)); + assign Sum1LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)); + assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp; + assign Sum2LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)); + assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp; + assign Sum3LEZ = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)); + assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp; + always_comb begin + case (FmtM) + 2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero; + 2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero; + 2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero; + 2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero; + endcase + end + + end // 010. when should be 001. // - shift left one @@ -599,45 +695,66 @@ module normalize( // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; + assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1; // Normalize the sum assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift; // LZA correction assign LZAPlus1 = SumShifted[3*`NF+7]; assign LZAPlus2 = SumShifted[3*`NF+8]; // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone - assign CorrSumShifted = LZAPlus1&~KillProdM ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; + assign CorrSumShifted = LZAPlus1 ? 
SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; + // Calculate the sticky bit - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM); + if (`FPSIZES == 1) begin + assign NormSumSticky = |CorrSumShifted[2*`NF+2:0]; + + end else if (`FPSIZES == 2) begin + // 3*NF+5 - NF1 - 3 + assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | + (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM); + + end else if (`FPSIZES == 3) begin + assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | + (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) | + (|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2)); + + end else begin + assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | + (|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | + (|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) | + (|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2)); + + end + assign UfSticky = AddendStickyM | NormSumSticky; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm2&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}}; + assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}}; // recalculate if the result is denormalized - assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7]; + assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7]; endmodule module fmaround( - input logic FmtM, // precision 1 = double 0 = single - input logic [2:0] FrmM, // rounding mode - input logic UfSticky, // sticky bit for 
underlow calculation - input logic [`NF+2:0] NormSum, // normalized sum - input logic AddendStickyM, // addend's sticky bit - input logic NormSumSticky, // normalized sum's sticky bit - input logic ZZeroM, // is Z zero - input logic InvZM, // invert Z - input logic [`NE+1:0] SumExp, // exponent of the normalized sum - input logic ResultSgnTmp, // the result's sign - output logic CalcPlus1, UfPlus1, // do you add or subtract on from the result - output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow - output logic [`NF-1:0] ResultFrac, // Result fraction - output logic [`NE-1:0] ResultExp, // Result exponent - output logic Sticky, // sticky bit - output logic [`FLEN:0] RoundAdd, // how much to add to the result - output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding + input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single + input logic [2:0] FrmM, // rounding mode + input logic UfSticky, // sticky bit for underlow calculation + input logic [`NF+2:0] NormSum, // normalized sum + input logic AddendStickyM, // addend's sticky bit + input logic NormSumSticky, // normalized sum's sticky bit + input logic ZZeroM, // is Z zero + input logic InvZM, // invert Z + input logic [`NE+1:0] SumExp, // exponent of the normalized sum + input logic ResultSgnTmp, // the result's sign + output logic CalcPlus1, UfPlus1, // do you add or subtract on from the result + output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow + output logic [`NF-1:0] ResultFrac, // Result fraction + output logic [`NE-1:0] ResultExp, // Result exponent + output logic Sticky, // sticky bit + output logic [`FLEN:0] RoundAdd, // how much to add to the result + output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding ); logic LSBNormSum; // bit used for rounding - least significant bit of the normalized sum logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction 
by a small number @@ -676,18 +793,146 @@ module fmaround( // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) // 110/111 - Plus1 - // determine guard, round, and least significant bit of the result - assign Guard = FmtM ? NormSum[2] : NormSum[31]; - assign Round = FmtM ? NormSum[1] : NormSum[30]; - assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32]; + if (`FPSIZES == 1) begin + // determine guard, round, and least significant bit of the result + assign Guard = NormSum[2]; + assign Round = NormSum[1]; + assign LSBNormSum = NormSum[3]; + + // used to determine underflow flag + assign UfGuard = NormSum[1]; + assign UfRound = NormSum[0]; + assign UfLSBNormSum = NormSum[2]; + + // determine sticky + assign Sticky = UfSticky | NormSum[0]; + + end else if (`FPSIZES == 2) begin + // \/-------------NF---------------, + // | NF1 | 3 | | + // '-------NF1------^ + + // determine guard, round, and least significant bit of the result + assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; + assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1]; + assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3]; + + // used to determine underflow flag + assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1]; + assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1]; + assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; + + // determine sticky + assign Sticky = UfSticky | (FmtM ? 
NormSum[0] : NormSum[`NF-`NF1]); + + end else if (`FPSIZES == 3) begin + always_comb begin + case (FmtM) + `FMT: begin + // determine guard, round, and least significant bit of the result + Guard = NormSum[2]; + Round = NormSum[1]; + LSBNormSum = NormSum[3]; + // used to determine underflow flag + UfGuard = NormSum[1]; + UfRound = NormSum[0]; + UfLSBNormSum = NormSum[2]; + // determine sticky + Sticky = UfSticky | NormSum[0]; + end + `FMT1: begin + // determine guard, round, and least significant bit of the result + Guard = NormSum[`NF-`NF1+2]; + Round = NormSum[`NF-`NF1+1]; + LSBNormSum = NormSum[`NF-`NF1+3]; + // used to determine underflow flag + UfGuard = NormSum[`NF-`NF1+1]; + UfRound = NormSum[`NF-`NF1]; + UfLSBNormSum = NormSum[`NF-`NF1+2]; + // determine sticky + Sticky = UfSticky | NormSum[`NF-`NF1]; + end + `FMT2: begin + // determine guard, round, and least significant bit of the result + Guard = NormSum[`NF-`NF2+2]; + Round = NormSum[`NF-`NF2+1]; + LSBNormSum = NormSum[`NF-`NF2+3]; + // used to determine underflow flag + UfGuard = NormSum[`NF-`NF2+1]; + UfRound = NormSum[`NF-`NF2]; + UfLSBNormSum = NormSum[`NF-`NF2+2]; + // determine sticky + Sticky = UfSticky | NormSum[`NF-`NF2]; + end + default: begin + Guard = 1'bx; + Round = 1'bx; + LSBNormSum = 1'bx; + UfGuard = 1'bx; + UfRound = 1'bx; + UfLSBNormSum = 1'bx; + Sticky = 1'bx; + end + endcase + end + + end else begin + always_comb begin + case (FmtM) + 2'h3: begin + // determine guard, round, and least significant bit of the result + Guard = NormSum[2]; + Round = NormSum[1]; + LSBNormSum = NormSum[3]; + // used to determine underflow flag + UfGuard = NormSum[1]; + UfRound = NormSum[0]; + UfLSBNormSum = NormSum[2]; + // determine sticky + Sticky = UfSticky | NormSum[0]; + end + 2'h1: begin + // determine guard, round, and least significant bit of the result + Guard = NormSum[`NF-`D_NF+2]; + Round = NormSum[`NF-`D_NF+1]; + LSBNormSum = NormSum[`NF-`D_NF+3]; + // used to determine underflow flag + 
UfGuard = NormSum[`NF-`D_NF+1]; + UfRound = NormSum[`NF-`D_NF]; + UfLSBNormSum = NormSum[`NF-`D_NF+2]; + // determine sticky + Sticky = UfSticky | NormSum[`NF-`D_NF]; + end + 2'h0: begin + // determine guard, round, and least significant bit of the result + Guard = NormSum[`NF-`S_NF+2]; + Round = NormSum[`NF-`S_NF+1]; + LSBNormSum = NormSum[`NF-`S_NF+3]; + // used to determine underflow flag + UfGuard = NormSum[`NF-`S_NF+1]; + UfRound = NormSum[`NF-`S_NF]; + UfLSBNormSum = NormSum[`NF-`S_NF+2]; + // determine sticky + Sticky = UfSticky | NormSum[`NF-`S_NF]; + end + 2'h2: begin + // determine guard, round, and least significant bit of the result + Guard = NormSum[`NF-`H_NF+2]; + Round = NormSum[`NF-`H_NF+1]; + LSBNormSum = NormSum[`NF-`H_NF+3]; + // used to determine underflow flag + UfGuard = NormSum[`NF-`H_NF+1]; + UfRound = NormSum[`NF-`H_NF]; + UfLSBNormSum = NormSum[`NF-`H_NF+2]; + // determine sticky + Sticky = UfSticky | NormSum[`NF-`H_NF]; + end + endcase + end + + end - // used to determine underflow flag - assign UfGuard = FmtM ? NormSum[1] : NormSum[30]; - assign UfRound = FmtM ? NormSum[0] : NormSum[29]; - assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31]; - // determine sticky - assign Sticky = UfSticky | NormSum[0]; // Deterimine if a small number was supposed to be subtrated assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here @@ -729,10 +974,40 @@ module fmaround( assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); // Compute rounded result - assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} : - Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; - assign NormSumTruncated = {NormSum[`NF+2:32], NormSum[31:3]&{29{FmtM}}}; + if (`FPSIZES == 1) begin + assign RoundAdd = Minus1 ? 
{`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1}; + end else if (`FPSIZES == 2) begin + // \/FLEN+1 + // | NE+2 | NF | + // '-NE+2-^----NF1----^ + // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 + assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} : + Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)}; + + end else if (`FPSIZES == 3) begin + always_comb begin + case (FmtM) + `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1}; + `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)}; + `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)}; + default: RoundAdd = (`FLEN+1)'(0); + endcase + end + + end else begin + always_comb begin + case (FmtM) + 2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1}; + 2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)}; + 2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)}; + 2'h2: RoundAdd = Minus1 ? 
{{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)}; + endcase + end + + end + + assign NormSumTruncated = NormSum[`NF+2:3]; assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; assign ResultExp = FullResultExp[`NE-1:0]; @@ -748,7 +1023,7 @@ module fmaflags( input logic [`NE+1:0] SumExp, // exponent of the normalized sum input logic ZSgnEffM, PSgnM, // the product and modified Z signs input logic Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding - input logic FmtM, // precision 1 = double 0 = single + input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single output logic Invalid, Overflow, Underflow, // flags used to select the result output logic [4:0] FMAFlgM // FMA flags ); @@ -771,8 +1046,34 @@ module fmaflags( assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); // Set Overflow flag if the number is too big to be represented - // - Don't set the overflow flag if an overflowed result isn't outputed - assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[7:0] | FullResultExp[8]; + // - Don't set the overflow flag if an overflowed result isn't outputed + if (`FPSIZES == 1) begin + assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE]; + + end else if (`FPSIZES == 2) begin + assign GtMaxExp = FmtM ? 
&FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
+
+    end else if (`FPSIZES == 3) begin
+        always_comb begin
+            case (FmtM)
+                `FMT: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
+                `FMT1: GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
+                `FMT2: GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
+                default: GtMaxExp = 1'bx;
+            endcase
+        end
+
+    end else begin
+        always_comb begin
+            case (FmtM)
+                2'h3: GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
+                2'h1: GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
+                2'h0: GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
+                2'h2: GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
+            endcase
+        end
+
+    end
     assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
     // Set Underflow flag if the number is too small to be represented in normal numbers
@@ -793,57 +1094,230 @@ endmodule
 module resultselect(
-    input logic XSgnM, YSgnM, // input signs
-    input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents
-    input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
-    input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
-    input logic FmtM, // precision 1 = double 0 = single
-    input logic AddendStickyM, // sticky bit that is calculated during alignment
-    input logic KillProdM, // set the product to zero before addition if the product is too small to matter
-    input logic XInfM, YInfM, ZInfM, // inputs are infinity
-    input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
-    input logic ZSgnEffM, // the modified Z sign - depends on instruction
-    input logic PSgnM, // the product's sign
-    input logic ResultSgn, // the result's sign
-    input logic CalcPlus1, // rounding bits
-    input logic [`FLEN:0] RoundAdd, // how much to add to the result
-    input logic Invalid, Overflow, Underflow, // flags
-    input logic ResultDenorm, // is the result denormalized
-    input logic [`NE-1:0] ResultExp, // Result exponent
-    input logic [`NF-1:0] ResultFrac, // Result fraction
-    output logic [`FLEN-1:0] FMAResM // FMA final result
+    input logic XSgnM, YSgnM, // input signs
+    input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents
+    input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
+    input logic [2:0] FrmM, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
+    input logic [`FPSIZES/3:0] FmtM, // format select: 1 bit for one or two formats (1 = largest), 2 bits for three or four formats (see the case values below)
+    input logic AddendStickyM, // sticky bit that is calculated during alignment
+    input logic KillProdM, // set the product to zero before addition if the product is too small to matter
+    input logic XInfM, YInfM, ZInfM, // inputs are infinity
+    input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
+    input logic ZSgnEffM, // the modified Z sign - depends on instruction
+    input logic PSgnM, // the product's sign
+    input logic ResultSgn, // the result's sign
+    input logic CalcPlus1, // rounding bits
+    input logic [`FLEN:0] RoundAdd, // how much to add to the result
+    input logic Invalid, Overflow, Underflow, // flags
+    input logic ResultDenorm, // is the result denormalized
+    input logic [`NE-1:0] ResultExp, // Result exponent
+    input logic [`NF-1:0] ResultFrac, // Result fraction
+    output logic [`FLEN-1:0] FMAResM // FMA final result
     );
-    logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
+    // Builds one candidate result per outcome (NaN input, invalid, infinity, killed product,
+    // overflow, underflow, normal) in the format selected by FmtM, then picks FMAResM by priority.
+    // Formats narrower than FLEN are NaN-boxed: the upper FLEN-LEN bits are filled with 1s.
+    logic InfSgn;
+    logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
+    assign InfSgn = ZInfM ? ZSgnEffM : PSgnM; // sign of an infinite result: Z's effective sign if Z is infinite, otherwise the product's sign
+    if (`FPSIZES == 1) begin // one supported format: every result is a full FLEN value, no NaN boxing
+        if(`IEEE754) begin
+            assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+            assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+            assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+            assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end else begin
+            assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end
+        assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}}; // largest finite value when rounding towards zero, down with a positive result, or up with a negative result; otherwise infinity
+        assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; // Z's exponent and fraction, plus the rounding increment only when the alignment sticky bit is set
+        assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))}; // signed zero, plus one LSB when CalcPlus1&(AddendStickyM|FrmM[1])
+        assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
+        assign NormResult = {ResultSgn, ResultExp, ResultFrac};
+
+    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
+        if(`IEEE754) begin
+            assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
+            assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
+            assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+            assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end else begin
+            assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end
+
+        assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                       {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
+                                ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
+                                       {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+        assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+        assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
+
+    end else if (`FPSIZES == 3) begin // three formats, selected by `FMT / `FMT1 / `FMT2
+        always_comb begin
+            case (FmtM)
+                `FMT: begin
+                    if(`IEEE754) begin
+                        XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+                        YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+                        ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin
+                        XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                     {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+                    KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
+                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
+                    NormResult = {ResultSgn, ResultExp, ResultFrac};
+                end
+                `FMT1: begin
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
+                        YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
+                        ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+                        InvalidResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end else begin
+                        XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
+                                     {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+                    NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
+                end
+                `FMT2: begin
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
+                        YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
+                        ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
+                        InvalidResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end else begin
+                        XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end
+
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
+                                     {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
+                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:0], ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
+                    NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
+                end
+                default: begin
+                    if(`IEEE754) begin
+                        XNaNResult = (`FLEN)'(0);
+                        YNaNResult = (`FLEN)'(0);
+                        ZNaNResult = (`FLEN)'(0);
+                        InvalidResult = (`FLEN)'(0);
+                    end else begin
+                        XNaNResult = (`FLEN)'(0);
+                    end
+                    OverflowResult = (`FLEN)'(0);
+                    KillProdResult = (`FLEN)'(0);
+                    UnderflowResult = (`FLEN)'(0);
+                    InfResult = (`FLEN)'(0);
+                    NormResult = (`FLEN)'(0);
+                end
+            endcase
+        end
+
+    end else begin // four formats: FmtM 3 = largest (`NE/`NF), 1 = double (`D_*), 0 = single (`S_*), 2 = half (`H_*)
+        always_comb begin
+            case (FmtM)
+                2'h3: begin
+                    if(`IEEE754) begin
+                        XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+                        YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+                        ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin
+                        XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                     {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+                    KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
+                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
+                    NormResult = {ResultSgn, ResultExp, ResultFrac};
+                end
+                2'h1: begin
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
+                        YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
+                        ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
+                        InvalidResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end else begin
+                        XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
+                                     {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
+                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:0], ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
+                    NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
+                end
+                2'h0: begin
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
+                        YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
+                        ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
+                        InvalidResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end else begin
+                        XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end
+
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
+                                     {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
+                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:0], ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})}; // exponent slice uses `S_NE like the rest of this arm (`NE2 is the three-format name)
+                    UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
+                    NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
+                end
+                2'h2: begin
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
+                        YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
+                        ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
+                        InvalidResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; // carries ResultSgn like every other format's IEEE754 invalid result
+                    end else begin
+                        XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end
+
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
+                                     {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
+
+                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:0], ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
+                    NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
+                end
+            endcase
+        end
-    if(`IEEE754) begin
-        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
-        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
-        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
-        assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
-    end else begin
-        assign XNaNResult = FmtM ? {1'b0, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpM[7:0], 1'b1, 22'b0};
-        assign YNaNResult = FmtM ? {1'b0, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, YExpM[7:0], 1'b1, 22'b0};
-        assign ZNaNResult = FmtM ? {1'b0, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, ZExpM[7:0], 1'b1, 22'b0};
-        assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, 1'b0, 8'hff, 1'b1, 22'b0};
     end
-
-    assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                   {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                            ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
-                                   {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
-    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} + (RoundAdd[59:29]&{31{AddendStickyM}})};
-    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-    assign FMAResM = XNaNM ? XNaNResult :
-                     YNaNM ? YNaNResult :
-                     ZNaNM ? ZNaNResult :
-                     Invalid ? InvalidResult :
-                     XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM, XExpM[7:0], XManM[51:29]} :
-                     YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM, YExpM[7:0], YManM[51:29]} :
-                     ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
-                     KillProdM ? KillProdResult :
-                     Overflow ? OverflowResult :
-                     Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
-                     FmtM ? {ResultSgn, ResultExp, ResultFrac} :
-                     {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
+    if(`IEEE754) begin // priority select: NaN inputs (X, then Y, then Z) propagate first, then invalid, infinity, killed product, overflow, underflow, normal
+        assign FMAResM = XNaNM ? XNaNResult :
+                         YNaNM ? YNaNResult :
+                         ZNaNM ? ZNaNResult :
+                         Invalid ? InvalidResult :
+                         XInfM|YInfM|ZInfM ? InfResult :
+                         KillProdM ? KillProdResult :
+                         Overflow ? OverflowResult :
+                         Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
+                         NormResult;
+    end else begin // non-IEEE754: any NaN input or invalid operation returns the one positive quiet NaN held in XNaNResult
+        assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
+                         XInfM|YInfM|ZInfM ? InfResult :
+                         KillProdM ? KillProdResult :
+                         Overflow ? OverflowResult :
+                         Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
+                         NormResult;
+    end
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 0dd6ea1b2..2ffcb1264 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -89,7 +89,6 @@ module fpu (
     logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage
     logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage
     logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage
-    logic [10:0] BiasE; // bias based on precision (single=7f double=3ff)
     logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
     logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
     logic XNaNQ, YNaNQ; // is the input a NaN - divide
@@ -176,10 +175,10 @@ module fpu (
     // unpack unit
     // - splits FP inputs into their various parts
     // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
-    unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE,
+    unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
     .XSgnE, .YSgnE,
.ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); + .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); // FMA // - two stage FMA @@ -231,7 +230,7 @@ module fpu ( .XSNaNE, .ClassResE); // Convert - fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE, + fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE, .CvtResE, .CvtFlgE); // data to be stored in memory - to IEU diff --git a/pipelined/src/fpu/fpudivsqrtrecur.sv b/pipelined/src/fpu/fpudivsqrtrecur.sv index fd47d2d87..62a441367 100644 --- a/pipelined/src/fpu/fpudivsqrtrecur.sv +++ b/pipelined/src/fpu/fpudivsqrtrecur.sv @@ -64,8 +64,8 @@ module fpudivsqrtrecur ( always_comb begin if (FSqrtE & XSgnE | FDivE & XZeroE & YZeroE | XNaNE | FDivE & YNaNE) FDivSqrtResM = 0; // ***replace with NAN; // *** which one - else if (FDivE & YZeroE | XInfE) FDivSqrtResM = {FDivSqrtResSgn, `NE'b1, `NF'b0}; // infinity - else if (FDivE & YInfE) FDivSqrtResM = {FDivSqrtResSgn, `NE'b0, `NF'b0}; // zero + else if (FDivE & YZeroE | XInfE) FDivSqrtResM = {FDivSqrtResSgn, (`NE)'(1), (`NF)'(0)}; // infinity + else if (FDivE & YInfE) FDivSqrtResM = {FDivSqrtResSgn, (`NE)'(0), (`NF)'(0)}; // zero else FDivSqrtResM = FDivSqrtRecurRes; end diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv new file mode 100644 index 000000000..3041cd72f --- /dev/null +++ b/pipelined/src/fpu/unpack.sv @@ -0,0 +1,473 @@ +`include "wally-config.vh" + +module unpack ( + input logic [`FLEN-1:0] X, Y, Z, // inputs from register file + input logic [`FPSIZES/3:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half + output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ + output logic [`NE-1:0] XExpE, YExpE, ZExpE, //
exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) + output logic XNormE, // is X a normalized number + output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN + output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN + output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized + output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero + output logic XInfE, YInfE, ZInfE, // is XYZ infinity + output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) +); + + logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ + logic XExpNonzero, YExpNonzero, ZExpNonzero; // is the exponent of XYZ non-zero + logic XFracZero, YFracZero, ZFracZero; // is the fraction zero + logic XExpZero, YExpZero, ZExpZero; // is the exponent zero + logic YExpMaxE, ZExpMaxE; // is the exponent all 1s + + if (`FPSIZES == 1) begin // if there is only one floating point format supported + + // sign bit + assign XSgnE = X[`FLEN-1]; + assign YSgnE = Y[`FLEN-1]; + assign ZSgnE = Z[`FLEN-1]; + + // exponent + assign XExpE = X[`FLEN-2:`NF]; + assign YExpE = Y[`FLEN-2:`NF]; + assign ZExpE = Z[`FLEN-2:`NF]; + + // fraction (no assumed 1) + assign XFracE = X[`NF-1:0]; + assign YFracE = Y[`NF-1:0]; + assign ZFracE = Z[`NF-1:0]; + + // is the exponent non-zero + assign XExpNonzero = |XExpE; + assign YExpNonzero = |YExpE; + assign ZExpNonzero = |ZExpE; + + // is the exponent all 1's + assign XExpMaxE = &XExpE; + assign YExpMaxE = &YExpE; + assign ZExpMaxE = &ZExpE; + + + end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported + + //***need better names for these constants + // largest format | smaller format + //---------------------------------- + // `FLEN | `LEN1 length of floating point number + // `NE | `NE1 length of exponent + // `NF | `NF1 length of fraction + // `BIAS | `BIAS1 exponent's bias value + // `FMT | `FMT1 precision's format value - 
Q=11 D=01 S=00 H=10 + + // Possible combinantions specified by spec: + // double and single + // single and half + + // Not needed but can also handle: + // quad and double + // quad and single + // quad and half + // double and half + + logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN + assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + + // choose sign bit depending on format - 1=larger precsion 0=smaller precision + assign XSgnE = FmtE ? X[`FLEN-1] : XLen1[`LEN1-1]; + assign YSgnE = FmtE ? Y[`FLEN-1] : YLen1[`LEN1-1]; + assign ZSgnE = FmtE ? Z[`FLEN-1] : ZLen1[`LEN1-1]; + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // extract the exponent, converting the smaller exponent into the larger precision if nessisary + assign XExpE = FmtE ? X[`FLEN-2:`NF] : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; + assign YExpE = FmtE ? Y[`FLEN-2:`NF] : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; + assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + + // extract the fraction, add trailing zeroes to the mantissa if nessisary + assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)}; + assign YFracE = FmtE ? Y[`NF-1:0] : {YLen1[`NF1-1:0], (`NF-`NF1)'(0)}; + assign ZFracE = FmtE ? 
Z[`NF-1:0] : {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)}; + + // is the exponent non-zero + assign XExpNonzero = FmtE ? |X[`FLEN-2:`NF] : |XLen1[`LEN1-2:`NF1]; + assign YExpNonzero = FmtE ? |Y[`FLEN-2:`NF] : |YLen1[`LEN1-2:`NF1]; + assign ZExpNonzero = FmtE ? |Z[`FLEN-2:`NF] : |ZLen1[`LEN1-2:`NF1]; + + // is the exponent all 1's + assign XExpMaxE = FmtE ? &X[`FLEN-2:`NF] : &XLen1[`LEN1-2:`NF1]; + assign YExpMaxE = FmtE ? &Y[`FLEN-2:`NF] : &YLen1[`LEN1-2:`NF1]; + assign ZExpMaxE = FmtE ? &Z[`FLEN-2:`NF] : &ZLen1[`LEN1-2:`NF1]; + + + end else if (`FPSIZES == 3) begin // three floating point precsions supported + + //***need better names for these constants + // largest format | larger format | smallest format + //--------------------------------------------------- + // `FLEN | `LEN1 | `LEN2 length of floating point number + // `NE | `NE1 | `NE2 length of exponent + // `NF | `NF1 | `NF2 length of fraction + // `BIAS | `BIAS1 | `BIAS2 exponent's bias value + // `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10 + + // Possible combinantions specified by spec: + // quad and double and single + // double and single and half + + // Not needed but can also handle: + // quad and double and half + // quad and single and half + + logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision + logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision + assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + assign ZLen1 = &Z[`FLEN-1:`LEN1] ? 
Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision + assign XLen2 = &X[`FLEN-1:`LEN2] ? X[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; + assign YLen2 = &Y[`FLEN-1:`LEN2] ? Y[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; + assign ZLen2 = &Z[`FLEN-1:`LEN2] ? Z[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; + + always_comb begin + case (FmtE) + `FMT: begin // if input is largest precision (`FLEN - ie quad or double) + // extract the sign bit + XSgnE = X[`FLEN-1]; + YSgnE = Y[`FLEN-1]; + ZSgnE = Z[`FLEN-1]; + + // extract the exponent + XExpE = X[`FLEN-2:`NF]; + YExpE = Y[`FLEN-2:`NF]; + ZExpE = Z[`FLEN-2:`NF]; + + // extract the fraction + XFracE = X[`NF-1:0]; + YFracE = Y[`NF-1:0]; + ZFracE = Z[`NF-1:0]; + + // is the exponent non-zero + XExpNonzero = |X[`FLEN-2:`NF]; + YExpNonzero = |Y[`FLEN-2:`NF]; + ZExpNonzero = |Z[`FLEN-2:`NF]; + + // is the exponent all 1's + XExpMaxE = &X[`FLEN-2:`NF]; + YExpMaxE = &Y[`FLEN-2:`NF]; + ZExpMaxE = &Z[`FLEN-2:`NF]; + end + `FMT1: begin // if input is larger precsion (`LEN1 - double or single) + + // extract the sign bit + XSgnE = XLen1[`LEN1-1]; + YSgnE = YLen1[`LEN1-1]; + ZSgnE = ZLen1[`LEN1-1]; + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // convert the larger precision's exponent to use the largest precision's bias + XExpE = {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; + YExpE = {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; + ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + + // extract the fraction and add the nessesary 
trailing zeros + XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)}; + YFracE = {YLen1[`NF1-1:0], (`NF-`NF1)'(0)}; + ZFracE = {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)}; + + // is the exponent non-zero + XExpNonzero = |XLen1[`LEN1-2:`NF1]; + YExpNonzero = |YLen1[`LEN1-2:`NF1]; + ZExpNonzero = |ZLen1[`LEN1-2:`NF1]; + + // is the exponent all 1's + XExpMaxE = &XLen1[`LEN1-2:`NF1]; + YExpMaxE = &YLen1[`LEN1-2:`NF1]; + ZExpMaxE = &ZLen1[`LEN1-2:`NF1]; + end + `FMT2: begin // if input is smallest precsion (`LEN2 - single or half) + + // exctract the sign bit + XSgnE = XLen2[`LEN2-1]; + YSgnE = YLen2[`LEN2-1]; + ZSgnE = ZLen2[`LEN2-1]; + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // convert the smallest precision's exponent to use the largest precision's bias + XExpE = {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; + YExpE = {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; + ZExpE = {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; + + // extract the fraction and add the nessesary trailing zeros + XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)}; + YFracE = {YLen2[`NF2-1:0], (`NF-`NF2)'(0)}; + ZFracE = {ZLen2[`NF2-1:0], (`NF-`NF2)'(0)}; + + // is the exponent non-zero + XExpNonzero = |XLen2[`LEN2-2:`NF2]; + YExpNonzero = |YLen2[`LEN2-2:`NF2]; + ZExpNonzero = |ZLen2[`LEN2-2:`NF2]; + + // is the exponent all 1's + XExpMaxE = &XLen2[`LEN2-2:`NF2]; + YExpMaxE = &YLen2[`LEN2-2:`NF2]; + ZExpMaxE = &ZLen2[`LEN2-2:`NF2]; + end + default: begin + XSgnE = 0; + YSgnE = 0; + ZSgnE = 0; + XExpE = 0; + YExpE = 0; + ZExpE = 0; + XFracE = 0; + YFracE = 0; + ZFracE = 0; + XExpNonzero = 0; + YExpNonzero = 0; + ZExpNonzero = 0; + XExpMaxE = 0; + YExpMaxE 
= 0; + ZExpMaxE = 0; + end + endcase + end + + end else begin // if all precsisons are supported - quad, double, single, and half + + // quad | double | single | half + //------------------------------------------------------------------- + // `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number + // `Q_NE | `D_NE | `S_NE | `H_NE length of exponent + // `Q_NF | `D_NF | `S_NF | `H_NF length of fraction + // `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value + // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10 + + + logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision + logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision + logic [`LEN2-1:0] XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision + assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; + assign YLen1 = &Y[`Q_LEN-1:`D_LEN] ? Y[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; + assign ZLen1 = &Z[`Q_LEN-1:`D_LEN] ? Z[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision + assign XLen2 = &X[`Q_LEN-1:`S_LEN] ? X[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; + assign YLen2 = &Y[`Q_LEN-1:`S_LEN] ? Y[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; + assign ZLen2 = &Z[`Q_LEN-1:`S_LEN] ? Z[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision + assign XLen3 = &X[`Q_LEN-1:`H_LEN] ? X[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; + assign YLen3 = &Y[`Q_LEN-1:`H_LEN] ? 
Y[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; + assign ZLen3 = &Z[`Q_LEN-1:`H_LEN] ? Z[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; + + always_comb begin + case (FmtE) + 2'b11: begin // if input is quad percision + // extract sign bit + XSgnE = X[`Q_LEN-1]; + YSgnE = Y[`Q_LEN-1]; + ZSgnE = Z[`Q_LEN-1]; + + // extract the exponent + XExpE = X[`Q_LEN-2:`Q_NF]; + YExpE = Y[`Q_LEN-2:`Q_NF]; + ZExpE = Z[`Q_LEN-2:`Q_NF]; + + // extract the fraction + XFracE = X[`Q_NF-1:0]; + YFracE = Y[`Q_NF-1:0]; + ZFracE = Z[`Q_NF-1:0]; + + // is the exponent non-zero + XExpNonzero = |X[`Q_LEN-2:`Q_NF]; + YExpNonzero = |Y[`Q_LEN-2:`Q_NF]; + ZExpNonzero = |Z[`Q_LEN-2:`Q_NF]; + + // is the exponent all 1's + XExpMaxE = &X[`Q_LEN-2:`Q_NF]; + YExpMaxE = &Y[`Q_LEN-2:`Q_NF]; + ZExpMaxE = &Z[`Q_LEN-2:`Q_NF]; + end + 2'b01: begin // if input is double percision + // extract sign bit + XSgnE = XLen1[`D_LEN-1]; + YSgnE = YLen1[`D_LEN-1]; + ZSgnE = ZLen1[`D_LEN-1]; + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // convert the double precsion exponent into quad precsion + XExpE = {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; + YExpE = {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; + ZExpE = {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; + + // extract the fraction and add the nessesary trailing zeros + XFracE = {XLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)}; + YFracE = {YLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)}; + ZFracE = {ZLen1[`D_NE-1:0], (`Q_NF-`D_NE)'(0)}; + + // is the exponent non-zero + XExpNonzero = |XLen1[`D_LEN-2:`D_NE]; + YExpNonzero = |YLen1[`D_LEN-2:`D_NE]; + ZExpNonzero = |ZLen1[`D_LEN-2:`D_NE]; 
+ + // is the exponent all 1's + XExpMaxE = &XLen1[`D_LEN-2:`D_NE]; + YExpMaxE = &YLen1[`D_LEN-2:`D_NE]; + ZExpMaxE = &ZLen1[`D_LEN-2:`D_NE]; + end + 2'b00: begin // if input is single percision + // extract sign bit + XSgnE = XLen2[`S_LEN-1]; + YSgnE = YLen2[`S_LEN-1]; + ZSgnE = ZLen2[`S_LEN-1]; + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // convert the single precsion exponent into quad precsion + XExpE = {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; + YExpE = {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; + ZExpE = {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; + + // extract the fraction and add the nessesary trailing zeros + XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; + YFracE = {YLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; + ZFracE = {ZLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; + + // is the exponent non-zero + XExpNonzero = |XLen2[`S_LEN-2:`S_NF]; + YExpNonzero = |YLen2[`S_LEN-2:`S_NF]; + ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF]; + + // is the exponent all 1's + XExpMaxE = &XLen2[`S_LEN-2:`S_NF]; + YExpMaxE = &YLen2[`S_LEN-2:`S_NF]; + ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF]; + end + 2'b10: begin // if input is half percision + // extract sign bit + XSgnE = XLen3[`H_LEN-1]; + YSgnE = YLen3[`H_LEN-1]; + ZSgnE = ZLen3[`H_LEN-1]; + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // convert the half precsion exponent into quad precsion + XExpE = 
{XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; + YExpE = {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; + ZExpE = {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; + + // extract the fraction and add the nessesary trailing zeros + XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; + YFracE = {YLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; + ZFracE = {ZLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; + + // is the exponent non-zero + XExpNonzero = |XLen3[`H_LEN-2:`H_NF]; + YExpNonzero = |YLen3[`H_LEN-2:`H_NF]; + ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF]; + + // is the exponent all 1's + XExpMaxE = &XLen3[`H_LEN-2:`H_NF]; + YExpMaxE = &YLen3[`H_LEN-2:`H_NF]; + ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF]; + end + endcase + end + + end + + // is the exponent all 0's + assign XExpZero = ~XExpNonzero; + assign YExpZero = ~YExpNonzero; + assign ZExpZero = ~ZExpNonzero; + + // is the fraction zero + assign XFracZero = ~|XFracE; + assign YFracZero = ~|YFracE; + assign ZFracZero = ~|ZFracE; + + // add the assumed one (or zero if denormal or zero) to create the mantissa + assign XManE = {XExpNonzero, XFracE}; + assign YManE = {YExpNonzero, YFracE}; + assign ZManE = {ZExpNonzero, ZFracE}; + + // is X normalized + assign XNormE = ~(XExpMaxE|XExpZero); + + // is the input a NaN + // - force to be a NaN if it isn't properly Nan Boxed + assign XNaNE = XExpMaxE & ~XFracZero; + assign YNaNE = YExpMaxE & ~YFracZero; + assign ZNaNE = ZExpMaxE & ~ZFracZero; + + // is the input a singnaling NaN + assign XSNaNE = XNaNE&~XFracE[`NF-1]; + assign YSNaNE = YNaNE&~YFracE[`NF-1]; + assign ZSNaNE = ZNaNE&~ZFracE[`NF-1]; + + // is the input denormalized + assign XDenormE = XExpZero & ~XFracZero; + assign YDenormE = YExpZero & ~YFracZero; + assign ZDenormE = ZExpZero & ~ZFracZero; + + // is the input infinity + assign XInfE = XExpMaxE & XFracZero; + assign YInfE = YExpMaxE & YFracZero; + assign 
ZInfE = ZExpMaxE & ZFracZero; + + // is the input zero + assign XZeroE = XExpZero & XFracZero; + assign YZeroE = YExpZero & YFracZero; + assign ZZeroE = ZExpZero & ZFracZero; + +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/unpacking.sv b/pipelined/src/fpu/unpacking.sv deleted file mode 100644 index f503e47be..000000000 --- a/pipelined/src/fpu/unpacking.sv +++ /dev/null @@ -1,95 +0,0 @@ -`include "wally-config.vh" - -module unpack ( - input logic [63:0] X, Y, Z, - input logic FmtE, - input logic [2:0] FOpCtrlE, - output logic XSgnE, YSgnE, ZSgnE, - output logic [10:0] XExpE, YExpE, ZExpE, - output logic [52:0] XManE, YManE, ZManE, - output logic XNormE, - output logic XNaNE, YNaNE, ZNaNE, - output logic XSNaNE, YSNaNE, ZSNaNE, - output logic XDenormE, YDenormE, ZDenormE, - output logic XZeroE, YZeroE, ZZeroE, - output logic [10:0] BiasE, - output logic XInfE, YInfE, ZInfE, - output logic XExpMaxE -); - - logic [51:0] XFracE, YFracE, ZFracE; - logic XExpNonzero, YExpNonzero, ZExpNonzero; - logic XFracZero, YFracZero, ZFracZero; // input fraction zero - logic XExpZero, YExpZero, ZExpZero; // input exponent zero - logic YExpMaxE, ZExpMaxE; // input exponent all 1s - logic [31:0] XFloat, YFloat, ZFloat; // Bottom half or NaN, if RV64 and not properly NaN boxed - - // Determine if number is NaN as double precision to check single precision NaN boxing - if (`F_SUPPORTED & ~`D_SUPPORTED) begin // eventually this should change to FLEN when FLEN isn't hardwared to 64 - assign XFloat = X[31:0]; - assign YFloat = Y[31:0]; - assign ZFloat = Z[31:0]; - end else begin - assign XFloat = &X[`FLEN-1:32] ? X[31:0] : 32'h7fc00000; - assign YFloat = &Y[`FLEN-1:32] ? Y[31:0] : 32'h7fc00000; - assign ZFloat = &Z[`FLEN-1:32] ? Z[31:0] : 32'h7fc00000; - end - - assign XSgnE = FmtE ? X[63] : XFloat[31]; - assign YSgnE = FmtE ? Y[63] : YFloat[31]; - assign ZSgnE = FmtE ? Z[63] : ZFloat[31]; - - assign XExpE = FmtE ? 
X[62:52] : {XFloat[30], {3{~XFloat[30]&~XExpZero|XExpMaxE}}, XFloat[29:23]}; - assign YExpE = FmtE ? Y[62:52] : {YFloat[30], {3{~YFloat[30]&~YExpZero|YExpMaxE}}, YFloat[29:23]}; - assign ZExpE = FmtE ? Z[62:52] : {ZFloat[30], {3{~ZFloat[30]&~ZExpZero|ZExpMaxE}}, ZFloat[29:23]}; - - assign XFracE = FmtE ? X[51:0] : {XFloat[22:0], 29'b0}; - assign YFracE = FmtE ? Y[51:0] : {YFloat[22:0], 29'b0}; - assign ZFracE = FmtE ? Z[51:0] : {ZFloat[22:0], 29'b0}; - - assign XExpNonzero = FmtE ? |X[62:52] : |XFloat[30:23]; - assign YExpNonzero = FmtE ? |Y[62:52] : |YFloat[30:23]; - assign ZExpNonzero = FmtE ? |Z[62:52] : |ZFloat[30:23]; - - assign XExpZero = ~XExpNonzero; - assign YExpZero = ~YExpNonzero; - assign ZExpZero = ~ZExpNonzero; - - assign XFracZero = ~|XFracE; - assign YFracZero = ~|YFracE; - assign ZFracZero = ~|ZFracE; - - assign XManE = {XExpNonzero, XFracE}; - assign YManE = {YExpNonzero, YFracE}; - assign ZManE = {ZExpNonzero, ZFracE}; - - assign XExpMaxE = FmtE ? &X[62:52] : &XFloat[30:23]; - assign YExpMaxE = FmtE ? &Y[62:52] : &YFloat[30:23]; - assign ZExpMaxE = FmtE ? 
&Z[62:52] : &ZFloat[30:23]; - - assign XNormE = ~(XExpMaxE|XExpZero); - - // force single precision input to be a NaN if it isn't properly Nan Boxed - assign XNaNE = XExpMaxE & ~XFracZero; - assign YNaNE = YExpMaxE & ~YFracZero; - assign ZNaNE = ZExpMaxE & ~ZFracZero; - - assign XSNaNE = XNaNE&~XFracE[51]; - assign YSNaNE = YNaNE&~YFracE[51]; - assign ZSNaNE = ZNaNE&~ZFracE[51]; - - assign XDenormE = XExpZero & ~XFracZero; - assign YDenormE = YExpZero & ~YFracZero; - assign ZDenormE = ZExpZero & ~ZFracZero; - - assign XInfE = XExpMaxE & XFracZero; - assign YInfE = YExpMaxE & YFracZero; - assign ZInfE = ZExpMaxE & ZFracZero; - - assign XZeroE = XExpZero & XFracZero; - assign YZeroE = YExpZero & YFracZero; - assign ZZeroE = ZExpZero & ZFracZero; - - assign BiasE = 11'h3ff; // always use 1023 because exponents are unpacked to double precision - -endmodule \ No newline at end of file diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index aa617db68..5f1c31ea9 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -161,7 +161,7 @@ module lsu ( .Cacheable(CacheableM), .Idempotent(), .AtomicAllowed(), .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAmoAccessFaultM, .InstrPageFaultF(),.LoadPageFaultM, .StoreAmoPageFaultM, - .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, + .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw. .DAPageFault(DataDAPageFaultM), // *** should use LSURWM as this is includes the lr/sc squash. However this introduces a combo loop // from squash, depends on LSUPAdrM, depends on TLBHit, depends on these *AccessM inputs. 
diff --git a/pipelined/src/privileged/csri.sv b/pipelined/src/privileged/csri.sv index 974b3616f..8a3f42fdf 100644 --- a/pipelined/src/privileged/csri.sv +++ b/pipelined/src/privileged/csri.sv @@ -95,21 +95,20 @@ module csri #(parameter // else if (WriteUIEM) IE_REGW = (CSRWriteValM & 12'h111) | (IE_REGW & 12'hAAA); // only U field // restricted views of registers - always_comb begin:regs - // Add MEIP read-only signal - IP_REGW = {IntInM[11],1'b0,IP_REGW_writeable}; + // Add MEIP read-only signal + assign IP_REGW = {IntInM[11],1'b0,IP_REGW_writeable}; // Machine Mode - MIP_REGW = IP_REGW; - MIE_REGW = IE_REGW; + assign MIP_REGW = IP_REGW; + assign MIE_REGW = IE_REGW; - // Supervisor mode - if (`S_SUPPORTED) begin - SIP_REGW = IP_REGW & MIDELEG_REGW[11:0] & 'h222; // only delegated interrupts visible - SIE_REGW = IE_REGW & MIDELEG_REGW[11:0] & 'h222; - end else begin - SIP_REGW = 12'b0; - SIE_REGW = 12'b0; - end + // Supervisor mode + if (`S_SUPPORTED) begin + assign SIP_REGW = IP_REGW & MIDELEG_REGW[11:0] & 'h222; // only delegated interrupts visible + assign SIE_REGW = IE_REGW & MIDELEG_REGW[11:0] & 'h222; + end else begin + assign SIP_REGW = 12'b0; + assign SIE_REGW = 12'b0; end + endmodule diff --git a/pipelined/testbench/fp/tests/fma-testbench.sv b/pipelined/testbench/fp/tests/fma-testbench.sv new file mode 100644 index 000000000..6ce50387d --- /dev/null +++ b/pipelined/testbench/fp/tests/fma-testbench.sv @@ -0,0 +1,279 @@ + +`include "wally-config.vh" +`define PATH "../../../../tests/fp/vectors/" + +string tests[] = '{ + "f16_mulAdd_rne.tv", + "f16_mulAdd_rz.tv", + "f16_mulAdd_ru.tv", + "f16_mulAdd_rd.tv", + "f16_mulAdd_rnm.tv", + "f32_mulAdd_rne.tv", + "f32_mulAdd_rz.tv", + "f32_mulAdd_ru.tv", + "f32_mulAdd_rd.tv", + "f32_mulAdd_rnm.tv", + "f64_mulAdd_rne.tv", + "f64_mulAdd_rz.tv", + "f64_mulAdd_ru.tv", + "f64_mulAdd_rd.tv", + "f64_mulAdd_rnm.tv", + "f128_mulAdd_rne.tv", + "f128_mulAdd_rz.tv", + "f128_mulAdd_ru.tv", + "f128_mulAdd_rd.tv", + 
"f128_mulAdd_rnm.tv" +}; + +// steps to run FMA tests +// 1) create test vectors in riscv-wally/tests/fp with: ./run-all.sh +// 2) go to riscv-wally/pipelined/testbench/fp/tests +// 3) run ./sim-wally-batch + +module fmatestbench(); + + logic clk; + logic [31:0] errors=0; + logic [31:0] vectornum=0; + logic [`FLEN*4+7+4+4:0] testvectors[6133248:0]; + int i = `ZFH_SUPPORTED ? 0 : `F_SUPPORTED ? 5 : `D_SUPPORTED ? 10 : 15; // set i to the first test that is run + + logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat + logic [`FLEN-1:0] ans; // result from TestFloat + logic [7:0] flags; // flags read form testfloat + logic [2:0] FrmE; // rounding mode + logic [`FPSIZES/3:0] FmtE; // format - 10 = half, 00 = single, 01 = double, 11 = quad + logic [3:0] FrmRead; // rounding mode read from testfloat + logic [3:0] FmtRead; // format read from testfloat + logic [`FLEN-1:0] FMAResM; // FMA's outputed result + logic [4:0] FMAFlgM; // FMA's outputed flags + logic [2:0] FOpCtrlE; // which opperation + logic wnan; // is the outputed result NaN + logic ansnan; // is the correct answer NaN + + // signals needed to connect modules + logic [`NE+1:0] ProdExpE; + logic AddendStickyE; + logic KillProdE; + logic XSgnE, YSgnE, ZSgnE; + logic [`NE-1:0] XExpE, YExpE, ZExpE; + logic [`NF:0] XManE, YManE, ZManE; + logic XNormE; + logic XExpMaxE; + logic XNaNE, YNaNE, ZNaNE; + logic XSNaNE, YSNaNE, ZSNaNE; + logic XDenormE, YDenormE, ZDenormE; + logic XInfE, YInfE, ZInfE; + logic XZeroE, YZeroE, ZZeroE; + logic YExpMaxE, ZExpMaxE, Mult; + logic [3*`NF+5:0] SumE; + logic InvZE; + logic NegSumE; + logic ZSgnEffE; + logic PSgnE; + logic [$clog2(3*`NF+7)-1:0] NormCntE; + + + assign FOpCtrlE = 3'b0; // set to 0 because test float only tests fMADD + assign Mult = 1'b0; // set to zero because not testing multiplication + + // check if the calculated result or correct answer is NaN + always_comb begin + case (FmtRead) + 4'b11: begin // quad + assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]); + 
assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]); + + end + 4'b01: begin // double + assign ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]); + assign wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]); + + end + 4'b00: begin // single + assign ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]); + assign wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]); + end + 4'b10: begin // half + assign ansnan = &ans[`H_LEN-2:`H_NF]&(|ans[`H_NF-1:0]); + assign wnan = &FMAResM[`H_LEN-2:`H_NF]&(|FMAResM[`H_NF-1:0]); + end + endcase + end + + // instantiate devices under test + unpack unpack(.X, .Y, .Z, .FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, + .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, + .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XExpMaxE); + fma1 fma1(.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, + .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE, + .ProdExpE, .AddendStickyE, .KillProdE); + fma2 fma2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM(XManE), .YManM(YManE), .ZManM(ZManE), + .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), + .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), + .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(FmtE), .FrmM(FrmE), + .FMAFlgM, .FMAResM, .Mult); + + + // produce clock + always begin + clk = 1; #5; clk = 0; #5; + end + + // Read first test + initial begin + $display("\n\nRunning %s vectors", tests[i]); + $readmemh({`PATH, tests[i]}, testvectors); + end + + // apply test vectors on rising edge of clk + always @(posedge clk) begin + #1; + flags = 
testvectors[vectornum][15:8]; + FrmRead = testvectors[vectornum][7:4]; + FmtRead = testvectors[vectornum][3:0]; + if (FmtRead==4'b11 & `Q_SUPPORTED) begin // quad + X = testvectors[vectornum][16+4*(`Q_LEN)-1:16+3*(`Q_LEN)]; + Y = testvectors[vectornum][16+3*(`Q_LEN)-1:16+2*(`Q_LEN)]; + Z = testvectors[vectornum][16+2*(`Q_LEN)-1:16+`Q_LEN]; + ans = testvectors[vectornum][16+(`Q_LEN-1):16]; + end + else if (FmtRead==4'b01 & `D_SUPPORTED) begin // double + X = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+4*(`D_LEN)-1:16+3*(`D_LEN)]}; + Y = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+3*(`D_LEN)-1:16+2*(`D_LEN)]}; + Z = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+2*(`D_LEN)-1:16+`D_LEN]}; + ans = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+(`D_LEN-1):16]}; + end + else if (FmtRead==4'b00 & `F_SUPPORTED) begin // single + X = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+4*(`S_LEN)-1:16+3*(`S_LEN)]}; + Y = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+3*(`S_LEN)-1:16+2*(`S_LEN)]}; + Z = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+2*(`S_LEN)-1:16+`S_LEN]}; + ans = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+(`S_LEN-1):16]}; + end + else if (FmtRead==4'b10 & `ZFH_SUPPORTED) begin // half + X = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+4*(`H_LEN)-1:16+3*(`H_LEN)]}; + Y = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+3*(`H_LEN)-1:16+2*(`H_LEN)]}; + Z = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+2*(`H_LEN)-1:16+`H_LEN]}; + ans = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+(`H_LEN-1):16]}; + end + else begin + X = {`FLEN{1'bx}}; + Y = {`FLEN{1'bx}}; + Z = {`FLEN{1'bx}}; + ans = {`FLEN{1'bx}}; + end + + // trim format and rounding mode to appropriate size + if (`FPSIZES <= 2) FmtE = FmtRead === `FMT; // rewrite format if 2 or less floating formats are supported + else FmtE = FmtRead[1:0]; + FrmE = FrmRead[2:0]; + end + + // check results on falling edge of clk + always @(negedge clk) begin + // quad + if((FmtRead==4'b11) & 
~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0] | (XNaNE&(FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) | (YNaNE&(FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]})) | (ZNaNE&(FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})))))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(XDenormE) $display( "xdenorm "); + if(YDenormE) $display( "ydenorm "); + if(ZDenormE) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf "); + if(~FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf "); + if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN "); + if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN "); + if(ans[`FLEN-1] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf "); + if(~ans[`FLEN-1] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf "); + if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN "); + if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN "); + errors = errors + 1; + if (errors === 1) $stop; + end + // double + if((FmtRead==4'b01) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`D_LEN-2:0] === ans[`D_LEN-2:0] | (XNaNE&(FMAResM[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | (YNaNE&(FMAResM[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})) | (ZNaNE&(FMAResM[`D_LEN-2:0] === {Z[`D_LEN-2:`D_NF],1'b1,Z[`D_NF-2:0]})))))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans,
FMAFlgM, flags); + if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm "); + if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm "); + if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN "); + if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN "); + if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN "); + if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN "); + errors = errors + 1; + if (errors === 1) $stop; + end + // single + if((FmtRead==4'b00) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`S_LEN-2:0] === ans[`S_LEN-2:0] | (XNaNE&(FMAResM[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | (YNaNE&(FMAResM[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})) | (ZNaNE&(FMAResM[`S_LEN-2:0] === {Z[`S_LEN-2:`S_NF],1'b1,Z[`S_NF-2:0]})))))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm "); + if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm "); + if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN "); + if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN "); + if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN "); + if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN "); + errors = errors + 1; + if (errors === 1) $stop; + end + // half + if((FmtRead==4'b10) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`H_LEN-2:0] === ans[`H_LEN-2:0] | (XNaNE&(FMAResM[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) |
(YNaNE&(FMAResM[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})) | (ZNaNE&(FMAResM[`H_LEN-2:0] === {Z[`H_LEN-2:`H_NF],1'b1,Z[`H_NF-2:0]})))))) begin + $display( "%h %h %h %h %h %h %h Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); + if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm "); + if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm "); + if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm "); + if(FMAFlgM[4] !== 0) $display( "invld "); + if(FMAFlgM[2] !== 0) $display( "ovrflw "); + if(FMAFlgM[1] !== 0) $display( "unflw "); + if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN "); + if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN "); + if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN "); + if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN "); + errors = errors + 1; + if (errors === 1) $stop; + end + + // if ( vectornum === 3165862) $stop; // uncomment for specific test + vectornum = vectornum + 1; // increment test + if (testvectors[vectornum][0] === 1'bx) begin // if reached the end of file + if (errors) begin // if there were errors + $display("%s completed with %d tests and %d errors", tests[i], vectornum, errors); + $stop; + end + else begin // if no errors + if(tests[i] === "") begin // if no more tests + $display("\nAll tests completed with %d errors\n", errors); + $stop; + end + + $display("%s completed successfully with %d tests and %d errors (across all tests)\n", tests[i], vectornum, errors); + + // increment tests - skip some precisions if needed + if ((i === 4 & ~`F_SUPPORTED) | (i === 9 & ~`D_SUPPORTED) | (i === 14 & ~`Q_SUPPORTED)) i = i+5; + if ((i === 9 & ~`D_SUPPORTED) | (i === 14 & ~`Q_SUPPORTED)) i = i+5; + if ((i === 14 & ~`Q_SUPPORTED)) i = i+5; + i = i+1; + + // if no more tests - finish + if(tests[i] === "") begin + $display("\nAll tests completed with %d errors\n", errors); + $stop; + end + + // read next files + $display("Running %s vectors", 
tests[i]); + $readmemh({`PATH, tests[i]}, testvectors); + vectornum = 0; + end + end + end +endmodule diff --git a/pipelined/testbench/fp/tests/fma.do b/pipelined/testbench/fp/tests/fma.do new file mode 100644 index 000000000..6349be0ef --- /dev/null +++ b/pipelined/testbench/fp/tests/fma.do @@ -0,0 +1,50 @@ +# wally-pipelined.do +# +# Modification by Oklahoma State University & Harvey Mudd College +# Use with Testbench +# James Stine, 2008; David Harris 2021 +# Go Cowboys!!!!!! +# +# Takes 1:10 to run RV64IC tests using gui + +# run with vsim -do "do wally-pipelined.do rv64ic riscvarchtest-64m" + +# Use this wally-pipelined.do file to run this example. +# Either bring up ModelSim and type the following at the "ModelSim>" prompt: +# do wally-pipelined.do +# or, to run from a shell, type the following at the shell prompt: +# vsim -do wally-pipelined.do -c +# (omit the "-c" to see the GUI while running from the shell) + +onbreak {resume} + +# create library +if [file exists work] { + vdel -all +} +vlib work + +# compile source files +# suppress spurious warnngs about +# "Extra checking for conflicts with always_comb done at vopt time" +# because vsim will run vopt + +# start and run simulation +# remove +acc flag for faster sim during regressions if there is no need to access internal signals +# $num = the added words after the call +vlog +incdir+../../../config/$1 +incdir+../../../config/shared fma-testbench.sv ../../../src/fpu/fma.sv ../../../src/fpu/unpack.sv -suppress 2583 -suppress 7063 + +vsim -voptargs=+acc work.fmatestbench + +view wave +#-- display input and output signals as hexidecimal values +#do ./wave-dos/peripheral-waves.do +#add log -recursive /* +#do wave.do deal with when ready + +#-- Run the Simulation +#run 3600 +run -all +noview fma-testbench.sv +view wave + diff --git a/pipelined/testbench/fp/tests/sim-fma b/pipelined/testbench/fp/tests/sim-fma new file mode 100755 index 000000000..5027d43e4 --- /dev/null +++ 
b/pipelined/testbench/fp/tests/sim-fma @@ -0,0 +1 @@ +vsim -do "do fma.do rv64fp" diff --git a/pipelined/testbench/fp/tests/sim-fma-batch b/pipelined/testbench/fp/tests/sim-fma-batch new file mode 100755 index 000000000..321e0678d --- /dev/null +++ b/pipelined/testbench/fp/tests/sim-fma-batch @@ -0,0 +1 @@ +vsim -c -do "do fma.do rv64fp" \ No newline at end of file diff --git a/pipelined/testbench/testbench-linux.sv b/pipelined/testbench/testbench-linux.sv index 8723d6b9c..62d13a1a3 100644 --- a/pipelined/testbench/testbench-linux.sv +++ b/pipelined/testbench/testbench-linux.sv @@ -141,11 +141,11 @@ module testbench; logic [`XLEN-1:0] ExpectedCSRArrayValue``STAGE[10:0]; `DECLARE_TRACE_SCANNER_SIGNALS(E) `DECLARE_TRACE_SCANNER_SIGNALS(M) - integer NextMIPexpected; + integer NextMIPexpected, NextSIPexpected; integer NextMepcExpected; // Memory stage expected values from trace logic checkInstrM; - integer MIPexpected; + integer MIPexpected, SIPexpected; string name; logic [`AHBW-1:0] readDataExpected; // Write back stage expected values from trace @@ -168,11 +168,14 @@ module testbench; integer NumCSRPostWIndex; logic [`XLEN-1:0] InstrCountW; integer RequestDelayedMIP; + integer RequestDelayedSIP; integer ForceMIPFuture; integer CSRIndex; longint MepcExpected; integer CheckMIPFutureE; integer CheckMIPFutureM; + integer CheckSIPFutureE; + integer CheckSIPFutureM; // Useful Aliases `define RF dut.core.ieu.dp.regf.rf `define PC dut.core.ifu.pcreg.q @@ -185,6 +188,8 @@ module testbench; `define MIDELEG `CSR_BASE.csrm.deleg.MIDELEGreg.q `define MIE `CSR_BASE.csri.MIE_REGW `define MIP `CSR_BASE.csri.MIP_REGW + `define SIE `CSR_BASE.csri.SIE_REGW + `define SIP `CSR_BASE.csri.SIP_REGW `define MCAUSE `CSR_BASE.csrm.MCAUSEreg.q `define SCAUSE `CSR_BASE.csrs.csrs.SCAUSEreg.q `define MEPC `CSR_BASE.csrm.MEPCreg.q @@ -197,6 +202,7 @@ module testbench; `define STVEC `CSR_BASE.csrs.csrs.STVECreg.q `define SATP `CSR_BASE.csrs.csrs.genblk1.SATPreg.q `define MSTATUS 
`CSR_BASE.csrsr.MSTATUS_REGW + `define SSTATUS `CSR_BASE.csrsr.SSTATUS_REGW `define STATUS_TSR `CSR_BASE.csrsr.STATUS_TSR_INT `define STATUS_TW `CSR_BASE.csrsr.STATUS_TW_INT `define STATUS_TVM `CSR_BASE.csrsr.STATUS_TVM_INT @@ -297,6 +303,8 @@ module testbench; `INIT_CHECKPOINT_VAL(MIDELEG, [`XLEN-1:0]); `INIT_CHECKPOINT_VAL(MIE, [11:0]); `INIT_CHECKPOINT_VAL(MIP, [11:0]); + `INIT_CHECKPOINT_VAL(SIE, [11:0]); + `INIT_CHECKPOINT_VAL(SIP, [11:0]); `INIT_CHECKPOINT_VAL(MCAUSE, [`XLEN-1:0]); `INIT_CHECKPOINT_VAL(SCAUSE, [`XLEN-1:0]); `INIT_CHECKPOINT_VAL(MEPC, [`XLEN-1:0]); @@ -310,6 +318,7 @@ module testbench; `INIT_CHECKPOINT_VAL(SATP, [`XLEN-1:0]); `INIT_CHECKPOINT_VAL(PRIV, [1:0]); `MAKE_CHECKPOINT_INIT_SIGNAL(MSTATUS, [`XLEN-1:0],0,0); + `MAKE_CHECKPOINT_INIT_SIGNAL(SSTATUS, [`XLEN-1:0],0,0); // Many UART registers are difficult to initialize because under the hood // they are not simple registers. Instead some are generated by interesting // combinational blocks such that they depend upon a variety of different @@ -463,6 +472,10 @@ module testbench; CheckMIPFutureE = 1; \ NextMIPexpected = ExpectedCSRArrayValueE[NumCSRE]; \ end \ + if(ExpectedCSRArrayE[NumCSRE].substr(0, 2) == "sip") begin \ + CheckSIPFutureE = 1; \ + NextSIPexpected = ExpectedCSRArrayValueE[NumCSRE]; \ + end \ if(ExpectedCSRArrayE[NumCSRE].substr(0,3) == "mepc") begin \ // $display("hello! 
we are here."); \ MepcExpected = ExpectedCSRArrayValueE[NumCSRE]; \ @@ -475,7 +488,7 @@ module testbench; end \ if(`"STAGE`"=="M") begin \ // override on special conditions \ - if (dut.core.lsu.LSUPAdrM == 'h10000005) \ + if ((dut.core.lsu.LSUPAdrM == 'h10000002) | (dut.core.lsu.LSUPAdrM == 'h10000005) | (dut.core.lsu.LSUPAdrM == 'h10000006)) \ //$display("%tns, %d instrs: Overwrite UART's LSR in memory stage.", $time, InstrCountW-1); \ force dut.core.ieu.dp.ReadDataM = ExpectedMemReadDataM; \ else \ @@ -504,13 +517,16 @@ module testbench; // $display("%tns: ExpectedPCM %x",$time,ExpectedPCM); // $display("%tns: ExpectedPCE %x",$time,ExpectedPCE); // $display("%tns: ExpectedPCW %x",$time,ExpectedPCW); + // *** this is probably not right anymore since either MIP or SIP can be forced. if((ExpectedPCE != MepcExpected) & ((MepcExpected - ExpectedPCE) * (MepcExpected - ExpectedPCE) <= 200) | ~dut.core.ieu.c.InstrValidM) begin RequestDelayedMIP <= 1; $display("%tns: Requesting Delayed MIP. Current MEPC value is %x",$time,MepcExpected); end else begin // update MIP immediately $display("%tns: Updating MIP to %x",$time,NextMIPexpected); MIPexpected = NextMIPexpected; - force dut.core.priv.priv.csr.csri.MIP_REGW = MIPexpected; + //force dut.core.priv.priv.csr.csri.MIP_REGW = MIPexpected; + //force dut.core.priv.priv.csr.csri.SIP_REGW = MIPexpected; + force dut.core.priv.priv.csr.csri.IP_REGW = MIPexpected; end // $display("%tn: ExpectedCSRArrayM = %p",$time,ExpectedCSRArrayM); // $display("%tn: ExpectedCSRArrayValueM = %p",$time,ExpectedCSRArrayValueM); @@ -525,12 +541,52 @@ module testbench; $display("%tns: Executing Delayed MIP. 
Current MEPC value is %x",$time,dut.core.priv.priv.csr.csrm.MEPC_REGW); $display("%tns: Updating MIP to %x",$time,NextMIPexpected); MIPexpected = NextMIPexpected; - force dut.core.priv.priv.csr.csri.MIP_REGW = MIPexpected; + //force dut.core.priv.priv.csr.csri.MIP_REGW = MIPexpected; + //force dut.core.priv.priv.csr.csri.SIP_REGW = MIPexpected; + force dut.core.priv.priv.csr.csri.IP_REGW = MIPexpected; $display("%tns: Finished Executing Delayed MIP. Current MEPC value is %x",$time,dut.core.priv.priv.csr.csrm.MEPC_REGW); RequestDelayedMIP = 0; end end + // SIP spoofing +/* -----\/----- EXCLUDED -----\/----- + always @(posedge clk) begin + #1; + if(CheckSIPFutureE) CheckSIPFutureE <= 0; + CheckSIPFutureM <= CheckSIPFutureE; + if(CheckSIPFutureM) begin + // $display("%tns: ExpectedPCM %x",$time,ExpectedPCM); + // $display("%tns: ExpectedPCE %x",$time,ExpectedPCE); + // $display("%tns: ExpectedPCW %x",$time,ExpectedPCW); + if((ExpectedPCE != MepcExpected) & ((MepcExpected - ExpectedPCE) * (MepcExpected - ExpectedPCE) <= 200) | ~dut.core.ieu.c.InstrValidM) begin + RequestDelayedSIP <= 1; + $display("%tns: Requesting Delayed SIP. 
Current MEPC value is %x",$time,MepcExpected); + end else begin // update SIP immediately + $display("%tns: Updating SIP to %x",$time,NextSIPexpected); + SIPexpected = NextSIPexpected; + force dut.core.priv.priv.csr.csri.SIP_REGW = SIPexpected; + end + // $display("%tn: ExpectedCSRArrayM = %p",$time,ExpectedCSRArrayM); + // $display("%tn: ExpectedCSRArrayValueM = %p",$time,ExpectedCSRArrayValueM); + // $display("%tn: ExpectedTokens = %p",$time,ExpectedTokensM); + // $display("%tn: MepcExpected = %x",$time,MepcExpected); + // $display("%tn: ExpectedPCE = %x",$time,ExpectedPCE); + // $display("%tns: Difference/multiplication thing: %x",$time,(MepcExpected - ExpectedPCE) * (MepcExpected - ExpectedPCE)); + // $display("%tn: ExpectedCSRArrayM[NumCSRM] %x",$time,ExpectedCSRArrayM[NumCSRM]); + // $display("%tn: ExpectedCSRArrayValueM[NumCSRM] %x",$time,ExpectedCSRArrayValueM[NumCSRM]); + end + if(RequestDelayedSIP & checkInstrM) begin + $display("%tns: Executing Delayed SIP. Current MEPC value is %x",$time,dut.core.priv.priv.csr.csrm.MEPC_REGW); + $display("%tns: Updating SIP to %x",$time,NextSIPexpected); + SIPexpected = NextSIPexpected; + force dut.core.priv.priv.csr.csri.SIP_REGW = SIPexpected; + $display("%tns: Finished Executing Delayed SIP. Current MEPC value is %x",$time,dut.core.priv.priv.csr.csrm.MEPC_REGW); + RequestDelayedSIP = 0; + end + end + -----/\----- EXCLUDED -----/\----- */ + // step 1: register expected state into the write back stage. 
always @(posedge clk) begin if (reset) begin @@ -634,9 +690,12 @@ module testbench; case(ExpectedCSRArrayW[NumCSRPostWIndex]) "mhartid": `checkCSR(dut.core.priv.priv.csr.csrm.MHARTID_REGW) "mstatus": `checkCSR(dut.core.priv.priv.csr.csrm.MSTATUS_REGW) + "sstatus": `checkCSR(dut.core.priv.priv.csr.csrs.SSTATUS_REGW) "mtvec": `checkCSR(dut.core.priv.priv.csr.csrm.MTVEC_REGW) "mip": `checkCSR(dut.core.priv.priv.csr.csrm.MIP_REGW) "mie": `checkCSR(dut.core.priv.priv.csr.csrm.MIE_REGW) + "sip": `checkCSR(dut.core.priv.priv.csr.csrs.SIP_REGW) + "sie": `checkCSR(dut.core.priv.priv.csr.csrs.SIE_REGW) "mideleg": `checkCSR(dut.core.priv.priv.csr.csrm.MIDELEG_REGW) "medeleg": `checkCSR(dut.core.priv.priv.csr.csrm.MEDELEG_REGW) "mepc": `checkCSR(dut.core.priv.priv.csr.csrm.MEPC_REGW) diff --git a/tests/fp/create_vectors128fma.sh b/tests/fp/create_vectors128fma.sh new file mode 100755 index 000000000..361a4add7 --- /dev/null +++ b/tests/fp/create_vectors128fma.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +BUILD="./TestFloat-3e/build/Linux-x86_64-GCC" +OUTPUT="./vectors" + +$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv +$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv +$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv +$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv +$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv + +# format: X_Y_Z_answer_flags_Frm_Fmt (use "sed -i -e": "-ie" is parsed as -i with backup suffix "e") +sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rne.tv +sed -i -e 's/$/_0/' $OUTPUT/f128_mulAdd_rne.tv +sed -i -e 's/$/_3/' $OUTPUT/f128_mulAdd_rne.tv + +sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rz.tv +sed -i -e 's/$/_1/' $OUTPUT/f128_mulAdd_rz.tv +sed -i -e 's/$/_3/' $OUTPUT/f128_mulAdd_rz.tv + +sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_ru.tv +sed -i -e 's/$/_3/' $OUTPUT/f128_mulAdd_ru.tv +sed -i -e 's/$/_3/' $OUTPUT/f128_mulAdd_ru.tv + +sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rd.tv +sed -i -e 's/$/_2/' $OUTPUT/f128_mulAdd_rd.tv +sed -i -e 's/$/_3/'
$OUTPUT/f128_mulAdd_rd.tv + +sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rnm.tv +sed -i -e 's/$/_4/' $OUTPUT/f128_mulAdd_rnm.tv +sed -i -e 's/$/_3/' $OUTPUT/f128_mulAdd_rnm.tv \ No newline at end of file diff --git a/tests/fp/create_vectors16fma.sh b/tests/fp/create_vectors16fma.sh new file mode 100755 index 000000000..d46e87680 --- /dev/null +++ b/tests/fp/create_vectors16fma.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +BUILD="./TestFloat-3e/build/Linux-x86_64-GCC" +OUTPUT="./vectors" + +$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv +$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv +$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv +$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv +$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv + +# format: X_Y_Z_answer_flags_Frm_Fmt (use "sed -i -e": "-ie" is parsed as -i with backup suffix "e") +sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rne.tv +sed -i -e 's/$/_0/' $OUTPUT/f16_mulAdd_rne.tv +sed -i -e 's/$/_2/' $OUTPUT/f16_mulAdd_rne.tv + +sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rz.tv +sed -i -e 's/$/_1/' $OUTPUT/f16_mulAdd_rz.tv +sed -i -e 's/$/_2/' $OUTPUT/f16_mulAdd_rz.tv + +sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_ru.tv +sed -i -e 's/$/_3/' $OUTPUT/f16_mulAdd_ru.tv +sed -i -e 's/$/_2/' $OUTPUT/f16_mulAdd_ru.tv + +sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rd.tv +sed -i -e 's/$/_2/' $OUTPUT/f16_mulAdd_rd.tv +sed -i -e 's/$/_2/' $OUTPUT/f16_mulAdd_rd.tv + +sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rnm.tv +sed -i -e 's/$/_4/' $OUTPUT/f16_mulAdd_rnm.tv +sed -i -e 's/$/_2/' $OUTPUT/f16_mulAdd_rnm.tv \ No newline at end of file diff --git a/tests/fp/create_vectors32fma.sh b/tests/fp/create_vectors32fma.sh new file mode 100755 index 000000000..7e48d1abe --- /dev/null +++ b/tests/fp/create_vectors32fma.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +BUILD="./TestFloat-3e/build/Linux-x86_64-GCC" +OUTPUT="./vectors" + +$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv +$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv +$BUILD/testfloat_gen
-rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv +$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv +$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv + +# format: X_Y_Z_answer_flags_Frm_Fmt (use "sed -i -e": "-ie" is parsed as -i with backup suffix "e") +sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rne.tv +sed -i -e 's/$/_0/' $OUTPUT/f32_mulAdd_rne.tv +sed -i -e 's/$/_0/' $OUTPUT/f32_mulAdd_rne.tv + +sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rz.tv +sed -i -e 's/$/_1/' $OUTPUT/f32_mulAdd_rz.tv +sed -i -e 's/$/_0/' $OUTPUT/f32_mulAdd_rz.tv + +sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_ru.tv +sed -i -e 's/$/_3/' $OUTPUT/f32_mulAdd_ru.tv +sed -i -e 's/$/_0/' $OUTPUT/f32_mulAdd_ru.tv + +sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rd.tv +sed -i -e 's/$/_2/' $OUTPUT/f32_mulAdd_rd.tv +sed -i -e 's/$/_0/' $OUTPUT/f32_mulAdd_rd.tv + +sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rnm.tv +sed -i -e 's/$/_4/' $OUTPUT/f32_mulAdd_rnm.tv +sed -i -e 's/$/_0/' $OUTPUT/f32_mulAdd_rnm.tv \ No newline at end of file diff --git a/tests/fp/create_vectors64fma.sh b/tests/fp/create_vectors64fma.sh new file mode 100755 index 000000000..615245b30 --- /dev/null +++ b/tests/fp/create_vectors64fma.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +BUILD="./TestFloat-3e/build/Linux-x86_64-GCC" +OUTPUT="./vectors" + +$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv +$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv +$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv +$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv +$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv + +# format: X_Y_Z_answer_flags_Frm_Fmt (use "sed -i -e": "-ie" is parsed as -i with backup suffix "e") +sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rne.tv +sed -i -e 's/$/_0/' $OUTPUT/f64_mulAdd_rne.tv +sed -i -e 's/$/_1/' $OUTPUT/f64_mulAdd_rne.tv + +sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rz.tv +sed -i -e 's/$/_1/' $OUTPUT/f64_mulAdd_rz.tv +sed -i -e 's/$/_1/' $OUTPUT/f64_mulAdd_rz.tv + +sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_ru.tv +sed -i -e 's/$/_3/' $OUTPUT/f64_mulAdd_ru.tv +sed -i -e 's/$/_1/' $OUTPUT/f64_mulAdd_ru.tv + +sed -i
's/ /_/g' $OUTPUT/f64_mulAdd_rd.tv +sed -i -e 's/$/_2/' $OUTPUT/f64_mulAdd_rd.tv +sed -i -e 's/$/_1/' $OUTPUT/f64_mulAdd_rd.tv + +sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rnm.tv +sed -i -e 's/$/_4/' $OUTPUT/f64_mulAdd_rnm.tv +sed -i -e 's/$/_1/' $OUTPUT/f64_mulAdd_rnm.tv \ No newline at end of file diff --git a/tests/fp/run_all.sh b/tests/fp/run_all.sh index 8d2a17ceb..d34366b93 100755 --- a/tests/fp/run_all.sh +++ b/tests/fp/run_all.sh @@ -8,3 +8,7 @@ ./create_vectors64cmp.sh ./create_vectors64.sh ./create_vectorsi.sh +./create_vectors16fma.sh +./create_vectors32fma.sh +./create_vectors64fma.sh +./create_vectors128fma.sh