Fixed cvtint bug by adding 2 bits to convert width; initial implementation of fround passes basic regression but fails some nightly regression cases

This commit is contained in:
David Harris 2024-05-11 22:32:51 -07:00
parent c0743a1fcf
commit 009d251433
15 changed files with 399 additions and 263 deletions

View File

@ -125,19 +125,19 @@ derivconfigtests = [
["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
# fpu permutations
["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]],
["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]],
["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]],
["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]],
["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]],
["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]],
["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]],
["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]],
["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]],
["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]],
["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfaf"]],
["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32zfaf"]],
["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32zfaf", "arch32zfad"]],
["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i", "arch32zfaf", "arch32zfad"]],
["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i", "arch32zfaf", "arch32zfad"]],
["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfaf"]],
["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64zfaf"]],
["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64zfaf", "arch64zfad"]],
["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i", "arch64zfaf", "arch64zfad"]],
["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q", "arch64zfaf", "arch64zfad"]],
]
bpredtests = [

View File

@ -75,6 +75,7 @@ localparam NE = Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE;
localparam NF = Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF;
localparam FMT = Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0;
localparam BIAS = Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS;
localparam LOGFLEN = $clog2(FLEN);
// Floating point constants needed for FPU parameterization
// LEN1/NE1/NF1/FMT1 is the size of the second longest supported format
@ -124,7 +125,8 @@ localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
// because NORMSHIFTSZ becomes limited by convert rather than divider
// Figure out why extra two bits are needed for convert (and only in testbench_fp, not Wally)
// Might be a testbench_fp issue
localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (3*NF+6));
//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (3*NF+6));
localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+6));
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ)
localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits

View File

@ -173,6 +173,7 @@ localparam cvw_t P = '{
H_BIAS : H_BIAS,
H_FMT : H_FMT,
FLEN : FLEN,
LOGFLEN : LOGFLEN,
NE : NE ,
NF : NF ,
FMT : FMT ,

View File

@ -260,7 +260,8 @@ typedef struct packed {
logic [1:0] H_FMT;
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
int FLEN;
int FLEN;
int LOGFLEN;
int NE ;
int NF ;
logic [1:0] FMT ;

View File

@ -48,7 +48,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
output logic XEnE, YEnE, ZEnE, // enable inputs
// operation mux selections
output logic FCvtIntE, FCvtIntW, // convert to integer operation
output logic [2:0] FrmM, // FP rounding mode
output logic [2:0] FrmE, FrmM, // FP rounding mode
output logic [P.FMTBITS-1:0] FmtE, FmtM, // FP format
output logic [2:0] OpCtrlE, OpCtrlM, // Select which operation to do in each component
output logic FpLoadStoreM, // FP load or store instruction
@ -56,6 +56,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
output logic FPUActiveE, // FP instruction being executed
output logic ZfaE, ZfaM, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod)
output logic ZfaFRoundNXE, // Zfa froundnx instruction
// register control signals
output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable
output logic FWriteIntE, FWriteIntM, // Write to integer register
@ -66,7 +67,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
output logic FDivStartE, IDivStartE // Start division or squareroot
);
`define FCTRLW 13
`define FCTRLW 14
logic [`FCTRLW-1:0] ControlsD; // control signals
logic FRegWriteD; // FP register write enable
@ -75,13 +76,14 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
logic [2:0] OpCtrlD; // Select which operation to do in each component
logic [1:0] PostProcSelD; // select result in the post processing unit
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
logic [2:0] FrmD, FrmE; // FP rounding mode
logic [2:0] FrmD; // FP rounding mode
logic [P.FMTBITS-1:0] FmtD; // FP format
logic [1:0] Fmt, Fmt2; // format - before possible reduction
logic SupportedFmt; // is the format supported
logic SupportedFmt2; // is the source format supported for fp -> fp
logic FCvtIntD, FCvtIntM; // convert to integer operation
logic ZfaD; // Zfa variants of instructions
logic ZfaFRoundNXD; // Zfa froundnx instruction
// FPU Instruction Decoder
assign Fmt = Funct7D[1:0];
@ -93,156 +95,156 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
(Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED));
// decode the instruction
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa_FroundNX
always_comb
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0;
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0;
else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt)
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // for anything other than loads and stores, check for supported format
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0; // for anything other than loads and stores, check for supported format
else begin
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // default: non-implemented instruction
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0; // default: non-implemented instruction
/* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed
case(OpD)
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flw
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // fld
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flq
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flh
3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flw
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // fld
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flq
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flh
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsw
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsd
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsq
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsh
3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsw
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsd
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsq
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsh
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0; // fnmadd
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0_0; // fnmadd
7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0; // fdiv
7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0; // fsqrt
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0_0; // fdiv
7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0_0; // fsqrt
7'b00100??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0; // fsgnjx
3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0_0; // fsgnjx
endcase
7'b00101??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0; // fmax
3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1; // fminm (Zfa)
3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1; // fmaxm (Zfa)
3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0_0; // fmax
3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1_0; // fminm (Zfa)
3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1_0; // fmaxm (Zfa)
endcase
7'b10100??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_0; // fle
3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0; // flt
3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0; // feq
3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1; // fleq (Zfa)
3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1; // fltq (Zfa)
3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_0_0; // fle
3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0_0; // flt
3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0_0; // feq
3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1_0; // fleq (Zfa)
3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1_0; // fltq (Zfa)
endcase
7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)
ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0; // fclass
ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0_0; // fclass
else if (Funct3D == 3'b000 & Rs2D == 5'b00000)
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0_0; // fmv.x.w/d/h/q fp to int register
else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001)
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.d (Zfa)
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1_0; // fmvh.x.d (Zfa)
// Q not supported in RV64GC
// coverage off
else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & Rs2D == 5'b00001)
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.q (Zfa)
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1_0; // fmvh.x.q (Zfa)
// coverage on
7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000)
ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0; // fmv.w/d/h/q.x int to fp reg
ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0_0; // fmv.w/d/h/q.x int to fp reg
else if (P.ZFA_SUPPORTED & Funct3D == 3'b000 & Rs2D == 5'b00001)
ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1; // fli (Zfa)
ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1_0; // fli (Zfa)
7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0; // fcvt.s.(d/q/h)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0_0; // fcvt.s.(d/q/h)
else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.s (Zfa) *** needs ctrl for all rounds
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.s (Zfa)
else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.s (Zfa) *** needs ctrl for all rounds
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.s (Zfa)
7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01)
ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0; // fcvt.d.(s/h/q)
ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0_0; // fcvt.d.(s/h/q)
else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.d (Zfa)
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.d (Zfa)
else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.d (Zfa)
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.d (Zfa)
7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10)
ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0; // fcvt.h.(s/d/q)
ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0_0; // fcvt.h.(s/d/q)
else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.h (Zfa)
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.h (Zfa)
else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.h (Zfa)
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.h (Zfa)
// coverage off
// Not covered in testing because rv64gc does not support quad precision
7'b0100011: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b11)
ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0; // fcvt.q.(s/h/d)
ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0_0; // fcvt.q.(s/h/d)
else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.q (Zfa)
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.q (Zfa)
else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.q (Zfa)
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.q (Zfa)
// coverage on
7'b1101000: case(Rs2D)
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.s.w w->s
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.s.wu wu->s
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.s.l l->s
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.s.lu lu->s
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.s.w w->s
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.s.wu wu->s
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.s.l l->s
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.s.lu lu->s
endcase
7'b1100000: case(Rs2D)
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.s s->w
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.s s->wu
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.s s->l
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.s s->lu
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.s s->w
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.s s->wu
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.s s->l
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.s s->lu
endcase
7'b1101001: case(Rs2D)
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.d.w w->d
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.d.wu wu->d
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.d.l l->d
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.d.lu lu->d
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.d.w w->d
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.d.wu wu->d
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.d.l l->d
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.d.lu lu->d
endcase
7'b1100001: case(Rs2D)
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.d d->w
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.d d->wu
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.d d->l
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.d d->lu
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.d d->w
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.d d->wu
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.d d->l
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.d d->lu
5'b01000: if (P.ZFA_SUPPORTED & P.D_SUPPORTED & Funct3D == 3'b001)
ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1; // fcvtmod.w.d (Zfa)
ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1_0; // fcvtmod.w.d (Zfa)
endcase
7'b1101010: case(Rs2D)
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.h.w w->h
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.h.wu wu->h
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.h.l l->h
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.h.lu lu->h
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.h.w w->h
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.h.wu wu->h
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.h.l l->h
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.h.lu lu->h
endcase
7'b1100010: case(Rs2D)
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.h h->w
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.h h->wu
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.h h->l
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.h h->lu
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.h h->w
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.h h->wu
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.h h->l
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.h h->lu
endcase
// Not covered in testing because rv64gc does not support quad precision
// coverage off
7'b1101011: case(Rs2D)
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.q.w w->q
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.q.wu wu->q
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.q.l l->q
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.q.lu lu->q
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.q.w w->q
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.q.wu wu->q
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.q.l l->q
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.q.lu lu->q
endcase
7'b1100011: case(Rs2D)
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.q q->w
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.q q->wu
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.q q->l
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.q q->lu
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.q q->w
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.q q->wu
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.q q->l
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.q q->lu
endcase
// coverage off
// Not covered in testing because rv64gc is not RV64Q or RV32D
7'b1011001: if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct3D == 3'b000)
ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.d.x (Zfa) *** untested, controls could be wrong
ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fmvp.d.x (Zfa) *** untested, controls could be wrong
7'b1011011: if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct3D == 3'b000)
ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.q.x (Zfa)
ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fmvp.q.x (Zfa)
// coverage on
endcase
endcase
@ -250,7 +252,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
/* verilator lint_on CASEINCOMPLETE */
// unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD} = ControlsD;
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD, ZfaFRoundNXD} = ControlsD;
// rounding modes:
// 000 - round to nearest, ties to even
@ -259,7 +261,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
// 011 - round up - round towards positive infinity
// 100 - round to nearest, ties to max magnitude - round to nearest, ties away from zero
// 111 - dynamic - choose FRM_REGW as rounding mode
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
assign FrmD = (Funct3D == 3'b111) ? FRM_REGW : Funct3D;
// Precision
// 00 - single
@ -269,7 +271,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
if (P.FPSIZES == 1)
assign FmtD = 1'b0;
else if (P.FPSIZES == 2)begin
else if (P.FPSIZES == 2) begin
logic [1:0] FmtTmp;
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
assign FmtD = (P.FMT == FmtTmp);
@ -313,6 +315,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
// 011 - mv to fp 01
// 110 - min 10
// 101 - max 10
// 100 - fround 11
// 111 - fli 11
// OpCtrl:
@ -350,9 +353,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
assign Adr3D = InstrD[31:27];
// D/E pipeline register
flopenrc #(15+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ~IllegalFPUInstrD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, FPUActiveE});
flopenrc #(`FCTRLW+2+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ZfaFRoundNXD, ~IllegalFPUInstrD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, ZfaFRoundNXE, FPUActiveE});
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E});
flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE});
@ -365,7 +368,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
flopenrc #(14+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM, ZfaM});
// renaming for readability
assign FpLoadStoreM = FResSelM[1];
@ -373,5 +376,5 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResSelM, FCvtIntM},
{FRegWriteW, FResSelW, FCvtIntW});
endmodule

View File

@ -37,6 +37,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic [P.NE-2:0] BiasE, // Bias of exponent
input logic [P.LOGFLEN-1:0] NfE, // Number of fractional bits in selected format
input logic FDivStartE, IDivStartE,
input logic StallM,
input logic FlushE,
@ -75,7 +77,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
.FmtE, .Bias(BiasE), .Nf(NfE), .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
// Int-specific
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
.BZeroM, .IntNormShiftM, .AM,

View File

@ -29,39 +29,14 @@
module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] FmtE,
input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format
input logic SqrtE,
input logic IntDivE,
input logic [P.DIVBLEN-1:0] IntResultBitsE,
output logic [P.DURLEN-1:0] CyclesE
);
logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits
/* verilator lint_off WIDTH */
if (P.FPSIZES == 1)
assign Nf = P.NF;
else if (P.FPSIZES == 2)
always_comb
case (FmtE)
1'b0: Nf = P.NF1;
1'b1: Nf = P.NF;
endcase
else if (P.FPSIZES == 3)
always_comb
case (FmtE)
P.FMT: Nf = P.NF;
P.FMT1: Nf = P.NF1;
P.FMT2: Nf = P.NF2;
default: Nf = 'x; // shouldn't happen
endcase
else if (P.FPSIZES == 4)
always_comb
case(FmtE)
P.S_FMT: Nf = P.S_NF;
P.D_FMT: Nf = P.D_NF;
P.H_FMT: Nf = P.H_NF;
P.Q_FMT: Nf = P.Q_NF;
endcase
logic [P.DIVBLEN-1:0] FPResultBitsE, ResultBitsE; // number of fractional (result) bits
// Cycle logic
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
@ -70,6 +45,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
// FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle.
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
/* verilator lint_off WIDTH */
always_comb begin
FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1

View File

@ -28,49 +28,21 @@
////////////////////////////////////////////////////////////////////////////////////////////////
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] Fmt,
input logic [P.NE-2:0] Bias, // Bias of exponent
input logic [P.NE-1:0] Xe, Ye, // input exponents
input logic Sqrt,
input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in Xe and Ye
output logic [P.NE+1:0] Ue // result exponent
);
logic [P.NE-2:0] Bias;
logic [P.NE+1:0] SXExp;
logic [P.NE+1:0] SExp;
logic [P.NE+1:0] DExp;
// Determine exponent bias according to the format
if (P.FPSIZES == 1) begin
assign Bias = (P.NE-1)'(P.BIAS);
end else if (P.FPSIZES == 2) begin
assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
end else if (P.FPSIZES == 3) begin
always_comb
case (Fmt)
P.FMT: Bias = (P.NE-1)'(P.BIAS);
P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
default: Bias = 'x;
endcase
end else if (P.FPSIZES == 4) begin
always_comb
case (Fmt)
2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
2'h1: Bias = (P.NE-1)'(P.D_BIAS);
2'h0: Bias = (P.NE-1)'(P.S_BIAS);
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
endcase
end
// Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
// division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});

View File

@ -33,6 +33,8 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
input logic [P.NF:0] Xm, Ym, // Floating-point significands
input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents
input logic [P.FMTBITS-1:0] FmtE,
input logic [P.NE-2:0] Bias, // Bias of exponent
input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format
input logic SqrtE,
input logic XZeroE,
input logic [2:0] Funct3E,
@ -209,11 +211,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
// Floating-point exponent
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
fdivsqrtexpcalc #(P) expcalc(.Bias, .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
// Number of FSM cycles (to FSM)
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
fdivsqrtcycles #(P) cyclecalc(.FmtE, .Nf, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
if (P.IDIV_ON_FPU) begin:intpipelineregs
logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;

86
src/fpu/fmtparams.sv Normal file
View File

@ -0,0 +1,86 @@
///////////////////////////////////////////
// fmtparams.sv
//
// Written: David_Harris@hmc.edu
// Modified: 5/11/24
//
// Purpose: Look up bias of exponent and number of fractional bits for the selected format
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module fmtparams import cvw::*; #(parameter cvw_t P) (
  input  logic [P.FMTBITS-1:0]  Fmt,   // floating-point format select
  output logic [P.NE-2:0]       Bias,  // exponent bias looked up for Fmt
  output logic [P.LOGFLEN-1:0]  Nf     // number of fraction bits looked up for Fmt
);

  // Combinational lookup of per-format constants.
  // One generate branch per value of P.FPSIZES (1 to 4); each branch drives both outputs.
  // Bias constants are explicitly cast to NE-1 bits. Nf constants are assigned
  // straight from the parameter struct without a cast, so the WIDTH lint is
  // suppressed around the lookup.

  /* verilator lint_off WIDTH */
  if (P.FPSIZES == 1) begin : onefmt
    // Only one format exists: both outputs are constants and Fmt is not consulted
    assign Nf   = P.NF;
    assign Bias = (P.NE-1)'(P.BIAS);

  end else if (P.FPSIZES == 2) begin : twofmt
    // Fmt = 1 selects BIAS/NF; Fmt = 0 selects BIAS1/NF1
    assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
    always_comb
      case (Fmt)
        1'b1: Nf = P.NF;
        1'b0: Nf = P.NF1;
      endcase

  end else if (P.FPSIZES == 3) begin : threefmt
    // Fmt is compared against the three configured format codes;
    // any other code drives x on both outputs
    always_comb
      case (Fmt)
        P.FMT: begin
          Bias = (P.NE-1)'(P.BIAS);
          Nf   = P.NF;
        end
        P.FMT1: begin
          Bias = (P.NE-1)'(P.BIAS1);
          Nf   = P.NF1;
        end
        P.FMT2: begin
          Bias = (P.NE-1)'(P.BIAS2);
          Nf   = P.NF2;
        end
        default: begin
          Bias = 'x;
          Nf   = 'x;  // shouldn't happen
        end
      endcase

  end else if (P.FPSIZES == 4) begin : fourfmt
    // All four format codes are in use, so neither lookup needs a default
    always_comb begin
      // Bias is selected by literal format code
      case (Fmt)
        2'h0: Bias = (P.NE-1)'(P.S_BIAS);
        2'h1: Bias = (P.NE-1)'(P.D_BIAS);
        2'h2: Bias = (P.NE-1)'(P.H_BIAS);
        2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
      endcase
      // Nf is selected by the named format codes from the parameter struct
      case (Fmt)
        P.S_FMT: Nf = P.S_NF;
        P.D_FMT: Nf = P.D_NF;
        P.H_FMT: Nf = P.H_NF;
        P.Q_FMT: Nf = P.Q_NF;
      endcase
    end
  end
  /* verilator lint_on WIDTH */

endmodule

View File

@ -70,7 +70,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
// control signals
logic FRegWriteW; // FP register write enable
logic [2:0] FrmM; // FP rounding mode
logic [2:0] FrmE, FrmM; // FP rounding mode
logic [P.FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic FDivStartE, IDivStartE; // Start division or squareroot
logic FWriteIntM; // Write to integer register
@ -85,6 +85,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
logic FRegWriteE; // Write floating-point register
logic FPUActiveE; // FP instruction being executed
logic ZfaE, ZfaM; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d)
logic ZfaFRoundNXE; // Zfa froundnx variant
// regfile signals
logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
@ -112,6 +113,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value)
logic [P.FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
logic [P.NE-2:0] BiasE; // Bias of exponent
logic [P.LOGFLEN-1:0] NfE; // Number of fractional bits
// Fma Signals
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
@ -150,7 +153,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
logic [P.XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
logic [P.FLEN-1:0] PostProcResM; // Postprocessor output
logic [4:0] PostProcFlgM; // Postprocessor flags
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic PreNVE, PreNVM; // selected invalid flag that is ready in the memory stage
logic PreNXE, PreNXM; // selected inexact flag that is ready in the memory stage
logic [P.FLEN-1:0] FpResM, FpResW; // FPU preliminary result
logic [P.FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register
@ -162,9 +166,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
logic mvsgn; // sign bit for extending move
logic [P.FLEN-1:0] FliResE; // Zfa Floating-point load immediate value
logic [P.FLEN-1:0] FRoundE; // Zfa fround output
logic [4:0] FRoundFlagsE; // Zfa fround flags
logic [P.FLEN-1:0] ZfaResE; // Result of Zfa fli or fround instruction
logic FRoundNVE, FRoundNXE; // Zfa fround invalid and inexact flags
//////////////////////////////////////////////////////////////////////////////////////////
// Decode Stage: fctrl decoder, read register file
@ -174,7 +177,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.IntDivE, .InstrD,
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .FrmM, .FmtE, .FmtM,
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .ZfaFRoundNXE, .FrmE, .FrmM, .FmtE, .FmtM,
.FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM,
.IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE,
.FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW,
@ -237,7 +240,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
.YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE));
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE), .Bias(BiasE), .Nf(NfE));
// fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
fma #(P) fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
@ -246,7 +249,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
// divide and square root: fdiv, fsqrt, optionally integer division
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .BiasE, .NfE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM,
.UmM, .FIntDivResultM);
@ -270,23 +273,26 @@ module fpu import cvw::*; #(parameter cvw_t P) (
.ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));
// ZFA: fround and floating-point load immediate fli
if (P.ZFA_SUPPORTED) begin
if (P.ZFA_SUPPORTED) begin:Zfa
logic [4:0] Rs1E;
logic [1:0] Fmt2E; // Two-bit format field from instruction
logic [P.FLEN-1:0] FRoundE; // Zfa fround output
logic [P.FLEN-1:0] FliResE; // Zfa Floating-point load immediate value
// fround
fround #(P) fround(.Xs(XsE), .Xe(XeE), .Xm(XmE),
.XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE),
.FRound(FRoundE), .FRoundFlags(FRoundFlagsE));
fround #(P) fround(.X(XE), .Xs(XsE), .Xe(XeE), .Xm(XmE),
.XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE),
.ZfaFRoundNX(ZfaFRoundNXE),
.FRound(FRoundE), .FRoundNV(FRoundNVE), .FRoundNX(FRoundNXE));
// fli
flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E);
flopenrc #(2) Fmt2EReg(clk, reset, FlushE, ~StallE, InstrD[26:25], Fmt2E);
fli #(P) fli(.Rs1(Rs1E), .Fmt(Fmt2E), .Imm(FliResE));
mux2 #(P.FLEN) ZfaResMux(FRoundE, FliResE, OpCtrlE[0], ZfaResE);
end else begin
assign FRoundE = '0;
assign FRoundFlagsE = '0;
assign FliResE = '0;
assign {FRoundNXE, FRoundNVE} = '0;
assign ZfaResE = 'x;
end
// fmv.*.x: NaN Box SrcA to extend integer to requested FP size
@ -311,8 +317,9 @@ module fpu import cvw::*; #(parameter cvw_t P) (
else assign IntSrcE = PreIntSrcE;
// select a result that may be written to the FP register
mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, ZfaResE, {OpCtrlE[2], &OpCtrlE[1:0] | (OpCtrlE == 3'b100) & ZfaE}, PreFpResE);
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE) | FRoundNVE & (OpCtrlE == 3'b100) & ZfaE;
assign PreNXE = FRoundNXE & (OpCtrlE == 3'b100);
// fmv.x.*: select the result that may be written to the integer register
if(P.FPSIZES == 1) begin
@ -350,7 +357,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM,
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(2) EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM});
flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
@ -373,8 +380,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged
//mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
mux2 #(5) FPUFlgMux({PreNVM, 4'b0}, PostProcFlgM, (FResSelM == 2'b01), SetFflagsM);
mux2 #(5) FPUFlgMux({PreNVM, 3'b0, PreNXM}, PostProcFlgM, (FResSelM == 2'b01), SetFflagsM);
mux2 #(P.FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
// M/W pipe registers

View File

@ -28,60 +28,34 @@
////////////////////////////////////////////////////////////////////////////////////////////////
module fround import cvw::*; #(parameter cvw_t P) (
input logic [P.FLEN-1:0] X, // input before unpacking
input logic Xs, // input's sign
input logic [P.NE-1:0] Xe, // input's exponent
input logic [P.NF:0] Xm, // input's fraction
input logic [P.NF:0] Xm, // input's fraction with leading integer bit (U1.NF)
input logic XNaN, // X is NaN
input logic XSNaN, // X is Signalling NaN
input logic XZero, // X is Zero
input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
input logic [2:0] Frm, // rounding mode
input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format
input logic ZfaFRoundNX, // froundnx instruction can set inexact flag
output logic [P.FLEN-1:0] FRound, // Rounded result
output logic [4:0] FRoundFlags // Rounder flags
output logic FRoundNV, // fround invalid
output logic FRoundNX // fround inexact
);
logic [P.NE-2:0] Bias;
logic [P.NE-1:0] E;
logic [P.NF:0] Imask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd;
logic Lnonneg, Lp, Rnonneg, Rp, Tp;
//////////////////////////////////////////
// Determine exponent bias according to the format
//////////////////////////////////////////
// *** replicated from fdivsqrt; find a way to share
if (P.FPSIZES == 1) begin
assign Bias = (P.NE-1)'(P.BIAS);
end else if (P.FPSIZES == 2) begin
assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
end else if (P.FPSIZES == 3) begin
always_comb
case (Fmt)
P.FMT: Bias = (P.NE-1)'(P.BIAS);
P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
default: Bias = 'x;
endcase
end else if (P.FPSIZES == 4) begin
always_comb
case (Fmt)
2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
2'h1: Bias = (P.NE-1)'(P.D_BIAS);
2'h0: Bias = (P.NE-1)'(P.S_BIAS);
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
endcase
end
/*
logic [P.NE-1:0] E, Xep1, EminusNf;
logic [P.NF:0] IMask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd;
logic [P.FLEN-1:0] W, PackedW;
logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf, Exact;
// Unbiased exponent
assign E = Xe - Bias;
assign E = Xe - P.BIAS[P.NE-1:0];
assign Xep1 = Xe + 1;
//////////////////////////////////////////
// Compute LSB L', rounding bit R' and Sticky bit T'
// if (E < 0) // negative exponents round to 0 or 1.
// if (E < 0) // negative exponents round to 0 or 1.
// L' = 0 // LSB = 0
// if (E = -1) R' = 1, TMask = 0.1111...111 // if (E = -1) 0.5 <= X < 1. Round bit is 1
// else R' = 0; TMask = 1.1111...111 // if (E < -1), X < 0.5. Round bit is 0
@ -100,19 +74,19 @@ module fround import cvw::*; #(parameter cvw_t P) (
//////////////////////////////////////////
// Check if exponent is negative and -1
assign Elt0 = (E < 0);
assign Eeqm1 = (E == -1);
assign Elt0 = E[P.NE-1]; // (E < 0);
assign Eeqm1 = ($signed(E) == -1);
// Logic for nonnegative mask and rounding bits
assign Imask = {1'b1, {P.NF{1'b0}}} >>> E;
assign IMask = {1'b1, {P.NF{1'b0}}} >>> E;
assign Tmasknonneg = ~(IMask >>> 1'b1);
assign HotE = IMask & !(IMask << 1'b1);
assign HotE = IMask & ~(IMask << 1'b1);
assign HotEP1 = HotE >> 1'b1;
assign Lnonneg = |(Xm & HotE);
assign Rnonneg = |(Xm & HotEP1);
assign Trunc = Xm & Imask;
assign Rnd = Trunc + HotE;
assign Trunc = Xm & IMask;
assign {Two, Rnd} = Trunc + HotE; // Two means result is 10.000000 = 2.0
// mux and AND-OR logic to select final rounding bits
mux2 #(1) Lmux(Lnonneg, 1'b0, Elt0, Lp);
mux2 #(1) Rmux(Rnonneg, Eeqm1, Elt0, Rp);
@ -120,7 +94,6 @@ module fround import cvw::*; #(parameter cvw_t P) (
mux2 #(P.NF+1) Tmaskmux(Tmasknonneg, Tmaskneg, Elt0, Tmask);
assign Tp = |(Xm & Tmask);
///////////////////////////
// Rounding, flags, special Cases
// Flags = 0 // unless overridden later
@ -144,11 +117,15 @@ module fround import cvw::*; #(parameter cvw_t P) (
///////////////////////////
// Exact logic
assign Exact = (E >= Nf | XZero); // result will be exact; no need to round
/* verilator lint_off WIDTH */
assign EminusNf = E - Nf;
/* verilator lint_on WIDTH */
assign EgeNf = ~EminusNf[P.NE-1] & (~E[P.NE-1] | E[P.NE-2:0] == '0); // E >= Nf if MSB of E-Nf is 0 and E was positive
assign Exact = (EgeNf | XZero) & ~XNaN; // result will be exact; no need to round
// Rounding logic: determine whether to round up in magnitude
always_comb
case (Rm) // *** make sure this includes dynamic
always_comb begin
case (Frm) // Frm is either specified in the instruction or is the dynamic rounding mode
3'b000: RoundUp = Rp & (Lp | Tp); // RNE
3'b001: RoundUp = 0; // RZ
3'b010: RoundUp = Xs & (Rp | Tp); // RN
@ -157,22 +134,23 @@ module fround import cvw::*; #(parameter cvw_t P) (
default: RoundUp = 0; // should never happen
endcase
// output logic
if (XNaN) W = CanonicalNan; // ***
else if (Exact) W = X;
else if (Elt0)
if (RoundUp) W = {Xs, bias, {P.NF}} // *** format conversions
// If result is not exact, select output in unpacked FLEN format initially
if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN
else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1
if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1
else W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0
else begin // |X| > 1 rounds to an integer
if (RoundUp & Two) W = {Xs, Xep1, {(P.NF){1'b0}}}; // Round up to 2.0
else if (RoundUp) W = {Xs, Xe, Rnd[P.NF-1:0]}; // Round up to Rnd
else W = {Xs, Xe, Trunc[P.NF-1:0]}; // Round down to Trunc
end
end
*** may not need to round to infinity; update docs and pseudocode above
always_comb
packoutput #(P) packoutput(W, Fmt, PackedW); // pack and NaN-box based on selected format.
mux2 #(P.FLEN) resultmux(PackedW, X, Exact, FRound);
// Flags
assign Invalid = XSNaN;
assign Inexact = FRoundNX & ~(XNaN | Exact) & (Rp | T');
*/
assign FRound = '0;
assign FRoundFlags = '0;
assign FRoundNV = XSNaN; // invalid if input is signaling NaN
assign FRoundNX = ZfaFRoundNX & ~(XNaN | Exact) & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction
endmodule

101
src/fpu/packoutput.sv Normal file
View File

@ -0,0 +1,101 @@
///////////////////////////////////////////
// packoutput.sv
//
// Written: David_Harris@hmc.edu
// Modified: 5/11/24
//
// Purpose: Pack the output of the FPU
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// packoutput: repack a value held in the widest configured layout into the format
// selected by Fmt, filling the unused upper bits with ones (NaN-boxing).
//
//   Unpacked : {sign, exponent, fraction} with the sign at bit P.FLEN-1, the exponent
//              MSB at bit P.FLEN-2 and the fraction in bits P.NF-1:0
//   Fmt      : format select (P.FMTBITS wide); the encoding depends on P.FPSIZES
//   Packed   : P.FLEN-bit result; the widest format is passed through unchanged
//
// For a narrower format the exponent is formed from the wide exponent's MSB followed by
// its low (NEx-1) bits, and the fraction from the top NFx bits of the wide fraction.
// NOTE(review): this presumably mirrors the exponent widening done when inputs are
// unpacked - confirm against unpackinput.
//
// The narrowed fields are driven by continuous assignments inside the generate branch
// that uses them, so the always_comb blocks assign only Packed (in every arm) and no
// latches are inferred.
module packoutput import cvw::*; #(parameter cvw_t P) (
  input  logic [P.FLEN-1:0]    Unpacked,
  input  logic [P.FMTBITS-1:0] Fmt,
  output logic [P.FLEN-1:0]    Packed
);

  logic Sign;

  assign Sign = Unpacked[P.FLEN-1];

  if (P.FPSIZES == 1) begin: onefmt
    // only one format: nothing to repack
    assign Packed = Unpacked;

  end else if (P.FPSIZES == 2) begin: twofmt
    logic [P.NE1-1:0] Exp1;
    logic [P.NF1-1:0] Fract1;

    assign Exp1   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]};
    assign Fract1 = Unpacked[P.NF-1:P.NF-P.NF1];

    always_comb
      case (Fmt)
        1'b1:    Packed = Unpacked;
        1'b0:    Packed = {{(P.FLEN-P.LEN1){1'b1}}, Sign, Exp1, Fract1};
        default: Packed = 'x; // only reached when Fmt is X/Z
      endcase

  end else if (P.FPSIZES == 3) begin: threefmt
    logic [P.NE1-1:0] Exp1;
    logic [P.NF1-1:0] Fract1;
    logic [P.NE2-1:0] Exp2;
    logic [P.NF2-1:0] Fract2;

    assign Exp1   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]};
    assign Fract1 = Unpacked[P.NF-1:P.NF-P.NF1];
    assign Exp2   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE2-2:P.NF]};
    assign Fract2 = Unpacked[P.NF-1:P.NF-P.NF2];

    always_comb
      case (Fmt)
        P.FMT:   Packed = Unpacked;
        P.FMT1:  Packed = {{(P.FLEN-P.LEN1){1'b1}}, Sign, Exp1, Fract1};
        P.FMT2:  Packed = {{(P.FLEN-P.LEN2){1'b1}}, Sign, Exp2, Fract2};
        default: Packed = 'x;
      endcase

  end else if (P.FPSIZES == 4) begin: fourfmt
    logic [P.NE1-1:0]  Exp1;
    logic [P.NF1-1:0]  Fract1;
    logic [P.NE2-1:0]  Exp2;
    logic [P.NF2-1:0]  Fract2;
    logic [P.H_NE-1:0] Exp3;
    logic [P.H_NF-1:0] Fract3;

    assign Exp1   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]};
    assign Fract1 = Unpacked[P.NF-1:P.NF-P.NF1];
    assign Exp2   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE2-2:P.NF]};
    assign Fract2 = Unpacked[P.NF-1:P.NF-P.NF2];
    assign Exp3   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.H_NE-2:P.NF]};
    assign Fract3 = Unpacked[P.NF-1:P.NF-P.H_NF];

    always_comb
      case (Fmt)
        2'h3:    Packed = Unpacked;                                          // quad
        2'h1:    Packed = {{(P.FLEN-P.LEN1){1'b1}},  Sign, Exp1, Fract1};    // double
        2'h0:    Packed = {{(P.FLEN-P.LEN2){1'b1}},  Sign, Exp2, Fract2};    // float
        2'h2:    Packed = {{(P.FLEN-P.H_LEN){1'b1}}, Sign, Exp3, Fract3};    // half
        default: Packed = 'x; // only reached when Fmt is X/Z
      endcase
  end

endmodule

View File

@ -41,13 +41,15 @@ module unpack import cvw::*; #(parameter cvw_t P) (
output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMax, // does X have the maximum exponent (NaN or Inf)
output logic [P.FLEN-1:0] XPostBox // X after being properly NaN-boxed
output logic [P.FLEN-1:0] XPostBox, // X after being properly NaN-boxed
output logic [P.NE-2:0] Bias, // Exponent bias
output logic [P.LOGFLEN-1:0] Nf // Number of fractional bits
);
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMax, ZExpMax; // is the exponent all 1s
unpackinput #(P) unpackinputX (.A(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .FPUActive,
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero),
@ -63,4 +65,7 @@ module unpack import cvw::*; #(parameter cvw_t P) (
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero),
.Subnorm(), .PostBox());
// look up bias and fractional bits for the given format
fmtparams #(P) fmtparams(Fmt, Bias, Nf);
endmodule

View File

@ -1641,7 +1641,7 @@ string imperas32f[] = '{
string arch64d[] = '{
`RISCVARCHTEST,
// for speed
"rv64i_m/D/src/fadd.d_b10-01.S",
"rv64i_m/D/src/fadd.d_b10-01.S",
"rv64i_m/D/src/fadd.d_b1-01.S",
"rv64i_m/D/src/fadd.d_b11-01.S",
"rv64i_m/D/src/fadd.d_b12-01.S",
@ -2278,6 +2278,7 @@ string arch64zknh[] = '{
string arch32zfaf[] = '{
//`RISCVARCHTEST,
`WALLYTEST,
"rv32i_m/F_Zfa/src/fround_b1-01.S",
"rv32i_m/F_Zfa/src/fleq_b1-01.S",
"rv32i_m/F_Zfa/src/fleq_b19-01.S",
"rv32i_m/F_Zfa/src/fli.s-01.S",
@ -2289,12 +2290,12 @@ string arch64zknh[] = '{
"rv32i_m/F_Zfa/src/fminm_b19-01.S",
"rv32i_m/F_Zfa/src/fmaxm_b1-01.S",
"rv32i_m/F_Zfa/src/fmaxm_b19-01.S"
/* "rv32i_m/F_Zfa/src/fround_b1-01.S" */
};
string arch32zfad[] = '{
//`RISCVARCHTEST,
`WALLYTEST,
"rv32i_m/D_Zfa/src/fround_b1-01.S",
"rv32i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S",
"rv32i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S",
"rv32i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S",
@ -2326,12 +2327,12 @@ string arch64zknh[] = '{
"rv32i_m/D_Zfa/src/fmvh.x.d_b27-01.S",
"rv32i_m/D_Zfa/src/fmvh.x.d_b28-01.S",
"rv32i_m/D_Zfa/src/fmvh.x.d_b29-01.S"
/* "rv32i_m/D_Zfa/src/fround_b1-01.S" */
};
string arch64zfaf[] = '{
//`RISCVARCHTEST,
`WALLYTEST,
"rv64i_m/F_Zfa/src/fround_b1-01.S",
"rv64i_m/F_Zfa/src/fleq_b1-01.S",
"rv64i_m/F_Zfa/src/fleq_b19-01.S",
"rv64i_m/F_Zfa/src/fli.s-01.S",
@ -2341,12 +2342,12 @@ string arch64zknh[] = '{
"rv64i_m/F_Zfa/src/fminm_b19-01.S",
"rv64i_m/F_Zfa/src/fmaxm_b1-01.S",
"rv64i_m/F_Zfa/src/fmaxm_b19-01.S"
/* "rv64i_m/F_Zfa/src/fround_b1-01.S" */
};
string arch64zfad[] = '{
//`RISCVARCHTEST,
`WALLYTEST,
"rv64i_m/D_Zfa/src/fround_b1-01.S",
"rv64i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S",
"rv64i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S",
"rv64i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S",
@ -2363,7 +2364,7 @@ string arch64zknh[] = '{
"rv64i_m/D_Zfa/src/fminm_b19-01.S",
"rv64i_m/D_Zfa/src/fmaxm_b1-01.S",
"rv64i_m/D_Zfa/src/fmaxm_b19-01.S"
/* "rv64i_m/D_Zfa/src/fround_b1-01.S" */
};
string arch32d_fma[] = '{