mirror of
https://github.com/openhwgroup/cvw
synced 2025-01-23 13:04:28 +00:00
Fixed cvtint bug by adding 2 bits to convert width; initial implementation of fround passes basic regression but fails some nightly regression cases
This commit is contained in:
parent
c0743a1fcf
commit
009d251433
@ -125,19 +125,19 @@ derivconfigtests = [
|
||||
["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]],
|
||||
|
||||
# fpu permutations
|
||||
["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]],
|
||||
["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]],
|
||||
["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]],
|
||||
["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]],
|
||||
["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]],
|
||||
["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]],
|
||||
["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]],
|
||||
["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]],
|
||||
["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]],
|
||||
["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]],
|
||||
["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfaf"]],
|
||||
["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32zfaf"]],
|
||||
["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32zfaf", "arch32zfad"]],
|
||||
["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i", "arch32zfaf", "arch32zfad"]],
|
||||
["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i", "arch32zfaf", "arch32zfad"]],
|
||||
["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfaf"]],
|
||||
["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64zfaf"]],
|
||||
["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64zfaf", "arch64zfad"]],
|
||||
["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i", "arch64zfaf", "arch64zfad"]],
|
||||
["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q", "arch64zfaf", "arch64zfad"]],
|
||||
]
|
||||
|
||||
bpredtests = [
|
||||
|
@ -75,6 +75,7 @@ localparam NE = Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE;
|
||||
localparam NF = Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF;
|
||||
localparam FMT = Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0;
|
||||
localparam BIAS = Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? D_BIAS : S_BIAS;
|
||||
localparam LOGFLEN = $clog2(FLEN);
|
||||
|
||||
// Floating point constants needed for FPU parameterization
|
||||
// LEN1/NE1/NF1/FMT1 is the size of the second longest supported format
|
||||
@ -124,7 +125,8 @@ localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
|
||||
// because NORMSHIFTSZ becomes limited by convert rather than divider
|
||||
// Figure out why extra two bits are needed for convert (and only in testbench_fp, not Wally)
|
||||
// Might be a testbench_fp issue
|
||||
localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (3*NF+6));
|
||||
//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (3*NF+6));
|
||||
localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+6));
|
||||
|
||||
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ)
|
||||
localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits
|
||||
|
@ -173,6 +173,7 @@ localparam cvw_t P = '{
|
||||
H_BIAS : H_BIAS,
|
||||
H_FMT : H_FMT,
|
||||
FLEN : FLEN,
|
||||
LOGFLEN : LOGFLEN,
|
||||
NE : NE ,
|
||||
NF : NF ,
|
||||
FMT : FMT ,
|
||||
|
@ -260,7 +260,8 @@ typedef struct packed {
|
||||
logic [1:0] H_FMT;
|
||||
|
||||
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
|
||||
int FLEN;
|
||||
int FLEN;
|
||||
int LOGFLEN;
|
||||
int NE ;
|
||||
int NF ;
|
||||
logic [1:0] FMT ;
|
||||
|
197
src/fpu/fctrl.sv
197
src/fpu/fctrl.sv
@ -48,7 +48,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
output logic XEnE, YEnE, ZEnE, // enable inputs
|
||||
// operation mux selections
|
||||
output logic FCvtIntE, FCvtIntW, // convert to integer operation
|
||||
output logic [2:0] FrmM, // FP rounding mode
|
||||
output logic [2:0] FrmE, FrmM, // FP rounding mode
|
||||
output logic [P.FMTBITS-1:0] FmtE, FmtM, // FP format
|
||||
output logic [2:0] OpCtrlE, OpCtrlM, // Select which operation to do in each component
|
||||
output logic FpLoadStoreM, // FP load or store instruction
|
||||
@ -56,6 +56,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage
|
||||
output logic FPUActiveE, // FP instruction being executed
|
||||
output logic ZfaE, ZfaM, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod)
|
||||
output logic ZfaFRoundNXE, // Zfa froundnx instruction
|
||||
// register control signals
|
||||
output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable
|
||||
output logic FWriteIntE, FWriteIntM, // Write to integer register
|
||||
@ -66,7 +67,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
output logic FDivStartE, IDivStartE // Start division or squareroot
|
||||
);
|
||||
|
||||
`define FCTRLW 13
|
||||
`define FCTRLW 14
|
||||
|
||||
logic [`FCTRLW-1:0] ControlsD; // control signals
|
||||
logic FRegWriteD; // FP register write enable
|
||||
@ -75,13 +76,14 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
logic [2:0] OpCtrlD; // Select which operation to do in each component
|
||||
logic [1:0] PostProcSelD; // select result in the post processing unit
|
||||
logic [1:0] FResSelD; // Select one of the results that finish in the memory stage
|
||||
logic [2:0] FrmD, FrmE; // FP rounding mode
|
||||
logic [2:0] FrmD; // FP rounding mode
|
||||
logic [P.FMTBITS-1:0] FmtD; // FP format
|
||||
logic [1:0] Fmt, Fmt2; // format - before possible reduction
|
||||
logic SupportedFmt; // is the format supported
|
||||
logic SupportedFmt2; // is the source format supported for fp -> fp
|
||||
logic FCvtIntD, FCvtIntM; // convert to integer operation
|
||||
logic ZfaD; // Zfa variants of instructions
|
||||
logic ZfaFRoundNXD; // Zfa froundnx instruction
|
||||
|
||||
// FPU Instruction Decoder
|
||||
assign Fmt = Funct7D[1:0];
|
||||
@ -93,156 +95,156 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
(Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED));
|
||||
|
||||
// decode the instruction
|
||||
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa
|
||||
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa_FroundNX
|
||||
always_comb
|
||||
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
|
||||
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0;
|
||||
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0;
|
||||
else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt)
|
||||
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // for anything other than loads and stores, check for supported format
|
||||
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0; // for anything other than loads and stores, check for supported format
|
||||
else begin
|
||||
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // default: non-implemented instruction
|
||||
ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0; // default: non-implemented instruction
|
||||
/* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed
|
||||
case(OpD)
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flw
|
||||
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // fld
|
||||
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flq
|
||||
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flh
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flw
|
||||
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // fld
|
||||
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flq
|
||||
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flh
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsw
|
||||
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsd
|
||||
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsq
|
||||
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsh
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsw
|
||||
3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsd
|
||||
3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsq
|
||||
3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsh
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0; // fnmadd
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0_0; // fnmadd
|
||||
7'b1010011: casez(Funct7D)
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0; // fdiv
|
||||
7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0; // fsqrt
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0_0; // fdiv
|
||||
7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0_0; // fsqrt
|
||||
7'b00100??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0; // fsgnjx
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0_0; // fsgnjx
|
||||
endcase
|
||||
7'b00101??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0; // fmax
|
||||
3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1; // fminm (Zfa)
|
||||
3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1; // fmaxm (Zfa)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0_0; // fmax
|
||||
3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1_0; // fminm (Zfa)
|
||||
3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1_0; // fmaxm (Zfa)
|
||||
endcase
|
||||
7'b10100??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_0; // fle
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0; // flt
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0; // feq
|
||||
3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1; // fleq (Zfa)
|
||||
3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1; // fltq (Zfa)
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_0_0; // fle
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0_0; // flt
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0_0; // feq
|
||||
3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1_0; // fleq (Zfa)
|
||||
3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1_0; // fltq (Zfa)
|
||||
endcase
|
||||
7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)
|
||||
ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0; // fclass
|
||||
ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0_0; // fclass
|
||||
else if (Funct3D == 3'b000 & Rs2D == 5'b00000)
|
||||
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register
|
||||
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0_0; // fmv.x.w/d/h/q fp to int register
|
||||
else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001)
|
||||
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.d (Zfa)
|
||||
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1_0; // fmvh.x.d (Zfa)
|
||||
// Q not supported in RV64GC
|
||||
// coverage off
|
||||
else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & Rs2D == 5'b00001)
|
||||
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.q (Zfa)
|
||||
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1_0; // fmvh.x.q (Zfa)
|
||||
// coverage on
|
||||
7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0; // fmv.w/d/h/q.x int to fp reg
|
||||
ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0_0; // fmv.w/d/h/q.x int to fp reg
|
||||
else if (P.ZFA_SUPPORTED & Funct3D == 3'b000 & Rs2D == 5'b00001)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1; // fli (Zfa)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1_0; // fli (Zfa)
|
||||
7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0; // fcvt.s.(d/q/h)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0_0; // fcvt.s.(d/q/h)
|
||||
else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.s (Zfa) *** needs ctrl for all rounds
|
||||
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.s (Zfa)
|
||||
else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.s (Zfa) *** needs ctrl for all rounds
|
||||
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.s (Zfa)
|
||||
7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0; // fcvt.d.(s/h/q)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0_0; // fcvt.d.(s/h/q)
|
||||
else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.d (Zfa)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.d (Zfa)
|
||||
else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.d (Zfa)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.d (Zfa)
|
||||
7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0; // fcvt.h.(s/d/q)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0_0; // fcvt.h.(s/d/q)
|
||||
else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.h (Zfa)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.h (Zfa)
|
||||
else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.h (Zfa)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.h (Zfa)
|
||||
// coverage off
|
||||
// Not covered in testing because rv64gc does not support quad precision
|
||||
7'b0100011: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b11)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0; // fcvt.q.(s/h/d)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0_0; // fcvt.q.(s/h/d)
|
||||
else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.q (Zfa)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.q (Zfa)
|
||||
else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.q (Zfa)
|
||||
ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.q (Zfa)
|
||||
// coverage on
|
||||
7'b1101000: case(Rs2D)
|
||||
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.s.w w->s
|
||||
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.s.wu wu->s
|
||||
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.s.l l->s
|
||||
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.s.lu lu->s
|
||||
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.s.w w->s
|
||||
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.s.wu wu->s
|
||||
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.s.l l->s
|
||||
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.s.lu lu->s
|
||||
endcase
|
||||
7'b1100000: case(Rs2D)
|
||||
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.s s->w
|
||||
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.s s->wu
|
||||
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.s s->l
|
||||
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.s s->lu
|
||||
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.s s->w
|
||||
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.s s->wu
|
||||
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.s s->l
|
||||
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.s s->lu
|
||||
endcase
|
||||
7'b1101001: case(Rs2D)
|
||||
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.d.w w->d
|
||||
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.d.wu wu->d
|
||||
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.d.l l->d
|
||||
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.d.lu lu->d
|
||||
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.d.w w->d
|
||||
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.d.wu wu->d
|
||||
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.d.l l->d
|
||||
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.d.lu lu->d
|
||||
endcase
|
||||
7'b1100001: case(Rs2D)
|
||||
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.d d->w
|
||||
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.d d->wu
|
||||
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.d d->l
|
||||
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.d d->lu
|
||||
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.d d->w
|
||||
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.d d->wu
|
||||
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.d d->l
|
||||
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.d d->lu
|
||||
5'b01000: if (P.ZFA_SUPPORTED & P.D_SUPPORTED & Funct3D == 3'b001)
|
||||
ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1; // fcvtmod.w.d (Zfa)
|
||||
ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1_0; // fcvtmod.w.d (Zfa)
|
||||
endcase
|
||||
7'b1101010: case(Rs2D)
|
||||
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.h.w w->h
|
||||
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.h.wu wu->h
|
||||
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.h.l l->h
|
||||
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.h.lu lu->h
|
||||
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.h.w w->h
|
||||
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.h.wu wu->h
|
||||
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.h.l l->h
|
||||
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.h.lu lu->h
|
||||
endcase
|
||||
7'b1100010: case(Rs2D)
|
||||
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.h h->w
|
||||
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.h h->wu
|
||||
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.h h->l
|
||||
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.h h->lu
|
||||
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.h h->w
|
||||
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.h h->wu
|
||||
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.h h->l
|
||||
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.h h->lu
|
||||
endcase
|
||||
// Not covered in testing because rv64gc does not support quad precision
|
||||
// coverage off
|
||||
7'b1101011: case(Rs2D)
|
||||
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.q.w w->q
|
||||
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.q.wu wu->q
|
||||
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.q.l l->q
|
||||
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.q.lu lu->q
|
||||
5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.q.w w->q
|
||||
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.q.wu wu->q
|
||||
5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.q.l l->q
|
||||
5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.q.lu lu->q
|
||||
endcase
|
||||
7'b1100011: case(Rs2D)
|
||||
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.q q->w
|
||||
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.q q->wu
|
||||
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.q q->l
|
||||
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.q q->lu
|
||||
5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.q q->w
|
||||
5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.q q->wu
|
||||
5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.q q->l
|
||||
5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.q q->lu
|
||||
endcase
|
||||
// coverage off
|
||||
// Not covered in testing because rv64gc is not RV64Q or RV32D
|
||||
7'b1011001: if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct3D == 3'b000)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.d.x (Zfa) *** untested, controls could be wrong
|
||||
ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fmvp.d.x (Zfa) *** untested, controls could be wrong
|
||||
7'b1011011: if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct3D == 3'b000)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.q.x (Zfa)
|
||||
ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fmvp.q.x (Zfa)
|
||||
// coverage on
|
||||
endcase
|
||||
endcase
|
||||
@ -250,7 +252,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
/* verilator lint_on CASEINCOMPLETE */
|
||||
|
||||
// unswizzle control bits
|
||||
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD} = ControlsD;
|
||||
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD, ZfaFRoundNXD} = ControlsD;
|
||||
|
||||
// rounding modes:
|
||||
// 000 - round to nearest, ties to even
|
||||
@ -259,7 +261,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
// 011 - round up - round towards positive infinity
|
||||
// 100 - round to nearest, ties to max magnitude - round to nearest, ties away from zero
|
||||
// 111 - dynamic - choose FRM_REGW as rounding mode
|
||||
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
|
||||
assign FrmD = (Funct3D == 3'b111) ? FRM_REGW : Funct3D;
|
||||
|
||||
// Precision
|
||||
// 00 - single
|
||||
@ -269,7 +271,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
if (P.FPSIZES == 1)
|
||||
assign FmtD = 1'b0;
|
||||
else if (P.FPSIZES == 2)begin
|
||||
else if (P.FPSIZES == 2) begin
|
||||
logic [1:0] FmtTmp;
|
||||
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
|
||||
assign FmtD = (P.FMT == FmtTmp);
|
||||
@ -313,6 +315,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
// 011 - mv to fp 01
|
||||
// 110 - min 10
|
||||
// 101 - max 10
|
||||
// 100 - fround 11
|
||||
// 111 - fli 11
|
||||
|
||||
// OpCtrl:
|
||||
@ -350,9 +353,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
assign Adr3D = InstrD[31:27];
|
||||
|
||||
// D/E pipeline register
|
||||
flopenrc #(15+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ~IllegalFPUInstrD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, FPUActiveE});
|
||||
flopenrc #(`FCTRLW+2+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE,
|
||||
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ZfaFRoundNXD, ~IllegalFPUInstrD},
|
||||
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, ZfaFRoundNXE, FPUActiveE});
|
||||
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
|
||||
flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE});
|
||||
@ -365,7 +368,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
flopenrc #(14+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
|
||||
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE},
|
||||
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM, ZfaM});
|
||||
|
||||
|
||||
// renaming for readability
|
||||
assign FpLoadStoreM = FResSelM[1];
|
||||
|
||||
@ -373,5 +376,5 @@ module fctrl import cvw::*; #(parameter cvw_t P) (
|
||||
flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FRegWriteM, FResSelM, FCvtIntM},
|
||||
{FRegWriteW, FResSelW, FCvtIntW});
|
||||
|
||||
|
||||
endmodule
|
||||
|
@ -37,6 +37,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic [P.NE-2:0] BiasE, // Bias of exponent
|
||||
input logic [P.LOGFLEN-1:0] NfE, // Number of fractional bits in selected format
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
@ -75,7 +77,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
|
||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||
.FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
|
||||
.FmtE, .Bias(BiasE), .Nf(NfE), .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
|
||||
// Int-specific
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
|
||||
.BZeroM, .IntNormShiftM, .AM,
|
||||
|
@ -29,39 +29,14 @@
|
||||
|
||||
module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format
|
||||
input logic SqrtE,
|
||||
input logic IntDivE,
|
||||
input logic [P.DIVBLEN-1:0] IntResultBitsE,
|
||||
output logic [P.DURLEN-1:0] CyclesE
|
||||
);
|
||||
|
||||
logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
if (P.FPSIZES == 1)
|
||||
assign Nf = P.NF;
|
||||
else if (P.FPSIZES == 2)
|
||||
always_comb
|
||||
case (FmtE)
|
||||
1'b0: Nf = P.NF1;
|
||||
1'b1: Nf = P.NF;
|
||||
endcase
|
||||
else if (P.FPSIZES == 3)
|
||||
always_comb
|
||||
case (FmtE)
|
||||
P.FMT: Nf = P.NF;
|
||||
P.FMT1: Nf = P.NF1;
|
||||
P.FMT2: Nf = P.NF2;
|
||||
default: Nf = 'x; // shouldn't happen
|
||||
endcase
|
||||
else if (P.FPSIZES == 4)
|
||||
always_comb
|
||||
case(FmtE)
|
||||
P.S_FMT: Nf = P.S_NF;
|
||||
P.D_FMT: Nf = P.D_NF;
|
||||
P.H_FMT: Nf = P.H_NF;
|
||||
P.Q_FMT: Nf = P.Q_NF;
|
||||
endcase
|
||||
logic [P.DIVBLEN-1:0] FPResultBitsE, ResultBitsE; // number of fractional (result) bits
|
||||
|
||||
// Cycle logic
|
||||
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
|
||||
@ -70,6 +45,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
||||
// FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle.
|
||||
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
always_comb begin
|
||||
FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1
|
||||
|
||||
|
@ -28,49 +28,21 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] Fmt,
|
||||
input logic [P.NE-2:0] Bias, // Bias of exponent
|
||||
input logic [P.NE-1:0] Xe, Ye, // input exponents
|
||||
input logic Sqrt,
|
||||
input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in Xe and Ye
|
||||
output logic [P.NE+1:0] Ue // result exponent
|
||||
);
|
||||
|
||||
logic [P.NE-2:0] Bias;
|
||||
|
||||
logic [P.NE+1:0] SXExp;
|
||||
logic [P.NE+1:0] SExp;
|
||||
logic [P.NE+1:0] DExp;
|
||||
|
||||
// Determine exponent bias according to the format
|
||||
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign Bias = (P.NE-1)'(P.BIAS);
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (Fmt)
|
||||
P.FMT: Bias = (P.NE-1)'(P.BIAS);
|
||||
P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
|
||||
P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
|
||||
default: Bias = 'x;
|
||||
endcase
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
|
||||
2'h1: Bias = (P.NE-1)'(P.D_BIAS);
|
||||
2'h0: Bias = (P.NE-1)'(P.S_BIAS);
|
||||
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
|
||||
endcase
|
||||
end
|
||||
|
||||
// Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms
|
||||
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
|
||||
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
|
||||
|
||||
|
||||
// division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
|
||||
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
||||
|
||||
|
@ -33,6 +33,8 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.NF:0] Xm, Ym, // Floating-point significands
|
||||
input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic [P.NE-2:0] Bias, // Bias of exponent
|
||||
input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format
|
||||
input logic SqrtE,
|
||||
input logic XZeroE,
|
||||
input logic [2:0] Funct3E,
|
||||
@ -209,11 +211,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
|
||||
|
||||
// Floating-point exponent
|
||||
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
|
||||
fdivsqrtexpcalc #(P) expcalc(.Bias, .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE));
|
||||
flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
|
||||
|
||||
// Number of FSM cycles (to FSM)
|
||||
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
|
||||
fdivsqrtcycles #(P) cyclecalc(.FmtE, .Nf, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
|
||||
|
||||
if (P.IDIV_ON_FPU) begin:intpipelineregs
|
||||
logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
|
||||
|
86
src/fpu/fmtparams.sv
Normal file
86
src/fpu/fmtparams.sv
Normal file
@ -0,0 +1,86 @@
|
||||
|
||||
///////////////////////////////////////////
|
||||
// fmtparams.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu
|
||||
// Modified: 5/11/24
|
||||
//
|
||||
// Purpose: Look up bias of exponent and number of fractional bits for the selected format
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fmtparams import cvw::*; #(parameter cvw_t P) (
  input  logic [P.FMTBITS-1:0]  Fmt,   // selected floating-point format
  output logic [P.NE-2:0]       Bias,  // exponent bias of the selected format
  output logic [P.LOGFLEN-1:0]  Nf     // number of fractional bits in the selected format
);

  // Format-parameter lookup.
  // A single generate chain keyed on the number of supported formats (P.FPSIZES);
  // each arm drives both outputs purely combinationally from Fmt.
  // Bias values are explicitly cast to NE-1 bits. Nf is loaded straight from the
  // configuration parameters, so WIDTH lint is waived across the chain.

  /* verilator lint_off WIDTH */
  if (P.FPSIZES == 1) begin
    // Only one format is supported: both outputs are constants
    assign Bias = (P.NE-1)'(P.BIAS);
    assign Nf   = P.NF;

  end else if (P.FPSIZES == 2) begin
    // Two formats: nonzero Fmt selects the primary format, zero selects the second one
    assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
    always_comb
      case (Fmt)
        1'b1: Nf = P.NF;
        1'b0: Nf = P.NF1;
      endcase

  end else if (P.FPSIZES == 3) begin
    // Three formats: one encoding of Fmt is unused and yields x on both outputs
    always_comb
      case (Fmt)
        P.FMT: begin
          Bias = (P.NE-1)'(P.BIAS);
          Nf   = P.NF;
        end
        P.FMT1: begin
          Bias = (P.NE-1)'(P.BIAS1);
          Nf   = P.NF1;
        end
        P.FMT2: begin
          Bias = (P.NE-1)'(P.BIAS2);
          Nf   = P.NF2;
        end
        default: begin // shouldn't happen
          Bias = 'x;
          Nf   = 'x;
        end
      endcase

  end else if (P.FPSIZES == 4) begin
    // Four formats: every encoding of the two-bit Fmt is a legal format.
    // Bias is indexed by the literal encoding, Nf by the named format constants.
    always_comb
      case (Fmt)
        2'h0: Bias = (P.NE-1)'(P.S_BIAS);
        2'h1: Bias = (P.NE-1)'(P.D_BIAS);
        2'h2: Bias = (P.NE-1)'(P.H_BIAS);
        2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
      endcase

    always_comb
      case (Fmt)
        P.S_FMT: Nf = P.S_NF;
        P.D_FMT: Nf = P.D_NF;
        P.H_FMT: Nf = P.H_NF;
        P.Q_FMT: Nf = P.Q_NF;
      endcase
  end
  /* verilator lint_on WIDTH */

endmodule
|
@ -70,7 +70,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
// control signals
|
||||
logic FRegWriteW; // FP register write enable
|
||||
logic [2:0] FrmM; // FP rounding mode
|
||||
logic [2:0] FrmE, FrmM; // FP rounding mode
|
||||
logic [P.FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
|
||||
logic FDivStartE, IDivStartE; // Start division or squareroot
|
||||
logic FWriteIntM; // Write to integer register
|
||||
@ -85,6 +85,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
logic FRegWriteE; // Write floating-point register
|
||||
logic FPUActiveE; // FP instruction being executed
|
||||
logic ZfaE, ZfaM; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d)
|
||||
logic ZfaFRoundNXE; // Zfa froundnx variant
|
||||
|
||||
// regfile signals
|
||||
logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
@ -112,6 +113,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
|
||||
logic XExpMaxE; // is the exponent all ones (max value)
|
||||
logic [P.FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations
|
||||
logic [P.NE-2:0] BiasE; // Bias of exponent
|
||||
logic [P.LOGFLEN-1:0] NfE; // Number of fractional bits
|
||||
|
||||
// Fma Signals
|
||||
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
|
||||
@ -150,7 +153,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
logic [P.XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
|
||||
logic [P.FLEN-1:0] PostProcResM; // Postprocessor output
|
||||
logic [4:0] PostProcFlgM; // Postprocessor flags
|
||||
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
|
||||
logic PreNVE, PreNVM; // selected invalid flag that is ready in the memory stage
|
||||
logic PreNXE, PreNXM; // selected inexact flag that is ready in the memory stage
|
||||
logic [P.FLEN-1:0] FpResM, FpResW; // FPU preliminary result
|
||||
logic [P.FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
|
||||
logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register
|
||||
@ -162,9 +166,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
|
||||
logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
|
||||
logic mvsgn; // sign bit for extending move
|
||||
logic [P.FLEN-1:0] FliResE; // Zfa Floating-point load immediate value
|
||||
logic [P.FLEN-1:0] FRoundE; // Zfa fround output
|
||||
logic [4:0] FRoundFlagsE; // Zfa fround flags
|
||||
logic [P.FLEN-1:0] ZfaResE; // Result of Zfa fli or fround instruction
|
||||
logic FRoundNVE, FRoundNXE; // Zfa fround invalid and inexact flags
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Decode Stage: fctrl decoder, read register file
|
||||
@ -174,7 +177,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
fctrl #(P) fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
|
||||
.IntDivE, .InstrD,
|
||||
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
|
||||
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .FrmM, .FmtE, .FmtM,
|
||||
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .ZfaFRoundNXE, .FrmE, .FrmM, .FmtE, .FmtM,
|
||||
.FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM,
|
||||
.IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE,
|
||||
.FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW,
|
||||
@ -237,7 +240,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
|
||||
.YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE),
|
||||
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
|
||||
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE));
|
||||
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE), .Bias(BiasE), .Nf(NfE));
|
||||
|
||||
// fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
|
||||
fma #(P) fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
|
||||
@ -246,7 +249,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
// divide and square root: fdiv, fsqrt, optionally integer division
|
||||
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .BiasE, .NfE, .FDivStartE, .IDivStartE, .XsE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
|
||||
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM,
|
||||
.UmM, .FIntDivResultM);
|
||||
@ -270,23 +273,26 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
.ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));
|
||||
|
||||
// ZFA: fround and floating-point load immediate fli
|
||||
if (P.ZFA_SUPPORTED) begin
|
||||
if (P.ZFA_SUPPORTED) begin:Zfa
|
||||
logic [4:0] Rs1E;
|
||||
logic [1:0] Fmt2E; // Two-bit format field from instruction
|
||||
logic [P.FLEN-1:0] FRoundE; // Zfa fround output
|
||||
logic [P.FLEN-1:0] FliResE; // Zfa Floating-point load immediate value
|
||||
|
||||
// fround
|
||||
fround #(P) fround(.Xs(XsE), .Xe(XeE), .Xm(XmE),
|
||||
.XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE),
|
||||
.FRound(FRoundE), .FRoundFlags(FRoundFlagsE));
|
||||
fround #(P) fround(.X(XE), .Xs(XsE), .Xe(XeE), .Xm(XmE),
|
||||
.XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE),
|
||||
.ZfaFRoundNX(ZfaFRoundNXE),
|
||||
.FRound(FRoundE), .FRoundNV(FRoundNVE), .FRoundNX(FRoundNXE));
|
||||
|
||||
// fli
|
||||
flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E);
|
||||
flopenrc #(2) Fmt2EReg(clk, reset, FlushE, ~StallE, InstrD[26:25], Fmt2E);
|
||||
fli #(P) fli(.Rs1(Rs1E), .Fmt(Fmt2E), .Imm(FliResE));
|
||||
mux2 #(P.FLEN) ZfaResMux(FRoundE, FliResE, OpCtrlE[0], ZfaResE);
|
||||
end else begin
|
||||
assign FRoundE = '0;
|
||||
assign FRoundFlagsE = '0;
|
||||
assign FliResE = '0;
|
||||
assign {FRoundNXE, FRoundNVE} = '0;
|
||||
assign ZfaResE = 'x;
|
||||
end
|
||||
|
||||
// fmv.*.x: NaN Box SrcA to extend integer to requested FP size
|
||||
@ -311,8 +317,9 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
else assign IntSrcE = PreIntSrcE;
|
||||
|
||||
// select a result that may be written to the FP register
|
||||
mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
|
||||
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
|
||||
mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, ZfaResE, {OpCtrlE[2], &OpCtrlE[1:0] | (OpCtrlE == 3'b100) & ZfaE}, PreFpResE);
|
||||
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE) | FRoundNVE & (OpCtrlE == 3'b100) & ZfaE;
|
||||
assign PreNXE = FRoundNXE & (OpCtrlE == 3'b100);
|
||||
|
||||
// fmv.x.*: select the result that may be written to the integer register
|
||||
if(P.FPSIZES == 1) begin
|
||||
@ -350,7 +357,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM,
|
||||
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
|
||||
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
|
||||
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
|
||||
flopenrc #(2) EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM});
|
||||
flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
|
||||
flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
|
||||
@ -373,8 +380,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
|
||||
|
||||
// FPU flag selection - to privileged
|
||||
//mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
|
||||
mux2 #(5) FPUFlgMux({PreNVM, 4'b0}, PostProcFlgM, (FResSelM == 2'b01), SetFflagsM);
|
||||
mux2 #(5) FPUFlgMux({PreNVM, 3'b0, PreNXM}, PostProcFlgM, (FResSelM == 2'b01), SetFflagsM);
|
||||
mux2 #(P.FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
|
||||
|
||||
// M/W pipe registers
|
||||
|
@ -28,60 +28,34 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fround import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FLEN-1:0] X, // input before unpacking
|
||||
input logic Xs, // input's sign
|
||||
input logic [P.NE-1:0] Xe, // input's exponent
|
||||
input logic [P.NF:0] Xm, // input's fraction
|
||||
input logic [P.NF:0] Xm, // input's fraction with leading integer bit (U1.NF)
|
||||
input logic XNaN, // X is NaN
|
||||
input logic XSNaN, // X is Signalling NaN
|
||||
input logic XZero, // X is Zero
|
||||
input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
input logic [2:0] Frm, // rounding mode
|
||||
input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format
|
||||
input logic ZfaFRoundNX, // froundnx instruction can set inexact flag
|
||||
output logic [P.FLEN-1:0] FRound, // Rounded result
|
||||
output logic [4:0] FRoundFlags // Rounder flags
|
||||
output logic FRoundNV, // fround invalid
|
||||
output logic FRoundNX // fround inexact
|
||||
);
|
||||
|
||||
logic [P.NE-2:0] Bias;
|
||||
logic [P.NE-1:0] E;
|
||||
logic [P.NF:0] Imask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd;
|
||||
logic Lnonneg, Lp, Rnonneg, Rp, Tp;
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Determine exponent bias according to the format
|
||||
//////////////////////////////////////////
|
||||
// *** replicated from fdivsqrt; find a way to share
|
||||
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign Bias = (P.NE-1)'(P.BIAS);
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (Fmt)
|
||||
P.FMT: Bias = (P.NE-1)'(P.BIAS);
|
||||
P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
|
||||
P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
|
||||
default: Bias = 'x;
|
||||
endcase
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (Fmt)
|
||||
2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
|
||||
2'h1: Bias = (P.NE-1)'(P.D_BIAS);
|
||||
2'h0: Bias = (P.NE-1)'(P.S_BIAS);
|
||||
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
|
||||
endcase
|
||||
end
|
||||
|
||||
/*
|
||||
logic [P.NE-1:0] E, Xep1, EminusNf;
|
||||
logic [P.NF:0] IMask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd;
|
||||
logic [P.FLEN-1:0] W, PackedW;
|
||||
logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf, Exact;
|
||||
|
||||
// Unbiased exponent
|
||||
assign E = Xe - Bias;
|
||||
assign E = Xe - P.BIAS[P.NE-1:0];
|
||||
assign Xep1 = Xe + 1;
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Compute LSB L', rounding bit R' and Sticky bit T'
|
||||
// if (E < 0) // negative exponents round to 0 or 1.
|
||||
// if (E < 0) // negative exponents round to 0 or 1.
|
||||
// L' = 0 // LSB = 0
|
||||
// if (E = -1) R' = 1, TMask = 0.1111...111 // if (E = -1) 0.5 <= X < 1. Round bit is 1
|
||||
// else R' = 0; TMask = 1.1111...111 // if (E < -1), X < 0.5. Round bit is 0
|
||||
@ -100,19 +74,19 @@ module fround import cvw::*; #(parameter cvw_t P) (
|
||||
//////////////////////////////////////////
|
||||
|
||||
// Check if exponent is negative and -1
|
||||
assign Elt0 = (E < 0);
|
||||
assign Eeqm1 = (E == -1);
|
||||
assign Elt0 = E[P.NE-1]; // (E < 0);
|
||||
assign Eeqm1 = ($signed(E) == -1);
|
||||
|
||||
// Logic for nonnegative mask and rounding bits
|
||||
assign Imask = {1'b1, {P.NF{1'b0}}} >>> E;
|
||||
assign IMask = {1'b1, {P.NF{1'b0}}} >>> E;
|
||||
assign Tmasknonneg = ~(IMask >>> 1'b1);
|
||||
assign HotE = IMask & !(IMask << 1'b1);
|
||||
assign HotE = IMask & ~(IMask << 1'b1);
|
||||
assign HotEP1 = HotE >> 1'b1;
|
||||
assign Lnonneg = |(Xm & HotE);
|
||||
assign Rnonneg = |(Xm & HotEP1);
|
||||
assign Trunc = Xm & Imask;
|
||||
assign Rnd = Trunc + HotE;
|
||||
|
||||
assign Trunc = Xm & IMask;
|
||||
assign {Two, Rnd} = Trunc + HotE; // Two means result is 10.000000 = 2.0
|
||||
|
||||
// mux and AND-OR logic to select final rounding bits
|
||||
mux2 #(1) Lmux(Lnonneg, 1'b0, Elt0, Lp);
|
||||
mux2 #(1) Rmux(Rnonneg, Eeqm1, Elt0, Rp);
|
||||
@ -120,7 +94,6 @@ module fround import cvw::*; #(parameter cvw_t P) (
|
||||
mux2 #(P.NF+1) Tmaskmux(Tmasknonneg, Tmaskneg, Elt0, Tmask);
|
||||
assign Tp = |(Xm & Tmask);
|
||||
|
||||
|
||||
///////////////////////////
|
||||
// Rounding, flags, special Cases
|
||||
// Flags = 0 // unless overridden later
|
||||
@ -144,11 +117,15 @@ module fround import cvw::*; #(parameter cvw_t P) (
|
||||
///////////////////////////
|
||||
|
||||
// Exact logic
|
||||
assign Exact = (E >= Nf | XZero); // result will be exact; no need to round
|
||||
/* verilator lint_off WIDTH */
|
||||
assign EminusNf = E - Nf;
|
||||
/* verilator lint_on WIDTH */
|
||||
assign EgeNf = ~EminusNf[P.NE-1] & (~E[P.NE-1] | E[P.NE-2:0] == '0); // E >= Nf if MSB of E-Nf is 0 and E was positive
|
||||
assign Exact = (EgeNf | XZero) & ~XNaN; // result will be exact; no need to round
|
||||
|
||||
// Rounding logic: determine whether to round up in magnitude
|
||||
always_comb
|
||||
case (Rm) // *** make sure this includes dynamic
|
||||
always_comb begin
|
||||
case (Frm) // Frm is either specified in the instruction or is the dynamic rounding mode
|
||||
3'b000: RoundUp = Rp & (Lp | Tp); // RNE
|
||||
3'b001: RoundUp = 0; // RZ
|
||||
3'b010: RoundUp = Xs & (Rp | Tp); // RN
|
||||
@ -157,22 +134,23 @@ module fround import cvw::*; #(parameter cvw_t P) (
|
||||
default: RoundUp = 0; // should never happen
|
||||
endcase
|
||||
|
||||
// output logic
|
||||
if (XNaN) W = CanonicalNan; // ***
|
||||
else if (Exact) W = X;
|
||||
else if (Elt0)
|
||||
if (RoundUp) W = {Xs, bias, {P.NF}} // *** format conversions
|
||||
// If result is not exact, select output in unpacked FLEN format initially
|
||||
if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN
|
||||
else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1
|
||||
if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1
|
||||
else W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0
|
||||
else begin // |X| > 1 rounds to an integer
|
||||
if (RoundUp & Two) W = {Xs, Xep1, {(P.NF){1'b0}}}; // Round up to 2.0
|
||||
else if (RoundUp) W = {Xs, Xe, Rnd[P.NF-1:0]}; // Round up to Rnd
|
||||
else W = {Xs, Xe, Trunc[P.NF-1:0]}; // Round down to Trunc
|
||||
end
|
||||
end
|
||||
|
||||
*** may not need to round to infinity; update docs and pseudocode above
|
||||
|
||||
always_comb
|
||||
packoutput #(P) packoutput(W, Fmt, PackedW); // pack and NaN-box based on selected format.
|
||||
mux2 #(P.FLEN) resultmux(PackedW, X, Exact, FRound);
|
||||
|
||||
// Flags
|
||||
assign Invalid = XSNaN;
|
||||
assign Inexact = FRoundNX & ~(XNaN | Exact) & (Rp | T');
|
||||
*/
|
||||
|
||||
assign FRound = '0;
|
||||
assign FRoundFlags = '0;
|
||||
assign FRoundNV = XSNaN; // invalid if input is signaling NaN
|
||||
assign FRoundNX = ZfaFRoundNX & ~(XNaN | Exact) & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction
|
||||
|
||||
endmodule
|
||||
|
101
src/fpu/packoutput.sv
Normal file
101
src/fpu/packoutput.sv
Normal file
@ -0,0 +1,101 @@
|
||||
|
||||
///////////////////////////////////////////
|
||||
// packoutput.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu
|
||||
// Modified: 5/11/24
|
||||
//
|
||||
// Purpose: Pack the output of the FPU
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
// https://github.com/openhwgroup/cvw
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module packoutput import cvw::*; #(parameter cvw_t P) (
  input  logic [P.FLEN-1:0]     Unpacked, // value laid out at full FLEN width (sign in the MSB)
  input  logic [P.FMTBITS-1:0]  Fmt,      // selected floating-point format
  output logic [P.FLEN-1:0]     Packed    // value repacked in the selected format, upper bits filled with 1s
);

  // Temporaries for the narrower formats. Which ones are used depends on P.FPSIZES.
  logic               Sign;
  logic [P.NE1-1:0]   Exp1;
  logic [P.NF1-1:0]   Fract1;
  logic [P.NE2-1:0]   Exp2;
  logic [P.NF2-1:0]   Fract2;
  logic [P.H_NE-1:0]  Exp3;
  logic [P.H_NF-1:0]  Fract3;

  // Pack exponent and fraction, with NaN-boxing to full FLEN.
  // For a narrower format the packed exponent is the most significant exponent bit
  // (Unpacked[FLEN-2]) concatenated with the low bits of the exponent field that sit
  // directly above the fraction; the packed fraction is the most significant bits of
  // the fraction field. All bits above the narrow value are set to 1.
  // NOTE(review): assumes Unpacked is {sign, NE-bit exponent, NF-bit fraction} - confirm against callers.

  assign Sign = Unpacked[P.FLEN-1];

  if (P.FPSIZES == 1) begin
    // Single supported format: nothing to repack
    assign Packed = Unpacked;

  end else if (P.FPSIZES == 2) begin
    always_comb begin
      {Exp1, Fract1} = '0; // default when not used, so the temporaries do not infer latches
      case (Fmt)
        1'b1: Packed = Unpacked;
        1'b0: begin
          Exp1   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]};
          Fract1 = Unpacked[P.NF-1:P.NF-P.NF1];
          Packed = {{(P.FLEN-P.LEN1){1'b1}}, Sign, Exp1, Fract1};
        end
      endcase
    end

  end else if (P.FPSIZES == 3) begin
    always_comb begin
      {Exp1, Fract1, Exp2, Fract2} = '0; // default when not used, so the temporaries do not infer latches
      case (Fmt)
        P.FMT: Packed = Unpacked;
        P.FMT1: begin
          Exp1   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]};
          Fract1 = Unpacked[P.NF-1:P.NF-P.NF1];
          Packed = {{(P.FLEN-P.LEN1){1'b1}}, Sign, Exp1, Fract1};
        end
        P.FMT2: begin
          Exp2   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE2-2:P.NF]};
          Fract2 = Unpacked[P.NF-1:P.NF-P.NF2];
          Packed = {{(P.FLEN-P.LEN2){1'b1}}, Sign, Exp2, Fract2};
        end
        default: Packed = 'x; // unused format encoding
      endcase
    end

  end else if (P.FPSIZES == 4) begin
    always_comb begin
      {Exp1, Fract1, Exp2, Fract2, Exp3, Fract3} = '0; // default when not used, so the temporaries do not infer latches
      case (Fmt)
        2'h3: Packed = Unpacked; // Quad
        2'h1: begin // double
          Exp1   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]};
          Fract1 = Unpacked[P.NF-1:P.NF-P.NF1];
          Packed = {{(P.FLEN-P.LEN1){1'b1}}, Sign, Exp1, Fract1};
        end
        2'h0: begin // float
          Exp2   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE2-2:P.NF]};
          Fract2 = Unpacked[P.NF-1:P.NF-P.NF2];
          Packed = {{(P.FLEN-P.LEN2){1'b1}}, Sign, Exp2, Fract2};
        end
        2'h2: begin // half
          Exp3   = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.H_NE-2:P.NF]};
          Fract3 = Unpacked[P.NF-1:P.NF-P.H_NF];
          Packed = {{(P.FLEN-P.H_LEN){1'b1}}, Sign, Exp3, Fract3};
        end
      endcase
    end
  end

endmodule
|
@ -41,13 +41,15 @@ module unpack import cvw::*; #(parameter cvw_t P) (
|
||||
output logic XZero, YZero, ZZero, // is XYZ zero
|
||||
output logic XInf, YInf, ZInf, // is XYZ infinity
|
||||
output logic XExpMax, // does X have the maximum exponent (NaN or Inf)
|
||||
output logic [P.FLEN-1:0] XPostBox // X after being properly NaN-boxed
|
||||
output logic [P.FLEN-1:0] XPostBox, // X after being properly NaN-boxed
|
||||
output logic [P.NE-2:0] Bias, // Exponent bias
|
||||
output logic [P.LOGFLEN-1:0] Nf // Number of fractional bits
|
||||
);
|
||||
|
||||
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
|
||||
logic YExpMax, ZExpMax; // is the exponent all 1s
|
||||
|
||||
|
||||
unpackinput #(P) unpackinputX (.A(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .FPUActive,
|
||||
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
|
||||
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero),
|
||||
@ -63,4 +65,7 @@ module unpack import cvw::*; #(parameter cvw_t P) (
|
||||
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero),
|
||||
.Subnorm(), .PostBox());
|
||||
|
||||
// look up bias and fractional bits for the given format
|
||||
fmtparams #(P) fmtparams(Fmt, Bias, Nf);
|
||||
|
||||
endmodule
|
||||
|
@ -1641,7 +1641,7 @@ string imperas32f[] = '{
|
||||
string arch64d[] = '{
|
||||
`RISCVARCHTEST,
|
||||
// for speed
|
||||
"rv64i_m/D/src/fadd.d_b10-01.S",
|
||||
"rv64i_m/D/src/fadd.d_b10-01.S",
|
||||
"rv64i_m/D/src/fadd.d_b1-01.S",
|
||||
"rv64i_m/D/src/fadd.d_b11-01.S",
|
||||
"rv64i_m/D/src/fadd.d_b12-01.S",
|
||||
@ -2278,6 +2278,7 @@ string arch64zknh[] = '{
|
||||
string arch32zfaf[] = '{
|
||||
//`RISCVARCHTEST,
|
||||
`WALLYTEST,
|
||||
"rv32i_m/F_Zfa/src/fround_b1-01.S",
|
||||
"rv32i_m/F_Zfa/src/fleq_b1-01.S",
|
||||
"rv32i_m/F_Zfa/src/fleq_b19-01.S",
|
||||
"rv32i_m/F_Zfa/src/fli.s-01.S",
|
||||
@ -2289,12 +2290,12 @@ string arch64zknh[] = '{
|
||||
"rv32i_m/F_Zfa/src/fminm_b19-01.S",
|
||||
"rv32i_m/F_Zfa/src/fmaxm_b1-01.S",
|
||||
"rv32i_m/F_Zfa/src/fmaxm_b19-01.S"
|
||||
/* "rv32i_m/F_Zfa/src/fround_b1-01.S" */
|
||||
};
|
||||
|
||||
string arch32zfad[] = '{
|
||||
//`RISCVARCHTEST,
|
||||
`WALLYTEST,
|
||||
"rv32i_m/D_Zfa/src/fround_b1-01.S",
|
||||
"rv32i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S",
|
||||
"rv32i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S",
|
||||
"rv32i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S",
|
||||
@ -2326,12 +2327,12 @@ string arch64zknh[] = '{
|
||||
"rv32i_m/D_Zfa/src/fmvh.x.d_b27-01.S",
|
||||
"rv32i_m/D_Zfa/src/fmvh.x.d_b28-01.S",
|
||||
"rv32i_m/D_Zfa/src/fmvh.x.d_b29-01.S"
|
||||
/* "rv32i_m/D_Zfa/src/fround_b1-01.S" */
|
||||
};
|
||||
|
||||
string arch64zfaf[] = '{
|
||||
//`RISCVARCHTEST,
|
||||
`WALLYTEST,
|
||||
"rv64i_m/F_Zfa/src/fround_b1-01.S",
|
||||
"rv64i_m/F_Zfa/src/fleq_b1-01.S",
|
||||
"rv64i_m/F_Zfa/src/fleq_b19-01.S",
|
||||
"rv64i_m/F_Zfa/src/fli.s-01.S",
|
||||
@ -2341,12 +2342,12 @@ string arch64zknh[] = '{
|
||||
"rv64i_m/F_Zfa/src/fminm_b19-01.S",
|
||||
"rv64i_m/F_Zfa/src/fmaxm_b1-01.S",
|
||||
"rv64i_m/F_Zfa/src/fmaxm_b19-01.S"
|
||||
/* "rv64i_m/F_Zfa/src/fround_b1-01.S" */
|
||||
};
|
||||
|
||||
string arch64zfad[] = '{
|
||||
//`RISCVARCHTEST,
|
||||
`WALLYTEST,
|
||||
"rv64i_m/D_Zfa/src/fround_b1-01.S",
|
||||
"rv64i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S",
|
||||
"rv64i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S",
|
||||
"rv64i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S",
|
||||
@ -2363,7 +2364,7 @@ string arch64zknh[] = '{
|
||||
"rv64i_m/D_Zfa/src/fminm_b19-01.S",
|
||||
"rv64i_m/D_Zfa/src/fmaxm_b1-01.S",
|
||||
"rv64i_m/D_Zfa/src/fmaxm_b19-01.S"
|
||||
/* "rv64i_m/D_Zfa/src/fround_b1-01.S" */
|
||||
|
||||
};
|
||||
|
||||
string arch32d_fma[] = '{
|
||||
|
Loading…
Reference in New Issue
Block a user