renamed top level FPU wires

This commit is contained in:
Katherine Parry 2021-05-25 20:04:34 -04:00
parent 33cd133a65
commit e7190b0690
12 changed files with 707 additions and 811 deletions

View File

@ -6,16 +6,15 @@ module fctrl (
input logic [2:0] Funct3D,
input logic [2:0] FRM_REGW,
output logic IllegalFPUInstrD,
output logic FRegWriteD,
output logic DivSqrtStartD,
//output logic [2:0] regSelD,
output logic FWriteEnD,
output logic FDivStartD,
output logic [2:0] FResultSelD,
output logic [3:0] OpCtrlD,
output logic [3:0] FOpCtrlD,
output logic FmtD,
output logic [2:0] FrmD,
output logic [1:0] FMemRWD,
output logic OutputInput2D,
output logic In2UsedD, In3UsedD,
output logic FOutputInput2D,
output logic FInput2UsedD, FInput3UsedD,
output logic FWriteIntD);
@ -102,9 +101,9 @@ module fctrl (
end
end
assign OutputInput2D = OpD == 7'b0100111;
assign FOutputInput2D = OpD == 7'b0100111;
assign FMemRWD[0] = OutputInput2D;
assign FMemRWD[0] = FOutputInput2D;
assign FMemRWD[1] = OpD == 7'b0000111;
@ -131,7 +130,7 @@ module fctrl (
//this value is used enough to be shorthand
//if op is div/sqrt - start div/sqrt
assign DivSqrtStartD = ~|FResultSelD; // is FResultSelD == 000
assign FDivStartD = ~|FResultSelD; // is FResultSelD == 000
//operation control for each fp operation
//has to be expanded over standard to account for
@ -144,7 +143,7 @@ module fctrl (
//version I used for this repo
//let's do separate SOP for each type of operation
// assign OpCtrlD[3] = 1'b0;
// assign FOpCtrlD[3] = 1'b0;
//
//
@ -152,12 +151,12 @@ module fctrl (
always_comb begin
IllegalFPUInstr1D = 0;
In3UsedD = 0;
FInput3UsedD = 0;
case (FResultSelD)
// div/sqrt
// fdiv = ???0
// fsqrt = ???1
3'b000 : begin OpCtrlD = {3'b0, Funct7D[5]}; In2UsedD = ~Funct7D[5]; end
3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end
// cmp
// fmin = ?100
// fmax = ?101
@ -165,7 +164,7 @@ module fctrl (
// flt = ?001
// fle = ?011
// {?, is min or max, is eq or le, is lt or le}
3'b001 : begin OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; In2UsedD = 1'b1; end
3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end
//fma/mult
// fmadd = ?000
// fmsub = ?001
@ -173,12 +172,12 @@ module fctrl (
// fnmsub = ?011
// fmul = ?100
// {?, is mul, is negative, is sub}
3'b010 : begin OpCtrlD = {1'b0, OpD[4:2]}; In2UsedD = 1'b1; In3UsedD = ~OpD[4]; end
3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end
// sgn inj
// fsgnj = ??00
// fsgnjn = ??01
// fsgnjx = ??10
3'b011 : begin OpCtrlD = {2'b0, Funct3D[1:0]}; In2UsedD = 1'b1; end
3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
@ -193,13 +192,13 @@ module fctrl (
// fcvt.d.wu = 1111
// fcvt.d.s = 1000
// {is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub}
3'b100 : begin OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; In2UsedD = ~Funct7D[5]; end
3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end
// classify {?, ?, ?, ?}
3'b101 : begin OpCtrlD = 4'b0; In2UsedD = 1'b0; end
3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end
// output SrcAW
// fmv.w.x = ???0
// fmv.w.d = ???1
3'b110 : begin OpCtrlD = {3'b0, Funct7D[0]}; In2UsedD = 1'b0; end
3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end
// output Input1
// flw = ?000
// fld = ?001
@ -207,9 +206,9 @@ module fctrl (
// fsd = ?011 // output Input2
// fmv.x.w = ?100
// fmv.x.d = ?101
// {?, is mv, is store, is double or fcvt.d.w}
3'b111 : begin OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; In2UsedD = OpD[5]; end
default : begin OpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; In2UsedD = 1'b0; end
// {?, is mv, is store, is double or fmv}
3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end
default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; FInput2UsedD = 1'b0; end
endcase
end
@ -219,5 +218,5 @@ module fctrl (
// is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv
assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]);
// if not writing to int reg and not a store function and not move
assign FRegWriteD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
endmodule

View File

@ -15,13 +15,13 @@
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to Input1E or Input3E inputs
// bypass Handles bypass of result to FInput1E or FInput3E inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes FmaResultM=Input1E*Input2E+Input3E, rounded with the mode specified by
// The FMAC computes FmaResultM=FInput1E*FInput2E+FInput3E, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the Input1E or Input3E inputs for use on the next cycle. In addition, four signals
// the FInput1E or FInput3E inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
@ -29,15 +29,15 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module fma1(Input1E, Input2E, Input3E, FrmE,
module fma1(FInput1E, FInput2E, FInput3E, FrmE,
rE, sE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE, aligncntE, aeE
, xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE,
xinfE, yinfE, zinfE, nanE, prodinfE);
/////////////////////////////////////////////////////////////////////////////
input logic [63:0] Input1E; // input 1
input logic [63:0] Input2E; // input 2
input logic [63:0] Input3E; // input 3
input logic [63:0] FInput1E; // input 1
input logic [63:0] FInput2E; // input 2
input logic [63:0] FInput3E; // input 3
input logic [2:0] FrmE; // Rounding mode
output logic [12:0] aligncntE; // status flags
output logic [105:0] rE; // one result of partial product sum
@ -45,7 +45,7 @@ module fma1(Input1E, Input2E, Input3E, FrmE,
output logic [163:0] tE; // output logic of alignment shifter
output logic [12:0] aeE; // multiplier expoent
output logic bsE; // sticky bit of addend
output logic killprodE; // Input3E >> product
output logic killprodE; // FInput3E >> product
output logic xzeroE;
output logic yzeroE;
output logic zzeroE;
@ -68,7 +68,7 @@ module fma1(Input1E, Input2E, Input3E, FrmE,
// output logic [12:0] aligncntE; // shift count for alignment
logic prodof; // Input1E*Input2E out of range
logic prodof; // FInput1E*FInput2E out of range
@ -84,12 +84,12 @@ module fma1(Input1E, Input2E, Input3E, FrmE,
// Instantiate fraction datapath
multiply multiply(.xman(Input1E[51:0]), .yman(Input2E[51:0]), .*);
align align(.zman(Input3E[51:0]),.*);
multiply multiply(.xman(FInput1E[51:0]), .yman(FInput2E[51:0]), .*);
align align(.zman(FInput3E[51:0]),.*);
// Instantiate exponent datapath
expgen1 expgen1(.xexp(Input1E[62:52]),.yexp(Input2E[62:52]),.zexp(Input3E[62:52]),.*);
expgen1 expgen1(.xexp(FInput1E[62:52]),.yexp(FInput2E[62:52]),.zexp(FInput3E[62:52]),.*);
// Instantiate special case detection across datapath & exponent path
special special(.*);

View File

@ -15,13 +15,13 @@
// normalize Normalization shifter
// round Rounding of result
// exception Handles exceptional cases
// bypass Handles bypass of result to Input1M or Input3M input logics
// bypass Handles bypass of result to FInput1M or FInput3M inputs
// sign One bit sign handling block
// special Catch special cases (inputs = 0 / infinity / etc.)
//
// The FMAC computes FmaResultM=Input1M*Input2M+Input3M, rounded with the mode specified by
// The FMAC computes FmaResultM=FInput1M*FInput2M+FInput3M, rounded with the mode specified by
// RN, RZ, RM, or RP. The result is optionally bypassed back to
// the Input1M or Input3M input logics for use on the next cycle. In addition, four signals
// the FInput1M or FInput3M inputs for use on the next cycle. In addition, four signals
// are produced: trap, overflow, underflow, and inexact. Trap indicates
// an infinity, NaN, or denormalized number to be handled in software;
// the other three signals are IEEE flags.
@ -29,7 +29,7 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module fma2(Input1M, Input2M, Input3M, FrmM,
module fma2(FInput1M, FInput2M, FInput3M, FrmM,
FmaResultM, FmaFlagsM, aligncntM, rM, sM,
tM, normcntM, aeM, bsM,killprodM,
xzeroM, yzeroM,zzeroM,xdenormM,ydenormM,
@ -39,9 +39,9 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
);
/////////////////////////////////////////////////////////////////////////////
input logic [63:0] Input1M; // input logic 1
input logic [63:0] Input2M; // input logic 2
input logic [63:0] Input3M; // input logic 3
input logic [63:0] FInput1M; // input logic 1
input logic [63:0] FInput2M; // input logic 2
input logic [63:0] FInput3M; // input logic 3
input logic [2:0] FrmM; // Rounding mode
input logic [12:0] aligncntM; // status flags
input logic [105:0] rM; // one result of partial product sum
@ -50,7 +50,7 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
input logic [8:0] normcntM; // shift count for normalizer
input logic [12:0] aeM; // multiplier expoent
input logic bsM; // sticky bit of addend
input logic killprodM; // Input3M >> product
input logic killprodM; // FInput3M >> product
input logic prodinfM;
input logic xzeroM;
input logic yzeroM;
@ -69,7 +69,7 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
input logic sumshiftzeroM;
output logic [63:0] FmaResultM; // output FmaResultM=Input1M*Input2M+Input3M
output logic [63:0] FmaResultM; // output FmaResultM=FInput1M*FInput2M+FInput3M
output logic [4:0] FmaFlagsM; // status flags
@ -120,18 +120,18 @@ module fma2(Input1M, Input2M, Input3M, FrmM,
add add(.*);
lza lza(.*);
normalize normalize(.zexp(Input3M[62:52]),.*);
round round(.xman(Input1M[51:0]), .yman(Input2M[51:0]),.zman(Input3M[51:0]),.*);
normalize normalize(.zexp(FInput3M[62:52]),.*);
round round(.xman(FInput1M[51:0]), .yman(FInput2M[51:0]),.zman(FInput3M[51:0]),.*);
// Instantiate exponent datapath
expgen2 expgen2(.xexp(Input1M[62:52]),.yexp(Input2M[62:52]),.zexp(Input3M[62:52]),.*);
expgen2 expgen2(.xexp(FInput1M[62:52]),.yexp(FInput2M[62:52]),.zexp(FInput3M[62:52]),.*);
// Instantiate control logic
sign sign(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.*);
flag2 flag2(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.vbits(v[1:0]),.*);
sign sign(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.*);
flag2 flag2(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.vbits(v[1:0]),.*);
assign FmaResultM = {wsign,wexp,wman};

View File

@ -23,25 +23,25 @@
//
// `timescale 1ps/1ps
module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, DivFrm, DivOpType, DivP, DivOvEn, DivUnEn,
DivStart, reset, clk, DivBusyM);
module fpdiv (FDivSqrtDoneM, FDivResultM, FDivFlagsM, DivDenormM, FInput1E, FInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
FDivStartE, reset, clk, DivBusyM);
input [63:0] DivOp1; // 1st input operand (A)
input [63:0] DivOp2; // 2nd input operand (B)
input [2:0] DivFrm; // Rounding mode - specify values
input [63:0] FInput1E; // 1st input operand (A)
input [63:0] FInput2E; // 2nd input operand (B)
input [2:0] FrmE; // Rounding mode - specify values
input DivOpType; // Function opcode
input DivP; // Result Precision (0 for double, 1 for single)
input FmtE; // Result Precision (0 for double, 1 for single)
input DivOvEn; // Overflow trap enabled
input DivUnEn; // Underflow trap enabled
input DivStart;
input FDivStartE;
input reset;
input clk;
output [63:0] DivResultM; // Result of operation
output [4:0] DivFlagsM; // IEEE exception flags
output [63:0] FDivResultM; // Result of operation
output [4:0] FDivFlagsM; // IEEE exception flags
output DivDenormM; // DivDenormM on input or output
output DivSqrtDone;
output FDivSqrtDoneM;
output DivBusyM;
supply1 vdd;
@ -94,16 +94,16 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
logic exp_cout1, exp_cout2, exp_odd, open;
// Convert the input operands to their appropriate forms based on
// the orignal operands, the DivOpType , and their precision DivP.
// the original operands, the DivOpType, and their precision FmtE.
// Single precision inputs are converted to double precision
// and the sign of the first operand is set appropriately based on
// if the operation is absolute value or negation.
convert_inputs_div divconv1 (Float1, Float2, DivOp1, DivOp2, DivOpType, DivP);
convert_inputs_div divconv1 (Float1, Float2, FInput1E, FInput2E, DivOpType, FmtE);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input DivFlagsM. The "sel_inv" is used in
// "Denormalized" Input FDivFlagsM. The "sel_inv" is used in
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if DivOp1 and DivOp2 are not zero or denormalized.
// and op2_Norm are one if FInput1E and FInput2E are not zero or denormalized.
// sub is one if the effective operation is subtraction.
exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
Float1, Float2, DivOpType);
@ -135,26 +135,26 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di
sel_muxa, sel_muxb, sel_muxr,
reset, clk,
load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, DivP, DivOpType, exp_odd);
load_regr, load_regs, FmtE, DivOpType, exp_odd);
// FSM : control divider
fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd,
fsm control (FDivSqrtDoneM, load_rega, load_regb, load_regc, load_regd,
load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
clk, reset, DivStart, DivOpType, DivBusyM);
clk, reset, FDivStartE, DivOpType, DivBusyM);
// Round the mantissa to a 52-bit value, with the leading one
// removed. The rounding units also handles special cases and
// set the exception flags.
//***add max magnitude and swap negative and positive infinity
rounder_div divround1 (Result, DenormIO, FlagsIn,
DivFrm, DivP, DivOvEn, DivUnEn, expF,
FrmE, FmtE, DivOvEn, DivUnEn, expF,
sel_inv, Invalid, DenormIn, signResult,
q1, qm1, qp1, q0, qm0, qp0, regr_out);
// Store the final result and the exception flags in registers.
flopenr #(64) rega (clk, reset, DivSqrtDone, Result, DivResultM);
flopenr #(1) regb (clk, reset, DivSqrtDone, DenormIO, DivDenormM);
flopenr #(5) regc (clk, reset, DivSqrtDone, FlagsIn, DivFlagsM);
flopenr #(64) rega (clk, reset, FDivSqrtDoneM, Result, FDivResultM);
flopenr #(1) regb (clk, reset, FDivSqrtDoneM, DenormIO, DivDenormM);
flopenr #(5) regc (clk, reset, FDivSqrtDoneM, FlagsIn, FDivFlagsM);
endmodule // fpdiv
@ -198,7 +198,7 @@ module brent_kung (c, p, g);
logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
// parallel-prefix, Brent-Kung
// Stage 1: Generates G/DivP pairs that span 1 bits
// Stage 1: Generates G/P pairs that span 1 bits
grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
@ -207,20 +207,20 @@ module brent_kung (c, p, g);
black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
// Stage 2: Generates G/DivP pairs that span 2 bits
// Stage 2: Generates G/P pairs that span 2 bits
grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
// Stage 3: Generates G/DivP pairs that span 4 bits
// Stage 3: Generates G/P pairs that span 4 bits
grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
// Stage 4: Generates G/DivP pairs that span 8 bits
// Stage 4: Generates G/P pairs that span 8 bits
// Stage 5: Generates G/DivP pairs that span 4 bits
// Stage 5: Generates G/P pairs that span 4 bits
grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
// Stage 6: Generates G/DivP pairs that span 2 bits
// Stage 6: Generates G/P pairs that span 2 bits
grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);

View File

@ -23,10 +23,8 @@
///////////////////////////////////////////
`include "wally-config.vh"
// `include "../../config/rv64icfd/wally-config.vh" //debug
module fpu (
//input logic [2:0] FrmD,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic reset,
//input logic clear, // *** not being used anywhere
@ -42,147 +40,79 @@ module fpu (
output logic [31:0] FSROutW,
output logic [1:0] FMemRWM,
output logic FStallD,
output logic FWriteIntW,
output logic FWriteIntM,
output logic [`XLEN-1:0] FWriteDataM, // Integer input being written into fpreg
output logic DivSqrtDoneE,
output logic FWriteIntM, FWriteIntW,
output logic [`XLEN-1:0] FWriteDataM,
output logic FDivSqrtDoneM,
output logic IllegalFPUInstrD,
output logic [`XLEN-1:0] FPUResultW);
//NOTE:
//For readability and ease of modification, logic signals will be
//instantiated as they occur within the pipeline. This will keep local
//signals, modules, and combinational logic closely defined.
//used for OSU DP-size hardware to wally XLEN interfacing
integer XLENDIFF;
assign XLENDIFF = `XLEN - 64;
integer XLENDIFFN;
assign XLENDIFFN = 63 - `XLEN;
// BEGIN PIPELINE CONTROL LOGIC
logic PipeEnableDE;
logic PipeEnableEM;
logic PipeEnableMW;
logic PipeClearDE;
logic PipeClearEM;
logic PipeClearMW;
//temporarily assign pipe clear and enable signals
//to never flush & always be running
localparam PipeClear = 1'b0;
localparam PipeEnable = 1'b1;
always_comb begin
PipeEnableDE = ~StallE;
PipeEnableEM = ~StallM;
PipeEnableMW = ~StallW;
PipeClearDE = FlushE;
PipeClearEM = FlushM;
PipeClearMW = FlushW;
end
//control logic signal instantiation
logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable
logic [2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode
logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD, FWriteIntE; // Write to integer register
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory
logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal
logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal
logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal
logic FInput2UsedD; // Is input 2 used
logic FInput3UsedD; // Is input 3 used
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which operation to do in each component
// Wally-spec D stage control logic signal instantiation
logic FRegWriteD;
logic [2:0] FResultSelD;
logic [2:0] FrmD;
logic FmtD;
logic DivSqrtStartD;
logic [3:0] OpCtrlD;
logic FWriteIntD;
logic OutputInput2D;
logic [1:0] FMemRWD;
// regfile signals
logic [4:0] RdE, RdM, RdW; // ***Can take from ieu
logic [`XLEN-1:0] FWDM; // Write data for FP register
logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register
logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E;
logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE;
logic [`XLEN-1:0] FInput2E, FInput2M;
logic [`XLEN-1:0] FInput3E, FInput3M;
logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
logic DivBusyM;
logic [1:0] Input1MuxD, Input2MuxD;
logic Input3MuxD;
logic In2UsedD, In3UsedD;
//Hazard unit for FPU
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
//top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
//instantiation of D stage regfile signals (includes some W stage signals
//for easy reference)
logic [2:0] FrmW;
logic FmtW;
logic FRegWriteW;
logic [4:0] RdW, Rs1D, Rs2D, Rs3D;
logic [`XLEN-1:0] WriteDataW;
logic [63:0] FPUResultDirW;
logic [`XLEN-1:0] ReadData1D, ReadData2D, ReadData3D;
//regfile instantiation
//freg3adr fpregfile (FmtW, reset, PipeClear, clk, RdW,
// FRegWriteW,
// InstrD[19:15], InstrD[24:20], InstrD[31:27],
// FPUResultDirW,
// ReadData1D, ReadData2D, ReadData3D);
FPregfile fpregfile (clk, reset, FRegWriteW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResultDirW,
ReadData1D, ReadData2D, ReadData3D);
// wally-spec E stage control logic signal instantiation
logic FRegWriteE;
logic [2:0] FResultSelE;
logic [2:0] FrmE;
logic FmtE;
logic DivSqrtStartE;
logic [3:0] OpCtrlE;
logic [1:0] Input1MuxE, Input2MuxE;
logic Input3MuxE;
logic [63:0] FPUResultDirE;
logic FWriteIntE;
logic OutputInput2E;
logic [1:0] FMemRWE;
//instantiation of E stage regfile signals
logic [4:0] RdE;
logic [`XLEN-1:0] ReadData1E, ReadData2E, ReadData3E;
logic [`XLEN-1:0] Input1E, Input2E, Input3E, Input1tmpE;
//instantiation of E/M stage div/sqrt signals
logic DivSqrtDone, DivDenormM;
logic [63:0] DivResultM;
logic [4:0] DivFlagsM;
logic [63:0] DivOp1, DivOp2;
logic [2:0] DivFrm;
logic DivOpType;
logic DivP;
// div/sqrt signals
logic DivDenormM, DivDenormW;
logic DivOvEn, DivUnEn;
logic DivStart;
logic DivBusyM;
logic [63:0] FDivResultM, FDivResultW;
logic [4:0] FDivFlagsM, FDivFlagsW;
//instantiate E stage FMA signals here
logic [12:0] aligncntE;
logic [105:0] rE;
logic [105:0] sE;
logic [163:0] tE;
logic [8:0] normcntE;
logic [12:0] aeE;
logic bsE;
logic killprodE;
logic prodofE;
logic xzeroE;
logic yzeroE;
logic zzeroE;
logic xdenormE;
logic ydenormE;
logic zdenormE;
logic xinfE;
logic yinfE;
logic zinfE;
logic xnanE;
logic ynanE;
logic znanE;
logic nanE;
logic [8:0] sumshiftE;
logic sumshiftzeroE;
logic prodinfE;
// FMA signals
logic [12:0] aligncntE, aligncntM;
logic [105:0] rE, rM;
logic [105:0] sE, sM;
logic [163:0] tE, tM;
logic [8:0] normcntE, normcntM;
logic [12:0] aeE, aeM;
logic bsE, bsM;
logic killprodE, killprodM;
logic prodofE, prodofM;
logic xzeroE, xzeroM;
logic yzeroE, yzeroM;
logic zzeroE, zzeroM;
logic xdenormE, xdenormM;
logic ydenormE, ydenormM;
logic zdenormE, zdenormM;
logic xinfE, xinfM;
logic yinfE, yinfM;
logic zinfE, zinfM;
logic xnanE, xnanM;
logic ynanE, ynanM;
logic znanE, znanM;
logic nanE, nanM;
logic [8:0] sumshiftE, sumshiftM;
logic sumshiftzeroE, sumshiftzeroM;
logic prodinfE, prodinfM;
logic [63:0] FmaResultM, FmaResultW;
logic [4:0] FmaFlagsM, FmaFlagsW;
//instantiation of E stage add/cvt signals
// add/cvt signals
logic [63:0] AddSumE, AddSumTcE;
logic [3:0] AddSelInvE;
logic [10:0] AddExpPostSumE;
@ -192,158 +122,9 @@ module fpu (
logic [63:0] AddFloat1E, AddFloat2E;
logic [11:0] AddExp1DenormE, AddExp2DenormE;
logic [10:0] AddExponentE;
logic [63:0] AddOp1E, AddOp2E;
logic [2:0] AddRmE;
logic [3:0] AddOpTypeE;
logic AddPE, AddOvEnE, AddUnEnE;
//instantiation of E stage cmp signals
logic [7:0] WE, XE;
logic ANaNE, BNaNE, AzeroE, BzeroE;
logic [63:0] CmpOp1E, CmpOp2E;
logic [1:0] CmpSelE;
//instantiation of E/M stage fsgn signals (due to bypass logic)
logic [63:0] SgnOp1E, SgnOp2E;
logic [1:0] SgnOpCodeE, SgnOpCodeM;
logic [63:0] SgnResultE, SgnResultM;
logic [4:0] SgnFlagsE, SgnFlagsM;
//*****************
//fpregfile D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, ReadData1D, ReadData1E);
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, ReadData2D, ReadData2E);
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, ReadData3D, ReadData3E);
//*****************
//other D/E pipe registers
//*****************
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FRegWriteD, FRegWriteE);
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE);
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE);
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, Input1MuxD, Input1MuxE);
flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, Input2MuxD, Input2MuxE);
flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, Input3MuxD, Input3MuxE);
flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResultDirW, FPUResultDirE);
flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, OutputInput2D, OutputInput2E);
flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
// input muxs for forwarding
mux4 #(64) Input1Emux(ReadData1E, FPUResultDirW, FPUResultDirE, SrcAM, Input1MuxE, Input1tmpE);
mux3 #(64) Input2Emux(ReadData2E, FPUResultDirW, FPUResultDirE, Input2MuxE, Input2E);
mux2 #(64) Input3Emux(ReadData3E, FPUResultDirE, Input3MuxE, Input3E);
mux2 #(64) OutputInput2mux(Input1tmpE, Input2E, OutputInput2E, Input1E);
fma1 fma1 (.*);
//first and only instance of floating-point divider
fpdiv fpdivsqrt (.*);
//first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE,
AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE,
AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE,
AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E,
AddExp1DenormE, AddExp2DenormE, AddExponentE,
Input1E, Input2E, FrmE, OpCtrlE, FmtE);
//first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, Input1E, Input2E, OpCtrlE[1:0]);
//first and only instance of floating-point sign converter
fpusgn fpsgn (.*);
//interface between XLEN size datapath and double-precision sized
//floating-point results
//
//define offsets for LSB zero extension or truncation
always_comb begin
//truncate to 64 bits
//(causes warning during compilation - case never reached)
// if(`XLEN > 64) begin // ***KEP this isn't usedand it causes a lint error
// DivOp1 = Input1E[`XLEN-1:`XLEN-64];
// DivOp2 = Input2E[`XLEN-1:`XLEN-64];
// AddOp1E = Input1E[`XLEN-1:`XLEN-64];
// AddOp2E = Input2E[`XLEN-1:`XLEN-64];
// CmpOp1E = Input1E[`XLEN-1:`XLEN-64];
// CmpOp2E = Input2E[`XLEN-1:`XLEN-64];
// SgnOp1E = Input1E[`XLEN-1:`XLEN-64];
// SgnOp2E = Input2E[`XLEN-1:`XLEN-64];
// end
// //zero extend to 64 bits
// else begin
// DivOp1 = {Input1E,{64-`XLEN{1'b0}}};
// DivOp2 = {Input2E,{64-`XLEN{1'b0}}};
// AddOp1E = {Input1E,{64-`XLEN{1'b0}}};
// AddOp2E = {Input2E,{64-`XLEN{1'b0}}};
// CmpOp1E = {Input1E,{64-`XLEN{1'b0}}};
// CmpOp2E = {Input2E,{64-`XLEN{1'b0}}};
// SgnOp1E = {Input1E,{64-`XLEN{1'b0}}};
// SgnOp2E = {Input2E,{64-`XLEN{1'b0}}};
// end
//assign op codes
AddOpTypeE[3:0] = OpCtrlE[3:0];
CmpSelE[1:0] = OpCtrlE[1:0];
DivOpType = OpCtrlE[0];
SgnOpCodeE[1:0] = OpCtrlE[1:0];
end
//E stage control signal interfacing between wally spec and OSU fp hardware
//op codes
//wally-spec M stage control logic signal instantiation
logic FRegWriteM;
logic [2:0] FResultSelM;
logic [2:0] FrmM;
logic FmtM;
logic [3:0] OpCtrlM;
//instantiate M stage FMA signals here ***rename fma signals and resize for XLEN
logic [63:0] FmaResultM;
logic [4:0] FmaFlagsM;
logic [12:0] aligncntM;
logic [105:0] rM;
logic [105:0] sM;
logic [163:0] tM;
logic [8:0] normcntM;
logic [12:0] aeM;
logic bsM;
logic killprodM;
logic prodofM;
logic xzeroM;
logic yzeroM;
logic zzeroM;
logic xdenormM;
logic ydenormM;
logic zdenormM;
logic xinfM;
logic yinfM;
logic zinfM;
logic xnanM;
logic ynanM;
logic znanM;
logic nanM;
logic [8:0] sumshiftM;
logic sumshiftzeroM;
logic prodinfM;
//instantiation of M stage regfile signals
logic [4:0] RdM;
logic [`XLEN-1:0] Input1M, Input2M, Input3M;
logic [`XLEN-1:0] LoadStoreResultM;
//instantiation of M stage add/cvt signals
logic [63:0] AddResultM;
logic [4:0] AddFlagsM;
logic AddDenormM;
logic [63:0] AddSumM, AddSumTcM;
logic [3:0] AddSelInvM;
@ -358,22 +139,173 @@ module fpu (
logic [2:0] AddRmM;
logic [3:0] AddOpTypeM;
logic AddPM, AddOvEnM, AddUnEnM;
logic [63:0] FAddResultM, FAddResultW;
logic [4:0] FAddFlagsM, FAddFlagsW;
//cmp signals
logic [7:0] WE, WM;
logic [7:0] XE, XM;
logic ANaNE, ANaNM;
logic BNaNE, BNaNM;
logic AzeroE, AzeroM;
logic BzeroE, BzeroM;
logic CmpInvalidM, CmpInvalidW;
logic [1:0] CmpFCCM, CmpFCCW;
logic [63:0] FCmpResultW;
// fsgn signals
logic [63:0] SgnResultE, SgnResultM, SgnResultW;
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
//instantiation of W stage regfile signals
logic [`XLEN-1:0] SrcAW;
// classify signals
logic [63:0] ClassResultE, ClassResultM, ClassResultW;
logic [4:0] ClassFlagsE, ClassFlagsM, ClassFlagsW;
// other
logic [63:0] FPUResult64W, FPUResult64E; // 64-bit FPU result
logic [4:0] FPUFlagsW;
// pipeline control logic
logic PipeEnableDE;
logic PipeEnableEM;
logic PipeEnableMW;
logic PipeClearDE;
logic PipeClearEM;
logic PipeClearMW;
//temporarily assign pipe clear and enable signals
//to never flush & always be running
localparam PipeClear = 1'b0;
localparam PipeEnable = 1'b1;
always_comb begin
PipeEnableDE = ~StallE;
PipeEnableEM = ~StallM;
PipeEnableMW = ~StallW;
PipeClearDE = FlushE;
PipeClearEM = FlushM;
PipeClearMW = FlushW;
end
//DECODE STAGE
//Hazard unit for FPU
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
//top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
//regfile instantiation
FPregfile fpregfile (clk, reset, FWriteEnW,
InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
FPUResult64W,
FRD1D, FRD2D, FRD3D);
//instantiation of M stage cmp signals
logic CmpInvalidM;
logic [1:0] CmpFCCM;
logic [7:0] WM, XM;
logic ANaNM, BNaNM, AzeroM, BzeroM;
logic [63:0] CmpOp1M, CmpOp2M;
logic [1:0] CmpSelM;
//*****************
//fpregfile D/E pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, Input1E, Input1M);
flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, Input2E, Input2M);
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, Input3E, Input3M);
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
//*****************
//other D/E pipe registers
//*****************
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
//EXECUTION STAGE
// input muxes for forwarding
mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
fma1 fma1 (.*);
//first and only instance of floating-point divider
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .*);
//first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*);
//first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
//first and only instance of floating-point sign converter
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
//*****************
//fpregfile E/M pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
//*****************
//fma E/M pipe registers
@ -427,8 +359,6 @@ module fpu (
flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM);
flopenrc #(64) EMRegAdd21(clk, reset, PipeClearEM, PipeEnableEM, AddOp1E, AddOp1M);
flopenrc #(64) EMRegAdd22(clk, reset, PipeClearEM, PipeEnableEM, AddOp2E, AddOp2M);
flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM);
flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM);
@ -444,73 +374,58 @@ module fpu (
flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM);
flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM);
flopenrc #(64) EMRegCmp7(clk, reset, PipeClearEM, PipeEnableEM, CmpOp1E, CmpOp1M);
flopenrc #(64) EMRegCmp8(clk, reset, PipeClearEM, PipeEnableEM, CmpOp2E, CmpOp2M);
flopenrc #(2) EMRegCmp9(clk, reset, PipeClearEM, PipeEnableEM, CmpSelE, CmpSelM);
//put this in for the event we want to delay fsgn - will otherwise bypass
//*****************
//fpsgn E/M pipe registers
//*****************
flopenrc #(2) EMRegSgn1(clk, reset, PipeClearEM, PipeEnableEM, SgnOpCodeE, SgnOpCodeM);
flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
//*****************
//other E/M pipe registers
//*****************
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FRegWriteE, FRegWriteM);
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM);
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
assign FWriteDataM = Input1M;
mux2 #(64) LoadStoreResultMux(HRDATA, Input1M, |OpCtrlM[2:1], LoadStoreResultM);
//BEGIN MEMORY STAGE
assign FWriteDataM = FInput1M;
mux2 #(64) FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
fma2 fma2(.*);
//second instance of two-stage floating-point add/cvt unit
fpuaddcvt2 fpadd2 (.*);
//second instance of two-stage floating-point comparator
fpucmp2 fpcmp2 (CmpInvalidM, CmpFCCM, ANaNM, BNaNM, AzeroM, BzeroM, WM, XM, CmpSelM, CmpOp1M, CmpOp2M);
fpucmp2 fpcmp2 (CmpInvalidM, CmpFCCM, ANaNM, BNaNM, AzeroM, BzeroM, WM, XM, {1'b0, FmtM}, FInput1M, FInput2M);
//wally-spec W stage control logic signal instantiation
logic [2:0] FResultSelW;
//instantiate W stage fma signals here
logic [63:0] FmaResultW;
logic [4:0] FmaFlagsW;
//instantiation of W stage div/sqrt signals
logic DivDenormW;
logic [63:0] DivResultW;
logic [4:0] DivFlagsW;
//instantiation of W stage fsgn signals
logic [63:0] SgnResultW;
logic [4:0] SgnFlagsW;
//instantiation of W stage regfile signals
logic [`XLEN-1:0] LoadStoreResultW;
logic [`XLEN-1:0] SrcAW;
//instantiation of W stage add/cvt signals
logic [63:0] AddResultW;
logic [4:0] AddFlagsW;
logic AddDenormW;
//instantiation of W stage cmp signals
logic [63:0] CmpResultW;
logic CmpInvalidW;
logic [1:0] CmpFCCW;
//instantiation of W stage classify signals
logic [63:0] ClassResultW;
logic [4:0] ClassFlagsW;
//*****************
//fma M/W pipe registers
@ -521,16 +436,15 @@ module fpu (
//*****************
//fpdiv M/W pipe registers
//*****************
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, DivResultM, DivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, DivFlagsM, DivFlagsW);
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW);
//*****************
//fpadd M/W pipe registers
//*****************
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, AddResultM, AddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, AddFlagsM, AddFlagsW);
flopenrc #(1) MWRegAdd3(clk, reset, PipeClearMW, PipeEnableMW, AddDenormM, AddDenormW);
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW);
//*****************
//fpcmp M/W pipe registers
@ -547,30 +461,31 @@ module fpu (
//*****************
//other M/W pipe registers
//*****************
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FRegWriteM, FRegWriteW);
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, LoadStoreResultM, LoadStoreResultW);
flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
//flag signal mux via in-line ternaries
logic [4:0] FPUFlagsW;
//if bit 2 is active set to sign flags - otherwise:
//iff bit one is high - if bit zero is active set to fma flags - otherwise
//set to cmp flags
//iff bit one is low - if bit zero is active set to add/cvt flags - otherwise
//set to div/sqrt flags
//assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : (
// (FResultSelW[1]) ?
// ( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) )
// : ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) )
// );
//#########################################
//BEGIN WRITEBACK STAGE
//#########################################
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUFlagsW = DivFlagsW;
3'b000 : FPUFlagsW = FDivFlagsW;
// cmp
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
//fma/mult
@ -578,45 +493,37 @@ module fpu (
// sgn inj
3'b011 : FPUFlagsW = SgnFlagsW;
// add/sub/cnvt
3'b100 : FPUFlagsW = AddFlagsW;
3'b100 : FPUFlagsW = FAddFlagsW;
// classify
3'b101 : FPUFlagsW = ClassFlagsW;
// output SrcAW
3'b110 : FPUFlagsW = 5'b0;
// output ReadData1
// output FRD1
3'b111 : FPUFlagsW = 5'b0;
default : FPUFlagsW = 5'bxxxxx;
endcase
end
//result mux via in-line ternaries
//the uses the same logic as for flag signals
//assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : (
// (FResultSelW[1]) ?
// ( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) )
// : ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) )
// );
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUResultDirW = DivResultW;
3'b000 : FPUResult64W = FDivResultW;
// cmp
3'b001 : FPUResultDirW = CmpResultW;
3'b001 : FPUResult64W = FCmpResultW;
//fma/mult
3'b010 : FPUResultDirW = FmaResultW;
3'b010 : FPUResult64W = FmaResultW;
// sgn inj
3'b011 : FPUResultDirW = SgnResultW;
3'b011 : FPUResult64W = SgnResultW;
// add/sub/cnvt
3'b100 : FPUResultDirW = AddResultW;
3'b100 : FPUResult64W = FAddResultW;
// classify
3'b101 : FPUResultDirW = ClassResultW;
3'b101 : FPUResult64W = ClassResultW;
// output SrcAW
3'b110 : FPUResultDirW = SrcAW;
3'b110 : FPUResult64W = SrcAW;
// Load/Store/Move to FP-register
3'b111 : FPUResultDirW = LoadStoreResultW;
default : FPUResultDirW = {64{1'bx}};
3'b111 : FPUResult64W = FLoadStoreResultW;
default : FPUResult64W = {64{1'bx}};
endcase
end
//interface between XLEN size datapath and double-precision sized
@ -626,21 +533,8 @@ module fpu (
always_comb begin
//zero extension
// Teo 04/13/2021
// Commented out XLENDIFF{1'b0} due to error:
// Repetition multiplier must be constant.
//if(`XLEN > 64) begin
// FPUResultW = {FPUResultDirW,{XLENDIFF{1'b0}}};
//end
//truncate
//else begin
FPUResultW = FPUResultDirW[63:64-`XLEN];
FPUResultW = FPUResult64W[63:64-`XLEN];
SetFflagsM = FPUFlagsW;
//end
end
endmodule // fpu
endmodule

View File

@ -27,16 +27,15 @@
//
module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin);
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE);
input logic [63:0] op1; // 1st input operand (A)
input logic [63:0] op2; // 2nd input operand (B)
input logic [2:0] rm; // Rounding mode - specify values
input logic [3:0] op_type; // Function opcode
input logic Pin; // Result Precision (1 for double, 0 for single)
input logic [63:0] FInput1E; // 1st input operand (A)
input logic [63:0] FInput2E; // 2nd input operand (B)
input logic [3:0] FOpCtrlE; // Function opcode
input logic FmtE; // Result Precision (1 for double, 0 for single)
wire P;
assign P = ~Pin | op_type[2];
assign P = ~FmtE | FOpCtrlE[2];
wire [63:0] IntValue;
wire [11:0] exp1, exp2;
@ -54,44 +53,44 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
wire zeroB;
wire [5:0] align_shift;
output logic [63:0] Float1;
output logic [63:0] Float2;
output logic [10:0] exponent;
output logic [10:0] exponent_postsum;
output logic [11:0] exp1_denorm, exp2_denorm;//KEP used to be [10:0]
output logic [63:0] sum, sum_tc;
output logic [3:0] sel_inv;
output logic corr_sign;
output logic signA;
output logic op1_Norm, op2_Norm;
output logic opA_Norm, opB_Norm;
output logic Invalid;
output logic DenormIn;
output logic [63:0] AddFloat1E;
output logic [63:0] AddFloat2E;
output logic [10:0] AddExponentE;
output logic [10:0] AddExpPostSumE;
output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0]
output logic [63:0] AddSumE, AddSumTcE;
output logic [3:0] AddSelInvE;
output logic AddCorrSignE;
output logic AddSignAE;
output logic AddOp1NormE, AddOp2NormE;
output logic AddOpANormE, AddOpBNormE;
output logic AddInvalidE;
output logic AddDenormInE;
// output logic exp_valid;
output logic convert;
output logic swap;
output logic normal_overflow;
output logic AddConvertE;
output logic AddSwapE;
output logic AddNormOvflowE;
wire [5:0] ZP_mantissaA;
wire [5:0] ZP_mantissaB;
wire ZV_mantissaA;
wire ZV_mantissaB;
// Convert the input operands to their appropriate forms based on
// the original operands, the op_type , and their precision P.
// the original operands, the FOpCtrlE , and their precision P.
// Single precision inputs are converted to double precision
// and the sign of the first operand is set appropriately based on
// if the operation is absolute value or negation.
convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P);
convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "sel_inv" is used in
// the third pipeline stage to select the result. Also, op1_Norm
// and op2_Norm are one if op1 and op2 are not zero or denormalized.
// "Denormalized" Input Flags. The "AddSelInvE" is used in
// the third pipeline stage to select the result. Also, AddOp1NormE
// and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized.
// sub is one if the effective operation is subtraction.
exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub,
Float1, Float2, op_type);
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
AddFloat1E, AddFloat2E, FOpCtrlE);
// Perform Exponent Subtraction (used for alignment). For performance
// both exponent subtractions are performed in parallel. This was
@ -99,25 +98,25 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
// the two parallel additions. The input values are zero-extended to 12
// bits prior to performing the addition.
assign exp1 = {1'b0, Float1[62:52]};
assign exp2 = {1'b0, Float2[62:52]};
assign exp1 = {1'b0, AddFloat1E[62:52]};
assign exp2 = {1'b0, AddFloat2E[62:52]};
assign exp_diff1 = exp1 - exp2;
assign exp_diff2 = DenormIn ? ({Float2[63], exp2[10:0]} - {Float1[63], exp1[10:0]}): exp2 - exp1;
assign exp_diff2 = AddDenormInE ? ({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1;
// The second operand (B) should be set to zero, if op_type does not
// The second operand (B) should be set to zero, if FOpCtrlE does not
// specify addition or subtraction
assign zeroB = op_type[2] | op_type[1];
assign zeroB = FOpCtrlE[2] | FOpCtrlE[1];
// Swapped operands if zeroB is not one and exp1 < exp2.
// Swapping causes exp2 to be used for the result exponent.
// Swapping causes exp2 to be used for the result exponent.
// Only the exponent of the larger operand is used to determine
// the final result.
assign swap = exp_diff1[11] & ~zeroB;
assign exponent = swap ? exp2[10:0] : exp1[10:0];
assign exponent_postsum = swap ? exp2[10:0] : exp1[10:0];
assign mantissaA = swap ? Float2[51:0] : Float1[51:0];
assign mantissaB = swap ? Float1[51:0] : Float2[51:0];
assign signA = swap ? Float2[63] : Float1[63];
assign AddSwapE = exp_diff1[11] & ~zeroB;
assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0];
assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0];
assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0];
assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0];
assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63];
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
@ -127,12 +126,12 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
// Denormalized exponents created by subtracting the leading zeroes from the original exponents
assign exp1_denorm = swap ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
assign exp2_denorm = swap ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});
// Determine the alignment shift and limit it to 63. If any bit from
// exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
assign exp_shift = swap ? exp_diff2 : exp_diff1;
assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
| exp_shift[8] | exp_shift[7] | exp_shift[6];
assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift
@ -147,10 +146,10 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
// and loss of sign information. The two bits to the right of the
// original mantissa form the "guard" and "round" bits that are used
// to round the result.
assign opA_Norm = swap ? op2_Norm : op1_Norm;
assign opB_Norm = swap ? op1_Norm : op2_Norm;
assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0};
assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0};
assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE;
assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE;
assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};
// Perform mantissa alignment using a 57-bit barrel shifter
// If any of the bits shifted out are one, Sticky_out is set.
@ -160,8 +159,8 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
// Place either the sign-extended 32-bit value or the original 64-bit value
// into IntValue (to be used for integer to floating point conversion)
assign IntValue [31:0] = op1[31:0];
assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32];
assign IntValue [31:0] = FInput1E[31:0];
assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32];
// If doing an integer to floating point conversion, mantissaA3 is set to
// IntValue and the prenormalized exponent is set to 1084. Otherwise,
@ -169,30 +168,30 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm,
// and the exponent value is left unchanged.
// Under denormalized cases, the exponent before the rounder is set to 1
// if the normal shift value is 11.
assign convert = ~op_type[2] & op_type[1];
assign mantissaA3 = (op_type[3]) ? (op_type[0] ? Float1 : ~Float1) : (DenormIn ? ({12'h0, mantissaA}) : (convert ? IntValue : {mantissaA1, 7'h0}));
assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1];
assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0}));
// Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
// 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
// zeros.
assign mantissaB3[63:7] = (op_type[3]) ? (57'h0) : (DenormIn ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
assign mantissaB3[6] = (op_type[3]) ? (1'b0) : (DenormIn ? mantissaB[6] : Sticky_out & ~zeroB);
assign mantissaB3[5:0] = (op_type[3]) ? (6'h01) : (DenormIn ? mantissaB[5:0] : 6'h0);
assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}});
assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB);
assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0);
// The sign of the result needs to be corrected if the true
// operation is subtraction and the input operands were swapped.
assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap;
assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;
// 64-bit Mantissa Adder/Subtractor
cla64 add1 (sum, mantissaA3, mantissaB3, sub);
cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub);
// 64-bit Mantissa Subtractor - to get the two's complement of the
// result when the sign from the adder/subtractor is negative.
cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3);
cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3);
// Finds normal underflow result to determine whether to round final exponent down
//***KEP used to be (sum == 16'h0) I am unsure what it's supposed to be
assign normal_overflow = (DenormIn & (sum == 64'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]);
//***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);
endmodule // fpadd

View File

@ -27,15 +27,13 @@
//
module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM);
module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM);
input [63:0] AddOp1M; // 1st input operand (A)
input [63:0] AddOp2M; // 2nd input operand (B)
input [2:0] AddRmM; // Rounding mode - specify values
input [3:0] AddOpTypeM; // Function opcode
input AddPM; // Result Precision (0 for double, 1 for single)
input AddOvEnM; // Overflow trap enabled
input AddUnEnM; // Underflow trap enabled
input [2:0] FrmM; // Rounding mode - specify values
input [3:0] FOpCtrlM; // Function opcode
input FmtM; // Result Precision (0 for double, 1 for single)
// input AddOvEnM; // Overflow trap enabled
// input AddUnEnM; // Underflow trap enabled
input [63:0] AddSumM, AddSumTcM;
input [63:0] AddFloat1M;
input [63:0] AddFloat2M;
@ -53,12 +51,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
input AddSwapM;
// input AddNormOvflowM;
output [63:0] AddResultM; // Result of operation
output [4:0] AddFlagsM; // IEEE exception flags
output [63:0] FAddResultM; // Result of operation
output [4:0] FAddFlagsM; // IEEE exception flags
output AddDenormM; // AddDenormM on input or output
wire P;
assign P = AddPM | AddOpTypeM[2];
assign P = FmtM | FOpCtrlM[2];
wire [10:0] exp_pre;
wire [63:0] Result;
@ -82,6 +80,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
wire [63:0] sum_corr;
logic AddNormOvflowM;
logic AddOvEnM; // Overflow trap enabled
logic AddUnEnM; // Underflow trap enabled
assign AddOvEnM = 1'b1;
assign AddUnEnM = 1'b1;
//AddExponentM value pre-rounding with considerations for denormalized
//cases/conversion cases
assign exp_pre = AddDenormInM ?
@ -101,7 +105,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;
// Determines the correct comparison result based on operation and sign of resulting AddSumM
assign mantissa_comp = (AddOpTypeM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;
// If the signs are different and both operands aren't denormalized
// the normal underflow bit is needed and therefore updated.
@ -113,12 +117,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
// If the AddSumM is negative, use its two complement instead.
// This value has to be 64-bits to correctly handle the
// case 10...00
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & AddOpTypeM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~AddOpTypeM[0]) ))
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (AddOpTypeM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) ))
? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM));
// Finds normal underflow result to determine whether to round final AddExponentM down
//KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);
// Leading-Zero Detector. Determine the size of the shift needed for
// normalization. If sum_corrected is all zeros, the exp_valid is
@ -132,7 +136,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
// be right shifted. It outputs the normalized AddSumM.
barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);
assign sum_norm_w_bypass = (AddOpTypeM[3]) ? (AddOpTypeM[0] ? ~sum_corr : sum_corr) : (sum_norm);
assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm);
// Round the mantissa to a 52-bit value, with the leading one
// removed. If the result is a single precision number, the actual
@ -141,18 +145,18 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel
// exactly where the rounding point is. The rounding units also
// handles special cases and set the exception flags.
// Changed DenormIO -> AddDenormM and FlagsIn -> AddFlagsM in order to
// Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to
// help in processor reservation station detection of load/stores. In
// other words, the processor would like to know ahead of time that
// if the result is an exception then don't load or store.
rounder round1 (Result, DenormIO, FlagsIn, AddRmM, P, AddOvEnM, AddUnEnM, exp_valid,
rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid,
AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass,
AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52],
AddNormOvflowM, normal_underflow, AddSwapM, AddOpTypeM, AddSumM);
AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM);
// Store the final result and the exception flags in registers.
assign AddResultM = Result;
assign {AddDenormM, AddFlagsM} = {DenormIO, FlagsIn};
assign FAddResultM = Result;
assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn};
endmodule // fpadd

View File

@ -37,7 +37,7 @@
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);
module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec
input logic [63:0] op1;
input logic [63:0] op2;

View File

@ -27,45 +27,45 @@
module fpuhazard(
input logic [4:0] Adr1, Adr2, Adr3,
input logic FRegWriteE, FRegWriteM, FRegWriteW,
input logic FWriteEnE, FWriteEnM, FWriteEnW,
input logic [4:0] RdE, RdM, RdW,
input logic DivBusyM,
input logic RegWriteD,
input logic [2:0] FResultSelD, FResultSelE,
input logic IllegalFPUInstrD,
input logic In2UsedD, In3UsedD,
input logic FInput2UsedD, FInput3UsedD,
// Stall outputs
output logic FStallD,
output logic [1:0] Input1MuxD, Input2MuxD,
output logic Input3MuxD
output logic [1:0] FForwardInput1D, FForwardInput2D,
output logic FForwardInput3D
);
always_comb begin
// set ReadData as default
Input1MuxD = 2'b00;
Input2MuxD = 2'b00;
Input3MuxD = 1'b0;
FForwardInput1D = 2'b00;
FForwardInput2D = 2'b00;
FForwardInput3D = 1'b0;
FStallD = DivBusyM;
if (~IllegalFPUInstrD) begin
// if taking a value from int register
if ((Adr1 == RdE) & (FRegWriteE | ((FResultSelE == 3'b110) & RegWriteD)))
if (FResultSelE == 3'b110) Input1MuxD = 2'b11; // choose SrcAM
if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD)))
if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM
else FStallD = 1'b1; // otherwise stall
else if ((Adr1 == RdM) & FRegWriteM) Input1MuxD = 2'b01; // choose FPUResultDirW
else if ((Adr1 == RdW) & FRegWriteW) Input1MuxD = 2'b11; // choose FPUResultDirE
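else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResult64W
else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // NOTE(review): intended to choose FPUResult64E, but FInput1Emux maps 2'b11 to SrcAM and 2'b10 to FPUResult64E - confirm select value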
if(In2UsedD)
if ((Adr2 == RdE) & FRegWriteE) FStallD = 1'b1;
else if ((Adr2 == RdM) & FRegWriteM) Input2MuxD = 2'b01; // choose FPUResultDirW
else if ((Adr2 == RdW) & FRegWriteW) Input2MuxD = 2'b10; // choose FPUResultDirE
if(FInput2UsedD)
if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1;
else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResult64W
else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResult64E
if(In3UsedD)
if ((Adr3 == RdE) & FRegWriteE) FStallD = 1'b1;
else if ((Adr3 == RdM) & FRegWriteM) FStallD = 1'b1;
else if ((Adr3 == RdW) & FRegWriteW) Input3MuxD = 1'b1; // choose FPUResultDirE
if(FInput3UsedD)
if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1;
else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1;
else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResult64E
end
end

View File

@ -1,8 +1,8 @@
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1E, SgnOp2E);
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E);
input [63:0] SgnOp1E, SgnOp2E;
input [63:0] FInput1E, FInput2E;
input [1:0] SgnOpCodeE;
output [63:0] SgnResultE;
output [4:0] SgnFlagsE;
@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1E, SgnOp2E);
//op code designation:
//
//00 - fsgnj - directly copy over sign value of SgnOp2E
//01 - fsgnjn - negate sign value of SgnOp2E
//10 - fsgnjx - XOR sign values of SgnOp1E & SgnOp2E
//00 - fsgnj - directly copy over sign value of FInput2E
//01 - fsgnjn - negate sign value of FInput2E
//10 - fsgnjx - XOR sign values of FInput1E & FInput2E
//
assign SgnResultE[63] = SgnOpCodeE[1] ? (SgnOp1E[63] ^ SgnOp2E[63]) : (SgnOp2E[63] ^ SgnOpCodeE[0]);
assign SgnResultE[62:0] = SgnOp1E[62:0];
assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]);
assign SgnResultE[62:0] = FInput1E[62:0];
//If the exponent is all ones, then the value is either Inf or NaN,
//both of which will produce a QNaN/SNaN value of some sort. This will
//set the invalid flag high.
assign AonesExp = SgnOp1E[62]&SgnOp1E[61]&SgnOp1E[60]&SgnOp1E[59]&SgnOp1E[58]&SgnOp1E[57]&SgnOp1E[56]&SgnOp1E[55]&SgnOp1E[54]&SgnOp1E[53]&SgnOp1E[52];
assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52];
//the only flag that can occur during this operation is invalid
//due to changing sign on already existing NaN

View File

@ -10,46 +10,46 @@
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE,
module special(FInput1E, FInput2E, FInput3E, xzeroE, yzeroE, zzeroE,
xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE);
/////////////////////////////////////////////////////////////////////////////
input logic [63:0] Input1E; // Input Input1E
input logic [63:0] Input2E; // Input Input2E
input logic [63:0] Input3E; // Input Input3E
output logic xzeroE; // Input Input1E = 0
output logic yzeroE; // Input Input2E = 0
output logic zzeroE; // Input Input3E = 0
output logic xnanE; // Input1E is NaN
output logic ynanE; // Input2E is NaN
output logic znanE; // Input3E is NaN
output logic xdenormE; // Input1E is denormalized
output logic ydenormE; // Input2E is denormalized
output logic zdenormE; // Input3E is denormalized
output logic xinfE; // Input1E is infinity
output logic yinfE; // Input2E is infinity
output logic zinfE; // Input3E is infinity
input logic [63:0] FInput1E; // Input FInput1E
input logic [63:0] FInput2E; // Input FInput2E
input logic [63:0] FInput3E; // Input FInput3E
output logic xzeroE; // Input FInput1E = 0
output logic yzeroE; // Input FInput2E = 0
output logic zzeroE; // Input FInput3E = 0
output logic xnanE; // FInput1E is NaN
output logic ynanE; // FInput2E is NaN
output logic znanE; // FInput3E is NaN
output logic xdenormE; // FInput1E is denormalized
output logic ydenormE; // FInput2E is denormalized
output logic zdenormE; // FInput3E is denormalized
output logic xinfE; // FInput1E is infinity
output logic yinfE; // FInput2E is infinity
output logic zinfE; // FInput3E is infinity
// In the actual circuit design, the gates looking at bits
// 51:0 and at bits 62:52 should be shared among the various detectors.
// Check if input is NaN
assign xnanE = &Input1E[62:52] && |Input1E[51:0];
assign ynanE = &Input2E[62:52] && |Input2E[51:0];
assign znanE = &Input3E[62:52] && |Input3E[51:0];
assign xnanE = &FInput1E[62:52] && |FInput1E[51:0];
assign ynanE = &FInput2E[62:52] && |FInput2E[51:0];
assign znanE = &FInput3E[62:52] && |FInput3E[51:0];
// Check if input is denormalized
assign xdenormE = ~(|Input1E[62:52]) && |Input1E[51:0];
assign ydenormE = ~(|Input2E[62:52]) && |Input2E[51:0];
assign zdenormE = ~(|Input3E[62:52]) && |Input3E[51:0];
assign xdenormE = ~(|FInput1E[62:52]) && |FInput1E[51:0];
assign ydenormE = ~(|FInput2E[62:52]) && |FInput2E[51:0];
assign zdenormE = ~(|FInput3E[62:52]) && |FInput3E[51:0];
// Check if input is infinity
assign xinfE = &Input1E[62:52] && ~(|Input1E[51:0]);
assign yinfE = &Input2E[62:52] && ~(|Input2E[51:0]);
assign zinfE = &Input3E[62:52] && ~(|Input3E[51:0]);
assign xinfE = &FInput1E[62:52] && ~(|FInput1E[51:0]);
assign yinfE = &FInput2E[62:52] && ~(|FInput2E[51:0]);
assign zinfE = &FInput3E[62:52] && ~(|FInput3E[51:0]);
// Check if inputs are all zero
// Also forces denormalized inputs to zero.
@ -57,11 +57,11 @@ module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE,
// to just check if the exponent is zero.
// KATHERINE - commented following (21/01/11)
// assign xzeroE = ~(|Input1E[62:0]) || xdenormE;
// assign yzeroE = ~(|Input2E[62:0]) || ydenormE;
// assign zzeroE = ~(|Input3E[62:0]) || zdenormE;
// assign xzeroE = ~(|FInput1E[62:0]) || xdenormE;
// assign yzeroE = ~(|FInput2E[62:0]) || ydenormE;
// assign zzeroE = ~(|FInput3E[62:0]) || zdenormE;
// KATHERINE - removed denorm to prevent outputting zero when computing with a denormalized number
assign xzeroE = ~(|Input1E[62:0]);
assign yzeroE = ~(|Input2E[62:0]);
assign zzeroE = ~(|Input3E[62:0]);
assign xzeroE = ~(|FInput1E[62:0]);
assign yzeroE = ~(|FInput2E[62:0]);
assign zzeroE = ~(|FInput3E[62:0]);
endmodule

View File

@ -100,7 +100,7 @@ module wallypipelinedhart (
logic FStallD;
logic FWriteIntW, FWriteIntM;
logic [31:0] FSROutW;
logic DivSqrtDoneE;
logic FDivSqrtDoneM;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic [`XLEN-1:0] FPUResultW;