From e7190b06903f8fd2162824beddbd2d88b26ed0e7 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 25 May 2021 20:04:34 -0400 Subject: [PATCH] renamed top level FPU wires --- wally-pipelined/src/fpu/fctrl.sv | 43 +- wally-pipelined/src/fpu/fma1.sv | 24 +- wally-pipelined/src/fpu/fma2.sv | 28 +- wally-pipelined/src/fpu/fpdiv.sv | 54 +- wally-pipelined/src/fpu/fpu.sv | 1080 ++++++++--------- wally-pipelined/src/fpu/fpuaddcvt1.sv | 121 +- wally-pipelined/src/fpu/fpuaddcvt2.sv | 46 +- wally-pipelined/src/fpu/fpucmp1.sv | 2 +- wally-pipelined/src/fpu/fpuhazard.sv | 40 +- wally-pipelined/src/fpu/fsgn.sv | 16 +- wally-pipelined/src/fpu/special.sv | 62 +- .../src/wally/wallypipelinedhart.sv | 2 +- 12 files changed, 707 insertions(+), 811 deletions(-) diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index f24368e17..840c95301 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -6,16 +6,15 @@ module fctrl ( input logic [2:0] Funct3D, input logic [2:0] FRM_REGW, output logic IllegalFPUInstrD, - output logic FRegWriteD, - output logic DivSqrtStartD, - //output logic [2:0] regSelD, + output logic FWriteEnD, + output logic FDivStartD, output logic [2:0] FResultSelD, - output logic [3:0] OpCtrlD, + output logic [3:0] FOpCtrlD, output logic FmtD, output logic [2:0] FrmD, output logic [1:0] FMemRWD, - output logic OutputInput2D, - output logic In2UsedD, In3UsedD, + output logic FOutputInput2D, + output logic FInput2UsedD, FInput3UsedD, output logic FWriteIntD); @@ -102,9 +101,9 @@ module fctrl ( end end - assign OutputInput2D = OpD == 7'b0100111; + assign FOutputInput2D = OpD == 7'b0100111; - assign FMemRWD[0] = OutputInput2D; + assign FMemRWD[0] = FOutputInput2D; assign FMemRWD[1] = OpD == 7'b0000111; @@ -131,7 +130,7 @@ module fctrl ( //this value is used enough to be shorthand //if op is div/sqrt - start div/sqrt - assign DivSqrtStartD = ~|FResultSelD; // is FResultSelD == 000 + assign FDivStartD = 
~|FResultSelD; // is FResultSelD == 000 //operation control for each fp operation //has to be expanded over standard to account for @@ -144,7 +143,7 @@ module fctrl ( //version I used for this repo //let's do separate SOP for each type of operation -// assign OpCtrlD[3] = 1'b0; +// assign FOpCtrlD[3] = 1'b0; // // @@ -152,12 +151,12 @@ module fctrl ( always_comb begin IllegalFPUInstr1D = 0; - In3UsedD = 0; + FInput3UsedD = 0; case (FResultSelD) // div/sqrt // fdiv = ???0 // fsqrt = ???1 - 3'b000 : begin OpCtrlD = {3'b0, Funct7D[5]}; In2UsedD = ~Funct7D[5]; end + 3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end // cmp // fmin = ?100 // fmax = ?101 @@ -165,7 +164,7 @@ module fctrl ( // flt = ?001 // fle = ?011 // {?, is min or max, is eq or le, is lt or le} - 3'b001 : begin OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; In2UsedD = 1'b1; end + 3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end //fma/mult // fmadd = ?000 // fmsub = ?001 @@ -173,12 +172,12 @@ module fctrl ( // fnmsub = ?011 // fmul = ?100 // {?, is mul, is negitive, is sub} - 3'b010 : begin OpCtrlD = {1'b0, OpD[4:2]}; In2UsedD = 1'b1; In3UsedD = ~OpD[4]; end + 3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end // sgn inj // fsgnj = ??00 // fsgnjn = ??01 // fsgnjx = ??10 - 3'b011 : begin OpCtrlD = {2'b0, Funct3D[1:0]}; In2UsedD = 1'b1; end + 3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end // add/sub/cnvt // fadd = 0000 // fsub = 0001 @@ -193,13 +192,13 @@ module fctrl ( // fcvt.d.wu = 1111 // fcvt.d.s = 1000 // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub - 3'b100 : begin OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; In2UsedD = ~Funct7D[5]; end + 3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], 
Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end // classify {?, ?, ?, ?} - 3'b101 : begin OpCtrlD = 4'b0; In2UsedD = 1'b0; end + 3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end // output SrcAW // fmv.w.x = ???0 // fmv.w.d = ???1 - 3'b110 : begin OpCtrlD = {3'b0, Funct7D[0]}; In2UsedD = 1'b0; end + 3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end // output Input1 // flw = ?000 // fld = ?001 @@ -207,9 +206,9 @@ module fctrl ( // fsd = ?011 // output Input2 // fmv.x.w = ?100 // fmv.x.d = ?101 - // {?, is mv, is store, is double or fcvt.d.w} - 3'b111 : begin OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; In2UsedD = OpD[5]; end - default : begin OpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; In2UsedD = 1'b0; end + // {?, is mv, is store, is double or fmv} + 3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end + default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; FInput2UsedD = 1'b0; end endcase end @@ -219,5 +218,5 @@ module fctrl ( // is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]); // if not writting to int reg and not a store function and not move - assign FRegWriteD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP; + assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP; endmodule diff --git a/wally-pipelined/src/fpu/fma1.sv b/wally-pipelined/src/fpu/fma1.sv index 59b516007..e4f818c8f 100644 --- a/wally-pipelined/src/fpu/fma1.sv +++ b/wally-pipelined/src/fpu/fma1.sv @@ -15,13 +15,13 @@ // normalize Normalization shifter // round Rounding of result // exception Handles exceptional cases -// bypass Handles bypass of result to Input1E or Input3E 
inputs +// bypass Handles bypass of result to FInput1E or FInput3E inputs // sign One bit sign handling block // special Catch special cases (inputs = 0 / infinity / etc.) // -// The FMAC computes FmaResultM=Input1E*Input2E+Input3E, rounded with the mode specified by +// The FMAC computes FmaResultM=FInput1E*FInput2E+FInput3E, rounded with the mode specified by // RN, RZ, RM, or RP. The result is optionally bypassed back to -// the Input1E or Input3E inputs for use on the next cycle. In addition, four signals +// the FInput1E or FInput3E inputs for use on the next cycle. In addition, four signals // are produced: trap, overflow, underflow, and inexact. Trap indicates // an infinity, NaN, or denormalized number to be handled in software; // the other three signals are IEEE flags. @@ -29,15 +29,15 @@ ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// -module fma1(Input1E, Input2E, Input3E, FrmE, +module fma1(FInput1E, FInput2E, FInput3E, FrmE, rE, sE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE, aligncntE, aeE , xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE, nanE, prodinfE); ///////////////////////////////////////////////////////////////////////////// - input logic [63:0] Input1E; // input 1 - input logic [63:0] Input2E; // input 2 - input logic [63:0] Input3E; // input 3 + input logic [63:0] FInput1E; // input 1 + input logic [63:0] FInput2E; // input 2 + input logic [63:0] FInput3E; // input 3 input logic [2:0] FrmE; // Rounding mode output logic [12:0] aligncntE; // status flags output logic [105:0] rE; // one result of partial product sum @@ -45,7 +45,7 @@ module fma1(Input1E, Input2E, Input3E, FrmE, output logic [163:0] tE; // output logic of alignment shifter output logic [12:0] aeE; // multiplier expoent output logic bsE; // sticky bit of addend - output logic killprodE; // Input3E >> product + output logic 
killprodE; // FInput3E >> product output logic xzeroE; output logic yzeroE; output logic zzeroE; @@ -68,7 +68,7 @@ module fma1(Input1E, Input2E, Input3E, FrmE, // output logic [12:0] aligncntE; // shift count for alignment - logic prodof; // Input1E*Input2E out of range + logic prodof; // FInput1E*FInput2E out of range @@ -84,12 +84,12 @@ module fma1(Input1E, Input2E, Input3E, FrmE, // Instantiate fraction datapath - multiply multiply(.xman(Input1E[51:0]), .yman(Input2E[51:0]), .*); - align align(.zman(Input3E[51:0]),.*); + multiply multiply(.xman(FInput1E[51:0]), .yman(FInput2E[51:0]), .*); + align align(.zman(FInput3E[51:0]),.*); // Instantiate exponent datapath - expgen1 expgen1(.xexp(Input1E[62:52]),.yexp(Input2E[62:52]),.zexp(Input3E[62:52]),.*); + expgen1 expgen1(.xexp(FInput1E[62:52]),.yexp(FInput2E[62:52]),.zexp(FInput3E[62:52]),.*); // Instantiate special case detection across datapath & exponent path special special(.*); diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index 23e6bb6b5..467a4d285 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -15,13 +15,13 @@ // normalize Normalization shifter // round Rounding of result // exception Handles exceptional cases -// bypass Handles bypass of result to Input1M or Input3M input logics +// bypass Handles bypass of result to FInput1M or FInput3M input logics // sign One bit sign handling block // special Catch special cases (input logics = 0 / infinity / etc.) // -// The FMAC computes FmaResultM=Input1M*Input2M+Input3M, rounded with the mode specified by +// The FMAC computes FmaResultM=FInput1M*FInput2M+FInput3M, rounded with the mode specified by // RN, RZ, RM, or RP. The result is optionally bypassed back to -// the Input1M or Input3M input logics for use on the next cycle. In addition, four signals +// the FInput1M or FInput3M input logics for use on the next cycle. 
In addition, four signals // are produced: trap, overflow, underflow, and inexact. Trap indicates // an infinity, NaN, or denormalized number to be handled in software; // the other three signals are IMMM flags. @@ -29,7 +29,7 @@ ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// -module fma2(Input1M, Input2M, Input3M, FrmM, +module fma2(FInput1M, FInput2M, FInput3M, FrmM, FmaResultM, FmaFlagsM, aligncntM, rM, sM, tM, normcntM, aeM, bsM,killprodM, xzeroM, yzeroM,zzeroM,xdenormM,ydenormM, @@ -39,9 +39,9 @@ module fma2(Input1M, Input2M, Input3M, FrmM, ); ///////////////////////////////////////////////////////////////////////////// - input logic [63:0] Input1M; // input logic 1 - input logic [63:0] Input2M; // input logic 2 - input logic [63:0] Input3M; // input logic 3 + input logic [63:0] FInput1M; // input logic 1 + input logic [63:0] FInput2M; // input logic 2 + input logic [63:0] FInput3M; // input logic 3 input logic [2:0] FrmM; // Rounding mode input logic [12:0] aligncntM; // status flags input logic [105:0] rM; // one result of partial product sum @@ -50,7 +50,7 @@ module fma2(Input1M, Input2M, Input3M, FrmM, input logic [8:0] normcntM; // shift count for normalizer input logic [12:0] aeM; // multiplier expoent input logic bsM; // sticky bit of addend - input logic killprodM; // Input3M >> product + input logic killprodM; // FInput3M >> product input logic prodinfM; input logic xzeroM; input logic yzeroM; @@ -69,7 +69,7 @@ module fma2(Input1M, Input2M, Input3M, FrmM, input logic sumshiftzeroM; - output logic [63:0] FmaResultM; // output FmaResultM=Input1M*Input2M+Input3M + output logic [63:0] FmaResultM; // output FmaResultM=FInput1M*FInput2M+FInput3M output logic [4:0] FmaFlagsM; // status flags @@ -120,18 +120,18 @@ module fma2(Input1M, Input2M, Input3M, FrmM, add add(.*); lza lza(.*); - normalize normalize(.zexp(Input3M[62:52]),.*); - round 
round(.xman(Input1M[51:0]), .yman(Input2M[51:0]),.zman(Input3M[51:0]),.*); + normalize normalize(.zexp(FInput3M[62:52]),.*); + round round(.xman(FInput1M[51:0]), .yman(FInput2M[51:0]),.zman(FInput3M[51:0]),.*); // Instantiate exponent datapath - expgen2 expgen2(.xexp(Input1M[62:52]),.yexp(Input2M[62:52]),.zexp(Input3M[62:52]),.*); + expgen2 expgen2(.xexp(FInput1M[62:52]),.yexp(FInput2M[62:52]),.zexp(FInput3M[62:52]),.*); // Instantiate control logic -sign sign(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.*); -flag2 flag2(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.vbits(v[1:0]),.*); +sign sign(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.*); +flag2 flag2(.xsign(FInput1M[63]),.ysign(FInput2M[63]),.zsign(FInput3M[63]),.vbits(v[1:0]),.*); assign FmaResultM = {wsign,wexp,wman}; diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv index 1574b79ef..0d4933596 100755 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ b/wally-pipelined/src/fpu/fpdiv.sv @@ -23,25 +23,25 @@ // // `timescale 1ps/1ps -module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, DivFrm, DivOpType, DivP, DivOvEn, DivUnEn, - DivStart, reset, clk, DivBusyM); +module fpdiv (FDivSqrtDoneM, FDivResultM, FDivFlagsM, DivDenormM, FInput1E, FInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn, + FDivStartE, reset, clk, DivBusyM); - input [63:0] DivOp1; // 1st input operand (A) - input [63:0] DivOp2; // 2nd input operand (B) - input [2:0] DivFrm; // Rounding mode - specify values + input [63:0] FInput1E; // 1st input operand (A) + input [63:0] FInput2E; // 2nd input operand (B) + input [2:0] FrmE; // Rounding mode - specify values input DivOpType; // Function opcode - input DivP; // Result Precision (0 for double, 1 for single) + input FmtE; // Result Precision (0 for double, 1 for single) input DivOvEn; // Overflow trap enabled input DivUnEn; // Underflow trap enabled - input DivStart; + input FDivStartE; input reset; 
input clk; - output [63:0] DivResultM; // Result of operation - output [4:0] DivFlagsM; // IEEE exception flags + output [63:0] FDivResultM; // Result of operation + output [4:0] FDivFlagsM; // IEEE exception flags output DivDenormM; // DivDenormM on input or output - output DivSqrtDone; + output FDivSqrtDoneM; output DivBusyM; supply1 vdd; @@ -94,16 +94,16 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di logic exp_cout1, exp_cout2, exp_odd, open; // Convert the input operands to their appropriate forms based on - // the orignal operands, the DivOpType , and their precision DivP. + // the orignal operands, the DivOpType , and their precision FmtE. // Single precision inputs are converted to double precision // and the sign of the first operand is set appropratiately based on // if the operation is absolute value or negation. - convert_inputs_div divconv1 (Float1, Float2, DivOp1, DivOp2, DivOpType, DivP); + convert_inputs_div divconv1 (Float1, Float2, FInput1E, FInput2E, DivOpType, FmtE); // Test for exceptions and return the "Invalid Operation" and - // "Denormalized" Input DivFlagsM. The "sel_inv" is used in + // "Denormalized" Input FDivFlagsM. The "sel_inv" is used in // the third pipeline stage to select the result. Also, op1_Norm - // and op2_Norm are one if DivOp1 and DivOp2 are not zero or denormalized. + // and op2_Norm are one if FInput1E and FInput2E are not zero or denormalized. // sub is one if the effective operation is subtaction. 
exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, Float1, Float2, DivOpType); @@ -135,26 +135,26 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di sel_muxa, sel_muxb, sel_muxr, reset, clk, load_rega, load_regb, load_regc, load_regd, - load_regr, load_regs, DivP, DivOpType, exp_odd); + load_regr, load_regs, FmtE, DivOpType, exp_odd); // FSM : control divider - fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd, + fsm control (FDivSqrtDoneM, load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, - clk, reset, DivStart, DivOpType, DivBusyM); + clk, reset, FDivStartE, DivOpType, DivBusyM); // Round the mantissa to a 52-bit value, with the leading one // removed. The rounding units also handles special cases and // set the exception flags. //***add max magnitude and swap negitive and positive infinity rounder_div divround1 (Result, DenormIO, FlagsIn, - DivFrm, DivP, DivOvEn, DivUnEn, expF, + FrmE, FmtE, DivOvEn, DivUnEn, expF, sel_inv, Invalid, DenormIn, signResult, q1, qm1, qp1, q0, qm0, qp0, regr_out); // Store the final result and the exception flags in registers. 
- flopenr #(64) rega (clk, reset, DivSqrtDone, Result, DivResultM); - flopenr #(1) regb (clk, reset, DivSqrtDone, DenormIO, DivDenormM); - flopenr #(5) regc (clk, reset, DivSqrtDone, FlagsIn, DivFlagsM); + flopenr #(64) rega (clk, reset, FDivSqrtDoneM, Result, FDivResultM); + flopenr #(1) regb (clk, reset, FDivSqrtDoneM, DenormIO, DivDenormM); + flopenr #(5) regc (clk, reset, FDivSqrtDoneM, FlagsIn, FDivFlagsM); endmodule // fpadd @@ -198,7 +198,7 @@ module brent_kung (c, p, g); logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0; // parallel-prefix, Brent-Kung - // Stage 1: Generates G/DivP pairs that span 1 bits + // Stage 1: Generates G/P pairs that span 1 bits grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]); black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]}); black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]}); @@ -207,20 +207,20 @@ module brent_kung (c, p, g); black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]}); black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]}); - // Stage 2: Generates G/DivP pairs that span 2 bits + // Stage 2: Generates G/P pairs that span 2 bits grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2); black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4}); black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8}); - // Stage 3: Generates G/DivP pairs that span 4 bits + // Stage 3: Generates G/P pairs that span 4 bits grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4); - // Stage 4: Generates G/DivP pairs that span 8 bits + // Stage 4: Generates G/P pairs that span 8 bits - // Stage 5: Generates G/DivP pairs that span 4 bits + // Stage 5: Generates G/P pairs that span 4 bits grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8); - // Stage 6: Generates G/DivP pairs that span 2 bits + // Stage 6: Generates G/P pairs that span 2 bits grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4); grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8); grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12); diff --git a/wally-pipelined/src/fpu/fpu.sv
b/wally-pipelined/src/fpu/fpu.sv index cbc0f4820..9f40300a6 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -23,10 +23,8 @@ /////////////////////////////////////////// `include "wally-config.vh" -// `include "../../config/rv64icfd/wally-config.vh" //debug module fpu ( - //input logic [2:0] FrmD, input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic reset, //input logic clear, // *** not being used anywhere @@ -42,605 +40,501 @@ module fpu ( output logic [31:0] FSROutW, output logic [1:0] FMemRWM, output logic FStallD, - output logic FWriteIntW, - output logic FWriteIntM, - output logic [`XLEN-1:0] FWriteDataM, // Integer input being written into fpreg - output logic DivSqrtDoneE, + output logic FWriteIntM, FWriteIntW, + output logic [`XLEN-1:0] FWriteDataM, + output logic FDivSqrtDoneM, output logic IllegalFPUInstrD, output logic [`XLEN-1:0] FPUResultW); - //NOTE: - //For readability and ease of modification, logic signals will be - //instantiated as they occur within the pipeline. This will keep local - //signals, modules, and combinational logic closely defined. 
- - //used for OSU DP-size hardware to wally XLEN interfacing - - integer XLENDIFF; - assign XLENDIFF = `XLEN - 64; - integer XLENDIFFN; - assign XLENDIFFN = 63 - `XLEN; - - // BEGIN PIPELINE CONTROL LOGIC - logic PipeEnableDE; - logic PipeEnableEM; - logic PipeEnableMW; - logic PipeClearDE; - logic PipeClearEM; - logic PipeClearMW; - - //temporarily assign pipe clear and enable signals - //to never flush & always be running - localparam PipeClear = 1'b0; - localparam PipeEnable = 1'b1; - always_comb begin - PipeEnableDE = ~StallE; - PipeEnableEM = ~StallM; - PipeEnableMW = ~StallW; - PipeClearDE = FlushE; - PipeClearEM = FlushM; - PipeClearMW = FlushW; - end - - // Wally-spec D stage control logic signal instantiation - logic FRegWriteD; - logic [2:0] FResultSelD; - logic [2:0] FrmD; - logic FmtD; - logic DivSqrtStartD; - logic [3:0] OpCtrlD; - logic FWriteIntD; - logic OutputInput2D; - logic [1:0] FMemRWD; - - logic DivBusyM; - logic [1:0] Input1MuxD, Input2MuxD; - logic Input3MuxD; - logic In2UsedD, In3UsedD; - - //Hazard unit for FPU - fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*); - - //top-level controller for FPU - fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); - - //instantiation of D stage regfile signals (includes some W stage signals - //for easy reference) - logic [2:0] FrmW; - logic FmtW; - logic FRegWriteW; - logic [4:0] RdW, Rs1D, Rs2D, Rs3D; - logic [`XLEN-1:0] WriteDataW; - logic [63:0] FPUResultDirW; - logic [`XLEN-1:0] ReadData1D, ReadData2D, ReadData3D; - - //regfile instantiation - //freg3adr fpregfile (FmtW, reset, PipeClear, clk, RdW, - // FRegWriteW, - // InstrD[19:15], InstrD[24:20], InstrD[31:27], - // FPUResultDirW, - // ReadData1D, ReadData2D, ReadData3D); - FPregfile fpregfile (clk, reset, FRegWriteW, + + + + + //control logic signal instantiation + logic FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW; // FP register write enable + logic 
[2:0] FrmD, FrmE, FrmM, FrmW; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division + logic FWriteIntD, FWriteIntE; // Write to integer register + logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction + logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory + logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal + logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal + logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal + logic FInput2UsedD; // Is input 2 used + logic FInput3UsedD; // Is input 3 used + logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which operation to do in each component + + // regfile signals + logic [4:0] RdE, RdM, RdW; // ***Can take from ieu + logic [`XLEN-1:0] FWDM; // Write data for FP register + logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register + logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E; + logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE; + logic [`XLEN-1:0] FInput2E, FInput2M; + logic [`XLEN-1:0] FInput3E, FInput3M; + logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions + + // div/sqrt signals + logic DivDenormM, DivDenormW; + logic DivOvEn, DivUnEn; + logic DivBusyM; + logic [63:0] FDivResultM, FDivResultW; + logic [4:0] FDivFlagsM, FDivFlagsW; + + // FMA signals + logic [12:0] aligncntE, aligncntM; + logic [105:0] rE, rM; + logic [105:0] sE, sM; + logic [163:0] tE, tM; + logic [8:0] normcntE, normcntM; + logic [12:0] aeE, aeM; + logic bsE, bsM; + logic killprodE, killprodM; + logic prodofE, prodofM; + logic xzeroE, xzeroM; + logic yzeroE, yzeroM; + logic zzeroE, zzeroM; + logic xdenormE, xdenormM; + logic ydenormE, ydenormM; + logic zdenormE, zdenormM; +
logic xinfE, xinfM; + logic yinfE, yinfM; + logic zinfE, zinfM; + logic xnanE, xnanM; + logic ynanE, ynanM; + logic znanE, znanM; + logic nanE, nanM; + logic [8:0] sumshiftE, sumshiftM; + logic sumshiftzeroE, sumshiftzeroM; + logic prodinfE, prodinfM; + logic [63:0] FmaResultM, FmaResultW; + logic [4:0] FmaFlagsM, FmaFlagsW; + + // add/cvt signals + logic [63:0] AddSumE, AddSumTcE; + logic [3:0] AddSelInvE; + logic [10:0] AddExpPostSumE; + logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE; + logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; + logic AddConvertE; + logic [63:0] AddFloat1E, AddFloat2E; + logic [11:0] AddExp1DenormE, AddExp2DenormE; + logic [10:0] AddExponentE; + logic [2:0] AddRmE; + logic [3:0] AddOpTypeE; + logic AddPE, AddOvEnE, AddUnEnE; + logic AddDenormM; + logic [63:0] AddSumM, AddSumTcM; + logic [3:0] AddSelInvM; + logic [10:0] AddExpPostSumM; + logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM; + logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; + logic AddConvertM, AddSignM; + logic [63:0] AddFloat1M, AddFloat2M; + logic [11:0] AddExp1DenormM, AddExp2DenormM; + logic [10:0] AddExponentM; + logic [63:0] AddOp1M, AddOp2M; + logic [2:0] AddRmM; + logic [3:0] AddOpTypeM; + logic AddPM, AddOvEnM, AddUnEnM; + logic [63:0] FAddResultM, FAddResultW; + logic [4:0] FAddFlagsM, FAddFlagsW; + + //cmp signals + logic [7:0] WE, WM; + logic [7:0] XE, XM; + logic ANaNE, ANaNM; + logic BNaNE, BNaNM; + logic AzeroE, AzeroM; + logic BzeroE, BzeroM; + logic CmpInvalidM, CmpInvalidW; + logic [1:0] CmpFCCM, CmpFCCW; + logic [63:0] FCmpResultW; + + // fsgn signals + logic [63:0] SgnResultE, SgnResultM, SgnResultW; + logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; + + //instantiation of W stage regfile signals + logic [`XLEN-1:0] SrcAW; + + // classify signals + logic [63:0] ClassResultE, ClassResultM, ClassResultW; + logic [4:0] ClassFlagsE, ClassFlagsM, ClassFlagsW; + + // other + 
logic [63:0] FPUResult64W, FPUResult64E; // 64-bit FPU result + logic [4:0] FPUFlagsW; + + // pipeline control logic + logic PipeEnableDE; + logic PipeEnableEM; + logic PipeEnableMW; + logic PipeClearDE; + logic PipeClearEM; + logic PipeClearMW; + + //temporarily assign pipe clear and enable signals + //to never flush & always be running + localparam PipeClear = 1'b0; + localparam PipeEnable = 1'b1; + always_comb begin + + PipeEnableDE = ~StallE; + PipeEnableEM = ~StallM; + PipeEnableMW = ~StallW; + PipeClearDE = FlushE; + PipeClearEM = FlushM; + PipeClearMW = FlushW; + + end + + + + + + + + + + + + + + //DECODE STAGE + + //Hazard unit for FPU + fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*); + + //top-level controller for FPU + fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); + + + //regfile instantiation + FPregfile fpregfile (clk, reset, FWriteEnW, InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, - FPUResultDirW, - ReadData1D, ReadData2D, ReadData3D); + FPUResult64W, + FRD1D, FRD2D, FRD3D); + + + + + + + + + + //***************** + //fpregfile D/E pipe registers + //***************** + flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E); + + //***************** + //other D/E pipe registers + //***************** + flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE); + flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); + flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); + flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE); + flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); + flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, 
FOpCtrlE); + flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE); + flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E); + flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E); + flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E); + flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E); + flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE); + flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E); + flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE); + + + + + + + + + + + + + + //EXECUTION STAGE + + - // wally-spec E stage control logic signal instantiation - logic FRegWriteE; - logic [2:0] FResultSelE; - logic [2:0] FrmE; - logic FmtE; - logic DivSqrtStartE; - logic [3:0] OpCtrlE; - logic [1:0] Input1MuxE, Input2MuxE; - logic Input3MuxE; - logic [63:0] FPUResultDirE; - logic FWriteIntE; - logic OutputInput2E; - logic [1:0] FMemRWE; - - //instantiation of E stage regfile signals - logic [4:0] RdE; - logic [`XLEN-1:0] ReadData1E, ReadData2E, ReadData3E; - logic [`XLEN-1:0] Input1E, Input2E, Input3E, Input1tmpE; - - //instantiation of E/M stage div/sqrt signals - logic DivSqrtDone, DivDenormM; - logic [63:0] DivResultM; - logic [4:0] DivFlagsM; - logic [63:0] DivOp1, DivOp2; - logic [2:0] DivFrm; - logic DivOpType; - logic DivP; - logic DivOvEn, DivUnEn; - logic DivStart; - - //instantiate E stage FMA signals here - logic [12:0] aligncntE; - logic [105:0] rE; - logic [105:0] sE; - logic [163:0] tE; - logic [8:0] normcntE; - logic [12:0] aeE; - logic bsE; - logic killprodE; - logic prodofE; - logic xzeroE; - logic yzeroE; - logic zzeroE; - logic xdenormE; - logic ydenormE; - logic zdenormE; - logic xinfE; - logic yinfE; - logic zinfE; - logic xnanE; - logic ynanE; - logic 
znanE; - logic nanE; - logic [8:0] sumshiftE; - logic sumshiftzeroE; - logic prodinfE; - - //instantiation of E stage add/cvt signals - logic [63:0] AddSumE, AddSumTcE; - logic [3:0] AddSelInvE; - logic [10:0] AddExpPostSumE; - logic AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE; - logic AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE; - logic AddConvertE; - logic [63:0] AddFloat1E, AddFloat2E; - logic [11:0] AddExp1DenormE, AddExp2DenormE; - logic [10:0] AddExponentE; - logic [63:0] AddOp1E, AddOp2E; - logic [2:0] AddRmE; - logic [3:0] AddOpTypeE; - logic AddPE, AddOvEnE, AddUnEnE; - - //instantiation of E stage cmp signals - logic [7:0] WE, XE; - logic ANaNE, BNaNE, AzeroE, BzeroE; - logic [63:0] CmpOp1E, CmpOp2E; - logic [1:0] CmpSelE; - - //instantiation of E/M stage fsgn signals (due to bypass logic) - logic [63:0] SgnOp1E, SgnOp2E; - logic [1:0] SgnOpCodeE, SgnOpCodeM; - logic [63:0] SgnResultE, SgnResultM; - logic [4:0] SgnFlagsE, SgnFlagsM; - - //***************** - //fpregfile D/E pipe registers - //***************** - flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, ReadData1D, ReadData1E); - flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, ReadData2D, ReadData2E); - flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, ReadData3D, ReadData3E); - - //***************** - //other D/E pipe registers - //***************** - flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FRegWriteD, FRegWriteE); - flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); - flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); - flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE); - flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); - flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE); - flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE); - 
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, Input1MuxD, Input1MuxE); - flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, Input2MuxD, Input2MuxE); - flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, Input3MuxD, Input3MuxE); - flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResultDirW, FPUResultDirE); - flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE); - flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, OutputInput2D, OutputInput2E); - flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE); - // input muxs for forwarding - mux4 #(64) Input1Emux(ReadData1E, FPUResultDirW, FPUResultDirE, SrcAM, Input1MuxE, Input1tmpE); - mux3 #(64) Input2Emux(ReadData2E, FPUResultDirW, FPUResultDirE, Input2MuxE, Input2E); - mux2 #(64) Input3Emux(ReadData3E, FPUResultDirE, Input3MuxE, Input3E); - mux2 #(64) OutputInput2mux(Input1tmpE, Input2E, OutputInput2E, Input1E); + mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE); + mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E); + mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E); + mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E); - fma1 fma1 (.*); + fma1 fma1 (.*); + + //first and only instance of floating-point divider + fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .*); + + //first of two-stage instance of floating-point add/cvt unit + fpuaddcvt1 fpadd1 (.*); + + //first of two-stage instance of floating-point comparator + fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]); + + //first and only instance of floating-point sign converter + fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); + + + + + + + + + + + + + + + //***************** + //fpregfile E/M pipe registers + //***************** + flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM,
FInput1E, FInput1M); + flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M); + flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M); + + //***************** + //fma E/M pipe registers + //***************** + flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM); + flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM); + flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM); + flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM); + flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM); + flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM); + flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM); + flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); + flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); + flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); + flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM); + flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM); + flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); + flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); + flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); + flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); + flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM); + flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM); + flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); + flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM); + flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM); + 
flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); + flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); + flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); + flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM); + + //***************** + //fpadd E/M pipe registers + //***************** + flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); + flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); + flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); + flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); + flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM); + flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); + flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); + flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); + flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); + flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); + flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); + flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM); + flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); + flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignM); + flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); + flopenrc 
#(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); + flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); + flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); + flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); + flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); + flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM); + + //***************** + //fpcmp E/M pipe registers + //***************** + flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); + flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); + flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM); + flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); + flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); + flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); + + //put this in for the event we want to delay fsgn - will otherwise bypass + //***************** + //fpsgn E/M pipe registers + //***************** + flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM); + flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM); + + //***************** + //other E/M pipe registers + //***************** + flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM); + flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); + flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); + flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM); + 
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM); + flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM); + flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); + flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); + + + + + + + + + + + //BEGIN MEMORY STAGE + + assign FWriteDataM = FInput1M; + + mux2 #(64) FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); - //first and only instance of floating-point divider - fpdiv fpdivsqrt (.*); - - //first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, - AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, - AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, - AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, - AddExp1DenormE, AddExp2DenormE, AddExponentE, - Input1E, Input2E, FrmE, OpCtrlE, FmtE); - - //first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, Input1E, Input2E, OpCtrlE[1:0]); - - //first and only instance of floating-point sign converter - fpusgn fpsgn (.*); - - //interface between XLEN size datapath and double-precision sized - //floating-point results - // - //define offsets for LSB zero extension or truncation - always_comb begin - - //truncate to 64 bits - //(causes warning during compilation - case never reached) - // if(`XLEN > 64) begin // ***KEP this isn't usedand it causes a lint error - // DivOp1 = Input1E[`XLEN-1:`XLEN-64]; - // DivOp2 = Input2E[`XLEN-1:`XLEN-64]; - // AddOp1E = Input1E[`XLEN-1:`XLEN-64]; - // AddOp2E = Input2E[`XLEN-1:`XLEN-64]; - // CmpOp1E = Input1E[`XLEN-1:`XLEN-64]; - // CmpOp2E = Input2E[`XLEN-1:`XLEN-64]; - // SgnOp1E = Input1E[`XLEN-1:`XLEN-64]; - // SgnOp2E = Input2E[`XLEN-1:`XLEN-64]; - // end - // //zero extend to 64 bits - // else begin - // DivOp1 = {Input1E,{64-`XLEN{1'b0}}}; - // DivOp2 = 
{Input2E,{64-`XLEN{1'b0}}}; - // AddOp1E = {Input1E,{64-`XLEN{1'b0}}}; - // AddOp2E = {Input2E,{64-`XLEN{1'b0}}}; - // CmpOp1E = {Input1E,{64-`XLEN{1'b0}}}; - // CmpOp2E = {Input2E,{64-`XLEN{1'b0}}}; - // SgnOp1E = {Input1E,{64-`XLEN{1'b0}}}; - // SgnOp2E = {Input2E,{64-`XLEN{1'b0}}}; - // end - - //assign op codes - AddOpTypeE[3:0] = OpCtrlE[3:0]; - CmpSelE[1:0] = OpCtrlE[1:0]; - DivOpType = OpCtrlE[0]; - SgnOpCodeE[1:0] = OpCtrlE[1:0]; - - end - - //E stage control signal interfacing between wally spec and OSU fp hardware - //op codes - - //wally-spec M stage control logic signal instantiation - logic FRegWriteM; - logic [2:0] FResultSelM; - logic [2:0] FrmM; - logic FmtM; - logic [3:0] OpCtrlM; - - //instantiate M stage FMA signals here ***rename fma signals and resize for XLEN - logic [63:0] FmaResultM; - logic [4:0] FmaFlagsM; - logic [12:0] aligncntM; - logic [105:0] rM; - logic [105:0] sM; - logic [163:0] tM; - logic [8:0] normcntM; - logic [12:0] aeM; - logic bsM; - logic killprodM; - logic prodofM; - logic xzeroM; - logic yzeroM; - logic zzeroM; - logic xdenormM; - logic ydenormM; - logic zdenormM; - logic xinfM; - logic yinfM; - logic zinfM; - logic xnanM; - logic ynanM; - logic znanM; - logic nanM; - logic [8:0] sumshiftM; - logic sumshiftzeroM; - logic prodinfM; - - //instantiation of M stage regfile signals - logic [4:0] RdM; - logic [`XLEN-1:0] Input1M, Input2M, Input3M; - logic [`XLEN-1:0] LoadStoreResultM; - - //instantiation of M stage add/cvt signals - logic [63:0] AddResultM; - logic [4:0] AddFlagsM; - logic AddDenormM; - logic [63:0] AddSumM, AddSumTcM; - logic [3:0] AddSelInvM; - logic [10:0] AddExpPostSumM; - logic AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM; - logic AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM; - logic AddConvertM, AddSignM; - logic [63:0] AddFloat1M, AddFloat2M; - logic [11:0] AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentM; - logic [63:0] AddOp1M, AddOp2M; - logic [2:0] 
AddRmM; - logic [3:0] AddOpTypeM; - logic AddPM, AddOvEnM, AddUnEnM; - - //instantiation of M stage cmp signals - logic CmpInvalidM; - logic [1:0] CmpFCCM; - logic [7:0] WM, XM; - logic ANaNM, BNaNM, AzeroM, BzeroM; - logic [63:0] CmpOp1M, CmpOp2M; - logic [1:0] CmpSelM; - - - //***************** - //fpregfile D/E pipe registers - //***************** - flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, Input1E, Input1M); - flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, Input2E, Input2M); - flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, Input3E, Input3M); - - //***************** - //fma E/M pipe registers - //***************** - flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM); - flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM); - flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM); - flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM); - flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM); - flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM); - flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM); - flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); - flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); - flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); - flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM); - flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM); - flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); - flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); - flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); - flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); - 
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM); - flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM); - flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); - flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM); - flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM); - flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); - flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); - flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); - flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM); - - //***************** - //fpadd E/M pipe registers - //***************** - flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); - flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); - flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); - flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM); - flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); - flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); - flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); - flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); - flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); - flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); - flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, 
PipeEnableEM, AddSwapE, AddSwapM); - flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); - flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignM); - flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); - flopenrc #(64) EMRegAdd21(clk, reset, PipeClearEM, PipeEnableEM, AddOp1E, AddOp1M); - flopenrc #(64) EMRegAdd22(clk, reset, PipeClearEM, PipeEnableEM, AddOp2E, AddOp2M); - flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); - flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); - flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); - flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); - flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM); - - //***************** - //fpcmp E/M pipe registers - //***************** - flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); - flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); - flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM); - flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); - flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); - flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); - flopenrc #(64) EMRegCmp7(clk, reset, PipeClearEM, PipeEnableEM, CmpOp1E, CmpOp1M); - flopenrc #(64) EMRegCmp8(clk, reset, PipeClearEM, 
PipeEnableEM, CmpOp2E, CmpOp2M); - flopenrc #(2) EMRegCmp9(clk, reset, PipeClearEM, PipeEnableEM, CmpSelE, CmpSelM); - - //put this in for the event we want to delay fsgn - will otherwise bypass - //***************** - //fpsgn E/M pipe registers - //***************** - flopenrc #(2) EMRegSgn1(clk, reset, PipeClearEM, PipeEnableEM, SgnOpCodeE, SgnOpCodeM); - flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM); - flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM); - - //***************** - //other E/M pipe registers - //***************** - flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FRegWriteE, FRegWriteM); - flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); - flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); - flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM); - flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM); - flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM); - flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); - flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); - - assign FWriteDataM = Input1M; - mux2 #(64) LoadStoreResultMux(HRDATA, Input1M, |OpCtrlM[2:1], LoadStoreResultM); fma2 fma2(.*); - //second instance of two-stage floating-point add/cvt unit - fpuaddcvt2 fpadd2 (.*); - - //second instance of two-stage floating-point comparator - fpucmp2 fpcmp2 (CmpInvalidM, CmpFCCM, ANaNM, BNaNM, AzeroM, BzeroM, WM, XM, CmpSelM, CmpOp1M, CmpOp2M); - - //wally-spec W stage control logic signal instantiation - logic [2:0] FResultSelW; - - //instantiate W stage fma signals here - logic [63:0] FmaResultW; - logic [4:0] FmaFlagsW; - - //instantiation of W stage div/sqrt signals - logic DivDenormW; - logic [63:0] DivResultW; - logic [4:0] DivFlagsW; - - //instantiation of W stage fsgn signals - logic 
[63:0] SgnResultW; - logic [4:0] SgnFlagsW; - - //instantiation of W stage regfile signals - logic [`XLEN-1:0] LoadStoreResultW; - logic [`XLEN-1:0] SrcAW; - - //instantiation of W stage add/cvt signals - logic [63:0] AddResultW; - logic [4:0] AddFlagsW; - logic AddDenormW; - - //instantiation of W stage cmp signals - logic [63:0] CmpResultW; - logic CmpInvalidW; - logic [1:0] CmpFCCW; - - //instantiation of W stage classify signals - logic [63:0] ClassResultW; - logic [4:0] ClassFlagsW; - - //***************** - //fma M/W pipe registers - //***************** - flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); - flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); - - //***************** - //fpdiv M/W pipe registers - //***************** - flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, DivResultM, DivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, DivFlagsM, DivFlagsW); - flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); - - //***************** - //fpadd M/W pipe registers - //***************** - flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, AddResultM, AddResultW); - flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, AddFlagsM, AddFlagsW); - flopenrc #(1) MWRegAdd3(clk, reset, PipeClearMW, PipeEnableMW, AddDenormM, AddDenormW); - - //***************** - //fpcmp M/W pipe registers - //***************** - flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); - flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); - - //***************** - //fpsgn M/W pipe registers - //***************** - flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW); - flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW); - - //***************** - //other M/W pipe registers - 
//***************** - flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FRegWriteM, FRegWriteW); - flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW); - flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); - flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW); - flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW); - flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, LoadStoreResultM, LoadStoreResultW); - flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW); - - //flag signal mux via in-line ternaries - logic [4:0] FPUFlagsW; - //if bit 2 is active set to sign flags - otherwise: - //iff bit one is high - if bit zero is active set to fma flags - otherwise - //set to cmp flags - //iff bit one is low - if bit zero is active set to add/cvt flags - otherwise - //set to div/sqrt flags - //assign FPUFlagsW = (FResultSelW[2]) ? (SgnFlagsW) : ( - // (FResultSelW[1]) ? - // ( (FResultSelW[0]) ? (FmaFlagsW) : ({CmpInvalidW,4'b0000}) ) - // : ( (FResultSelW[0]) ? (AddFlagsW) : (DivFlagsW) ) - // ); - always_comb begin - case (FResultSelW) - // div/sqrt - 3'b000 : FPUFlagsW = DivFlagsW; - // cmp - 3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; - //fma/mult - 3'b010 : FPUFlagsW = FmaFlagsW; - // sgn inj - 3'b011 : FPUFlagsW = SgnFlagsW; - // add/sub/cnvt - 3'b100 : FPUFlagsW = AddFlagsW; - // classify - 3'b101 : FPUFlagsW = ClassFlagsW; - // output SrcAW - 3'b110 : FPUFlagsW = 5'b0; - // output ReadData1 - 3'b111 : FPUFlagsW = 5'b0; - default : FPUFlagsW = 5'bxxxxx; - endcase - end - - //result mux via in-line ternaries - //the uses the same logic as for flag signals - //assign FPUResultDirW = (FResultSelW[2]) ? (SgnResultW) : ( - // (FResultSelW[1]) ? - // ( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) ) - // : ( (FResultSelW[0]) ? 
(AddResultW) : (DivResultW) ) - // ); - - - always_comb begin - case (FResultSelW) - // div/sqrt - 3'b000 : FPUResultDirW = DivResultW; - // cmp - 3'b001 : FPUResultDirW = CmpResultW; - //fma/mult - 3'b010 : FPUResultDirW = FmaResultW; - // sgn inj - 3'b011 : FPUResultDirW = SgnResultW; - // add/sub/cnvt - 3'b100 : FPUResultDirW = AddResultW; - // classify - 3'b101 : FPUResultDirW = ClassResultW; - // output SrcAW - 3'b110 : FPUResultDirW = SrcAW; - // Load/Store/Move to FP-register - 3'b111 : FPUResultDirW = LoadStoreResultW; - default : FPUResultDirW = {64{1'bx}}; - endcase - end - //interface between XLEN size datapath and double-precision sized - //floating-point results - // - //define offsets for LSB zero extension or truncation - always_comb begin - - //zero extension - - // Teo 04/13/2021 - // Commented out XLENDIFF{1'b0} due to error: - // Repetition multiplier must be constant. - - //if(`XLEN > 64) begin - // FPUResultW = {FPUResultDirW,{XLENDIFF{1'b0}}}; - //end - //truncate - //else begin - FPUResultW = FPUResultDirW[63:64-`XLEN]; - SetFflagsM = FPUFlagsW; - //end - - end - -endmodule // fpu + //second instance of two-stage floating-point add/cvt unit + fpuaddcvt2 fpadd2 (.*); + //second instance of two-stage floating-point comparator + fpucmp2 fpcmp2 (CmpInvalidM, CmpFCCM, ANaNM, BNaNM, AzeroM, BzeroM, WM, XM, {1'b0, FmtM}, FInput1M, FInput2M); + + + + + + + + + + + + //***************** + //fma M/W pipe registers + //***************** + flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); + flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); + + //***************** + //fpdiv M/W pipe registers + //***************** + flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); + flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW); + flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); 
+ + //***************** + //fpadd M/W pipe registers + //***************** + flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); + flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); + + //***************** + //fpcmp M/W pipe registers + //***************** + flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); + flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); + + //***************** + //fpsgn M/W pipe registers + //***************** + flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW); + flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW); + + //***************** + //other M/W pipe registers + //***************** + flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW); + flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW); + flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); + flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW); + flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW); + flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW); + flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW); + + + + + + + + + + + //######################################### + //BEGIN WRITEBACK STAGE + //######################################### + + always_comb begin + case (FResultSelW) + // div/sqrt + 3'b000 : FPUFlagsW = FDivFlagsW; + // cmp + 3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; + //fma/mult + 3'b010 : FPUFlagsW = FmaFlagsW; + // sgn inj + 3'b011 : FPUFlagsW = SgnFlagsW; + // add/sub/cnvt + 3'b100 : FPUFlagsW = FAddFlagsW; + // classify + 3'b101 : FPUFlagsW = ClassFlagsW; + // output SrcAW + 3'b110 : FPUFlagsW = 5'b0; + // output FRD1 + 3'b111 : FPUFlagsW = 
5'b0; + default : FPUFlagsW = 5'bxxxxx; + endcase + end + + + always_comb begin + case (FResultSelW) + // div/sqrt + 3'b000 : FPUResult64W = FDivResultW; + // cmp + 3'b001 : FPUResult64W = FCmpResultW; + //fma/mult + 3'b010 : FPUResult64W = FmaResultW; + // sgn inj + 3'b011 : FPUResult64W = SgnResultW; + // add/sub/cnvt + 3'b100 : FPUResult64W = FAddResultW; + // classify + 3'b101 : FPUResult64W = ClassResultW; + // output SrcAW + 3'b110 : FPUResult64W = SrcAW; + // Load/Store/Move to FP-register + 3'b111 : FPUResult64W = FLoadStoreResultW; + default : FPUResult64W = {64{1'bx}}; + endcase + end + //interface between XLEN size datapath and double-precision sized + //floating-point results + // + //define offsets for LSB zero extension or truncation + always_comb begin + + //zero extension + FPUResultW = FPUResult64W[63:64-`XLEN]; + SetFflagsM = FPUFlagsW; + + end +endmodule diff --git a/wally-pipelined/src/fpu/fpuaddcvt1.sv b/wally-pipelined/src/fpu/fpuaddcvt1.sv index d50cb4e21..e1228f328 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt1.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv @@ -27,16 +27,15 @@ // -module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, op2_Norm, opA_Norm, opB_Norm, Invalid, DenormIn, convert, swap, normal_overflow, signA, Float1, Float2, exp1_denorm, exp2_denorm, exponent, op1, op2, rm, op_type, Pin); +module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE); - input logic [63:0] op1; // 1st input operand (A) - input logic [63:0] op2; // 2nd input operand (B) - input logic [2:0] rm; // Rounding mode - specify values - input logic [3:0] op_type; // Function opcode - input logic Pin; // Result Precision (1 for double, 0 for single) + input logic [63:0] FInput1E; // 
1st input operand (A) + input logic [63:0] FInput2E; // 2nd input operand (B) + input logic [3:0] FOpCtrlE; // Function opcode + input logic FmtE; // Result Precision (1 for double, 0 for single) wire P; - assign P = ~Pin | op_type[2]; + assign P = ~FmtE | FOpCtrlE[2]; wire [63:0] IntValue; wire [11:0] exp1, exp2; @@ -54,44 +53,44 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, wire zeroB; wire [5:0] align_shift; - output logic [63:0] Float1; - output logic [63:0] Float2; - output logic [10:0] exponent; - output logic [10:0] exponent_postsum; - output logic [11:0] exp1_denorm, exp2_denorm;//KEP used to be [10:0] - output logic [63:0] sum, sum_tc; - output logic [3:0] sel_inv; - output logic corr_sign; - output logic signA; - output logic op1_Norm, op2_Norm; - output logic opA_Norm, opB_Norm; - output logic Invalid; - output logic DenormIn; + output logic [63:0] AddFloat1E; + output logic [63:0] AddFloat2E; + output logic [10:0] AddExponentE; + output logic [10:0] AddExpPostSumE; + output logic [11:0] AddExp1DenormE, AddExp2DenormE;//KEP used to be [10:0] + output logic [63:0] AddSumE, AddSumTcE; + output logic [3:0] AddSelInvE; + output logic AddCorrSignE; + output logic AddSignAE; + output logic AddOp1NormE, AddOp2NormE; + output logic AddOpANormE, AddOpBNormE; + output logic AddInvalidE; + output logic AddDenormInE; // output logic exp_valid; - output logic convert; - output logic swap; - output logic normal_overflow; + output logic AddConvertE; + output logic AddSwapE; + output logic AddNormOvflowE; wire [5:0] ZP_mantissaA; wire [5:0] ZP_mantissaB; wire ZV_mantissaA; wire ZV_mantissaB; // Convert the input operands to their appropriate forms based on - // the orignal operands, the op_type , and their precision P. + // the orignal operands, the FOpCtrlE , and their precision P. 
// Single precision inputs are converted to double precision // and the sign of the first operand is set appropratiately based on // if the operation is absolute value or negation. - convert_inputs conv1 (Float1, Float2, op1, op2, op_type, P); + convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P); // Test for exceptions and return the "Invalid Operation" and - // "Denormalized" Input Flags. The "sel_inv" is used in - // the third pipeline stage to select the result. Also, op1_Norm - // and op2_Norm are one if op1 and op2 are not zero or denormalized. + // "Denormalized" Input Flags. The "AddSelInvE" is used in + // the third pipeline stage to select the result. Also, AddOp1NormE + // and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized. // sub is one if the effective operation is subtaction. - exception exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, sub, - Float1, Float2, op_type); + exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, + AddFloat1E, AddFloat2E, FOpCtrlE); // Perform Exponent Subtraction (used for alignment). For performance // both exponent subtractions are performed in parallel. This was @@ -99,25 +98,25 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, // the two parallel additions. The input values are zero-extended to 12 // bits prior to performing the addition. - assign exp1 = {1'b0, Float1[62:52]}; - assign exp2 = {1'b0, Float2[62:52]}; + assign exp1 = {1'b0, AddFloat1E[62:52]}; + assign exp2 = {1'b0, AddFloat2E[62:52]}; assign exp_diff1 = exp1 - exp2; - assign exp_diff2 = DenormIn ? ({Float2[63], exp2[10:0]} - {Float1[63], exp1[10:0]}): exp2 - exp1; + assign exp_diff2 = AddDenormInE ? 
({AddFloat2E[63], exp2[10:0]} - {AddFloat1E[63], exp1[10:0]}): exp2 - exp1; - // The second operand (B) should be set to zero, if op_type does not + // The second operand (B) should be set to zero, if FOpCtrlE does not // specify addition or subtraction - assign zeroB = op_type[2] | op_type[1]; + assign zeroB = FOpCtrlE[2] | FOpCtrlE[1]; // Swapped operands if zeroB is not one and exp1 < exp2. - // Swapping causes exp2 to be used for the result exponent. + // Swapping causes exp2 to be used for the result exponent. // Only the exponent of the larger operand is used to determine // the final result. - assign swap = exp_diff1[11] & ~zeroB; - assign exponent = swap ? exp2[10:0] : exp1[10:0]; - assign exponent_postsum = swap ? exp2[10:0] : exp1[10:0]; - assign mantissaA = swap ? Float2[51:0] : Float1[51:0]; - assign mantissaB = swap ? Float1[51:0] : Float2[51:0]; - assign signA = swap ? Float2[63] : Float1[63]; + assign AddSwapE = exp_diff1[11] & ~zeroB; + assign AddExponentE = AddSwapE ? exp2[10:0] : exp1[10:0]; + assign AddExpPostSumE = AddSwapE ? exp2[10:0] : exp1[10:0]; + assign mantissaA = AddSwapE ? AddFloat2E[51:0] : AddFloat1E[51:0]; + assign mantissaB = AddSwapE ? AddFloat1E[51:0] : AddFloat2E[51:0]; + assign AddSignAE = AddSwapE ? AddFloat2E[63] : AddFloat1E[63]; // Leading-Zero Detector. Determine the size of the shift needed for // normalization. If sum_corrected is all zeros, the exp_valid is @@ -127,12 +126,12 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB); // Denormalized exponents created by subtracting the leading zeroes from the original exponents - assign exp1_denorm = swap ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa - assign exp2_denorm = swap ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB}); + assign AddExp1DenormE = AddSwapE ? 
(exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa + assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB}); // Determine the alignment shift and limit it to 63. If any bit from // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones. - assign exp_shift = swap ? exp_diff2 : exp_diff1; + assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1; assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9] | exp_shift[8] | exp_shift[7] | exp_shift[6]; assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift @@ -147,10 +146,10 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, // and loss of sign information. The two bits to the right of the // original mantissa form the "guard" and "round" bits that are used // to round the result. - assign opA_Norm = swap ? op2_Norm : op1_Norm; - assign opB_Norm = swap ? op1_Norm : op2_Norm; - assign mantissaA1 = {2'h0, opA_Norm, mantissaA[51:0]&{52{opA_Norm}}, 2'h0}; - assign mantissaB1 = {2'h0, opB_Norm, mantissaB[51:0]&{52{opB_Norm}}, 2'h0}; + assign AddOpANormE = AddSwapE ? AddOp2NormE : AddOp1NormE; + assign AddOpBNormE = AddSwapE ? AddOp1NormE : AddOp2NormE; + assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0}; + assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0}; // Perform mantissa alignment using a 57-bit barrel shifter // If any of the bits shifted out are one, Sticky_out is set. @@ -160,8 +159,8 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, // Place either the sign-extened 32-bit value or the original 64-bit value // into IntValue (to be used for integer to floating point conversion) - assign IntValue [31:0] = op1[31:0]; - assign IntValue [63:32] = op_type[0] ? {32{op1[31]}} : op1[63:32]; + assign IntValue [31:0] = FInput1E[31:0]; + assign IntValue [63:32] = FOpCtrlE[0] ? 
{32{FInput1E[31]}} : FInput1E[63:32]; // If doing an integer to floating point conversion, mantissaA3 is set to // IntVal and the prenomalized exponent is set to 1084. Otherwise, @@ -169,30 +168,30 @@ module fpuaddcvt1 (sum, sum_tc, sel_inv, exponent_postsum, corr_sign, op1_Norm, // and the exponent value is left unchanged. // Under denormalized cases, the exponent before the rounder is set to 1 // if the normal shift value is 11. - assign convert = ~op_type[2] & op_type[1]; - assign mantissaA3 = (op_type[3]) ? (op_type[0] ? Float1 : ~Float1) : (DenormIn ? ({12'h0, mantissaA}) : (convert ? IntValue : {mantissaA1, 7'h0})); + assign AddConvertE = ~FOpCtrlE[2] & FOpCtrlE[1]; + assign mantissaA3 = (FOpCtrlE[3]) ? (FOpCtrlE[0] ? AddFloat1E : ~AddFloat1E) : (AddDenormInE ? ({12'h0, mantissaA}) : (AddConvertE ? IntValue : {mantissaA1, 7'h0})); // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six // zeros. - assign mantissaB3[63:7] = (op_type[3]) ? (57'h0) : (DenormIn ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}}); - assign mantissaB3[6] = (op_type[3]) ? (1'b0) : (DenormIn ? mantissaB[6] : Sticky_out & ~zeroB); - assign mantissaB3[5:0] = (op_type[3]) ? (6'h01) : (DenormIn ? mantissaB[5:0] : 6'h0); + assign mantissaB3[63:7] = (FOpCtrlE[3]) ? (57'h0) : (AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}}); + assign mantissaB3[6] = (FOpCtrlE[3]) ? (1'b0) : (AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB); + assign mantissaB3[5:0] = (FOpCtrlE[3]) ? (6'h01) : (AddDenormInE ? mantissaB[5:0] : 6'h0); // The sign of the result needs to be corrected if the true // operation is subtraction and the input operands were swapped. 
- assign corr_sign = ~op_type[2]&~op_type[1]&op_type[0]&swap; + assign AddCorrSignE = ~FOpCtrlE[2]&~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE; // 64-bit Mantissa Adder/Subtractor - cla64 add1 (sum, mantissaA3, mantissaB3, sub); + cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); // 64-bit Mantissa Subtractor - to get the two's complement of the // result when the sign from the adder/subtractor is negative. - cla_sub64 sub1 (sum_tc, mantissaB3, mantissaA3); + cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); // Finds normal underflow result to determine whether to round final exponent down - //***KEP used to be (sum == 16'h0) I am unsure what it's supposed to be - assign normal_overflow = (DenormIn & (sum == 64'h0) & (opA_Norm | opB_Norm) & ~op_type[0]) ? 1'b1 : (sum[63] ? sum_tc[52] : sum[52]); + //***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be + assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? 
AddSumTcE[52] : AddSumE[52]); endmodule // fpadd diff --git a/wally-pipelined/src/fpu/fpuaddcvt2.sv b/wally-pipelined/src/fpu/fpuaddcvt2.sv index e040d2d2f..36dabf080 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt2.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt2.sv @@ -27,15 +27,13 @@ // -module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, AddOp1M, AddOp2M, AddRmM, AddOpTypeM, AddPM, AddOvEnM, AddUnEnM); +module fpuaddcvt2 (FAddResultM, FAddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSelInvM, AddExpPostSumM, AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddConvertM, AddSwapM, AddSignAM, AddFloat1M, AddFloat2M, AddExp1DenormM, AddExp2DenormM, AddExponentM, FrmM, FOpCtrlM, FmtM); - input [63:0] AddOp1M; // 1st input operand (A) - input [63:0] AddOp2M; // 2nd input operand (B) - input [2:0] AddRmM; // Rounding mode - specify values - input [3:0] AddOpTypeM; // Function opcode - input AddPM; // Result Precision (0 for double, 1 for single) - input AddOvEnM; // Overflow trap enabled - input AddUnEnM; // Underflow trap enabled + input [2:0] FrmM; // Rounding mode - specify values + input [3:0] FOpCtrlM; // Function opcode + input FmtM; // Result Precision (0 for double, 1 for single) + // input AddOvEnM; // Overflow trap enabled + // input AddUnEnM; // Underflow trap enabled input [63:0] AddSumM, AddSumTcM; input [63:0] AddFloat1M; input [63:0] AddFloat2M; @@ -53,12 +51,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel input AddSwapM; // input AddNormOvflowM; - output [63:0] AddResultM; // Result of operation - output [4:0] AddFlagsM; // IEEE exception flags + output [63:0] FAddResultM; // Result of operation + output [4:0] FAddFlagsM; // IEEE 
exception flags output AddDenormM; // AddDenormM on input or output wire P; - assign P = AddPM | AddOpTypeM[2]; + assign P = FmtM | FOpCtrlM[2]; wire [10:0] exp_pre; wire [63:0] Result; @@ -82,6 +80,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel wire [63:0] sum_corr; logic AddNormOvflowM; + + logic AddOvEnM; // Overflow trap enabled + logic AddUnEnM; // Underflow trap enabled + + assign AddOvEnM = 1'b1; + assign AddUnEnM = 1'b1; //AddExponentM value pre-rounding with considerations for denormalized //cases/conversion cases assign exp_pre = AddDenormInM ? @@ -101,7 +105,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp; // Determines the correct comparison result based on operation and sign of resulting AddSumM - assign mantissa_comp = (AddOpTypeM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum; + assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum; // If the signs are different and both operands aren't denormalized // the normal underflow bit is needed and therefore updated. @@ -113,12 +117,12 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel // If the AddSumM is negative, use its two complement instead. // This value has to be 64-bits to correctly handle the // case 10...00 - assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & AddOpTypeM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~AddOpTypeM[0]) )) - ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (AddOpTypeM[3]) ? AddSumM : (AddSumM[63] ? AddSumTcM : AddSumM)); + assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (AddFloat1M[63] ~^ AddFloat2M[63]) & FOpCtrlM[0] ) | ((AddFloat1M[63] ^ AddFloat2M[63]) & ~FOpCtrlM[0]) )) + ? (AddSumM[63] ? AddSumM : AddSumTcM) : ( (FOpCtrlM[3]) ? AddSumM : (AddSumM[63] ? 
AddSumTcM : AddSumM)); // Finds normal underflow result to determine whether to round final AddExponentM down //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be - assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~AddOpTypeM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]); + assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]); // Leading-Zero Detector. Determine the size of the shift needed for // normalization. If sum_corrected is all zeros, the exp_valid is @@ -132,7 +136,7 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel // be right shifted. It outputs the normalized AddSumM. barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm); - assign sum_norm_w_bypass = (AddOpTypeM[3]) ? (AddOpTypeM[0] ? ~sum_corr : sum_corr) : (sum_norm); + assign sum_norm_w_bypass = (FOpCtrlM[3]) ? (FOpCtrlM[0] ? ~sum_corr : sum_corr) : (sum_norm); // Round the mantissa to a 52-bit value, with the leading one // removed. If the result is a single precision number, the actual @@ -141,18 +145,18 @@ module fpuaddcvt2 (AddResultM, AddFlagsM, AddDenormM, AddSumM, AddSumTcM, AddSel // exactly where the rounding point is. The rounding units also // handles special cases and set the exception flags. - // Changed DenormIO -> AddDenormM and FlagsIn -> AddFlagsM in order to + // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlagsM in order to // help in processor reservation station detection of load/stores. In // other words, the processor would like to know ahead of time that // if the result is an exception then don't load or store. 
- rounder round1 (Result, DenormIO, FlagsIn, AddRmM, P, AddOvEnM, AddUnEnM, exp_valid, + rounder round1 (Result, DenormIO, FlagsIn, FrmM, P, AddOvEnM, AddUnEnM, exp_valid, AddSelInvM, AddInvalidM, AddDenormInM, AddConvertM, sign_corr, exp_pre, norm_shift, sum_norm_w_bypass, AddExpPostSumM, AddOp1NormM, AddOp2NormM, AddFloat1M[63:52], AddFloat2M[63:52], - AddNormOvflowM, normal_underflow, AddSwapM, AddOpTypeM, AddSumM); + AddNormOvflowM, normal_underflow, AddSwapM, FOpCtrlM, AddSumM); // Store the final result and the exception flags in registers. - assign AddResultM = Result; - assign {AddDenormM, AddFlagsM} = {DenormIO, FlagsIn}; + assign FAddResultM = Result; + assign {AddDenormM, FAddFlagsM} = {DenormIO, FlagsIn}; endmodule // fpadd diff --git a/wally-pipelined/src/fpu/fpucmp1.sv b/wally-pipelined/src/fpu/fpucmp1.sv index 71bdea3b1..1cf267f22 100755 --- a/wally-pipelined/src/fpu/fpucmp1.sv +++ b/wally-pipelined/src/fpu/fpucmp1.sv @@ -37,7 +37,7 @@ // It also produces an invalid operation flag, which is one // if either of the input operands is a signaling NaN per 754 -module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel); +module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec input logic [63:0] op1; input logic [63:0] op2; diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv index ba7482511..4c1344dc7 100644 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ b/wally-pipelined/src/fpu/fpuhazard.sv @@ -27,45 +27,45 @@ module fpuhazard( input logic [4:0] Adr1, Adr2, Adr3, - input logic FRegWriteE, FRegWriteM, FRegWriteW, + input logic FWriteEnE, FWriteEnM, FWriteEnW, input logic [4:0] RdE, RdM, RdW, - input logic DivBusyM, + input logic DivBusyM, input logic RegWriteD, input logic [2:0] FResultSelD, FResultSelE, input logic IllegalFPUInstrD, - input logic In2UsedD, In3UsedD, + input logic FInput2UsedD, FInput3UsedD, // Stall outputs output logic FStallD, - output logic [1:0] Input1MuxD, 
Input2MuxD, - output logic Input3MuxD + output logic [1:0] FForwardInput1D, FForwardInput2D, + output logic FForwardInput3D ); always_comb begin // set ReadData as default - Input1MuxD = 2'b00; - Input2MuxD = 2'b00; - Input3MuxD = 1'b0; + FForwardInput1D = 2'b00; + FForwardInput2D = 2'b00; + FForwardInput3D = 1'b0; FStallD = DivBusyM; if (~IllegalFPUInstrD) begin // if taking a value from int register - if ((Adr1 == RdE) & (FRegWriteE | ((FResultSelE == 3'b110) & RegWriteD))) - if (FResultSelE == 3'b110) Input1MuxD = 2'b11; // choose SrcAM + if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD))) + if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM else FStallD = 1'b1; // otherwise stall - else if ((Adr1 == RdM) & FRegWriteM) Input1MuxD = 2'b01; // choose FPUResultDirW - else if ((Adr1 == RdW) & FRegWriteW) Input1MuxD = 2'b11; // choose FPUResultDirE + else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW + else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE - if(In2UsedD) - if ((Adr2 == RdE) & FRegWriteE) FStallD = 1'b1; - else if ((Adr2 == RdM) & FRegWriteM) Input2MuxD = 2'b01; // choose FPUResultDirW - else if ((Adr2 == RdW) & FRegWriteW) Input2MuxD = 2'b10; // choose FPUResultDirE + if(FInput2UsedD) + if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1; + else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW + else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE - if(In3UsedD) - if ((Adr3 == RdE) & FRegWriteE) FStallD = 1'b1; - else if ((Adr3 == RdM) & FRegWriteM) FStallD = 1'b1; - else if ((Adr3 == RdW) & FRegWriteW) Input3MuxD = 1'b1; // choose FPUResultDirE + if(FInput3UsedD) + if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1; + else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1; + else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE end end diff --git 
a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 4f4748bd4..2850af86e 100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -1,8 +1,8 @@ //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions -module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1E, SgnOp2E); +module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E); - input [63:0] SgnOp1E, SgnOp2E; + input [63:0] FInput1E, FInput2E; input [1:0] SgnOpCodeE; output [63:0] SgnResultE; output [4:0] SgnFlagsE; @@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SgnOp1E, SgnOp2E); //op code designation: // - //00 - fsgnj - directly copy over sign value of SgnOp2E - //01 - fsgnjn - negate sign value of SgnOp2E - //10 - fsgnjx - XOR sign values of SgnOp1E & SgnOp2E + //00 - fsgnj - directly copy over sign value of FInput2E + //01 - fsgnjn - negate sign value of FInput2E + //10 - fsgnjx - XOR sign values of FInput1E & FInput2E // - assign SgnResultE[63] = SgnOpCodeE[1] ? (SgnOp1E[63] ^ SgnOp2E[63]) : (SgnOp2E[63] ^ SgnOpCodeE[0]); - assign SgnResultE[62:0] = SgnOp1E[62:0]; + assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]); + assign SgnResultE[62:0] = FInput1E[62:0]; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. This will //set the invalid flag high. 
- assign AonesExp = SgnOp1E[62]&SgnOp1E[61]&SgnOp1E[60]&SgnOp1E[59]&SgnOp1E[58]&SgnOp1E[57]&SgnOp1E[56]&SgnOp1E[55]&SgnOp1E[54]&SgnOp1E[53]&SgnOp1E[52]; + assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52]; //the only flag that can occur during this operation is invalid //due to changing sign on already existing NaN diff --git a/wally-pipelined/src/fpu/special.sv b/wally-pipelined/src/fpu/special.sv index 711fd12dd..8ca265bb3 100644 --- a/wally-pipelined/src/fpu/special.sv +++ b/wally-pipelined/src/fpu/special.sv @@ -10,46 +10,46 @@ ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// -module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE, +module special(FInput1E, FInput2E, FInput3E, xzeroE, yzeroE, zzeroE, xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE); ///////////////////////////////////////////////////////////////////////////// - input logic [63:0] Input1E; // Input Input1E - input logic [63:0] Input2E; // Input Input2E - input logic [63:0] Input3E; // Input Input3E - output logic xzeroE; // Input Input1E = 0 - output logic yzeroE; // Input Input2E = 0 - output logic zzeroE; // Input Input3E = 0 - output logic xnanE; // Input1E is NaN - output logic ynanE; // Input2E is NaN - output logic znanE; // Input3E is NaN - output logic xdenormE; // Input1E is denormalized - output logic ydenormE; // Input2E is denormalized - output logic zdenormE; // Input3E is denormalized - output logic xinfE; // Input1E is infinity - output logic yinfE; // Input2E is infinity - output logic zinfE; // Input3E is infinity + input logic [63:0] FInput1E; // Input FInput1E + input logic [63:0] FInput2E; // Input FInput2E + input logic [63:0] FInput3E; // Input FInput3E + output logic xzeroE; // Input FInput1E = 0 + output logic yzeroE; // 
Input FInput2E = 0 + output logic zzeroE; // Input FInput3E = 0 + output logic xnanE; // FInput1E is NaN + output logic ynanE; // FInput2E is NaN + output logic znanE; // FInput3E is NaN + output logic xdenormE; // FInput1E is denormalized + output logic ydenormE; // FInput2E is denormalized + output logic zdenormE; // FInput3E is denormalized + output logic xinfE; // FInput1E is infinity + output logic yinfE; // FInput2E is infinity + output logic zinfE; // FInput3E is infinity // In the actual circuit design, the gates looking at bits // 51:0 and at bits 62:52 should be shared among the various detectors. // Check if input is NaN - assign xnanE = &Input1E[62:52] && |Input1E[51:0]; - assign ynanE = &Input2E[62:52] && |Input2E[51:0]; - assign znanE = &Input3E[62:52] && |Input3E[51:0]; + assign xnanE = &FInput1E[62:52] && |FInput1E[51:0]; + assign ynanE = &FInput2E[62:52] && |FInput2E[51:0]; + assign znanE = &FInput3E[62:52] && |FInput3E[51:0]; // Check if input is denormalized - assign xdenormE = ~(|Input1E[62:52]) && |Input1E[51:0]; - assign ydenormE = ~(|Input2E[62:52]) && |Input2E[51:0]; - assign zdenormE = ~(|Input3E[62:52]) && |Input3E[51:0]; + assign xdenormE = ~(|FInput1E[62:52]) && |FInput1E[51:0]; + assign ydenormE = ~(|FInput2E[62:52]) && |FInput2E[51:0]; + assign zdenormE = ~(|FInput3E[62:52]) && |FInput3E[51:0]; // Check if input is infinity - assign xinfE = &Input1E[62:52] && ~(|Input1E[51:0]); - assign yinfE = &Input2E[62:52] && ~(|Input2E[51:0]); - assign zinfE = &Input3E[62:52] && ~(|Input3E[51:0]); + assign xinfE = &FInput1E[62:52] && ~(|FInput1E[51:0]); + assign yinfE = &FInput2E[62:52] && ~(|FInput2E[51:0]); + assign zinfE = &FInput3E[62:52] && ~(|FInput3E[51:0]); // Check if inputs are all zero // Also forces denormalized inputs to zero. @@ -57,11 +57,11 @@ module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE, // to just check if the exponent is zero. 
// KATHERINE - commented following (21/01/11) - // assign xzeroE = ~(|Input1E[62:0]) || xdenormE; - // assign yzeroE = ~(|Input2E[62:0]) || ydenormE; - // assign zzeroE = ~(|Input3E[62:0]) || zdenormE; + // assign xzeroE = ~(|FInput1E[62:0]) || xdenormE; + // assign yzeroE = ~(|FInput2E[62:0]) || ydenormE; + // assign zzeroE = ~(|FInput3E[62:0]) || zdenormE; // KATHERINE - removed denorm to prevent output logicing zero when computing with a denormalized number - assign xzeroE = ~(|Input1E[62:0]); - assign yzeroE = ~(|Input2E[62:0]); - assign zzeroE = ~(|Input3E[62:0]); + assign xzeroE = ~(|FInput1E[62:0]); + assign yzeroE = ~(|FInput2E[62:0]); + assign zzeroE = ~(|FInput3E[62:0]); endmodule diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index cb4a60a81..eab0885de 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -100,7 +100,7 @@ module wallypipelinedhart ( logic FStallD; logic FWriteIntW, FWriteIntM; logic [31:0] FSROutW; - logic DivSqrtDoneE; + logic FDivSqrtDoneM; logic IllegalFPUInstrD, IllegalFPUInstrE; logic [`XLEN-1:0] FPUResultW;