From 71e4a10efb02be0061f65420cdbe41e198a909b6 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 20 May 2021 22:17:59 -0400 Subject: [PATCH] FMV.D.X imperas test passes --- wally-pipelined/config/rv64ic/wally-config.vh | 2 +- .../regression/wave-dos/default-waves.do | 2 + wally-pipelined/src/fpu/fctrl.sv | 30 ++++-- wally-pipelined/src/fpu/fma1.sv | 24 ++--- wally-pipelined/src/fpu/fma2.sv | 28 ++--- wally-pipelined/src/fpu/fpdiv.sv | 5 +- wally-pipelined/src/fpu/fpu.sv | 101 ++++++++++++++---- wally-pipelined/src/fpu/fpuhazard.sv | 70 ++++++++++++ wally-pipelined/src/fpu/fsm.sv | 31 +++++- wally-pipelined/src/fpu/special.sv | 62 +++++------ wally-pipelined/src/hazard/hazard.sv | 3 +- wally-pipelined/src/ieu/controller.sv | 5 +- wally-pipelined/src/ieu/datapath.sv | 9 +- wally-pipelined/src/ieu/ieu.sv | 3 + wally-pipelined/src/privileged/csru.sv | 2 +- .../src/wally/wallypipelinedhart.sv | 13 ++- .../testbench/testbench-imperas.sv | 70 ++++++------ 17 files changed, 325 insertions(+), 135 deletions(-) create mode 100644 wally-pipelined/src/fpu/fpuhazard.sv diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index 12d254ba8..259e41ae6 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -31,7 +31,7 @@ `define XLEN 64 // MISA RISC-V configuration per specification -`define MISA (32'h00000104 | 0 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) +`define MISA (32'h00000104 | 0 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0) `define A_SUPPORTED ((`MISA >> 0) % 2 == 1) `define C_SUPPORTED ((`MISA >> 2) % 2 == 1) `define D_SUPPORTED ((`MISA >> 3) % 2 == 1) diff --git a/wally-pipelined/regression/wave-dos/default-waves.do b/wally-pipelined/regression/wave-dos/default-waves.do index a1aa2cd60..4a39ec50a 100644 --- a/wally-pipelined/regression/wave-dos/default-waves.do +++ b/wally-pipelined/regression/wave-dos/default-waves.do @@ -49,6 +49,8 @@ add wave -hex /testbench/dut/hart/ieu/dp/ResultW add wave -hex /testbench/dut/hart/ieu/dp/RdW add wave -divider +add wave -hex -r /testbench/* + # appearance TreeUpdate [SetDefaultTree] WaveRestoreZoom {0 ps} {100 ps} diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index 994cb1e6f..ba8332a29 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -13,9 +13,12 @@ module fctrl ( output logic [3:0] OpCtrlD, output logic FmtD, output logic [2:0] FrmD, - output logic WriteIntD); + output logic [1:0] FMemRWD, + output logic OutputInput2D, + output logic FWriteIntD); + logic IllegalFPUInstr1D, IllegalFPUInstr2D; //precision is taken directly from instruction assign FmtD = Funct7D[0]; // *** fix rounding for dynamic rounding @@ -53,6 +56,7 @@ module fctrl ( always_comb begin //checks all but FMA/store/load + IllegalFPUInstr2D = 0; if(OpD == 7'b1010011) begin casez(Funct7D) //compare @@ -77,7 +81,7 @@ module fctrl ( else if (Funct7D[1] == 0) FResultSelD = 3'b111; //output SrcW 7'b111100? : FResultSelD = 3'b110; - default : FResultSelD = 3'bxxx; + default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end endcase end //FMA/store/load @@ -92,12 +96,15 @@ module fctrl ( 7'b0100111 : FResultSelD = 3'b111; //load 7'b0000111 : FResultSelD = 3'b111; - default : FResultSelD = 3'bxxx; + default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end endcase end end + assign OutputInput2D = OpD == 7'b0100111; + assign FMemRWD[0] = OutputInput2D; + assign FMemRWD[1] = OpD == 7'b0000111; @@ -143,7 +150,7 @@ module fctrl ( always_comb begin - IllegalFPUInstrD = 0; + IllegalFPUInstr1D = 0; case (FResultSelD) // div/sqrt // fdiv = ???0 @@ -191,23 +198,24 @@ module fctrl ( // fmv.w.x = ???0 // fmv.w.d = ???1 3'b110 : OpCtrlD = {3'b0, Funct7D[0]}; - // output ReadData1 + // output Input1 // flw = ?000 - // fld = ?001 - // fsw = ?010 - // fsd = ?011 + // fld = ?001 + // fsw = ?010 // output Input2 + // fsd = ?011 // output Input2 // fmv.x.w = ?100 // fmv.d.w = ?101 // {?, is mv, is store, is double or fcvt.d.w} 3'b111 : OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; - default : begin OpCtrlD = 4'bxxxx; IllegalFPUInstrD = 1'b1; end + default : begin OpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; end endcase end + assign IllegalFPUInstrD = IllegalFPUInstr1D | IllegalFPUInstr2D; //write to integer source if conv to int occurs //AND of Funct7 for int results // is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv - assign WriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b001)&OpD[6]); + assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]); // if not writting to int reg and not a store function and not move - assign FRegWriteD = ~WriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]); + assign FRegWriteD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP; endmodule diff --git a/wally-pipelined/src/fpu/fma1.sv b/wally-pipelined/src/fpu/fma1.sv index 63fa5e0b0..59b516007 100644 --- a/wally-pipelined/src/fpu/fma1.sv +++ b/wally-pipelined/src/fpu/fma1.sv @@ -15,13 +15,13 @@ // normalize Normalization shifter // round Rounding of result // exception Handles exceptional cases -// bypass Handles bypass of result to ReadData1E or ReadData3E inputs +// bypass Handles bypass of result to Input1E or Input3E inputs // sign One bit sign handling block // special Catch special cases (inputs = 0 / infinity / etc.) // -// The FMAC computes FmaResultM=ReadData1E*ReadData2E+ReadData3E, rounded with the mode specified by +// The FMAC computes FmaResultM=Input1E*Input2E+Input3E, rounded with the mode specified by // RN, RZ, RM, or RP. The result is optionally bypassed back to -// the ReadData1E or ReadData3E inputs for use on the next cycle. In addition, four signals +// the Input1E or Input3E inputs for use on the next cycle. In addition, four signals // are produced: trap, overflow, underflow, and inexact. Trap indicates // an infinity, NaN, or denormalized number to be handled in software; // the other three signals are IEEE flags. @@ -29,15 +29,15 @@ ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// -module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE, +module fma1(Input1E, Input2E, Input3E, FrmE, rE, sE, tE, bsE, killprodE, sumshiftE, sumshiftzeroE, aligncntE, aeE , xzeroE, yzeroE, zzeroE, xnanE,ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE, nanE, prodinfE); ///////////////////////////////////////////////////////////////////////////// - input logic [63:0] ReadData1E; // input 1 - input logic [63:0] ReadData2E; // input 2 - input logic [63:0] ReadData3E; // input 3 + input logic [63:0] Input1E; // input 1 + input logic [63:0] Input2E; // input 2 + input logic [63:0] Input3E; // input 3 input logic [2:0] FrmE; // Rounding mode output logic [12:0] aligncntE; // status flags output logic [105:0] rE; // one result of partial product sum @@ -45,7 +45,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE, output logic [163:0] tE; // output logic of alignment shifter output logic [12:0] aeE; // multiplier expoent output logic bsE; // sticky bit of addend - output logic killprodE; // ReadData3E >> product + output logic killprodE; // Input3E >> product output logic xzeroE; output logic yzeroE; output logic zzeroE; @@ -68,7 +68,7 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE, // output logic [12:0] aligncntE; // shift count for alignment - logic prodof; // ReadData1E*ReadData2E out of range + logic prodof; // Input1E*Input2E out of range @@ -84,12 +84,12 @@ module fma1(ReadData1E, ReadData2E, ReadData3E, FrmE, // Instantiate fraction datapath - multiply multiply(.xman(ReadData1E[51:0]), .yman(ReadData2E[51:0]), .*); - align align(.zman(ReadData3E[51:0]),.*); + multiply multiply(.xman(Input1E[51:0]), .yman(Input2E[51:0]), .*); + align align(.zman(Input3E[51:0]),.*); // Instantiate exponent datapath - expgen1 expgen1(.xexp(ReadData1E[62:52]),.yexp(ReadData2E[62:52]),.zexp(ReadData3E[62:52]),.*); + expgen1 expgen1(.xexp(Input1E[62:52]),.yexp(Input2E[62:52]),.zexp(Input3E[62:52]),.*); // Instantiate special case detection across datapath & exponent path special special(.*); diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index 8ff107fff..23e6bb6b5 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -15,13 +15,13 @@ // normalize Normalization shifter // round Rounding of result // exception Handles exceptional cases -// bypass Handles bypass of result to ReadData1M or ReadData3M input logics +// bypass Handles bypass of result to Input1M or Input3M input logics // sign One bit sign handling block // special Catch special cases (input logics = 0 / infinity / etc.) // -// The FMAC computes FmaResultM=ReadData1M*ReadData2M+ReadData3M, rounded with the mode specified by +// The FMAC computes FmaResultM=Input1M*Input2M+Input3M, rounded with the mode specified by // RN, RZ, RM, or RP. The result is optionally bypassed back to -// the ReadData1M or ReadData3M input logics for use on the next cycle. In addition, four signals +// the Input1M or Input3M input logics for use on the next cycle. In addition, four signals // are produced: trap, overflow, underflow, and inexact. Trap indicates // an infinity, NaN, or denormalized number to be handled in software; // the other three signals are IMMM flags. @@ -29,7 +29,7 @@ ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// -module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM, +module fma2(Input1M, Input2M, Input3M, FrmM, FmaResultM, FmaFlagsM, aligncntM, rM, sM, tM, normcntM, aeM, bsM,killprodM, xzeroM, yzeroM,zzeroM,xdenormM,ydenormM, @@ -39,9 +39,9 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM, ); ///////////////////////////////////////////////////////////////////////////// - input logic [63:0] ReadData1M; // input logic 1 - input logic [63:0] ReadData2M; // input logic 2 - input logic [63:0] ReadData3M; // input logic 3 + input logic [63:0] Input1M; // input logic 1 + input logic [63:0] Input2M; // input logic 2 + input logic [63:0] Input3M; // input logic 3 input logic [2:0] FrmM; // Rounding mode input logic [12:0] aligncntM; // status flags input logic [105:0] rM; // one result of partial product sum @@ -50,7 +50,7 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM, input logic [8:0] normcntM; // shift count for normalizer input logic [12:0] aeM; // multiplier expoent input logic bsM; // sticky bit of addend - input logic killprodM; // ReadData3M >> product + input logic killprodM; // Input3M >> product input logic prodinfM; input logic xzeroM; input logic yzeroM; @@ -69,7 +69,7 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM, input logic sumshiftzeroM; - output logic [63:0] FmaResultM; // output FmaResultM=ReadData1M*ReadData2M+ReadData3M + output logic [63:0] FmaResultM; // output FmaResultM=Input1M*Input2M+Input3M output logic [4:0] FmaFlagsM; // status flags @@ -120,18 +120,18 @@ module fma2(ReadData1M, ReadData2M, ReadData3M, FrmM, add add(.*); lza lza(.*); - normalize normalize(.zexp(ReadData3M[62:52]),.*); - round round(.xman(ReadData1M[51:0]), .yman(ReadData2M[51:0]),.zman(ReadData3M[51:0]),.*); + normalize normalize(.zexp(Input3M[62:52]),.*); + round round(.xman(Input1M[51:0]), .yman(Input2M[51:0]),.zman(Input3M[51:0]),.*); // Instantiate exponent datapath - expgen2 expgen2(.xexp(ReadData1M[62:52]),.yexp(ReadData2M[62:52]),.zexp(ReadData3M[62:52]),.*); + expgen2 expgen2(.xexp(Input1M[62:52]),.yexp(Input2M[62:52]),.zexp(Input3M[62:52]),.*); // Instantiate control logic -sign sign(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.*); -flag2 flag2(.xsign(ReadData1M[63]),.ysign(ReadData2M[63]),.zsign(ReadData3M[63]),.vbits(v[1:0]),.*); +sign sign(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.*); +flag2 flag2(.xsign(Input1M[63]),.ysign(Input2M[63]),.zsign(Input3M[63]),.vbits(v[1:0]),.*); assign FmaResultM = {wsign,wexp,wman}; diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv index 5b7dc72a3..1574b79ef 100755 --- a/wally-pipelined/src/fpu/fpdiv.sv +++ b/wally-pipelined/src/fpu/fpdiv.sv @@ -24,7 +24,7 @@ // `timescale 1ps/1ps module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, DivFrm, DivOpType, DivP, DivOvEn, DivUnEn, - DivStart, reset, clk); + DivStart, reset, clk, DivBusyM); input [63:0] DivOp1; // 1st input operand (A) input [63:0] DivOp2; // 2nd input operand (B) @@ -42,6 +42,7 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di output [4:0] DivFlagsM; // IEEE exception flags output DivDenormM; // DivDenormM on input or output output DivSqrtDone; + output DivBusyM; supply1 vdd; supply0 vss; @@ -139,7 +140,7 @@ module fpdiv (DivSqrtDone, DivResultM, DivFlagsM, DivDenormM, DivOp1, DivOp2, Di // FSM : control divider fsm control (DivSqrtDone, load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, - clk, reset, DivStart, DivOpType); + clk, reset, DivStart, DivOpType, DivBusyM); // Round the mantissa to a 52-bit value, with the leading one // removed. The rounding units also handles special cases and diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 76a46498b..3f79946c0 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -11,10 +11,15 @@ module fpu ( input logic [31:0] InstrD, input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg - input logic StallE, StallM, StallW, + input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, + input logic RegWriteD, output logic [4:0] SetFflagsM, output logic [31:0] FSROutW, + output logic [1:0] FMemRWM, + output logic FStallE, + output logic FWriteIntW, + output logic [`XLEN-1:0] FWriteDataM, // Integer input being written into fpreg output logic DivSqrtDoneE, output logic IllegalFPUInstrD, output logic [`XLEN-1:0] FPUResultW); @@ -72,8 +77,17 @@ module fpu ( logic FmtD; logic DivSqrtStartD; logic [3:0] OpCtrlD; - logic WriteIntD; + logic FWriteIntD; + logic OutputInput2D; + logic [1:0] FMemRWD; + + logic DivBusyM; + logic [1:0] Input1MuxD, Input2MuxD; + logic Input3MuxD; + //Hazard unit for FPU + fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*); + //top-level controller for FPU fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); @@ -108,10 +122,17 @@ module fpu ( logic FmtE; logic DivSqrtStartE; logic [3:0] OpCtrlE; + logic [1:0] Input1MuxE, Input2MuxE; + logic Input3MuxE; + logic [63:0] FPUResultDirE; + logic FWriteIntE; + logic OutputInput2E; + logic [1:0] FMemRWE; //instantiation of E stage regfile signals logic [4:0] RdE; logic [`XLEN-1:0] ReadData1E, ReadData2E, ReadData3E; + logic [`XLEN-1:0] Input1E, Input2E, Input3E, Input1tmpE; //instantiation of E/M stage div/sqrt signals logic DivSqrtDone, DivDenormM; @@ -195,6 +216,13 @@ module fpu ( flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, OpCtrlD, OpCtrlE); flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, DivSqrtStartD, DivSqrtStartE); + flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, Input1MuxD, Input1MuxE); + flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, Input2MuxD, Input2MuxE); + flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, Input3MuxD, Input3MuxE); + flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResultDirW, FPUResultDirE); + flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE); + flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, OutputInput2D, OutputInput2E); + flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE); // //END D/E PIPE @@ -204,16 +232,27 @@ module fpu ( //BEGIN EXECUTION STAGE // + + + // input muxs for forwarding + + mux4 #(64) Input1Emux(ReadData1E, FPUResultDirW, FPUResultDirE, SrcAM, Input1MuxE, Input1tmpE); + mux3 #(64) Input2Emux(ReadData2E, FPUResultDirW, FPUResultDirE, Input2MuxE, Input2E); + mux2 #(64) Input3Emux(ReadData3E, FPUResultDirE, Input3MuxE, Input3E); + mux2 #(64) OutputInput2mux(Input1tmpE, Input2E, OutputInput2E, Input1E); + + + fma1 fma1 (.*); //first and only instance of floating-point divider fpdiv fpdivsqrt (.*); //first of two-stage instance of floating-point add/cvt unit - fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, ReadData1E, ReadData2E, FrmE, OpCtrlE, FmtE); + fpuaddcvt1 fpadd1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, Input1E, Input2E, FrmE, OpCtrlE, FmtE); //first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, ReadData1E, ReadData2E, OpCtrlE[1:0]); + fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, Input1E, Input2E, OpCtrlE[1:0]); //first and only instance of floating-point sign converter fpusgn fpsgn (.*); @@ -227,25 +266,25 @@ module fpu ( //truncate to 64 bits //(causes warning during compilation - case never reached) // if(`XLEN > 64) begin // ***KEP this isn't usedand it causes a lint error -// DivOp1 = ReadData1E[`XLEN-1:`XLEN-64]; -// DivOp2 = ReadData2E[`XLEN-1:`XLEN-64]; -// AddOp1E = ReadData1E[`XLEN-1:`XLEN-64]; -// AddOp2E = ReadData2E[`XLEN-1:`XLEN-64]; -// CmpOp1E = ReadData1E[`XLEN-1:`XLEN-64]; -// CmpOp2E = ReadData2E[`XLEN-1:`XLEN-64]; -// SgnOp1E = ReadData1E[`XLEN-1:`XLEN-64]; -// SgnOp2E = ReadData2E[`XLEN-1:`XLEN-64]; +// DivOp1 = Input1E[`XLEN-1:`XLEN-64]; +// DivOp2 = Input2E[`XLEN-1:`XLEN-64]; +// AddOp1E = Input1E[`XLEN-1:`XLEN-64]; +// AddOp2E = Input2E[`XLEN-1:`XLEN-64]; +// CmpOp1E = Input1E[`XLEN-1:`XLEN-64]; +// CmpOp2E = Input2E[`XLEN-1:`XLEN-64]; +// SgnOp1E = Input1E[`XLEN-1:`XLEN-64]; +// SgnOp2E = Input2E[`XLEN-1:`XLEN-64]; // end // //zero extend to 64 bits // else begin -// DivOp1 = {ReadData1E,{64-`XLEN{1'b0}}}; -// DivOp2 = {ReadData2E,{64-`XLEN{1'b0}}}; -// AddOp1E = {ReadData1E,{64-`XLEN{1'b0}}}; -// AddOp2E = {ReadData2E,{64-`XLEN{1'b0}}}; -// CmpOp1E = {ReadData1E,{64-`XLEN{1'b0}}}; -// CmpOp2E = {ReadData2E,{64-`XLEN{1'b0}}}; -// SgnOp1E = {ReadData1E,{64-`XLEN{1'b0}}}; -// SgnOp2E = {ReadData2E,{64-`XLEN{1'b0}}}; +// DivOp1 = {Input1E,{64-`XLEN{1'b0}}}; +// DivOp2 = {Input2E,{64-`XLEN{1'b0}}}; +// AddOp1E = {Input1E,{64-`XLEN{1'b0}}}; +// AddOp2E = {Input2E,{64-`XLEN{1'b0}}}; +// CmpOp1E = {Input1E,{64-`XLEN{1'b0}}}; +// CmpOp2E = {Input2E,{64-`XLEN{1'b0}}}; +// SgnOp1E = {Input1E,{64-`XLEN{1'b0}}}; +// SgnOp2E = {Input2E,{64-`XLEN{1'b0}}}; // end //assign op codes @@ -273,6 +312,7 @@ module fpu ( logic [2:0] FrmM; logic FmtM; logic [3:0] OpCtrlM; + logic FWriteIntM; //instantiate M stage FMA signals here ***rename fma signals and resize for XLEN logic [63:0] FmaResultM; @@ -305,7 +345,7 @@ module fpu ( //instantiation of M stage regfile signals logic [4:0] RdM; - logic [`XLEN-1:0] ReadData1M, ReadData2M, ReadData3M; + logic [`XLEN-1:0] Input1M, Input2M, Input3M; //instantiation of M stage add/cvt signals logic [63:0] AddResultM; @@ -333,6 +373,14 @@ module fpu ( logic [63:0] CmpOp1M, CmpOp2M; logic [1:0] CmpSelM; + + //***************** + //fpregfile D/E pipe registers + //***************** + flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, Input1E, Input1M); + flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, Input2E, Input2M); + flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, Input3E, Input3M); + //***************** //fma E/M pipe registers //***************** @@ -423,6 +471,8 @@ module fpu ( flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM); flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM); flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, OpCtrlE, OpCtrlM); + flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); + flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); // //END E/M PIPE @@ -432,6 +482,9 @@ module fpu ( //BEGIN MEMORY STAGE // + + assign FWriteDataM = Input1M; + fma2 fma2(.*); //second instance of two-stage floating-point add/cvt unit @@ -466,7 +519,7 @@ module fpu ( logic [4:0] SgnFlagsW; //instantiation of W stage regfile signals - logic [`XLEN-1:0] ReadData1W, ReadData2W, ReadData3W; + logic [`XLEN-1:0] Input1W; logic [`XLEN-1:0] SrcAW; //instantiation of W stage add/cvt signals @@ -523,6 +576,8 @@ module fpu ( flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW); flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW); + flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, Input1M, Input1W); + flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW); ////END M/W PIPE //***************************************** @@ -590,7 +645,7 @@ module fpu ( // output SrcAW 3'b110 : FPUResultDirW = SrcAW; // output ReadData1 - 3'b111 : FPUResultDirW = ReadData1W; + 3'b111 : FPUResultDirW = Input1W; default : FPUResultDirW = {64{1'bx}}; endcase end diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv new file mode 100644 index 000000000..62787edc4 --- /dev/null +++ b/wally-pipelined/src/fpu/fpuhazard.sv @@ -0,0 +1,70 @@ +/////////////////////////////////////////// +// fpuhazard.sv +// +// Written: me@KatherineParry.com 19 May 2021 +// Modified: +// +// Purpose: Determine forwarding, stalls and flushes for the FPU +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module fpuhazard( + input logic [4:0] Adr1, Adr2, Adr3, + input logic FRegWriteE, FRegWriteM, FRegWriteW, + input logic [4:0] RdE, RdM, RdW, + input logic DivBusyM, + input logic RegWriteD, + input logic [2:0] FResultSelD, FResultSelE, + // Stall outputs + output logic FStallE, + output logic [1:0] Input1MuxD, Input2MuxD, + output logic Input3MuxD +); + + + always_comb begin + // set ReadData as default + Input1MuxD = 2'b00; + Input2MuxD = 2'b00; + Input3MuxD = 1'b0; + FStallE = DivBusyM; + + if ((Adr1 == RdE) & (FRegWriteE | ((FResultSelE == 3'b110) & RegWriteD))) + if (FResultSelE == 3'b110) Input1MuxD = 2'b11; // choose SrcAM + else FStallE = 1'b1; // otherwise stall + + else if ((Adr1 == RdM) & FRegWriteM) Input1MuxD = 2'b01; // choose FPUResultDirW + else if ((Adr1 == RdW) & FRegWriteW) Input1MuxD = 2'b11; // choose FPUResultDirE + + + + else if ((Adr2 == RdE) & FRegWriteE) FStallE = 1'b1;//***add a signals saying whether input 1, 2 or 3 are used + else if ((Adr2 == RdM) & FRegWriteM) Input2MuxD = 2'b01; // choose FPUResultDirW + else if ((Adr2 == RdW) & FRegWriteW) Input2MuxD = 2'b10; // choose FPUResultDirE + + + + + else if ((Adr3 == RdE) & FRegWriteE) FStallE = 1'b1; + else if ((Adr3 == RdM) & FRegWriteM) FStallE = 1'b1; + else if ((Adr3 == RdW) & FRegWriteW) Input3MuxD = 1'b1; // choose FPUResultDirE + end + +endmodule diff --git a/wally-pipelined/src/fpu/fsm.sv b/wally-pipelined/src/fpu/fsm.sv index 482ef592b..b5a503a1c 100755 --- a/wally-pipelined/src/fpu/fsm.sv +++ b/wally-pipelined/src/fpu/fsm.sv @@ -1,7 +1,7 @@ module fsm (done, load_rega, load_regb, load_regc, load_regd, load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, - clk, reset, start, op_type); + clk, reset, start, op_type, divBusy); input clk; input reset; @@ -20,6 +20,7 @@ module fsm (done, load_rega, load_regb, load_regc, output [2:0] sel_muxa; output [2:0] sel_muxb; output sel_muxr; + output logic divBusy; reg done; // End of cycles reg load_rega; // enable for regA @@ -63,6 +64,7 @@ module fsm (done, load_rega, load_regb, load_regc, if (start==1'b0) begin done = 1'b0; + divBusy = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -77,6 +79,7 @@ module fsm (done, load_rega, load_regb, load_regc, else if (start==1'b1 && op_type==1'b0) begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -91,6 +94,7 @@ module fsm (done, load_rega, load_regb, load_regc, else if (start==1'b1 && op_type==1'b1) begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -106,6 +110,7 @@ module fsm (done, load_rega, load_regb, load_regc, S1: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -120,6 +125,7 @@ module fsm (done, load_rega, load_regb, load_regc, S2: // iteration 1 begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -134,6 +140,7 @@ module fsm (done, load_rega, load_regb, load_regc, S3: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -148,6 +155,7 @@ module fsm (done, load_rega, load_regb, load_regc, S4: // iteration 2 begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -162,6 +170,7 @@ module fsm (done, load_rega, load_regb, load_regc, S5: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -176,6 +185,7 @@ module fsm (done, load_rega, load_regb, load_regc, S6: // iteration 3 begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -190,6 +200,7 @@ module fsm (done, load_rega, load_regb, load_regc, S7: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -204,6 +215,7 @@ module fsm (done, load_rega, load_regb, load_regc, S8: // q,qm,qp begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -218,6 +230,7 @@ module fsm (done, load_rega, load_regb, load_regc, S9: // rem begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -232,6 +245,7 @@ module fsm (done, load_rega, load_regb, load_regc, S10: // done begin done = 1'b1; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -246,6 +260,7 @@ module fsm (done, load_rega, load_regb, load_regc, S13: // start of sqrt path begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -260,6 +275,7 @@ module fsm (done, load_rega, load_regb, load_regc, S14: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -274,6 +290,7 @@ module fsm (done, load_rega, load_regb, load_regc, S15: // iteration 1 begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -288,6 +305,7 @@ module fsm (done, load_rega, load_regb, load_regc, S16: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -302,6 +320,7 @@ module fsm (done, load_rega, load_regb, load_regc, S17: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -316,6 +335,7 @@ module fsm (done, load_rega, load_regb, load_regc, S18: // iteration 2 begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -330,6 +350,7 @@ module fsm (done, load_rega, load_regb, load_regc, S19: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -344,6 +365,7 @@ module fsm (done, load_rega, load_regb, load_regc, S20: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -358,6 +380,7 @@ module fsm (done, load_rega, load_regb, load_regc, S21: // iteration 3 begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b1; load_regc = 1'b0; @@ -372,6 +395,7 @@ module fsm (done, load_rega, load_regb, load_regc, S22: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -386,6 +410,7 @@ module fsm (done, load_rega, load_regb, load_regc, S23: begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b1; load_regb = 1'b0; load_regc = 1'b1; @@ -400,6 +425,7 @@ module fsm (done, load_rega, load_regb, load_regc, S24: // q,qm,qp begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -414,6 +440,7 @@ module fsm (done, load_rega, load_regb, load_regc, S25: // rem begin done = 1'b0; + divBusy = 1'b1; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -428,6 +455,7 @@ module fsm (done, load_rega, load_regb, load_regc, S26: // done begin done = 1'b1; + divBusy = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; @@ -442,6 +470,7 @@ module fsm (done, load_rega, load_regb, load_regc, default: begin done = 1'b0; + divBusy = 1'b0; load_rega = 1'b0; load_regb = 1'b0; load_regc = 1'b0; diff --git a/wally-pipelined/src/fpu/special.sv b/wally-pipelined/src/fpu/special.sv index 723b1e32f..711fd12dd 100644 --- a/wally-pipelined/src/fpu/special.sv +++ b/wally-pipelined/src/fpu/special.sv @@ -10,46 +10,46 @@ ///////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////// -module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE, +module special(Input1E, Input2E, Input3E, xzeroE, yzeroE, zzeroE, xnanE, ynanE, znanE, xdenormE, ydenormE, zdenormE, xinfE, yinfE, zinfE); ///////////////////////////////////////////////////////////////////////////// - input logic [63:0] ReadData1E; // Input ReadData1E - input logic [63:0] ReadData2E; // Input ReadData2E - input logic [63:0] ReadData3E; // Input ReadData3E - output logic xzeroE; // Input ReadData1E = 0 - output logic yzeroE; // Input ReadData2E = 0 - output logic zzeroE; // Input ReadData3E = 0 - output logic xnanE; // ReadData1E is NaN - output logic ynanE; // ReadData2E is NaN - output logic znanE; // ReadData3E is NaN - output logic xdenormE; // ReadData1E is denormalized - output logic ydenormE; // ReadData2E is denormalized - output logic zdenormE; // ReadData3E is denormalized - output logic xinfE; // ReadData1E is infinity - output logic yinfE; // ReadData2E is infinity - output logic zinfE; // ReadData3E is infinity + input logic [63:0] Input1E; // Input Input1E + input logic [63:0] Input2E; // Input Input2E + input logic [63:0] Input3E; // Input Input3E + output logic xzeroE; // Input Input1E = 0 + output logic yzeroE; // Input Input2E = 0 + output logic zzeroE; // Input Input3E = 0 + output logic xnanE; // Input1E is NaN + output logic ynanE; // Input2E is NaN + output logic znanE; // Input3E is NaN + output logic xdenormE; // Input1E is denormalized + output logic ydenormE; // Input2E is denormalized + output logic zdenormE; // Input3E is denormalized + output logic xinfE; // Input1E is infinity + output logic yinfE; // Input2E is infinity + output logic zinfE; // Input3E is infinity // In the actual circuit design, the gates looking at bits // 51:0 and at bits 62:52 should be shared among the various detectors. // Check if input is NaN - assign xnanE = &ReadData1E[62:52] && |ReadData1E[51:0]; - assign ynanE = &ReadData2E[62:52] && |ReadData2E[51:0]; - assign znanE = &ReadData3E[62:52] && |ReadData3E[51:0]; + assign xnanE = &Input1E[62:52] && |Input1E[51:0]; + assign ynanE = &Input2E[62:52] && |Input2E[51:0]; + assign znanE = &Input3E[62:52] && |Input3E[51:0]; // Check if input is denormalized - assign xdenormE = ~(|ReadData1E[62:52]) && |ReadData1E[51:0]; - assign ydenormE = ~(|ReadData2E[62:52]) && |ReadData2E[51:0]; - assign zdenormE = ~(|ReadData3E[62:52]) && |ReadData3E[51:0]; + assign xdenormE = ~(|Input1E[62:52]) && |Input1E[51:0]; + assign ydenormE = ~(|Input2E[62:52]) && |Input2E[51:0]; + assign zdenormE = ~(|Input3E[62:52]) && |Input3E[51:0]; // Check if input is infinity - assign xinfE = &ReadData1E[62:52] && ~(|ReadData1E[51:0]); - assign yinfE = &ReadData2E[62:52] && ~(|ReadData2E[51:0]); - assign zinfE = &ReadData3E[62:52] && ~(|ReadData3E[51:0]); + assign xinfE = &Input1E[62:52] && ~(|Input1E[51:0]); + assign yinfE = &Input2E[62:52] && ~(|Input2E[51:0]); + assign zinfE = &Input3E[62:52] && ~(|Input3E[51:0]); // Check if inputs are all zero // Also forces denormalized inputs to zero. @@ -57,11 +57,11 @@ module special(ReadData1E, ReadData2E, ReadData3E, xzeroE, yzeroE, zzeroE, // to just check if the exponent is zero. // KATHERINE - commented following (21/01/11) - // assign xzeroE = ~(|ReadData1E[62:0]) || xdenormE; - // assign yzeroE = ~(|ReadData2E[62:0]) || ydenormE; - // assign zzeroE = ~(|ReadData3E[62:0]) || zdenormE; + // assign xzeroE = ~(|Input1E[62:0]) || xdenormE; + // assign yzeroE = ~(|Input2E[62:0]) || ydenormE; + // assign zzeroE = ~(|Input3E[62:0]) || zdenormE; // KATHERINE - removed denorm to prevent output logicing zero when computing with a denormalized number - assign xzeroE = ~(|ReadData1E[62:0]); - assign yzeroE = ~(|ReadData2E[62:0]); - assign zzeroE = ~(|ReadData3E[62:0]); + assign xzeroE = ~(|Input1E[62:0]); + assign yzeroE = ~(|Input2E[62:0]); + assign zzeroE = ~(|Input3E[62:0]); endmodule diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 9e8880edc..88b48ec4c 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -32,6 +32,7 @@ module hazard( input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, input logic DataStall, ICacheStallF, + input logic FStallE, input logic DivBusyE, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, @@ -60,7 +61,7 @@ module hazard( assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE); assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD) & ~(BranchFlushDE); // stall in decode if instruction is a load/mul/csr dependent on previous // assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous - assign StallECause = DivBusyE; + assign StallECause = DivBusyE | FStallE; assign StallMCause = 0; assign StallWCause = DataStall | ICacheStallF; diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 6972be293..e73fc6848 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -33,6 +33,7 @@ module controller( output logic [2:0] ImmSrcD, input logic IllegalIEUInstrFaultD, output logic IllegalBaseInstrFaultD, + output logic RegWriteD, // Execute stage control signals input logic StallE, FlushE, input logic [2:0] FlagsE, @@ -68,7 +69,7 @@ module controller( `define CTRLW 23 // pipelined control signals - logic RegWriteD, RegWriteE; + logic RegWriteE; logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; logic [1:0] MemRWD, MemRWE; logic JumpD; @@ -105,6 +106,7 @@ module controller( // RegWrite_ImmSrc_ALUSrc_MemRW_ResultSrc_Branch_ALUOp_Jump_TargetSrc_W64_CSRRead_Privileged_MulDiv_Atomic_Illegal 7'b0000000: ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // illegal instruction 7'b0000011: ControlsD = `CTRLW'b1_000_01_10_001_0_00_0_0_0_0_0_0_00_0; // lw + 7'b0000111: ControlsD = `CTRLW'b0_000_01_10_001_0_00_0_0_0_0_0_0_00_0; // flw 7'b0001111: ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_0; // fence = nop 7'b0010011: ControlsD = `CTRLW'b1_000_01_00_000_0_10_0_0_0_0_0_0_00_0; // I-type ALU 7'b0010111: ControlsD = `CTRLW'b1_100_11_00_000_0_00_0_0_0_0_0_0_00_0; // auipc @@ -113,6 +115,7 @@ module controller( else ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction 7'b0100011: ControlsD = `CTRLW'b0_001_01_01_000_0_00_0_0_0_0_0_0_00_0; // sw + 7'b0100111: ControlsD = `CTRLW'b0_001_01_01_000_0_00_0_0_0_0_0_0_00_0; // fsw 7'b0101111: if (`A_SUPPORTED) begin if (InstrD[31:27] == 5'b00010) ControlsD = `CTRLW'b1_000_00_10_001_0_00_0_0_0_0_0_0_01_0; // lr diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index f94e665eb..b7c9acc33 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -48,6 +48,8 @@ module datapath ( output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals input logic StallW, FlushW, + input logic FWriteIntW, + input logic [`XLEN-1:0] FPUResultW, input logic RegWriteW, input logic SquashSCW, input logic [2:0] ResultSrcW, @@ -77,13 +79,18 @@ module datapath ( // Writeback stage signals logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] ALUResultW; + logic [`XLEN-1:0] WriteDataW; logic [`XLEN-1:0] ResultW; // Decode stage assign Rs1D = InstrD[19:15]; assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; - regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, ResultW, RD1D, RD2D); + + //Mux for writting floating point + mux2 #(`XLEN) writedatamux(ResultW, FPUResultW, FWriteIntW, WriteDataW); + + regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); extend ext(.InstrD(InstrD[31:7]), .*); // Execute stage pipeline register and logic diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index dd48e89be..53bf6f3e1 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -31,6 +31,7 @@ module ieu ( input logic [31:0] InstrD, input logic IllegalIEUInstrFaultD, output logic IllegalBaseInstrFaultD, + output logic RegWriteD, // Execute Stage interface input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, @@ -49,6 +50,8 @@ module ieu ( output logic [2:0] Funct3M, // Writeback stage input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, + input logic FWriteIntW, + input logic [`XLEN-1:0] FPUResultW, // input logic [`XLEN-1:0] PCLinkW, output logic InstrValidW, // hazards diff --git a/wally-pipelined/src/privileged/csru.sv b/wally-pipelined/src/privileged/csru.sv index ad3095946..aea1d398f 100644 --- a/wally-pipelined/src/privileged/csru.sv +++ b/wally-pipelined/src/privileged/csru.sv @@ -44,7 +44,7 @@ module csru #(parameter // Floating Point CSRs in User Mode only needed if Floating Point is supported generate - if (`F_SUPPORTED) begin + if (`F_SUPPORTED | `D_SUPPORTED) begin logic [4:0] FFLAGS_REGW; logic WriteFFLAGSM, WriteFRMM, WriteFCSRM; logic [2:0] NextFRMM; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 2c7ccc053..e2c018420 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -93,7 +93,12 @@ module wallypipelinedhart ( logic [4:0] SetFflagsM; logic [2:0] FRM_REGW; logic FloatRegWriteW; + logic [1:0] FMemRWM; + logic RegWriteD; + logic [`XLEN-1:0] FWriteDataM; logic SquashSCW; + logic FStallE; + logic FWriteIntW; logic [31:0] FSROutW; logic DivSqrtDoneE; logic IllegalFPUInstrD, IllegalFPUInstrE; @@ -140,17 +145,23 @@ module wallypipelinedhart ( logic RASPredPCWrongM; logic BPPredClassNonCFIWrongM; + logic[`XLEN-1:0] WriteDatatmpM; + logic [4:0] InstrClassM; ifu ifu(.InstrInF(InstrRData), .*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // integer execution unit: integer register file, datapath and controller - dmem dmem(.*); // data cache unit + + + mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); + dmem dmem(.MemRWM(MemRWM|FMemRWM), .WriteDataM(WriteDatatmpM),.*); // data cache unit ahblite ebu( //.InstrReadF(1'b0), //.InstrRData(InstrF), // hook up InstrF later + .WriteDataM(WriteDatatmpM), .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .Funct7M(InstrM[31:25]), .*); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 093ac0774..78067b167 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -120,38 +120,38 @@ string tests32f[] = '{ string tests64d[] = '{ - "rv64d/I-FMV-X-D-01", "2000", - "rv64d/I-FADD-D-01", "2000", - "rv64d/I-FCLASS-D-01", "2000", - "rv64d/I-FCVT-D-L-01", "2000", - "rv64d/I-FCVT-D-LU-01", "2000", - "rv64d/I-FCVT-D-S-01", "2000", - "rv64d/I-FCVT-D-W-01", "2000", - "rv64d/I-FCVT-D-WU-01", "2000", - "rv64d/I-FCVT-L-D-01", "2000", - "rv64d/I-FCVT-LU-D-01", "2000", - "rv64d/I-FCVT-S-D-01", "2000", - "rv64d/I-FCVT-W-D-01", "2000", - "rv64d/I-FCVT-WU-D-01", "2000", - "rv64d/I-FDIV-D-01", "2000", - "rv64d/I-FEQ-D-01", "2000", - "rv64d/I-FLD-D-01", "2000", - "rv64d/I-FLE-D-01", "2000", - "rv64d/I-FLT-D-01", "2000", - "rv64d/I-FMADD-D-01", "2000", - "rv64d/I-FMAX-D-01", "2000", - "rv64d/I-FMIN-D-01", "2000", - "rv64d/I-FMSUB-D-01", "2000", - "rv64d/I-FMUL-D-01", "2000", "rv64d/I-FMV-D-X-01", "2000", - "rv64d/I-FNMADD-D-01", "2000", - "rv64d/I-FNMSUB-D-01", "2000", - "rv64d/I-FSD-01", "2000", - "rv64d/I-FSGNJ-D-01", "2000", - "rv64d/I-FSGNJN-D-01", "2000", - "rv64d/I-FSGNJX-D-01", "2000", - "rv64d/I-FSQRTD-01", "2000", - "rv64d/I-FSUB-D-01", "2000" + // "rv64d/I-FADD-D-01", "2000", + // "rv64d/I-FCLASS-D-01", "2000", + // "rv64d/I-FCVT-D-L-01", "2000", + // "rv64d/I-FCVT-D-LU-01", "2000", + // "rv64d/I-FCVT-D-S-01", "2000", + // "rv64d/I-FCVT-D-W-01", "2000", + // "rv64d/I-FCVT-D-WU-01", "2000", + // "rv64d/I-FCVT-L-D-01", "2000", + // "rv64d/I-FCVT-LU-D-01", "2000", + // "rv64d/I-FCVT-S-D-01", "2000", + // "rv64d/I-FCVT-W-D-01", "2000", + // "rv64d/I-FCVT-WU-D-01", "2000", + // "rv64d/I-FDIV-D-01", "2000", + // "rv64d/I-FEQ-D-01", "2000", + // "rv64d/I-FLD-D-01", "2000", + // "rv64d/I-FLE-D-01", "2000", + // "rv64d/I-FLT-D-01", "2000", + // "rv64d/I-FMADD-D-01", "2000", + // "rv64d/I-FMAX-D-01", "2000", + // "rv64d/I-FMIN-D-01", "2000", + // "rv64d/I-FMSUB-D-01", "2000", + // "rv64d/I-FMUL-D-01", "2000", + // "rv64d/I-FMV-X-D-01", "2000", + // "rv64d/I-FNMADD-D-01", "2000", + // "rv64d/I-FNMSUB-D-01", "2000", + // "rv64d/I-FSD-01", "2000", + // "rv64d/I-FSGNJ-D-01", "2000", + // "rv64d/I-FSGNJN-D-01", "2000", + // "rv64d/I-FSGNJX-D-01", "2000", + // "rv64d/I-FSQRTD-01", "2000", + // "rv64d/I-FSUB-D-01", "2000" }; @@ -528,7 +528,7 @@ string tests32f[] = '{ if (`M_SUPPORTED) tests = {tests, tests64m}; if (`A_SUPPORTED) tests = {tests, tests64a}; if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; - if (`F_SUPPORTED) tests = {tests64f, tests}; + // if (`F_SUPPORTED) tests = {tests64f, tests}; if (`D_SUPPORTED) tests = {tests64d, tests}; end //tests = {tests64a, tests}; @@ -655,7 +655,7 @@ string tests32f[] = '{ errors = errors+1; $display(" Error on test %s result %d: adr = %h sim = %h, signature = %h", tests[test], i, (testadr+i)*`XLEN/8, dut.uncore.dtim.RAM[testadr+i], signature[i]); - // $stop;//***debug + $stop;//***debug end end i = i + 1; @@ -923,8 +923,8 @@ module instrNameDecTB( else name = "ILLEGAL"; 10'b0000111_010: name = "FLW"; 10'b0100111_010: name = "FSW"; - 10'b0000111_010: name = "FLD"; - 10'b0100111_010: name = "FSD"; + 10'b0000111_011: name = "FLD"; + 10'b0100111_011: name = "FSD"; default: name = "ILLEGAL"; endcase endmodule