# Review notes (free text ahead of the first "diff --git" header).
# - Line breaks are restored from a flattened copy of this patch. Indentation and
#   whitespace-only context lines were not preserved there: indentation below is
#   conventional, hunk headers are kept verbatim, and blank context lines are only
#   re-inserted in the fpuhazard.sv hunk where the -/+ grouping requires them.
# - fctrl.sv, first always_comb hunk: the '-' and '+' lines are token-identical in the
#   flattened copy (whitespace-only change) apart from the review fix below.
# - Review fix (fctrl.sv): the 7'b11100?? arm left FResultSelD unassigned when
#   Funct3D != 3'b001 and Funct7D[1] == 1; a final else now mirrors the default arm.
# - hazard.sv: the StallDCause comment on the added line now mentions FStallD.
# - fpuhazard.sv / fpu.sv: NOTE(review) remarks added on added lines; no logic changed.
diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv
index ba8332a29..7c9a8813a 100755
--- a/wally-pipelined/src/fpu/fctrl.sv
+++ b/wally-pipelined/src/fpu/fctrl.sv
@@ -15,6 +15,7 @@ module fctrl (
   output logic [2:0] FrmD,
   output logic [1:0] FMemRWD,
   output logic OutputInput2D,
+  output logic In2UsedD, In3UsedD,
   output logic FWriteIntD);
@@ -55,50 +56,50 @@ module fctrl (
   //(or equivalent)
   always_comb begin
-    //checks all but FMA/store/load
-    IllegalFPUInstr2D = 0;
-    if(OpD == 7'b1010011) begin
-      casez(Funct7D)
-        //compare
-        7'b10100?? : FResultSelD = 3'b001;
-        //div/sqrt
-        7'b0?011?? : FResultSelD = 3'b000;
-        //add/sub
-        7'b0000??? : FResultSelD = 3'b100;
-        //mult
-        7'b00010?? : FResultSelD = 3'b010;
-        //convert (not precision)
-        7'b110?0?? : FResultSelD = 3'b100;
-        //convert (precision)
-        7'b010000? : FResultSelD = 3'b100;
-        //Min/Max
-        7'b00101?? : FResultSelD = 3'b001;
-        //sign injection
-        7'b00100?? : FResultSelD = 3'b011;
-        //classify //only if funct3 = 001
-        7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101;
-        //output ReadData1
-                     else if (Funct7D[1] == 0) FResultSelD = 3'b111;
-        //output SrcW
-        7'b111100? : FResultSelD = 3'b110;
-        default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
-      endcase
-    end
-    //FMA/store/load
-    else begin
-      case(OpD)
-        //4 FMA instructions
-        7'b1000011 : FResultSelD = 3'b010;
-        7'b1000111 : FResultSelD = 3'b010;
-        7'b1001011 : FResultSelD = 3'b010;
-        7'b1001111 : FResultSelD = 3'b010;
-        //store
-        7'b0100111 : FResultSelD = 3'b111;
-        //load
-        7'b0000111 : FResultSelD = 3'b111;
-        default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
-      endcase
-    end
+    //checks all but FMA/store/load
+    IllegalFPUInstr2D = 0;
+    if(OpD == 7'b1010011) begin
+      casez(Funct7D)
+        //compare
+        7'b10100?? : FResultSelD = 3'b001;
+        //div/sqrt
+        7'b0?011?? : FResultSelD = 3'b000;
+        //add/sub
+        7'b0000??? : FResultSelD = 3'b100;
+        //mult
+        7'b00010?? : FResultSelD = 3'b010;
+        //convert (not precision)
+        7'b110?0?? : FResultSelD = 3'b100;
+        //convert (precision)
+        7'b010000? : FResultSelD = 3'b100;
+        //Min/Max
+        7'b00101?? : FResultSelD = 3'b001;
+        //sign injection
+        7'b00100?? : FResultSelD = 3'b011;
+        //classify //only if funct3 = 001
+        7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101;
+        //output ReadData1
+                     else if (Funct7D[1] == 0) FResultSelD = 3'b111; else begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end // review fix: assign FResultSelD on every path of this always_comb; the remaining encodings are flagged illegal like the default arm
+        //output SrcW
+        7'b111100? : FResultSelD = 3'b110;
+        default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
+      endcase
+    end
+    //FMA/store/load
+    else begin
+      case(OpD)
+        //4 FMA instructions
+        7'b1000011 : FResultSelD = 3'b010;
+        7'b1000111 : FResultSelD = 3'b010;
+        7'b1001011 : FResultSelD = 3'b010;
+        7'b1001111 : FResultSelD = 3'b010;
+        //store
+        7'b0100111 : FResultSelD = 3'b111;
+        //load
+        7'b0000111 : FResultSelD = 3'b111;
+        default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
+      endcase
+    end
   end
   assign OutputInput2D = OpD == 7'b0100111;
@@ -151,11 +152,12 @@ module fctrl (
   always_comb begin
     IllegalFPUInstr1D = 0;
+    In3UsedD = 0;
     case (FResultSelD)
       // div/sqrt
       // fdiv = ???0
       // fsqrt = ???1
-      3'b000 : OpCtrlD = {3'b0, Funct7D[5]};
+      3'b000 : begin OpCtrlD = {3'b0, Funct7D[5]}; In2UsedD = ~Funct7D[5]; end
       // cmp
       // fmin = ?100
       // fmax = ?101
@@ -163,7 +165,7 @@ module fctrl (
       // flt = ?001
       // fle = ?011
       // {?, is min or max, is eq or le, is lt or le}
-      3'b001 : OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])};
+      3'b001 : begin OpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; In2UsedD = 1'b1; end
       //fma/mult
       // fmadd = ?000
       // fmsub = ?001
@@ -171,12 +173,12 @@ module fctrl (
       // fnmsub = ?011
       // fmul = ?100
       // {?, is mul, is negitive, is sub}
-      3'b010 : OpCtrlD = {1'b0, OpD[4:2]};
+      3'b010 : begin OpCtrlD = {1'b0, OpD[4:2]}; In2UsedD = 1'b1; In3UsedD = ~OpD[4]; end
       // sgn inj
       // fsgnj = ??00
       // fsgnjn = ??01
       // fsgnjx = ??10
-      3'b011 : OpCtrlD = {2'b0, Funct3D[1:0]};
+      3'b011 : begin OpCtrlD = {2'b0, Funct3D[1:0]}; In2UsedD = 1'b1; end
       // add/sub/cnvt
       // fadd = 0000
       // fsub = 0001
@@ -191,13 +193,13 @@ module fctrl (
       // fcvt.d.wu = 1111
       // fcvt.d.s = 1000
       // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub
-      3'b100 : OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])};
+      3'b100 : begin OpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), Rs2D[0]|(Funct7D[2]&~Funct7D[5])}; In2UsedD = ~Funct7D[5]; end
       // classify {?, ?, ?, ?}
-      3'b101 : OpCtrlD = 4'b0;
+      3'b101 : begin OpCtrlD = 4'b0; In2UsedD = 1'b0; end
       // output SrcAW
       // fmv.w.x = ???0
       // fmv.w.d = ???1
-      3'b110 : OpCtrlD = {3'b0, Funct7D[0]};
+      3'b110 : begin OpCtrlD = {3'b0, Funct7D[0]}; In2UsedD = 1'b0; end
       // output Input1
       // flw = ?000
       // fld = ?001
@@ -206,8 +208,8 @@ module fctrl (
       // fmv.x.w = ?100
       // fmv.d.w = ?101
       // {?, is mv, is store, is double or fcvt.d.w}
-      3'b111 : OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])};
-      default : begin OpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; end
+      3'b111 : begin OpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; In2UsedD = OpD[5]; end
+      default : begin OpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; In2UsedD = 1'b0; end
     endcase
   end
diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index 3f79946c0..1b6662629 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -13,11 +13,12 @@ module fpu (
   input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
   input logic StallE, StallM, StallW,
   input logic FlushE, FlushM, FlushW,
+  input logic [`AHBW-1:0] HRDATA,
   input logic RegWriteD,
   output logic [4:0] SetFflagsM,
   output logic [31:0] FSROutW,
   output logic [1:0] FMemRWM,
-  output logic FStallE,
+  output logic FStallD,
   output logic FWriteIntW,
   output logic [`XLEN-1:0] FWriteDataM, // Integer input being written into fpreg
   output logic DivSqrtDoneE,
@@ -84,7 +85,7 @@ module fpu (
   logic DivBusyM;
   logic [1:0] Input1MuxD, Input2MuxD;
   logic Input3MuxD;
-
+  logic In2UsedD, In3UsedD;
   //Hazard unit for FPU
   fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
@@ -346,6 +347,7 @@ module fpu (
   //instantiation of M stage regfile signals
   logic [4:0] RdM;
   logic [`XLEN-1:0] Input1M, Input2M, Input3M;
+  logic [`XLEN-1:0] LoadStoreResultM;
   //instantiation of M stage add/cvt signals
   logic [63:0] AddResultM;
@@ -485,6 +487,8 @@ module fpu (
   assign FWriteDataM = Input1M;
+  mux2 #(64) LoadStoreResultMux(HRDATA, Input1M, |OpCtrlM[2:1], LoadStoreResultM); // NOTE(review): 64-bit mux fed by an `AHBW-wide HRDATA and `XLEN-wide Input1M/LoadStoreResultM -- confirm the widths agree in every configuration
+
   fma2 fma2(.*);
   //second instance of two-stage floating-point add/cvt unit
@@ -519,7 +523,7 @@ module fpu (
   logic [4:0] SgnFlagsW;
   //instantiation of W stage regfile signals
-  logic [`XLEN-1:0] Input1W;
+  logic [`XLEN-1:0] LoadStoreResultW;
   logic [`XLEN-1:0] SrcAW;
   //instantiation of W stage add/cvt signals
@@ -576,7 +580,7 @@ module fpu (
   flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
   flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
   flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
-  flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, Input1M, Input1W);
+  flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, LoadStoreResultM, LoadStoreResultW);
   flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
   ////END M/W PIPE
@@ -628,6 +632,8 @@ module fpu (
   // ( (FResultSelW[0]) ? (FmaResultW) : ({62'b0,CmpFCCW}) )
   // : ( (FResultSelW[0]) ? (AddResultW) : (DivResultW) )
   // );
+
+
   always_comb begin
     case (FResultSelW)
       // div/sqrt
@@ -644,8 +650,8 @@ module fpu (
       3'b101 : FPUResultDirW = ClassResultW;
       // output SrcAW
       3'b110 : FPUResultDirW = SrcAW;
-      // output ReadData1
-      3'b111 : FPUResultDirW = Input1W;
+      // Load/Store/Move to FP-register
+      3'b111 : FPUResultDirW = LoadStoreResultW;
       default : FPUResultDirW = {64{1'bx}};
     endcase
   end
diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv
index 62787edc4..4fb4e65a7 100644
--- a/wally-pipelined/src/fpu/fpuhazard.sv
+++ b/wally-pipelined/src/fpu/fpuhazard.sv
@@ -32,8 +32,10 @@ module fpuhazard(
   input logic DivBusyM,
   input logic RegWriteD,
   input logic [2:0] FResultSelD, FResultSelE,
+  input logic IllegalFPUInstrD,
+  input logic In2UsedD, In3UsedD,
   // Stall outputs
-  output logic FStallE,
+  output logic FStallD,
   output logic [1:0] Input1MuxD, Input2MuxD,
   output logic Input3MuxD
   );
@@ -44,27 +46,28 @@ module fpuhazard(
     Input1MuxD = 2'b00;
     Input2MuxD = 2'b00;
     Input3MuxD = 1'b0;
-    FStallE = DivBusyM;
+    FStallD = DivBusyM;
+    if (~IllegalFPUInstrD) begin
 
-    if ((Adr1 == RdE) & (FRegWriteE | ((FResultSelE == 3'b110) & RegWriteD)))
-      if (FResultSelE == 3'b110) Input1MuxD = 2'b11; // choose SrcAM
-      else FStallE = 1'b1; // otherwise stall
+      if ((Adr1 == RdE) & (FRegWriteE | ((FResultSelE == 3'b110) & RegWriteD)))
+        if (FResultSelE == 3'b110) Input1MuxD = 2'b11; // choose SrcAM
+        else FStallD = 1'b1; // otherwise stall
+      else if ((Adr1 == RdM) & FRegWriteM) Input1MuxD = 2'b01; // choose FPUResultDirW
+      else if ((Adr1 == RdW) & FRegWriteW) Input1MuxD = 2'b11; // choose FPUResultDirE -- NOTE(review): 2'b11 is also the "choose SrcAM" select above, while Input2MuxD uses 2'b10 for this case; confirm against the Input1 mux
+
 
-    else if ((Adr1 == RdM) & FRegWriteM) Input1MuxD = 2'b01; // choose FPUResultDirW
-    else if ((Adr1 == RdW) & FRegWriteW) Input1MuxD = 2'b11; // choose FPUResultDirE
-
-
-
-    else if ((Adr2 == RdE) & FRegWriteE) FStallE = 1'b1;//***add a signals saying whether input 1, 2 or 3 are used
-    else if ((Adr2 == RdM) & FRegWriteM) Input2MuxD = 2'b01; // choose FPUResultDirW
-    else if ((Adr2 == RdW) & FRegWriteW) Input2MuxD = 2'b10; // choose FPUResultDirE
+      if(In2UsedD)
+        if ((Adr2 == RdE) & FRegWriteE) FStallD = 1'b1;
+        else if ((Adr2 == RdM) & FRegWriteM) Input2MuxD = 2'b01; // choose FPUResultDirW
+        else if ((Adr2 == RdW) & FRegWriteW) Input2MuxD = 2'b10; // choose FPUResultDirE
+      if(In3UsedD)
+        if ((Adr3 == RdE) & FRegWriteE) FStallD = 1'b1;
+        else if ((Adr3 == RdM) & FRegWriteM) FStallD = 1'b1;
+        else if ((Adr3 == RdW) & FRegWriteW) Input3MuxD = 1'b1; // choose FPUResultDirE
+    end
 
-
-    else if ((Adr3 == RdE) & FRegWriteE) FStallE = 1'b1;
-    else if ((Adr3 == RdM) & FRegWriteM) FStallE = 1'b1;
-    else if ((Adr3 == RdW) & FRegWriteW) Input3MuxD = 1'b1; // choose FPUResultDirE
   end
 endmodule
diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv
index 88b48ec4c..7bd592864 100644
--- a/wally-pipelined/src/hazard/hazard.sv
+++ b/wally-pipelined/src/hazard/hazard.sv
@@ -32,7 +32,7 @@ module hazard(
   input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
   input logic LoadStallD, MulDivStallD, CSRRdStallD,
   input logic DataStall, ICacheStallF,
-  input logic FStallE,
+  input logic FStallD,
   input logic DivBusyE,
   // Stall & flush outputs
   output logic StallF, StallD, StallE, StallM, StallW,
@@ -59,9 +59,9 @@ module hazard(
   assign BranchFlushDE = BPPredWrongE | RetM | TrapM;
   assign StallFCause = CSRWritePendingDEM & ~(BranchFlushDE);
-  assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD) & ~(BranchFlushDE); // stall in decode if instruction is a load/mul/csr dependent on previous
+  assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FStallD) & ~(BranchFlushDE); // stall in decode if instruction is a load/mul/csr dependent on previous, or the FPU requests a decode stall (FStallD)
   // assign StallDCause = LoadStallD | MulDivStallD | CSRRdStallD; // stall in decode if instruction is a load/mul/csr dependent on previous
-  assign StallECause = DivBusyE | FStallE;
+  assign StallECause = DivBusyE;
   assign StallMCause = 0;
   assign StallWCause = DataStall | ICacheStallF;
diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv
index e2c018420..5164a1f1e 100644
--- a/wally-pipelined/src/wally/wallypipelinedhart.sv
+++ b/wally-pipelined/src/wally/wallypipelinedhart.sv
@@ -97,7 +97,7 @@ module wallypipelinedhart (
   logic RegWriteD;
   logic [`XLEN-1:0] FWriteDataM;
   logic SquashSCW;
-  logic FStallE;
+  logic FStallD;
   logic FWriteIntW;
   logic [31:0] FSROutW;
   logic DivSqrtDoneE;
diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index 77d3bb84b..cb4001504 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -134,7 +134,8 @@ string tests32f[] = '{
   // "rv64d/I-FCVT-WU-D-01", "2000",
   // "rv64d/I-FDIV-D-01", "2000",
   // "rv64d/I-FEQ-D-01", "2000",
-  "rv64d/I-FLD-D-01", "2000"
+  "rv64d/I-FSD-01", "2000",
+  "rv64d/I-FLD-01", "2420"
   // "rv64d/I-FLE-D-01", "2000",
   // "rv64d/I-FLT-D-01", "2000",
   // "rv64d/I-FMADD-D-01", "2000",
@@ -145,7 +146,6 @@ string tests32f[] = '{
   // "rv64d/I-FMV-X-D-01", "2000",
   // "rv64d/I-FNMADD-D-01", "2000",
   // "rv64d/I-FNMSUB-D-01", "2000",
-  //"rv64d/I-FSD-01", "2000"
   // "rv64d/I-FSGNJ-D-01", "2000",
   // "rv64d/I-FSGNJN-D-01", "2000",
   // "rv64d/I-FSGNJX-D-01", "2000",