diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index 5749d0db..a9fcb564 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -6,6 +6,7 @@ module fctrl ( input logic [2:0] Funct3D, input logic [2:0] FRM_REGW, output logic IllegalFPUInstrD, + output logic IsFPD, output logic FWriteEnD, output logic FDivStartD, output logic [2:0] FResultSelD, @@ -27,20 +28,19 @@ module fctrl ( //write is enabled for all fp instruciton op codes //sans fp load - logic isFP, isFPLD; always_comb begin //case statement is easier to modify //in case of errors case(OpD) //fp instructions sans load - 7'b1010011 : isFP = 1'b1; - 7'b1000011 : isFP = 1'b1; - 7'b1000111 : isFP = 1'b1; - 7'b1001011 : isFP = 1'b1; - 7'b1001111 : isFP = 1'b1; - 7'b0100111 : isFP = 1'b1; - 7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111 - default : isFP = 1'b0; + 7'b1010011 : IsFPD = 1'b1; + 7'b1000011 : IsFPD = 1'b1; + 7'b1000111 : IsFPD = 1'b1; + 7'b1001011 : IsFPD = 1'b1; + 7'b1001111 : IsFPD = 1'b1; + 7'b0100111 : IsFPD = 1'b1; + 7'b0000111 : IsFPD = 1'b1;// KEP change 7'b1010011 to 7'b0000111 + default : IsFPD = 1'b0; endcase end @@ -218,5 +218,5 @@ module fctrl ( // is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]); // if not writting to int reg and not a store function and not move - assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP; + assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & IsFPD; endmodule diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index e886c66e..7f93d33a 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -30,15 +30,15 @@ module fpu ( input logic [2:0] FRM_REGW, // Rounding mode from 
CSR input logic [31:0] InstrD, input logic [`XLEN-1:0] ReadDataW, // Read data from memory - input logic RegWriteD, // register write enable from ieu input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - output logic [1:0] FMemRWM, // Read/write enable for memory {read, write} + output logic IsFPD, IsFPE, // Instruction has a floating-point opcode (decode stage, execute stage) output logic FStallD, // Stall the decode stage if Div/Sqrt instruction output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable - output logic [`XLEN-1:0] FWriteDataM, // Data to be written to memory + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`XLEN-1:0] FIntResM, // XLEN-wide result chosen by IntResMux in the memory stage output logic FDivBusyE, // Is the divison/sqrt unit busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic [4:0] SetFflagsM, // FPU flags @@ -51,24 +51,27 @@ module fpu ( logic FDivStartD, FDivStartE; // Start division logic FWriteIntD; // Write to integer register logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction - logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory - logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal - logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal - logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal - logic FInput2UsedD; // Is input 2 used - logic FInput3UsedD; // Is input 3 used + logic [1:0] FMemRWD; // Read and write enable for memory + logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal + logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal + logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal + logic SrcYUsedD; // Is input 2 used + logic SrcZUsedD; // Is 
input 3 used logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component logic SelLoadInputE, SelLoadInputM; // Select which adress to load when single precision + logic FInput2UsedD, FInput3UsedD; + logic [4:0] Adr1E, Adr2E, Adr3E; // regfile signals logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining logic [63:0] FWDM; // Write data for FP register logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FInput1E, FInput1M, FInput1W, FInput1tmpE; // Input 1 to the various units (after forwarding) - logic [63:0] FInput2E, FInput2M; // Input 2 to the various units (after forwarding) - logic [63:0] FInput3E, FInput3M; // Input 3 to the various units (after forwarding) + logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding) + logic [`XLEN-1:0] SrcXMAligned; + logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding) + logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions // div/sqrt signals @@ -123,19 +126,13 @@ module fpu ( logic [4:0] FAddFlagsM, FAddFlagsW; // cmp signals - logic [7:0] WE, WM; - logic [7:0] XE, XM; - logic ANaNE, ANaNM; - logic BNaNE, BNaNM; - logic AzeroE, AzeroM; - logic BzeroE, BzeroM; - logic CmpInvalidM, CmpInvalidW; - logic [1:0] CmpFCCM, CmpFCCW; - logic [63:0] FCmpResultM, FCmpResultW; + logic CmpInvalidE, CmpInvalidM, CmpInvalidW; + logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW; // fsgn signals logic [63:0] SgnResultE, SgnResultM, SgnResultW; logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; + logic [63:0] FResM; // instantiation of W stage regfile 
signals logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; @@ -150,8 +147,6 @@ module fpu ( //DECODE STAGE - // Hazard unit for FPU - fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*); // top-level controller for FPU fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); @@ -172,22 +167,45 @@ module fpu ( //***************** // other D/E pipe registers //***************** - flopenrc #(64) DEReg14(clk, reset, FlushE, ~StallE, FPUResult64W, FPUResult64E); - flopenrc #(28) CtrlRegE(clk, reset, FlushE, ~StallE, - {FWriteEnD, FResultSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FDivStartD, FForwardInput1D, FForwardInput2D, FForwardInput3D, FWriteIntD, FOutputInput2D, FMemRWD, InstrD[15]}, - {FWriteEnE, FResultSelE, FrmE, FmtE, RdE, FOpCtrlE, FDivStartE, FForwardInput1E, FForwardInput2E, FForwardInput3E, FWriteIntE, FOutputInput2E, FMemRWE, SelLoadInputE}); - + // flopenrc #(64) DEReg14(clk, reset, FlushE, ~StallE, FPUResult64W, FPUResult64E); + // flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FWriteEnD, FWriteEnE); + // flopenrc #(3) CtrlRegE2(clk, reset, FlushE, ~StallE, FResultSelD, FResultSelE); + // flopenrc #(3) CtrlRegE3(clk, reset, FlushE, ~StallE, FrmD, FrmE); + // flopenrc #(1) CtrlRegE4(clk, reset, FlushE, ~StallE, FmtD, FmtE); + // flopenrc #(5) CtrlRegE5(clk, reset, FlushE, ~StallE, InstrD[11:7], RdE); + // flopenrc #(4) CtrlRegE6(clk, reset, FlushE, ~StallE, FOpCtrlD, FOpCtrlE); + flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + // flopenrc #(1) CtrlRegE8(clk, reset, FlushE, ~StallE, FWriteIntD, FWriteIntE); + // flopenrc #(1) CtrlRegE9(clk, reset, FlushE, ~StallE, FOutputInput2D, FOutputInput2E); + // flopenrc #(2) CtrlRegE10(clk, reset, FlushE, ~StallE, FMemRWD, FMemRWE); + // flopenrc #(1) CtrlRegE11(clk, 
reset, FlushE, ~StallE, InstrD[15], SelLoadInputE); + flopenrc #(20) CtrlRegE(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD, InstrD[15], IsFPD}, + {FWriteEnE, FResultSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE, SelLoadInputE, IsFPE}); + //EXECUTION STAGE // input muxs for forwarding - mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); - mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, FForwardInput1E, FInput1tmpE); - mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E); - mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E); - mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E); + // single vs double for SRCAM + // mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); + // //input 1 forwarding mux + // mux4 #(64) SrcXEmux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, ForwardXE, SrcXtmpE); + // mux3 #(64) SrcYEmux(FRD2E, FPUResult64W, FPUResult64E, ForwardYE, SrcYE); + // mux2 #(64) SrcZEmux(FRD3E, FPUResult64E, ForwardZE, SrcZE); + // mux2 #(64) FOutputInput2mux(SrcXtmpE, SrcYE, FOutputInput2E, SrcXE); + + // Hazard unit for FPU + fpuhazard hazard(.*); + + mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); + mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); + mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); + // first of two-stage instance of floating-point fused multiply-add unit - fma1 fma1 (.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FOpCtrlE(FOpCtrlE[2:0]),.*); + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*); // first and only instance of floating-point divider logic fpdivClk; @@ -198,10 +216,10 @@ module fpu ( .ECLK(fpdivClk)); // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(FInput1E), .q(DivInput1E), + flopenrc #(64) reg_input1 
(.d(SrcXE), .q(DivInput1E), .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d(FInput2E), .q(DivInput2E), + flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); @@ -211,20 +229,21 @@ module fpu ( fpuaddcvt1 fpadd1 (.*); // first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]); + fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE); // first and only instance of floating-point sign converter fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); // first and only instance of floating-point classify unit fpuclassify fpuclass (.*); + assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; //***************** //fpregfile D/E pipe registers //***************** - flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FInput1E, FInput1M); - flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, FInput2E, FInput2M); - flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, FInput3E, FInput3M); + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); + flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); + flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); //***************** // fma E/M pipe registers @@ -276,12 +295,15 @@ module fpu ( //***************** // fpcmp E/M pipe registers //***************** - flopenrc #(8) EMRegCmp1(clk, reset, FlushM, ~StallM, WE, WM); - flopenrc #(8) EMRegCmp2(clk, reset, FlushM, ~StallM, XE, XM); - flopenrc #(1) EMRegcmp3(clk, reset, FlushM, ~StallM, ANaNE, ANaNM); - flopenrc #(1) EMRegCmp4(clk, reset, FlushM, ~StallM, BNaNE, BNaNM); - flopenrc #(1) EMRegCmp5(clk, reset, FlushM, ~StallM, AzeroE, AzeroM); - flopenrc #(1) EMRegCmp6(clk, reset, FlushM, ~StallM, BzeroE, BzeroM); + // flopenrc #(8) EMRegCmp1(clk, reset, FlushM, ~StallM, WE, WM); + // flopenrc 
#(8) EMRegCmp2(clk, reset, FlushM, ~StallM, XE, XM); + // flopenrc #(1) EMRegcmp3(clk, reset, FlushM, ~StallM, ANaNE, ANaNM); + // flopenrc #(1) EMRegCmp4(clk, reset, FlushM, ~StallM, BNaNE, BNaNM); + // flopenrc #(1) EMRegCmp5(clk, reset, FlushM, ~StallM, AzeroE, AzeroM); + // flopenrc #(1) EMRegCmp6(clk, reset, FlushM, ~StallM, BzeroE, BzeroM); + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM); + // flopenrc #(2) EMRegCmp2(clk, reset, FlushM, ~StallM, CmpFCCE, CmpFCCM); + flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM); // put this in for the event we want to delay fsgn - will otherwise bypass //***************** @@ -300,7 +322,7 @@ module fpu ( flopenrc #(5) EMReg5(clk, reset, FlushM, ~StallM, RdE, RdM); flopenrc #(4) EMReg6(clk, reset, FlushM, ~StallM, FOpCtrlE, FOpCtrlM); flopenrc #(1) EMReg7(clk, reset, FlushM, ~StallM, FWriteIntE, FWriteIntM); - flopenrc #(2) EMReg8(clk, reset, FlushM, ~StallM, FMemRWE, FMemRWM); + // flopenrc #(2) EMReg8(clk, reset, FlushM, ~StallM, FMemRWE, FMemRWM); flopenrc #(1) EMReg9(clk, reset, FlushM, ~StallM, SelLoadInputE, SelLoadInputM); //***************** @@ -310,32 +332,35 @@ module fpu ( //BEGIN MEMORY STAGE - assign FWriteDataM = FmtM ? FInput1M[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FInput1M[63:32]}; + mux2 #(64) FResMux(AlignedSrcAM, SgnResultM, FResultSelM == 3'b011, FResM); + assign SrcXMAligned = FmtM ? 
SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(SrcXMAligned, FCmpResultM[`XLEN-1:0], ClassResultM[`XLEN-1:0], {FResultSelM == 3'b101, FResultSelM == 3'b001}, FIntResM); + //adjecent adress values are sent to the FPU, select the correct one // -imm is 80000 most of the time vs the error one which is 00000 // mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); + // mux2 #(64) FLoadStoreResultMux(FLoadResultM, SrcXM, |FOpCtrlM[2:1], FLoadStoreResultM); - fma2 fma2(.X(FInput1M), .Y(FInput2M), .Z(FInput3M), .FOpCtrlM(FOpCtrlM[2:0]), .*); + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*); // second instance of two-stage floating-point add/cvt unit fpuaddcvt2 fpadd2 (.*); // second instance of two-stage floating-point comparator - fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), - .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*); + // fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), + // .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(SrcXM), .op2(SrcYM), .*); // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - //***************** //fpregfile M/W pipe registers //***************** - flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, FInput1M, FInput1W); + flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW); + flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW); //***************** // fma M/W pipe registers @@ -360,7 +385,7 @@ module fpu ( // fpcmp M/W pipe registers //***************** flopenrc #(1) MWRegCmp1(clk, reset, 
FlushW, ~StallW, CmpInvalidM, CmpInvalidW); - flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); + // flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW); //***************** @@ -396,10 +421,10 @@ module fpu ( // mux3 #(64) FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); + // mux2 #(64) FLoadStoreResultMux(FLoadResultM, SrcXM, |FOpCtrlM[2:1], FLoadStoreResultM); //***RV32D needs to give two bus transactions mux2 #(64) FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW); - mux2 #(64) FLoadStoreResultMux(FLoadResultW, FInput1W, |FOpCtrlW[2:1], FLoadStoreResultW); + mux2 #(64) FLoadStoreResultMux(FLoadResultW, SrcYW, |FOpCtrlW[2:1], FLoadStoreResultW); diff --git a/wally-pipelined/src/fpu/fpuaddcvt1.sv b/wally-pipelined/src/fpu/fpuaddcvt1.sv index febd47d1..8f045dcd 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt1.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv @@ -27,10 +27,10 @@ // -module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE); +module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE); - input logic [63:0] FInput1E; // 1st input operand (A) - input logic [63:0] 
FInput2E; // 2nd input operand (B) + input logic [63:0] SrcXE; // 1st input operand (A) + input logic [63:0] SrcYE; // 2nd input operand (B) input logic [3:0] FOpCtrlE; // Function opcode input logic FmtE; // Result Precision (1 for double, 0 for single) @@ -81,12 +81,12 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, // and the sign of the first operand is set appropratiately based on // if the operation is absolute value or negation. - convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P); + convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P); // Test for exceptions and return the "Invalid Operation" and // "Denormalized" Input Flags. The "AddSelInvE" is used in // the third pipeline stage to select the result. Also, AddOp1NormE - // and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized. + // and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized. // sub is one if the effective operation is subtaction. exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, @@ -159,8 +159,8 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, // Place either the sign-extened 32-bit value or the original 64-bit value // into IntValue (to be used for integer to floating point conversion) - assign IntValue [31:0] = FInput1E[31:0]; - assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32]; + assign IntValue [31:0] = SrcXE[31:0]; + assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32]; // If doing an integer to floating point conversion, mantissaA3 is set to // IntVal and the prenomalized exponent is set to 1084. 
Otherwise, diff --git a/wally-pipelined/src/fpu/fpuclassify.sv b/wally-pipelined/src/fpu/fpuclassify.sv index 1000bdf4..b320b2f0 100644 --- a/wally-pipelined/src/fpu/fpuclassify.sv +++ b/wally-pipelined/src/fpu/fpuclassify.sv @@ -1,7 +1,8 @@ + `include "wally-config.vh" module fpuclassify ( - input logic [63:0] FInput1E, + input logic [63:0] SrcXE, input logic FmtE, // 0-single 1-double output logic [63:0] ClassResultE ); @@ -13,9 +14,9 @@ module fpuclassify ( logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan; // single and double precision layouts - assign single = FInput1E[63:32]; - assign double = FInput1E; - assign sign = FInput1E[63]; + assign single = SrcXE[63:32]; + assign double = SrcXE; + assign sign = SrcXE[63]; // basic calculations for readabillity assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23]; @@ -43,10 +44,7 @@ module fpuclassify ( // bit 7 - +infinity // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassResultE = FmtE ? {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity} : - {{22{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity, {32{1'b0}}}; - + assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, + ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity}; endmodule diff --git a/wally-pipelined/src/fpu/fpucmp1.sv b/wally-pipelined/src/fpu/fpucmp1.sv index 1cf267f2..3a8245e6 100755 --- a/wally-pipelined/src/fpu/fpucmp1.sv +++ b/wally-pipelined/src/fpu/fpucmp1.sv @@ -1,3 +1,4 @@ + // // File name : fpcomp.v // Title : Floating-Point Comparator @@ -17,9 +18,9 @@ // and correct for sign bits // // This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal Sel that indicates the type of +// 
signals, and a 2-bit signal FOpCtrlE that indicates the type of // operands being compared as indicated below. -// Sel Description +// FOpCtrlE Description // 00 double precision numbers // 01 single precision numbers // 10 half precision numbers @@ -37,24 +38,41 @@ // It also produces an invalid operation flag, which is one // if either of the input operands is a signaling NaN per 754 -module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec - - input logic [63:0] op1; - input logic [63:0] op2; - input logic [1:0] Sel; +`include "wally-config.vh" +module fpucmp1 ( + input logic [63:0] op1, + input logic [63:0] op2, + input logic [2:0] FOpCtrlE, // FPU op control bits; passed to both exc1 and exc2 + input logic FmtE, // Not referenced directly here; reaches exc2 through its .* connection - output logic [7:0] w, x; - output logic ANaN, BNaN; - output logic Azero, Bzero; + + output logic Invalid, // Invalid Operation + // output logic [1:0] FCC, // Condition Codes + output logic [63:0] FCmpResultE); // Driven by exc2 through its .* connection + // Perform magnitude comparison between the 63 least significant bits + // of the input operands. Only LT and EQ are returned, since GT can + // be determined from these values. + logic [1:0] FCC; // Condition Codes + logic [7:0] w, x; + logic ANaN, BNaN; + logic Azero, Bzero; + logic LT; // magnitude op1 < magnitude op2 + logic EQ; // magnitude op1 = magnitude op2 + + magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); + + // Determine final values based on output of magnitude comparison, + // sign bits, and special case testing. + exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE); // Perform magnitude comparison between the 63 least signficant bits // of the input operands. Only LT and EQ are returned, since GT can // be determined from these values. - magcompare64b_1 magcomp2 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); + magcompare64b_2 magcomp2 (LT, EQ, w, x); // Determine final values based on output of magnitude comparison, // sign bits, and special case testing. 
- exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, Sel); + exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*); endmodule // fpcomp @@ -178,9 +196,9 @@ module magcompare64b_1 (w, x, A, B); endmodule // magcompare64b // This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of +// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of // operands being compared as indicated below. -// Sel Description +// FOpCtrlE Description // 00 double precision numbers // 01 single precision numbers // 10 half precision numbers @@ -196,11 +214,11 @@ endmodule // magcompare64b // It also produces a invalid operation flag, which is one // if either of the input operands is a signaling NaN. -module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); +module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE); input logic [63:0] A; input logic [63:0] B; - input logic [1:0] Sel; + input logic [2:0] FOpCtrlE; // NOTE(review): only bits [1:0] are decoded (dp/sp/hp below); confirm they still carry the precision encoding now that this port is FOpCtrlE rather than Sel logic dp, sp, hp; @@ -209,9 +227,9 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); output logic Azero; output logic Bzero; - assign dp = !Sel[1]&!Sel[0]; - assign sp = !Sel[1]&Sel[0]; - assign hp = Sel[1]&!Sel[0]; + assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; + assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; + assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; // Test if A or B is NaN. assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) & @@ -232,3 +250,216 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); assign Bzero = (B[62:0] == 63'h0); endmodule // exception_cmp +// +// File name : fpcomp.v +// Title : Floating-Point Comparator +// project : FPU +// Library : fpcomp +// Author(s) : James E. 
Stine +// Purpose : definition of main unit to floating-point comparator +// notes : +// +// Copyright Oklahoma State University +// +// Floating Point Comparator (Algorithm) +// +// 1.) Performs sign-extension if the inputs are 32-bit integers. +// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// 3.) Check for special cases (+0=-0, unordered, and infinite values) +// and correct for sign bits +// +// This module takes 64-bit inputs op1 and op2, VSS, and VDD +// signals, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. +// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 (unused) +// +// The comparator produces a 2-bit signal FCC, which +// indicates the result of the comparison: +// +// fcc description +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// +// It also produces an invalid operation flag, which is one +// if either of the input operands is a signaling NaN per 754 + + +/*module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule*/ // magcompare2b + +// 2-bit magnitude comparator +// This module compares two 2-bit values A and B. LT is '1' if A < B +// and GT is '1' if A > B. LT and GT are both '0' if A = B. 
However, +// this version actually incorporates don't cares into the equation to +// simplify the optimization + +// module magcompare2c (LT, GT, A, B); + +// input logic [1:0] A; +// input logic [1:0] B; + +// output logic LT; +// output logic GT; + +// assign LT = B[1] | (!A[1]&B[0]); +// assign GT = A[1] | (!B[1]&A[0]); + +// endmodule // magcompare2b + +// NOTE(review): the magcompare2c definition above is commented out, yet +// magcompare64b_2 below instantiates it; a live definition must exist +// elsewhere -- confirm. + +// This module compares two 64-bit values A and B. LT is '1' if A < B +// and EQ is '1' if A = B. LT and EQ are both '0' if A > B. +// This structure was modified so +// that it only does a strict magnitude comparison, and only +// returns flags for less than (LT) and equal to (EQ). It uses a tree +// of 63 2-bit magnitude comparators, followed by one OR gate. +// +// This module is the second half of that tree: it takes the 8-bit +// intermediate vectors w and x (fpucmp1 feeds them from magcompare64b_1) +// and reduces them with seven magcompare2c instances. EQ is derived +// as ~(LT | GT). +// +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. +// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare64b_2 (LT, EQ, w, x); + + input logic [7:0] w; + input logic [7:0] x; + logic [3:0] y; + logic [3:0] z; + logic [1:0] a; + logic [1:0] b; + logic GT; + + output logic LT; + output logic EQ; + + magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); + magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); + magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); + magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); + + magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); + magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); + + magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); + + assign EQ = ~(LT | GT); + +endmodule // magcompare64b_2 + +// This module takes 64-bit inputs A and B, two magnitude comparison +// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. 
+// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 bfloat precision numbers +// +// The comparator produces a 2-bit signal fcc, which +// indicates the result of the comparison as follows: +// fcc description +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// It also produces an invalid operation flag, which is one +// if either of the input operands is a signaling NaN. +// +// FCmpResultE is selected by the full FOpCtrlE[2:0]: 3'b111 (min) and +// 3'b101 (max) return one of the operands; 3'b010 (eq), 3'b001 (lt) and +// 3'b011 (le) return the flag in bit 0 of a 64-bit word; any other +// value returns zero. +// +// NOTE(review): FmtE is declared but never read in this module, while +// dp/sp/hp are decoded from FOpCtrlE[1:0] -- the same bits the result +// case statement uses as the operation select. Confirm whether the +// precision was meant to come from FmtE. +// NOTE(review): sixtythreezeros is declared and initialized but not +// referenced anywhere in this module. + +module exception_cmp_2 ( + input logic [63:0] A, + input logic [63:0] B, + input logic FmtE, + input logic LT_mag, + input logic EQ_mag, + input logic [2:0] FOpCtrlE, + + output logic invalid, + output logic [1:0] fcc, + output logic [63:0] FCmpResultE, + + input logic Azero, + input logic Bzero, + input logic ANaN, + input logic BNaN); + + logic dp; + logic sp; + logic hp; + logic ASNaN; + logic BSNaN; + logic UO; + logic GT; + logic LT; + logic EQ; + logic [62:0] sixtythreezeros = 63'h0; + + assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; + assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; + assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; + + // Values are unordered if (A is NaN) OR (B is NaN); no other + // condition is applied here. + assign UO = (ANaN | BNaN); + + // Test if A or B is a signaling NaN. + assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); + assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); + + // If either A or B is a signaling NaN the "Invalid Operation" + // exception flag is set to one; otherwise it is zero. + assign invalid = (ASNaN | BSNaN); + + // A and B are equal if (their magnitudes are equal) AND ((their signs are + // equal) or (their magnitudes are zero AND they are floating point + // numbers)). Also, A and B are not equal if they are unordered. 
+ assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); + + // A is less than B if (A is negative and B is positive) OR + // (A and B are positive and the magnitude of A is less than + // the magnitude of B) or (A and B are negative integers and + // the magnitude of A is less than the magnitude of B) or + // (A and B are negative floating point numbers and + // the magnitude of A is greater than the magnitude of B). + // Also, A is not less than B if A and B are equal or unordered. + assign LT = ((~LT_mag & A[63] & B[63]) | + (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; + + // A is greater than B when LT, EQ, and UO are false. + assign GT = ~(LT | EQ | UO); + + // Note: it may be possible to optimize the setting of fcc + // a little more, but it is probably not worth the effort. + + // Set the bits of fcc based on LT, GT, EQ, and UO + assign fcc[0] = LT | UO; + assign fcc[1] = GT | UO; + + always_comb begin + case (FOpCtrlE[2:0]) + 3'b111: FCmpResultE = LT ? A : B;//min (B is returned whenever LT is 0, including the unordered case) + 3'b101: FCmpResultE = GT ? A : B;//max (B is returned whenever GT is 0, including the unordered case) + 3'b010: FCmpResultE = {63'b0, EQ};//equal + 3'b001: FCmpResultE = {63'b0, LT};//less than + 3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal + default: FCmpResultE = 64'b0; + endcase + end + +endmodule // exception_cmp_2 diff --git a/wally-pipelined/src/fpu/fpucmp2.sv b/wally-pipelined/src/fpu/fpucmp2.sv index 42a780ac..ee14afb9 100755 --- a/wally-pipelined/src/fpu/fpucmp2.sv +++ b/wally-pipelined/src/fpu/fpucmp2.sv @@ -1,243 +1,243 @@ -// -// File name : fpcomp.v -// Title : Floating-Point Comparator -// project : FPU -// Library : fpcomp -// Author(s) : James E. Stine -// Purpose : definition of main unit to floating-point comparator -// notes : -// -// Copyright Oklahoma State University -// -// Floating Point Comparator (Algorithm) -// -// 1.) Performs sign-extension if the inputs are 32-bit integers. -// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// 3.) 
Check for special cases (+0=-0, unordered, and infinite values) -// and correct for sign bits -// -// This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal Sel that indicates the type of -// operands being compared as indicated below. -// Sel Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 (unused) -// -// The comparator produces a 2-bit signal FCC, which -// indicates the result of the comparison: -// -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// -// It also produces an invalid operation flag, which is one -// if either of the input operands is a signaling NaN per 754 +// // +// // File name : fpcomp.v +// // Title : Floating-Point Comparator +// // project : FPU +// // Library : fpcomp +// // Author(s) : James E. Stine +// // Purpose : definition of main unit to floating-point comparator +// // notes : +// // +// // Copyright Oklahoma State University +// // +// // Floating Point Comparator (Algorithm) +// // +// // 1.) Performs sign-extension if the inputs are 32-bit integers. +// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// // 3.) Check for special cases (+0=-0, unordered, and infinite values) +// // and correct for sign bits +// // +// // This module takes 64-bits inputs op1 and op2, VSS, and VDD +// // signals, and a 2-bit signal Sel that indicates the type of +// // operands being compared as indicated below. 
+// // Sel Description +// // 00 double precision numbers +// // 01 single precision numbers +// // 10 half precision numbers +// // 11 (unused) +// // +// // The comparator produces a 2-bit signal FCC, which +// // indicates the result of the comparison: +// // +// // fcc decscription +// // 00 A = B +// // 01 A < B +// // 10 A > B +// // 11 A and B are unordered (i.e., A or B is NaN) +// // +// // It also produces an invalid operation flag, which is one +// // if either of the input operands is a signaling NaN per 754 -module fpucmp2 ( - input logic [63:0] op1, - input logic [63:0] op2, - input logic [1:0] Sel, - input logic [7:0] w, x, - input logic ANaN, BNaN, - input logic Azero, Bzero, - input logic [3:0] FOpCtrlM, - input logic FmtM, +// module fpucmp2 ( +// input logic [63:0] op1, +// input logic [63:0] op2, +// input logic [1:0] Sel, +// input logic [7:0] w, x, +// input logic ANaN, BNaN, +// input logic Azero, Bzero, +// input logic [3:0] FOpCtrlM, +// input logic FmtM, - output logic Invalid, // Invalid Operation - output logic [1:0] FCC, // Condition Codes - output logic [63:0] FCmpResultM); +// output logic Invalid, // Invalid Operation +// output logic [1:0] FCC, // Condition Codes +// output logic [63:0] FCmpResultM); - logic LT; // magnitude op1 < magnitude op2 - logic EQ; // magnitude op1 = magnitude op2 +// logic LT; // magnitude op1 < magnitude op2 +// logic EQ; // magnitude op1 = magnitude op2 - // Perform magnitude comparison between the 63 least signficant bits - // of the input operands. Only LT and EQ are returned, since GT can - // be determined from these values. - magcompare64b_2 magcomp2 (LT, EQ, w, x); +// // Perform magnitude comparison between the 63 least signficant bits +// // of the input operands. Only LT and EQ are returned, since GT can +// // be determined from these values. +// magcompare64b_2 magcomp2 (LT, EQ, w, x); - // Determine final values based on output of magnitude comparison, - // sign bits, and special case testing. 
- exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); +// // Determine final values based on output of magnitude comparison, +// // sign bits, and special case testing. +// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); -endmodule // fpcomp +// endmodule // fpcomp -/*module magcompare2b (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - // Determine if A < B using a minimized sum-of-products expression - assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; - // Determine if A > B using a minimized sum-of-products expression - assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -endmodule*/ // magcompare2b - -// 2-bit magnitude comparator -// This module compares two 2-bit values A and B. LT is '1' if A < B -// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// this version actually incorporates don't cares into the equation to -// simplify the optimization - -// module magcompare2c (LT, GT, A, B); +// /*module magcompare2b (LT, GT, A, B); // input logic [1:0] A; // input logic [1:0] B; -// output logic LT; -// output logic GT; +// output logic LT; +// output logic GT; -// assign LT = B[1] | (!A[1]&B[0]); -// assign GT = A[1] | (!B[1]&A[0]); +// // Determine if A < B using a minimized sum-of-products expression +// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; +// // Determine if A > B using a minimized sum-of-products expression +// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; -// endmodule // magcompare2b +// endmodule*/ // magcompare2b -// This module compares two 64-bit values A and B. LT is '1' if A < B -// and EQ is '1'if A = B. LT and GT are both '0' if A > B. 
-// This structure was modified so -// that it only does a strict magnitdude comparison, and only -// returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// of 63 2-bit magnitude comparators, followed by one OR gates. -// -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. -// doi: 10.1109/ISCAS.2005.1464531 +// // 2-bit magnitude comparator +// // This module compares two 2-bit values A and B. LT is '1' if A < B +// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However, +// // this version actually incorporates don't cares into the equation to +// // simplify the optimization -module magcompare64b_2 (LT, EQ, w, x); +// // module magcompare2c (LT, GT, A, B); - input logic [7:0] w; - input logic [7:0] x; - logic [3:0] y; - logic [3:0] z; - logic [1:0] a; - logic [1:0] b; - logic GT; +// // input logic [1:0] A; +// // input logic [1:0] B; - output logic LT; - output logic EQ; +// // output logic LT; +// // output logic GT; + +// // assign LT = B[1] | (!A[1]&B[0]); +// // assign GT = A[1] | (!B[1]&A[0]); + +// // endmodule // magcompare2b + +// // This module compares two 64-bit values A and B. LT is '1' if A < B +// // and EQ is '1'if A = B. LT and GT are both '0' if A > B. +// // This structure was modified so +// // that it only does a strict magnitdude comparison, and only +// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree +// // of 63 2-bit magnitude comparators, followed by one OR gates. +// // +// // J. E. Stine and M. J. Schulte, "A combined two's complement and +// // floating-point comparator," 2005 IEEE International Symposium on +// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// // doi: 10.1109/ISCAS.2005.1464531 + +// module magcompare64b_2 (LT, EQ, w, x); + +// input logic [7:0] w; +// input logic [7:0] x; +// logic [3:0] y; +// logic [3:0] z; +// logic [1:0] a; +// logic [1:0] b; +// logic GT; - magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); - magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); - magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); - magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); +// output logic LT; +// output logic EQ; - magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); - magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); +// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); +// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); +// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); +// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); - magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - - assign EQ = ~(LT | GT); - -endmodule // magcompare64b - -// This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of -// operands being compared as indicated below. -// Sel Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 bfloat precision numbers -// -// The comparator produces a 2-bit signal fcc, which -// indicates the result of the comparison as follows: -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// It also produces a invalid operation flag, which is one -// if either of the input operands is a signaling NaN. 
- -module exception_cmp_2 ( - input logic [63:0] A, - input logic [63:0] B, - input logic FmtM, - input logic LT_mag, - input logic EQ_mag, - input logic [1:0] Sel, - input logic [3:0] FOpCtrlM, +// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); +// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); - output logic invalid, - output logic [1:0] fcc, - output logic [63:0] FCmpResultM, +// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - input logic Azero, - input logic Bzero, - input logic ANaN, - input logic BNaN); +// assign EQ = ~(LT | GT); + +// endmodule // magcompare64b + +// // This module takes 64-bits inputs A and B, two magnitude comparison +// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of +// // operands being compared as indicated below. +// // Sel Description +// // 00 double precision numbers +// // 01 single precision numbers +// // 10 half precision numbers +// // 11 bfloat precision numbers +// // +// // The comparator produces a 2-bit signal fcc, which +// // indicates the result of the comparison as follows: +// // fcc decscription +// // 00 A = B +// // 01 A < B +// // 10 A > B +// // 11 A and B are unordered (i.e., A or B is NaN) +// // It also produces a invalid operation flag, which is one +// // if either of the input operands is a signaling NaN. + +// module exception_cmp_2 ( +// input logic [63:0] A, +// input logic [63:0] B, +// input logic FmtM, +// input logic LT_mag, +// input logic EQ_mag, +// input logic [1:0] Sel, +// input logic [3:0] FOpCtrlM, - logic dp; - logic sp; - logic hp; - logic ASNaN; - logic BSNaN; - logic UO; - logic GT; - logic LT; - logic EQ; - logic [62:0] sixtythreezeros = 63'h0; +// output logic invalid, +// output logic [1:0] fcc, +// output logic [63:0] FCmpResultM, - assign dp = !Sel[1]&!Sel[0]; - assign sp = !Sel[1]&Sel[0]; - assign hp = Sel[1]&!Sel[0]; - - // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating - // point comparison is being performed. 
- assign UO = (ANaN | BNaN); - - // Test if A or B is a signaling NaN. - assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); - assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); - - // If either A or B is a signaling NaN the "Invalid Operation" - // exception flag is set to one; otherwise it is zero. - assign invalid = (ASNaN | BSNaN); - - // A and B are equal if (their magnitudes are equal) AND ((their signs are - // equal) or (their magnitudes are zero AND they are floating point - // numbers)). Also, A and B are not equal if they are unordered. - assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); +// input logic Azero, +// input logic Bzero, +// input logic ANaN, +// input logic BNaN); - // A is less than B if (A is negative and B is posiive) OR - // (A and B are positive and the magnitude of A is less than - // the magnitude of B) or (A and B are negative integers and - // the magnitude of A is less than the magnitude of B) or - // (A and B are negative floating point numbers and - // the magnitude of A is greater than the magnitude of B). - // Also, A is not less than B if A and B are equal or unordered. - assign LT = ((~LT_mag & A[63] & B[63]) | - (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; +// logic dp; +// logic sp; +// logic hp; +// logic ASNaN; +// logic BSNaN; +// logic UO; +// logic GT; +// logic LT; +// logic EQ; +// logic [62:0] sixtythreezeros = 63'h0; + +// assign dp = !Sel[1]&!Sel[0]; +// assign sp = !Sel[1]&Sel[0]; +// assign hp = Sel[1]&!Sel[0]; + +// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating +// // point comparison is being performed. +// assign UO = (ANaN | BNaN); + +// // Test if A or B is a signaling NaN. +// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); +// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); + +// // If either A or B is a signaling NaN the "Invalid Operation" +// // exception flag is set to one; otherwise it is zero. 
+// assign invalid = (ASNaN | BSNaN); + +// // A and B are equal if (their magnitudes are equal) AND ((their signs are +// // equal) or (their magnitudes are zero AND they are floating point +// // numbers)). Also, A and B are not equal if they are unordered. +// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); - // A is greater than B when LT, EQ, and UO are are false. - assign GT = ~(LT | EQ | UO); +// // A is less than B if (A is negative and B is posiive) OR +// // (A and B are positive and the magnitude of A is less than +// // the magnitude of B) or (A and B are negative integers and +// // the magnitude of A is less than the magnitude of B) or +// // (A and B are negative floating point numbers and +// // the magnitude of A is greater than the magnitude of B). +// // Also, A is not less than B if A and B are equal or unordered. +// assign LT = ((~LT_mag & A[63] & B[63]) | +// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; + +// // A is greater than B when LT, EQ, and UO are are false. +// assign GT = ~(LT | EQ | UO); - // Note: it may be possible to optimize the setting of fcc - // a little more, but it is probably not worth the effort. +// // Note: it may be possible to optimize the setting of fcc +// // a little more, but it is probably not worth the effort. - // Set the bits of fcc based on LT, GT, EQ, and UO - assign fcc[0] = LT | UO; - assign fcc[1] = GT | UO; +// // Set the bits of fcc based on LT, GT, EQ, and UO +// assign fcc[0] = LT | UO; +// assign fcc[1] = GT | UO; - always_comb begin - case (FOpCtrlM[2:0]) - 3'b111: FCmpResultM = LT ? A : B;//min - 3'b101: FCmpResultM = GT ? A : B;//max - 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal - 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than - 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal - default: FCmpResultM = 64'b0; - endcase - end +// always_comb begin +// case (FOpCtrlM[2:0]) +// 3'b111: FCmpResultM = LT ? 
A : B;//min +// 3'b101: FCmpResultM = GT ? A : B;//max +// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal +// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than +// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal +// default: FCmpResultM = 64'b0; +// endcase +// end -endmodule // exception_cmp +// endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv index 959ef476..03667d84 100644 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ b/wally-pipelined/src/fpu/fpuhazard.sv @@ -26,47 +26,41 @@ `include "wally-config.vh" module fpuhazard( - input logic [4:0] Adr1, Adr2, Adr3, - input logic FWriteEnE, FWriteEnM, FWriteEnW, - input logic [4:0] RdE, RdM, RdW, - input logic FDivBusyE, - input logic RegWriteD, - input logic [2:0] FResultSelD, FResultSelE, - input logic IllegalFPUInstrD, - input logic FInput2UsedD, FInput3UsedD, - // Stall outputs - output logic FStallD, - output logic [1:0] FForwardInput1D, FForwardInput2D, - output logic FForwardInput3D + input logic [4:0] Adr1E, Adr2E, Adr3E, + input logic FWriteEnM, FWriteEnW, + input logic [4:0] RdM, RdW, + input logic [2:0] FResultSelM, + output logic FStallD, + output logic [1:0] ForwardXE, ForwardYE, ForwardZE ); always_comb begin // set ReadData as default - FForwardInput1D = 2'b00; - FForwardInput2D = 2'b00; - FForwardInput3D = 1'b0; - FStallD = FDivBusyE; - if (~IllegalFPUInstrD) begin -// if taking a value from int register - if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD))) - if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM - else FStallD = 1'b1; // otherwise stall - else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW - else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE + ForwardXE = 2'b00; // choose FRD1E + ForwardYE = 2'b00; // choose FRD2E + ForwardZE = 2'b00; 
// choose FRD3E + FStallD = 0; + + if ((Adr1E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardXE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W - if(FInput2UsedD) - if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1; - else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW - else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE + if ((Adr2E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardYE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W - - if(FInput3UsedD) - if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1; - else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1; - else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE - end + + if ((Adr3E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b110 | FResultSelM == 3'b011) ForwardZE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W end diff --git a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 2850af86..62d0e7d7 100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -1,8 +1,8 @@ //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions -module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E); +module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); - input [63:0] FInput1E, FInput2E; + input [63:0] SrcXE, SrcYE; input [1:0] SgnOpCodeE; output [63:0] SgnResultE; output [4:0] SgnFlagsE; @@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E); 
//op code designation: // - //00 - fsgnj - directly copy over sign value of FInput2E - //01 - fsgnjn - negate sign value of FInput2E - //10 - fsgnjx - XOR sign values of FInput1E & FInput2E + //00 - fsgnj - directly copy over sign value of SrcYE + //01 - fsgnjn - negate sign value of SrcYE + //10 - fsgnjx - XOR sign values of SrcXE & SrcYE // - assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]); - assign SgnResultE[62:0] = FInput1E[62:0]; + assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); + assign SgnResultE[62:0] = SrcXE[62:0]; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. This will //set the invalid flag high. - assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52]; + assign AonesExp = SrcXE[62]&SrcXE[61]&SrcXE[60]&SrcXE[59]&SrcXE[58]&SrcXE[57]&SrcXE[56]&SrcXE[55]&SrcXE[54]&SrcXE[53]&SrcXE[52]; //the only flag that can occur during this operation is invalid //due to changing sign on already existing NaN diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 016d8e1a..356574d0 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -32,7 +32,7 @@ module hazard( input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, input logic DataStall, ICacheStallF, - input logic FPUStallD, + input logic FPUStallD, FStallD, input logic DivBusyE,FDivBusyE, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, @@ -56,7 +56,7 @@ module hazard( // If any stages are stalled, the first stage that isn't stalled must flush. 
assign StallFCause = CSRWritePendingDEM && ~(TrapM || RetM || BPPredWrongE); - assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous + assign StallDCause = (LoadStallD || MulDivStallD || CSRRdStallD || FPUStallD || FStallD) && ~(TrapM || RetM || BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallECause = DivBusyE || FDivBusyE; assign StallMCause = 0; assign StallWCause = DataStall || ICacheStallF; diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 3654437f..16fd5a8f 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -45,7 +45,8 @@ module controller( output logic MemReadE, CSRReadE, // for Hazard Unit output logic [2:0] Funct3E, output logic MulDivE, W64E, - output logic JumpE, + output logic JumpE, + output logic [1:0] MemRWE, // Memory stage control signals input logic StallM, FlushM, output logic [1:0] MemRWM, @@ -74,7 +75,7 @@ module controller( // pipelined control signals logic RegWriteE; logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; - logic [1:0] MemRWD, MemRWE; + logic [1:0] MemRWD; logic JumpD; logic BranchD, BranchE; logic [1:0] ALUOpD; @@ -141,6 +142,7 @@ module controller( ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide else ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction + //7'b1010011: ControlsD = `CTRLW'b0_000_00_00_101_0_00_0_0_0_0_0_0_00_1; // FP 7'b1100011: ControlsD = `CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq 7'b1100111: ControlsD = `CTRLW'b1_000_00_00_000_0_00_1_1_0_0_0_0_00_0; // jalr 7'b1101111: ControlsD = `CTRLW'b1_011_00_00_000_0_00_1_0_0_0_0_0_00_0; // jal diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 13db65a3..c3303f9a 100644 --- 
a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -37,6 +37,9 @@ module datapath ( input logic ALUSrcAE, ALUSrcBE, input logic TargetSrcE, input logic JumpE, + input logic IsFPE, + input logic [1:0] MemRWE, + input logic [`XLEN-1:0] FWriteDataE, input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, output logic [2:0] FlagsE, @@ -44,13 +47,13 @@ module datapath ( output logic [`XLEN-1:0] SrcAE, SrcBE, // Memory stage signals input logic StallM, FlushM, - input logic [`XLEN-1:0] FWriteDataM, + input logic FWriteIntM, + input logic [`XLEN-1:0] FIntResM, output logic [`XLEN-1:0] SrcAM, output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals input logic StallW, FlushW, input logic FWriteIntW, - input logic [`XLEN-1:0] FPUResultW, input logic RegWriteW, input logic SquashSCW, input logic [2:0] ResultSrcW, @@ -70,13 +73,14 @@ module datapath ( logic [`XLEN-1:0] RD1E, RD2E; logic [`XLEN-1:0] ExtImmE; - logic [`XLEN-1:0] PreSrcAE, SrcAE2, SrcBE2; + logic [`XLEN-1:0] PreSrcAE, PreSrcBE, SrcAE2, SrcBE2; logic [`XLEN-1:0] ALUResultE; logic [`XLEN-1:0] WriteDataE; logic [`XLEN-1:0] TargetBaseE; // Memory stage signals logic [`XLEN-1:0] ALUResultM; + logic [`XLEN-1:0] ResultM; // Writeback stage signals logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] ALUResultW; @@ -88,8 +92,7 @@ module datapath ( assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; - //Mux for writting floating point - mux2 #(`XLEN) writedatamux(ResultW, FPUResultW, FWriteIntW, WriteDataW); + //Mux for writting floating point regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); extend ext(.InstrD(InstrD[31:7]), .*); @@ -102,11 +105,12 @@ module datapath ( flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux4 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, FWriteDataM, ForwardAE, PreSrcAE); - mux4 #(`XLEN) fbemux(RD2E, WriteDataW, 
ALUResultM, FWriteDataM, ForwardBE, WriteDataE); + mux3 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, ForwardAE, PreSrcAE); + mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, ForwardBE, PreSrcBE); + mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, IsFPE, WriteDataE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); - mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE); + mux2 #(`XLEN) srcbmux(PreSrcBE, ExtImmE, ALUSrcBE, SrcBE); mux2 #(`XLEN) srcbmux2(SrcBE, {`XLEN{1'b0}}, JumpE, SrcBE2); // *** May be able to remove this mux. alu #(`XLEN) alu(SrcAE2, SrcBE2, ALUControlE, ALUResultE, FlagsE); mux2 #(`XLEN) targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE); @@ -117,10 +121,11 @@ module datapath ( flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM); assign MemAdrM = ALUResultM; flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); - flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); + flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); + mux2 #(`XLEN) resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM); // Writeback stage pipeline register and logic - flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); + flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW); flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); // handle Store Conditional result if atomic extension supported @@ -131,11 +136,11 @@ module datapath ( assign SCResultW = 0; endgenerate - mux5 #(`XLEN) resultmux(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW); /* -----\/----- EXCLUDED -----\/----- // This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage. // *** need to look at how the decoder is coded to fix. 
- mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW); + mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, WriteDataW); >>>>>>> bp -----/\----- EXCLUDED -----/\----- */ diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 07c4daaf..e7b3ff24 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -42,14 +42,12 @@ module forward( ForwardAE = 2'b00; ForwardBE = 2'b00; if (Rs1E != 5'b0) - if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; + if ((Rs1E == RdM) & (RegWriteM|FWriteIntM)) ForwardAE = 2'b10; else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01; - else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; if (Rs2E != 5'b0) - if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; + if ((Rs2E == RdM) & (RegWriteM|FWriteIntM)) ForwardBE = 2'b10; else if ((Rs2E == RdW) & (RegWriteW|FWriteIntW)) ForwardBE = 2'b01; - else if ((Rs2E == RdM) & FWriteIntM) ForwardBE = 2'b11; end // Stall on dependent operations that finish in Mem Stage and can't bypass in time diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 62dc371b..2515f323 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -35,7 +35,10 @@ module ieu ( // Execute Stage interface input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, - input logic FWriteIntE, + input logic FWriteIntE, + input logic IsFPE, + //input logic [1:0] FMemRWE, + input logic [`XLEN-1:0] FWriteDataE, output logic [`XLEN-1:0] PCTargetE, output logic MulDivE, W64E, output logic [2:0] Funct3E, @@ -43,9 +46,8 @@ module ieu ( // Memory stage interface input logic DataMisalignedM, input logic DataAccessFaultM, - input logic SquashSCW, input logic FWriteIntM, - input logic [`XLEN-1:0] FWriteDataM, + input logic [`XLEN-1:0] FIntResM, output logic [1:0] MemRWM, output logic [1:0] AtomicM, output logic [`XLEN-1:0] MemAdrM, WriteDataM, 
@@ -54,7 +56,7 @@ module ieu ( // Writeback stage input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, input logic FWriteIntW, - input logic [`XLEN-1:0] FPUResultW, + input logic SquashSCW, // input logic [`XLEN-1:0] PCLinkW, output logic InstrValidM, InstrValidW, // hazards @@ -82,6 +84,7 @@ module ieu ( logic RegWriteM, RegWriteW; logic MemReadE, CSRReadE; logic JumpE; + logic [1:0] MemRWE; controller c(.*); datapath dp(.*); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index b32770b9..e8064bcc 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -86,22 +86,27 @@ module wallypipelinedhart ( logic PCSrcE; logic CSRWritePendingDEM; - logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD; + logic LoadStallD, MulDivStallD, CSRRdStallD; logic DivDoneE; logic DivBusyE; logic DivDoneW; - logic [4:0] SetFflagsM; - logic [2:0] FRM_REGW; - logic FloatRegWriteW; - logic [1:0] FMemRWM; logic RegWriteD; - logic [`XLEN-1:0] FWriteDataM; - logic SquashSCW; - logic FStallD; - logic FWriteIntE, FWriteIntW, FWriteIntM; - logic FDivBusyE; - logic IllegalFPUInstrD, IllegalFPUInstrE; - logic [`XLEN-1:0] FPUResultW; + logic SquashSCM, SquashSCW; + + // floating point unit signals + logic [2:0] FRM_REGW; + logic [1:0] FMemRWM, FMemRWE; + logic FStallD; + logic FWriteIntE, FWriteIntM, FWriteIntW; + logic [`XLEN-1:0] FWriteDataE; + logic [`XLEN-1:0] FIntResM; + logic FDivBusyE; + logic IsFPD, IsFPE; + logic IllegalFPUInstrD, IllegalFPUInstrE; + logic FloatRegWriteW; + logic FPUStallD; + logic [4:0] SetFflagsM; + logic [`XLEN-1:0] FPUResultW; // memory management unit signals logic ITLBWriteF, DTLBWriteM; @@ -160,13 +165,13 @@ module wallypipelinedhart ( ieu ieu(.*); // integer execution unit: integer register file, datapath and controller - mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); - lsu 
lsu(.MemRWM(MemRWM|FMemRWM), .WriteDataM(WriteDatatmpM),.*); // data cache unit + // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); + lsu lsu(.*); // data cache unit ahblite ebu( //.InstrReadF(1'b0), //.InstrRData(InstrF), // hook up InstrF later - .WriteDataM(WriteDatatmpM), + .WriteDataM(WriteDataM), .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .Funct7M(InstrM[31:25]), .*); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 2b052dcd..11b8e562 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -539,8 +539,8 @@ string tests32f[] = '{ if (`M_SUPPORTED) tests = {tests, tests64m}; if (`A_SUPPORTED) tests = {tests, tests64a}; if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; - if (`D_SUPPORTED) tests = {tests64d, tests}; if (`F_SUPPORTED) tests = {tests64f, tests}; + if (`D_SUPPORTED) tests = {tests64d, tests}; end //tests = {tests64a, tests}; end else begin // RV32