From ccbad6749748a73ad9eb8afb190fb8831819eaba Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 22 Dec 2022 16:25:37 +0000 Subject: [PATCH] Added negative-result int division support in U and UM registers. 13 tests pass! --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/src/fpu/fdivsqrt/fdivsqrt.sv | 8 ++++---- pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv | 8 ++++---- pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 13 +++++-------- pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 4 ++-- 5 files changed, 16 insertions(+), 19 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 2c53d1e4..e6726eef 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -120,7 +120,7 @@ `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) `define RK (`DIVCOPIES*`LOGR) // r*k used for intdiv preproc `define LOGK ($clog2(`DIVCOPIES)) -`define LOGRK ($clog2(`RADIX*`DIVCOPIES)) // log2(R*k) +`define LOGRK ($clog2(`RK)) // log2(r*k) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) // one iteration is required for the integer bit for minimally redundent radix-4 `define FPDUR ((`DIVN+1+(`LOGR*`DIVCOPIES))/(`LOGR*`DIVCOPIES)+(`RADIX/4)) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 86993a7d..b4c4964d 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -67,14 +67,14 @@ module fdivsqrt( logic WZeroM, AZeroM, BZeroM, AZeroE, BZeroE; logic SpecialCaseM; logic [`DIVBLEN:0] nE, nM, mM; - logic OTFCSwapE, ALTBM, As; + logic CalcOTFCSwapE, OTFCSwapE, ALTBM, As; logic DivStartE; logic [`XLEN-1:0] ForwardedSrcAM; fdivsqrtpreproc fdivsqrtpreproc( .clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Ym(YmE), .XZeroE, .X, .DPreproc, .ForwardedSrcAM, - .nE, .nM, .mM, .OTFCSwapE, .ALTBM, .AZeroM, .BZeroM, .AZeroE, .BZeroE, .As, + .nE, .nM, .mM, .CalcOTFCSwapE, .OTFCSwapE, .ALTBM,
.AZeroM, .BZeroM, .AZeroE, .BZeroE, .As, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, .nE, @@ -85,11 +85,11 @@ module fdivsqrt( fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .MDUE, .SqrtE, // .SqrtM, .X,.DPreproc, .FirstWS(WS), .FirstWC(WC), - .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwapE, + .IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .CalcOTFCSwapE, .OTFCSwapE, .FDivBusyE); fdivsqrtpostproc fdivsqrtpostproc( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .RemOpM(Funct3M[1]), .ForwardedSrcAM, - .nM, .ALTBM, .mM, .BZeroM, .As, + .nM, .ALTBM, .mM, .BZeroM, .As, .OTFCSwapEM(OTFCSwapE), .QmM, .WZeroM, .DivSM, .FPIntDivResultM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index 0d835811..75145e55 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -38,7 +38,7 @@ module fdivsqrtiter( input logic XZeroE, YZeroE, input logic SqrtE, MDUE, // input logic SqrtM, - input logic OTFCSwapE, + input logic CalcOTFCSwapE, OTFCSwapE, input logic [`DIVb+3:0] X, input logic [`DIVb-1:0] DPreproc, output logic [`DIVb-1:0] D, @@ -81,9 +81,9 @@ module fdivsqrtiter( flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]); // UOTFC Result U and UM registers/initialization mux - // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division - assign initU = (SqrtE & ~(MDUE)) ? {1'b1, {(`DIVb){1'b0}}} : 0; - assign initUM = (SqrtE & ~(MDUE)) ? 0 : {1'b1, {(`DIVb){1'b0}}}; + // Initialize U to 1.0 and UM to 0 for square root or negative-result int division; U to 0 and UM to -1 otherwise + assign initU = ((MDUE & CalcOTFCSwapE) | (SqrtE & ~(MDUE))) ? {1'b1, {(`DIVb){1'b0}}} : 0; + assign initUM = ((MDUE & CalcOTFCSwapE) | (SqrtE & ~(MDUE))) ? 
0 : {1'b1, {(`DIVb){1'b0}}}; mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux); mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux); flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]); diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 87bb47d6..5f914298 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -35,9 +35,7 @@ module fdivsqrtpostproc( input logic [`DIVb-1:0] D, input logic [`DIVb:0] FirstU, FirstUM, input logic [`DIVb+1:0] FirstC, - input logic Firstun, - input logic SqrtM, - input logic SpecialCaseM, + input logic Firstun, SqrtM, SpecialCaseM, OTFCSwapEM, input logic [`XLEN-1:0] ForwardedSrcAM, input logic RemOpM, ALTBM, BZeroM, As, input logic [`DIVBLEN:0] nM, mM, @@ -54,7 +52,7 @@ module fdivsqrtpostproc( logic [`DIVBLEN:0] NormShiftM; logic [`DIVb:0] IntQuotM, NormQuotM; logic [`DIVb+3:0] IntRemM, NormRemM; - logic [`DIVb+3:0] PreResultM, PreFPIntDivResultM; + logic signed [`DIVb+3:0] PreResultM, PreFPIntDivResultM; // check for early termination on an exact result. 
If the result is not exact, the sticky should be set aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0); @@ -130,11 +128,10 @@ module fdivsqrtpostproc( NormShiftM = (mM + (`DIVBLEN+1)'(`DIVa)); PreResultM = IntRemM; end else begin - if (BZeroM) begin - NormShiftM = 0; + NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); + if (BZeroM | (~ALTBM & OTFCSwapEM)) begin PreResultM = {3'b111, IntQuotM}; end else begin - NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))); PreResultM = {3'b000, IntQuotM}; end //PreResultM = {IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM[`DIVb], IntQuotM}; // Suspicious Sign Extender @@ -143,7 +140,7 @@ module fdivsqrtpostproc( // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted - assign PreFPIntDivResultM = ($signed(PreResultM) >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)}; + assign PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM) + {{(`DIVb+3){1'b0}}, (PostIncM & ~RemOpM)}; assign FPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0]; assign PreQmM = NegStickyM ? 
FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 359bb0c8..0bd3fae0 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -42,7 +42,7 @@ module fdivsqrtpreproc ( input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, output logic [`DIVBLEN:0] nE, nM, mM, - output logic OTFCSwapE, ALTBM, As, AZeroM, BZeroM, AZeroE, BZeroE, + output logic CalcOTFCSwapE, OTFCSwapE, ALTBM, As, AZeroM, BZeroM, AZeroE, BZeroE, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVb-1:0] DPreproc, @@ -56,7 +56,7 @@ module fdivsqrtpreproc ( // Intdiv signals logic [`DIVb-1:0] IFNormLenX, IFNormLenD; logic [`XLEN-1:0] PosA, PosB; - logic Bs, CalcOTFCSwapE, ALTBE; + logic Bs, ALTBE; logic [`XLEN-1:0] A64, B64; logic [`DIVBLEN:0] mE; logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;