From ea7c50e0ee68743b53997a655a4d74874e1ce4a4 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:38:47 -0700 Subject: [PATCH] Reordered fdivsqrtpreproc to follow logic --- src/fpu/fdivsqrt/fdivsqrt.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtfsm.sv | 4 +-- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 53 ++++++++++++++++------------- 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index f4d46501..f7a44363 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -62,7 +62,7 @@ module fdivsqrt( logic [`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection logic WZeroE; // Early termination flag - logic [`DURLEN-1:0] cycles; // FSM cycles + logic [`DURLEN-1:0] CyclesE; // FSM cycles logic SpecialCaseM; // Divide by zero, square root of negative, etc. logic DivStartE; // Enable signal for flops during stall @@ -76,7 +76,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles, + .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .BZeroM, .nM, .mM, .AM, @@ -85,7 +85,7 @@ module fdivsqrt( fdivsqrtfsm fdivsqrtfsm( // FSM .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, - .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles, + .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE, // Int-specific .IDivStartE, .ISpecialCaseE, .IntDivE); diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 4025a30c..2e17cc25 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -33,7 +33,7 @@ module fdivsqrtcycles( input logic SqrtE, input logic IntDivE, input logic [`DIVBLEN:0] nE, - output logic [`DURLEN-1:0] cycles + output logic [`DURLEN-1:0] CyclesE ); logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits // DIVN = `NF+3 @@ -68,8 +68,8 @@ module fdivsqrtcycles( always_comb begin if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (`IDIV_ON_FPU) cycles = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + if (`IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 5332087a..75010f74 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -39,7 +39,7 @@ module fdivsqrtfsm( input logic StallM, FlushE, input logic IntDivE, input logic ISpecialCaseE, - input logic [`DURLEN-1:0] cycles, + input logic [`DURLEN-1:0] CyclesE, output logic IFDivStartE, output logic FDivBusyE, FDivDoneE, output logic SpecialCaseM @@ -67,7 +67,7 @@ module fdivsqrtfsm( state <= #1 IDLE; end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE // end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE - step <= cycles; + step <= CyclesE; if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 04739ee8..a63fad82 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -43,7 +43,7 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, output logic ISpecialCaseE, - output logic [`DURLEN-1:0] cycles, + output logic [`DURLEN-1:0] CyclesE, output logic [`DIVBLEN:0] nM, mM, output logic NegQuotM, ALTBM, IntDivM, W64M, output logic AsM, BZeroM, @@ -62,6 +62,7 @@ module fdivsqrtpreproc ( logic NegQuotE; // Integer quotient is negative logic AsE, BsE; // Signs of integer inputs logic [`XLEN-1:0] AE; // input A after W64 adjustment + logic ALTBE; ////////////////////////////////////////////////////// // Integer Preprocessing @@ -113,13 +114,16 @@ module fdivsqrtpreproc ( assign XPreproc = (IFX << ell) << 1; assign DPreproc = (IFD << mE) << 1; + // *** CT: move to fdivsqrtintpreshift + ////////////////////////////////////////////////////// // Integer Right Shift to digit boundary + // Determine DivXShifted (X shifted to digit boundary) + // and nE (number of fractional digits) ////////////////////////////////////////////////////// if (`IDIV_ON_FPU) begin:intrightshift // Int Supported logic [`DIVBLEN:0] ZeroDiff, p; - logic ALTBE; // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros @@ -129,37 +133,24 @@ module fdivsqrtpreproc ( // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; - /* verilator lint_off WIDTH */ // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps if (`LOGRK > 0) begin // more than 1 bit per cycle logic [`LOGRK-1:0] IntTrunc, RightShiftX; logic [`DIVBLEN:0] TotalIntBits, IntSteps; - + /* verilator lint_off WIDTH */ assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps + /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting assign nE = p; assign DivXShifted = DivX; end - /* verilator lint_on WIDTH */ - // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); - flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); - flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); - flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); - flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); - flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); - if (`XLEN==64) - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end else begin - assign X = PreShiftX; assign ISpecialCaseE = 0; end @@ -183,21 +174,35 @@ module fdivsqrtpreproc ( // Selet integer or floating-point operands ////////////////////////////////////////////////////// - mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + if (`IDIV_ON_FPU) begin + mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + end else begin + assign X = PreShiftX; + end // Divisior register flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); - - - - - // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles); + fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); + + if (`IDIV_ON_FPU) begin:intpipelineregs + // pipeline registers + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); + flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); + flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); + flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); + flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); + flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); + if (`XLEN==64) + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); + end + endmodule