Merge pull request #266 from davidharrishmc/dev

FDivSqrt cleanup
This commit is contained in:
Ross Thompson 2023-04-21 20:23:23 -05:00 committed by GitHub
commit 884c3c22d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 85 additions and 57 deletions

View File

@ -138,10 +138,10 @@ module fctrl (
endcase endcase
7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)
ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass
else if (Funct3D == 3'b000 & Rs2D == 5'b00000) else if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt)
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w / fmv.x.d to int register ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register
7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) 7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt)
ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w.x / fmv.d.x to fp reg ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg
7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00)
ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h)
7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01)

View File

@ -62,7 +62,7 @@ module fdivsqrt(
logic [`DIVb+1:0] FirstC; // Step tracker logic [`DIVb+1:0] FirstC; // Step tracker
logic Firstun; // Quotient selection logic Firstun; // Quotient selection
logic WZeroE; // Early termination flag logic WZeroE; // Early termination flag
logic [`DURLEN-1:0] cycles; // FSM cycles logic [`DURLEN-1:0] CyclesE; // FSM cycles
logic SpecialCaseM; // Divide by zero, square root of negative, etc. logic SpecialCaseM; // Divide by zero, square root of negative, etc.
logic DivStartE; // Enable signal for flops during stall logic DivStartE; // Enable signal for flops during stall
@ -76,7 +76,7 @@ module fdivsqrt(
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles, .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
// Int-specific // Int-specific
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
.BZeroM, .nM, .mM, .AM, .BZeroM, .nM, .mM, .AM,
@ -85,7 +85,7 @@ module fdivsqrt(
fdivsqrtfsm fdivsqrtfsm( // FSM fdivsqrtfsm fdivsqrtfsm( // FSM
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles, .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
// Int-specific // Int-specific
.IDivStartE, .ISpecialCaseE, .IntDivE); .IDivStartE, .ISpecialCaseE, .IntDivE);

View File

@ -1,10 +1,10 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// fdivsqrt.sv // fdivsqrtcycles.sv
// //
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
// Modified: 18 April 2022 // Modified: 18 April 2022
// //
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit // Purpose: Determine number of cycles for divsqrt
// //
// Documentation: RISC-V System on Chip Design Chapter 13 // Documentation: RISC-V System on Chip Design Chapter 13
// //
@ -33,7 +33,7 @@ module fdivsqrtcycles(
input logic SqrtE, input logic SqrtE,
input logic IntDivE, input logic IntDivE,
input logic [`DIVBLEN:0] nE, input logic [`DIVBLEN:0] nE,
output logic [`DURLEN-1:0] cycles output logic [`DURLEN-1:0] CyclesE
); );
logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
// DIVN = `NF+3 // DIVN = `NF+3
@ -68,8 +68,8 @@ module fdivsqrtcycles(
always_comb begin always_comb begin
if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (`IDIV_ON_FPU) cycles = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); if (`IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
end end
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */

View File

@ -39,7 +39,7 @@ module fdivsqrtfsm(
input logic StallM, FlushE, input logic StallM, FlushE,
input logic IntDivE, input logic IntDivE,
input logic ISpecialCaseE, input logic ISpecialCaseE,
input logic [`DURLEN-1:0] cycles, input logic [`DURLEN-1:0] CyclesE,
output logic IFDivStartE, output logic IFDivStartE,
output logic FDivBusyE, FDivDoneE, output logic FDivBusyE, FDivDoneE,
output logic SpecialCaseM output logic SpecialCaseM
@ -67,7 +67,7 @@ module fdivsqrtfsm(
state <= #1 IDLE; state <= #1 IDLE;
end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE
// end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE // end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE
step <= cycles; step <= CyclesE;
if (SpecialCaseE) state <= #1 DONE; if (SpecialCaseE) state <= #1 DONE;
else state <= #1 BUSY; else state <= #1 BUSY;
end else if (state == BUSY) begin end else if (state == BUSY) begin

View File

@ -43,44 +43,49 @@ module fdivsqrtpreproc (
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic IntDivE, W64E, input logic IntDivE, W64E,
output logic ISpecialCaseE, output logic ISpecialCaseE,
output logic [`DURLEN-1:0] cycles, output logic [`DURLEN-1:0] CyclesE,
output logic [`DIVBLEN:0] nM, mM, output logic [`DIVBLEN:0] nM, mM,
output logic NegQuotM, ALTBM, IntDivM, W64M, output logic NegQuotM, ALTBM, IntDivM, W64M,
output logic AsM, BZeroM, output logic AsM, BZeroM,
output logic [`XLEN-1:0] AM output logic [`XLEN-1:0] AM
); );
logic [`DIVb-1:0] XPreproc, DPreproc; logic [`DIVb-1:0] Xfract, Dfract;
logic [`DIVb:0] PreSqrtX; logic [`DIVb:0] PreSqrtX;
logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
logic [`NE+1:0] QeE; // Quotient Exponent (FP only) logic [`NE+1:0] QeE; // Quotient Exponent (FP only)
logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
logic NumerZeroE; // Numerator is zero (X or A) logic NumerZeroE; // Numerator is zero (X or A)
logic AZeroE, BZeroE; // A or B is Zero for integer division logic AZeroE, BZeroE; // A or B is Zero for integer division
logic signedDiv; // signed division logic SignedDivE; // signed division
logic NegQuotE; // Integer quotient is negative logic NegQuotE; // Integer quotient is negative
logic AsE, BsE; // Signs of integer inputs logic AsE, BsE; // Signs of integer inputs
logic [`XLEN-1:0] AE; // input A after W64 adjustment logic [`XLEN-1:0] AE; // input A after W64 adjustment
logic ALTBE;
//////////////////////////////////////////////////////
// Integer Preprocessing
//////////////////////////////////////////////////////
if (`IDIV_ON_FPU) begin:intpreproc // Int Supported if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
logic [`XLEN-1:0] BE, PosA, PosB; logic [`XLEN-1:0] BE, PosA, PosB;
// Extract inputs, signs, zero, depending on W64 mode if applicable // Extract inputs, signs, zero, depending on W64 mode if applicable
assign signedDiv = ~Funct3E[0]; assign SignedDivE = ~Funct3E[0];
// Source handling // Source handling
if (`XLEN==64) begin // 64-bit, supports W64 if (`XLEN==64) begin // 64-bit, supports W64
mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & signedDiv}}, ForwardedSrcAE[31:0]}, W64E, AE); mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & signedDiv}}, ForwardedSrcBE[31:0]}, W64E, BE); mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
end else begin // 32 bits only end else begin // 32 bits only
assign AE = ForwardedSrcAE; assign AE = ForwardedSrcAE;
assign BE = ForwardedSrcBE; assign BE = ForwardedSrcBE;
end end
assign AZeroE = ~(|AE); assign AZeroE = ~(|AE);
assign BZeroE = ~(|BE); assign BZeroE = ~(|BE);
assign AsE = AE[`XLEN-1] & signedDiv; assign AsE = AE[`XLEN-1] & SignedDivE;
assign BsE = BE[`XLEN-1] & signedDiv; assign BsE = BE[`XLEN-1] & SignedDivE;
assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
// Force integer inputs to be positive // Force integer inputs to be positive
@ -90,35 +95,35 @@ module fdivsqrtpreproc (
// Select integer or floating point inputs // Select integer or floating point inputs
mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX); mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX);
mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD); mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD);
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
end else begin // Int not supported end else begin // Int not supported
assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}}; assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}};
assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}}; assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}};
assign NumerZeroE = XZeroE;
end end
//////////////////////////////////////////////////////
// Integer & FP leading zero and normalization shift
//////////////////////////////////////////////////////
// count leading zeros for Subnorm FP and to normalize integer inputs // count leading zeros for Subnorm FP and to normalize integer inputs
lzc #(`DIVb) lzcX (IFX, ell); lzc #(`DIVb) lzcX (IFX, ell);
lzc #(`DIVb) lzcY (IFD, mE); lzc #(`DIVb) lzcY (IFD, mE);
// Normalization shift: shift off leading one // Normalization shift: shift off leading one
assign XPreproc = (IFX << ell) << 1; assign Xfract = (IFX << ell) << 1;
assign DPreproc = (IFD << mE) << 1; assign Dfract = (IFD << mE) << 1;
// append leading 1 (for nonzero inputs) // *** CT: move to fdivsqrtintpreshift
// shift square root to be in range [1/4, 1)
// Normalized numbers are shifted right by 1 if the exponent is odd
// Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
assign DivX = {3'b000, ~NumerZeroE, XPreproc};
// Divisor register //////////////////////////////////////////////////////
flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); // Integer Right Shift to digit boundary
// Determine DivXShifted (X shifted to digit boundary)
// and nE (number of fractional digits)
//////////////////////////////////////////////////////
// ***CT: factor out fdivsqrtcycles
if (`IDIV_ON_FPU) begin:intrightshift // Int Supported if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
logic [`DIVBLEN:0] ZeroDiff, p; logic [`DIVBLEN:0] ZeroDiff, p;
logic ALTBE;
// calculate number of fractional bits p // calculate number of fractional bits p
assign ZeroDiff = mE - ell; // Difference in number of leading zeros assign ZeroDiff = mE - ell; // Difference in number of leading zeros
@ -128,31 +133,68 @@ module fdivsqrtpreproc (
// Integer special cases (terminate immediately) // Integer special cases (terminate immediately)
assign ISpecialCaseE = BZeroE | ALTBE; assign ISpecialCaseE = BZeroE | ALTBE;
/* verilator lint_off WIDTH */
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
if (`LOGRK > 0) begin // more than 1 bit per cycle if (`LOGRK > 0) begin // more than 1 bit per cycle
logic [`LOGRK-1:0] IntTrunc, RightShiftX; logic [`LOGRK-1:0] IntTrunc, RightShiftX;
logic [`DIVBLEN:0] TotalIntBits, IntSteps; logic [`DIVBLEN:0] TotalIntBits, IntSteps;
/* verilator lint_off WIDTH */
assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator
assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div
assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits
assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
/* verilator lint_on WIDTH */
end else begin // radix 2 1 copy doesn't require shifting end else begin // radix 2 1 copy doesn't require shifting
assign nE = p; assign nE = p;
assign DivXShifted = DivX; assign DivXShifted = DivX;
end end
/* verilator lint_on WIDTH */ end else begin
assign ISpecialCaseE = 0;
end
// Select integer or floating-point operands // CT *** fdivsqrtfplead1
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
//////////////////////////////////////////////////////
// Floating-Point Preprocessing
// append leading 1 (for nonzero inputs)
// shift square root to be in range [1/4, 1)
// Normalized numbers are shifted right by 1 if the exponent is odd
// Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
//////////////////////////////////////////////////////
assign DivX = {3'b000, ~NumerZeroE, Xfract};
// Sqrt is initialized on step one as R(X-1), so depends on Radix
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
//////////////////////////////////////////////////////
// Select integer or floating-point operands
//////////////////////////////////////////////////////
if (`IDIV_ON_FPU) begin
mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
end else begin
assign X = PreShiftX;
end
// Divisor register
flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
// Floating-point exponent
fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
// Number of FSM cycles (to FSM)
fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
if (`IDIV_ON_FPU) begin:intpipelineregs
// pipeline registers // pipeline registers
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
@ -162,21 +204,7 @@ module fdivsqrtpreproc (
flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM);
if (`XLEN==64) if (`XLEN==64)
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
end else begin
assign NumerZeroE = XZeroE;
assign X = PreShiftX;
end end
// Sqrt is initialized on step one as R(X-1), so depends on Radix
if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
// Floating-point exponent
fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
// Number of FSM cycles (to FSM)
fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles);
endmodule endmodule