fdiv is now parameterized using Lim's method.

This commit is contained in:
Ross Thompson 2023-05-26 14:25:14 -05:00
parent 81491e85e5
commit 29e0357f21
17 changed files with 271 additions and 302 deletions

View File

@ -26,15 +26,13 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrt import cvw::*; #(parameter cvw_t P) (
module fdivsqrt(
input logic clk, input logic clk,
input logic reset, input logic reset,
input logic [`FMTBITS-1:0] FmtE, input logic [P.FMTBITS-1:0] FmtE,
input logic XsE, input logic XsE,
input logic [`NF:0] XmE, YmE, input logic [P.NF:0] XmE, YmE,
input logic [`NE-1:0] XeE, YeE, input logic [P.NE-1:0] XeE, YeE,
input logic XInfE, YInfE, input logic XInfE, YInfE,
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE, input logic XNaNE, YNaNE,
@ -42,39 +40,39 @@ module fdivsqrt(
input logic StallM, input logic StallM,
input logic FlushE, input logic FlushE,
input logic SqrtE, SqrtM, input logic SqrtE, SqrtM,
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [2:0] Funct3E, Funct3M, input logic [2:0] Funct3E, Funct3M,
input logic IntDivE, W64E, input logic IntDivE, W64E,
output logic DivStickyM, output logic DivStickyM,
output logic FDivBusyE, IFDivStartE, FDivDoneE, output logic FDivBusyE, IFDivStartE, FDivDoneE,
output logic [`NE+1:0] QeM, output logic [P.NE+1:0] QeM,
output logic [`DIVb:0] QmM, output logic [P.DIVb:0] QmM,
output logic [`XLEN-1:0] FIntDivResultM output logic [P.XLEN-1:0] FIntDivResultM
); );
// Floating-point division and square root module, with optional integer division and remainder // Floating-point division and square root module, with optional integer division and remainder
// Computes X/Y, sqrt(X), A/B, or A%B // Computes X/Y, sqrt(X), A/B, or A%B
logic [`DIVb+3:0] WS, WC; // Partial remainder components logic [P.DIVb+3:0] WS, WC; // Partial remainder components
logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
logic [`DIVb+3:0] D; // Iterator Divisor logic [P.DIVb+3:0] D; // Iterator Divisor
logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
logic [`DIVb+1:0] FirstC; // Step tracker logic [P.DIVb+1:0] FirstC; // Step tracker
logic Firstun; // Quotient selection logic Firstun; // Quotient selection
logic WZeroE; // Early termination flag logic WZeroE; // Early termination flag
logic [`DURLEN-1:0] CyclesE; // FSM cycles logic [P.DURLEN-1:0] CyclesE; // FSM cycles
logic SpecialCaseM; // Divide by zero, square root of negative, etc. logic SpecialCaseM; // Divide by zero, square root of negative, etc.
logic DivStartE; // Enable signal for flops during stall logic DivStartE; // Enable signal for flops during stall
// Integer div/rem signals // Integer div/rem signals
logic BZeroM; // Denominator is zero logic BZeroM; // Denominator is zero
logic IntDivM; // Integer operation logic IntDivM; // Integer operation
logic [`DIVBLEN:0] nM, mM; // Shift amounts logic [P.DIVBLEN:0] nM, mM; // Shift amounts
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
logic ISpecialCaseE; // Integer div/remainder special cases logic ISpecialCaseE; // Integer div/remainder special cases
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE, .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
// Int-specific // Int-specific
@ -82,18 +80,18 @@ module fdivsqrt(
.BZeroM, .nM, .mM, .AM, .BZeroM, .nM, .mM, .AM,
.IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM); .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
fdivsqrtfsm fdivsqrtfsm( // FSM fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE, .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
// Int-specific // Int-specific
.IDivStartE, .ISpecialCaseE, .IntDivE); .IDivStartE, .ISpecialCaseE, .IntDivE);
fdivsqrtiter fdivsqrtiter( // CSA Iterator fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
.FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
.QmM, .WZeroE, .DivStickyM, .QmM, .WZeroE, .DivStickyM,

View File

@ -26,51 +26,49 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] FmtE,
module fdivsqrtcycles(
input logic [`FMTBITS-1:0] FmtE,
input logic SqrtE, input logic SqrtE,
input logic IntDivE, input logic IntDivE,
input logic [`DIVBLEN:0] nE, input logic [P.DIVBLEN:0] nE,
output logic [`DURLEN-1:0] CyclesE output logic [P.DURLEN-1:0] CyclesE
); );
logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
// DIVN = `NF+3 // DIVN = P.NF+3
// NS = NF + 1 // NS = NF + 1
// N = NS or NS+2 for div/sqrt. // N = NS or NS+2 for div/sqrt.
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
if (`FPSIZES == 1) if (P.FPSIZES == 1)
assign Nf = `NF; assign Nf = P.NF;
else if (`FPSIZES == 2) else if (P.FPSIZES == 2)
always_comb always_comb
case (FmtE) case (FmtE)
1'b0: Nf = `NF1; 1'b0: Nf = P.NF1;
1'b1: Nf = `NF; 1'b1: Nf = P.NF;
endcase endcase
else if (`FPSIZES == 3) else if (P.FPSIZES == 3)
always_comb always_comb
case (FmtE) case (FmtE)
`FMT: Nf = `NF; P.FMT: Nf = P.NF;
`FMT1: Nf = `NF1; P.FMT1: Nf = P.NF1;
`FMT2: Nf = `NF2; P.FMT2: Nf = P.NF2;
endcase endcase
else if (`FPSIZES == 4) else if (P.FPSIZES == 4)
always_comb always_comb
case(FmtE) case(FmtE)
`S_FMT: Nf = `S_NF; P.S_FMT: Nf = P.S_NF;
`D_FMT: Nf = `D_NF; P.D_FMT: Nf = P.D_NF;
`H_FMT: Nf = `H_NF; P.H_FMT: Nf = P.H_NF;
`Q_FMT: Nf = `Q_NF; P.Q_FMT: Nf = P.Q_NF;
endcase endcase
always_comb begin always_comb begin
if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (`IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
end end
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */
endmodule endmodule

View File

@ -26,49 +26,47 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] Fmt,
module fdivsqrtexpcalc( input logic [P.NE-1:0] Xe, Ye,
input logic [`FMTBITS-1:0] Fmt,
input logic [`NE-1:0] Xe, Ye,
input logic Sqrt, input logic Sqrt,
input logic XZero, input logic XZero,
input logic [`DIVBLEN:0] ell, m, input logic [P.DIVBLEN:0] ell, m,
output logic [`NE+1:0] Qe output logic [P.NE+1:0] Qe
); );
logic [`NE-2:0] Bias; logic [P.NE-2:0] Bias;
logic [`NE+1:0] SXExp; logic [P.NE+1:0] SXExp;
logic [`NE+1:0] SExp; logic [P.NE+1:0] SExp;
logic [`NE+1:0] DExp; logic [P.NE+1:0] DExp;
if (`FPSIZES == 1) begin if (P.FPSIZES == 1) begin
assign Bias = (`NE-1)'(`BIAS); assign Bias = (P.NE-1)'(P.BIAS);
end else if (`FPSIZES == 2) begin end else if (P.FPSIZES == 2) begin
assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
end else if (`FPSIZES == 3) begin end else if (P.FPSIZES == 3) begin
always_comb always_comb
case (Fmt) case (Fmt)
`FMT: Bias = (`NE-1)'(`BIAS); P.FMT: Bias = (P.NE-1)'(P.BIAS);
`FMT1: Bias = (`NE-1)'(`BIAS1); P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
`FMT2: Bias = (`NE-1)'(`BIAS2); P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
default: Bias = 'x; default: Bias = 'x;
endcase endcase
end else if (`FPSIZES == 4) begin end else if (P.FPSIZES == 4) begin
always_comb always_comb
case (Fmt) case (Fmt)
2'h3: Bias = (`NE-1)'(`Q_BIAS); 2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
2'h1: Bias = (`NE-1)'(`D_BIAS); 2'h1: Bias = (P.NE-1)'(P.D_BIAS);
2'h0: Bias = (`NE-1)'(`S_BIAS); 2'h0: Bias = (P.NE-1)'(P.S_BIAS);
2'h2: Bias = (`NE-1)'(`H_BIAS); 2'h2: Bias = (P.NE-1)'(P.H_BIAS);
endcase endcase
end end
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS); assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias}; assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
// correct exponent for subnormal input's normalization shifts // correct exponent for subnormal input's normalization shifts
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}); assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
assign Qe = Sqrt ? SExp : DExp; assign Qe = Sqrt ? SExp : DExp;
endmodule endmodule

View File

@ -26,14 +26,12 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
module fdivsqrtfgen2 (
input logic up, uz, input logic up, uz,
input logic [`DIVb+3:0] C, U, UM, input logic [P.DIVb+3:0] C, U, UM,
output logic [`DIVb+3:0] F output logic [P.DIVb+3:0] F
); );
logic [`DIVb+3:0] FP, FN, FZ; logic [P.DIVb+3:0] FP, FN, FZ;
// Generate for both positive and negative bits // Generate for both positive and negative bits
assign FP = ~(U << 1) & C; assign FP = ~(U << 1) & C;

View File

@ -26,14 +26,12 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
module fdivsqrtfgen4 (
input logic [3:0] udigit, input logic [3:0] udigit,
input logic [`DIVb+3:0] C, U, UM, input logic [P.DIVb+3:0] C, U, UM,
output logic [`DIVb+3:0] F output logic [P.DIVb+3:0] F
); );
logic [`DIVb+3:0] F2, F1, F0, FN1, FN2; logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
// Generate for both positive and negative bits // Generate for both positive and negative bits
assign F2 = (~U << 2) & (C << 2); assign F2 = (~U << 2) & (C << 2);
@ -49,4 +47,4 @@ module fdivsqrtfgen4 (
else if (udigit[1]) F = FN1; else if (udigit[1]) F = FN1;
else if (udigit[0]) F = FN2; else if (udigit[0]) F = FN2;
else F = F0; else F = F0;
endmodule endmodule

View File

@ -26,9 +26,7 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) (
module fdivsqrtfsm(
input logic clk, reset, input logic clk, reset,
input logic XInfE, YInfE, input logic XInfE, YInfE,
input logic XZeroE, YZeroE, input logic XZeroE, YZeroE,
@ -39,7 +37,7 @@ module fdivsqrtfsm(
input logic StallM, FlushE, input logic StallM, FlushE,
input logic IntDivE, input logic IntDivE,
input logic ISpecialCaseE, input logic ISpecialCaseE,
input logic [`DURLEN-1:0] CyclesE, input logic [P.DURLEN-1:0] CyclesE,
output logic IFDivStartE, output logic IFDivStartE,
output logic FDivBusyE, FDivDoneE, output logic FDivBusyE, FDivDoneE,
output logic SpecialCaseM output logic SpecialCaseM
@ -49,16 +47,16 @@ module fdivsqrtfsm(
statetype state; statetype state;
logic SpecialCaseE, FSpecialCaseE; logic SpecialCaseE, FSpecialCaseE;
logic [`DURLEN-1:0] step; logic [P.DURLEN-1:0] step;
// FDivStartE and IDivStartE come from fctrl, reflecting the start of floating-point and possibly integer division // FDivStartE and IDivStartE come from fctrl, reflecting the start of floating-point and possibly integer division
assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM; assign IFDivStartE = (FDivStartE | (IDivStartE & P.IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
assign FDivDoneE = (state == DONE); assign FDivDoneE = (state == DONE);
assign FDivBusyE = (state == BUSY) | IFDivStartE; assign FDivBusyE = (state == BUSY) | IFDivStartE;
// terminate immediately on special cases // terminate immediately on special cases
assign FSpecialCaseE = XZeroE | | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE; assign FSpecialCaseE = XZeroE | | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
if (`IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE; if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
else assign SpecialCaseE = FSpecialCaseE; else assign SpecialCaseE = FSpecialCaseE;
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
@ -78,4 +76,4 @@ module fdivsqrtfsm(
end end
end end
endmodule endmodule

View File

@ -26,38 +26,36 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
module fdivsqrtiter(
input logic clk, input logic clk,
input logic IFDivStartE, input logic IFDivStartE,
input logic FDivBusyE, input logic FDivBusyE,
input logic SqrtE, input logic SqrtE,
input logic [`DIVb+3:0] X, D, input logic [P.DIVb+3:0] X, D,
output logic [`DIVb:0] FirstU, FirstUM, output logic [P.DIVb:0] FirstU, FirstUM,
output logic [`DIVb+1:0] FirstC, output logic [P.DIVb+1:0] FirstC,
output logic Firstun, output logic Firstun,
output logic [`DIVb+3:0] FirstWS, FirstWC output logic [P.DIVb+3:0] FirstWS, FirstWC
); );
/* verilator lint_off UNOPTFLAT */ /* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b
logic [`DIVb+1:0] initC; // Q2.b logic [P.DIVb+1:0] initC; // Q2.b
logic [`DIVCOPIES-1:0] un; logic [P.DIVCOPIES-1:0] un;
logic [`DIVb+3:0] WSN, WCN; // Q4.b logic [P.DIVb+3:0] WSN, WCN; // Q4.b
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b
logic [`DIVb+1:0] NextC; logic [P.DIVb+1:0] NextC;
logic [`DIVb:0] UMux, UMMux; logic [P.DIVb:0] UMux, UMMux;
logic [`DIVb:0] initU, initUM; logic [P.DIVb:0] initU, initUM;
/* verilator lint_on UNOPTFLAT */ /* verilator lint_on UNOPTFLAT */
// Top Muxes and Registers // Top Muxes and Registers
@ -66,36 +64,36 @@ module fdivsqrtiter(
// are fed back for the next iteration. // are fed back for the next iteration.
// Residual WS/SC registers/initialization mux // Residual WS/SC registers/initialization mux
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN); mux2 #(P.DIVb+4) wsmux(WS[P.DIVCOPIES], X, IFDivStartE, WSN);
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN); mux2 #(P.DIVb+4) wcmux(WC[P.DIVCOPIES], '0, IFDivStartE, WCN);
flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]); flopen #(P.DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]); flopen #(P.DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
// UOTFC Result U and UM registers/initialization mux // UOTFC Result U and UM registers/initialization mux
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise
assign initU = {SqrtE, {(`DIVb){1'b0}}}; assign initU = {SqrtE, {(P.DIVb){1'b0}}};
assign initUM = {~SqrtE, {(`DIVb){1'b0}}}; assign initUM = {~SqrtE, {(P.DIVb){1'b0}}};
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux); mux2 #(P.DIVb+1) Umux(UNext[P.DIVCOPIES-1], initU, IFDivStartE, UMux);
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux); mux2 #(P.DIVb+1) UMmux(UMNext[P.DIVCOPIES-1], initUM, IFDivStartE, UMMux);
flopen #(`DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]); flopen #(P.DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]);
flopen #(`DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]); flopen #(P.DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);
// C register/initialization mux // C register/initialization mux
// Initialize C to -1 for sqrt and -R for division // Initialize C to -1 for sqrt and -R for division
logic [1:0] initCUpper; logic [1:0] initCUpper;
if(`RADIX == 4) begin if(P.RADIX == 4) begin
mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper); mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper);
end else begin end else begin
mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper); mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper);
end end
assign initC = {initCUpper, {`DIVb{1'b0}}}; assign initC = {initCUpper, {P.DIVb{1'b0}}};
mux2 #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC); mux2 #(P.DIVb+2) cmux(C[P.DIVCOPIES], initC, IFDivStartE, NextC);
flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]); flopen #(P.DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
// Divisor Selections // Divisor Selections
assign DBar = ~D; // for -D assign DBar = ~D; // for -D
if(`RADIX == 4) begin : d2 if(P.RADIX == 4) begin : d2
assign D2 = D << 1; // for 2D, only used in R4 assign D2 = D << 1; // for 2D, only used in R4
assign DBar2 = ~D2; // for -2D, only used in R4 assign DBar2 = ~D2; // for -2D, only used in R4
end end
@ -103,15 +101,15 @@ module fdivsqrtiter(
// k=DIVCOPIES of the recurrence logic // k=DIVCOPIES of the recurrence logic
genvar i; genvar i;
generate generate
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
if (`RADIX == 2) begin: stage if (P.RADIX == 2) begin: stage
fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE, fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end else begin: stage end else begin: stage
logic j1; logic j1;
assign j1 = (i == 0 & ~C[0][`DIVb-1]); assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1, fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end end

View File

@ -26,51 +26,49 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
module fdivsqrtpostproc(
input logic clk, reset, input logic clk, reset,
input logic StallM, input logic StallM,
input logic [`DIVb+3:0] WS, WC, input logic [P.DIVb+3:0] WS, WC,
input logic [`DIVb+3:0] D, input logic [P.DIVb+3:0] D,
input logic [`DIVb:0] FirstU, FirstUM, input logic [P.DIVb:0] FirstU, FirstUM,
input logic [`DIVb+1:0] FirstC, input logic [P.DIVb+1:0] FirstC,
input logic SqrtE, input logic SqrtE,
input logic Firstun, SqrtM, SpecialCaseM, NegQuotM, input logic Firstun, SqrtM, SpecialCaseM, NegQuotM,
input logic [`XLEN-1:0] AM, input logic [P.XLEN-1:0] AM,
input logic RemOpM, ALTBM, BZeroM, AsM, W64M, input logic RemOpM, ALTBM, BZeroM, AsM, W64M,
input logic [`DIVBLEN:0] nM, mM, input logic [P.DIVBLEN:0] nM, mM,
output logic [`DIVb:0] QmM, output logic [P.DIVb:0] QmM,
output logic WZeroE, output logic WZeroE,
output logic DivStickyM, output logic DivStickyM,
output logic [`XLEN-1:0] FIntDivResultM output logic [P.XLEN-1:0] FIntDivResultM
); );
logic [`DIVb+3:0] W, Sum; logic [P.DIVb+3:0] W, Sum;
logic [`DIVb:0] PreQmM; logic [P.DIVb:0] PreQmM;
logic NegStickyM; logic NegStickyM;
logic weq0E, WZeroM; logic weq0E, WZeroM;
logic [`XLEN-1:0] IntDivResultM; logic [P.XLEN-1:0] IntDivResultM;
////////////////////////// //////////////////////////
// Execute Stage: Detect early termination for an exact result // Execute Stage: Detect early termination for an exact result
////////////////////////// //////////////////////////
// check for early termination on an exact result. // check for early termination on an exact result.
aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0E); aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);
if (`RADIX == 2) begin: R2EarlyTerm if (P.RADIX == 2) begin: R2EarlyTerm
logic [`DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE; logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
logic [`DIVb+2:0] FirstK; logic [P.DIVb+2:0] FirstK;
logic wfeq0E; logic wfeq0E;
logic [`DIVb+3:0] WCF, WSF; logic [P.DIVb+3:0] WCF, WSF;
assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1)); assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
assign FZeroDivE = D << 1; // F for divide assign FZeroDivE = D << 1; // F for divide
mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE); mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero}; csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E); aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
assign WZeroE = weq0E|(wfeq0E & Firstun); assign WZeroE = weq0E|(wfeq0E & Firstun);
end else begin end else begin
assign WZeroE = weq0E; assign WZeroE = weq0E;
@ -91,27 +89,27 @@ module fdivsqrtpostproc(
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed. // Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
assign Sum = WC + WS; assign Sum = WC + WS;
assign NegStickyM = Sum[`DIVb+3]; assign NegStickyM = Sum[P.DIVb+3];
mux2 #(`DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
mux2 #(`DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM); mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
// Integer quotient or remainder correction, normalization, and special cases // Integer quotient or remainder correction, normalization, and special cases
if (`IDIV_ON_FPU) begin:intpostproc // Int supported if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
logic [`DIVBLEN:0] NormShiftM; logic [P.DIVBLEN:0] NormShiftM;
logic [`DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM; logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
logic signed [`DIVb+3:0] PreResultM, PreIntResultM; logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
assign W = $signed(Sum) >>> `LOGR; assign W = $signed(Sum) >>> P.LOGR;
assign UnsignedQuotM = {3'b000, PreQmM}; assign UnsignedQuotM = {3'b000, PreQmM};
// Integer remainder: sticky and sign correction muxes // Integer remainder: sticky and sign correction muxes
mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM); mux2 #(P.DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); mux2 #(P.DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM); mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
// Select quotient or remainder and do normalization shift // Select quotient or remainder and do normalization shift
mux2 #(`DIVBLEN+1) normshiftmux(((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))), (mM + (`DIVBLEN+1)'(`DIVa)), RemOpM, NormShiftM); mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
mux2 #(`DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
assign PreIntResultM = $signed(PreResultM >>> NormShiftM); assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
// special case logic // special case logic
@ -119,18 +117,18 @@ module fdivsqrtpostproc(
always_comb always_comb
if (BZeroM) begin // Divide by zero if (BZeroM) begin // Divide by zero
if (RemOpM) IntDivResultM = AM; if (RemOpM) IntDivResultM = AM;
else IntDivResultM = {(`XLEN){1'b1}}; else IntDivResultM = {(P.XLEN){1'b1}};
end else if (ALTBM) begin // Numerator is zero end else if (ALTBM) begin // Numerator is zero
if (RemOpM) IntDivResultM = AM; if (RemOpM) IntDivResultM = AM;
else IntDivResultM = '0; else IntDivResultM = '0;
end else IntDivResultM = PreIntResultM[`XLEN-1:0]; end else IntDivResultM = PreIntResultM[P.XLEN-1:0];
// sign extend result for W64 // sign extend result for W64
if (`XLEN==64) begin if (P.XLEN==64) begin
mux2 #(64) resmux(IntDivResultM[`XLEN-1:0], mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0],
{{(`XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64 {{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
W64M, FIntDivResultM); W64M, FIntDivResultM);
end else end else
assign FIntDivResultM = IntDivResultM[`XLEN-1:0]; assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
end end
endmodule endmodule

View File

@ -26,56 +26,54 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
module fdivsqrtpreproc (
input logic clk, input logic clk,
input logic IFDivStartE, input logic IFDivStartE,
input logic [`NF:0] Xm, Ym, input logic [P.NF:0] Xm, Ym,
input logic [`NE-1:0] Xe, Ye, input logic [P.NE-1:0] Xe, Ye,
input logic [`FMTBITS-1:0] FmtE, input logic [P.FMTBITS-1:0] FmtE,
input logic SqrtE, input logic SqrtE,
input logic XZeroE, input logic XZeroE,
input logic [2:0] Funct3E, input logic [2:0] Funct3E,
output logic [`NE+1:0] QeM, output logic [P.NE+1:0] QeM,
output logic [`DIVb+3:0] X, D, output logic [P.DIVb+3:0] X, D,
// Int-specific // Int-specific
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic IntDivE, W64E, input logic IntDivE, W64E,
output logic ISpecialCaseE, output logic ISpecialCaseE,
output logic [`DURLEN-1:0] CyclesE, output logic [P.DURLEN-1:0] CyclesE,
output logic [`DIVBLEN:0] nM, mM, output logic [P.DIVBLEN:0] nM, mM,
output logic NegQuotM, ALTBM, IntDivM, W64M, output logic NegQuotM, ALTBM, IntDivM, W64M,
output logic AsM, BZeroM, output logic AsM, BZeroM,
output logic [`XLEN-1:0] AM output logic [P.XLEN-1:0] AM
); );
logic [`DIVb-1:0] Xfract, Dfract; logic [P.DIVb-1:0] Xfract, Dfract;
logic [`DIVb:0] PreSqrtX; logic [P.DIVb:0] PreSqrtX;
logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
logic [`NE+1:0] QeE; // Quotient Exponent (FP only) logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
logic NumerZeroE; // Numerator is zero (X or A) logic NumerZeroE; // Numerator is zero (X or A)
logic AZeroE, BZeroE; // A or B is Zero for integer division logic AZeroE, BZeroE; // A or B is Zero for integer division
logic SignedDivE; // signed division logic SignedDivE; // signed division
logic NegQuotE; // Integer quotient is negative logic NegQuotE; // Integer quotient is negative
logic AsE, BsE; // Signs of integer inputs logic AsE, BsE; // Signs of integer inputs
logic [`XLEN-1:0] AE; // input A after W64 adjustment logic [P.XLEN-1:0] AE; // input A after W64 adjustment
logic ALTBE; logic ALTBE;
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// Integer Preprocessing // Integer Preprocessing
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
if (`IDIV_ON_FPU) begin:intpreproc // Int Supported if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
logic [`XLEN-1:0] BE, PosA, PosB; logic [P.XLEN-1:0] BE, PosA, PosB;
// Extract inputs, signs, zero, depending on W64 mode if applicable // Extract inputs, signs, zero, depending on W64 mode if applicable
assign SignedDivE = ~Funct3E[0]; assign SignedDivE = ~Funct3E[0];
// Source handling // Source handling
if (`XLEN==64) begin // 64-bit, supports W64 if (P.XLEN==64) begin // 64-bit, supports W64
mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE); mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE); mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
end else begin // 32 bits only end else begin // 32 bits only
@ -84,21 +82,21 @@ module fdivsqrtpreproc (
end end
assign AZeroE = ~(|AE); assign AZeroE = ~(|AE);
assign BZeroE = ~(|BE); assign BZeroE = ~(|BE);
assign AsE = AE[`XLEN-1] & SignedDivE; assign AsE = AE[P.XLEN-1] & SignedDivE;
assign BsE = BE[`XLEN-1] & SignedDivE; assign BsE = BE[P.XLEN-1] & SignedDivE;
assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
// Force integer inputs to be positive // Force integer inputs to be positive
mux2 #(`XLEN) posamux(AE, -AE, AsE, PosA); mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
mux2 #(`XLEN) posbmux(BE, -BE, BsE, PosB); mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
// Select integer or floating point inputs // Select integer or floating point inputs
mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX); mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD); mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
end else begin // Int not supported end else begin // Int not supported
assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}}; assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}}; assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
assign NumerZeroE = XZeroE; assign NumerZeroE = XZeroE;
end end
@ -107,8 +105,8 @@ module fdivsqrtpreproc (
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// count leading zeros for Subnorm FP and to normalize integer inputs // count leading zeros for Subnorm FP and to normalize integer inputs
lzc #(`DIVb) lzcX (IFX, ell); lzc #(P.DIVb) lzcX (IFX, ell);
lzc #(`DIVb) lzcY (IFD, mE); lzc #(P.DIVb) lzcY (IFD, mE);
// Normalization shift: shift off leading one // Normalization shift: shift off leading one
assign Xfract = (IFX << ell) << 1; assign Xfract = (IFX << ell) << 1;
@ -122,28 +120,28 @@ module fdivsqrtpreproc (
// and nE (number of fractional digits) // and nE (number of fractional digits)
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
if (`IDIV_ON_FPU) begin:intrightshift // Int Supported if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
logic [`DIVBLEN:0] ZeroDiff, p; logic [P.DIVBLEN:0] ZeroDiff, p;
// calculate number of fractional bits p // calculate number of fractional bits p
assign ZeroDiff = mE - ell; // Difference in number of leading zeros assign ZeroDiff = mE - ell; // Difference in number of leading zeros
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B (A has more leading zeros) assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
mux2 #(`DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
// Integer special cases (terminate immediately) // Integer special cases (terminate immediately)
assign ISpecialCaseE = BZeroE | ALTBE; assign ISpecialCaseE = BZeroE | ALTBE;
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
if (`LOGRK > 0) begin // more than 1 bit per cycle if (P.LOGRK > 0) begin // more than 1 bit per cycle
logic [`LOGRK-1:0] IntTrunc, RightShiftX; logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
logic [`DIVBLEN:0] TotalIntBits, IntSteps; logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */
end else begin // radix 2 1 copy doesn't require shifting end else begin // radix 2 1 copy doesn't require shifting
@ -167,42 +165,42 @@ module fdivsqrtpreproc (
assign DivX = {3'b000, ~NumerZeroE, Xfract}; assign DivX = {3'b000, ~NumerZeroE, Xfract};
// Sqrt is initialized on step one as R(X-1), so depends on Radix // Sqrt is initialized on step one as R(X-1), so depends on Radix
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// Select integer or floating-point operands // Select integer or floating-point operands
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
if (`IDIV_ON_FPU) begin if (P.IDIV_ON_FPU) begin
mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
end else begin end else begin
assign X = PreShiftX; assign X = PreShiftX;
end end
// Divisor register // Divisor register
flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
// Floating-point exponent // Floating-point exponent
fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
// Number of FSM cycles (to FSM) // Number of FSM cycles (to FSM)
fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
if (`IDIV_ON_FPU) begin:intpipelineregs if (P.IDIV_ON_FPU) begin:intpipelineregs
// pipeline registers // pipeline registers
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
if (`XLEN==64) if (P.XLEN==64)
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
end end

View File

@ -26,8 +26,6 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtqsel2 ( module fdivsqrtqsel2 (
input logic [3:0] ps, pc, input logic [3:0] ps, pc,
output logic up, uz, un output logic up, uz, un

View File

@ -26,8 +26,6 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtqsel4 ( module fdivsqrtqsel4 (
input logic [2:0] Dmsbs, input logic [2:0] Dmsbs,
input logic [4:0] Smsbs, input logic [4:0] Smsbs,

View File

@ -26,8 +26,6 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtqsel4cmp ( module fdivsqrtqsel4cmp (
input logic [2:0] Dmsbs, input logic [2:0] Dmsbs,
input logic [4:0] Smsbs, input logic [4:0] Smsbs,

View File

@ -26,27 +26,26 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off UNOPTFLAT */ /* verilator lint_off UNOPTFLAT */
module fdivsqrtstage2 ( module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
input logic [`DIVb+3:0] D, DBar, input logic [P.DIVb+3:0] D, DBar,
input logic [`DIVb:0] U, UM, input logic [P.DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC, input logic [P.DIVb+3:0] WS, WC,
input logic [`DIVb+1:0] C, input logic [P.DIVb+1:0] C,
input logic SqrtE, input logic SqrtE,
output logic un, output logic un,
output logic [`DIVb+1:0] CNext, output logic [P.DIVb+1:0] CNext,
output logic [`DIVb:0] UNext, UMNext, output logic [P.DIVb:0] UNext, UMNext,
output logic [`DIVb+3:0] WSNext, WCNext output logic [P.DIVb+3:0] WSNext, WCNext
); );
/* verilator lint_on UNOPTFLAT */ /* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] Dsel; logic [P.DIVb+3:0] Dsel;
logic up, uz; logic up, uz;
logic [`DIVb+3:0] F; logic [P.DIVb+3:0] F;
logic [`DIVb+3:0] AddIn; logic [P.DIVb+3:0] AddIn;
logic [`DIVb+3:0] WSA, WCA; logic [P.DIVb+3:0] WSA, WCA;
// Quotient Selection logic // Quotient Selection logic
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un) // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
@ -56,10 +55,10 @@ module fdivsqrtstage2 (
// 0000 = 0 // 0000 = 0
// 0010 = -1 // 0010 = -1
// 0001 = -2 // 0001 = -2
fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);
// Sqrt F generation. Extend C, U, UM to Q4.k // Sqrt F generation. Extend C, U, UM to Q4.k
fdivsqrtfgen2 fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
// Divisor multiple // Divisor multiple
always_comb always_comb
@ -69,16 +68,16 @@ module fdivsqrtstage2 (
// Partial Product Generation // Partial Product Generation
// WSA, WCA = WS + WC - qD // WSA, WCA = WS + WC - qD
mux2 #(`DIVb+4) addinmux(Dsel, F, SqrtE, AddIn); mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA); csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
assign WSNext = WSA << 1; assign WSNext = WSA << 1;
assign WCNext = WCA << 1; assign WCNext = WCA << 1;
// Shift thermometer code C // Shift thermometer code C
assign CNext = {1'b1, C[`DIVb+1:1]}; assign CNext = {1'b1, C[P.DIVb+1:1]};
// Unified On-The-Fly Converter to accumulate result // Unified On-The-Fly Converter to accumulate result
fdivsqrtuotfc2 uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext); fdivsqrtuotfc2 #(P) uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
endmodule endmodule

View File

@ -26,29 +26,27 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb+3:0] D, DBar, D2, DBar2,
module fdivsqrtstage4 ( input logic [P.DIVb:0] U,UM,
input logic [`DIVb+3:0] D, DBar, D2, DBar2, input logic [P.DIVb+3:0] WS, WC,
input logic [`DIVb:0] U,UM, input logic [P.DIVb+1:0] C,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb+1:0] C,
input logic SqrtE, j1, input logic SqrtE, j1,
output logic [`DIVb+1:0] CNext, output logic [P.DIVb+1:0] CNext,
output logic un, output logic un,
output logic [`DIVb:0] UNext, UMNext, output logic [P.DIVb:0] UNext, UMNext,
output logic [`DIVb+3:0] WSNext, WCNext output logic [P.DIVb+3:0] WSNext, WCNext
); );
logic [`DIVb+3:0] Dsel; logic [P.DIVb+3:0] Dsel;
logic [3:0] udigit; logic [3:0] udigit;
logic [`DIVb+3:0] F; logic [P.DIVb+3:0] F;
logic [`DIVb+3:0] AddIn; logic [P.DIVb+3:0] AddIn;
logic [4:0] Smsbs; logic [4:0] Smsbs;
logic [2:0] Dmsbs; logic [2:0] Dmsbs;
logic [7:0] WCmsbs, WSmsbs; logic [7:0] WCmsbs, WSmsbs;
logic CarryIn; logic CarryIn;
logic [`DIVb+3:0] WSA, WCA; logic [P.DIVb+3:0] WSA, WCA;
// Digit Selection logic // Digit Selection logic
// u encoding: // u encoding:
@ -57,16 +55,16 @@ module fdivsqrtstage4 (
// 0000 = 0 // 0000 = 0
// 0010 = -1 // 0010 = -1
// 0001 = -2 // 0001 = -2
assign Smsbs = U[`DIVb:`DIVb-4]; assign Smsbs = U[P.DIVb:P.DIVb-4];
assign Dmsbs = D[`DIVb-1:`DIVb-3]; assign Dmsbs = D[P.DIVb-1:P.DIVb-3];
assign WCmsbs = WC[`DIVb+3:`DIVb-4]; assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];
assign WSmsbs = WS[`DIVb+3:`DIVb-4]; assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit); fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
assign un = 1'b0; // unused for radix 4 assign un = 1'b0; // unused for radix 4
// F generation logic // F generation logic
fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); fdivsqrtfgen4 #(P) fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
// Divisor multiple logic // Divisor multiple logic
always_comb always_comb
@ -83,15 +81,15 @@ module fdivsqrtstage4 (
// {WS, WC}Next = (WS + WC - qD or F) << 2 // {WS, WC}Next = (WS + WC - qD or F) << 2
assign AddIn = SqrtE ? F : Dsel; assign AddIn = SqrtE ? F : Dsel;
assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D
csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); csa #(P.DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
assign WSNext = WSA << 2; assign WSNext = WSA << 2;
assign WCNext = WCA << 2; assign WCNext = WCA << 2;
// Shift thermometer code C // Shift thermometer code C
assign CNext = {2'b11, C[`DIVb+1:2]}; assign CNext = {2'b11, C[P.DIVb+1:2]};
// On-the-fly converter to accumulate result // On-the-fly converter to accumulate result
fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); fdivsqrtuotfc4 #(P) fdivsqrtuotfc4(.udigit, .C(CNext[P.DIVb:0]), .U, .UM, .UNext, .UMNext);
endmodule endmodule

View File

@ -26,22 +26,20 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
/////////////////////////////// ///////////////////////////////
// Unified OTFC, Radix 2 // // Unified OTFC, Radix 2 //
/////////////////////////////// ///////////////////////////////
module fdivsqrtuotfc2( module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) (
input logic up, un, input logic up, un,
input logic [`DIVb+1:0] C, input logic [P.DIVb+1:0] C,
input logic [`DIVb:0] U, UM, input logic [P.DIVb:0] U, UM,
output logic [`DIVb:0] UNext, UMNext output logic [P.DIVb:0] UNext, UMNext
); );
// The on-the-fly converter transfers the divsqrt // The on-the-fly converter transfers the divsqrt
// bits to the quotient as they come. // bits to the quotient as they come.
logic [`DIVb:0] K; logic [P.DIVb:0] K;
assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1)); // Thermometer to one hot encoding assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
always_comb begin always_comb begin
if (up) begin if (up) begin

View File

@ -26,19 +26,17 @@
// and limitations under the License. // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh" module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) (
module fdivsqrtuotfc4(
input logic [3:0] udigit, input logic [3:0] udigit,
input logic [`DIVb:0] U, UM, input logic [P.DIVb:0] U, UM,
input logic [`DIVb:0] C, input logic [P.DIVb:0] C,
output logic [`DIVb:0] UNext, UMNext output logic [P.DIVb:0] UNext, UMNext
); );
// The on-the-fly converter transfers the square root // The on-the-fly converter transfers the square root
// bits to the quotient as they come. // bits to the quotient as they come.
// Use this otfc for division and square root. // Use this otfc for division and square root.
logic [`DIVb:0] K1, K2, K3; logic [P.DIVb:0] K1, K2, K3;
assign K1 = (C&~(C << 1)); // K assign K1 = (C&~(C << 1)); // K
assign K2 = ((C << 1)&~(C << 2)); // 2K assign K2 = ((C << 1)&~(C << 2)); // 2K
assign K3 = (C & ~(C << 2)); // 3K assign K3 = (C & ~(C << 2)); // 3K

View File

@ -238,7 +238,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE)); .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE));
// divide and square root: fdiv, fsqrt, optionally integer division // divide and square root: fdiv, fsqrt, optionally integer division
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,