mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
fdiv is now parameterized using Lim's method.
This commit is contained in:
parent
81491e85e5
commit
29e0357f21
@ -26,15 +26,13 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
||||||
|
|
||||||
module fdivsqrt(
|
|
||||||
input logic clk,
|
input logic clk,
|
||||||
input logic reset,
|
input logic reset,
|
||||||
input logic [`FMTBITS-1:0] FmtE,
|
input logic [P.FMTBITS-1:0] FmtE,
|
||||||
input logic XsE,
|
input logic XsE,
|
||||||
input logic [`NF:0] XmE, YmE,
|
input logic [P.NF:0] XmE, YmE,
|
||||||
input logic [`NE-1:0] XeE, YeE,
|
input logic [P.NE-1:0] XeE, YeE,
|
||||||
input logic XInfE, YInfE,
|
input logic XInfE, YInfE,
|
||||||
input logic XZeroE, YZeroE,
|
input logic XZeroE, YZeroE,
|
||||||
input logic XNaNE, YNaNE,
|
input logic XNaNE, YNaNE,
|
||||||
@ -42,39 +40,39 @@ module fdivsqrt(
|
|||||||
input logic StallM,
|
input logic StallM,
|
||||||
input logic FlushE,
|
input logic FlushE,
|
||||||
input logic SqrtE, SqrtM,
|
input logic SqrtE, SqrtM,
|
||||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||||
input logic [2:0] Funct3E, Funct3M,
|
input logic [2:0] Funct3E, Funct3M,
|
||||||
input logic IntDivE, W64E,
|
input logic IntDivE, W64E,
|
||||||
output logic DivStickyM,
|
output logic DivStickyM,
|
||||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||||
output logic [`NE+1:0] QeM,
|
output logic [P.NE+1:0] QeM,
|
||||||
output logic [`DIVb:0] QmM,
|
output logic [P.DIVb:0] QmM,
|
||||||
output logic [`XLEN-1:0] FIntDivResultM
|
output logic [P.XLEN-1:0] FIntDivResultM
|
||||||
);
|
);
|
||||||
|
|
||||||
// Floating-point division and square root module, with optional integer division and remainder
|
// Floating-point division and square root module, with optional integer division and remainder
|
||||||
// Computes X/Y, sqrt(X), A/B, or A%B
|
// Computes X/Y, sqrt(X), A/B, or A%B
|
||||||
|
|
||||||
logic [`DIVb+3:0] WS, WC; // Partial remainder components
|
logic [P.DIVb+3:0] WS, WC; // Partial remainder components
|
||||||
logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||||
logic [`DIVb+3:0] D; // Iterator Divisor
|
logic [P.DIVb+3:0] D; // Iterator Divisor
|
||||||
logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values
|
logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||||
logic [`DIVb+1:0] FirstC; // Step tracker
|
logic [P.DIVb+1:0] FirstC; // Step tracker
|
||||||
logic Firstun; // Quotient selection
|
logic Firstun; // Quotient selection
|
||||||
logic WZeroE; // Early termination flag
|
logic WZeroE; // Early termination flag
|
||||||
logic [`DURLEN-1:0] CyclesE; // FSM cycles
|
logic [P.DURLEN-1:0] CyclesE; // FSM cycles
|
||||||
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
||||||
logic DivStartE; // Enable signal for flops during stall
|
logic DivStartE; // Enable signal for flops during stall
|
||||||
|
|
||||||
// Integer div/rem signals
|
// Integer div/rem signals
|
||||||
logic BZeroM; // Denominator is zero
|
logic BZeroM; // Denominator is zero
|
||||||
logic IntDivM; // Integer operation
|
logic IntDivM; // Integer operation
|
||||||
logic [`DIVBLEN:0] nM, mM; // Shift amounts
|
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
|
||||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||||
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
|
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||||
|
|
||||||
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
|
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
|
||||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||||
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
|
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
|
||||||
// Int-specific
|
// Int-specific
|
||||||
@ -82,18 +80,18 @@ module fdivsqrt(
|
|||||||
.BZeroM, .nM, .mM, .AM,
|
.BZeroM, .nM, .mM, .AM,
|
||||||
.IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
|
.IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
|
||||||
|
|
||||||
fdivsqrtfsm fdivsqrtfsm( // FSM
|
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
|
||||||
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
|
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
|
||||||
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
|
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
|
||||||
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
|
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
|
||||||
// Int-specific
|
// Int-specific
|
||||||
.IDivStartE, .ISpecialCaseE, .IntDivE);
|
.IDivStartE, .ISpecialCaseE, .IntDivE);
|
||||||
|
|
||||||
fdivsqrtiter fdivsqrtiter( // CSA Iterator
|
fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator
|
||||||
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
|
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
|
||||||
.FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
|
.FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
|
||||||
|
|
||||||
fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor
|
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
|
||||||
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
|
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
|
||||||
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
|
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
|
||||||
.QmM, .WZeroE, .DivStickyM,
|
.QmM, .WZeroE, .DivStickyM,
|
||||||
|
@ -26,51 +26,49 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
||||||
|
input logic [P.FMTBITS-1:0] FmtE,
|
||||||
module fdivsqrtcycles(
|
|
||||||
input logic [`FMTBITS-1:0] FmtE,
|
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
input logic IntDivE,
|
input logic IntDivE,
|
||||||
input logic [`DIVBLEN:0] nE,
|
input logic [P.DIVBLEN:0] nE,
|
||||||
output logic [`DURLEN-1:0] CyclesE
|
output logic [P.DURLEN-1:0] CyclesE
|
||||||
);
|
);
|
||||||
logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
|
logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
|
||||||
// DIVN = `NF+3
|
// DIVN = P.NF+3
|
||||||
// NS = NF + 1
|
// NS = NF + 1
|
||||||
// N = NS or NS+2 for div/sqrt.
|
// N = NS or NS+2 for div/sqrt.
|
||||||
|
|
||||||
/* verilator lint_off WIDTH */
|
/* verilator lint_off WIDTH */
|
||||||
if (`FPSIZES == 1)
|
if (P.FPSIZES == 1)
|
||||||
assign Nf = `NF;
|
assign Nf = P.NF;
|
||||||
else if (`FPSIZES == 2)
|
else if (P.FPSIZES == 2)
|
||||||
always_comb
|
always_comb
|
||||||
case (FmtE)
|
case (FmtE)
|
||||||
1'b0: Nf = `NF1;
|
1'b0: Nf = P.NF1;
|
||||||
1'b1: Nf = `NF;
|
1'b1: Nf = P.NF;
|
||||||
endcase
|
endcase
|
||||||
else if (`FPSIZES == 3)
|
else if (P.FPSIZES == 3)
|
||||||
always_comb
|
always_comb
|
||||||
case (FmtE)
|
case (FmtE)
|
||||||
`FMT: Nf = `NF;
|
P.FMT: Nf = P.NF;
|
||||||
`FMT1: Nf = `NF1;
|
P.FMT1: Nf = P.NF1;
|
||||||
`FMT2: Nf = `NF2;
|
P.FMT2: Nf = P.NF2;
|
||||||
endcase
|
endcase
|
||||||
else if (`FPSIZES == 4)
|
else if (P.FPSIZES == 4)
|
||||||
always_comb
|
always_comb
|
||||||
case(FmtE)
|
case(FmtE)
|
||||||
`S_FMT: Nf = `S_NF;
|
P.S_FMT: Nf = P.S_NF;
|
||||||
`D_FMT: Nf = `D_NF;
|
P.D_FMT: Nf = P.D_NF;
|
||||||
`H_FMT: Nf = `H_NF;
|
P.H_FMT: Nf = P.H_NF;
|
||||||
`Q_FMT: Nf = `Q_NF;
|
P.Q_FMT: Nf = P.Q_NF;
|
||||||
endcase
|
endcase
|
||||||
|
|
||||||
always_comb begin
|
always_comb begin
|
||||||
if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
|
if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
|
||||||
else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
|
else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
|
||||||
if (`IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
|
if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
|
||||||
else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
|
else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
|
||||||
end
|
end
|
||||||
/* verilator lint_on WIDTH */
|
/* verilator lint_on WIDTH */
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -26,49 +26,47 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
||||||
|
input logic [P.FMTBITS-1:0] Fmt,
|
||||||
module fdivsqrtexpcalc(
|
input logic [P.NE-1:0] Xe, Ye,
|
||||||
input logic [`FMTBITS-1:0] Fmt,
|
|
||||||
input logic [`NE-1:0] Xe, Ye,
|
|
||||||
input logic Sqrt,
|
input logic Sqrt,
|
||||||
input logic XZero,
|
input logic XZero,
|
||||||
input logic [`DIVBLEN:0] ell, m,
|
input logic [P.DIVBLEN:0] ell, m,
|
||||||
output logic [`NE+1:0] Qe
|
output logic [P.NE+1:0] Qe
|
||||||
);
|
);
|
||||||
logic [`NE-2:0] Bias;
|
logic [P.NE-2:0] Bias;
|
||||||
logic [`NE+1:0] SXExp;
|
logic [P.NE+1:0] SXExp;
|
||||||
logic [`NE+1:0] SExp;
|
logic [P.NE+1:0] SExp;
|
||||||
logic [`NE+1:0] DExp;
|
logic [P.NE+1:0] DExp;
|
||||||
|
|
||||||
if (`FPSIZES == 1) begin
|
if (P.FPSIZES == 1) begin
|
||||||
assign Bias = (`NE-1)'(`BIAS);
|
assign Bias = (P.NE-1)'(P.BIAS);
|
||||||
|
|
||||||
end else if (`FPSIZES == 2) begin
|
end else if (P.FPSIZES == 2) begin
|
||||||
assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
|
assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1);
|
||||||
|
|
||||||
end else if (`FPSIZES == 3) begin
|
end else if (P.FPSIZES == 3) begin
|
||||||
always_comb
|
always_comb
|
||||||
case (Fmt)
|
case (Fmt)
|
||||||
`FMT: Bias = (`NE-1)'(`BIAS);
|
P.FMT: Bias = (P.NE-1)'(P.BIAS);
|
||||||
`FMT1: Bias = (`NE-1)'(`BIAS1);
|
P.FMT1: Bias = (P.NE-1)'(P.BIAS1);
|
||||||
`FMT2: Bias = (`NE-1)'(`BIAS2);
|
P.FMT2: Bias = (P.NE-1)'(P.BIAS2);
|
||||||
default: Bias = 'x;
|
default: Bias = 'x;
|
||||||
endcase
|
endcase
|
||||||
|
|
||||||
end else if (`FPSIZES == 4) begin
|
end else if (P.FPSIZES == 4) begin
|
||||||
always_comb
|
always_comb
|
||||||
case (Fmt)
|
case (Fmt)
|
||||||
2'h3: Bias = (`NE-1)'(`Q_BIAS);
|
2'h3: Bias = (P.NE-1)'(P.Q_BIAS);
|
||||||
2'h1: Bias = (`NE-1)'(`D_BIAS);
|
2'h1: Bias = (P.NE-1)'(P.D_BIAS);
|
||||||
2'h0: Bias = (`NE-1)'(`S_BIAS);
|
2'h0: Bias = (P.NE-1)'(P.S_BIAS);
|
||||||
2'h2: Bias = (`NE-1)'(`H_BIAS);
|
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
|
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
|
||||||
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
|
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
|
||||||
|
|
||||||
// correct exponent for subnormal input's normalization shifts
|
// correct exponent for subnormal input's normalization shifts
|
||||||
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
||||||
assign Qe = Sqrt ? SExp : DExp;
|
assign Qe = Sqrt ? SExp : DExp;
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -26,14 +26,12 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
|
||||||
|
|
||||||
module fdivsqrtfgen2 (
|
|
||||||
input logic up, uz,
|
input logic up, uz,
|
||||||
input logic [`DIVb+3:0] C, U, UM,
|
input logic [P.DIVb+3:0] C, U, UM,
|
||||||
output logic [`DIVb+3:0] F
|
output logic [P.DIVb+3:0] F
|
||||||
);
|
);
|
||||||
logic [`DIVb+3:0] FP, FN, FZ;
|
logic [P.DIVb+3:0] FP, FN, FZ;
|
||||||
|
|
||||||
// Generate for both positive and negative bits
|
// Generate for both positive and negative bits
|
||||||
assign FP = ~(U << 1) & C;
|
assign FP = ~(U << 1) & C;
|
||||||
|
@ -26,14 +26,12 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
|
||||||
|
|
||||||
module fdivsqrtfgen4 (
|
|
||||||
input logic [3:0] udigit,
|
input logic [3:0] udigit,
|
||||||
input logic [`DIVb+3:0] C, U, UM,
|
input logic [P.DIVb+3:0] C, U, UM,
|
||||||
output logic [`DIVb+3:0] F
|
output logic [P.DIVb+3:0] F
|
||||||
);
|
);
|
||||||
logic [`DIVb+3:0] F2, F1, F0, FN1, FN2;
|
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
|
||||||
|
|
||||||
// Generate for both positive and negative bits
|
// Generate for both positive and negative bits
|
||||||
assign F2 = (~U << 2) & (C << 2);
|
assign F2 = (~U << 2) & (C << 2);
|
||||||
@ -49,4 +47,4 @@ module fdivsqrtfgen4 (
|
|||||||
else if (udigit[1]) F = FN1;
|
else if (udigit[1]) F = FN1;
|
||||||
else if (udigit[0]) F = FN2;
|
else if (udigit[0]) F = FN2;
|
||||||
else F = F0;
|
else F = F0;
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -26,9 +26,7 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) (
|
||||||
|
|
||||||
module fdivsqrtfsm(
|
|
||||||
input logic clk, reset,
|
input logic clk, reset,
|
||||||
input logic XInfE, YInfE,
|
input logic XInfE, YInfE,
|
||||||
input logic XZeroE, YZeroE,
|
input logic XZeroE, YZeroE,
|
||||||
@ -39,7 +37,7 @@ module fdivsqrtfsm(
|
|||||||
input logic StallM, FlushE,
|
input logic StallM, FlushE,
|
||||||
input logic IntDivE,
|
input logic IntDivE,
|
||||||
input logic ISpecialCaseE,
|
input logic ISpecialCaseE,
|
||||||
input logic [`DURLEN-1:0] CyclesE,
|
input logic [P.DURLEN-1:0] CyclesE,
|
||||||
output logic IFDivStartE,
|
output logic IFDivStartE,
|
||||||
output logic FDivBusyE, FDivDoneE,
|
output logic FDivBusyE, FDivDoneE,
|
||||||
output logic SpecialCaseM
|
output logic SpecialCaseM
|
||||||
@ -49,16 +47,16 @@ module fdivsqrtfsm(
|
|||||||
statetype state;
|
statetype state;
|
||||||
|
|
||||||
logic SpecialCaseE, FSpecialCaseE;
|
logic SpecialCaseE, FSpecialCaseE;
|
||||||
logic [`DURLEN-1:0] step;
|
logic [P.DURLEN-1:0] step;
|
||||||
|
|
||||||
// FDivStartE and IDivStartE come from fctrl, reflecting the start of floating-point and possibly integer division
|
// FDivStartE and IDivStartE come from fctrl, reflecting the start of floating-point and possibly integer division
|
||||||
assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
|
assign IFDivStartE = (FDivStartE | (IDivStartE & P.IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
|
||||||
assign FDivDoneE = (state == DONE);
|
assign FDivDoneE = (state == DONE);
|
||||||
assign FDivBusyE = (state == BUSY) | IFDivStartE;
|
assign FDivBusyE = (state == BUSY) | IFDivStartE;
|
||||||
|
|
||||||
// terminate immediately on special cases
|
// terminate immediately on special cases
|
||||||
assign FSpecialCaseE = XZeroE | | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
|
assign FSpecialCaseE = XZeroE | | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
|
||||||
if (`IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
|
if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
|
||||||
else assign SpecialCaseE = FSpecialCaseE;
|
else assign SpecialCaseE = FSpecialCaseE;
|
||||||
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
||||||
|
|
||||||
@ -78,4 +76,4 @@ module fdivsqrtfsm(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -26,38 +26,36 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
|
||||||
|
|
||||||
module fdivsqrtiter(
|
|
||||||
input logic clk,
|
input logic clk,
|
||||||
input logic IFDivStartE,
|
input logic IFDivStartE,
|
||||||
input logic FDivBusyE,
|
input logic FDivBusyE,
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
input logic [`DIVb+3:0] X, D,
|
input logic [P.DIVb+3:0] X, D,
|
||||||
output logic [`DIVb:0] FirstU, FirstUM,
|
output logic [P.DIVb:0] FirstU, FirstUM,
|
||||||
output logic [`DIVb+1:0] FirstC,
|
output logic [P.DIVb+1:0] FirstC,
|
||||||
output logic Firstun,
|
output logic Firstun,
|
||||||
output logic [`DIVb+3:0] FirstWS, FirstWC
|
output logic [P.DIVb+3:0] FirstWS, FirstWC
|
||||||
);
|
);
|
||||||
|
|
||||||
/* verilator lint_off UNOPTFLAT */
|
/* verilator lint_off UNOPTFLAT */
|
||||||
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
|
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b
|
||||||
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
|
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b
|
||||||
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
|
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b
|
||||||
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
|
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b
|
||||||
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
|
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b
|
||||||
logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b
|
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b
|
||||||
logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b
|
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b
|
||||||
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b
|
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b
|
||||||
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
|
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b
|
||||||
logic [`DIVb+1:0] initC; // Q2.b
|
logic [P.DIVb+1:0] initC; // Q2.b
|
||||||
logic [`DIVCOPIES-1:0] un;
|
logic [P.DIVCOPIES-1:0] un;
|
||||||
|
|
||||||
logic [`DIVb+3:0] WSN, WCN; // Q4.b
|
logic [P.DIVb+3:0] WSN, WCN; // Q4.b
|
||||||
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b
|
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b
|
||||||
logic [`DIVb+1:0] NextC;
|
logic [P.DIVb+1:0] NextC;
|
||||||
logic [`DIVb:0] UMux, UMMux;
|
logic [P.DIVb:0] UMux, UMMux;
|
||||||
logic [`DIVb:0] initU, initUM;
|
logic [P.DIVb:0] initU, initUM;
|
||||||
/* verilator lint_on UNOPTFLAT */
|
/* verilator lint_on UNOPTFLAT */
|
||||||
|
|
||||||
// Top Muxes and Registers
|
// Top Muxes and Registers
|
||||||
@ -66,36 +64,36 @@ module fdivsqrtiter(
|
|||||||
// are fed back for the next iteration.
|
// are fed back for the next iteration.
|
||||||
|
|
||||||
// Residual WS/SC registers/initialization mux
|
// Residual WS/SC registers/initialization mux
|
||||||
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
|
mux2 #(P.DIVb+4) wsmux(WS[P.DIVCOPIES], X, IFDivStartE, WSN);
|
||||||
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
|
mux2 #(P.DIVb+4) wcmux(WC[P.DIVCOPIES], '0, IFDivStartE, WCN);
|
||||||
flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
|
flopen #(P.DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
|
||||||
flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
|
flopen #(P.DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
|
||||||
|
|
||||||
// UOTFC Result U and UM registers/initialization mux
|
// UOTFC Result U and UM registers/initialization mux
|
||||||
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise
|
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 otherwise
|
||||||
assign initU = {SqrtE, {(`DIVb){1'b0}}};
|
assign initU = {SqrtE, {(P.DIVb){1'b0}}};
|
||||||
assign initUM = {~SqrtE, {(`DIVb){1'b0}}};
|
assign initUM = {~SqrtE, {(P.DIVb){1'b0}}};
|
||||||
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
|
mux2 #(P.DIVb+1) Umux(UNext[P.DIVCOPIES-1], initU, IFDivStartE, UMux);
|
||||||
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
|
mux2 #(P.DIVb+1) UMmux(UMNext[P.DIVCOPIES-1], initUM, IFDivStartE, UMMux);
|
||||||
flopen #(`DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]);
|
flopen #(P.DIVb+1) UReg(clk, FDivBusyE, UMux, U[0]);
|
||||||
flopen #(`DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);
|
flopen #(P.DIVb+1) UMReg(clk, FDivBusyE, UMMux, UM[0]);
|
||||||
|
|
||||||
// C register/initialization mux
|
// C register/initialization mux
|
||||||
// Initialize C to -1 for sqrt and -R for division
|
// Initialize C to -1 for sqrt and -R for division
|
||||||
logic [1:0] initCUpper;
|
logic [1:0] initCUpper;
|
||||||
if(`RADIX == 4) begin
|
if(P.RADIX == 4) begin
|
||||||
mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper);
|
mux2 #(2) cuppermux4(2'b00, 2'b11, SqrtE, initCUpper);
|
||||||
end else begin
|
end else begin
|
||||||
mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper);
|
mux2 #(2) cuppermux2(2'b10, 2'b11, SqrtE, initCUpper);
|
||||||
end
|
end
|
||||||
|
|
||||||
assign initC = {initCUpper, {`DIVb{1'b0}}};
|
assign initC = {initCUpper, {P.DIVb{1'b0}}};
|
||||||
mux2 #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC);
|
mux2 #(P.DIVb+2) cmux(C[P.DIVCOPIES], initC, IFDivStartE, NextC);
|
||||||
flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
|
flopen #(P.DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
|
||||||
|
|
||||||
// Divisor Selections
|
// Divisor Selections
|
||||||
assign DBar = ~D; // for -D
|
assign DBar = ~D; // for -D
|
||||||
if(`RADIX == 4) begin : d2
|
if(P.RADIX == 4) begin : d2
|
||||||
assign D2 = D << 1; // for 2D, only used in R4
|
assign D2 = D << 1; // for 2D, only used in R4
|
||||||
assign DBar2 = ~D2; // for -2D, only used in R4
|
assign DBar2 = ~D2; // for -2D, only used in R4
|
||||||
end
|
end
|
||||||
@ -103,15 +101,15 @@ module fdivsqrtiter(
|
|||||||
// k=DIVCOPIES of the recurrence logic
|
// k=DIVCOPIES of the recurrence logic
|
||||||
genvar i;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : iterations
|
for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
|
||||||
if (`RADIX == 2) begin: stage
|
if (P.RADIX == 2) begin: stage
|
||||||
fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtE,
|
fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
|
||||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||||
end else begin: stage
|
end else begin: stage
|
||||||
logic j1;
|
logic j1;
|
||||||
assign j1 = (i == 0 & ~C[0][`DIVb-1]);
|
assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
|
||||||
fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
|
fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
|
||||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||||
end
|
end
|
||||||
|
@ -26,51 +26,49 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
||||||
|
|
||||||
module fdivsqrtpostproc(
|
|
||||||
input logic clk, reset,
|
input logic clk, reset,
|
||||||
input logic StallM,
|
input logic StallM,
|
||||||
input logic [`DIVb+3:0] WS, WC,
|
input logic [P.DIVb+3:0] WS, WC,
|
||||||
input logic [`DIVb+3:0] D,
|
input logic [P.DIVb+3:0] D,
|
||||||
input logic [`DIVb:0] FirstU, FirstUM,
|
input logic [P.DIVb:0] FirstU, FirstUM,
|
||||||
input logic [`DIVb+1:0] FirstC,
|
input logic [P.DIVb+1:0] FirstC,
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
input logic Firstun, SqrtM, SpecialCaseM, NegQuotM,
|
input logic Firstun, SqrtM, SpecialCaseM, NegQuotM,
|
||||||
input logic [`XLEN-1:0] AM,
|
input logic [P.XLEN-1:0] AM,
|
||||||
input logic RemOpM, ALTBM, BZeroM, AsM, W64M,
|
input logic RemOpM, ALTBM, BZeroM, AsM, W64M,
|
||||||
input logic [`DIVBLEN:0] nM, mM,
|
input logic [P.DIVBLEN:0] nM, mM,
|
||||||
output logic [`DIVb:0] QmM,
|
output logic [P.DIVb:0] QmM,
|
||||||
output logic WZeroE,
|
output logic WZeroE,
|
||||||
output logic DivStickyM,
|
output logic DivStickyM,
|
||||||
output logic [`XLEN-1:0] FIntDivResultM
|
output logic [P.XLEN-1:0] FIntDivResultM
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [`DIVb+3:0] W, Sum;
|
logic [P.DIVb+3:0] W, Sum;
|
||||||
logic [`DIVb:0] PreQmM;
|
logic [P.DIVb:0] PreQmM;
|
||||||
logic NegStickyM;
|
logic NegStickyM;
|
||||||
logic weq0E, WZeroM;
|
logic weq0E, WZeroM;
|
||||||
logic [`XLEN-1:0] IntDivResultM;
|
logic [P.XLEN-1:0] IntDivResultM;
|
||||||
|
|
||||||
//////////////////////////
|
//////////////////////////
|
||||||
// Execute Stage: Detect early termination for an exact result
|
// Execute Stage: Detect early termination for an exact result
|
||||||
//////////////////////////
|
//////////////////////////
|
||||||
|
|
||||||
// check for early termination on an exact result.
|
// check for early termination on an exact result.
|
||||||
aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0E);
|
aplusbeq0 #(P.DIVb+4) wspluswceq0(WS, WC, weq0E);
|
||||||
|
|
||||||
if (`RADIX == 2) begin: R2EarlyTerm
|
if (P.RADIX == 2) begin: R2EarlyTerm
|
||||||
logic [`DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
|
logic [P.DIVb+3:0] FZeroE, FZeroSqrtE, FZeroDivE;
|
||||||
logic [`DIVb+2:0] FirstK;
|
logic [P.DIVb+2:0] FirstK;
|
||||||
logic wfeq0E;
|
logic wfeq0E;
|
||||||
logic [`DIVb+3:0] WCF, WSF;
|
logic [P.DIVb+3:0] WCF, WSF;
|
||||||
|
|
||||||
assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
|
assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
|
||||||
assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
|
assign FZeroSqrtE = {FirstUM[P.DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
|
||||||
assign FZeroDivE = D << 1; // F for divide
|
assign FZeroDivE = D << 1; // F for divide
|
||||||
mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
|
mux2 #(P.DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
|
||||||
csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
|
csa #(P.DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
|
||||||
aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
|
aplusbeq0 #(P.DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
|
||||||
assign WZeroE = weq0E|(wfeq0E & Firstun);
|
assign WZeroE = weq0E|(wfeq0E & Firstun);
|
||||||
end else begin
|
end else begin
|
||||||
assign WZeroE = weq0E;
|
assign WZeroE = weq0E;
|
||||||
@ -91,27 +89,27 @@ module fdivsqrtpostproc(
|
|||||||
|
|
||||||
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
|
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
|
||||||
assign Sum = WC + WS;
|
assign Sum = WC + WS;
|
||||||
assign NegStickyM = Sum[`DIVb+3];
|
assign NegStickyM = Sum[P.DIVb+3];
|
||||||
mux2 #(`DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
||||||
mux2 #(`DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
||||||
|
|
||||||
// Integer quotient or remainder correction, normalization, and special cases
|
// Integer quotient or remainder correction, normalization, and special cases
|
||||||
if (`IDIV_ON_FPU) begin:intpostproc // Int supported
|
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
|
||||||
logic [`DIVBLEN:0] NormShiftM;
|
logic [P.DIVBLEN:0] NormShiftM;
|
||||||
logic [`DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
||||||
logic signed [`DIVb+3:0] PreResultM, PreIntResultM;
|
logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
|
||||||
|
|
||||||
assign W = $signed(Sum) >>> `LOGR;
|
assign W = $signed(Sum) >>> P.LOGR;
|
||||||
assign UnsignedQuotM = {3'b000, PreQmM};
|
assign UnsignedQuotM = {3'b000, PreQmM};
|
||||||
|
|
||||||
// Integer remainder: sticky and sign correction muxes
|
// Integer remainder: sticky and sign correction muxes
|
||||||
mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
|
mux2 #(P.DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
|
||||||
mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
|
mux2 #(P.DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
|
||||||
mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
|
mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
|
||||||
|
|
||||||
// Select quotient or remainder and do normalization shift
|
// Select quotient or remainder and do normalization shift
|
||||||
mux2 #(`DIVBLEN+1) normshiftmux(((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR))), (mM + (`DIVBLEN+1)'(`DIVa)), RemOpM, NormShiftM);
|
mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
|
||||||
mux2 #(`DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
|
mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
|
||||||
assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
|
assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
|
||||||
|
|
||||||
// special case logic
|
// special case logic
|
||||||
@ -119,18 +117,18 @@ module fdivsqrtpostproc(
|
|||||||
always_comb
|
always_comb
|
||||||
if (BZeroM) begin // Divide by zero
|
if (BZeroM) begin // Divide by zero
|
||||||
if (RemOpM) IntDivResultM = AM;
|
if (RemOpM) IntDivResultM = AM;
|
||||||
else IntDivResultM = {(`XLEN){1'b1}};
|
else IntDivResultM = {(P.XLEN){1'b1}};
|
||||||
end else if (ALTBM) begin // Numerator is zero
|
end else if (ALTBM) begin // Numerator is zero
|
||||||
if (RemOpM) IntDivResultM = AM;
|
if (RemOpM) IntDivResultM = AM;
|
||||||
else IntDivResultM = '0;
|
else IntDivResultM = '0;
|
||||||
end else IntDivResultM = PreIntResultM[`XLEN-1:0];
|
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];
|
||||||
|
|
||||||
// sign extend result for W64
|
// sign extend result for W64
|
||||||
if (`XLEN==64) begin
|
if (P.XLEN==64) begin
|
||||||
mux2 #(64) resmux(IntDivResultM[`XLEN-1:0],
|
mux2 #(64) resmux(IntDivResultM[P.XLEN-1:0],
|
||||||
{{(`XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
|
{{(P.XLEN-32){IntDivResultM[31]}}, IntDivResultM[31:0]}, // Sign extending in case of W64
|
||||||
W64M, FIntDivResultM);
|
W64M, FIntDivResultM);
|
||||||
end else
|
end else
|
||||||
assign FIntDivResultM = IntDivResultM[`XLEN-1:0];
|
assign FIntDivResultM = IntDivResultM[P.XLEN-1:0];
|
||||||
end
|
end
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -26,56 +26,54 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||||
|
|
||||||
module fdivsqrtpreproc (
|
|
||||||
input logic clk,
|
input logic clk,
|
||||||
input logic IFDivStartE,
|
input logic IFDivStartE,
|
||||||
input logic [`NF:0] Xm, Ym,
|
input logic [P.NF:0] Xm, Ym,
|
||||||
input logic [`NE-1:0] Xe, Ye,
|
input logic [P.NE-1:0] Xe, Ye,
|
||||||
input logic [`FMTBITS-1:0] FmtE,
|
input logic [P.FMTBITS-1:0] FmtE,
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
input logic XZeroE,
|
input logic XZeroE,
|
||||||
input logic [2:0] Funct3E,
|
input logic [2:0] Funct3E,
|
||||||
output logic [`NE+1:0] QeM,
|
output logic [P.NE+1:0] QeM,
|
||||||
output logic [`DIVb+3:0] X, D,
|
output logic [P.DIVb+3:0] X, D,
|
||||||
// Int-specific
|
// Int-specific
|
||||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||||
input logic IntDivE, W64E,
|
input logic IntDivE, W64E,
|
||||||
output logic ISpecialCaseE,
|
output logic ISpecialCaseE,
|
||||||
output logic [`DURLEN-1:0] CyclesE,
|
output logic [P.DURLEN-1:0] CyclesE,
|
||||||
output logic [`DIVBLEN:0] nM, mM,
|
output logic [P.DIVBLEN:0] nM, mM,
|
||||||
output logic NegQuotM, ALTBM, IntDivM, W64M,
|
output logic NegQuotM, ALTBM, IntDivM, W64M,
|
||||||
output logic AsM, BZeroM,
|
output logic AsM, BZeroM,
|
||||||
output logic [`XLEN-1:0] AM
|
output logic [P.XLEN-1:0] AM
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [`DIVb-1:0] Xfract, Dfract;
|
logic [P.DIVb-1:0] Xfract, Dfract;
|
||||||
logic [`DIVb:0] PreSqrtX;
|
logic [P.DIVb:0] PreSqrtX;
|
||||||
logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||||
logic [`NE+1:0] QeE; // Quotient Exponent (FP only)
|
logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
|
||||||
logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
||||||
logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
|
logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
|
||||||
logic NumerZeroE; // Numerator is zero (X or A)
|
logic NumerZeroE; // Numerator is zero (X or A)
|
||||||
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
||||||
logic SignedDivE; // signed division
|
logic SignedDivE; // signed division
|
||||||
logic NegQuotE; // Integer quotient is negative
|
logic NegQuotE; // Integer quotient is negative
|
||||||
logic AsE, BsE; // Signs of integer inputs
|
logic AsE, BsE; // Signs of integer inputs
|
||||||
logic [`XLEN-1:0] AE; // input A after W64 adjustment
|
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
|
||||||
logic ALTBE;
|
logic ALTBE;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Integer Preprocessing
|
// Integer Preprocessing
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
|
|
||||||
if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
|
if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
|
||||||
logic [`XLEN-1:0] BE, PosA, PosB;
|
logic [P.XLEN-1:0] BE, PosA, PosB;
|
||||||
|
|
||||||
// Extract inputs, signs, zero, depending on W64 mode if applicable
|
// Extract inputs, signs, zero, depending on W64 mode if applicable
|
||||||
assign SignedDivE = ~Funct3E[0];
|
assign SignedDivE = ~Funct3E[0];
|
||||||
|
|
||||||
// Source handling
|
// Source handling
|
||||||
if (`XLEN==64) begin // 64-bit, supports W64
|
if (P.XLEN==64) begin // 64-bit, supports W64
|
||||||
mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
|
mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
|
||||||
mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
|
mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
|
||||||
end else begin // 32 bits only
|
end else begin // 32 bits only
|
||||||
@ -84,21 +82,21 @@ module fdivsqrtpreproc (
|
|||||||
end
|
end
|
||||||
assign AZeroE = ~(|AE);
|
assign AZeroE = ~(|AE);
|
||||||
assign BZeroE = ~(|BE);
|
assign BZeroE = ~(|BE);
|
||||||
assign AsE = AE[`XLEN-1] & SignedDivE;
|
assign AsE = AE[P.XLEN-1] & SignedDivE;
|
||||||
assign BsE = BE[`XLEN-1] & SignedDivE;
|
assign BsE = BE[P.XLEN-1] & SignedDivE;
|
||||||
assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
|
assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
|
||||||
|
|
||||||
// Force integer inputs to be positive
|
// Force integer inputs to be positive
|
||||||
mux2 #(`XLEN) posamux(AE, -AE, AsE, PosA);
|
mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
|
||||||
mux2 #(`XLEN) posbmux(BE, -BE, BsE, PosB);
|
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
|
||||||
|
|
||||||
// Select integer or floating point inputs
|
// Select integer or floating point inputs
|
||||||
mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX);
|
mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
|
||||||
mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD);
|
mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
|
||||||
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
|
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
|
||||||
end else begin // Int not supported
|
end else begin // Int not supported
|
||||||
assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}};
|
assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
|
||||||
assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}};
|
assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
|
||||||
assign NumerZeroE = XZeroE;
|
assign NumerZeroE = XZeroE;
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -107,8 +105,8 @@ module fdivsqrtpreproc (
|
|||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
|
|
||||||
// count leading zeros for Subnorm FP and to normalize integer inputs
|
// count leading zeros for Subnorm FP and to normalize integer inputs
|
||||||
lzc #(`DIVb) lzcX (IFX, ell);
|
lzc #(P.DIVb) lzcX (IFX, ell);
|
||||||
lzc #(`DIVb) lzcY (IFD, mE);
|
lzc #(P.DIVb) lzcY (IFD, mE);
|
||||||
|
|
||||||
// Normalization shift: shift off leading one
|
// Normalization shift: shift off leading one
|
||||||
assign Xfract = (IFX << ell) << 1;
|
assign Xfract = (IFX << ell) << 1;
|
||||||
@ -122,28 +120,28 @@ module fdivsqrtpreproc (
|
|||||||
// and nE (number of fractional digits)
|
// and nE (number of fractional digits)
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
|
|
||||||
if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
|
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
|
||||||
logic [`DIVBLEN:0] ZeroDiff, p;
|
logic [P.DIVBLEN:0] ZeroDiff, p;
|
||||||
|
|
||||||
// calculate number of fractional bits p
|
// calculate number of fractional bits p
|
||||||
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
||||||
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B (A has more leading zeros)
|
assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
|
||||||
mux2 #(`DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
|
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
|
||||||
|
|
||||||
// Integer special cases (terminate immediately)
|
// Integer special cases (terminate immediately)
|
||||||
assign ISpecialCaseE = BZeroE | ALTBE;
|
assign ISpecialCaseE = BZeroE | ALTBE;
|
||||||
|
|
||||||
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
|
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
|
||||||
|
|
||||||
if (`LOGRK > 0) begin // more than 1 bit per cycle
|
if (P.LOGRK > 0) begin // more than 1 bit per cycle
|
||||||
logic [`LOGRK-1:0] IntTrunc, RightShiftX;
|
logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
|
||||||
logic [`DIVBLEN:0] TotalIntBits, IntSteps;
|
logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
|
||||||
/* verilator lint_off WIDTH */
|
/* verilator lint_off WIDTH */
|
||||||
assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
||||||
assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator
|
assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
|
||||||
assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div
|
assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
|
||||||
assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits
|
assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
|
||||||
assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount
|
assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
|
||||||
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
|
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
|
||||||
/* verilator lint_on WIDTH */
|
/* verilator lint_on WIDTH */
|
||||||
end else begin // radix 2 1 copy doesn't require shifting
|
end else begin // radix 2 1 copy doesn't require shifting
|
||||||
@ -167,42 +165,42 @@ module fdivsqrtpreproc (
|
|||||||
assign DivX = {3'b000, ~NumerZeroE, Xfract};
|
assign DivX = {3'b000, ~NumerZeroE, Xfract};
|
||||||
|
|
||||||
// Sqrt is initialized on step one as R(X-1), so depends on Radix
|
// Sqrt is initialized on step one as R(X-1), so depends on Radix
|
||||||
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
|
mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
|
||||||
if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
|
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
|
||||||
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
|
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
|
||||||
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Select integer or floating-point operands
|
// Select integer or floating-point operands
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
|
|
||||||
if (`IDIV_ON_FPU) begin
|
if (P.IDIV_ON_FPU) begin
|
||||||
mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
|
mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
|
||||||
end else begin
|
end else begin
|
||||||
assign X = PreShiftX;
|
assign X = PreShiftX;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Divisor register
|
// Divisor register
|
||||||
flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
|
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
|
||||||
|
|
||||||
// Floating-point exponent
|
// Floating-point exponent
|
||||||
fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
|
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
|
||||||
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
|
flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
|
||||||
|
|
||||||
// Number of FSM cycles (to FSM)
|
// Number of FSM cycles (to FSM)
|
||||||
fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
|
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
|
||||||
|
|
||||||
if (`IDIV_ON_FPU) begin:intpipelineregs
|
if (P.IDIV_ON_FPU) begin:intpipelineregs
|
||||||
// pipeline registers
|
// pipeline registers
|
||||||
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
|
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
|
||||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||||
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
|
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
|
||||||
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
||||||
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
||||||
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
||||||
flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
|
flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
|
||||||
flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
||||||
if (`XLEN==64)
|
if (P.XLEN==64)
|
||||||
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -26,8 +26,6 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
|
||||||
|
|
||||||
module fdivsqrtqsel2 (
|
module fdivsqrtqsel2 (
|
||||||
input logic [3:0] ps, pc,
|
input logic [3:0] ps, pc,
|
||||||
output logic up, uz, un
|
output logic up, uz, un
|
||||||
|
@ -26,8 +26,6 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
|
||||||
|
|
||||||
module fdivsqrtqsel4 (
|
module fdivsqrtqsel4 (
|
||||||
input logic [2:0] Dmsbs,
|
input logic [2:0] Dmsbs,
|
||||||
input logic [4:0] Smsbs,
|
input logic [4:0] Smsbs,
|
||||||
|
@ -26,8 +26,6 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
|
||||||
|
|
||||||
module fdivsqrtqsel4cmp (
|
module fdivsqrtqsel4cmp (
|
||||||
input logic [2:0] Dmsbs,
|
input logic [2:0] Dmsbs,
|
||||||
input logic [4:0] Smsbs,
|
input logic [4:0] Smsbs,
|
||||||
|
@ -26,27 +26,26 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
|
||||||
|
|
||||||
/* verilator lint_off UNOPTFLAT */
|
/* verilator lint_off UNOPTFLAT */
|
||||||
module fdivsqrtstage2 (
|
module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic [`DIVb+3:0] D, DBar,
|
input logic [P.DIVb+3:0] D, DBar,
|
||||||
input logic [`DIVb:0] U, UM,
|
input logic [P.DIVb:0] U, UM,
|
||||||
input logic [`DIVb+3:0] WS, WC,
|
input logic [P.DIVb+3:0] WS, WC,
|
||||||
input logic [`DIVb+1:0] C,
|
input logic [P.DIVb+1:0] C,
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
output logic un,
|
output logic un,
|
||||||
output logic [`DIVb+1:0] CNext,
|
output logic [P.DIVb+1:0] CNext,
|
||||||
output logic [`DIVb:0] UNext, UMNext,
|
output logic [P.DIVb:0] UNext, UMNext,
|
||||||
output logic [`DIVb+3:0] WSNext, WCNext
|
output logic [P.DIVb+3:0] WSNext, WCNext
|
||||||
);
|
);
|
||||||
/* verilator lint_on UNOPTFLAT */
|
/* verilator lint_on UNOPTFLAT */
|
||||||
|
|
||||||
logic [`DIVb+3:0] Dsel;
|
logic [P.DIVb+3:0] Dsel;
|
||||||
logic up, uz;
|
logic up, uz;
|
||||||
logic [`DIVb+3:0] F;
|
logic [P.DIVb+3:0] F;
|
||||||
logic [`DIVb+3:0] AddIn;
|
logic [P.DIVb+3:0] AddIn;
|
||||||
logic [`DIVb+3:0] WSA, WCA;
|
logic [P.DIVb+3:0] WSA, WCA;
|
||||||
|
|
||||||
// Quotient Selection logic
|
// Quotient Selection logic
|
||||||
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
|
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
|
||||||
@ -56,10 +55,10 @@ module fdivsqrtstage2 (
|
|||||||
// 0000 = 0
|
// 0000 = 0
|
||||||
// 0010 = -1
|
// 0010 = -1
|
||||||
// 0001 = -2
|
// 0001 = -2
|
||||||
fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un);
|
fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);
|
||||||
|
|
||||||
// Sqrt F generation. Extend C, U, UM to Q4.k
|
// Sqrt F generation. Extend C, U, UM to Q4.k
|
||||||
fdivsqrtfgen2 fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||||
|
|
||||||
// Divisor multiple
|
// Divisor multiple
|
||||||
always_comb
|
always_comb
|
||||||
@ -69,16 +68,16 @@ module fdivsqrtstage2 (
|
|||||||
|
|
||||||
// Partial Product Generation
|
// Partial Product Generation
|
||||||
// WSA, WCA = WS + WC - qD
|
// WSA, WCA = WS + WC - qD
|
||||||
mux2 #(`DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
|
mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
|
||||||
csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
|
csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
|
||||||
assign WSNext = WSA << 1;
|
assign WSNext = WSA << 1;
|
||||||
assign WCNext = WCA << 1;
|
assign WCNext = WCA << 1;
|
||||||
|
|
||||||
// Shift thermometer code C
|
// Shift thermometer code C
|
||||||
assign CNext = {1'b1, C[`DIVb+1:1]};
|
assign CNext = {1'b1, C[P.DIVb+1:1]};
|
||||||
|
|
||||||
// Unified On-The-Fly Converter to accumulate result
|
// Unified On-The-Fly Converter to accumulate result
|
||||||
fdivsqrtuotfc2 uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
|
fdivsqrtuotfc2 #(P) uotfc2(.up, .un, .C(CNext), .U, .UM, .UNext, .UMNext);
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,29 +26,27 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
|
||||||
|
input logic [P.DIVb+3:0] D, DBar, D2, DBar2,
|
||||||
module fdivsqrtstage4 (
|
input logic [P.DIVb:0] U,UM,
|
||||||
input logic [`DIVb+3:0] D, DBar, D2, DBar2,
|
input logic [P.DIVb+3:0] WS, WC,
|
||||||
input logic [`DIVb:0] U,UM,
|
input logic [P.DIVb+1:0] C,
|
||||||
input logic [`DIVb+3:0] WS, WC,
|
|
||||||
input logic [`DIVb+1:0] C,
|
|
||||||
input logic SqrtE, j1,
|
input logic SqrtE, j1,
|
||||||
output logic [`DIVb+1:0] CNext,
|
output logic [P.DIVb+1:0] CNext,
|
||||||
output logic un,
|
output logic un,
|
||||||
output logic [`DIVb:0] UNext, UMNext,
|
output logic [P.DIVb:0] UNext, UMNext,
|
||||||
output logic [`DIVb+3:0] WSNext, WCNext
|
output logic [P.DIVb+3:0] WSNext, WCNext
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [`DIVb+3:0] Dsel;
|
logic [P.DIVb+3:0] Dsel;
|
||||||
logic [3:0] udigit;
|
logic [3:0] udigit;
|
||||||
logic [`DIVb+3:0] F;
|
logic [P.DIVb+3:0] F;
|
||||||
logic [`DIVb+3:0] AddIn;
|
logic [P.DIVb+3:0] AddIn;
|
||||||
logic [4:0] Smsbs;
|
logic [4:0] Smsbs;
|
||||||
logic [2:0] Dmsbs;
|
logic [2:0] Dmsbs;
|
||||||
logic [7:0] WCmsbs, WSmsbs;
|
logic [7:0] WCmsbs, WSmsbs;
|
||||||
logic CarryIn;
|
logic CarryIn;
|
||||||
logic [`DIVb+3:0] WSA, WCA;
|
logic [P.DIVb+3:0] WSA, WCA;
|
||||||
|
|
||||||
// Digit Selection logic
|
// Digit Selection logic
|
||||||
// u encoding:
|
// u encoding:
|
||||||
@ -57,16 +55,16 @@ module fdivsqrtstage4 (
|
|||||||
// 0000 = 0
|
// 0000 = 0
|
||||||
// 0010 = -1
|
// 0010 = -1
|
||||||
// 0001 = -2
|
// 0001 = -2
|
||||||
assign Smsbs = U[`DIVb:`DIVb-4];
|
assign Smsbs = U[P.DIVb:P.DIVb-4];
|
||||||
assign Dmsbs = D[`DIVb-1:`DIVb-3];
|
assign Dmsbs = D[P.DIVb-1:P.DIVb-3];
|
||||||
assign WCmsbs = WC[`DIVb+3:`DIVb-4];
|
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];
|
||||||
assign WSmsbs = WS[`DIVb+3:`DIVb-4];
|
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];
|
||||||
|
|
||||||
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
||||||
assign un = 1'b0; // unused for radix 4
|
assign un = 1'b0; // unused for radix 4
|
||||||
|
|
||||||
// F generation logic
|
// F generation logic
|
||||||
fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
fdivsqrtfgen4 #(P) fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||||
|
|
||||||
// Divisor multiple logic
|
// Divisor multiple logic
|
||||||
always_comb
|
always_comb
|
||||||
@ -83,15 +81,15 @@ module fdivsqrtstage4 (
|
|||||||
// {WS, WC}Next = (WS + WC - qD or F) << 2
|
// {WS, WC}Next = (WS + WC - qD or F) << 2
|
||||||
assign AddIn = SqrtE ? F : Dsel;
|
assign AddIn = SqrtE ? F : Dsel;
|
||||||
assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D
|
assign CarryIn = ~SqrtE & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D
|
||||||
csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
|
csa #(P.DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA);
|
||||||
assign WSNext = WSA << 2;
|
assign WSNext = WSA << 2;
|
||||||
assign WCNext = WCA << 2;
|
assign WCNext = WCA << 2;
|
||||||
|
|
||||||
// Shift thermometer code C
|
// Shift thermometer code C
|
||||||
assign CNext = {2'b11, C[`DIVb+1:2]};
|
assign CNext = {2'b11, C[P.DIVb+1:2]};
|
||||||
|
|
||||||
// On-the-fly converter to accumulate result
|
// On-the-fly converter to accumulate result
|
||||||
fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext);
|
fdivsqrtuotfc4 #(P) fdivsqrtuotfc4(.udigit, .C(CNext[P.DIVb:0]), .U, .UM, .UNext, .UMNext);
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,22 +26,20 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
|
||||||
|
|
||||||
///////////////////////////////
|
///////////////////////////////
|
||||||
// Unified OTFC, Radix 2 //
|
// Unified OTFC, Radix 2 //
|
||||||
///////////////////////////////
|
///////////////////////////////
|
||||||
module fdivsqrtuotfc2(
|
module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic up, un,
|
input logic up, un,
|
||||||
input logic [`DIVb+1:0] C,
|
input logic [P.DIVb+1:0] C,
|
||||||
input logic [`DIVb:0] U, UM,
|
input logic [P.DIVb:0] U, UM,
|
||||||
output logic [`DIVb:0] UNext, UMNext
|
output logic [P.DIVb:0] UNext, UMNext
|
||||||
);
|
);
|
||||||
// The on-the-fly converter transfers the divsqrt
|
// The on-the-fly converter transfers the divsqrt
|
||||||
// bits to the quotient as they come.
|
// bits to the quotient as they come.
|
||||||
logic [`DIVb:0] K;
|
logic [P.DIVb:0] K;
|
||||||
|
|
||||||
assign K = (C[`DIVb:0] & ~(C[`DIVb:0] << 1)); // Thermometer to one hot encoding
|
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
|
||||||
|
|
||||||
always_comb begin
|
always_comb begin
|
||||||
if (up) begin
|
if (up) begin
|
||||||
|
@ -26,19 +26,17 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`include "wally-config.vh"
|
module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) (
|
||||||
|
|
||||||
module fdivsqrtuotfc4(
|
|
||||||
input logic [3:0] udigit,
|
input logic [3:0] udigit,
|
||||||
input logic [`DIVb:0] U, UM,
|
input logic [P.DIVb:0] U, UM,
|
||||||
input logic [`DIVb:0] C,
|
input logic [P.DIVb:0] C,
|
||||||
output logic [`DIVb:0] UNext, UMNext
|
output logic [P.DIVb:0] UNext, UMNext
|
||||||
);
|
);
|
||||||
// The on-the-fly converter transfers the square root
|
// The on-the-fly converter transfers the square root
|
||||||
// bits to the quotient as they come.
|
// bits to the quotient as they come.
|
||||||
// Use this otfc for division and square root.
|
// Use this otfc for division and square root.
|
||||||
|
|
||||||
logic [`DIVb:0] K1, K2, K3;
|
logic [P.DIVb:0] K1, K2, K3;
|
||||||
assign K1 = (C&~(C << 1)); // K
|
assign K1 = (C&~(C << 1)); // K
|
||||||
assign K2 = ((C << 1)&~(C << 2)); // 2K
|
assign K2 = ((C << 1)&~(C << 2)); // 2K
|
||||||
assign K3 = (C & ~(C << 2)); // 3K
|
assign K3 = (C & ~(C << 2)); // 3K
|
||||||
|
@ -238,7 +238,7 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
|||||||
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE));
|
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE));
|
||||||
|
|
||||||
// divide and square root: fdiv, fsqrt, optionally integer division
|
// divide and square root: fdiv, fsqrt, optionally integer division
|
||||||
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
||||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
||||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
|
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
|
||||||
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
|
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
|
||||||
|
Loading…
Reference in New Issue
Block a user