Fixed D sizing issues across fdivsqrt. Fixed preproc to accept either int or float inputs

This commit is contained in:
cturek 2022-12-10 21:56:35 +00:00
parent d15cf5c65c
commit f57211bb49
7 changed files with 78 additions and 87 deletions

View File

@ -56,11 +56,11 @@ module fdivsqrt(
// output logic [`XLEN-1:0] RemM,
);
logic [`DIVb+3:0] WS, WC;
logic [`DIVb+3:0] WS, WC;
logic [`DIVb+3:0] X;
logic [`DIVN-2:0] D; // U0.N-1
logic [`DIVN-2:0] Dpreproc;
logic [`DIVb:0] FirstU, FirstUM;
logic [`DIVb-1:0] D;
logic [`DIVb-1:0] DPreproc;
logic [`DIVb:0] FirstU, FirstUM;
logic [`DIVb+1:0] FirstC;
logic Firstun;
logic WZero;
@ -71,7 +71,7 @@ module fdivsqrt(
fdivsqrtpreproc fdivsqrtpreproc(
.clk, .IFDivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE),
.Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc,
.Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .DPreproc,
.n, .m, .OTFCSwap, .ALTBM, .BZero, .As,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
fdivsqrtfsm fdivsqrtfsm(
@ -81,7 +81,7 @@ module fdivsqrt(
.XInfE, .YInfE, .WZero, .SpecialCaseM);
fdivsqrtiter fdivsqrtiter(
.clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, // .SqrtM,
.X,.Dpreproc, .FirstWS(WS), .FirstWC(WC),
.X,.DPreproc, .FirstWS(WS), .FirstWC(WC),
.IFDivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .OTFCSwap,
.FDivBusyE);
fdivsqrtpostproc fdivsqrtpostproc(

View File

@ -40,42 +40,34 @@ module fdivsqrtiter(
// input logic SqrtM,
input logic OTFCSwap,
input logic [`DIVb+3:0] X,
input logic [`DIVN-2:0] Dpreproc,
output logic [`DIVN-2:0] D, // U0.N-1
output logic [`DIVb:0] FirstU, FirstUM,
input logic [`DIVb-1:0] DPreproc,
output logic [`DIVb-1:0] D,
output logic [`DIVb:0] FirstU, FirstUM,
output logic [`DIVb+1:0] FirstC,
output logic Firstun,
output logic [`DIVb+3:0] FirstWS, FirstWC
output logic [`DIVb+3:0] FirstWS, FirstWC
);
//QLEN = 1.(number of bits created for division)
// N is NF+1 or XLEN
// WC/WS is dependent on D so 4.N-1 ie N+3 bits or N+2:0 + one more bit in fraction for possible sqrt right shift
// D is 1.N-1, but the msb is always 1 so 0.N-1 or N-1 bits or N-2:0
// Dsel should match WC/WS so 4.N-1 ie N+3 bits or N+2:0
// U/UM should be 1.b so b+1 bits or b:0
// C needs to be the lenght of the final fraction 0.b so b or b-1:0
/* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b
logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
logic [`DIVb+1:0] initC; // Q2.b
/* verilator lint_off UNOPTFLAT */
logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b
logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b
logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b
logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UM[`DIVCOPIES:0]; // U1.b
logic [`DIVb:0] UNext[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb:0] UMNext[`DIVCOPIES-1:0]; // U1.b
logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b
logic [`DIVb+1:0] initC; // Q2.b
logic [`DIVCOPIES-1:0] un;
/* verilator lint_on UNOPTFLAT */
logic [`DIVb+3:0] WSN, WCN; // Q4.N-1
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.N-1
logic [`DIVb+1:0] NextC;
logic [`DIVb+1:0] CMux;
logic [`DIVb:0] UMux, UMMux;
logic [`DIVb:0] initU, initUM;
logic [`DIVb+3:0] WSN, WCN; // Q4.b
logic [`DIVb+3:0] DBar, D2, DBar2; // Q4.b
logic [`DIVb+1:0] NextC;
logic [`DIVb+1:0] CMux;
logic [`DIVb:0] UMux, UMMux;
logic [`DIVb:0] initU, initUM;
/* verilator lint_on UNOPTFLAT */
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
@ -85,15 +77,15 @@ module fdivsqrtiter(
// Residual WS/SC registers/initializaiton mux
mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, IFDivStartE, WSN);
mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, IFDivStartE, WCN);
flopen #(`DIVb+4) wsflop(clk, FDivBusyE, WSN, WS[0]);
flopen #(`DIVb+4) wcflop(clk, FDivBusyE, WCN, WC[0]);
flopen #(`DIVb+4) wsreg(clk, FDivBusyE, WSN, WS[0]);
flopen #(`DIVb+4) wcreg(clk, FDivBusyE, WCN, WC[0]);
// UOTFC Result U and UM registers/initialization mux
// Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division
assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0;
assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}};
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, IFDivStartE, UMux);
mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, IFDivStartE, UMMux);
flopen #(`DIVb+1) UReg(clk, IFDivStartE|FDivBusyE, UMux, U[0]);
flopen #(`DIVb+1) UMReg(clk, IFDivStartE|FDivBusyE, UMMux, UM[0]);
@ -103,18 +95,18 @@ module fdivsqrtiter(
assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10;
assign initC = {initCUpper, {`DIVb{1'b0}}};
mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, IFDivStartE, CMux);
flopen #(`DIVb+2) cflop(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
flopen #(`DIVb+2) creg(clk, IFDivStartE|FDivBusyE, CMux, C[0]);
// Divisior register
flopen #(`DIVN-1) dflop(clk, IFDivStartE, Dpreproc, D);
flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
// Divisor Selections
// - choose the negitive version of what's being selected
// - D is only the fraction
assign DBar = {3'b111, 1'b0, ~D, {`DIVb-`DIVN+1{1'b1}}};
// - D is a 0.b mantissa
assign DBar = {3'b111, 1'b0, ~D};
if(`RADIX == 4) begin : d2
assign DBar2 = {2'b11, 1'b0, ~D, {`DIVb+2-`DIVN{1'b1}}};
assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}};
assign DBar2 = {2'b11, 1'b0, ~D, 1'b1};
assign D2 = {2'b0, 1'b1, D, 1'b0};
end
// k=DIVCOPIES of the recurrence logic

View File

@ -32,8 +32,8 @@
module fdivsqrtpostproc(
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVN-2:0] D, // U0.N-1
input logic [`DIVb:0] FirstU, FirstUM,
input logic [`DIVb-1:0] D,
input logic [`DIVb:0] FirstU, FirstUM,
input logic [`DIVb+1:0] FirstC,
input logic Firstun,
input logic SqrtM,
@ -41,12 +41,12 @@ module fdivsqrtpostproc(
input logic [`XLEN-1:0] ForwardedSrcAE,
input logic RemOpM, ALTBM, BZero, As,
input logic [`DIVBLEN:0] n, m,
output logic [`DIVb:0] QmM,
output logic [`DIVb:0] QmM,
output logic WZero,
output logic DivSM
);
logic [`DIVb+3:0] W, Sum, RemD;
logic [`DIVb+3:0] W, Sum, RemDM;
logic [`DIVb:0] PreQmM;
logic NegStickyM, PostIncM;
logic weq0;
@ -78,14 +78,14 @@ module fdivsqrtpostproc(
assign Sum = WC + WS;
assign W = $signed(Sum) >>> `LOGR;
assign NegStickyM = W[`DIVb+3];
assign RemD = {4'b0000, D, {(`DIVb-`DIVN+1){1'b0}}};
assign RemDM = {4'b0000, D};
// Integer division: sign handling for div and rem
always_comb
if (~As)
if (NegStickyM) begin
NormQuotM = FirstUM;
NormRemM = W + RemD;
NormRemM = W + RemDM;
PostIncM = 0;
end else begin
NormQuotM = FirstU;
@ -99,7 +99,7 @@ module fdivsqrtpostproc(
PostIncM = 0;
end else begin
NormQuotM = FirstU;
NormRemM = W - RemD;
NormRemM = W - RemDM;
PostIncM = 1;
end

View File

@ -45,22 +45,21 @@ module fdivsqrtpreproc (
output logic OTFCSwap, ALTBM, BZero, As,
output logic [`NE+1:0] QeM,
output logic [`DIVb+3:0] X,
output logic [`DIVN-2:0] Dpreproc
output logic [`DIVb-1:0] DPreproc
);
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`NF-1:0] PreprocA, PreprocX;
logic [`NF-1:0] PreprocB, PreprocY;
logic [`NF+1:0] SqrtX;
logic [`DIVb-1:0] XPreproc;
logic [`DIVb:0] SqrtX;
logic [`DIVb+3:0] DivX;
logic [`NE+1:0] Qe;
logic [`NE+1:0] QeE;
// Intdiv signals
logic [`DIVb-1:0] ZeroBufX, ZeroBufY;
logic [`DIVb-1:0] IFNormLenX, IFNormLenD;
logic [`XLEN-1:0] PosA, PosB;
logic Bs, OTFCSwapTemp, ALTBE;
logic Bs, CalcOTFCSwap, ALTBE;
logic [`XLEN-1:0] A64, B64;
logic [`DIVBLEN:0] Calcn, Calcm;
logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
logic [`DIVBLEN:0] pPlusr, pPrCeil, p, L;
logic [`DIVBLEN:0] pPlusr, pPrCeil, p, ell;
logic [`LOGRK-1:0] pPrTrunc;
logic [`DIVb+3:0] PreShiftX;
@ -72,39 +71,38 @@ module fdivsqrtpreproc (
assign A64 = W64E ? {{(`XLEN-32){As}}, ForwardedSrcAE[31:0]} : ForwardedSrcAE;
assign B64 = W64E ? {{(`XLEN-32){Bs}}, ForwardedSrcBE[31:0]} : ForwardedSrcBE;
assign OTFCSwapTemp = (As ^ Bs) & MDUE;
assign CalcOTFCSwap = (As ^ Bs) & MDUE;
assign PosA = As ? -A64 : A64;
assign PosB = Bs ? -B64 : B64;
assign BZero = |ForwardedSrcBE;
assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
lzc #(`DIVb) lzcX (ZeroBufX, L);
lzc #(`DIVb) lzcY (ZeroBufY, Calcm);
assign IFNormLenX = MDUE ? {PosA, {(`DIVb-`XLEN){1'b0}}} : {Xm, {(`DIVb-`NF-1){1'b0}}};
assign IFNormLenD = MDUE ? {PosB, {(`DIVb-`XLEN){1'b0}}} : {Ym, {(`DIVb-`NF-1){1'b0}}};
lzc #(`DIVb) lzcX (IFNormLenX, ell);
lzc #(`DIVb) lzcY (IFNormLenD, Calcm);
assign PreprocX = Xm[`NF-1:0]<<L;
assign PreprocY = Ym[`NF-1:0]<<Calcm;
assign XPreproc = IFNormLenX << (ell + {{`DIVBLEN{1'b0}}, ~MDUE}); // had issue with (`DIVBLEN+1)'(~MDUE) so using this instead
assign DPreproc = IFNormLenD << (Calcm + {{`DIVBLEN{1'b0}}, ~MDUE});
assign ZeroDiff = Calcm - L;
assign ZeroDiff = Calcm - ell;
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B
assign p = ALTBE ? '0 : ZeroDiff;
assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
assign pPrTrunc = pPlusr[`LOGRK-1:0];
assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)};
assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN{1'b0}}, |(pPrTrunc)};
assign Calcn = (pPrCeil << `LOGK) - 1;
assign IntBits = (`DIVBLEN)'(`RK) + p;
assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]};
assign SqrtX = Xe[0]^L[0] ? {1'b0, ~XZero, PreprocX} : {~XZero, PreprocX, 1'b0};
assign DivX = {3'b000, ~XZero, PreprocX, {`DIVb-`NF{1'b0}}};
assign SqrtX = (Xe[0]^ell[0]) ? {1'b0, ~XZero, XPreproc[`DIVb-1:1]} : {~XZero, XPreproc}; // Bottom bit of XPreproc is always zero because DIVb is larger than XLEN and NF
assign DivX = {3'b000, ~XZero, XPreproc};
// *** explain why X is shifted between radices (initial assignment of WS=RX)
if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX, {`DIVb-1-`NF{1'b0}}} : DivX;
else assign PreShiftX = Sqrt ? {2'b11, SqrtX, {`DIVb-1-`NF{1'b0}}, 1'b0} : DivX;
if (`RADIX == 2) assign PreShiftX = Sqrt ? {3'b111, SqrtX} : DivX;
else assign PreShiftX = Sqrt ? {2'b11, SqrtX, 1'b0} : DivX;
assign X = MDUE ? DivX >> RightShiftX : PreShiftX;
assign Dpreproc = {PreprocY, {`DIVN-1-`NF{1'b0}}};
// radix 2 radix 4
// 1 copies DIVLEN+2 DIVLEN+2/2
@ -116,12 +114,12 @@ module fdivsqrtpreproc (
// r = 1 or 2
// DIVRESLEN/(r*`DIVCOPIES)
flopen #(`NE+2) expreg(clk, IFDivStartE, Qe, QeM);
flopen #(1) swapreg(clk, IFDivStartE, OTFCSwapTemp, OTFCSwap);
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
flopen #(1) swapreg(clk, IFDivStartE, CalcOTFCSwap, OTFCSwap);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, Calcn, n);
flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, Calcm, m);
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe);
expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .ell, .m(Calcm), .Qe(QeE));
endmodule
@ -130,7 +128,7 @@ module expcalc(
input logic [`NE-1:0] Xe, Ye,
input logic Sqrt,
input logic XZero,
input logic [`DIVBLEN:0] L, m,
input logic [`DIVBLEN:0] ell, m,
output logic [`NE+1:0] Qe
);
logic [`NE-2:0] Bias;
@ -162,10 +160,10 @@ module expcalc(
2'h2: Bias = (`NE-1)'(`H_BIAS);
endcase
end
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - (`NE+2)'(`BIAS);
assign SXExp = {2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - (`NE+2)'(`BIAS);
assign SExp = {SXExp[`NE+1], SXExp[`NE+1:1]} + {2'b0, Bias};
// correct exponent for denormalized input's normalization shifts
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, L} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
assign DExp = ({2'b0, Xe} - {{(`NE+1-`DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(`NE+1-`DIVBLEN){1'b0}}, m} + {3'b0, Bias}) & {`NE+2{~XZero}};
assign Qe = Sqrt ? SExp : DExp;
endmodule

View File

@ -32,7 +32,7 @@
/* verilator lint_off UNOPTFLAT */
module fdivsqrtstage2 (
input logic [`DIVN-2:0] D,
input logic [`DIVb-1:0] D,
input logic [`DIVb+3:0] DBar,
input logic [`DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC,
@ -69,7 +69,7 @@ module fdivsqrtstage2 (
always_comb
if (up) Dsel = DBar;
else if (uz) Dsel = '0; // qz
else Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; // un
else Dsel = {3'b000, 1'b1, D}; // un
// Partial Product Generation
// WSA, WCA = WS + WC - qD

View File

@ -31,7 +31,7 @@
`include "wally-config.vh"
module fdivsqrtstage4 (
input logic [`DIVN-2:0] D,
input logic [`DIVb-1:0] D,
input logic [`DIVb+3:0] DBar, D2, DBar2,
input logic [`DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC,
@ -61,7 +61,7 @@ module fdivsqrtstage4 (
// 0010 = -1
// 0001 = -2
assign Smsbs = U[`DIVb:`DIVb-4];
assign Dmsbs = D[`DIVN-2:`DIVN-4];
assign Dmsbs = D[`DIVb-1:`DIVb-3];
assign WCmsbs = WC[`DIVb+3:`DIVb-4];
assign WSmsbs = WS[`DIVb+3:`DIVb-4];
@ -77,7 +77,7 @@ module fdivsqrtstage4 (
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = '0;
4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}};
4'b0010: Dsel = {3'b0, 1'b1, D};
4'b0001: Dsel = D2;
default: Dsel = 'x;
endcase

View File

@ -1413,6 +1413,7 @@ string imperas32f[] = '{
string arch32f[] = '{
`RISCVARCHTEST,
"rv32i_m/F/src/fdiv_b20-01.S",
"rv32i_m/F/src/fadd_b10-01.S",
"rv32i_m/F/src/fadd_b1-01.S",
"rv32i_m/F/src/fadd_b11-01.S",