forked from Github_Repos/cvw
Handle special case Int Div/Rem of |A| < |B| in a single cycle
This commit is contained in:
parent
f567577ede
commit
f8af51e07b
@ -24,7 +24,7 @@
|
||||
|
||||
// division constants
|
||||
`define RADIX 32'h2
|
||||
`define DIVCOPIES 32'h2
|
||||
`define DIVCOPIES 32'h1
|
||||
|
||||
// Memory synthesis configuration
|
||||
`define USE_SRAM 0
|
||||
|
@ -68,20 +68,20 @@ module fdivsqrt(
|
||||
logic DivStartE; // Enable signal for flops during stall
|
||||
|
||||
// Integer div/rem signals
|
||||
logic AZeroE, BZeroE; // Numerator/Denominator is zero (Execute)
|
||||
logic AZeroM, BZeroM; // Numerator/Denominator is zero (Memory)
|
||||
logic BZeroE, BZeroM; // Denominator is zero
|
||||
logic MDUM; // Integer operation
|
||||
logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts
|
||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
|
||||
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
|
||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||
.Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E,
|
||||
.QeM, .X, .DPreproc,
|
||||
// Int-specific
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .MDUE, .W64E,
|
||||
.AZeroE, .BZeroE, .nE, .AZeroM, .BZeroM, .nM, .mM, .AM,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .MDUE, .W64E, .ISpecialCaseE,
|
||||
.BZeroE, .nE, .BZeroM, .nM, .mM, .AM,
|
||||
.MDUM, .W64M, .NegQuotM, .ALTBM, .AsM);
|
||||
|
||||
fdivsqrtfsm fdivsqrtfsm( // FSM
|
||||
@ -89,7 +89,7 @@ module fdivsqrt(
|
||||
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
|
||||
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM,
|
||||
// Int-specific
|
||||
.IDivStartE, .AZeroE, .BZeroE, .nE, .MDUE);
|
||||
.IDivStartE, .BZeroE, .ISpecialCaseE, .nE, .MDUE);
|
||||
|
||||
fdivsqrtiter fdivsqrtiter( // CSA Iterator
|
||||
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .DPreproc,
|
||||
|
@ -36,7 +36,7 @@ module fdivsqrtfsm(
|
||||
input logic [`FMTBITS-1:0] FmtE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic AZeroE, BZeroE,
|
||||
input logic BZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic XsE,
|
||||
@ -46,6 +46,7 @@ module fdivsqrtfsm(
|
||||
input logic WZeroE,
|
||||
input logic MDUE,
|
||||
input logic [`DIVBLEN:0] nE,
|
||||
input logic ISpecialCaseE,
|
||||
output logic IFDivStartE,
|
||||
output logic FDivBusyE, FDivDoneE,
|
||||
output logic SpecialCaseM
|
||||
@ -65,7 +66,7 @@ module fdivsqrtfsm(
|
||||
|
||||
// terminate immediately on special cases
|
||||
assign FSpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE);
|
||||
if (`IDIV_ON_FPU) assign SpecialCaseE = MDUE ? BZeroE : FSpecialCaseE;
|
||||
if (`IDIV_ON_FPU) assign SpecialCaseE = MDUE ? ISpecialCaseE : FSpecialCaseE;
|
||||
else assign SpecialCaseE = FSpecialCaseE;
|
||||
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
||||
|
||||
|
@ -99,7 +99,7 @@ module fdivsqrtpostproc(
|
||||
mux2 #(`DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
||||
mux2 #(`DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
||||
|
||||
if (`IDIV_ON_FPU) begin // Int supported
|
||||
if (`IDIV_ON_FPU) begin:intpostproc // Int supported
|
||||
logic [`DIVBLEN:0] NormShiftM;
|
||||
logic [`DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
||||
|
||||
@ -121,18 +121,19 @@ module fdivsqrtpostproc(
|
||||
NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
|
||||
PreResultM = NormQuotM;
|
||||
end
|
||||
PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
|
||||
PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); // *** rename to PreIntResultM?
|
||||
end
|
||||
|
||||
// special case logic
|
||||
// terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B|
|
||||
always_comb
|
||||
if (BZeroM) begin // Divide by zero
|
||||
if (RemOpM) SpecialFPIntDivResultM = AM;
|
||||
if (RemOpM) SpecialFPIntDivResultM = AM; // *** rename to IntDivResult?
|
||||
else SpecialFPIntDivResultM = {(`XLEN){1'b1}};
|
||||
end else if (ALTBM) begin // Numerator is zero
|
||||
end else if (ALTBM) begin // |A| < |B| (A has more leading zeros): remainder is A, quotient is 0
|
||||
if (RemOpM) SpecialFPIntDivResultM = AM;
|
||||
else SpecialFPIntDivResultM = '0;
|
||||
end else SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
|
||||
end else SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
|
||||
|
||||
// sign extend result for W64
|
||||
if (`XLEN==64) begin
|
||||
|
@ -45,9 +45,10 @@ module fdivsqrtpreproc (
|
||||
// Int-specific
|
||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic MDUE, W64E,
|
||||
output logic ISpecialCaseE,
|
||||
output logic [`DIVBLEN:0] nE, nM, mM,
|
||||
output logic NegQuotM, ALTBM, MDUM, W64M,
|
||||
output logic AsM, AZeroM, BZeroM, AZeroE, BZeroE,
|
||||
output logic AsM, BZeroM, BZeroE,
|
||||
output logic [`XLEN-1:0] AM
|
||||
);
|
||||
|
||||
@ -58,8 +59,9 @@ module fdivsqrtpreproc (
|
||||
logic [`DIVb-1:0] IFNormLenX, IFNormLenD; // Correctly-sized inputs for iterator
|
||||
logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs
|
||||
logic NumerZeroE; // Numerator is zero (X or A)
|
||||
logic AZeroE; // A is Zero for integer division
|
||||
|
||||
if (`IDIV_ON_FPU) begin // Int Supported
|
||||
if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
|
||||
logic signedDiv, NegQuotE;
|
||||
logic AsBit, BsBit, AsE, BsE, ALTBE;
|
||||
logic [`XLEN-1:0] AE, BE, PosA, PosB;
|
||||
@ -98,8 +100,11 @@ module fdivsqrtpreproc (
|
||||
|
||||
// calculate number of fractional bits p
|
||||
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
||||
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B?
|
||||
mux2 #(`DIVBLEN+1) pmux(ZeroDiff, 0, ALTBE, p);
|
||||
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B (A has more leading zeros)
|
||||
mux2 #(`DIVBLEN+1) pmux(ZeroDiff, {(`DIVBLEN+1){1'b0}}, ALTBE, p); // *** is there a more graceful way to write these constants
|
||||
|
||||
// Integer special cases (terminate immediately)
|
||||
assign ISpecialCaseE = BZeroE | ALTBE;
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
|
||||
@ -113,7 +118,7 @@ module fdivsqrtpreproc (
|
||||
assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div
|
||||
assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits
|
||||
assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount
|
||||
assign DivXShifted = DivX >> RightShiftX; // shift X to complete in nE steps
|
||||
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
|
||||
end else begin // radix 2 with 1 copy doesn't require shifting
|
||||
assign nE = p;
|
||||
assign DivXShifted = DivX;
|
||||
@ -129,7 +134,6 @@ module fdivsqrtpreproc (
|
||||
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
|
||||
flopen #(1) azeroreg(clk, IFDivStartE, AZeroE, AZeroM);
|
||||
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
||||
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
||||
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
||||
|
@ -60,6 +60,6 @@ module forward(
|
||||
assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction
|
||||
assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt
|
||||
assign LoadStallD = (MemReadE|SCE) & MatchDE;
|
||||
assign MDUStallD = MDUE & MatchDE;
|
||||
assign MDUStallD = MDUE & MatchDE; // Int mult/div has a latency of at least two cycles, even when coming from the FDIV
|
||||
assign CSRRdStallD = CSRReadE & MatchDE;
|
||||
endmodule
|
||||
|
Loading…
Reference in New Issue
Block a user