mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Handle special case Int Div/Rem of |A| < |B| in a single cycle
This commit is contained in:
parent
c653f1b30f
commit
499b52a7f0
@ -24,7 +24,7 @@
|
|||||||
|
|
||||||
// division constants
|
// division constants
|
||||||
`define RADIX 32'h2
|
`define RADIX 32'h2
|
||||||
`define DIVCOPIES 32'h2
|
`define DIVCOPIES 32'h1
|
||||||
|
|
||||||
// Memory synthesis configuration
|
// Memory synthesis configuration
|
||||||
`define USE_SRAM 0
|
`define USE_SRAM 0
|
||||||
|
@ -68,20 +68,20 @@ module fdivsqrt(
|
|||||||
logic DivStartE; // Enable signal for flops during stall
|
logic DivStartE; // Enable signal for flops during stall
|
||||||
|
|
||||||
// Integer div/rem signals
|
// Integer div/rem signals
|
||||||
logic AZeroE, BZeroE; // Numerator/Denominator is zero (Execute)
|
logic BZeroE, BZeroM; // Denominator is zero
|
||||||
logic AZeroM, BZeroM; // Numerator/Denominator is zero (Memory)
|
|
||||||
logic MDUM; // Integer operation
|
logic MDUM; // Integer operation
|
||||||
logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts
|
logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts
|
||||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||||
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
|
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||||
|
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||||
|
|
||||||
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
|
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
|
||||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||||
.Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E,
|
.Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E,
|
||||||
.QeM, .X, .DPreproc,
|
.QeM, .X, .DPreproc,
|
||||||
// Int-specific
|
// Int-specific
|
||||||
.ForwardedSrcAE, .ForwardedSrcBE, .MDUE, .W64E,
|
.ForwardedSrcAE, .ForwardedSrcBE, .MDUE, .W64E, .ISpecialCaseE,
|
||||||
.AZeroE, .BZeroE, .nE, .AZeroM, .BZeroM, .nM, .mM, .AM,
|
.BZeroE, .nE, .BZeroM, .nM, .mM, .AM,
|
||||||
.MDUM, .W64M, .NegQuotM, .ALTBM, .AsM);
|
.MDUM, .W64M, .NegQuotM, .ALTBM, .AsM);
|
||||||
|
|
||||||
fdivsqrtfsm fdivsqrtfsm( // FSM
|
fdivsqrtfsm fdivsqrtfsm( // FSM
|
||||||
@ -89,7 +89,7 @@ module fdivsqrt(
|
|||||||
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
|
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
|
||||||
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM,
|
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM,
|
||||||
// Int-specific
|
// Int-specific
|
||||||
.IDivStartE, .AZeroE, .BZeroE, .nE, .MDUE);
|
.IDivStartE, .BZeroE, .ISpecialCaseE, .nE, .MDUE);
|
||||||
|
|
||||||
fdivsqrtiter fdivsqrtiter( // CSA Iterator
|
fdivsqrtiter fdivsqrtiter( // CSA Iterator
|
||||||
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .DPreproc,
|
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .DPreproc,
|
||||||
|
@ -36,7 +36,7 @@ module fdivsqrtfsm(
|
|||||||
input logic [`FMTBITS-1:0] FmtE,
|
input logic [`FMTBITS-1:0] FmtE,
|
||||||
input logic XInfE, YInfE,
|
input logic XInfE, YInfE,
|
||||||
input logic XZeroE, YZeroE,
|
input logic XZeroE, YZeroE,
|
||||||
input logic AZeroE, BZeroE,
|
input logic BZeroE,
|
||||||
input logic XNaNE, YNaNE,
|
input logic XNaNE, YNaNE,
|
||||||
input logic FDivStartE, IDivStartE,
|
input logic FDivStartE, IDivStartE,
|
||||||
input logic XsE,
|
input logic XsE,
|
||||||
@ -46,6 +46,7 @@ module fdivsqrtfsm(
|
|||||||
input logic WZeroE,
|
input logic WZeroE,
|
||||||
input logic MDUE,
|
input logic MDUE,
|
||||||
input logic [`DIVBLEN:0] nE,
|
input logic [`DIVBLEN:0] nE,
|
||||||
|
input logic ISpecialCaseE,
|
||||||
output logic IFDivStartE,
|
output logic IFDivStartE,
|
||||||
output logic FDivBusyE, FDivDoneE,
|
output logic FDivBusyE, FDivDoneE,
|
||||||
output logic SpecialCaseM
|
output logic SpecialCaseM
|
||||||
@ -65,7 +66,7 @@ module fdivsqrtfsm(
|
|||||||
|
|
||||||
// terminate immediately on special cases
|
// terminate immediately on special cases
|
||||||
assign FSpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE);
|
assign FSpecialCaseE = XZeroE | (YZeroE&~SqrtE) | XInfE | YInfE | XNaNE | YNaNE | (XsE&SqrtE);
|
||||||
if (`IDIV_ON_FPU) assign SpecialCaseE = MDUE ? BZeroE : FSpecialCaseE;
|
if (`IDIV_ON_FPU) assign SpecialCaseE = MDUE ? ISpecialCaseE : FSpecialCaseE;
|
||||||
else assign SpecialCaseE = FSpecialCaseE;
|
else assign SpecialCaseE = FSpecialCaseE;
|
||||||
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
||||||
|
|
||||||
|
@ -99,7 +99,7 @@ module fdivsqrtpostproc(
|
|||||||
mux2 #(`DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
mux2 #(`DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
||||||
mux2 #(`DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
mux2 #(`DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
||||||
|
|
||||||
if (`IDIV_ON_FPU) begin // Int supported
|
if (`IDIV_ON_FPU) begin:intpostproc // Int supported
|
||||||
logic [`DIVBLEN:0] NormShiftM;
|
logic [`DIVBLEN:0] NormShiftM;
|
||||||
logic [`DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
logic [`DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
||||||
|
|
||||||
@ -121,18 +121,19 @@ module fdivsqrtpostproc(
|
|||||||
NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
|
NormShiftM = ((`DIVBLEN+1)'(`DIVb) - (nM * (`DIVBLEN+1)'(`LOGR)));
|
||||||
PreResultM = NormQuotM;
|
PreResultM = NormQuotM;
|
||||||
end
|
end
|
||||||
PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM);
|
PreFPIntDivResultM = $signed(PreResultM >>> NormShiftM); // *** rename to PreIntResultM?
|
||||||
end
|
end
|
||||||
|
|
||||||
// special case logic
|
// special case logic
|
||||||
|
// terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B|
|
||||||
always_comb
|
always_comb
|
||||||
if (BZeroM) begin // Divide by zero
|
if (BZeroM) begin // Divide by zero
|
||||||
if (RemOpM) SpecialFPIntDivResultM = AM;
|
if (RemOpM) SpecialFPIntDivResultM = AM; // *** rename to IntDivResult?
|
||||||
else SpecialFPIntDivResultM = {(`XLEN){1'b1}};
|
else SpecialFPIntDivResultM = {(`XLEN){1'b1}};
|
||||||
end else if (ALTBM) begin // Numerator is zero
|
end else if (ALTBM) begin // A less than B (A has more leading zeros): quotient is 0, remainder is A
|
||||||
if (RemOpM) SpecialFPIntDivResultM = AM;
|
if (RemOpM) SpecialFPIntDivResultM = AM;
|
||||||
else SpecialFPIntDivResultM = '0;
|
else SpecialFPIntDivResultM = '0;
|
||||||
end else SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
|
end else SpecialFPIntDivResultM = PreFPIntDivResultM[`XLEN-1:0];
|
||||||
|
|
||||||
// sign extend result for W64
|
// sign extend result for W64
|
||||||
if (`XLEN==64) begin
|
if (`XLEN==64) begin
|
||||||
|
@ -45,9 +45,10 @@ module fdivsqrtpreproc (
|
|||||||
// Int-specific
|
// Int-specific
|
||||||
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||||
input logic MDUE, W64E,
|
input logic MDUE, W64E,
|
||||||
|
output logic ISpecialCaseE,
|
||||||
output logic [`DIVBLEN:0] nE, nM, mM,
|
output logic [`DIVBLEN:0] nE, nM, mM,
|
||||||
output logic NegQuotM, ALTBM, MDUM, W64M,
|
output logic NegQuotM, ALTBM, MDUM, W64M,
|
||||||
output logic AsM, AZeroM, BZeroM, AZeroE, BZeroE,
|
output logic AsM, BZeroM, BZeroE,
|
||||||
output logic [`XLEN-1:0] AM
|
output logic [`XLEN-1:0] AM
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -58,8 +59,9 @@ module fdivsqrtpreproc (
|
|||||||
logic [`DIVb-1:0] IFNormLenX, IFNormLenD; // Correctly-sized inputs for iterator
|
logic [`DIVb-1:0] IFNormLenX, IFNormLenD; // Correctly-sized inputs for iterator
|
||||||
logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs
|
logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs
|
||||||
logic NumerZeroE; // Numerator is zero (X or A)
|
logic NumerZeroE; // Numerator is zero (X or A)
|
||||||
|
logic AZeroE; // A is Zero for integer division
|
||||||
|
|
||||||
if (`IDIV_ON_FPU) begin // Int Supported
|
if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
|
||||||
logic signedDiv, NegQuotE;
|
logic signedDiv, NegQuotE;
|
||||||
logic AsBit, BsBit, AsE, BsE, ALTBE;
|
logic AsBit, BsBit, AsE, BsE, ALTBE;
|
||||||
logic [`XLEN-1:0] AE, BE, PosA, PosB;
|
logic [`XLEN-1:0] AE, BE, PosA, PosB;
|
||||||
@ -98,8 +100,11 @@ module fdivsqrtpreproc (
|
|||||||
|
|
||||||
// calculate number of fractional bits p
|
// calculate number of fractional bits p
|
||||||
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
||||||
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B?
|
assign ALTBE = ZeroDiff[`DIVBLEN]; // A less than B (A has more leading zeros)
|
||||||
mux2 #(`DIVBLEN+1) pmux(ZeroDiff, 0, ALTBE, p);
|
mux2 #(`DIVBLEN+1) pmux(ZeroDiff, {(`DIVBLEN+1){1'b0}}, ALTBE, p); // *** is there a more graceful way to write these constants
|
||||||
|
|
||||||
|
// Integer special cases (terminate immediately)
|
||||||
|
assign ISpecialCaseE = BZeroE | ALTBE;
|
||||||
|
|
||||||
/* verilator lint_off WIDTH */
|
/* verilator lint_off WIDTH */
|
||||||
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
|
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
|
||||||
@ -113,7 +118,7 @@ module fdivsqrtpreproc (
|
|||||||
assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div
|
assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div
|
||||||
assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits
|
assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits
|
||||||
assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount
|
assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount
|
||||||
assign DivXShifted = DivX >> RightShiftX; // shift X to complete in nE steps
|
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
|
||||||
end else begin // radix 2 1 copy doesn't require shifting
|
end else begin // radix 2 1 copy doesn't require shifting
|
||||||
assign nE = p;
|
assign nE = p;
|
||||||
assign DivXShifted = DivX;
|
assign DivXShifted = DivX;
|
||||||
@ -129,7 +134,6 @@ module fdivsqrtpreproc (
|
|||||||
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
||||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||||
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
|
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
|
||||||
flopen #(1) azeroreg(clk, IFDivStartE, AZeroE, AZeroM);
|
|
||||||
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
||||||
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
||||||
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
||||||
|
@ -60,6 +60,6 @@ module forward(
|
|||||||
assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction
|
assign MatchDE = ((Rs1D == RdE) | (Rs2D == RdE)) & (RdE != 5'b0); // Decode-stage instruction source depends on result from execute stage instruction
|
||||||
assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt
|
assign FCvtIntStallD = FCvtIntE & MatchDE; // FPU to Integer transfers have single-cycle latency except fcvt
|
||||||
assign LoadStallD = (MemReadE|SCE) & MatchDE;
|
assign LoadStallD = (MemReadE|SCE) & MatchDE;
|
||||||
assign MDUStallD = MDUE & MatchDE;
|
assign MDUStallD = MDUE & MatchDE; // Int mult/div has at least two-cycle latency, even when coming from the FDIV
|
||||||
assign CSRRdStallD = CSRReadE & MatchDE;
|
assign CSRRdStallD = CSRReadE & MatchDE;
|
||||||
endmodule
|
endmodule
|
||||||
|
Loading…
Reference in New Issue
Block a user