Merge pull request #256 from cturek/main

Simplifying fds to follow diagram
This commit is contained in:
David Harris 2023-04-20 16:07:22 -07:00 committed by GitHub
commit 94d1533264
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 133 additions and 108 deletions

View File

@ -57,43 +57,43 @@ module fdivsqrt(
logic [`DIVb+3:0] WS, WC; // Partial remainder components
logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend)
logic [`DIVb-1:0] DPreproc, D; // Iterator Divisor
logic [`DIVb+3:0] D; // Iterator Divisor
logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values
logic [`DIVb+1:0] FirstC; // Step tracker
logic Firstun; // Quotient selection
logic WZeroE; // Early termination flag
logic [`DURLEN-1:0] cycles; // FSM cycles
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
logic DivStartE; // Enable signal for flops during stall
// Integer div/rem signals
logic BZeroM; // Denominator is zero
logic IntDivM; // Integer operation
logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts
logic [`DIVBLEN:0] nM, mM; // Shift amounts
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
logic [`XLEN-1:0] AM; // Original Numerator for postprocessor
logic ISpecialCaseE; // Integer div/remainder special cases
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E,
.QeM, .X, .DPreproc,
fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles,
// Int-specific
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
.nE, .BZeroM, .nM, .mM, .AM,
.BZeroM, .nM, .mM, .AM,
.IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
fdivsqrtfsm fdivsqrtfsm( // FSM
.clk, .reset, .FmtE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
fdivsqrtfsm fdivsqrtfsm( // FSM
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM,
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles,
// Int-specific
.IDivStartE, .ISpecialCaseE, .nE, .IntDivE);
.IDivStartE, .ISpecialCaseE, .IntDivE);
fdivsqrtiter fdivsqrtiter( // CSA Iterator
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .DPreproc,
.D, .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
fdivsqrtiter fdivsqrtiter( // CSA Iterator
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
.FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor
fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
.QmM, .WZeroE, .DivStickyM,

View File

@ -0,0 +1,76 @@
///////////////////////////////////////////
// fdivsqrt.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
// Modified: 18 April 2022
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module fdivsqrtcycles(
input logic [`FMTBITS-1:0] FmtE,
input logic SqrtE,
input logic IntDivE,
input logic [`DIVBLEN:0] nE,
output logic [`DURLEN-1:0] cycles
);
logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
// DIVN = `NF+3
// NS = NF + 1
// N = NS or NS+2 for div/sqrt.
/* verilator lint_off WIDTH */
if (`FPSIZES == 1)
assign Nf = `NF;
else if (`FPSIZES == 2)
always_comb
case (FmtE)
1'b0: Nf = `NF1;
1'b1: Nf = `NF;
endcase
else if (`FPSIZES == 3)
always_comb
case (FmtE)
`FMT: Nf = `NF;
`FMT1: Nf = `NF1;
`FMT2: Nf = `NF2;
endcase
else if (`FPSIZES == 4)
always_comb
case(FmtE)
`S_FMT: Nf = `S_NF;
`D_FMT: Nf = `D_NF;
`H_FMT: Nf = `H_NF;
`Q_FMT: Nf = `Q_NF;
endcase
always_comb begin
if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (`IDIV_ON_FPU) cycles = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
end
/* verilator lint_on WIDTH */
endmodule

View File

@ -29,32 +29,27 @@
`include "wally-config.vh"
module fdivsqrtfsm(
input logic clk,
input logic reset,
input logic [`FMTBITS-1:0] FmtE,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic FDivStartE, IDivStartE,
input logic XsE,
input logic SqrtE,
input logic StallM,
input logic FlushE,
input logic WZeroE,
input logic IntDivE,
input logic [`DIVBLEN:0] nE,
input logic ISpecialCaseE,
output logic IFDivStartE,
output logic FDivBusyE, FDivDoneE,
output logic SpecialCaseM
input logic clk, reset,
input logic XInfE, YInfE,
input logic XZeroE, YZeroE,
input logic XNaNE, YNaNE,
input logic FDivStartE, IDivStartE,
input logic XsE, WZeroE,
input logic SqrtE,
input logic StallM, FlushE,
input logic IntDivE,
input logic ISpecialCaseE,
input logic [`DURLEN-1:0] cycles,
output logic IFDivStartE,
output logic FDivBusyE, FDivDoneE,
output logic SpecialCaseM
);
typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
statetype state;
logic [`DURLEN-1:0] step;
logic [`DURLEN-1:0] cycles;
logic SpecialCaseE, FSpecialCaseE;
logic [`DURLEN-1:0] step;
// FDivStartE and IDivStartE come from fctrl, reflecitng the start of floating-point and possibly integer division
assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
@ -67,47 +62,6 @@ module fdivsqrtfsm(
else assign SpecialCaseE = FSpecialCaseE;
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
// DIVN = `NF+3
// NS = NF + 1
// N = NS or NS+2 for div/sqrt.
// *** CT 4/13/23 move cycles calculation back to preprocesor
/* verilator lint_off WIDTH */
logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
if (`FPSIZES == 1)
assign Nf = `NF;
else if (`FPSIZES == 2)
always_comb
case (FmtE)
1'b0: Nf = `NF1;
1'b1: Nf = `NF;
endcase
else if (`FPSIZES == 3)
always_comb
case (FmtE)
`FMT: Nf = `NF;
`FMT1: Nf = `NF1;
`FMT2: Nf = `NF2;
endcase
else if (`FPSIZES == 4)
always_comb
case(FmtE)
`S_FMT: Nf = `S_NF;
`D_FMT: Nf = `D_NF;
`H_FMT: Nf = `H_NF;
`Q_FMT: Nf = `Q_NF;
endcase
always_comb begin
if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (`IDIV_ON_FPU) cycles = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
end
/* verilator lint_on WIDTH */
always_ff @(posedge clk) begin
if (reset | FlushE) begin
state <= #1 IDLE;

View File

@ -33,9 +33,7 @@ module fdivsqrtiter(
input logic IFDivStartE,
input logic FDivBusyE,
input logic SqrtE,
input logic [`DIVb+3:0] X,
input logic [`DIVb-1:0] DPreproc,
output logic [`DIVb-1:0] D,
input logic [`DIVb+3:0] X, D,
output logic [`DIVb:0] FirstU, FirstUM,
output logic [`DIVb+1:0] FirstC,
output logic Firstun,
@ -95,16 +93,11 @@ module fdivsqrtiter(
mux2 #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC);
flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
// Divisior register
flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
// Divisor Selections
// - choose the negitive version of what's being selected
// - D is a 0.b mantissa
assign DBar = {3'b111, 1'b0, ~D};
assign DBar = ~D; // for -D
if(`RADIX == 4) begin : d2
assign DBar2 = {2'b11, 1'b0, ~D, 1'b1};
assign D2 = {2'b0, 1'b1, D, 1'b0};
assign D2 = D << 1; // for 2D, only used in R4
assign DBar2 = ~D2; // for -2D, only used in R4
end
// k=DIVCOPIES of the recurrence logic

View File

@ -32,7 +32,7 @@ module fdivsqrtpostproc(
input logic clk, reset,
input logic StallM,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb-1:0] D,
input logic [`DIVb+3:0] D,
input logic [`DIVb:0] FirstU, FirstUM,
input logic [`DIVb+1:0] FirstC,
input logic SqrtE,
@ -46,7 +46,7 @@ module fdivsqrtpostproc(
output logic [`XLEN-1:0] FIntDivResultM
);
logic [`DIVb+3:0] W, Sum, DM;
logic [`DIVb+3:0] W, Sum;
logic [`DIVb:0] PreQmM;
logic NegStickyM;
logic weq0E, WZeroM;
@ -67,7 +67,7 @@ module fdivsqrtpostproc(
assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root
assign FZeroDivE = {3'b001,D,1'b0}; // F for divide
assign FZeroDivE = D << 1; // F for divide
mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
@ -102,11 +102,10 @@ module fdivsqrtpostproc(
logic signed [`DIVb+3:0] PreResultM, PreIntResultM;
assign W = $signed(Sum) >>> `LOGR;
assign DM = {4'b0001, D};
assign UnsignedQuotM = {3'b000, PreQmM};
// Integer remainder: sticky and sign correction muxes
mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM);
mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);

View File

@ -33,29 +33,29 @@ module fdivsqrtpreproc (
input logic IFDivStartE,
input logic [`NF:0] Xm, Ym,
input logic [`NE-1:0] Xe, Ye,
input logic [`FMTBITS-1:0] Fmt,
input logic Sqrt,
input logic [`FMTBITS-1:0] FmtE,
input logic SqrtE,
input logic XZeroE,
input logic [2:0] Funct3E,
output logic [`NE+1:0] QeM,
output logic [`DIVb+3:0] X,
output logic [`DIVb-1:0] DPreproc,
output logic [`DIVb+3:0] X, D,
// Int-specific
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic IntDivE, W64E,
output logic ISpecialCaseE,
output logic [`DIVBLEN:0] nE, nM, mM,
output logic [`DURLEN-1:0] cycles,
output logic [`DIVBLEN:0] nM, mM,
output logic NegQuotM, ALTBM, IntDivM, W64M,
output logic AsM, BZeroM,
output logic [`XLEN-1:0] AM
);
logic [`DIVb-1:0] XPreproc;
logic [`DIVb-1:0] XPreproc, DPreproc;
logic [`DIVb:0] PreSqrtX;
logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
logic [`NE+1:0] QeE; // Quotient Exponent (FP only)
logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs
logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
logic NumerZeroE; // Numerator is zero (X or A)
logic AZeroE, BZeroE; // A or B is Zero for integer division
logic signedDiv; // signed division
@ -111,7 +111,9 @@ module fdivsqrtpreproc (
// Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
assign DivX = {3'b000, ~NumerZeroE, XPreproc};
// *** CT 4/13/23 Create D output here with leading 1 appended as well, use in the other modules
// Divisior register
flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D);
// ***CT: factor out fdivsqrtcycles
if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
@ -168,10 +170,13 @@ module fdivsqrtpreproc (
// Sqrt is initialized on step one as R(X-1), so depends on Radix
if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, Sqrt, PreShiftX);
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
// Floating-point exponent
fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
// Number of FSM cycles (to FSM)
fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles);
endmodule

View File

@ -30,8 +30,7 @@
/* verilator lint_off UNOPTFLAT */
module fdivsqrtstage2 (
input logic [`DIVb-1:0] D,
input logic [`DIVb+3:0] DBar,
input logic [`DIVb+3:0] D, DBar,
input logic [`DIVb:0] U, UM,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb+1:0] C,
@ -66,7 +65,7 @@ module fdivsqrtstage2 (
always_comb
if (up) Dsel = DBar;
else if (uz) Dsel = '0;
else Dsel = {4'b0001, D}; // un
else Dsel = D; // un
// Partial Product Generation
// WSA, WCA = WS + WC - qD

View File

@ -29,8 +29,7 @@
`include "wally-config.vh"
module fdivsqrtstage4 (
input logic [`DIVb-1:0] D,
input logic [`DIVb+3:0] DBar, D2, DBar2,
input logic [`DIVb+3:0] D, DBar, D2, DBar2,
input logic [`DIVb:0] U,UM,
input logic [`DIVb+3:0] WS, WC,
input logic [`DIVb+1:0] C,
@ -75,7 +74,7 @@ module fdivsqrtstage4 (
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = '0;
4'b0010: Dsel = {3'b0, 1'b1, D};
4'b0010: Dsel = D;
4'b0001: Dsel = D2;
default: Dsel = 'x;
endcase