cvw/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
2023-05-26 14:25:14 -05:00

209 lines
9.3 KiB
Systemverilog

///////////////////////////////////////////
// fdivsqrtpreproc.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
input logic clk,
input logic IFDivStartE,
input logic [P.NF:0] Xm, Ym,
input logic [P.NE-1:0] Xe, Ye,
input logic [P.FMTBITS-1:0] FmtE,
input logic SqrtE,
input logic XZeroE,
input logic [2:0] Funct3E,
output logic [P.NE+1:0] QeM,
output logic [P.DIVb+3:0] X, D,
// Int-specific
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic IntDivE, W64E,
output logic ISpecialCaseE,
output logic [P.DURLEN-1:0] CyclesE,
output logic [P.DIVBLEN:0] nM, mM,
output logic NegQuotM, ALTBM, IntDivM, W64M,
output logic AsM, BZeroM,
output logic [P.XLEN-1:0] AM
);
logic [P.DIVb-1:0] Xfract, Dfract;
logic [P.DIVb:0] PreSqrtX;
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
logic NumerZeroE; // Numerator is zero (X or A)
logic AZeroE, BZeroE; // A or B is Zero for integer division
logic SignedDivE; // signed division
logic NegQuotE; // Integer quotient is negative
logic AsE, BsE; // Signs of integer inputs
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
logic ALTBE;
//////////////////////////////////////////////////////
// Integer Preprocessing
//////////////////////////////////////////////////////
if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
logic [P.XLEN-1:0] BE, PosA, PosB;
// Extract inputs, signs, zero, depending on W64 mode if applicable
assign SignedDivE = ~Funct3E[0];
// Source handling
if (P.XLEN==64) begin // 64-bit, supports W64
mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
end else begin // 32 bits only
assign AE = ForwardedSrcAE;
assign BE = ForwardedSrcBE;
end
assign AZeroE = ~(|AE);
assign BZeroE = ~(|BE);
assign AsE = AE[P.XLEN-1] & SignedDivE;
assign BsE = BE[P.XLEN-1] & SignedDivE;
assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
// Force integer inputs to be postiive
mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
// Select integer or floating point inputs
mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
end else begin // Int not supported
assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
assign NumerZeroE = XZeroE;
end
//////////////////////////////////////////////////////
// Integer & FP leading zero and normalization shift
//////////////////////////////////////////////////////
// count leading zeros for Subnorm FP and to normalize integer inputs
lzc #(P.DIVb) lzcX (IFX, ell);
lzc #(P.DIVb) lzcY (IFD, mE);
// Normalization shift: shift off leading one
assign Xfract = (IFX << ell) << 1;
assign Dfract = (IFD << mE) << 1;
// *** CT: move to fdivsqrtintpreshift
//////////////////////////////////////////////////////
// Integer Right Shift to digit boundary
// Determine DivXShifted (X shifted to digit boundary)
// and nE (number of fractional digits)
//////////////////////////////////////////////////////
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
logic [P.DIVBLEN:0] ZeroDiff, p;
// calculate number of fractional bits p
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
// Integer special cases (terminate immediately)
assign ISpecialCaseE = BZeroE | ALTBE;
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
if (P.LOGRK > 0) begin // more than 1 bit per cycle
logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
/* verilator lint_off WIDTH */
assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
/* verilator lint_on WIDTH */
end else begin // radix 2 1 copy doesn't require shifting
assign nE = p;
assign DivXShifted = DivX;
end
end else begin
assign ISpecialCaseE = 0;
end
// CT *** fdivsqrtfplead1
//////////////////////////////////////////////////////
// Floating-Point Preprocessing
// append leading 1 (for nonzero inputs)
// shift square root to be in range [1/4, 1)
// Normalized numbers are shifted right by 1 if the exponent is odd
// Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
//////////////////////////////////////////////////////
assign DivX = {3'b000, ~NumerZeroE, Xfract};
// Sqrt is initialized on step one as R(X-1), so depends on Radix
mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
//////////////////////////////////////////////////////
// Selet integer or floating-point operands
//////////////////////////////////////////////////////
if (P.IDIV_ON_FPU) begin
mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
end else begin
assign X = PreShiftX;
end
// Divisior register
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
// Floating-point exponent
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
// Number of FSM cycles (to FSM)
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
if (P.IDIV_ON_FPU) begin:intpipelineregs
// pipeline registers
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
if (P.XLEN==64)
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
end
endmodule