mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
209 lines
9.3 KiB
Systemverilog
209 lines
9.3 KiB
Systemverilog
///////////////////////////////////////////
// fdivsqrtpreproc.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Divide/Square root preprocessing: integer absolute value and W64, normalization shift
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////

// fdivsqrtpreproc
//
// Prepares the operands of a divide / square-root operation for the iterator:
//   * (optional, P.IDIV_ON_FPU) takes the absolute value of the integer sources,
//     with 32-bit W64 handling on 64-bit configurations
//   * selects between the floating-point significands and the integer operands
//   * counts leading zeros and normalizes both operands
//   * (optional, P.IDIV_ON_FPU) right-shifts the integer dividend to a digit boundary
//     and computes the number of fractional digits nE
//   * forms the initial X (combinational) and D (registered) values, the exponent QeM,
//     and the cycle count CyclesE
//
// Ports:
//   clk, IFDivStartE   clock and the signal passed as enable to every flopen register here
//   Xm, Ym             floating-point significands (P.NF+1 bits each)
//   Xe, Ye             floating-point exponents, forwarded to fdivsqrtexpcalc
//   FmtE               format, forwarded to fdivsqrtexpcalc and fdivsqrtcycles
//   SqrtE              selects the square-root form of X instead of the divide form
//   XZeroE             X is zero (floating-point)
//   Funct3E            bit 0 clear means signed integer division
//   QeM                registered copy of the exponent produced by fdivsqrtexpcalc
//   X, D               initial dividend/radicand (combinational) and divisor (registered)
//   ForwardedSrcAE/BE  integer source operands
//   IntDivE, W64E      integer-division select and 32-bit (W) operation select
//   ISpecialCaseE      integer special case: B is zero or A < B; constant 0 without integer support
//   CyclesE            number of FSM cycles, produced by fdivsqrtcycles
//   nM, mM, NegQuotM, ALTBM, IntDivM, W64M, AsM, BZeroM, AM
//                      registered copies of the corresponding E-stage signals; they are only
//                      driven when P.IDIV_ON_FPU is set (W64M additionally requires P.XLEN==64)
module fdivsqrtpreproc import cvw::*;  #(parameter cvw_t P) (
  input  logic                 clk,
  input  logic                 IFDivStartE,
  input  logic [P.NF:0]        Xm, Ym,
  input  logic [P.NE-1:0]      Xe, Ye,
  input  logic [P.FMTBITS-1:0] FmtE,
  input  logic                 SqrtE,
  input  logic                 XZeroE,
  input  logic [2:0]           Funct3E,
  output logic [P.NE+1:0]      QeM,
  output logic [P.DIVb+3:0]    X, D,
  // Int-specific
  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
  input  logic                 IntDivE, W64E,
  output logic                 ISpecialCaseE,
  output logic [P.DURLEN-1:0]  CyclesE,
  output logic [P.DIVBLEN:0]   nM, mM,
  output logic                 NegQuotM, ALTBM, IntDivM, W64M,
  output logic                 AsM, BZeroM,
  output logic [P.XLEN-1:0]    AM
);

  logic [P.DIVb-1:0]           Xfract, Dfract;
  logic [P.DIVb:0]             PreSqrtX;
  logic [P.DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
  logic [P.NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
  logic [P.DIVb-1:0]           IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
  logic [P.DIVBLEN:0]          mE, nE, ell;                         // Leading zeros of inputs
  logic                        NumerZeroE;                          // Numerator is zero (X or A)
  logic                        AZeroE, BZeroE;                      // A or B is Zero for integer division
  logic                        SignedDivE;                          // signed division
  logic                        NegQuotE;                            // Integer quotient is negative
  logic                        AsE, BsE;                            // Signs of integer inputs
  logic [P.XLEN-1:0]           AE;                                  // input A after W64 adjustment
  logic                        ALTBE;

  //////////////////////////////////////////////////////
  // Integer Preprocessing
  //////////////////////////////////////////////////////

  if (P.IDIV_ON_FPU) begin:intpreproc // Int Supported
    logic [P.XLEN-1:0] BE, PosA, PosB;

    // Extract inputs, signs, zero, depending on W64 mode if applicable
    assign SignedDivE = ~Funct3E[0];

    // Source handling
    if (P.XLEN==64) begin // 64-bit, supports W64
      // In W64 mode keep only the low 32 bits, sign-extended for signed ops and zero-extended otherwise
      mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
      mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
    end else begin // 32 bits only
      assign AE = ForwardedSrcAE;
      assign BE = ForwardedSrcBE;
    end
    assign AZeroE = ~(|AE);
    assign BZeroE = ~(|BE);
    assign AsE = AE[P.XLEN-1] & SignedDivE;
    assign BsE = BE[P.XLEN-1] & SignedDivE;
    assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative

    // Force integer inputs to be positive
    mux2 #(P.XLEN) posamux(AE, -AE, AsE, PosA);
    mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);

    // Select integer or floating point inputs
    // Both candidates are left-aligned and zero-padded on the right to P.DIVb bits
    mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
    mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
    mux2 #(1)      numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
  end else begin // Int not supported
    assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
    assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
    assign NumerZeroE = XZeroE;
  end

  //////////////////////////////////////////////////////
  // Integer & FP leading zero and normalization shift
  //////////////////////////////////////////////////////

  // count leading zeros for Subnorm FP and to normalize integer inputs
  lzc #(P.DIVb) lzcX (IFX, ell);
  lzc #(P.DIVb) lzcY (IFD, mE);

  // Normalization shift: shift off leading one
  assign Xfract = (IFX << ell) << 1;
  assign Dfract = (IFD << mE) << 1;

  // *** CT: move to fdivsqrtintpreshift

  //////////////////////////////////////////////////////
  // Integer Right Shift to digit boundary
  // Determine DivXShifted (X shifted to digit boundary)
  // and nE (number of fractional digits)
  //////////////////////////////////////////////////////

  if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
    logic [P.DIVBLEN:0] ZeroDiff, p;

    // calculate number of fractional bits p
    assign ZeroDiff = mE - ell;         // Difference in number of leading zeros
    assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
    mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); // p is forced to 0 when A < B

    // Integer special cases (terminate immediately)
    assign ISpecialCaseE = BZeroE | ALTBE;

    // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps

    if (P.LOGRK > 0) begin // more than 1 bit per cycle
      logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
      logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
      /* verilator lint_off WIDTH */
      assign TotalIntBits = P.LOGR + p;                            // Total number of result bits (r integer bits plus p fractional bits)
      assign IntTrunc = TotalIntBits % P.RK;                       // Truncation check for ceiling operator
      assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc;     // Number of steps for int div
      assign nE = (IntSteps * P.DIVCOPIES) - 1;                    // Fractional digits
      assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
      assign DivXShifted = DivX >> RightShiftX;                    // shift X by up to R*K-1 to complete in nE steps
      /* verilator lint_on WIDTH */
    end else begin // radix 2 1 copy doesn't require shifting
      assign nE = p;
      assign DivXShifted = DivX;
    end
  end else begin // Int not supported
    assign ISpecialCaseE = 0;
    // nE is still wired into fdivsqrtcycles below; tie it off so that the
    // FP-only configuration does not feed an undriven net into the cycle calculation.
    assign nE = '0;
  end

  // CT *** fdivsqrtfplead1

  //////////////////////////////////////////////////////
  // Floating-Point Preprocessing
  // append leading 1 (for nonzero inputs)
  // shift square root to be in range [1/4, 1)
  // Normalized numbers are shifted right by 1 if the exponent is odd
  // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
  //////////////////////////////////////////////////////

  assign DivX = {3'b000, ~NumerZeroE, Xfract};

  // Sqrt is initialized on step one as R(X-1), so depends on Radix
  // The right-shifted form is chosen when Xe[0] ^ ell[0] is set
  mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
  if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
  else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
  mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);

  //////////////////////////////////////////////////////
  // Select integer or floating-point operands
  //////////////////////////////////////////////////////

  if (P.IDIV_ON_FPU) begin
    mux2 #(P.DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
  end else begin
    assign X = PreShiftX;
  end

  // Divisor register
  flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);

  // Floating-point exponent
  fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
  flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);

  // Number of FSM cycles (to FSM)
  fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);

  if (P.IDIV_ON_FPU) begin:intpipelineregs
    // pipeline registers
    flopen #(1)           mdureg(clk, IFDivStartE, IntDivE, IntDivM);
    flopen #(1)           altbreg(clk, IFDivStartE, ALTBE, ALTBM);
    flopen #(1)           negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
    flopen #(1)           bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
    flopen #(1)           asignreg(clk, IFDivStartE, AsE, AsM);
    flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
    flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
    flopen #(P.XLEN)      srcareg(clk, IFDivStartE, AE, AM);
    if (P.XLEN==64)
      flopen #(1)         w64reg(clk, IFDivStartE, W64E, W64M);
  end

endmodule
|
|
|