mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge pull request #374 from kipmacsaigoren/divsqrtrem
divremsqrt initial push
This commit is contained in:
commit
b45655ea16
@ -16,15 +16,6 @@ add wave -noupdate /testbenchfp/ResMatch
|
||||
add wave -noupdate /testbenchfp/FlagMatch
|
||||
add wave -noupdate /testbenchfp/CheckNow
|
||||
add wave -noupdate /testbenchfp/NaNGood
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
|
||||
|
||||
add wave -group {Testbench} -noupdate /testbenchfp/*
|
||||
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
|
||||
|
103
src/fpu/divremsqrt/divremsqrt.sv
Normal file
103
src/fpu/divremsqrt/divremsqrt.sv
Normal file
@ -0,0 +1,103 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrt.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified:19 May 2023
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module divremsqrt import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic XsE,
|
||||
input logic [P.NF:0] XmE, YmE,
|
||||
input logic [P.NE-1:0] XeE, YeE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
input logic SqrtE, SqrtM,
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [2:0] Funct3E, Funct3M,
|
||||
input logic IntDivE, W64E,
|
||||
output logic DivStickyM,
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
output logic [P.NE+1:0] QeM,
|
||||
output logic [P.DIVb:0] QmM,
|
||||
output logic [P.XLEN-1:0] FIntDivResultM
|
||||
);
|
||||
|
||||
// Floating-point division and square root module, with optional integer division and remainder
|
||||
// Computes X/Y, sqrt(X), A/B, or A%B
|
||||
|
||||
logic [P.DIVb+3:0] WS, WC; // Partial remainder components
|
||||
logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||
logic [P.DIVb+3:0] D; // Iterator Divisor
|
||||
logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||
logic [P.DIVb+1:0] FirstC; // Step tracker
|
||||
logic Firstun; // Quotient selection
|
||||
logic WZeroE; // Early termination flag
|
||||
logic [P.DURLEN-1:0] CyclesE; // FSM cycles
|
||||
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
||||
logic DivStartE; // Enable signal for flops during stall
|
||||
|
||||
// Integer div/rem signals
|
||||
logic BZeroM; // Denominator is zero
|
||||
logic IntDivM; // Integer operation
|
||||
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
|
||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
|
||||
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
|
||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
|
||||
// Int-specific
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
|
||||
.BZeroM, .nM, .mM, .AM,
|
||||
.IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
|
||||
|
||||
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
|
||||
.clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
|
||||
.FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
|
||||
.FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
|
||||
// Int-specific
|
||||
.IDivStartE, .ISpecialCaseE, .IntDivE);
|
||||
|
||||
fdivsqrtiter #(P) fdivsqrtiter( // CSA Iterator
|
||||
.clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
|
||||
.FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
|
||||
|
||||
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
|
||||
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
|
||||
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
|
||||
.QmM, .WZeroE, .DivStickyM,
|
||||
// Int-specific
|
||||
.nM, .mM, .ALTBM, .AsM, .BZeroM, .NegQuotM, .W64M, .RemOpM(Funct3M[1]), .AM,
|
||||
.FIntDivResultM);
|
||||
endmodule
|
||||
|
182
src/fpu/divremsqrt/divremsqrtflags.sv
Normal file
182
src/fpu/divremsqrt/divremsqrtflags.sv
Normal file
@ -0,0 +1,182 @@
|
||||
|
||||
///////////////////////////////////////////
|
||||
// flags.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: Post-Processing flag calculation
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtflags import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // X sign
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn, // is a Inf input being used
|
||||
input logic XInf, YInf, // inputs are infinity
|
||||
input logic NaNIn, // is a NaN input being used
|
||||
input logic XSNaN, YSNaN, // inputs are signaling NaNs
|
||||
input logic XZero, YZero, // inputs are zero
|
||||
input logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
input logic [P.NE+1:0] Me, // exponent of the normalized sum
|
||||
// rounding
|
||||
input logic Plus1, // do you add one for rounding
|
||||
input logic Round, Guard, Sticky, // bits used to determine rounding
|
||||
input logic UfPlus1, // do you add one for rounding for the unbounded exponent result
|
||||
// divsqrt
|
||||
input logic DivOp, // conversion opperation?
|
||||
input logic Sqrt, // Sqrt?
|
||||
// flags
|
||||
output logic DivByZero, // divide by zero flag
|
||||
output logic Overflow, // overflow flag to select result
|
||||
output logic Invalid, // invalid flag to select the result
|
||||
output logic [4:0] PostProcFlg // flags
|
||||
);
|
||||
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic Inexact; // final inexact flag
|
||||
logic FpInexact; // floating point inexact flag
|
||||
logic DivInvalid; // integer invalid flag
|
||||
logic Underflow; // Underflow flag
|
||||
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Overflow
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine if the result exponent is greater than or equal to the maximum exponent or
|
||||
// the shift amount is greater than the integers size (for cvt to int)
|
||||
// ShiftGtIntSz calculation:
|
||||
// a left shift of intlen+1 is still in range but any more than that is an overflow
|
||||
// inital: | 64 0's | XLEN |
|
||||
// | 64 0's | XLEN | << 64
|
||||
// | XLEN | 00000... |
|
||||
// 65 = ...0 0 0 0 0 1 0 0 0 0 0 1
|
||||
// | or | | or |
|
||||
// 33 = ...0 0 0 0 0 0 1 0 0 0 0 1
|
||||
// | or | | or |
|
||||
// larger or equal if:
|
||||
// - any of the bits after the most significan 1 is one
|
||||
// - the most signifcant in 65 or 33 is still a one in the number and
|
||||
// one of the later bits is one
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
|
||||
P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
|
||||
P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
|
||||
default: ResExpGteMax = 1'bx;
|
||||
endcase
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
|
||||
P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
|
||||
P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
|
||||
P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
// calulate overflow flag:
|
||||
// if the result is greater than or equal to the max exponent(not taking into account sign)
|
||||
// | and the exponent isn't negitive
|
||||
// | | if the input isnt infinity or NaN
|
||||
// | | |
|
||||
assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Underflow
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// calculate underflow flag: detecting tininess after rounding
|
||||
// the exponent is negitive
|
||||
// | the result is subnormal
|
||||
// | | the result is normal and rounded from a Subnorm
|
||||
// | | | and if given an unbounded exponent the result does not round
|
||||
// | | | | and if the result is not exact
|
||||
// | | | | | and if the input isnt infinity or NaN
|
||||
// | | | | | |
|
||||
assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Inexact
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed res isn't outputed
|
||||
assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
|
||||
//assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
|
||||
|
||||
// if the res is too small to be represented and not 0
|
||||
// | and if the res is not invalid (outside the integer bounds)
|
||||
// | |
|
||||
|
||||
// select the inexact flag to output
|
||||
assign Inexact = FpInexact;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Invalid
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
|
||||
|
||||
assign SigNaN = (XSNaN) | (YSNaN) ;
|
||||
|
||||
//invalid flag for division
|
||||
assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
|
||||
|
||||
assign Invalid = SigNaN | (DivInvalid&DivOp);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Divide by Zero
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// if dividing by zero and not 0/0
|
||||
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
|
||||
assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// final flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Combine flags
|
||||
// - to integer results do not set the underflow or overflow flags
|
||||
assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
181
src/fpu/divremsqrt/divremsqrtpostprocess.sv
Normal file
181
src/fpu/divremsqrt/divremsqrtpostprocess.sv
Normal file
@ -0,0 +1,181 @@
|
||||
///////////////////////////////////////////
|
||||
// postprocess.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module divremsqrtpostprocess import cvw::*; #(parameter cvw_t P) (
|
||||
// general signals
|
||||
input logic Xs, Ys, // input signs
|
||||
input logic [P.NF:0] Xm, Ym, // input mantissas
|
||||
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [P.FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
|
||||
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
|
||||
input logic XZero, YZero, // inputs are zero
|
||||
input logic XInf, YInf, // inputs are infinity
|
||||
input logic XNaN, YNaN, // inputs are NaN
|
||||
input logic XSNaN, YSNaN, // inputs are signaling NaNs
|
||||
input logic [1:0] PostProcSel, // select result to be written to fp register
|
||||
//fma signals
|
||||
//divide signals
|
||||
input logic DivSticky, // divider sticky bit
|
||||
input logic [P.NE+1:0] DivQe, // divsqrt exponent
|
||||
input logic [P.DIVb:0] DivQm, // divsqrt significand
|
||||
// final results
|
||||
output logic [P.FLEN-1:0] PostProcRes,// postprocessor final result
|
||||
output logic [4:0] PostProcFlg // postprocesser flags
|
||||
);
|
||||
|
||||
// general signals
|
||||
logic Rs; // result sign
|
||||
logic [P.NF-1:0] Rf; // Result fraction
|
||||
logic [P.NE-1:0] Re; // Result exponent
|
||||
logic Ms; // norMalized sign
|
||||
logic [P.CORRSHIFTSZ-1:0] Mf; // norMalized fraction
|
||||
logic [P.NE+1:0] Me; // normalized exponent
|
||||
logic [P.NE+1:0] FullRe; // Re with bits to determine sign and overflow
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount
|
||||
logic [P.NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift
|
||||
logic [P.NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction)
|
||||
logic Plus1; // add one to the final result?
|
||||
logic Overflow; // overflow flag used to select results
|
||||
logic Invalid; // invalid flag used to select results
|
||||
logic Guard, Round, Sticky; // bits needed to determine rounding
|
||||
logic [P.FMTBITS-1:0] OutFmt; // output format
|
||||
// division singals
|
||||
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
|
||||
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
|
||||
logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
|
||||
logic DivByZero; // divide by zero flag
|
||||
logic DivResSubnorm; // is the divsqrt result subnormal
|
||||
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
|
||||
// conversion signals
|
||||
logic [P.CVTLEN+P.NF:0] CvtShiftIn; // number to be shifted for converter
|
||||
logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result
|
||||
logic [P.XLEN+1:0] CvtNegRes; // possibly negated integer result
|
||||
logic CvtResUf; // did the convert result underflow
|
||||
logic IntInvalid; // invalid integer flag
|
||||
// readability signals
|
||||
logic Mult; // multiply opperation
|
||||
logic Sqrt; // is the divsqrt opperation sqrt
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic Signed; // is the opperation with a signed integer?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic CvtOp; // convertion opperation
|
||||
logic DivOp; // divider opperation
|
||||
logic InfIn; // are any of the inputs infinity
|
||||
logic NaNIn; // are any of the inputs NaN
|
||||
|
||||
// signals to help readability
|
||||
//assign Signed = OpCtrl[0];
|
||||
//assign Int64 = OpCtrl[1];
|
||||
//assign IntToFp = OpCtrl[2];
|
||||
//assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
|
||||
//assign CvtOp = (PostProcSel == 2'b00);
|
||||
//assign FmaOp = (PostProcSel == 2'b10);
|
||||
assign DivOp = (PostProcSel == 2'b01);
|
||||
assign Sqrt = OpCtrl[0];
|
||||
|
||||
// is there an input of infinity or NaN being used
|
||||
assign InfIn = XInf|YInf;
|
||||
assign NaNIn = XNaN|YNaN;
|
||||
|
||||
// choose the ouptut format depending on the opperation
|
||||
// - fp -> fp: OpCtrl contains the percision of the output
|
||||
// - otherwise: Fmt contains the percision of the output
|
||||
if (P.FPSIZES == 2)
|
||||
//assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT);
|
||||
assign OutFmt = Fmt;
|
||||
else if (P.FPSIZES == 3 | P.FPSIZES == 4)
|
||||
//assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
|
||||
assign OutFmt = Fmt;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// final claulations before shifting
|
||||
/*cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,
|
||||
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);*/
|
||||
|
||||
divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||
|
||||
assign ShiftAmt = DivShiftAmt;
|
||||
assign ShiftIn = DivShiftIn;
|
||||
|
||||
// main normalization shift
|
||||
normshift #(P) normshift (.ShiftIn, .ShiftAmt, .Shifted);
|
||||
|
||||
// correct for LZA/divsqrt error
|
||||
divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .Shifted, .Mf);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// round to zero
|
||||
// round to -infinity
|
||||
// round to infinity
|
||||
// round to nearest max magnitude
|
||||
|
||||
// calulate result sign used in rounding unit
|
||||
divremsqrtroundsign #(P) roundsign( .DivOp, .Sqrt, .Xs, .Ys, .Ms);
|
||||
|
||||
divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Qe,
|
||||
.Ms, .Mf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard,
|
||||
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs);*/
|
||||
assign Rs = Ms;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero,
|
||||
.Xs, .OutFmt, .Sqrt,
|
||||
.NaNIn, .Round, .DivByZero,
|
||||
.Guard, .Sticky, .UfPlus1,.DivOp, .FullRe, .Plus1,
|
||||
.Me, .Invalid, .Overflow, .PostProcFlg);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
|
||||
|
||||
divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero,
|
||||
.Frm, .OutFmt, .XNaN, .YNaN,
|
||||
.NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
|
||||
.XInf, .YInf, .DivOp, .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
|
||||
|
||||
endmodule
|
308
src/fpu/divremsqrt/divremsqrtround.sv
Normal file
308
src/fpu/divremsqrt/divremsqrtround.sv
Normal file
@ -0,0 +1,308 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtround.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu, me@KatherineParry.com
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Rounder
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
module divremsqrtround import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [2:0] Frm, // rounding mode
|
||||
input logic Ms, // normalized sign
|
||||
input logic [P.CORRSHIFTSZ-1:0] Mf, // normalized fraction
|
||||
// divsqrt
|
||||
input logic DivOp, // is a division opperation being done
|
||||
input logic DivSticky, // divsqrt sticky bit
|
||||
input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent
|
||||
// outputs
|
||||
output logic [P.NE+1:0] Me, // normalied fraction
|
||||
output logic UfPlus1, // do you add one to the result if given an unbounded exponent
|
||||
output logic [P.NE+1:0] FullRe, // Re with bits to determine sign and overflow
|
||||
output logic [P.NE-1:0] Re, // Result exponent
|
||||
output logic [P.NF-1:0] Rf, // Result fractionNormS
|
||||
output logic Sticky, // sticky bit
|
||||
output logic Plus1, // do you add one to the final result
|
||||
output logic Round, Guard // bits needed to calculate rounding
|
||||
);
|
||||
|
||||
logic UfCalcPlus1; // calculated plus one for unbounded exponent
|
||||
logic NormSticky; // normalized sum's sticky bit
|
||||
logic [P.NF-1:0] RoundFrac; // rounded fraction
|
||||
logic FpGuard, FpRound; // floating point round/guard bits
|
||||
logic FpLsbRes; // least significant bit of floating point result
|
||||
logic LsbRes; // lsb of result
|
||||
logic CalcPlus1; // calculated plus1
|
||||
logic FpPlus1; // do you add one to the fp result
|
||||
logic [P.FLEN:0] RoundAdd; // how much to add to the result
|
||||
|
||||
// what position is XLEN in?
|
||||
// options:
|
||||
// 1: XLEN > NF > NF1
|
||||
// 2: NF > XLEN > NF1
|
||||
// 3: NF > NF1 > XLEN
|
||||
// single and double will always be smaller than XLEN
|
||||
localparam XLENPOS = P.XLEN > P.NF ? 1 : P.XLEN > P.NF1 ? 2 : 3;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// {Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// - plus 1 otherwise
|
||||
|
||||
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to -infinity
|
||||
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to infinity
|
||||
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0x - do nothing
|
||||
// 10 - tie - Plus1
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// - Plus 1 otherwise
|
||||
|
||||
|
||||
// determine what format the final result is in: int or fp
|
||||
|
||||
// sticky bit calculation
|
||||
if (P.FPSIZES == 1) begin
|
||||
|
||||
// 1: XLEN > NF
|
||||
// | XLEN |
|
||||
// | NF |1|1|
|
||||
// ^ ^ if floating point result
|
||||
// ^ if not an FMA result
|
||||
if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
|
||||
// 2: NF > XLEN
|
||||
if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
// XLEN is either 64 or 32
|
||||
// so half and single are always smaller then XLEN
|
||||
|
||||
// 1: XLEN > NF > NF1
|
||||
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~OutFmt) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~OutFmt) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
// 1: XLEN > NF > NF1
|
||||
if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~(OutFmt==P.FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
|
||||
// 2: NF > XLEN > NF1
|
||||
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~(OutFmt==P.FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&(OutFmt==P.FMT1)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
// Quad precision will always be greater than XLEN
|
||||
// 2: NF > XLEN > NF1
|
||||
if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&~(OutFmt==P.Q_FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
|
||||
// 3: NF > NF1 > XLEN
|
||||
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
|
||||
if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT))) |
|
||||
(|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
// only add the Addend sticky if doing an FMA opperation
|
||||
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
|
||||
assign Sticky = DivSticky&DivOp | NormSticky;
|
||||
|
||||
|
||||
|
||||
|
||||
// determine round and LSB of the rounded value
|
||||
// - underflow round bit is used to determint the underflow flag
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
|
||||
assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
|
||||
assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
|
||||
assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
|
||||
assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.FMT: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
|
||||
end
|
||||
P.FMT1: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2];
|
||||
end
|
||||
P.FMT2: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2];
|
||||
end
|
||||
default: begin
|
||||
FpGuard = 1'bx;
|
||||
FpLsbRes = 1'bx;
|
||||
FpRound = 1'bx;
|
||||
end
|
||||
endcase
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
|
||||
end
|
||||
2'h1: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2];
|
||||
end
|
||||
2'h0: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2];
|
||||
end
|
||||
2'h2: begin
|
||||
FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1];
|
||||
FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
|
||||
FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2];
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
assign Guard = FpGuard;
|
||||
assign LsbRes = FpLsbRes;
|
||||
assign Round = FpRound;
|
||||
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (Frm)
|
||||
3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = Ms;//round down
|
||||
3'b011: CalcPlus1 = ~Ms;//round up
|
||||
3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (Frm)
|
||||
3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = Ms;//round down
|
||||
3'b011: UfCalcPlus1 = ~Ms;//round up
|
||||
3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
|
||||
assign FpPlus1 = Plus1;
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
|
||||
|
||||
|
||||
|
||||
|
||||
// place Plus1 into the proper position for the format
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
// \/FLEN+1
|
||||
// | NE+2 | NF |
|
||||
// '-NE+2-^----NF1----^
|
||||
// P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
|
||||
assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
|
||||
|
||||
end else if (P.FPSIZES == 4)
|
||||
assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
|
||||
|
||||
|
||||
|
||||
// trim unneeded bits from fraction
|
||||
assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
|
||||
|
||||
|
||||
|
||||
// select the exponent
|
||||
assign Me = Qe;
|
||||
|
||||
|
||||
|
||||
// round the result
|
||||
// - if the fraction overflows one should be added to the exponent
|
||||
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
|
||||
assign Re = FullRe[P.NE-1:0];
|
||||
|
||||
|
||||
endmodule
|
45
src/fpu/divremsqrt/divremsqrtroundsign.sv
Normal file
45
src/fpu/divremsqrt/divremsqrtroundsign.sv
Normal file
@ -0,0 +1,45 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtroundsign.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu,me@KatherineParry.com
|
||||
// Modified: 19 May 2023
|
||||
//
|
||||
// Purpose: Sign calculation for rounding
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divremsqrtroundsign import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // x sign
|
||||
input logic Ys, // y sign
|
||||
input logic Sqrt, // sqrt oppertion? (when using divsqrt unit)
|
||||
input logic DivOp, // is divsqrt opperation
|
||||
output logic Ms // normalized result sign
|
||||
);
|
||||
|
||||
logic Qs; // divsqrt result sign
|
||||
|
||||
// calculate divsqrt sign
|
||||
assign Qs = Xs^(Ys&~Sqrt);
|
||||
|
||||
// Select sign for rounding calulation
|
||||
assign Ms = (Qs&DivOp);
|
||||
|
||||
endmodule
|
92
src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
Normal file
92
src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
Normal file
@ -0,0 +1,92 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtshiftcorrection.sv
|
||||
//
|
||||
// Written: me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: shift correction
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module divremsqrtshiftcorrection import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
|
||||
// divsqrt
|
||||
input logic DivOp, // is it a divsqrt opperation
|
||||
input logic DivResSubnorm, // is the divsqrt result subnormal
|
||||
input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent
|
||||
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
|
||||
//fma
|
||||
//input logic FmaOp, // is it an fma opperation
|
||||
//input logic [P.NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
|
||||
//input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection
|
||||
//input logic FmaSZero,
|
||||
// output
|
||||
//output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
|
||||
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
|
||||
output logic [P.NE+1:0] Qe // corrected exponent for divider
|
||||
);
|
||||
|
||||
logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
logic [P.CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted
|
||||
logic [P.CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift
|
||||
logic ResSubnorm; // is the result Subnormal
|
||||
logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction
|
||||
logic LeftShiftQm; // should the divsqrt result be shifted one to the left
|
||||
|
||||
// LZA correction
|
||||
assign LZAPlus1 = Shifted[P.NORMSHIFTSZ-1];
|
||||
|
||||
// correct the shifting error caused by the LZA
|
||||
// - the only possible mantissa for a plus two is all zeroes
|
||||
// - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
|
||||
mux2 #(P.NORMSHIFTSZ-2) lzacorrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
|
||||
|
||||
// correct the shifting of the divsqrt caused by producing a result in (2, .5] range
|
||||
// condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm)
|
||||
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
|
||||
assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
|
||||
assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
|
||||
mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
|
||||
|
||||
// if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
|
||||
always_comb
|
||||
//if(FmaOp) Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}};
|
||||
if (DivOp&~DivResSubnorm) Mf = CorrQmShifted;
|
||||
else Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ];
|
||||
|
||||
// Determine sum's exponent
|
||||
// main exponent issues:
|
||||
// - LZA was one too large
|
||||
// - LZA was two too large
|
||||
// - if the result was calulated to be subnorm but it's norm and the LZA was off by 1
|
||||
// - if the result was calulated to be subnorm but it's norm and the LZA was off by 2
|
||||
// if plus1 If plus2 kill if the result Zero or actually subnormal
|
||||
// | | |
|
||||
//assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
|
||||
|
||||
// recalculate if the result is subnormal after LZA correction
|
||||
//assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZ-2]&~Shifted[P.NORMSHIFTSZ-1];
|
||||
|
||||
// the quotent is in the range [.5,2) if there is no early termination
|
||||
// if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift
|
||||
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
|
||||
endmodule
|
240
src/fpu/divremsqrt/divremsqrtspecialcase.sv
Normal file
240
src/fpu/divremsqrt/divremsqrtspecialcase.sv
Normal file
@ -0,0 +1,240 @@
|
||||
///////////////////////////////////////////
|
||||
// divremsqrtspecialcase.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu,me@KatherineParry.com
|
||||
// Modified: 7/5/2022
|
||||
//
|
||||
// Purpose: special case selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module divremsqrtspecialcase import cvw::*; #(parameter cvw_t P) (
|
||||
input logic Xs, // X sign
|
||||
input logic [P.NF:0] Xm, Ym, // input significand's
|
||||
input logic XNaN, YNaN, // are the inputs NaN
|
||||
input logic [2:0] Frm, // rounding mode
|
||||
input logic [P.FMTBITS-1:0] OutFmt, // output format
|
||||
input logic InfIn, // are any inputs infinity
|
||||
input logic NaNIn, // are any input NaNs
|
||||
input logic XInf, YInf, // are X or Y inifnity
|
||||
input logic XZero, // is X zero
|
||||
input logic Plus1, // do you add one for rounding
|
||||
input logic Rs, // the result's sign
|
||||
input logic Invalid, Overflow, // flags to choose the result
|
||||
input logic [P.NE-1:0] Re, // Result exponent
|
||||
input logic [P.NE+1:0] FullRe, // Result full exponent
|
||||
input logic [P.NF-1:0] Rf, // Result fraction
|
||||
// divsqrt
|
||||
input logic DivOp, // is it a divsqrt opperation
|
||||
input logic DivByZero, // divide by zero flag
|
||||
// outputs
|
||||
output logic [P.FLEN-1:0] PostProcRes // final result
|
||||
);
|
||||
|
||||
logic [P.FLEN-1:0] XNaNRes; // X is NaN result
|
||||
logic [P.FLEN-1:0] YNaNRes; // Y is NaN result
|
||||
logic [P.FLEN-1:0] InvalidRes; // Invalid result result
|
||||
logic [P.FLEN-1:0] UfRes; // underflowed result result
|
||||
logic [P.FLEN-1:0] OfRes; // overflowed result result
|
||||
logic [P.FLEN-1:0] NormRes; // normal result
|
||||
logic OfResMax; // does the of result output maximum norm fp number
|
||||
logic KillRes; // kill the result for underflow
|
||||
logic SelOfRes; // should the overflow result be selected
|
||||
|
||||
|
||||
// does the overflow result output the maximum normalized floating point number
|
||||
// output infinity if the input is infinity
|
||||
assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
|
||||
|
||||
// select correct outputs for special cases
|
||||
if (P.FPSIZES == 1) begin
|
||||
//NaN res selection depending on standard
|
||||
if(P.IEEE754) begin
|
||||
assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
assign NormRes = {Rs, Re, Rf};
|
||||
|
||||
end else if (P.FPSIZES == 2) begin
|
||||
if(P.IEEE754) begin
|
||||
assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
|
||||
assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
|
||||
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end else begin
|
||||
assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end
|
||||
|
||||
always_comb
|
||||
if(OutFmt)
|
||||
if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
|
||||
else OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
else
|
||||
if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
|
||||
else OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
|
||||
assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
|
||||
|
||||
end else if (P.FPSIZES == 3) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
P.FMT: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {Rs, Re, Rf};
|
||||
end
|
||||
P.FMT1: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
|
||||
YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
|
||||
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
|
||||
UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
|
||||
end
|
||||
P.FMT2: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
|
||||
YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
|
||||
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
|
||||
UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
|
||||
end
|
||||
default: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = (P.FLEN)'(0);
|
||||
YNaNRes = (P.FLEN)'(0);
|
||||
InvalidRes = (P.FLEN)'(0);
|
||||
end else begin
|
||||
InvalidRes = (P.FLEN)'(0);
|
||||
end
|
||||
OfRes = (P.FLEN)'(0);
|
||||
UfRes = (P.FLEN)'(0);
|
||||
NormRes = (P.FLEN)'(0);
|
||||
end
|
||||
endcase
|
||||
|
||||
end else if (P.FPSIZES == 4) begin
|
||||
always_comb
|
||||
case (OutFmt)
|
||||
2'h3: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
|
||||
YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end else begin
|
||||
InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
|
||||
UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {Rs, Re, Rf};
|
||||
end
|
||||
2'h1: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
|
||||
YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
|
||||
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
|
||||
end
|
||||
OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
|
||||
UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
|
||||
end
|
||||
2'h0: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
|
||||
YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
|
||||
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
|
||||
UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
|
||||
end
|
||||
2'h2: begin
|
||||
if(P.IEEE754) begin
|
||||
XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
|
||||
YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
|
||||
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
|
||||
end else begin
|
||||
InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
|
||||
end
|
||||
|
||||
OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
|
||||
// zero is exact if dividing by infinity so don't add 1
|
||||
UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
|
||||
NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// determine if you shoould kill the res - Cvt
|
||||
// - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
|
||||
// - dont set to zero if fp input is zero but not using the fp input
|
||||
// - dont set to zero if int input is zero but not using the int input
|
||||
assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
|
||||
|
||||
// calculate if the overflow result should be selected
|
||||
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
|
||||
|
||||
// output infinity with result sign if divide by zero
|
||||
if(P.IEEE754)
|
||||
always_comb
|
||||
if(XNaN) PostProcRes = XNaNRes;
|
||||
else if(YNaN) PostProcRes = YNaNRes;
|
||||
else if(Invalid) PostProcRes = InvalidRes;
|
||||
else if(SelOfRes) PostProcRes = OfRes;
|
||||
else if(KillRes) PostProcRes = UfRes;
|
||||
else PostProcRes = NormRes;
|
||||
else
|
||||
always_comb
|
||||
if(NaNIn|Invalid) PostProcRes = InvalidRes;
|
||||
else if(SelOfRes) PostProcRes = OfRes;
|
||||
else if(KillRes) PostProcRes = UfRes;
|
||||
else PostProcRes = NormRes;
|
||||
|
||||
endmodule
|
96
src/fpu/divremsqrt/drsu.sv
Normal file
96
src/fpu/divremsqrt/drsu.sv
Normal file
@ -0,0 +1,96 @@
|
||||
///////////////////////////////////////////
|
||||
// drsu.sv
|
||||
//
|
||||
// Written: kekim@hmc.edu
|
||||
// Modified:19 May 2023
|
||||
//
|
||||
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
// A component of the CORE-V-WALLY configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
//
|
||||
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
module drsu import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic XsE, YsE,
|
||||
input logic [P.NF:0] XmE, YmE,
|
||||
input logic [P.NE-1:0] XeE, YeE,
|
||||
input logic XInfE, YInfE,
|
||||
input logic XZeroE, YZeroE,
|
||||
input logic XNaNE, YNaNE,
|
||||
input logic XSNaNE, YSNaNE,
|
||||
input logic FDivStartE, IDivStartE,
|
||||
input logic StallM,
|
||||
input logic FlushE,
|
||||
input logic SqrtE, SqrtM,
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [2:0] Funct3E, Funct3M,
|
||||
input logic IntDivE, W64E,
|
||||
input logic [2:0] Frm,
|
||||
input logic [2:0] OpCtrl,
|
||||
input logic [1:0] PostProcSel,
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
output logic [P.FLEN-1:0] FResM,
|
||||
output logic [P.XLEN-1:0] FIntDivResultM,
|
||||
output logic [4:0] FlgM
|
||||
);
|
||||
|
||||
// Floating-point division and square root module, with optional integer division and remainder
|
||||
// Computes X/Y, sqrt(X), A/B, or A%B
|
||||
|
||||
logic [P.DIVb+3:0] WS, WC; // Partial remainder components
|
||||
logic [P.DIVb+3:0] X; // Iterator Initial Value (from dividend)
|
||||
logic [P.DIVb+3:0] D; // Iterator Divisor
|
||||
logic [P.DIVb:0] FirstU, FirstUM; // Intermediate result values
|
||||
logic [P.DIVb+1:0] FirstC; // Step tracker
|
||||
logic Firstun; // Quotient selection
|
||||
logic WZeroE; // Early termination flag
|
||||
logic [P.DURLEN-1:0] CyclesE; // FSM cycles
|
||||
logic SpecialCaseM; // Divide by zero, square root of negative, etc.
|
||||
logic DivStartE; // Enable signal for flops during stall
|
||||
|
||||
// Integer div/rem signals
|
||||
logic BZeroM; // Denominator is zero
|
||||
logic IntDivM; // Integer operation
|
||||
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
|
||||
logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
|
||||
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
logic [P.DIVb:0] QmM;
|
||||
logic [P.NE+1:0] QeM;
|
||||
logic DivStickyM;
|
||||
|
||||
divremsqrt #(P) divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE,
|
||||
.XeE, .YeE, .SqrtE, .SqrtM,
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE,
|
||||
.XNaNE, .YNaNE,
|
||||
.FDivStartE, .IDivStartE, .W64E,
|
||||
.StallM, .DivStickyM, .FDivBusyE, .QeM,
|
||||
.QmM,
|
||||
.FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
|
||||
.Funct3E, .IntDivE, .FIntDivResultM,
|
||||
.FDivDoneE, .IFDivStartE);
|
||||
divremsqrtpostprocess #(P) divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl,
|
||||
.XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE),
|
||||
.YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivQe(QeM), .DivQm(QmM), .PostProcRes(FResM), .PostProcFlg(FlgM));
|
||||
endmodule
|
||||
|
@ -232,7 +232,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
|
||||
// **** create config to support DTIM with floating point.
|
||||
dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM),
|
||||
.DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM),
|
||||
.ReadDataWordM(DTIMReadDataWordM[P.XLEN-1:0]), .ByteMaskM(ByteMaskM[P.XLEN/8-1:0]));
|
||||
.ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM[P.LLEN/8-1:0]));
|
||||
end else begin
|
||||
end
|
||||
if (P.BUS_SUPPORTED) begin : bus
|
||||
@ -308,11 +308,11 @@ module lsu import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
ahbinterface #(P.XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY),
|
||||
.HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA),
|
||||
.HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM),
|
||||
.HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM[P.XLEN/8-1:0]), .WriteData(LSUWriteDataM[P.XLEN-1:0]),
|
||||
.Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer));
|
||||
|
||||
// Mux between the 2 sources of read data, 0: Bus, 1: DTIM
|
||||
if(P.DTIM_SUPPORTED) mux2 #(P.XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM);
|
||||
if(P.DTIM_SUPPORTED) mux2 #(P.XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM[P.XLEN-1:0], SelDTIM, ReadDataWordMuxM[P.XLEN-1:0]);
|
||||
else assign ReadDataWordMuxM = FetchBuffer[P.XLEN-1:0];
|
||||
assign LSUHBURST = 3'b0;
|
||||
assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -42,6 +42,14 @@
|
||||
`define FROM_I_OPCTRL 3'b101
|
||||
`define FROM_UL_OPCTRL 3'b110
|
||||
`define FROM_L_OPCTRL 3'b111
|
||||
`define INTREMU_OPCTRL 3'b001
|
||||
`define INTREM_OPCTRL 3'b010
|
||||
`define INTDIV_OPCTRL 3'b011
|
||||
`define INTDIVW_OPCTRL 3'b100
|
||||
`define INTDIVU_OPCTRL 3'b101
|
||||
`define INTREMW_OPCTRL 3'b110
|
||||
`define INTREMUW_OPCTRL 3'b111
|
||||
`define INTDIVUW_OPCTRL 3'b000
|
||||
`define RNE 3'b000
|
||||
`define RZ 3'b001
|
||||
`define RU 3'b011
|
||||
@ -52,6 +60,8 @@
|
||||
`define CVTINTUNIT 0
|
||||
`define CVTFPUNIT 4
|
||||
`define CMPUNIT 3
|
||||
`define DIVREMSQRTUNIT 5
|
||||
`define INTDIVUNIT 6
|
||||
|
||||
string f16rv32cvtint[] = '{
|
||||
"ui32_to_f16_rne.tv",
|
||||
@ -580,5 +590,39 @@ string f128fma[] = '{
|
||||
"f128_mulAdd_rnm.tv"
|
||||
};
|
||||
|
||||
string intrem[] = '{
|
||||
"cvw_64_rem-01.tv"
|
||||
};
|
||||
|
||||
string intdiv[] = '{
|
||||
"cvw_64_div-01.tv"
|
||||
};
|
||||
|
||||
string intremu[] = '{
|
||||
"cvw_64_remu-01.tv"
|
||||
};
|
||||
|
||||
string intdivu[] = '{
|
||||
"cvw_64_divu-01.tv"
|
||||
};
|
||||
|
||||
string intremw[] = '{
|
||||
"cvw_64_remw-01.tv"
|
||||
};
|
||||
|
||||
string intremuw[] = '{
|
||||
"cvw_64_remuw-01.tv"
|
||||
};
|
||||
|
||||
string intdivuw[] = '{
|
||||
"cvw_64_divuw-01.tv"
|
||||
};
|
||||
|
||||
string intdivw[] = '{
|
||||
"cvw_64_divw-01.tv"
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -2076,10 +2076,11 @@ string arch64zbs[] = '{
|
||||
};
|
||||
|
||||
string custom[] = '{
|
||||
`CUSTOM,
|
||||
"simple",
|
||||
`RISCVARCHTEST,
|
||||
"rv64i_m/M/src/div-01.S"
|
||||
/*"simple",
|
||||
"debug",
|
||||
"cacheTest"
|
||||
"cacheTest"*/
|
||||
};
|
||||
string testsBP64[] = '{
|
||||
`IMPERASTEST,
|
||||
|
@ -128,7 +128,7 @@ def create_vectors(my_config):
|
||||
done = True
|
||||
# put it all together
|
||||
if not done:
|
||||
translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode)
|
||||
translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
|
||||
dest_file.write(translation + "\n")
|
||||
else:
|
||||
# print("read false")
|
||||
@ -174,7 +174,7 @@ def create_vectors(my_config):
|
||||
flags = "XX"
|
||||
# put it all together
|
||||
if not done:
|
||||
translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode)
|
||||
translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
|
||||
dest_file.write(translation + "\n")
|
||||
else:
|
||||
# print("read false")
|
||||
@ -217,7 +217,7 @@ def create_vectors(my_config):
|
||||
flags = "XX"
|
||||
# put it all together
|
||||
if not done:
|
||||
translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode)
|
||||
translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
|
||||
dest_file.write(translation + "\n")
|
||||
else:
|
||||
# print("read false")
|
||||
@ -261,7 +261,7 @@ def create_vectors(my_config):
|
||||
|
||||
# put it all together
|
||||
if not done:
|
||||
translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode)
|
||||
translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
|
||||
dest_file.write(translation + "\n")
|
||||
else:
|
||||
# print("read false")
|
||||
@ -272,22 +272,22 @@ def create_vectors(my_config):
|
||||
src_file2.close()
|
||||
|
||||
config_list = [
|
||||
Config(32, "M", "div", "div-", 0),
|
||||
Config(32, "M", "div", "div-", 4),
|
||||
Config(32, "F", "fdiv", "fdiv", 1),
|
||||
Config(32, "F", "fsqrt", "fsqrt", 2),
|
||||
Config(32, "M", "rem", "rem-", 3),
|
||||
Config(32, "M", "divu", "divu-", 4),
|
||||
Config(32, "M", "remu", "remu-", 5),
|
||||
Config(64, "M", "div", "div-", 0),
|
||||
Config(32, "M", "rem", "rem-", 6),
|
||||
Config(32, "M", "divu", "divu-", 5),
|
||||
Config(32, "M", "remu", "remu-", 7),
|
||||
Config(64, "M", "div", "div-", 4),
|
||||
Config(64, "F", "fdiv", "fdiv", 1),
|
||||
Config(64, "F", "fsqrt", "fsqrt", 2),
|
||||
Config(64, "M", "rem", "rem-", 3),
|
||||
Config(64, "M", "divu", "divu-", 4),
|
||||
Config(64, "M", "remu", "remu-", 5),
|
||||
Config(64, "M", "divw", "divw-", 6),
|
||||
Config(64, "M", "divuw", "divuw-", 7),
|
||||
Config(64, "M", "remw", "remw-", 8),
|
||||
Config(64, "M", "remuw", "remuw-", 9)
|
||||
Config(64, "M", "rem", "rem-", 6),
|
||||
Config(64, "M", "divu", "divu-", 5),
|
||||
Config(64, "M", "remu", "remu-", 7),
|
||||
Config(64, "M", "divw", "divw-", 4),
|
||||
Config(64, "M", "divuw", "divuw-", 5),
|
||||
Config(64, "M", "remw", "remw-", 6),
|
||||
Config(64, "M", "remuw", "remuw-", 7)
|
||||
]
|
||||
|
||||
for c in config_list:
|
||||
|
Loading…
Reference in New Issue
Block a user