mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-02 17:55:19 +00:00
213 lines
8.9 KiB
Systemverilog
Executable File
213 lines
8.9 KiB
Systemverilog
Executable File
///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Floating point divider/square root rounder unit (Goldschmidt)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////

// rounder_div
//
// Final rounding / exception stage of the floating point divide and square
// root unit. From six candidate quotients (q, q-1ulp, q+1ulp for each of the
// two possible normalizations) and the remainder it selects the correctly
// rounded mantissa, adjusts the exponent, detects overflow/underflow, builds
// the exception flags and packs the final result word.
//
// Rounding mode encoding as implied by the selection logic below
// (NOTE(review): inferred from the equations, confirm against the caller):
//   rm = 00 : round to nearest
//   rm = 01 : round toward zero
//   rm = 10 : round toward +infinity
//   rm = 11 : round toward -infinity
//
// sel_inv encoding as decoded below:
//   000 : normal (Valid) result
//   011 : result is forced to zero
//   111 : result is NaN
//   any other value with sel_inv[1] set forces an infinite result
module rounder_div (
  input  logic [1:0]   rm,        // rounding mode (see encoding above)
  input  logic         P,         // 1 = pack a single precision result, 0 = double precision
  input  logic         OvEn,      // overflow trap enable
  input  logic         UnEn,      // underflow trap enable
  input  logic [12:0]  exp_diff,  // exponent before normalization/rounding adjustment
  input  logic [2:0]   sel_inv,   // special-case selector (see encoding above)
  input  logic         Invalid,   // invalid-operation flag, passed straight through to Flags[0]
  input  logic         SignR,     // sign of the computed result
  input  logic [63:0]  Float1,    // first operand; source of NaN payload/sign
  input  logic [63:0]  Float2,    // second operand; NaN payload/sign source when only Y is NaN
  input  logic         XNaNQ,     // first operand is NaN
  input  logic         YNaNQ,     // second operand is NaN
  input  logic         XZeroQ,    // first operand is zero
  input  logic         YZeroQ,    // second operand is zero
  input  logic         XInfQ,     // first operand is infinite
  input  logic         YInfQ,     // second operand is infinite
  input  logic         op_type,   // 0 = divide; 1 = presumably square root (Div0 is only raised when 0)
  input  logic [59:0]  q1,        // quotient, used when q1[59] = 1
  input  logic [59:0]  qm1,       // q1 minus one ulp
  input  logic [59:0]  qp1,       // q1 plus one ulp
  input  logic [59:0]  q0,        // quotient, used when q1[59] = 0
  input  logic [59:0]  qm0,       // q0 minus one ulp
  input  logic [59:0]  qp0,       // q0 plus one ulp
  input  logic [119:0] regr_out,  // remainder

  output logic [63:0]  Result,    // packed result (upper 32 bits all ones for single precision)
  output logic [4:0]   Flags      // {Inexact, UnderFlow, OverFlow, Div0, Invalid}
);

  logic         Rsign;
  logic [10:0]  Rexp;
  logic [12:0]  Texp;
  logic [51:0]  Rmant;
  logic [59:0]  Tmant;
  logic [51:0]  Smant;
  logic         Rzero;
  logic         Gdp, Gsp, G;
  logic         UnFlow_SP, UnFlow_DP, UnderFlow;
  logic         OvFlow_SP, OvFlow_DP, OverFlow;
  logic         Inexact;
  logic         Round_zero;
  logic         Infinite;
  logic         VeryLarge;
  logic         Largest;
  logic         Div0;
  logic         Adj_exp;
  logic         Valid;
  logic         NaN;
  logic         Texp_l7z;
  logic         Texp_l7o;
  logic         OvCon;
  logic         zero_rem;
  logic [1:0]   mux_mant;
  logic         sign_rem;
  logic [59:0]  q, qm, qp;
  logic         exp_ovf;

  logic [50:0]  NaN_out;
  logic         NaN_Sign_out;
  logic         Sign_out;

  // Remainder = 0?
  assign zero_rem = ~(|regr_out);
  // Remainder sign: sign_rem is the inverse of the remainder's top bit
  assign sign_rem = ~regr_out[119];
  // Choose the guard bit from the quotient that matches the normalization
  // ([1,2) when q1[59] is set, otherwise [0,1)). Bit 6 is the guard position
  // for double precision, bit 35 for single precision.
  assign Gdp = q1[59] ? q1[6] : q0[6];
  assign Gsp = q1[59] ? q1[35] : q0[35];
  assign G = P ? Gsp : Gdp;
  // Selection of rounding (from logic/switching):
  //   mux_mant[1] selects Q+1, mux_mant[0] selects Q-1, neither selects Q
  //   (assuming mux3 gives priority to the upper select bit -- mux3 is
  //   defined elsewhere, confirm).
  assign mux_mant[1] = (SignR&rm[1]&rm[0]&G) | (!SignR&rm[1]&!rm[0]&G) |
                       (!rm[1]&!rm[0]&G&!sign_rem) |
                       (SignR&rm[1]&rm[0]&!zero_rem&!sign_rem) |
                       (!SignR&rm[1]&!rm[0]&!zero_rem&!sign_rem);
  assign mux_mant[0] = (!SignR&rm[0]&!G&!zero_rem&sign_rem) |
                       (!rm[1]&rm[0]&!G&!zero_rem&sign_rem) |
                       (SignR&rm[1]&!rm[0]&!G&!zero_rem&sign_rem);

  // Which Q? Pick the "1" set when q1[59] is set, otherwise the "0" set.
  mux2 #(60) mx1 (q0, q1, q1[59], q);
  mux2 #(60) mx2 (qm0, qm1, q1[59], qm);
  mux2 #(60) mx3 (qp0, qp1, q1[59], qp);
  // Choose Q, Q+1, Q-1
  mux3 #(60) mx4 (q, qm, qp, mux_mant, Tmant);
  // Drop the leading bit and the guard/extra low bits: 52-bit fraction
  assign Smant = Tmant[58:7];
  // Compute the value of the exponent
  //   exponent is modified if we choose:
  //   1.) we choose any qm0, qp0, q0 (since we shift mant)
  //   2.) we choose qp and we overflow (for RU)
  // exp_ovf is 1 when any fraction bit of qp is set (only the upper 23
  // fraction bits are considered for single precision); the exponent is
  // bumped only when those bits are all zero.
  assign exp_ovf = |{qp[58:36], (qp[35:7] & {29{~P}})};
  assign Texp = exp_diff - {{12{1'b0}}, ~q1[59]} + {{12{1'b0}}, mux_mant[1]&qp1[59]&~exp_ovf};

  // Overflow occurs for double precision if Texp[12:11] = 01, or if
  // Texp[10] to Texp[0] are all ones. To encourage sharing with single
  // precision overflow detection, the lower 7 bits are tested separately.
  assign Texp_l7o = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
  assign OvFlow_DP = (~Texp[12]&Texp[11]) | (Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o);

  // Overflow occurs for single precision if (Texp[10] is one) and
  // ((Texp[9] or Texp[8] or Texp[7]) is one) or (Texp[6] to Texp[0]
  // are all ones).
  assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);

  // Underflow occurs for double precision if Texp[12] and Texp[11] are
  // both one, or if Texp[11] to Texp[0] are all zeros.
  assign Texp_l7z = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
  assign UnFlow_DP = (Texp[12]&Texp[11]) | ~Texp[11]&~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;

  // Underflow occurs for single precision if (Texp[10] is zero) and
  // ((Texp[9] or Texp[8] or Texp[7]) is zero, or Texp[6] to Texp[0]
  // are all zeros).
  assign UnFlow_SP = ~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z);

  // Set the overflow and underflow flags. They are only set for a Valid
  // (non-special-case) result.
  //   sel_inv = 000 : Valid
  //   sel_inv = 111 : NaN
  assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0];
  assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0];
  assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
  assign OverFlow = (P & OvFlow_SP | OvFlow_DP) & Valid;
  // Divide by zero: a divide (op_type = 0) of a nonzero X by a zero Y that
  // is not already a NaN case.
  assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN;

  // The final result is Inexact if any rounding occurred (i.e., the guard
  // bit is one or the remainder is nonzero), or if the result overflows, or
  // if the result underflows and the underflow trap is not enabled -- and
  // the value of the result was not previously set by an exception case.
  assign Inexact = (G|~zero_rem|OverFlow|(UnderFlow&~UnEn))&Valid;

  // Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0,
  // Invalid.
  // NOTE(review): Inexact is the MSB and Invalid the LSB here; consumers
  // expecting a different flag ordering must reorder -- confirm at the
  // instantiation site.
  assign Flags = {Inexact, UnderFlow, OverFlow, Div0, Invalid};

  // Determine sign. Rzero marks a result forced to zero: either underflow
  // or the sel_inv = 011 special case.
  assign Rzero = UnderFlow | (~sel_inv[2]&sel_inv[1]&sel_inv[0]);
  assign Rsign = SignR;

  // The exponent of the final result is zero if the final result is
  // zero or a denorm, all ones if the final result is NaN or Infinite
  // or overflow occurred and the magnitude of the number is
  // not rounded toward zero, and all ones with an LSB of zero
  // if overflow occurred and the magnitude of the number is
  // rounded toward zero. If the result is single precision,
  // Texp[7] should be inverted. When the Overflow trap is enabled (OvEn = 1)
  // and overflow occurs and the operation is not conversion, bits 10 and 9 are
  // inverted for double precision, and bits 7 and 6 are inverted for single precision.
  assign Round_zero = ~rm[1]&rm[0] | ~SignR&rm[0] | SignR&rm[1]&~rm[0];
  assign VeryLarge = OverFlow & ~OvEn;
  assign Infinite = (VeryLarge & ~Round_zero) | sel_inv[1];
  assign Largest = VeryLarge & Round_zero;
  assign Adj_exp = OverFlow & OvEn;
  assign Rexp[10:1] = ({10{~Valid}} |
                       {Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8],
                        (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} |
                       {10{VeryLarge}})&{10{~Rzero | NaN}};
  assign Rexp[0] = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;

  // If the result is zero or infinity, the mantissa is all zeros.
  // If the result is NaN, the mantissa MSB is one and the lower bits carry
  // the payload of the NaN operand (Float2 only when Y is NaN and X is not).
  // If the result is the largest floating point number, the mantissa
  // is all ones. Otherwise, the mantissa is not changed.
  assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0];
  assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63];
  assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ |
                    NaN_Sign_out&(XNaNQ|YNaNQ);
  // FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1
  //   | Float1[63]&op_type; (logic to fix this but removed for now)

  assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
  // The lower mantissa bits are forced to zero when op_type = 1 and the
  // first operand is negative and nonzero.
  assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) |
                        (NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}});

  // For single precision, the 8 least significant bits of the exponent
  // and 23 most significant bits of the mantissa contain bits used
  // for the final result, and the upper 32 bits of Result are all ones.
  // A double precision result is returned if overflow has occurred and
  // the overflow trap is enabled.
  assign OvCon = OverFlow & OvEn;
  assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]}
                             : {Sign_out, Rexp, Rmant};

endmodule // rounder_div
|
|
|