///////////////////////////////////////////
//
// Written: James Stine
// Modified: 8/1/2018
//
// Purpose: Floating point divider/square root rounder unit (Goldschmidt)
// 
// A component of the Wally configurable RISC-V project.
// 
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
// software and associated documentation files (the "Software"), to deal in the Software 
// without restriction, including without limitation the rights to use, copy, modify, merge, 
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
// to whom the Software is furnished to do so, subject to the following conditions:
//
//   The above copyright notice and this permission notice shall be included in all copies or 
//   substantial portions of the Software.
//
//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
//   OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////

// rounder_div
//
// Purely combinational rounding / result-packing stage. From a set of
// candidate quotients and the final remainder it selects the rounded
// significand, adjusts the exponent, detects overflow/underflow, and
// assembles the 64-bit Result word and the 5-bit exception Flags.
//
// Inputs
//   rm        2-bit rounding mode. From the selection logic below the
//             encoding appears to be 00 = nearest, 01 = toward zero,
//             10 = toward +inf, 11 = toward -inf -- TODO confirm with caller.
//   P         precision select: 1 picks the single-precision guard bit and
//             the single-precision Result layout, 0 the double-precision ones.
//   OvEn      overflow trap enable (1 = enabled).
//   UnEn      underflow trap enable (1 = enabled).
//   exp_diff  13-bit exponent value that Texp is derived from.
//   sel_inv   3-bit special-case code: 000 = ordinary (Valid) result,
//             111 = NaN, 011 forces a zero exponent/mantissa, and any code
//             with bit 1 set forces the "Infinite" path.
//   Invalid   passed straight through to Flags[0].
//   SignR     sign used for the rounding-direction decision and for a
//             non-NaN result.
//   Float1/2  raw operand words; only bit 63 and bits [50:0] are read, to
//             supply the sign and payload of a NaN result.
//   XNaNQ, YNaNQ, XZeroQ, YZeroQ, XInfQ, YInfQ
//             operand classification bits.
//   op_type   operation select. Div0 is only raised when op_type = 0, so 0 is
//             presumably divide and 1 square root -- TODO confirm.
//   q1,qm1,qp1 / q0,qm0,qp0
//             two sets of 60-bit candidates (q, qm, qp); the *1 set is used
//             when q1[59] is 1, the *0 set otherwise. Per the original
//             comment these are Q, Q-1 and Q+1 -- TODO confirm which of
//             qm/qp is which against the producer.
//   regr_out  120-bit remainder.
//
// Outputs
//   Result    packed result word (see the final assign for both layouts).
//   Flags     {Inexact, UnderFlow, OverFlow, Div0, Invalid}.
//
// mux2 and mux3 are defined outside this file; their port order is assumed
// to be (d0, d1, sel, y) and (d0, d1, d2, sel, y) -- TODO confirm.
module rounder_div (
    input logic [1:0] 	rm,
    input logic 	P,
    input logic 	OvEn,
    input logic 	UnEn,
    input logic [12:0] 	exp_diff,
    input logic [2:0] 	sel_inv,
    input logic 	Invalid,
    input logic 	SignR,
    input logic [63:0] 	Float1,
    input logic [63:0] 	Float2,
    input logic 	XNaNQ,
    input logic 	YNaNQ,
    input logic 	XZeroQ,
    input logic 	YZeroQ, 
    input logic 	XInfQ,
    input logic 	YInfQ,
    input logic 	op_type, 
    input logic [59:0] 	q1,
    input logic [59:0] 	qm1,
    input logic [59:0] 	qp1,
    input logic [59:0] 	q0,
    input logic [59:0] 	qm0,
    input logic [59:0] 	qp0, 
    input logic [119:0] regr_out,
   
    output logic [63:0] Result,
    output logic [4:0] 	Flags
    );
      
   logic 		Rsign;
   logic [10:0] 	Rexp;
   logic [12:0] 	Texp;
   logic [51:0] 	Rmant;
   logic [59:0] 	Tmant;
   logic [51:0] 	Smant;   
   logic 		Rzero;
   logic 	       Gdp, Gsp, G;
   logic 	       UnFlow_SP, UnFlow_DP, UnderFlow; 
   logic 	       OvFlow_SP, OvFlow_DP, OverFlow;		
   logic 	       Inexact;
   logic 	       Round_zero;
   logic 	       Infinite;
   logic 	       VeryLarge;
   logic 	       Largest;
   logic 	       Div0;      
   logic 	       Adj_exp;
   logic 	       Valid;
   logic 	       NaN;
   logic 	       Texp_l7z;
   logic 	       Texp_l7o;
   logic 	       OvCon;
   logic 	       zero_rem;
   logic [1:0] 	       mux_mant;
   logic 	       sign_rem;
   logic [59:0]        q, qm, qp;
   logic 	       exp_ovf;

   logic [50:0]        NaN_out;
   logic 	       NaN_Sign_out;   
   logic 	       Sign_out;     

   // zero_rem is 1 when every bit of the remainder is 0.
   assign zero_rem = ~(|regr_out);
   // sign_rem is the INVERTED top bit of the remainder: it is 1 when
   // regr_out[119] is 0.
   assign sign_rem = ~regr_out[119];
   // Guard bit: taken from q1 when q1[59] is set, otherwise from q0.
   // Bit 6 is used for double precision, bit 35 for single precision
   // (original comment: chooses between the [1,2) and [0,1) cases).
   assign Gdp = q1[59] ? q1[6] : q0[6];
   assign Gsp = q1[59] ? q1[35] : q0[35];
   assign G = P ? Gsp : Gdp;   
   // Rounding selection (minimised two-level logic).
   //   mux_mant[1] is set when
   //     - rm = 11 and SignR = 1, or rm = 10 and SignR = 0, and either G is
   //       set or the remainder is non-zero with sign_rem = 0; or
   //     - rm = 00, G is set and sign_rem = 0.
   //   mux_mant[0] is set only when G = 0, the remainder is non-zero and
   //   sign_rem = 1, and additionally
   //     - SignR = 0 with rm[0] = 1, or rm = 01, or SignR = 1 with rm = 10.
   assign mux_mant[1] = (SignR&rm[1]&rm[0]&G) | (!SignR&rm[1]&!rm[0]&G) | 
			(!rm[1]&!rm[0]&G&!sign_rem) | 
			(SignR&rm[1]&rm[0]&!zero_rem&!sign_rem) | 
			(!SignR&rm[1]&!rm[0]&!zero_rem&!sign_rem);
   assign mux_mant[0] = (!SignR&rm[0]&!G&!zero_rem&sign_rem) | 
			(!rm[1]&rm[0]&!G&!zero_rem&sign_rem) | 
			(SignR&rm[1]&!rm[0]&!G&!zero_rem&sign_rem);
   
   // Pick the candidate set: q1[59] is the select for all three muxes
   // (presumably the *1 inputs are chosen when it is 1, matching the guard
   // bit selection above).
   mux2 #(60) mx1 (q0, q1, q1[59], q);
   mux2 #(60) mx2 (qm0, qm1, q1[59], qm);   
   mux2 #(60) mx3 (qp0, qp1, q1[59], qp);
   // Choose among q, qm and qp using mux_mant (original comment: Q, Q+1, Q-1).
   mux3 #(60) mx4 (q, qm, qp, mux_mant, Tmant);
   // The 52-bit stored fraction is bits [58:7] of the chosen value.
   assign Smant = Tmant[58:7];
   // Exponent computation:
   //   Texp = exp_diff
   //          - 1 when q1[59] is 0 (the *0 candidates were chosen; per the
   //              original comment the mantissa is shifted in that case)
   //          + 1 when mux_mant[1] and qp1[59] are set and exp_ovf is 0
   //              (per the original comment: qp was chosen and it
   //              overflowed, for RU).
   // exp_ovf is the OR of qp[58:36] together with qp[35:7] when P = 0, so it
   // is 0 only when all of those qp bits are zero.
   assign exp_ovf = |{qp[58:36], (qp[35:7] & {29{~P}})};
   assign Texp = exp_diff - {{12{1'b0}}, ~q1[59]} + {{12{1'b0}}, mux_mant[1]&qp1[59]&~exp_ovf};
   
   // Double-precision overflow: Texp[12] = 0 with Texp[11] = 1, or
   // Texp[10:0] all ones. To encourage sharing with single-precision
   // overflow detection, the lower 7 bits are tested separately.
   assign Texp_l7o  = Texp[6]&Texp[5]&Texp[4]&Texp[3]&Texp[2]&Texp[1]&Texp[0];
   assign OvFlow_DP = (~Texp[12]&Texp[11]) | (Texp[10]&Texp[9]&Texp[8]&Texp[7]&Texp_l7o);

   // Single-precision overflow: Texp[10] is one and (Texp[9], Texp[8] or
   // Texp[7] is one, or Texp[6:0] are all ones).
   assign OvFlow_SP = Texp[10]&(Texp[9]|Texp[8]|Texp[7]|Texp_l7o);

   // Double-precision underflow: Texp[12] and Texp[11] are both one, or
   // Texp[11:0] are all zeros.
   assign Texp_l7z  = ~Texp[6]&~Texp[5]&~Texp[4]&~Texp[3]&~Texp[2]&~Texp[1]&~Texp[0];
   assign UnFlow_DP = (Texp[12]&Texp[11]) | ~Texp[11]&~Texp[10]&~Texp[9]&~Texp[8]&~Texp[7]&Texp_l7z;
   
   // Single-precision underflow: Texp[10] is zero and (Texp[9], Texp[8] or
   // Texp[7] is zero, or Texp[6:0] are all zeros).
   assign UnFlow_SP = ~Texp[10]&(~Texp[9]|~Texp[8]|~Texp[7]|Texp_l7z);
   
   // Set the overflow and underflow flags. They are only raised for an
   // ordinary result (sel_inv = 000).
   //   sel_inv = 000 : Valid
   //   sel_inv = 111 : NaN
   // The double-precision conditions are OR-ed in regardless of P; the
   // single-precision ones only when P = 1.
   assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0];
   assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0]; 
   assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
   assign OverFlow  = (P & OvFlow_SP | OvFlow_DP) & Valid;
   // Divide-by-zero: Y is zero, X is not zero, op_type is 0 and the result
   // is not the NaN case.
   assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN;   

   // The result is Inexact if the guard bit is set, or the remainder is
   // non-zero, or the result overflows, or it underflows while the
   // underflow trap is not enabled -- and the result was not already
   // fixed by an exception case (Valid).
   assign Inexact = (G|~zero_rem|OverFlow|(UnderFlow&~UnEn))&Valid;

   // Set the IEEE Exception Flags: Inexact, Underflow, Overflow, Div_By_0, 
   // Invalid (Flags[4] down to Flags[0]).
   assign Flags = {Inexact, UnderFlow, OverFlow, Div0, Invalid};

   // Rzero forces a zero exponent and mantissa: on underflow or when
   // sel_inv = 011. Rsign is simply the incoming result sign.
   assign Rzero = UnderFlow | (~sel_inv[2]&sel_inv[1]&sel_inv[0]);
   assign Rsign = SignR;   
      
   // Exponent of the final result:
   //   - all zeros when Rzero is set (unless the NaN code is present);
   //   - all ones when the result is not Valid (NaN / special case);
   //   - all ones on an untrapped overflow (VeryLarge) whose rounding is
   //     away from zero (Infinite), and all ones with an LSB of zero when
   //     the rounding is toward zero (Largest);
   //   - otherwise Texp, with Texp[7] inverted for single precision (P = 1).
   // When the overflow trap is enabled (OvEn = 1) and overflow occurs
   // (Adj_exp), bits 10 and 9 are cleared, and for single precision bits 7
   // and 6 are cleared as well.
   // Round_zero is 1 when the rounding direction is toward zero for this
   // sign: rm = 01, or SignR = 0 with rm[0] = 1, or SignR = 1 with rm = 10.
   assign Round_zero = ~rm[1]&rm[0] | ~SignR&rm[0] | SignR&rm[1]&~rm[0];
   assign VeryLarge = OverFlow & ~OvEn;
   assign Infinite   = (VeryLarge & ~Round_zero) | sel_inv[1];
   assign Largest = VeryLarge & Round_zero;
   assign Adj_exp = OverFlow & OvEn;
   assign Rexp[10:1] = ({10{~Valid}} | 
			{Texp[10]&~Adj_exp, Texp[9]&~Adj_exp, Texp[8], 
			 (Texp[7]^P)&~(Adj_exp&P), Texp[6]&~(Adj_exp&P), Texp[5:1]} | 
		        {10{VeryLarge}})&{10{~Rzero | NaN}};
   assign Rexp[0]    = ({~Valid} | Texp[0] | Infinite)&(~Rzero | NaN)&~Largest;
   
   // Mantissa of the final result:
   //   - zero or infinity: all zeros;
   //   - NaN: MSB set, lower 51 bits copied from an operand (Float2 when
   //     only Y is a NaN, otherwise Float1);
   //   - largest finite number: all ones;
   //   - otherwise the rounded mantissa Smant.
   // The lower 51 bits are additionally forced to zero when op_type = 1,
   // Float1 is negative and X is not zero (presumably square root of a
   // negative number -- TODO confirm).
   // Sign: forced to 1 for 0/0 or inf/inf when op_type = 0; SignR when
   // neither operand is a NaN; otherwise the sign of the NaN operand.
   assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0];
   assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63];
   assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ | 
   		     NaN_Sign_out&(XNaNQ|YNaNQ);
   // FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1
   // | Float1[63]&op_type;  (logic to fix this but removed for now)
   
   assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
   assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) |
			(NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}});
   
   // Result packing.
   // Single precision (P = 1): the upper 32 bits are all ones (presumably
   // NaN-boxing -- TODO confirm), followed by the sign, the 8 least
   // significant exponent bits and the 23 most significant mantissa bits.
   // The double-precision layout {sign, 11-bit exponent, 52-bit mantissa}
   // is used when P = 0, and also when P = 1 but overflow has occurred with
   // the overflow trap enabled (OvCon, same expression as Adj_exp).
   assign OvCon = OverFlow & OvEn;
   assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]}
	           : {Sign_out, Rexp, Rmant};

endmodule // rounder_div