//
// File name : fpdiv
// Title     : Floating-Point Divider/Square-Root
// project   : FPU
// Library   : fpdiv
// Author(s) : James E. Stine, Jr.
// Purpose   : definition of main unit to floating-point div/sqrt
// notes :   
//
// Copyright Oklahoma State University
//
// Basic Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity,  NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.//
//   Shift left until normalized.  Normalized when the value to the 
//   left of the binrary point is 1.
// Step 6: Round the result.// 
// Step 7: Put quotient/remainder onto output.
//

// `timescale 1ps/1ps
module fpdiv (FDivSqrtDoneM, FDivResultM, FDivFlagsM, DivDenormM, FInput1E, FInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
	      FDivStartE, reset, clk, DivBusyM);

   input [63:0] FInput1E;		// 1st input operand (A)
   input [63:0] FInput2E;		// 2nd input operand (B)
   input [2:0] 	FrmE;		// Rounding mode - specify values 
   input 	DivOpType;	// Function opcode
   input 	FmtE;   		// Result Precision (0 for double, 1 for single)
   input 	DivOvEn;		// Overflow trap enabled
   input 	DivUnEn;   	// Underflow trap enabled

   input 	FDivStartE;
   input 	reset;
   input 	clk;   

   output [63:0] FDivResultM;	// Result of operation
   output [4:0]  FDivFlagsM;   	// IEEE exception flags 
   output 	 DivDenormM;   	// DivDenormM on input or output
   output 	 FDivSqrtDoneM;
   output    DivBusyM;

   supply1 	  vdd;
   supply0 	  vss;   

   wire [63:0] 	 Float1; 
   wire [63:0] 	 Float2;
   wire [63:0] 	 IntValue;
   
   wire [12:0] 	 exp1, exp2, expF;
   wire [12:0] 	 exp_diff, bias;
   wire [13:0] 	 exp_sqrt;
   wire [12:0] 	 exp_s;
   wire [12:0] 	 exp_c;
   
   wire [10:0] 	 exponent, exp_pre;
   wire [63:0] 	 Result;   
   wire [52:0] 	 mantissaA;
   wire [52:0] 	 mantissaB; 
   wire [63:0] 	 sum, sum_tc, sum_corr, sum_norm;
   
   wire [5:0] 	 align_shift;
   wire [5:0] 	 norm_shift;
   wire [2:0] 	 sel_inv;
   wire		 op1_Norm, op2_Norm;
   wire		 opA_Norm, opB_Norm;
   wire		 Invalid;
   wire 	 DenormIn, DenormIO;
   wire [4:0] 	 FlagsIn;   	
   wire 	 exp_gt63;
   wire 	 Sticky_out;
   wire 	 signResult, sign_corr;
   wire          corr_sign;
   wire 	 zeroB;         
   wire 	 convert;
   wire          swap;
   wire          sub;
   
   wire [63:0] 	 q1, qm1, qp1, q0, qm0, qp0;
   wire [63:0] 	 rega_out, regb_out, regc_out, regd_out;
   wire [127:0]  regr_out;
   wire [2:0] 	 sel_muxa, sel_muxb;
   wire 	 sel_muxr;   
   wire 	 load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;

   wire 	 donev, sel_muxrv, sel_muxsv;
   wire [1:0] 	 sel_muxav, sel_muxbv;   
   wire 	 load_regav, load_regbv, load_regcv;
   wire 	 load_regrv, load_regsv;
   
   logic exp_cout1, exp_cout2, exp_odd, open;
   // Convert the input operands to their appropriate forms based on 
   // the orignal operands, the DivOpType , and their precision FmtE. 
   // Single precision inputs are converted to double precision 
   // and the sign of the first operand is set appropratiately based on
   // if the operation is absolute value or negation. 
   convert_inputs_div divconv1 (Float1, Float2, FInput1E, FInput2E, DivOpType, FmtE);

   // Test for exceptions and return the "Invalid Operation" and
   // "Denormalized" Input FDivFlagsM. The "sel_inv" is used in
   // the third pipeline stage to select the result. Also, op1_Norm
   // and op2_Norm are one if FInput1E and FInput2E are not zero or denormalized.
   // sub is one if the effective operation is subtaction. 
   exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, 
		   Float1, Float2, DivOpType);

   // Determine Sign/Mantissa
   assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
   assign mantissaA = {vdd, Float1[51:0]};
   assign mantissaB = {vdd, Float2[51:0]};
   // Perform Exponent Subtraction - expA - expB + Bias   
   assign exp1 = {2'b0, Float1[62:52]};
   assign exp2 = {2'b0, Float2[62:52]};
   // bias : DP = 2^{11-1}-1 = 1023
   assign bias = {3'h0, 10'h3FF};
   // Divide exponent
   csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
   exp_add explogic1 (exp_cout1, {open, exp_diff}, 
		      {vss, exp_s}, {vss, exp_c}, 1'b1);
   // Sqrt exponent (check if exponent is odd)
   assign exp_odd = Float1[52] ? vss : vdd;
   exp_add explogic2 (exp_cout2, exp_sqrt, 
		      {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
   // Choose correct exponent
   assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;   

   // Main Goldschmidt/Division Routine
   divconv goldy (q1, qm1, qp1, q0, qm0, qp0, 
		  rega_out, regb_out, regc_out, regd_out,
		  regr_out, mantissaB, mantissaA, 
		  sel_muxa, sel_muxb, sel_muxr, 
		  reset, clk,
		  load_rega, load_regb, load_regc, load_regd,
		  load_regr, load_regs, FmtE, DivOpType, exp_odd);

   // FSM : control divider
   fsm control (FDivSqrtDoneM, load_rega, load_regb, load_regc, load_regd, 
		load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, 
		clk, reset, FDivStartE, DivOpType, DivBusyM);
   
   // Round the mantissa to a 52-bit value, with the leading one
   // removed. The rounding units also handles special cases and 
   // set the exception flags.
   //***add max magnitude and swap negitive and positive infinity
   rounder_div divround1 (Result, DenormIO, FlagsIn, 
		   FrmE, FmtE, DivOvEn, DivUnEn, expF, 
   		   sel_inv, Invalid, DenormIn, signResult, 
		   q1, qm1, qp1, q0, qm0, qp0, regr_out);

   // Store the final result and the exception flags in registers.
   flopenr #(64) rega (clk, reset, FDivSqrtDoneM, Result, FDivResultM);
   flopenr #(1) regb (clk, reset, FDivSqrtDoneM, DenormIO, DivDenormM);   
   flopenr #(5) regc (clk, reset, FDivSqrtDoneM, FlagsIn, FDivFlagsM);   
   
endmodule // fpadd

//
// Brent-Kung Prefix Adder 
//   (yes, it is 14 bits as my generator is broken for 13 bits :( 
//    assume, synthesizer will delete stuff not needed )
//
module exp_add (cout, sum, a, b, cin);
   
   input [13:0] a, b;
   input 	cin;
   
   output [13:0] sum;
   output 	 cout;

   wire [14:0] 	 p,g;
   wire [13:0] 	 c;

   // pre-computation
   assign p={a^b,1'b0};
   assign g={a&b, cin};

   // prefix tree
   brent_kung prefix_tree(c, p[13:0], g[13:0]);

   // post-computation
   assign sum=p[14:1]^c;
   assign cout=g[14]|(p[14]&c[13]);

endmodule // exp_add

module brent_kung (c, p, g);
   
   input [13:0] p;
   input [13:0] g;
   output [14:1] c;

   logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
   logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
   logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
   // parallel-prefix, Brent-Kung

   // Stage 1: Generates G/FmtE pairs that span 1 bits
   grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
   black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
   black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
   black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
   black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});

   // Stage 2: Generates G/FmtE pairs that span 2 bits
   grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
   black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
   black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});

   // Stage 3: Generates G/FmtE pairs that span 4 bits
   grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);

   // Stage 4: Generates G/FmtE pairs that span 8 bits

   // Stage 5: Generates G/FmtE pairs that span 4 bits
   grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);

   // Stage 6: Generates G/FmtE pairs that span 2 bits
   grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
   grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
   grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);

   // Last grey cell stage 
   grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
   grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
   grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
   grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
   grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
   grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);

   // Final Stage: Apply c_k+1=G_k_0
   assign c[1]=g[0];
   assign c[2]=G_1_0;
   assign c[3]=G_2_0;
   assign c[4]=G_3_0;
   assign c[5]=G_4_0;
   assign c[6]=G_5_0;
   assign c[7]=G_6_0;
   assign c[8]=G_7_0;
   assign c[9]=G_8_0;

   assign c[10]=G_9_0;
   assign c[11]=G_10_0;
   assign c[12]=G_11_0;
   assign c[13]=G_12_0;
   assign c[14]=G_13_0;

endmodule // brent_kung