From e6d19be87cc5e49076f84260c8039add714855c7 Mon Sep 17 00:00:00 2001
From: James Stine <james.stine@okstate.edu>
Date: Wed, 14 Jul 2021 06:48:29 -0500
Subject: [PATCH] put back for now to test fdiv

---
 wally-pipelined/src/fpu/fdivsqrt.sv | 256 ++++++++++++++++++++++++++++
 1 file changed, 256 insertions(+)
 create mode 100755 wally-pipelined/src/fpu/fdivsqrt.sv

diff --git a/wally-pipelined/src/fpu/fdivsqrt.sv b/wally-pipelined/src/fpu/fdivsqrt.sv
new file mode 100755
index 00000000..6d8da23f
--- /dev/null
+++ b/wally-pipelined/src/fpu/fdivsqrt.sv
@@ -0,0 +1,256 @@
+//
+// File name : fpdiv
+// Title     : Floating-Point Divider/Square-Root
+// project   : FPU
+// Library   : fpdiv
+// Author(s) : James E. Stine, Jr.
+// Purpose   : definition of main unit to floating-point div/sqrt
+// notes :   
+//
+// Copyright Oklahoma State University
+//
+// Basic Operations
+//
+// Step 1: Load operands, set flags, and convert SP to DP
+// Step 2: Check for special inputs ( +/- Infinity,  NaN)
+// Step 3: Exponent Logic
+// Step 4: Divide/Sqrt using Goldschmidt
+// Step 5: Normalize the result.
+//   Shift left until normalized.  Normalized when the value to the 
+//   left of the binary point is 1.
+// Step 6: Round the result.
+// Step 7: Put quotient/remainder onto output.
+//
+
+// `timescale 1ps/1ps
+module fdivsqrt (FDivSqrtDoneE, FDivResultM, FDivSqrtFlgM, DivInput1E, DivInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
+	      FDivStartE, reset, clk, FDivBusyE, HoldInputs);
+   // Top level of the FP divide/sqrt unit: input conversion, exception check, exponent logic, Goldschmidt datapath, control FSM, rounder, output registers.
+   input [63:0] DivInput1E;		// 1st input operand (A)
+   input [63:0] DivInput2E;		// 2nd input operand (B)
+   input [2:0] 	FrmE;		// Rounding mode (forwarded to the rounder)
+   input 	DivOpType;	// Operation select: 0 = divide, 1 = square root (see signResult/expF below)
+   input 	FmtE;   		// Result Precision (0 for double, 1 for single) //***will need to swap this
+   input 	DivOvEn;		// Overflow trap enabled
+   input 	DivUnEn;   	// Underflow trap enabled
+
+   input 	FDivStartE;	// Start request, consumed by the control FSM
+   input 	reset;
+   input 	clk;   
+
+   output [63:0] FDivResultM;	// Result of operation
+   output [4:0]  FDivSqrtFlgM;   	// IEEE exception flags 
+   output 	 FDivSqrtDoneE;	// Driven by the FSM; also the enable of the output registers
+   output    FDivBusyE, HoldInputs;	// Status outputs driven by the control FSM
+
+   supply1 	  vdd;	// constant logic 1
+   supply0 	  vss;   	// constant logic 0
+
+   wire [63:0] 	 Float1; 	// operand A after convert_inputs_div
+   wire [63:0] 	 Float2;	// operand B after convert_inputs_div
+   wire [63:0] 	 IntValue;
+   
+   wire 	 DivDenormM;   	// Registered copy of DenormIO; not connected to any port
+   wire [12:0] 	 exp1, exp2, expF;
+   wire [12:0] 	 exp_diff, bias;
+   wire [13:0] 	 exp_sqrt;
+   wire [12:0] 	 exp_s;
+   wire [12:0] 	 exp_c;
+   
+   wire [10:0] 	 exponent, exp_pre;
+   wire [63:0] 	 Result;   
+   wire [52:0] 	 mantissaA;
+   wire [52:0] 	 mantissaB; 
+   wire [63:0] 	 sum, sum_tc, sum_corr, sum_norm;
+   
+   wire [5:0] 	 align_shift;
+   wire [5:0] 	 norm_shift;
+   wire [2:0] 	 sel_inv;
+   wire		 op1_Norm, op2_Norm;
+   wire		 opA_Norm, opB_Norm;
+   wire		 Invalid;
+   wire 	 DenormIn, DenormIO;
+   wire [4:0] 	 FlagsIn;   	
+   wire 	 exp_gt63;
+   wire 	 Sticky_out;
+   wire 	 signResult, sign_corr;
+   wire          corr_sign;
+   wire 	 zeroB;         
+   wire 	 convert;
+   wire          swap;
+   wire          sub;
+   
+   wire [63:0] 	 q1, qm1, qp1, q0, qm0, qp0;
+   wire [63:0] 	 rega_out, regb_out, regc_out, regd_out;
+   wire [127:0]  regr_out;
+   wire [2:0] 	 sel_muxa, sel_muxb;
+   wire 	 sel_muxr;   
+   wire 	 load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;
+
+   wire 	 donev, sel_muxrv, sel_muxsv;
+   wire [1:0] 	 sel_muxav, sel_muxbv;   
+   wire 	 load_regav, load_regbv, load_regcv;
+   wire 	 load_regrv, load_regsv;
+   
+   logic exp_cout1, exp_cout2, exp_odd, open;	// exp_cout1/exp_cout2/open only sink unused adder outputs
+   // NOTE(review): IntValue, exponent, exp_pre, sum*, align_shift, norm_shift, opA_Norm, opB_Norm, exp_gt63, Sticky_out,
+   // sign_corr, corr_sign, zeroB, convert, swap, sub and the *v control wires are declared but never referenced in this module.
+   // Convert the input operands to their appropriate forms based on 
+   // the original operands, the DivOpType, and their precision FmtE. 
+   // Single precision inputs are converted to double precision.
+   // NOTE(review): convert_inputs_div is defined outside this file; the
+   // conversion details stated here cannot be confirmed from this module.
+   convert_inputs_div divconv1 (Float1, Float2, DivInput1E, DivInput2E, DivOpType, FmtE);
+
+   // Test for exceptions and return the "Invalid Operation" and
+   // "Denormalized Input" indications (Invalid, DenormIn). "sel_inv" is
+   // forwarded to the rounder to select the result. Also, op1_Norm
+   // and op2_Norm are one if DivInput1E and DivInput2E are not zero or denormalized.
+   // (op1_Norm and op2_Norm are not consumed anywhere else in this module.)
+   exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, 
+		   Float1, Float2, DivOpType);
+
+   // Determine Sign/Mantissa: divide uses signA^signB, sqrt uses signA; a leading 1 is always prepended to each fraction
+   assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
+   assign mantissaA = {vdd, Float1[51:0]};
+   assign mantissaB = {vdd, Float2[51:0]};
+   // Perform Exponent Subtraction - expA - expB + Bias   
+   assign exp1 = {2'b0, Float1[62:52]};
+   assign exp2 = {2'b0, Float2[62:52]};
+   // bias : DP = 2^{11-1}-1 = 1023
+   assign bias = {3'h0, 10'h3FF};
+   // Divide exponent: exp1 + ~exp2 + bias, with carry-in 1 below completing the two's-complement subtract
+   csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); //***adder
+   exp_add explogic1 (exp_cout1, {open, exp_diff}, //***adder?
+		      {vss, exp_s}, {vss, exp_c}, 1'b1);
+   // Sqrt exponent: exp_odd is 1 when the biased exponent LSB is 0 (bias is odd, so the unbiased exponent is odd)
+   assign exp_odd = Float1[52] ? vss : vdd;
+   exp_add explogic2 (exp_cout2, exp_sqrt, //***adder?
+		      {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);
+   // Choose correct exponent (sqrt takes exp_sqrt[13:1], i.e. the sum above divided by two)
+   assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;   
+
+   // Main Goldschmidt/Division Routine
+   divconv goldy (q1, qm1, qp1, q0, qm0, qp0, 
+		  rega_out, regb_out, regc_out, regd_out,
+		  regr_out, mantissaB, mantissaA, 
+		  sel_muxa, sel_muxb, sel_muxr, 
+		  reset, clk,
+		  load_rega, load_regb, load_regc, load_regd,
+		  load_regr, load_regs, FmtE, DivOpType, exp_odd);
+
+   // FSM : control divider
+   fsm control (FDivSqrtDoneE, load_rega, load_regb, load_regc, load_regd, 
+		load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, 
+		clk, reset, FDivStartE, DivOpType, FDivBusyE, HoldInputs);
+   
+   // Round the mantissa to a 52-bit value, with the leading one
+   // removed. The rounding unit also handles special cases and 
+   // sets the exception flags.
+   //***add max magnitude and swap negative and positive infinity
+   rounder_div divround1 (Result, DenormIO, FlagsIn, 
+		   FrmE, FmtE, DivOvEn, DivUnEn, expF, 
+   		   sel_inv, Invalid, DenormIn, signResult, 
+		   q1, qm1, qp1, q0, qm0, qp0, regr_out);
+
+   // Store the final result and the exception flags in registers (captured when FDivSqrtDoneE is high).
+   flopenr #(64) rega (clk, reset, FDivSqrtDoneE, Result, FDivResultM);
+   flopenr #(1) regb (clk, reset, FDivSqrtDoneE, DenormIO, DivDenormM);   
+   flopenr #(5) regc (clk, reset, FDivSqrtDoneE, FlagsIn, FDivSqrtFlgM);   
+   
+endmodule // fdivsqrt
+
+//
+// Brent-Kung Prefix Adder 
+//   (yes, it is 14 bits as my generator is broken for 13 bits :( 
+//    assume, synthesizer will delete stuff not needed )
+//
+module exp_add (cout, sum, a, b, cin);
+   // 14-bit adder built on the brent_kung carry network: {cout, sum} = a + b + cin (assuming standard grey/black prefix cells, defined elsewhere).
+   input [13:0] a, b;	// addends
+   input 	cin;	// carry-in
+   
+   output [13:0] sum;	// a + b + cin, low 14 bits
+   output 	 cout;	// carry out of bit 13
+
+   wire [14:0] 	 p,g;	// propagate/generate, shifted up one position so index 0 holds the carry-in
+   wire [13:0] 	 c;	// connects positionally to brent_kung's c[14:1]: c[i] here is the carry into p[i+1]
+
+   // pre-computation: cin is injected as the generate of position 0 (its propagate is 0)
+   assign p={a^b,1'b0};
+   assign g={a&b, cin};
+
+   // prefix tree (only the low 14 positions of p/g feed the tree; position 14 is resolved in cout below)
+   brent_kung prefix_tree(c, p[13:0], g[13:0]);
+
+   // post-computation: sum bit = propagate XOR incoming carry; cout = generate | propagate & carry at the top position
+   assign sum=p[14:1]^c;
+   assign cout=g[14]|(p[14]&c[13]);
+
+endmodule // exp_add
+
+module brent_kung (c, p, g);
+   // 14-position prefix carry network: each c[k+1] is driven by the prefix term G_k_0 built from p and g.
+   input [13:0] p;	// per-position propagate
+   input [13:0] g;	// per-position generate
+   output [14:1] c;	// c[k+1] = G_k_0 (see final stage)
+
+   logic G_1_0, G_3_2,G_5_4,G_7_6,G_9_8,G_11_10,G_13_12,G_3_0,G_7_4,G_11_8;
+   logic P_3_2,P_5_4,P_7_6,P_9_8,P_11_10,P_13_12,P_7_4,P_11_8;
+   logic G_7_0,G_11_0,G_5_0,G_9_0,G_13_0,G_2_0,G_4_0,G_6_0,G_8_0,G_10_0,G_12_0;
+   // parallel-prefix, Brent-Kung
+   // Naming: G_hi_lo / P_hi_lo label the positions hi..lo a term covers; grey/black cells are defined outside this file (presumably the usual prefix operators).
+   // Stage 1: Generates G/P pairs for adjacent positions (2-position groups)
+   grey b_1_0 (G_1_0, {g[1],g[0]}, p[1]);
+   black b_3_2 (G_3_2, P_3_2, {g[3],g[2]}, {p[3],p[2]});
+   black b_5_4 (G_5_4, P_5_4, {g[5],g[4]}, {p[5],p[4]});
+   black b_7_6 (G_7_6, P_7_6, {g[7],g[6]}, {p[7],p[6]});
+   black b_9_8 (G_9_8, P_9_8, {g[9],g[8]}, {p[9],p[8]});
+   black b_11_10 (G_11_10, P_11_10, {g[11],g[10]}, {p[11],p[10]});
+   black b_13_12 (G_13_12, P_13_12, {g[13],g[12]}, {p[13],p[12]});
+
+   // Stage 2: Generates G/P pairs by merging two stage-1 groups (4-position groups)
+   grey g_3_0 (G_3_0, {G_3_2,G_1_0}, P_3_2);
+   black b_7_4 (G_7_4, P_7_4, {G_7_6,G_5_4}, {P_7_6,P_5_4});
+   black b_11_8 (G_11_8, P_11_8, {G_11_10,G_9_8}, {P_11_10,P_9_8});
+
+   // Stage 3: Merges two stage-2 groups (8-position prefix G_7_0)
+   grey g_7_0 (G_7_0, {G_7_4,G_3_0}, P_7_4);
+
+   // Stage 4: no cells are instantiated at this level for 14 positions
+
+   // Stage 5: Merges the 4-position group 11..8 into the prefix 7..0
+   grey g_11_0 (G_11_0, {G_11_8,G_7_0}, P_11_8);
+
+   // Stage 6: Merges 2-position groups into the prefix directly below them
+   grey g_5_0 (G_5_0, {G_5_4,G_3_0}, P_5_4);
+   grey g_9_0 (G_9_0, {G_9_8,G_7_0}, P_9_8);
+   grey g_13_0 (G_13_0, {G_13_12,G_11_0}, P_13_12);
+
+   // Last grey cell stage: fills in the remaining even-indexed prefixes from single positions
+   grey g_2_0 (G_2_0, {g[2],G_1_0}, p[2]);
+   grey g_4_0 (G_4_0, {g[4],G_3_0}, p[4]);
+   grey g_6_0 (G_6_0, {g[6],G_5_0}, p[6]);
+   grey g_8_0 (G_8_0, {g[8],G_7_0}, p[8]);
+   grey g_10_0 (G_10_0, {g[10],G_9_0}, p[10]);
+   grey g_12_0 (G_12_0, {g[12],G_11_0}, p[12]);
+
+   // Final Stage: Apply c_k+1=G_k_0 (c[1] is simply g[0])
+   assign c[1]=g[0];
+   assign c[2]=G_1_0;
+   assign c[3]=G_2_0;
+   assign c[4]=G_3_0;
+   assign c[5]=G_4_0;
+   assign c[6]=G_5_0;
+   assign c[7]=G_6_0;
+   assign c[8]=G_7_0;
+   assign c[9]=G_8_0;
+
+   assign c[10]=G_9_0;
+   assign c[11]=G_10_0;
+   assign c[12]=G_11_0;
+   assign c[13]=G_12_0;
+   assign c[14]=G_13_0;
+
+endmodule // brent_kung
+