cvw/wally-pipelined/src/fpu/fpdiv.sv
2021-05-25 20:04:34 -04:00

255 lines
8.4 KiB
Systemverilog
Executable File

//
// File name : fpdiv
// Title : Floating-Point Divider/Square-Root
// project : FPU
// Library : fpdiv
// Author(s) : James E. Stine, Jr.
// Purpose : definition of main unit to floating-point div/sqrt
// notes :
//
// Copyright Oklahoma State University
//
// Basic Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity, NaN)
// Step 3: Exponent Logic
// Step 4: Divide/Sqrt using Goldschmidt
// Step 5: Normalize the result.
// Shift left until normalized. Normalized when the value to the
// left of the binary point is 1.
// Step 6: Round the result.
// Step 7: Put quotient/remainder onto output.
//
// `timescale 1ps/1ps
// fpdiv: top level of the floating-point divide / square-root unit.
//
// Wires together, in order: operand conversion (convert_inputs_div),
// special-case detection (exception_div), the exponent datapath
// (csa + exp_add), the iterative mantissa datapath (divconv) with its
// controller (fsm), the rounder (rounder_div) and the output registers.
// All of those sub-modules except exp_add are defined outside this file
// section; they are connected positionally, so the argument order below
// must track their port order.
//
// Inputs
//   FInput1E, FInput2E : operands A and B
//   FrmE               : rounding mode
//   DivOpType          : function opcode; 1 selects the square-root
//                        exponent/sign path below, 0 the divide path
//   FmtE               : result precision (0 for double, 1 for single)
//   DivOvEn, DivUnEn   : overflow / underflow trap enables
//   FDivStartE         : start request passed to the control FSM
//   reset, clk         : reset and clock
// Outputs
//   FDivResultM        : result, captured when FDivSqrtDoneM is asserted
//   FDivFlagsM         : IEEE exception flags, captured with the result
//   DivDenormM         : denormal on input or output, captured with the result
//   FDivSqrtDoneM      : done indication from the control FSM
//   DivBusyM           : busy indication from the control FSM
module fpdiv (FDivSqrtDoneM, FDivResultM, FDivFlagsM, DivDenormM, FInput1E, FInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
              FDivStartE, reset, clk, DivBusyM);

   input [63:0]   FInput1E;       // 1st input operand (A)
   input [63:0]   FInput2E;       // 2nd input operand (B)
   input [2:0]    FrmE;           // Rounding mode - specify values
   input          DivOpType;      // Function opcode
   input          FmtE;           // Result Precision (0 for double, 1 for single)
   input          DivOvEn;        // Overflow trap enabled
   input          DivUnEn;        // Underflow trap enabled
   input          FDivStartE;
   input          reset;
   input          clk;

   output [63:0]  FDivResultM;    // Result of operation
   output [4:0]   FDivFlagsM;     // IEEE exception flags
   output         DivDenormM;     // DivDenormM on input or output
   output         FDivSqrtDoneM;
   output         DivBusyM;

   // Constant one / zero nets.
   supply1        vdd;
   supply0        vss;

   // Converted operands and the fields derived from them.
   wire [63:0]    Float1;
   wire [63:0]    Float2;
   wire [52:0]    mantissaA;
   wire [52:0]    mantissaB;
   wire           signResult;

   // Exponent datapath.
   wire [12:0]    exp1, exp2, expF;
   wire [12:0]    exp_diff, bias;
   wire [13:0]    exp_sqrt;
   wire [12:0]    exp_s;
   wire [12:0]    exp_c;
   // exp_cout1/exp_cout2 and open only terminate adder outputs that are
   // not consumed in this module.
   logic          exp_cout1, exp_cout2, exp_odd, open;

   // Special-case detection.
   wire [2:0]     sel_inv;
   wire           op1_Norm, op2_Norm;
   wire           Invalid;
   wire           DenormIn, DenormIO;

   // Rounder outputs.
   wire [63:0]    Result;
   wire [4:0]     FlagsIn;

   // Iteration datapath outputs.
   wire [63:0]    q1, qm1, qp1, q0, qm0, qp0;
   wire [63:0]    rega_out, regb_out, regc_out, regd_out;
   wire [127:0]   regr_out;

   // Control signals from the FSM to the iteration datapath.
   wire [2:0]     sel_muxa, sel_muxb;
   wire           sel_muxr;
   wire           load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;

   // Convert the input operands to their appropriate forms based on
   // the original operands, the DivOpType, and their precision FmtE.
   // Single precision inputs are converted to double precision.
   convert_inputs_div divconv1 (Float1, Float2, FInput1E, FInput2E, DivOpType, FmtE);

   // Test for exceptions and return the "Invalid Operation" and
   // "Denormalized" input flags. sel_inv is forwarded to the rounder to
   // select the result. op1_Norm and op2_Norm are one if FInput1E and
   // FInput2E are not zero or denormalized (they are not used further
   // in this module).
   exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
                          Float1, Float2, DivOpType);

   // Determine Sign/Mantissa.
   // Divide: XOR of the operand signs. Sqrt: sign of operand A.
   assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
   // The leading one is prepended unconditionally.
   assign mantissaA = {vdd, Float1[51:0]};
   assign mantissaB = {vdd, Float2[51:0]};

   // Perform Exponent Subtraction - expA - expB + Bias
   assign exp1 = {2'b0, Float1[62:52]};
   assign exp2 = {2'b0, Float2[62:52]};
   // bias : DP = 2^{11-1}-1 = 1023
   assign bias = {3'h0, 10'h3FF};

   // Divide exponent: exp1 + ~exp2 + bias is reduced to two vectors by
   // csa1, then added with carry-in 1, which completes the two's
   // complement negation of exp2. The top bit of the 14-bit sum is
   // discarded into 'open'.
   csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
   exp_add explogic1 (exp_cout1, {open, exp_diff},
                      {vss, exp_s}, {vss, exp_c}, 1'b1);

   // Sqrt exponent (check if exponent is odd).
   // exp_odd is 1 when the LSB of the biased exponent is 0; since the
   // bias (1023) is odd, that corresponds to an odd unbiased exponent.
   assign exp_odd = Float1[52] ? vss : vdd;
   // exp_sqrt = exp1 + 1023 + exp_odd; it is halved below by dropping bit 0.
   exp_add explogic2 (exp_cout2, exp_sqrt,
                      {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);

   // Choose correct exponent
   assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;

   // Main Goldschmidt/Division Routine
   // NOTE(review): mantissaB is passed before mantissaA; this is a
   // positional connection, so confirm the order against divconv's ports.
   divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
                  rega_out, regb_out, regc_out, regd_out,
                  regr_out, mantissaB, mantissaA,
                  sel_muxa, sel_muxb, sel_muxr,
                  reset, clk,
                  load_rega, load_regb, load_regc, load_regd,
                  load_regr, load_regs, FmtE, DivOpType, exp_odd);

   // FSM : control divider
   fsm control (FDivSqrtDoneM, load_rega, load_regb, load_regc, load_regd,
                load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
                clk, reset, FDivStartE, DivOpType, DivBusyM);

   // Round the mantissa to a 52-bit value, with the leading one
   // removed. The rounding unit also handles special cases and
   // sets the exception flags.
   //***add max magnitude and swap negative and positive infinity
   rounder_div divround1 (Result, DenormIO, FlagsIn,
                          FrmE, FmtE, DivOvEn, DivUnEn, expF,
                          sel_inv, Invalid, DenormIn, signResult,
                          q1, qm1, qp1, q0, qm0, qp0, regr_out);

   // Store the final result and the exception flags in registers.
   // FDivSqrtDoneM is passed in the enable position of each flopenr
   // (defined elsewhere; presumably an enabled, resettable register).
   flopenr #(64) rega (clk, reset, FDivSqrtDoneM, Result, FDivResultM);
   flopenr #(1)  regb (clk, reset, FDivSqrtDoneM, DenormIO, DivDenormM);
   flopenr #(5)  regc (clk, reset, FDivSqrtDoneM, FlagsIn, FDivFlagsM);

endmodule // fpdiv
//
// Brent-Kung Prefix Adder
// (yes, it is 14 bits as my generator is broken for 13 bits :(
// assume, synthesizer will delete stuff not needed )
//
// exp_add: 14-bit adder with carry-in and carry-out.
//
//   {cout, sum} = a + b + cin
//
// Carries are produced by the brent_kung prefix network; this wrapper
// only forms the bitwise propagate/generate terms and the final XOR.
module exp_add (
   output        cout,
   output [13:0] sum,
   input  [13:0] a,
   input  [13:0] b,
   input         cin
);

   // Bitwise propagate and generate of the two addends.
   wire [13:0] prop = a ^ b;
   wire [13:0] gen  = a & b;

   // carry[i] is the carry into sum bit i (carry[0] equals cin).
   wire [13:0] carry;

   // The prefix network takes 14 propagate/generate pairs. Position 0
   // carries cin as a generate term (with propagate tied low), followed
   // by bits 12:0 of the addend terms. Bit 13 is only needed for cout
   // and is resolved below.
   brent_kung prefix_tree (
      .c (carry),
      .p ({prop[12:0], 1'b0}),
      .g ({gen[12:0], cin})
   );

   // Sum bit = propagate XOR incoming carry.
   assign sum  = prop ^ carry;

   // Carry out of the most significant bit.
   assign cout = gen[13] | (prop[13] & carry[13]);

endmodule // exp_add
// brent_kung: 14-position parallel-prefix carry network.
//
// Inputs are per-position generate (g) and propagate (p) terms; the
// output is c[k+1] = G_k_0, the group generate over positions k..0,
// for k = 0..13. p[0] is not used by the network.
//
// Naming: G_i_j / P_i_j is the group generate / propagate spanning
// positions i down to j. The grey and black cells are defined
// elsewhere; presumably grey yields only the combined generate and
// black yields both combined generate and propagate -- confirm against
// their definitions.
module brent_kung (
   output [14:1] c,
   input  [13:0] p,
   input  [13:0] g
);

   // Level 1 nets: adjacent pairs
   logic G_1_0, G_3_2, G_5_4, G_7_6, G_9_8, G_11_10, G_13_12;
   logic P_3_2, P_5_4, P_7_6, P_9_8, P_11_10, P_13_12;
   // Level 2 nets: groups of four
   logic G_3_0, G_7_4, G_11_8;
   logic P_7_4, P_11_8;
   // Remaining prefixes, all anchored at position 0
   logic G_7_0, G_11_0;
   logic G_5_0, G_9_0, G_13_0;
   logic G_2_0, G_4_0, G_6_0, G_8_0, G_10_0, G_12_0;

   // Level 1: combine adjacent positions (spans of 2)
   grey  b_1_0   (G_1_0,            g[1:0],   p[1]);
   black b_3_2   (G_3_2,   P_3_2,   g[3:2],   p[3:2]);
   black b_5_4   (G_5_4,   P_5_4,   g[5:4],   p[5:4]);
   black b_7_6   (G_7_6,   P_7_6,   g[7:6],   p[7:6]);
   black b_9_8   (G_9_8,   P_9_8,   g[9:8],   p[9:8]);
   black b_11_10 (G_11_10, P_11_10, g[11:10], p[11:10]);
   black b_13_12 (G_13_12, P_13_12, g[13:12], p[13:12]);

   // Level 2: combine pairs of level-1 results (spans of 4)
   grey  g_3_0   (G_3_0,          {G_3_2,   G_1_0}, P_3_2);
   black b_7_4   (G_7_4,  P_7_4,  {G_7_6,   G_5_4}, {P_7_6,   P_5_4});
   black b_11_8  (G_11_8, P_11_8, {G_11_10, G_9_8}, {P_11_10, P_9_8});

   // Level 3: prefix over positions 7..0
   grey  g_7_0   (G_7_0,  {G_7_4,  G_3_0}, P_7_4);

   // Level 4: prefix over positions 11..0
   grey  g_11_0  (G_11_0, {G_11_8, G_7_0}, P_11_8);

   // Level 5: fan back out to the remaining odd-indexed prefixes
   grey  g_5_0   (G_5_0,  {G_5_4,   G_3_0},  P_5_4);
   grey  g_9_0   (G_9_0,  {G_9_8,   G_7_0},  P_9_8);
   grey  g_13_0  (G_13_0, {G_13_12, G_11_0}, P_13_12);

   // Level 6: even-indexed prefixes, one position above an odd prefix
   grey  g_2_0   (G_2_0,  {g[2],  G_1_0},  p[2]);
   grey  g_4_0   (G_4_0,  {g[4],  G_3_0},  p[4]);
   grey  g_6_0   (G_6_0,  {g[6],  G_5_0},  p[6]);
   grey  g_8_0   (G_8_0,  {g[8],  G_7_0},  p[8]);
   grey  g_10_0  (G_10_0, {g[10], G_9_0},  p[10]);
   grey  g_12_0  (G_12_0, {g[12], G_11_0}, p[12]);

   // Output mapping c[k+1] = G_k_0, listed from c[14] down to c[1].
   // c[1] is g[0] directly.
   assign c = {G_13_0, G_12_0, G_11_0, G_10_0, G_9_0, G_8_0, G_7_0,
               G_6_0,  G_5_0,  G_4_0,  G_3_0,  G_2_0, G_1_0, g[0]};

endmodule // brent_kung