mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-05 11:15:19 +00:00
255 lines
8.4 KiB
Systemverilog
Executable File
255 lines
8.4 KiB
Systemverilog
Executable File
//
|
|
// File name : fpdiv
|
|
// Title : Floating-Point Divider/Square-Root
|
|
// project : FPU
|
|
// Library : fpdiv
|
|
// Author(s) : James E. Stine, Jr.
|
|
// Purpose : definition of main unit to floating-point div/sqrt
|
|
// notes :
|
|
//
|
|
// Copyright Oklahoma State University
|
|
//
|
|
// Basic Operations
|
|
//
|
|
// Step 1: Load operands, set flags, and convert SP to DP
|
|
// Step 2: Check for special inputs ( +/- Infinity, NaN)
|
|
// Step 3: Exponent Logic
|
|
// Step 4: Divide/Sqrt using Goldschmidt
|
|
// Step 5: Normalize the result.//
|
|
// Shift left until normalized. Normalized when the value to the
|
|
// left of the binary point is 1.
|
|
// Step 6: Round the result.//
|
|
// Step 7: Put quotient/remainder onto output.
|
|
//
|
|
|
|
// `timescale 1ps/1ps
|
|
// fpdiv: top level of the floating-point divide / square-root unit.
//
// Structure visible in this module:
//   1. convert_inputs_div  -> Float1 / Float2 (operands in the internal format)
//   2. exception_div       -> sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm
//   3. sign and exponent logic (computed here)
//   4. divconv + fsm       -> iterative datapath and its controller
//   5. rounder_div         -> Result, DenormIO, FlagsIn
//   6. flopenr x3          -> registered outputs, enabled by FDivSqrtDoneM
//
// DivOpType = 0 selects the divide exponent/sign path, DivOpType = 1 the
// square-root path (see expF and signResult below).
//
// All instantiated modules except exp_add are defined outside this module;
// statements about their internals below are hedged accordingly.
module fpdiv (FDivSqrtDoneM, FDivResultM, FDivFlagsM, DivDenormM, FInput1E, FInput2E, FrmE, DivOpType, FmtE, DivOvEn, DivUnEn,
              FDivStartE, reset, clk, DivBusyM);

   input [63:0]   FInput1E;      // 1st input operand (A)
   input [63:0]   FInput2E;      // 2nd input operand (B)
   input [2:0]    FrmE;          // Rounding mode - specify values
   input          DivOpType;     // Function opcode (1 selects the sqrt exponent/sign path)
   input          FmtE;          // Result Precision (0 for double, 1 for single)
   input          DivOvEn;       // Overflow trap enabled
   input          DivUnEn;       // Underflow trap enabled

   input          FDivStartE;    // Start request, forwarded to the control FSM
   input          reset;
   input          clk;

   output [63:0]  FDivResultM;   // Result of operation
   output [4:0]   FDivFlagsM;    // IEEE exception flags
   output         DivDenormM;    // DivDenormM on input or output
   output         FDivSqrtDoneM; // Driven by the FSM; also enables the output registers
   output         DivBusyM;      // Driven by the FSM

   // Constant one / zero nets used for padding and the hidden mantissa bit.
   supply1        vdd;
   supply0        vss;

   // Converted operands.
   wire [63:0]    Float1;
   wire [63:0]    Float2;

   // Exponent datapath (13 bits wide; exp_sqrt keeps the full 14-bit adder sum).
   wire [12:0]    exp1, exp2, expF;
   wire [12:0]    exp_diff, bias;
   wire [13:0]    exp_sqrt;
   wire [12:0]    exp_s;
   wire [12:0]    exp_c;

   // Rounded result and mantissas with the leading one attached.
   wire [63:0]    Result;
   wire [52:0]    mantissaA;
   wire [52:0]    mantissaB;

   // Exception / flag nets.
   wire [2:0]     sel_inv;
   wire           op1_Norm, op2_Norm;
   wire           Invalid;
   wire           DenormIn, DenormIO;
   wire [4:0]     FlagsIn;
   wire           signResult;

   // Iterative datapath outputs and FSM control.
   wire [63:0]    q1, qm1, qp1, q0, qm0, qp0;
   wire [63:0]    rega_out, regb_out, regc_out, regd_out;
   wire [127:0]   regr_out;
   wire [2:0]     sel_muxa, sel_muxb;
   wire           sel_muxr;
   wire           load_rega, load_regb, load_regc, load_regd, load_regr, load_regs;

   // exp_cout1, exp_cout2 and open only terminate adder outputs that are
   // not consumed anywhere in this module.
   logic          exp_cout1, exp_cout2, exp_odd, open;

   // Convert the input operands to their internal form based on the
   // original operands, DivOpType and the precision FmtE.  Per the original
   // author's note, single-precision inputs are widened to double precision
   // (convert_inputs_div is defined elsewhere -- confirm there).
   convert_inputs_div divconv1 (Float1, Float2, FInput1E, FInput2E, DivOpType, FmtE);

   // Test for exceptions.  Produces Invalid, DenormIn and sel_inv; sel_inv
   // is consumed by the rounder below to select the result.  Per the
   // original author's note, op1_Norm and op2_Norm are one when the
   // corresponding operand is neither zero nor denormalized; they are not
   // used further in this module.
   exception_div divexc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm,
                          Float1, Float2, DivOpType);

   // Determine Sign/Mantissa.
   // Divide: XOR of the operand signs.  Sqrt: sign of the first operand.
   assign signResult = ((Float1[63]^Float2[63])&~DivOpType) | Float1[63]&DivOpType;
   // The leading one is always prepended here, regardless of the exponent field.
   assign mantissaA = {vdd, Float1[51:0]};
   assign mantissaB = {vdd, Float2[51:0]};

   // Perform Exponent Subtraction - expA - expB + Bias
   assign exp1 = {2'b0, Float1[62:52]};
   assign exp2 = {2'b0, Float2[62:52]};
   // bias : DP = 2^{11-1}-1 = 1023
   assign bias = {3'h0, 10'h3FF};

   // Divide exponent: exp1 + ~exp2 + bias is reduced to two vectors by csa,
   // then added with carry-in 1 to complete the two's-complement negation
   // of exp2.  (Assumes csa is a carry-save adder whose carry output is
   // already aligned for the final add -- csa is defined elsewhere.)
   csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c);
   exp_add explogic1 (exp_cout1, {open, exp_diff},
                      {vss, exp_s}, {vss, exp_c}, 1'b1);

   // Sqrt exponent: exp_odd is one when bit 0 of the biased exponent field
   // is zero.  The adder forms exp1 + 1023 + exp_odd; the upper 13 bits of
   // that sum (i.e. the sum shifted right by one) are used below.
   assign exp_odd = Float1[52] ? vss : vdd;
   exp_add explogic2 (exp_cout2, exp_sqrt,
                      {vss, exp1}, {4'h0, 10'h3ff}, exp_odd);

   // Choose correct exponent
   assign expF = DivOpType ? exp_sqrt[13:1] : exp_diff;

   // Main Goldschmidt/Division Routine
   divconv goldy (q1, qm1, qp1, q0, qm0, qp0,
                  rega_out, regb_out, regc_out, regd_out,
                  regr_out, mantissaB, mantissaA,
                  sel_muxa, sel_muxb, sel_muxr,
                  reset, clk,
                  load_rega, load_regb, load_regc, load_regd,
                  load_regr, load_regs, FmtE, DivOpType, exp_odd);

   // FSM : control divider
   fsm control (FDivSqrtDoneM, load_rega, load_regb, load_regc, load_regd,
                load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr,
                clk, reset, FDivStartE, DivOpType, DivBusyM);

   // Round the mantissa to a 52-bit value, with the leading one
   // removed.  Per the original author's note, the rounding unit also
   // handles special cases and sets the exception flags.
   // TODO (from original): add max magnitude and swap negative and positive infinity
   rounder_div divround1 (Result, DenormIO, FlagsIn,
                          FrmE, FmtE, DivOvEn, DivUnEn, expF,
                          sel_inv, Invalid, DenormIn, signResult,
                          q1, qm1, qp1, q0, qm0, qp0, regr_out);

   // Store the final result and the exception flags in registers.
   // FDivSqrtDoneM is passed in the third port position of each flopenr
   // (presumably the enable -- flopenr is defined elsewhere).
   flopenr #(64) rega (clk, reset, FDivSqrtDoneM, Result, FDivResultM);
   flopenr #(1) regb (clk, reset, FDivSqrtDoneM, DenormIO, DivDenormM);
   flopenr #(5) regc (clk, reset, FDivSqrtDoneM, FlagsIn, FDivFlagsM);

endmodule // fpdiv
|
|
|
|
//
|
|
// Brent-Kung Prefix Adder
|
|
// (yes, it is 14 bits as my generator is broken for 13 bits :(
|
|
// assume, synthesizer will delete stuff not needed )
|
|
//
|
|
// exp_add: 14-bit adder, sum = a + b + cin, with carry-out.
//
// The carries come from the brent_kung prefix tree.  The carry-in is
// injected by treating it as the generate signal of an extra bit position
// below bit 0 (with propagate 0), so the tree sees the 14 low positions
// {bits 12..0 of a/b, cin}.  The tree output carry_in[k] is then the carry
// entering bit k of a/b.  The carry out of bit 13 is formed locally
// because bit 13 is not fed to the tree.
module exp_add (
   output logic        cout,
   output logic [13:0] sum,
   input  logic [13:0] a, b,
   input  logic        cin
);

   logic [13:0] prop_ab;   // per-bit propagate: a ^ b
   logic [13:0] gen_ab;    // per-bit generate:  a & b
   logic [13:0] carry_in;  // carry entering each bit of a/b

   // pre-computation
   assign prop_ab = a ^ b;
   assign gen_ab  = a & b;

   // prefix tree: position 0 carries cin, positions 1..13 carry bits 0..12
   brent_kung prefix_tree (carry_in,
                           {prop_ab[12:0], 1'b0},
                           {gen_ab[12:0], cin});

   // post-computation
   assign sum  = prop_ab ^ carry_in;
   assign cout = gen_ab[13] | (prop_ab[13] & carry_in[13]);

endmodule // exp_add
|
|
|
|
// brent_kung: 14-position parallel-prefix carry network.
//
// Inputs are per-position propagate (p) and generate (g) signals for
// positions 0..13.  Output c[k+1] is the group generate over positions
// k..0, i.e. the carry out of position k.  Net names follow the pattern
// G_hi_lo / P_hi_lo for the group generate / propagate over positions
// hi..lo.
//
// The grey and black cells are defined outside this module.  From the way
// they are wired here, black combines two adjacent (G,P) pairs into one
// (G,P) pair, and grey does the same but produces only G (presumably
// G = g_hi | (p_hi & g_lo) -- confirm against the cell definitions).
module brent_kung (
   output logic [14:1] c,
   input  logic [13:0] p,
   input  logic [13:0] g
);

   // Up-sweep, level 1: groups of 2 positions
   logic G_1_0, G_3_2, G_5_4, G_7_6, G_9_8, G_11_10, G_13_12;
   logic P_3_2, P_5_4, P_7_6, P_9_8, P_11_10, P_13_12;
   // Up-sweep, level 2: groups of 4 positions
   logic G_3_0, G_7_4, G_11_8;
   logic P_7_4, P_11_8;
   // Up-sweep, level 3 and down-sweep: groups anchored at position 0
   logic G_7_0, G_11_0;
   logic G_5_0, G_9_0, G_13_0;
   logic G_2_0, G_4_0, G_6_0, G_8_0, G_10_0, G_12_0;

   // Level 1: combine adjacent positions into G/P pairs spanning 2 positions
   grey  b_1_0   (G_1_0,            {g[1], g[0]},   p[1]);
   black b_3_2   (G_3_2,   P_3_2,   {g[3], g[2]},   {p[3], p[2]});
   black b_5_4   (G_5_4,   P_5_4,   {g[5], g[4]},   {p[5], p[4]});
   black b_7_6   (G_7_6,   P_7_6,   {g[7], g[6]},   {p[7], p[6]});
   black b_9_8   (G_9_8,   P_9_8,   {g[9], g[8]},   {p[9], p[8]});
   black b_11_10 (G_11_10, P_11_10, {g[11], g[10]}, {p[11], p[10]});
   black b_13_12 (G_13_12, P_13_12, {g[13], g[12]}, {p[13], p[12]});

   // Level 2: combine 2-position groups into groups spanning 4 positions
   grey  g_3_0  (G_3_0,          {G_3_2, G_1_0},    P_3_2);
   black b_7_4  (G_7_4,  P_7_4,  {G_7_6, G_5_4},    {P_7_6, P_5_4});
   black b_11_8 (G_11_8, P_11_8, {G_11_10, G_9_8},  {P_11_10, P_9_8});

   // Level 3: group spanning positions 7..0
   grey g_7_0 (G_7_0, {G_7_4, G_3_0}, P_7_4);

   // Down-sweep: extend the 7..0 group by the 4-position group 11..8
   grey g_11_0 (G_11_0, {G_11_8, G_7_0}, P_11_8);

   // Down-sweep: extend anchored groups by a 2-position group
   grey g_5_0  (G_5_0,  {G_5_4, G_3_0},    P_5_4);
   grey g_9_0  (G_9_0,  {G_9_8, G_7_0},    P_9_8);
   grey g_13_0 (G_13_0, {G_13_12, G_11_0}, P_13_12);

   // Last grey cell stage: extend anchored groups by a single position
   grey g_2_0  (G_2_0,  {g[2], G_1_0},   p[2]);
   grey g_4_0  (G_4_0,  {g[4], G_3_0},   p[4]);
   grey g_6_0  (G_6_0,  {g[6], G_5_0},   p[6]);
   grey g_8_0  (G_8_0,  {g[8], G_7_0},   p[8]);
   grey g_10_0 (G_10_0, {g[10], G_9_0},  p[10]);
   grey g_12_0 (G_12_0, {g[12], G_11_0}, p[12]);

   // Final stage: c[k+1] = G_k_0, listed from c[14] down to c[1]
   assign c = {G_13_0, G_12_0, G_11_0, G_10_0, G_9_0, G_8_0, G_7_0,
               G_6_0,  G_5_0,  G_4_0,  G_3_0,  G_2_0, G_1_0, g[0]};

endmodule // brent_kung
|
|
|