cvw/wally-pipelined/src/fpu/faddcvt.sv

//
// File name : fpadd
// Title     : Floating-Point Adder/Subtractor
// project   : FPU
// Library   : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose   : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and convert SP to DP
// Step 2: Check for special inputs ( +/- Infinity,  NaN)
// Step 3: Compare exponents.  Swap the operands of exp1 < exp2
//         or of (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller exponent,
//          and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.//
//   Shift left until normalized.  Normalized when the value to the
//   left of the binrary point is 1.
// Step 7: Round the result.//
// Step 8: Put sum onto output.
//

module faddcvt(
   input logic          clk,
   input logic          reset,
   input logic          FlushM,     // flush the memory stage
   input logic          StallM,     // stall the memory stage
   input logic  [63:0]  FSrcXE,		// 1st input operand (A)
   input logic  [63:0]  FSrcYE,		// 2nd input operand (B)
   input logic  [2:0]   FOpCtrlE, FOpCtrlM,	// Function opcode
   input logic          FmtE, FmtM,   	// Result Precision (0 for double, 1 for single)
   input logic  [2:0] 	FrmM,		      // Rounding mode - specify values
   input logic XSgnE, YSgnE,
   input logic [52:0] XManE, YManE,
   input logic [10:0] XExpE, YExpE,
   input logic XSgnM, YSgnM,
   input logic [52:0] XManM, YManM,
   input logic [10:0] XExpM, YExpM,
   input logic XDenormE, YDenormE,
   input logic XNormE, YNormE,
   input logic XNormM, YNormM,
   input logic XZeroE, YZeroE,
   input logic XInfE, YInfE,
   input logic XNaNE, YNaNE,
   input logic XSNaNE, YSNaNE,
   output logic [63:0]  FAddResM,	   // Result of operation
   output logic [4:0]   FAddFlgM);   	// IEEE exception flags

   logic [63:0] 	AddSumE, AddSumM;
   logic [63:0]   AddSumTcE, AddSumTcM;
   logic [3:0] 	AddSelInvE, AddSelInvM;
   logic [10:0] 	AddExpPostSumE,AddExpPostSumM;
   logic 		   AddCorrSignE, AddCorrSignM;
   logic          AddOpANormE,  AddOpANormM;
   logic          AddOpBNormE, AddOpBNormM;
   logic          AddInvalidE, AddInvalidM;
   logic 		   AddDenormInE, AddDenormInM;
   logic          AddSwapE, AddSwapM;
   logic          AddSignAE, AddSignAM;
   logic [11:0] 	AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM;
   logic [10:0] 	AddExponentE, AddExponentM;


   fpuaddcvt1 fpadd1 (.FOpCtrlE, .FmtE, .AddExponentE,
                     .AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE,
   .XSgnE, .YSgnE,.XManE, .YManE, .XExpE, .YExpE,  .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
                     .AddCorrSignE, .AddSignAE, .AddOpANormE, .AddOpBNormE, .AddInvalidE,
                     .AddDenormInE, .AddSwapE);

   // E/M pipeline registers
   flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
   flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
   flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
   flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
   flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
   flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
   flopenrc #(11) EMRegAdd9(clk, reset, FlushM, ~StallM,
                           {AddSelInvE, AddCorrSignE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddSwapE, AddSignAE},
                           {AddSelInvM, AddCorrSignM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddSwapM, AddSignAM});


   fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM,  .XNormM, .YNormM,
                     .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
                     .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM,
                     .AddSignAM, .AddCorrSignM, .AddSwapM, .FAddResM, .FAddFlgM);
endmodule

module fpuaddcvt1 (
   input logic [2:0]	   FOpCtrlE,	// Function opcode
   input logic 	      FmtE,   		// Result Precision (1 for double, 0 for single)
   input logic XSgnE, YSgnE,
   input logic [10:0] XExpE, YExpE,
   input logic [52:0] XManE, YManE,
   input logic XDenormE, YDenormE,
   input logic XNormE, YNormE,
   input logic XZeroE, YZeroE,
   input logic XInfE, YInfE,
   input logic XNaNE, YNaNE,
   input logic XSNaNE, YSNaNE,

   output logic [10:0] 	AddExponentE,
   output logic [10:0]	AddExpPostSumE,
   output logic [11:0]  AddExp1DenormE, AddExp2DenormE,//KEP used to be [10:0]
   output logic [63:0]  AddSumE, AddSumTcE,
   output logic [3:0]   AddSelInvE,
   output logic         AddCorrSignE,
   output logic 	      AddSignAE,
   output logic	      AddOpANormE, AddOpBNormE,
   output logic	      AddInvalidE,
   output logic 	      AddDenormInE,
   output logic         AddSwapE
   );

   logic [5:0]	 ZP_mantissaA;
   logic [5:0]	 ZP_mantissaB;
   wire		    ZV_mantissaA;
   wire		    ZV_mantissaB;

   wire          P;
   assign P = ~(FmtE^FOpCtrlE[1]);

   wire [63:0] IntValue;
   wire [11:0] exp1, exp2;
   wire [11:0] exp_diff1, exp_diff2;
   wire [11:0] exp_shift;
   wire [51:0] mantissaA;
   wire [56:0] mantissaA1;
   wire [63:0] mantissaA3;
   wire [51:0] mantissaB;
   wire [56:0] mantissaB1, mantissaB2;
   wire [63:0] mantissaB3;
   wire 	      exp_gt63;
   wire 	      Sticky_out;
   wire        sub;
   wire 	      zeroB;
   wire [5:0]	align_shift;

   // Test for exceptions and return the "Invalid Operation" and
   // "Denormalized" Input Flags. The "AddSelInvE" is used in
   // the third pipeline stage to select the result. Also, AddOp1NormE
   // and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized.
   // sub is one if the effective operation is subtaction.

   exception exc1 (.Ztype(AddSelInvE), .Invalid(AddInvalidE), .Denorm(AddDenormInE), .Sub(sub),
   .XSgnE, .YSgnE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
	.op_type(FOpCtrlE));

   // Perform Exponent Subtraction (used for alignment). For performance
   // both exponent subtractions are performed in parallel. This was
   // changed to a behavior level to allow the tools to  try to optimize
   // the two parallel additions. The input values are zero-extended to 12
   // bits prior to performing the addition.

   assign exp1 = {1'b0, XExpE};
   assign exp2 = {1'b0, YExpE};
   assign exp_diff1 = exp1 - exp2;
   assign exp_diff2 = AddDenormInE ? ({YSgnE, YExpE} - {XSgnE, XExpE}): exp2 - exp1;

   // The second operand (B) should be set to zero, if FOpCtrlE does not
   // specify addition or subtraction
   assign zeroB = FOpCtrlE[1];

   // Swapped operands if zeroB is not one and exp1 < exp2.
   // Swapping causes exp2 to be used for the result exponent.
   // Only the exponent of the larger operand is used to determine
   // the final result.
   assign AddSwapE = exp_diff1[11] & ~zeroB;
   assign AddExponentE = AddSwapE ? YExpE : XExpE;
   assign AddExpPostSumE = AddSwapE ? YExpE : XExpE;
   assign mantissaA = AddSwapE ? YManE[51:0] : XManE[51:0];
   assign mantissaB = AddSwapE ? XManE[51:0] : YManE[51:0];
   assign AddSignAE     = AddSwapE ? YSgnE : XSgnE;

   // Leading-Zero Detector. Determine the size of the shift needed for
   // normalization. If sum_corrected is all zeros, the exp_valid is
   // zero; otherwise, it is one.
   // modified to 52 bits to detect leading zeroes on denormalized mantissas
   // lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA);
   // lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB);
   logic [8:0] i;
   logic [8:0] j;
    always_comb begin
            i = 0;
            while (~mantissaA[52-i] && $unsigned(i) <= $unsigned(52)) i = i+1;  // search for leading one
            ZP_mantissaA = i;
    end
    always_comb begin
            j = 0;
            while (~mantissaB[52-j] && $unsigned(j) <= $unsigned(52)) j = j+1;  // search for leading one
            ZP_mantissaB = j;
    end

   // Denormalized exponents created by subtracting the leading zeroes from the original exponents
   assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa
   assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB});

   // Determine the alignment shift and limit it to 63. If any bit from
   // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones.
   assign exp_shift = AddSwapE ? exp_diff2 : exp_diff1;
   assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9]
     | exp_shift[8] | exp_shift[7] | exp_shift[6];
   assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift

   // Unpack the 52-bit mantissas to 57-bit numbers of the form.
   //    001.M[51]M[50] ... M[1]M[0]00
   // Unless the number has an exponent of zero, in which case it
   // is unpacked as
   //    000.00 ... 00
   // This effectively flushes denormalized values to zero.
   // The three bits of to the left of the binary point prevent overflow
   // and loss of sign information. The two bits to the right of the
   // original mantissa form the "guard" and "round" bits that are used
   // to round the result.
   assign AddOpANormE = AddSwapE ? YNormE : XNormE;
   assign AddOpBNormE = AddSwapE ? XNormE : YNormE;
   assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0};
   assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0};

   // Perform mantissa alignment using a 57-bit barrel shifter
   // If any of the bits shifted out are one, Sticky_out is set.
   // The size of the barrel shifter could be reduced by two bits
   // by not adding the leading two zeros until after the shift.
   barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift);

   // Place either the sign-extened 32-bit value or the original 64-bit value
   // into IntValue (to be used for integer to floating point conversion)
   // assign IntValue [31:0] = FSrcXE[31:0];
   // assign IntValue [63:32] = FOpCtrlE[0] ? {32{FSrcXE[31]}} : FSrcXE[63:32];

   // If doing an integer to floating point conversion, mantissaA3 is set to
   // IntVal and the prenomalized exponent is set to 1084. Otherwise,
   // mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero,
   // and the exponent value is left unchanged.
   // Under denormalized cases, the exponent before the rounder is set to 1
   // if the normal shift value is 11.
   assign mantissaA3    = AddDenormInE ? ({12'h0, mantissaA}) : {mantissaA1, 7'h0};

   // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to
   // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six
   // zeros.
   assign mantissaB3[63:7] = AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}};
   assign mantissaB3[6]    = AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB;
   assign mantissaB3[5:0]  = AddDenormInE ? mantissaB[5:0] : 6'h0;

   // The sign of the result needs to be corrected if the true
   // operation is subtraction and the input operands were swapped.
   assign AddCorrSignE = ~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE;

   // 64-bit Mantissa Adder/Subtractor
   cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder

   // 64-bit Mantissa Subtractor - to get the two's complement of the
   // result when the sign from the adder/subtractor is negative.
   cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder

   // Finds normal underflow result to determine whether to round final exponent down
   //***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be
   // assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]);

endmodule // fpadd


//
// File name : fpadd
// Title     : Floating-Point Adder/Subtractor
// project   : FPU
// Library   : fpadd
// Author(s) : James E. Stine, Jr., Brett Mathis
// Purpose   : definition of main unit to floating-point add/sub
// notes :
//
// Copyright Oklahoma State University
// Copyright AFRL
//
// Basic and Denormalized Operations
//
// Step 1: Load operands, set flags, and AddConvertM SP to DP
// Step 2: Check for special inputs ( +/- Infinity,  NaN)
// Step 3: Compare exponents.  Swap the operands of exp1 < exp2
//         or of (exp1 = exp2 AND mnt1 < mnt2)
// Step 4: Shift the mantissa corresponding to the smaller AddExponentM,
//          and extend precision by three bits to the right.
// Step 5: Add or subtract the mantissas.
// Step 6: Normalize the result.//
//   Shift left until normalized.  Normalized when the value to the
//   left of the binrary point is 1.
// Step 7: Round the result.//
// Step 8: Put AddSumM onto output.
//


module fpuaddcvt2 (
   input logic [2:0] 	FrmM,		// Rounding mode - specify values
   input logic [2:0]	FOpCtrlM,	// Function opcode
   input logic 	FmtM,   		// Result Precision (0 for double, 1 for single)
   input logic [63:0] AddSumM, AddSumTcM,
   input logic [11:0]	 AddExp1DenormM, AddExp2DenormM,
   input logic [10:0] 	 AddExponentM, AddExpPostSumM,
   input logic [3:0] 	 AddSelInvM,
   input logic XSgnM, YSgnM,
   input logic [52:0] XManM, YManM,
   input logic [10:0] XExpM, YExpM,
   input logic XNormM, YNormM,
   input logic		 AddOpANormM, AddOpBNormM,
   input logic		 AddInvalidM,
   input logic 	 AddDenormInM,
   input logic 	 AddSignAM,
   input logic         AddCorrSignM,
   input logic          AddSwapM,

   output logic [63:0] FAddResM,	// Result of operation
   output logic [4:0]  FAddFlgM   	// IEEE exception flags
);
   wire 	 AddDenormM;   	// AddDenormM on input or output

   wire          P;
   assign P = ~(FmtM^FOpCtrlM[1]);

   wire [10:0]   exp_pre;
   wire [63:0] 	 Result;
   wire [63:0] 	 sum_norm, sum_norm_w_bypass;
   wire [5:0] 	 norm_shift, norm_shift_denorm;
   wire          exp_valid;
   wire		 DenormIO;
   wire [4:0] 	 FlagsIn;
   wire 	 Sticky_out;
   wire 	 sign_corr;
   wire 	 zeroB;
   wire 	 mantissa_comp;
   wire 	 mantissa_comp_sum;
   wire 	 mantissa_comp_sum_tc;
   wire 	 Float1_sum_comp;
   wire 	 Float2_sum_comp;
   wire 	 Float1_sum_tc_comp;
   wire 	 Float2_sum_tc_comp;
   wire 	 normal_underflow;
   wire [63:0]   sum_corr;
   logic AddNormOvflowM;


   logic 	AddOvEnM;		// Overflow trap enabled
   logic 	AddUnEnM;   	// Underflow trap enabled

   assign AddOvEnM = 1'b1;
   assign AddUnEnM = 1'b1;
   //AddExponentM value pre-rounding with considerations for denormalized
   //cases/conversion cases
   assign exp_pre       = AddDenormInM ?
                          ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0]))
                          : AddExponentM;


   // Finds normal underflow result to determine whether to round final AddExponentM down
   // Comparison between each float and the resulting AddSumM of the primary cla adder/subtractor and cla subtractor
   assign Float1_sum_comp = ~(XManM[51:0] > AddSumM[51:0]);
   assign Float2_sum_comp = ~(YManM[51:0] > AddSumM[51:0]);
   assign Float1_sum_tc_comp = ~(XManM[51:0] > AddSumTcM[51:0]);
   assign Float2_sum_tc_comp = ~(YManM[51:0] > AddSumTcM[51:0]);

   // Determines the correct Float value to compare based on AddSwapM result
   assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp;
   assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp;

   // Determines the correct comparison result based on operation and sign of resulting AddSumM
   assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum;

   // If the signs are different and both operands aren't denormalized
   // the normal underflow bit is needed and therefore updated.
   assign normal_underflow = ((XSgnM ^ YSgnM) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0;

   // Determine the correct sign of the result
   assign sign_corr = (AddCorrSignM ^ AddSignAM) ^ AddSumM[63];

   // If the AddSumM is negative, use its two complement instead.
   // This value has to be 64-bits to correctly handle the
   // case 10...00
   assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (XSgnM ~^ YSgnM) & FOpCtrlM[0] ) | ((XSgnM ^ YSgnM) & ~FOpCtrlM[0]) ))
			 ? (AddSumM[63] ? AddSumM : AddSumTcM) : (AddSumM[63] ? AddSumTcM : AddSumM);

   // Finds normal underflow result to determine whether to round final AddExponentM down
   //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be
   assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]);

   // Leading-Zero Detector. Determine the size of the shift needed for
   // normalization. If sum_corrected is all zeros, the exp_valid is
   // zero; otherwise, it is one.
   lz64 lzd1 (norm_shift, exp_valid, sum_corr);

   assign norm_shift_denorm = (AddDenormInM & ( (~AddOpANormM & ~AddOpBNormM) | normal_underflow)) ? (6'h00) : (norm_shift);

   // Barell shifter used for normalization. It takes as inputs the
   // the corrected AddSumM and the amount by which the AddSumM should
   // be right shifted. It outputs the normalized AddSumM.
   barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm);

   assign sum_norm_w_bypass = sum_norm;

   // Round the mantissa to a 52-bit value, with the leading one
   // removed. If the result is a single precision number, the actual
   // mantissa is in the upper 23 bits and the lower 29 bits are zero.
   // At this point, normalization has already been performed, so we know
   // exactly where the rounding point is. The rounding units also
   // handles special cases and set the exception flags.

   // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to
   // help in processor reservation station detection of load/stores. In
   // other words, the processor would like to know ahead of time that
   // if the result is an exception then don't load or store.
   rounder round1 (.Result, .DenormIO, .Flags(FlagsIn), .rm(FrmM), .P, .OvEn(AddOvEnM), .UnEn(AddUnEnM), .exp_valid,
		   .sel_inv(AddSelInvM), .Invalid(AddInvalidM), .DenormIn(AddDenormInM), .Asign(sign_corr), .Aexp(exp_pre), .norm_shift, .A(sum_norm_w_bypass),
		   .exponent_postsum(AddExpPostSumM), .A_Norm(XNormM), .B_Norm(YNormM), .exp_A_unmodified({XSgnM, XExpM}), .exp_B_unmodified({YSgnM, YExpM}),
		   .normal_overflow(AddNormOvflowM), .normal_underflow, .swap(AddSwapM), .op_type(FOpCtrlM), .sum(AddSumM));

   // Store the final result and the exception flags in registers.
   assign FAddResM = Result;
   assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn};

endmodule // fpadd