diff --git a/wally-pipelined/src/fpu/fpdiv.sv b/wally-pipelined/src/fpu/fpdiv.sv new file mode 100755 index 00000000..4051f6de --- /dev/null +++ b/wally-pipelined/src/fpu/fpdiv.sv @@ -0,0 +1,151 @@ +// +// File name : fpdiv +// Title : Floating-Point Divider/Square-Root +// project : FPU +// Library : fpdiv +// Author(s) : James E. Stine, Jr. +// Purpose : definition of main unit to floating-point div/sqrt +// notes : NOTE(review): the fpu.sv hunk in this patch instantiates fpdiv with named ports (.DivOpType, .FmtE, .DivInput1E, .FrmE, ...) that are not in this module's port list; only clk and reset match - confirm before merging. +// +// Copyright Oklahoma State University +// +// Basic Operations +// +// Step 1: Load operands, set flags, and convert SP to DP +// Step 2: Check for special inputs ( +/- Infinity, NaN) +// Step 3: Exponent Logic +// Step 4: Divide/Sqrt using Goldschmidt +// Step 5: Normalize the result. +// Shift left until normalized. Normalized when the value to the +// left of the binary point is 1. +// Step 6: Round the result. +// Step 7: Put quotient/remainder onto output. +// + +`timescale 1ps/1ps +module fpdiv (done, AS_Result, Flags, Denorm, op1, op2, rm, op_type, P, OvEn, UnEn, + start, reset, clk); + + input [63:0] op1; // 1st input operand (A) + input [63:0] op2; // 2nd input operand (B) + input [1:0] rm; // Rounding mode - specify values + input op_type; // Function opcode (selects exp_sqrt[13:1] as expF when 1, exp_diff when 0) + input P; // Result Precision (0 for double, 1 for single) + input OvEn; // Overflow trap enabled + input UnEn; // Underflow trap enabled + input start; + input reset; + input clk; + + output [63:0] AS_Result; // Result of operation + output [4:0] Flags; // IEEE exception flags + output Denorm; // Denorm on input or output + output done; + + supply1 vdd; + supply0 vss; + + wire [63:0] Float1; + wire [63:0] Float2; + wire [63:0] IntValue; + + wire [12:0] exp1, exp2, expF; + wire [12:0] exp_diff, bias; + wire [13:0] exp_sqrt; + wire [12:0] exp_s; + wire [12:0] exp_c; + + wire [10:0] exponent, exp_pre; + wire [63:0] Result; + wire [52:0] mantissaA; + wire [52:0] mantissaB; + wire [63:0] sum, sum_tc, sum_corr, sum_norm; + + wire [5:0] align_shift; + wire [5:0] norm_shift; + wire 
[2:0] sel_inv; + wire op1_Norm, op2_Norm; + wire opA_Norm, opB_Norm; + wire Invalid; + wire DenormIn, DenormIO; + wire [4:0] FlagsIn; + wire exp_gt63; + wire Sticky_out; + wire signResult, sign_corr; + wire corr_sign; + wire zeroB; + wire convert; + wire swap; + wire sub; + + wire [63:0] q1, qm1, qp1, q0, qm0, qp0; + wire [63:0] rega_out, regb_out, regc_out, regd_out; + wire [127:0] regr_out; + wire [2:0] sel_muxa, sel_muxb; + wire sel_muxr; + wire load_rega, load_regb, load_regc, load_regd, load_regr; + + wire donev, sel_muxrv, sel_muxsv; + wire [1:0] sel_muxav, sel_muxbv; + wire load_regav, load_regbv, load_regcv; + wire load_regrv, load_regsv; + + // Convert the input operands to their appropriate forms based on + // the original operands, the op_type, and their precision P. + // Single precision inputs are converted to double precision + // and the sign of the first operand is set appropriately based on + // if the operation is absolute value or negation. + convert_inputs_div conv1 (Float1, Float2, op1, op2, op_type, P); + + // Test for exceptions and return the "Invalid Operation" and + // "Denormalized" Input Flags. The "sel_inv" is used in + // the third pipeline stage to select the result. Also, op1_Norm + // and op2_Norm are one if op1 and op2 are not zero or denormalized. + // sub is one if the effective operation is subtraction. NOTE(review): sub is declared above but not connected to anything in this module. 
+ exception_div exc1 (sel_inv, Invalid, DenormIn, op1_Norm, op2_Norm, + Float1, Float2, op_type); + + // Determine Sign/Mantissa: the sign is the XOR of the operand signs when op_type is 0 and the sign of Float1 when op_type is 1; a constant 1 is prepended to each 52-bit fraction + assign signResult = ((Float1[63]^Float2[63])&~op_type) | Float1[63]&op_type; + assign mantissaA = {vdd, Float1[51:0]}; + assign mantissaB = {vdd, Float2[51:0]}; + // Perform Exponent Subtraction - expA - expB + Bias + assign exp1 = {2'b0, Float1[62:52]}; + assign exp2 = {2'b0, Float2[62:52]}; + // bias : DP = 2^{11-1}-1 = 1023 + assign bias = {3'h0, 10'h3FF}; + // Divide exponent (open and exp_cout1 are implicit 1-bit nets; they have no declaration in this module) + csa #(13) csa1 (exp1, ~exp2, bias, exp_s, exp_c); + adder #(14) explogic1 ({vss, exp_s}, {vss, exp_c}, 1'b1, {open, exp_diff}, exp_cout1); + + // Sqrt exponent: exp_odd is driven high when Float1[52] is 0 (exp_odd and exp_cout2 are implicit 1-bit nets) + assign exp_odd = Float1[52] ? vss : vdd; + adder #(14) explogic2 ({vss, exp1}, {4'h0, 10'h3ff}, exp_odd, exp_sqrt, exp_cout2); + // Choose correct exponent: upper 13 bits of exp_sqrt when op_type is 1, otherwise exp_diff + assign expF = op_type ? exp_sqrt[13:1] : exp_diff; + + // Main Goldschmidt/Division Routine + divconv goldy (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, regd_out, + regr_out, mantissaB, mantissaA, sel_muxa, sel_muxb, sel_muxr, + reset, clk, load_rega, load_regb, load_regc, load_regd, + load_regr, load_regs, P, op_type, exp_odd); + + // FSM : control divider. NOTE(review): load_regs and error are implicit nets (not declared above), and error is not driven anywhere in this module - confirm the direction of that fsm_div port. + fsm_div control (done, load_rega, load_regb, load_regc, load_regd, + load_regr, load_regs, sel_muxa, sel_muxb, sel_muxr, + clk, reset, start, error, op_type); + + // Round the mantissa to a 52-bit value, with the leading one + // removed. The rounding unit also handles special cases and + // sets the exception flags. + rounder_div round1 (Result, DenormIO, FlagsIn, + rm, P, OvEn, UnEn, expF, + sel_inv, Invalid, DenormIn, signResult, + q1, qm1, qp1, q0, qm0, qp0, regr_out); + + // Store the final result and the exception flags in registers; done is passed in the third port position of each flopenr (presumably the enable - confirm against flopenr). 
+ flopenr #(64) rega (clk, reset, done, Result, AS_Result); + flopenr #(1) regb (clk, reset, done, DenormIO, Denorm); + flopenr #(5) regc (clk, reset, done, FlagsIn, Flags); + +endmodule // fpdiv diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 1f878cb1..77d68591 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -108,17 +108,9 @@ module fpu ( logic [63:0] FPUResultW; logic [4:0] FPUFlagsW; - - - - - - - //DECODE STAGE - // top-level controller for FPU fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, @@ -129,14 +121,6 @@ module fpu ( InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW, FPUResultW, FRD1D, FRD2D, FRD3D); - - - - - - - - //***************** // D/E pipe registers @@ -152,18 +136,6 @@ module fpu ( {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); - - - - - - - - - - - - //EXECUTION STAGE // Hazard unit for FPU @@ -198,12 +170,10 @@ module fpu ( .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); - fdivsqrt fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, + fpdiv fdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .DivInput1E, .DivInput2E, .FrmE, .DivOvEn(1'b1), .DivUnEn(1'b1), .FDivStartE, .FDivResultM, .FDivSqrtFlgM, .FDivSqrtDoneE, .FDivBusyE, .HoldInputs, .reset); - - // first of two-stage instance of floating-point add/cvt unit faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .SrcXE, .SrcYE, .FOpCtrlE, .FAddResM, .FAddFlgM); @@ -224,15 +194,6 @@ module fpu ( // mux2 #(`XLEN) FWriteDataMux({{`XLEN-32{1'b0}}, SrcYE[63:32]}, SrcYE[63:64-`XLEN], FmtE, FWriteDataE); assign FWriteDataE = SrcYE[`XLEN-1:0]; - - - - - - - - - //***************** // E/M pipe registers //***************** @@ -255,36 +216,18 @@ 
module fpu ( {FRegWriteM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM); - - - - - - - //BEGIN MEMORY STAGE - mux4 #(64) FResMux(AlignedSrcAM, SgnResM, CmpResM, CvtResM, FResSelM, FResM); mux4 #(5) FFlgMux(5'b0, {4'b0, SgnNVM}, {4'b0, CmpNVM}, CvtFlgM, FResSelM, FFlgM); // mux2 #(`XLEN) SrcXAlignedMux({{`XLEN-32{1'b0}}, SrcXM[63:32]}, SrcXM[63:64-`XLEN], FmtM, SrcXMAligned); mux4 #(`XLEN) IntResMux(CmpResM[`XLEN-1:0], SrcXM[`XLEN-1:0], ClassResM[`XLEN-1:0], CvtResM[`XLEN-1:0], FIntResSelM, FIntResM); - // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({{32{1'b1}}, SrcAM[31:0]}, {{64-`XLEN{1'b1}}, SrcAM}, FmtM, AlignedSrcAM); - - mux5 #(5) FPUFlgMux(5'b0, FMAFlgM, FAddFlgM, FDivSqrtFlgM, FFlgM, FResultSelW, SetFflagsM); - - - - - - - //***************** // M/W pipe registers //***************** @@ -302,16 +245,10 @@ module fpu ( {FRegWriteM, FResultSelM, RdM, FmtM, FWriteIntM}, {FRegWriteW, FResultSelW, RdW, FmtW, FWriteIntW}); - - - - - //######################################### // BEGIN WRITEBACK STAGE //######################################### - mux2 #(64) ReadResMux({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); mux5 #(64) FPUResultMux(ReadResW, FMAResW, FAddResW, FDivResultW, FResW, FResultSelW, FPUResultW); @@ -330,4 +267,3 @@ module fpu ( endgenerate endmodule // fpu -