diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index b9584bc9..206cefbb 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -138,10 +138,10 @@ module fctrl ( endcase 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass - else if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w / fmv.x.d to int register - 7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w.x / fmv.d.x to fp reg + else if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register + 7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) + ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index f4d46501..f7a44363 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -62,7 +62,7 @@ module fdivsqrt( logic [`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection logic WZeroE; // Early termination flag - logic [`DURLEN-1:0] cycles; // FSM cycles + logic [`DURLEN-1:0] CyclesE; // FSM cycles logic SpecialCaseM; // Divide by zero, square root of negative, etc. 
logic DivStartE; // Enable signal for flops during stall @@ -76,7 +76,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles, + .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .BZeroM, .nM, .mM, .AM, @@ -85,7 +85,7 @@ module fdivsqrt( fdivsqrtfsm fdivsqrtfsm( // FSM .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, - .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles, + .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE, // Int-specific .IDivStartE, .ISpecialCaseE, .IntDivE); diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index f1ad32cd..2e17cc25 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrt.sv +// fdivsqrtcycles.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu // Modified: 18 April 2022 // -// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// Purpose: Determine number of cycles for divsqrt // // Documentation: RISC-V System on Chip Design Chapter 13 // @@ -33,7 +33,7 @@ module fdivsqrtcycles( input logic SqrtE, input logic IntDivE, input logic [`DIVBLEN:0] nE, - output logic [`DURLEN-1:0] cycles + output logic [`DURLEN-1:0] CyclesE ); logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits // DIVN = `NF+3 @@ -68,8 +68,8 @@ module fdivsqrtcycles( always_comb begin if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (`IDIV_ON_FPU) cycles = 
IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + if (`IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 5332087a..75010f74 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -39,7 +39,7 @@ module fdivsqrtfsm( input logic StallM, FlushE, input logic IntDivE, input logic ISpecialCaseE, - input logic [`DURLEN-1:0] cycles, + input logic [`DURLEN-1:0] CyclesE, output logic IFDivStartE, output logic FDivBusyE, FDivDoneE, output logic SpecialCaseM @@ -67,7 +67,7 @@ module fdivsqrtfsm( state <= #1 IDLE; end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE // end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE - step <= cycles; + step <= CyclesE; if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 43a5e42b..3de4b252 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -43,44 +43,49 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, output logic ISpecialCaseE, - output logic [`DURLEN-1:0] cycles, + output logic [`DURLEN-1:0] CyclesE, output logic [`DIVBLEN:0] nM, mM, output logic NegQuotM, ALTBM, IntDivM, W64M, output logic AsM, BZeroM, output logic [`XLEN-1:0] AM ); - logic [`DIVb-1:0] XPreproc, DPreproc; + logic [`DIVb-1:0] Xfract, Dfract; logic [`DIVb:0] PreSqrtX; logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // 
Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division - logic signedDiv; // signed division + logic SignedDivE; // signed division logic NegQuotE; // Integer quotient is negative logic AsE, BsE; // Signs of integer inputs logic [`XLEN-1:0] AE; // input A after W64 adjustment + logic ALTBE; + + ////////////////////////////////////////////////////// + // Integer Preprocessing + ////////////////////////////////////////////////////// if (`IDIV_ON_FPU) begin:intpreproc // Int Supported logic [`XLEN-1:0] BE, PosA, PosB; // Extract inputs, signs, zero, depending on W64 mode if applicable - assign signedDiv = ~Funct3E[0]; + assign SignedDivE = ~Funct3E[0]; // Source handling if (`XLEN==64) begin // 64-bit, supports W64 - mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & signedDiv}}, ForwardedSrcAE[31:0]}, W64E, AE); - mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & signedDiv}}, ForwardedSrcBE[31:0]}, W64E, BE); + mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE); + mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE); end else begin // 32 bits only assign AE = ForwardedSrcAE; assign BE = ForwardedSrcBE; end assign AZeroE = ~(|AE); assign BZeroE = ~(|BE); - assign AsE = AE[`XLEN-1] & signedDiv; - assign BsE = BE[`XLEN-1] & signedDiv; + assign AsE = AE[`XLEN-1] & SignedDivE; + assign BsE = BE[`XLEN-1] & SignedDivE; assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative // Force integer inputs to be postiive @@ -90,35 +95,35 @@ module fdivsqrtpreproc ( // Select integer or floating point 
inputs mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX); mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD); - - + mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); end else begin // Int not supported assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}}; assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}}; + assign NumerZeroE = XZeroE; end + ////////////////////////////////////////////////////// + // Integer & FP leading zero and normalization shift + ////////////////////////////////////////////////////// + // count leading zeros for Subnorm FP and to normalize integer inputs lzc #(`DIVb) lzcX (IFX, ell); lzc #(`DIVb) lzcY (IFD, mE); // Normalization shift: shift off leading one - assign XPreproc = (IFX << ell) << 1; - assign DPreproc = (IFD << mE) << 1; + assign Xfract = (IFX << ell) << 1; + assign Dfract = (IFD << mE) << 1; - // append leading 1 (for nonzero inputs) - // shift square root to be in range [1/4, 1) - // Normalized numbers are shifted right by 1 if the exponent is odd - // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. 
- mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); - assign DivX = {3'b000, ~NumerZeroE, XPreproc}; + // *** CT: move to fdivsqrtintpreshift - // Divisior register - flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); + ////////////////////////////////////////////////////// + // Integer Right Shift to digit boundary + // Determine DivXShifted (X shifted to digit boundary) + // and nE (number of fractional digits) + ////////////////////////////////////////////////////// - // ***CT: factor out fdivsqrtcycles if (`IDIV_ON_FPU) begin:intrightshift // Int Supported logic [`DIVBLEN:0] ZeroDiff, p; - logic ALTBE; // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros @@ -128,31 +133,68 @@ module fdivsqrtpreproc ( // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; - /* verilator lint_off WIDTH */ // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps if (`LOGRK > 0) begin // more than 1 bit per cycle logic [`LOGRK-1:0] IntTrunc, RightShiftX; logic [`DIVBLEN:0] TotalIntBits, IntSteps; - + /* verilator lint_off WIDTH */ assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps + /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting assign nE = p; assign DivXShifted = DivX; end - /* verilator lint_on WIDTH */ + end else begin + assign ISpecialCaseE = 0; + end - // Selet 
integer or floating-point operands - mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); + // CT *** fdivsqrtfplead1 + + ////////////////////////////////////////////////////// + // Floating-Point Preprocessing + // append leading 1 (for nonzero inputs) + // shift square root to be in range [1/4, 1) + // Normalized numbers are shifted right by 1 if the exponent is odd + // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. + ////////////////////////////////////////////////////// + + assign DivX = {3'b000, ~NumerZeroE, Xfract}; + + // Sqrt is initialized on step one as R(X-1), so depends on Radix + mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; + else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; + mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); + + ////////////////////////////////////////////////////// + // Select integer or floating-point operands + ////////////////////////////////////////////////////// + + if (`IDIV_ON_FPU) begin mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + end else begin + assign X = PreShiftX; + end + // Divisor register + flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); + + // Floating-point exponent + fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); + flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + + // Number of FSM cycles (to FSM) + fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); + + if (`IDIV_ON_FPU) begin:intpipelineregs // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); @@ -162,21
+204,7 @@ module fdivsqrtpreproc ( flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); if (`XLEN==64) flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); - end else begin - assign NumerZeroE = XZeroE; - assign X = PreShiftX; end - // Sqrt is initialized on step one as R(X-1), so depends on Radix - if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; - else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; - mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); - - // Floating-point exponent - fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); - flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); - - // Number of FSM cycles (to FSM) - fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles); endmodule