diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv
new file mode 100644
index 000000000..3ca85cfb4
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrt.sv
@@ -0,0 +1,104 @@
+///////////////////////////////////////////
+// divremsqrt.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module fdivsqrt(
+  input  logic                clk,
+  input  logic                reset,
+  input  logic [`FMTBITS-1:0] FmtE,
+  input  logic                XsE,
+  input  logic [`NF:0]        XmE, YmE,
+  input  logic [`NE-1:0]      XeE, YeE,
+  input  logic                XInfE, YInfE,
+  input  logic                XZeroE, YZeroE,
+  input  logic                XNaNE, YNaNE,
+  input  logic                FDivStartE, IDivStartE,
+  input  logic                StallM,
+  input  logic                FlushE,
+  input  logic                SqrtE, SqrtM,
+  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]          Funct3E, Funct3M,
+  input  logic                IntDivE, W64E,
+  output logic                DivStickyM,
+  output logic                FDivBusyE, IFDivStartE, FDivDoneE,
+  output logic [`NE+1:0]      QeM,
+  output logic [`DIVb:0]      QmM,
+  output logic [`XLEN-1:0]    FIntDivResultM
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B
+  // NOTE(review): this file is divremsqrt.sv but the module is named fdivsqrt - confirm it is never compiled together with another fdivsqrt definition
+
+  logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [`DIVb+3:0]           D;                            // Iterator Divisor
+  logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [`DIVb+1:0]           FirstC;                       // Step tracker
+  logic                       Firstun;                      // Quotient selection
+  logic                       WZeroE;                       // Early termination flag
+  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
+  logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+
+  // Integer div/rem signals
+  logic                       BZeroM;                       // Denominator is zero
+  logic                       IntDivM;                      // Integer operation
+  logic [`DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
+  logic [`XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic                       ISpecialCaseE;                // Integer div/remainder special cases
+
+  fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
+    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
+    // Int-specific
+    .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
+    .BZeroM, .nM, .mM, .AM,
+    .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
+
+  fdivsqrtfsm fdivsqrtfsm(                                  // FSM
+    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE,
+    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM,
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
+    // Int-specific
+    .IDivStartE, .ISpecialCaseE, .IntDivE);
+
+  fdivsqrtiter fdivsqrtiter(                                // CSA Iterator
+    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D,
+    .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
+
+  fdivsqrtpostproc fdivsqrtpostproc(                        // Postprocessor
+    .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
+    .SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
+    .QmM, .WZeroE, .DivStickyM,
+    // Int-specific
+    .nM, .mM, .ALTBM, .AsM, .BZeroM, .NegQuotM, .W64M, .RemOpM(Funct3M[1]), .AM,
+    .FIntDivResultM);
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
new file mode 100644
index 000000000..698e38a3a
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -0,0 +1,232 @@
+///////////////////////////////////////////
+// divremsqrtpostprocess.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module divremsqrtpostprocess (
+  // general signals
+  input  logic                             Xs, Ys,            // input signs
+  input  logic [`NF:0]                     Xm, Ym, Zm,        // input mantissas
+  input  logic [2:0]                       Frm,               // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
+  input  logic [`FMTBITS-1:0]              Fmt,               // precision 1 = double 0 = single
+  input  logic [2:0]                       OpCtrl,            // choose which operation (look below for values)
+  input  logic                             XZero, YZero,      // inputs are zero
+  input  logic                             XInf, YInf, ZInf,  // inputs are infinity
+  input  logic                             XNaN, YNaN, ZNaN,  // inputs are NaN
+  input  logic                             XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
+  input  logic [1:0]                       PostProcSel,       // select result to be written to fp register
+  //fma signals
+  input  logic                             FmaAs,             // the modified Z sign - depends on instruction
+  input  logic                             FmaPs,             // the product's sign
+  input  logic                             FmaSs,             // Sum sign
+  input  logic [`NE+1:0]                   FmaSe,             // the sum's exponent
+  input  logic [3*`NF+3:0]                 FmaSm,             // the positive sum
+  input  logic                             FmaASticky,        // sticky bit that is calculated during alignment
+  input  logic [$clog2(3*`NF+5)-1:0]       FmaSCnt,           // the normalization shift count
+  //divide signals
+  input  logic                             DivSticky,         // divider sticky bit
+  input  logic [`NE+1:0]                   DivQe,             // divsqrt exponent
+  input  logic [`DIVb:0]                   DivQm,             // divsqrt significand
+  // conversion signals
+  input  logic                             CvtCs,             // the result's sign
+  input  logic [`NE:0]                     CvtCe,             // the calculated exponent
+  input  logic                             CvtResSubnormUf,   // the convert result is subnormal or underflows
+  input  logic [`LOGCVTLEN-1:0]            CvtShiftAmt,       // how much to shift by
+  input  logic                             ToInt,             // is fp->int (since it's writing to the integer register)
+  input  logic [`CVTLEN-1:0]               CvtLzcIn,          // input to the Leading Zero Counter (without msb)
+  input  logic                             IntZero,           // is the integer input zero
+  // final results
+  output logic [`FLEN-1:0]                 PostProcRes,       // postprocessor final result
+  output logic [4:0]                       PostProcFlg,       // postprocessor flags
+  output logic [`XLEN-1:0]                 FCvtIntRes         // the integer conversion result
+  );
+
+  // general signals
+  logic                       Rs;                  // result sign
+  logic [`NF-1:0]             Rf;                  // Result fraction
+  logic [`NE-1:0]             Re;                  // Result exponent
+  logic                       Ms;                  // norMalized sign
+  logic [`CORRSHIFTSZ-1:0]    Mf;                  // norMalized fraction
+  logic [`NE+1:0]             Me;                  // normalized exponent
+  logic [`NE+1:0]             FullRe;              // Re with bits to determine sign and overflow
+  logic                       UfPlus1;             // do you add one (for determining underflow flag)
+  logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt;            // normalization shift amount
+  logic [`NORMSHIFTSZ-1:0]    ShiftIn;             // input to normalization shift
+  logic [`NORMSHIFTSZ-1:0]    Shifted;             // the output of the normalized shifter (before shift correction)
+  logic                       Plus1;               // add one to the final result?
+  logic                       Overflow;            // overflow flag used to select results
+  logic                       Invalid;             // invalid flag used to select results
+  logic                       Guard, Round, Sticky; // bits needed to determine rounding
+  logic [`FMTBITS-1:0]        OutFmt;              // output format
+  // fma signals
+  logic [`NE+1:0]             FmaMe;               // exponent of the normalized sum
+  logic                       FmaSZero;            // is the sum zero
+  logic [3*`NF+5:0]           FmaShiftIn;          // fma shift input
+  logic [`NE+1:0]             NormSumExp;          // exponent of the normalized sum not taking into account Subnormal or zero results
+  logic                       FmaPreResultSubnorm; // is the result subnormal - calculated before LZA correction
+  logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;         // normalization shift amount for fma
+  // division signals
+  logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt;         // divsqrt shift amount
+  logic [`NORMSHIFTSZ-1:0]    DivShiftIn;          // divsqrt shift input
+  logic [`NE+1:0]             Qe;                  // divsqrt corrected exponent after correction shift
+  logic                       DivByZero;           // divide by zero flag
+  logic                       DivResSubnorm;       // is the divsqrt result subnormal
+  logic                       DivSubnormShiftPos;  // is the divsqrt subnorm shift amount positive (not underflowed)
+  // conversion signals
+  logic [`CVTLEN+`NF:0]       CvtShiftIn;          // number to be shifted for converter
+  logic [1:0]                 CvtNegResMsbs;       // most significant bits of possibly negated int result
+  logic [`XLEN+1:0]           CvtNegRes;           // possibly negated integer result
+  logic                       CvtResUf;            // did the convert result underflow
+  logic                       IntInvalid;          // invalid integer flag
+  // readability signals
+  logic                       Mult;                // multiply operation
+  logic                       Sqrt;                // is the divsqrt operation sqrt
+  logic                       Int64;               // is the integer 64 bits?
+  logic                       Signed;              // is the operation with a signed integer?
+  logic                       IntToFp;             // is the operation an int->fp conversion?
+  logic                       CvtOp;               // conversion operation
+  logic                       FmaOp;               // fma operation
+  logic                       DivOp;               // divider operation
+  logic                       InfIn;               // are any of the inputs infinity
+  logic                       NaNIn;               // are any of the inputs NaN
+
+  // signals to help readability
+  //    - only the divsqrt path is instantiated in this unit, so the cvt/fma/int selects are tied off
+  //      instead of being left undriven (the original decode is kept in the trailing comments)
+  assign Signed   = 1'b0;                  // was OpCtrl[0]
+  assign Int64    = 1'b0;                  // was OpCtrl[1]
+  assign IntToFp  = 1'b0;                  // was OpCtrl[2]
+  assign Mult     = 1'b0;                  // was OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]
+  assign CvtOp    = 1'b0;                  // was (PostProcSel == 2'b00)
+  assign FmaOp    = 1'b0;                  // was (PostProcSel == 2'b10)
+  assign CvtResUf = 1'b0;                  // was driven by cvtshiftcalc, which is not instantiated here
+  assign DivOp    = (PostProcSel == 2'b01);
+  assign Sqrt     = OpCtrl[0];
+
+  // is there an input of infinity or NaN being used
+  assign InfIn = XInf|YInf|ZInf;
+  assign NaNIn = XNaN|YNaN|ZNaN;
+
+  // choose the output format
+  //    - no fp -> fp conversion is performed here, so Fmt always carries the precision of the output
+  assign OutFmt = Fmt;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Normalization
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // final calculations before shifting
+  /*cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,
+      .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);*/
+
+  /*fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
+      .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);*/
+
+  divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+
+  assign ShiftAmt = DivShiftAmt;
+  assign ShiftIn  = DivShiftIn;
+  /*
+  // select which unit's output to shift
+  always_comb
+    case(PostProcSel)
+      2'b10: begin // fma
+        ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
+        ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
+      end
+      2'b00: begin // cvt
+        ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
+        ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
+      end
+      2'b01: begin //divsqrt
+        ShiftAmt = DivShiftAmt;
+        ShiftIn = DivShiftIn;
+      end
+      default: begin
+        ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}};
+        ShiftIn = {`NORMSHIFTSZ{1'bx}};
+      end
+    endcase
+  */
+
+  // main normalization shift
+  normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+  // correct for LZA/divsqrt error
+  divremsqrtshiftcorrection shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .Shifted, .Mf);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  // round to zero
+  // round to -infinity
+  // round to infinity
+  // round to nearest max magnitude
+
+  // calculate result sign used in rounding unit
+  // NOTE(review): divremsqrtroundsign.sv defines this module under the name roundsign with ports Xs, Ys, Sqrt, DivOp, Ms only
+  roundsign roundsign(.Xs, .Ys, .Sqrt, .DivOp, .Ms);
+
+  round round(.OutFmt, .Frm, .Plus1, .CvtCe, .Qe, .Ms, .CvtOp, .CvtResSubnormUf,
+      .Mf, .ToInt, .CvtResUf, .DivSticky, .UfPlus1, .FullRe, .Rf, .Re,
+      .Sticky, .Round, .Guard, .Me);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Sign calculation
+  ///////////////////////////////////////////////////////////////////////////////
+
+  /*resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard,
+      .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs);*/
+
+  // resultsign is not instantiated; without an FMA path the result sign is the sign used for rounding
+  assign Rs = Ms;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
+      .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
+      .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
+      .Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
+      .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Select the result
+  ///////////////////////////////////////////////////////////////////////////////
+
+  negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
+
+  specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
+      .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
+      .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
+      .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes);
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
new file mode 100644
index 000000000..396948915
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -0,0 +1,339 @@
+///////////////////////////////////////////
+// divremsqrtround.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Rounder
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// what position is XLEN in?
+//  options:
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN
+`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
+// NOTE(review): this file is divremsqrtround.sv but the module is named round - confirm no other round module is compiled alongside it
+module round(
+  input  logic [`FMTBITS-1:0]      OutFmt,          // output format
+  input  logic [2:0]               Frm,             // rounding mode
+  //input  logic [1:0]               PostProcSel,     // select the postprocessor output
+  input  logic                     Ms,              // normalized sign
+  input  logic [`CORRSHIFTSZ-1:0]  Mf,              // normalized fraction
+  // fma
+  //input  logic                     FmaOp,           // is an fma operation being done?
+  //input  logic [`NE+1:0]           FmaMe,           // exponent of the normalized sum for fma
+  //input  logic                     FmaASticky,      // addend's sticky bit
+
+  // divsqrt
+  //input  logic                     DivOp,           // is a division operation being done
+  input  logic                     DivSticky,       // divsqrt sticky bit
+  input  logic [`NE+1:0]           Qe,              // the divsqrt calculated exponent
+  // cvt
+  input  logic                     CvtOp,           // is a convert operation being done
+  input  logic                     ToInt,           // is the cvt op a cvt to integer
+  input  logic                     CvtResSubnormUf, // is the cvt result subnormal or underflow
+  input  logic                     CvtResUf,        // does the cvt result underflow
+  input  logic [`NE:0]             CvtCe,           // the cvt calculated exponent
+  // outputs
+  output logic [`NE+1:0]           Me,              // normalized exponent (Qe passed through)
+  output logic                     UfPlus1,         // do you add one to the result if given an unbounded exponent
+  output logic [`NE+1:0]           FullRe,          // Re with bits to determine sign and overflow
+  output logic [`NE-1:0]           Re,              // Result exponent
+  output logic [`NF-1:0]           Rf,              // Result fraction
+  output logic                     Sticky,          // sticky bit
+  output logic                     Plus1,           // do you add one to the final result
+  output logic                     Round, Guard     // bits needed to calculate rounding
+);
+
+  logic           UfCalcPlus1;       // calculated plus one for unbounded exponent
+  logic           NormSticky;        // normalized sum's sticky bit
+  logic [`NF-1:0] RoundFrac;         // rounded fraction
+  logic           FpRes;             // is the result a floating point
+  logic           IntRes;            // is the result an integer
+  logic           FpGuard, FpRound;  // floating point round/guard bits
+  logic           FpLsbRes;          // least significant bit of floating point result
+  logic           LsbRes;            // lsb of result
+  logic           CalcPlus1;         // calculated plus1
+  logic           FpPlus1;           // do you add one to the fp result
+  logic [`FLEN:0] RoundAdd;          // how much to add to the result
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  //      {Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1 if result is odd  (LSBNormSum = 1)
+  //           - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
+  //         - plus 1 otherwise
+
+  //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  //  round to -infinity
+  //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  //  round to infinity
+  //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
+
+  //  round to nearest max magnitude
+  //      {Guard, Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1
+  //           - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
+  //         - Plus 1 otherwise
+
+
+  // determine what format the final result is in: int or fp
+  assign IntRes = ToInt;
+  assign FpRes  = ~IntRes;
+
+  // sticky bit calculation
+  if (`FPSIZES == 1) begin
+
+    //     1: XLEN > NF
+    //      |         XLEN          |
+    //      |    NF     |1|1|
+    //                       ^    ^ if floating point result
+    //                       ^ if not an FMA result
+    if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                          (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+    //     2: NF > XLEN
+    if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
+                                          (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+
+  end else if (`FPSIZES == 2) begin
+    // XLEN is either 64 or 32
+    // so half and single are always smaller then XLEN
+
+    // 1: XLEN > NF   > NF1
+    if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
+                                           (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                           (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+    // 2: NF   > XLEN > NF1
+    if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
+                                           (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
+                                           (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+    // 3: NF   > NF1  > XLEN
+    if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
+                                           (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
+                                           (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+
+  end else if (`FPSIZES == 3) begin
+    // 1: XLEN > NF   > NF1
+    if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                           (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
+                                           (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                           (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+    // 2: NF   > XLEN > NF1
+    if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                           (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
+                                           (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
+                                           (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+    // 3: NF   > NF1  > XLEN
+    if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
+                                           (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
+                                           (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
+                                           (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+
+  end else if (`FPSIZES == 4) begin
+    // Quad precision will always be greater than XLEN
+    // 2: NF   > XLEN > NF1
+    if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                           (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                           (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
+                                           (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                           (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
+    // 3: NF   > NF1  > XLEN
+    // The extra XLEN bit will be ored later when calculating the final sticky bit - the ufplus1 not needed for integer
+    if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                           (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                           (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
+                                           (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                           (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
+
+  end
+
+
+
+  // final sticky bit: the divider's sticky bit ORed with every bit of Mf below the round position (NormSticky)
+  //      - the fma/cvt terms of the shared rounder are dropped because those ports are commented out of this module
+  //assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp;
+  assign Sticky = DivSticky | NormSticky;
+
+
+
+
+  // determine round and LSB of the rounded value
+  //      - underflow round bit is used to determine the underflow flag
+  if (`FPSIZES == 1) begin
+    assign FpGuard  = Mf[`CORRSHIFTSZ-`NF-1];
+    assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
+    assign FpRound  = Mf[`CORRSHIFTSZ-`NF-2];
+
+  end else if (`FPSIZES == 2) begin
+    assign FpGuard  = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
+    assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
+    assign FpRound  = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
+
+  end else if (`FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        `FMT: begin
+          FpGuard  = Mf[`CORRSHIFTSZ-`NF-1];
+          FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
+          FpRound  = Mf[`CORRSHIFTSZ-`NF-2];
+        end
+        `FMT1: begin
+          FpGuard  = Mf[`CORRSHIFTSZ-`NF1-1];
+          FpLsbRes = Mf[`CORRSHIFTSZ-`NF1];
+          FpRound  = Mf[`CORRSHIFTSZ-`NF1-2];
+        end
+        `FMT2: begin
+          FpGuard  = Mf[`CORRSHIFTSZ-`NF2-1];
+          FpLsbRes = Mf[`CORRSHIFTSZ-`NF2];
+          FpRound  = Mf[`CORRSHIFTSZ-`NF2-2];
+        end
+        default: begin
+          FpGuard  = 1'bx;
+          FpLsbRes = 1'bx;
+          FpRound  = 1'bx;
+        end
+      endcase
+  end else if (`FPSIZES == 4) begin
+    always_comb
+      case (OutFmt)
+        2'h3: begin
+          FpGuard  = Mf[`CORRSHIFTSZ-`Q_NF-1];
+          FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF];
+          FpRound  = Mf[`CORRSHIFTSZ-`Q_NF-2];
+        end
+        2'h1: begin
+          FpGuard  = Mf[`CORRSHIFTSZ-`D_NF-1];
+          FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF];
+          FpRound  = Mf[`CORRSHIFTSZ-`D_NF-2];
+        end
+        2'h0: begin
+          FpGuard  = Mf[`CORRSHIFTSZ-`S_NF-1];
+          FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF];
+          FpRound  = Mf[`CORRSHIFTSZ-`S_NF-2];
+        end
+        2'h2: begin
+          FpGuard  = Mf[`CORRSHIFTSZ-`H_NF-1];
+          FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF];
+          FpRound  = Mf[`CORRSHIFTSZ-`H_NF-2];
+        end
+      endcase
+  end
+
+  /*assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard;
+  assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes;
+  assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound;*/
+
+  assign Guard  = FpGuard;
+  assign LsbRes = FpLsbRes;
+  assign Round  = FpRound;
+
+
+  always_comb begin
+    // Determine if you add 1
+    case (Frm)
+      3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
+      3'b001: CalcPlus1 = 0;//round to zero
+      3'b010: CalcPlus1 = Ms;//round down
+      3'b011: CalcPlus1 = ~Ms;//round up
+      3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
+      default: CalcPlus1 = 1'bx;
+    endcase
+    // Determine if you add 1 (for underflow flag)
+    case (Frm)
+      3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
+      3'b001: UfCalcPlus1 = 0;//round to zero
+      3'b010: UfCalcPlus1 = Ms;//round down
+      3'b011: UfCalcPlus1 = ~Ms;//round up
+      3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
+      default: UfCalcPlus1 = 1'bx;
+    endcase
+
+  end
+
+  // If an answer is exact don't round
+  assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
+  //assign FpPlus1 = Plus1&~(ToInt&CvtOp);
+  assign FpPlus1 = Plus1;
+  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
+
+
+
+
+  // place Plus1 into the proper position for the format
+  if (`FPSIZES == 1) begin
+    assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
+
+  end else if (`FPSIZES == 2) begin
+    //                   \/FLEN+1
+    //       | NE+2 |        NF      |
+    //       '-NE+2-^----NF1----^
+    // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
+    assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
+
+  end else if (`FPSIZES == 3) begin
+    assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
+
+  end else if (`FPSIZES == 4)
+    assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
+
+
+
+  // trim unneeded bits from fraction
+  assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+
+
+
+  // select the exponent
+  assign Me = Qe;
+  /*always_comb
+    case(PostProcSel)
+      2'b10: Me = FmaMe; // fma
+      2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
+      // 2'b01: Me = DivDone ? Qe : '0; // divide
+      2'b01: Me = Qe; // divide
+      default: Me = '0;
+    endcase*/
+
+
+
+  // round the result
+  //      - if the fraction overflows one should be added to the exponent
+  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
+  assign Re = FullRe[`NE-1:0];
+
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
new file mode 100644
index 000000000..87b72ba48
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -0,0 +1,47 @@
+///////////////////////////////////////////
+// divremsqrtroundsign.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Sign calculation for rounding
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+`include "wally-config.vh"
+
+// NOTE(review): this file is divremsqrtroundsign.sv but the module is named roundsign - confirm instantiations use the matching name
+module roundsign(
+  input  logic Xs,     // x sign
+  input  logic Ys,     // y sign
+  input  logic Sqrt,   // sqrt operation? (when using divsqrt unit)
+  input  logic DivOp,  // is divsqrt operation
+  output logic Ms      // normalized result sign
+);
+
+  logic Qs;            // divsqrt result sign
+
+  // calculate divsqrt sign: Y's sign is ignored for sqrt
+  assign Qs = Xs^(Ys&~Sqrt);
+
+  // Select sign for rounding calculation (forced to 0 when not a divsqrt operation)
+  assign Ms = (Qs&DivOp);
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
new file mode 100644
index 000000000..da21e928b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
@@ -0,0 +1,86 @@
+///////////////////////////////////////////
+// divremsqrtshiftcorrection.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: shift correction
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module divremsqrtshiftcorrection(
+  input  logic [`NORMSHIFTSZ-1:0] Shifted,            // the normalization shifter output before correction
+  // divsqrt
+  input  logic                    DivOp,              // is it a divsqrt operation
+  input  logic                    DivResSubnorm,      // is the divsqrt result subnormal
+  input  logic [`NE+1:0]          DivQe,              // the divsqrt result's exponent
+  input  logic                    DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
+  //fma
+  //input  logic                    FmaOp,              // is it an fma operation
+  //input  logic [`NE+1:0]          NormSumExp,         // exponent of the normalized sum not taking into account Subnormal or zero results
+  //input  logic                    FmaPreResultSubnorm, // is the result subnormal - calculated before LZA correction
+  //input  logic                    FmaSZero,
+  // output
+  //output logic [`NE+1:0]          FmaMe,              // exponent of the normalized sum
+  output logic [`CORRSHIFTSZ-1:0] Mf,                 // the normalized fraction after shift correction
+  output logic [`NE+1:0]          Qe                  // corrected exponent for divider
+);
+
+  logic [`CORRSHIFTSZ-1:0] CorrQm0, CorrQm1;  // portions of Shifted to select for CorrQmShifted
+  logic [`CORRSHIFTSZ-1:0] CorrQmShifted;     // the shifted divsqrt result after one bit shift
+  logic                    LZAPlus1;          // msb of Shifted (name kept from the shared FMA shift correction)
+  logic                    LeftShiftQm;       // should the divsqrt result be shifted one to the left
+
+  // msb of the shifter output; when it is clear the quotient is < 1 and Qe below is decremented
+  assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
+
+  // correct the shifting of the divsqrt caused by producing a result in [.5, 2) range
+  // condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
+  assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
+  assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
+  assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1];
+  mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
+
+  // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
+  always_comb
+    //if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
+    if (DivOp&~DivResSubnorm) Mf = CorrQmShifted;
+    else                      Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+
+  // Determine sum's exponent
+  // main exponent issues:
+  //      - LZA was one too large
+  //      - LZA was two too large
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 1
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 2
+  //                          if plus1                     If plus2                                      kill if the result Zero or actually subnormal
+  //                                 |                           |                                          |
+  //assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}};
+
+  // recalculate if the result is subnormal after LZA correction
+  //assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
+
+  // the quotient is in the range [.5,2) if there is no early termination
+  // if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
+  assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1};
+endmodule
\ No newline at end of file