diff --git a/sim/wave-fpu.do b/sim/wave-fpu.do
index 08b6b3378..421daebba 100644
--- a/sim/wave-fpu.do
+++ b/sim/wave-fpu.do
@@ -16,15 +16,6 @@ add wave -noupdate /testbenchfp/ResMatch
 add wave -noupdate /testbenchfp/FlagMatch
 add wave -noupdate /testbenchfp/CheckNow
 add wave -noupdate /testbenchfp/NaNGood
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
+
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv
new file mode 100644
index 000000000..bf5b1d782
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrt.sv
@@ -0,0 +1,103 @@
+///////////////////////////////////////////
+// divremsqrt.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+ module divremsqrt import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 clk,                             // wired to all four submodules
+  input  logic                 reset,                           // wired to the FSM and the postprocessor
+  input  logic [P.FMTBITS-1:0] FmtE,                            // format select, wired to the preprocessor
+  input  logic                 XsE,                             // X sign, wired to the FSM
+  input  logic [P.NF:0]        XmE, YmE,                        // wired to preprocessor ports Xm / Ym
+  input  logic [P.NE-1:0]      XeE, YeE,                        // wired to preprocessor ports Xe / Ye
+  input  logic                 XInfE, YInfE,                    // wired to the FSM
+  input  logic                 XZeroE, YZeroE,                  // wired to the FSM (XZeroE also to the preprocessor)
+  input  logic                 XNaNE, YNaNE,                    // wired to the FSM
+  input  logic                 FDivStartE, IDivStartE,          // start requests, wired to the FSM
+  input  logic                 StallM,                          // wired to the FSM and the postprocessor
+  input  logic                 FlushE,                          // wired to the FSM
+  input  logic                 SqrtE, SqrtM,                    // SqrtE goes to every submodule, SqrtM only to the postprocessor
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE,  // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]           Funct3E, Funct3M,                // Funct3E to the preprocessor; Funct3M[1] is the postprocessor's RemOpM
+  input  logic                 IntDivE, W64E,                   // wired to the preprocessor (IntDivE also to the FSM)
+  output logic                 DivStickyM,                      // wired to the postprocessor
+  output logic                 FDivBusyE, IFDivStartE, FDivDoneE, // FSM handshake signals (presumably driven by fdivsqrtfsm - confirm)
+  output logic [P.NE+1:0]      QeM,                             // wired to the preprocessor
+  output logic [P.DIVb:0]      QmM,                             // wired to the postprocessor
+  output logic [P.XLEN-1:0]    FIntDivResultM                   // wired to the postprocessor
+);
+
+  // Top level of the divide / remainder / square-root unit: it only wires together the
+  // preprocessor, FSM, iterator and postprocessor. Computes X/Y, sqrt(X), A/B, or A%B
+
+  logic [P.DIVb+3:0]  WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]  X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]  D;                            // Iterator Divisor
+  logic [P.DIVb:0]    FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]  FirstC;                       // Step tracker
+  logic               Firstun;                      // Quotient selection
+  logic               WZeroE;                       // Early termination flag
+  logic [P.DURLEN-1:0] CyclesE;                     // FSM cycles
+  logic               SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+  logic               DivStartE;                    // Enable signal for flops during stall (declared but not connected in this module)
+
+  // Integer div/rem signals
+  logic               BZeroM;                       // Denominator is zero
+  logic               IntDivM;                      // Integer operation
+  logic [P.DIVBLEN:0] nM, mM;                       // Shift amounts
+  logic               NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
+  logic [P.XLEN-1:0]  AM;                           // Original Numerator for postprocessor
+  logic               ISpecialCaseE;                // Integer div/remainder special cases
+
+  fdivsqrtpreproc #(P) fdivsqrtpreproc(                  // Preprocessor
+    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
+    // Int-specific 
+    .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
+    .BZeroM, .nM, .mM, .AM, 
+    .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
+
+  fdivsqrtfsm #(P) fdivsqrtfsm(                          // FSM
+    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
+    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
+    // Int-specific 
+    .IDivStartE, .ISpecialCaseE, .IntDivE);
+
+  fdivsqrtiter #(P) fdivsqrtiter(                        // CSA Iterator
+    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, 
+    .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
+
+  fdivsqrtpostproc #(P) fdivsqrtpostproc(                // Postprocessor
+    .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, 
+    .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, 
+    .QmM, .WZeroE, .DivStickyM, 
+    // Int-specific 
+    .nM, .mM, .ALTBM, .AsM, .BZeroM, .NegQuotM, .W64M, .RemOpM(Funct3M[1]), .AM, 
+    .FIntDivResultM);
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtflags.sv b/src/fpu/divremsqrt/divremsqrtflags.sv
new file mode 100644
index 000000000..522d1d597
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtflags.sv
@@ -0,0 +1,182 @@
+
+///////////////////////////////////////////
+// divremsqrtflags.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Post-Processing flag calculation
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtflags import cvw::*;  #(parameter cvw_t P) (
+  input  logic                 Xs,                   // X sign
+  input  logic [P.FMTBITS-1:0] OutFmt,               // output format
+  input  logic                 InfIn,                // is an Inf input being used
+  input  logic                 XInf, YInf,           // inputs are infinity
+  input  logic                 NaNIn,                // is a NaN input being used
+  input  logic                 XSNaN, YSNaN,         // inputs are signaling NaNs
+  input  logic                 XZero, YZero,         // inputs are zero
+  input  logic [P.NE+1:0]      FullRe,               // Re with bits to determine sign and overflow
+  input  logic [P.NE+1:0]      Me,                   // exponent of the normalized result
+  // rounding
+  input  logic                 Plus1,                // do you add one for rounding (not referenced in this module)
+  input  logic                 Round, Guard, Sticky, // bits used to determine rounding
+  input  logic                 UfPlus1,              // do you add one for rounding for the unbounded exponent result
+  // divsqrt
+  input  logic                 DivOp,                // divsqrt operation? (gates the DivInvalid and DivByZero terms)
+  input  logic                 Sqrt,                 // Sqrt?
+  // flags
+  output logic                 DivByZero,            // divide by zero flag
+  output logic                 Overflow,             // overflow flag to select result
+  output logic                 Invalid,              // invalid flag to select the result
+  output logic [4:0]           PostProcFlg           // flags
+);
+
+  logic                        SigNaN;               // is an input a signaling NaN
+  logic                        Inexact;              // final inexact flag
+  logic                        FpInexact;            // floating point inexact flag
+  logic                        DivInvalid;           // invalid condition of the divide / square-root operation
+  logic                        Underflow;            // Underflow flag
+  logic                        ResExpGteMax;         // is the result exponent greater than or equal to the maximum floating point exponent
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Overflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // determine if the result exponent is greater than or equal to the maximum exponent or
+  //  the shift amount is greater than the integer's size (for cvt to int)
+  //      ShiftGtIntSz calculation (NOTE(review): no ShiftGtIntSz signal is computed in this module; diagram looks left over from an int-conversion path):
+  //      a left shift of intlen+1 is still in range but any more than that is an overflow
+  //             initial: |      64 0's         |    XLEN     |
+  //                      |      64 0's         |    XLEN     | << 64
+  //                      |      XLEN           |     00000... |
+  //      65 = ...0 0 0 0 0 1 0 0 0 0 0 1
+  //           |     or    | |    or    |
+  //      33 = ...0 0 0 0 0 0 1 0 0 0 0 1
+  //           |     or      | |  or    |
+  //      larger or equal if:
+  //          - any of the bits after the most significant 1 is one
+  //          - the most significant in 65 or 33 is still a one in the number and
+  //            one of the later bits is one
+  if (P.FPSIZES == 1) begin
+    assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+
+  end else if (P.FPSIZES == 2) begin    
+    assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+        P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+        P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
+        default: ResExpGteMax = 1'bx;
+      endcase
+
+  end else if (P.FPSIZES == 4) begin        
+    always_comb
+      case (OutFmt)
+        P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
+        P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
+        P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
+        P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
+      endcase
+  end
+
+
+  // calculate overflow flag:
+  //                 if the result is greater than or equal to the max exponent (not taking into account sign)
+  //                 |           and the exponent isn't negative
+  //                 |           |                   and no input is infinity or NaN and it is not a divide by zero
+  //                 |           |                   |
+  assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Underflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // calculate underflow flag: detecting tininess after rounding
+  //                  the exponent is negative
+  //                  |                 the result is subnormal
+  //                  |                 |               the result is normal and rounded from a Subnorm
+  //                  |                 |               |                           and if given an unbounded exponent the result does not round
+  //                  |                 |               |                           |                  and if the result is not exact
+  //                  |                 |               |                           |                  |                     and no input is infinity or NaN, and neither DivByZero nor Invalid is raised
+  //                  |                 |               |                           |                  |                     |
+  assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Inexact
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Inexact flag if the result is different from what would be output given infinite precision
+  //      - suppressed when an input is Inf/NaN or when DivByZero or Invalid is raised
+  assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
+  //assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
+
+  //                  if the res is too small to be represented and not 0
+  //                  |                                     and if the res is not invalid (outside the integer bounds)
+  //                  |                                     |   (orphaned annotation: no expression follows it in this module)
+
+  // select the inexact flag to output
+  assign Inexact = FpInexact;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Invalid
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Invalid flag for following cases:
+  //   1) any input is a signaling NaN
+  //   2) divide: Inf / Inf or 0 / 0
+  //   3) square root of a negative X that is neither zero nor accompanied by a NaN input
+
+
+  assign SigNaN = (XSNaN) | (YSNaN) ;
+
+  // invalid condition for divide (first term) and square root (second term)
+  assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
+
+  assign Invalid = SigNaN | (DivInvalid&DivOp);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Divide by Zero
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // if dividing by zero and not 0/0
+  //  - don't set flag if an input is NaN or Inf (IEEE says has to be a finite numerator)
+  assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);  
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // final flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Combine flags
+  //      - bit 4 = Invalid, bit 3 = DivByZero, bit 2 = Overflow, bit 1 = Underflow, bit 0 = Inexact
+  assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
+
+endmodule
+
+
+
+
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
new file mode 100644
index 000000000..02981ea2d
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -0,0 +1,181 @@
+///////////////////////////////////////////
+// divremsqrtpostprocess.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P) (
+    // general signals
+    input logic                             Xs, Ys,     // input signs
+    input logic  [P.NF:0]                   Xm, Ym,     // input mantissas
+    input logic  [2:0]                      Frm,        // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic  [P.FMTBITS-1:0]            Fmt,        // precision of the operation; copied unchanged to OutFmt
+    input logic  [2:0]                      OpCtrl,     // operation control; only OpCtrl[0] (Sqrt) is read in this module
+    input logic                             XZero, YZero, // inputs are zero
+    input logic                             XInf, YInf, // inputs are infinity
+    input logic                             XNaN, YNaN, // inputs are NaN
+    input logic                             XSNaN, YSNaN, // inputs are signaling NaNs
+    input logic  [1:0]                      PostProcSel, // select result to be written to fp register (2'b01 = divsqrt)
+    //fma signals
+    //divide signals
+    input logic                             DivSticky,  // divider sticky bit
+    input logic  [P.NE+1:0]                 DivQe,      // divsqrt exponent
+    input logic  [P.DIVb:0]                 DivQm,      // divsqrt significand
+    // final results
+    output logic [P.FLEN-1:0]               PostProcRes,// postprocessor final result
+    output logic [4:0]                      PostProcFlg // postprocessor flags
+    );
+
+    // general signals
+    logic                       Rs;         // result sign
+    logic [P.NF-1:0]            Rf;         // Result fraction
+    logic [P.NE-1:0]            Re;         // Result exponent
+    logic                       Ms;         // norMalized sign
+    logic [P.CORRSHIFTSZ-1:0]   Mf;         // norMalized fraction
+    logic [P.NE+1:0]            Me;         // normalized exponent
+    logic [P.NE+1:0]            FullRe;     // Re with bits to determine sign and overflow
+    logic                       UfPlus1;    // do you add one (for determining underflow flag)
+    logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt;  // normalization shift amount
+    logic [P.NORMSHIFTSZ-1:0]   ShiftIn;    // input to normalization shift
+    logic [P.NORMSHIFTSZ-1:0]   Shifted;    // the output of the normalized shifter (before shift correction)
+    logic                       Plus1;      // add one to the final result?
+    logic                       Overflow;   // overflow flag used to select results
+    logic                       Invalid;    // invalid flag used to select results
+    logic                       Guard, Round, Sticky; // bits needed to determine rounding
+    logic [P.FMTBITS-1:0]       OutFmt;     // output format
+    // division signals
+    logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shift amount
+    logic [P.NORMSHIFTSZ-1:0]   DivShiftIn; // divsqrt shift input
+    logic [P.NE+1:0]            Qe;         // divsqrt corrected exponent after correction shift
+    logic                       DivByZero;  // divide by zero flag
+    logic                       DivResSubnorm; // is the divsqrt result subnormal
+    logic                       DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
+    // conversion signals (none of these is referenced below; the conversion path is commented out)
+    logic [P.CVTLEN+P.NF:0]     CvtShiftIn; // number to be shifted for converter
+    logic [1:0]                 CvtNegResMsbs; // most significant bits of possibly negated int result
+    logic [P.XLEN+1:0]          CvtNegRes;  // possibly negated integer result
+    logic                       CvtResUf;   // did the convert result underflow
+    logic                       IntInvalid; // invalid integer flag
+    // readability signals (only Sqrt, DivOp, InfIn and NaNIn are driven and used below)
+    logic                       Mult;       // multiply operation
+    logic                       Sqrt;       // is the divsqrt operation sqrt
+    logic                       Int64;      // is the integer 64 bits?
+    logic                       Signed;     // is the operation with a signed integer?
+    logic                       IntToFp;    // is the operation an int->fp conversion?
+    logic                       CvtOp;      // conversion operation
+    logic                       DivOp;      // divider operation
+    logic                       InfIn;      // are any of the inputs infinity
+    logic                       NaNIn;      // are any of the inputs NaN
+
+    // signals to help readability
+    //assign Signed =  OpCtrl[0];
+    //assign Int64 =   OpCtrl[1];
+    //assign IntToFp = OpCtrl[2];
+    //assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
+    //assign CvtOp = (PostProcSel == 2'b00);
+    //assign FmaOp = (PostProcSel == 2'b10);
+    assign DivOp = (PostProcSel == 2'b01);
+    assign Sqrt =  OpCtrl[0];
+
+    // is there an input of infinity or NaN being used
+    assign InfIn = XInf|YInf;
+    assign NaNIn = XNaN|YNaN;
+
+    // choose the output format
+    //      - with the conversion path commented out of this module, the output
+    //        precision is always the one given by Fmt
+    //      - driven unconditionally so that OutFmt is defined for every
+    //        P.FPSIZES (including 1), not only for multi-precision builds;
+    //        the disabled alternatives selected on IntToFp / CvtOp / OpCtrl[1:0]
+    //        (fp -> fp conversions took the output precision from OpCtrl).
+    //      - OutFmt feeds the rounder, the flag logic and the special-case
+    //        result selection below.
+    assign OutFmt = Fmt;
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Normalization
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // final calculations before shifting
+    /*cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,  
+                              .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);*/
+
+    divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+
+    assign ShiftAmt = DivShiftAmt;
+    assign ShiftIn =  DivShiftIn;
+
+    // main normalization shift
+    normshift #(P) normshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+    // correct for divsqrt shift error
+    divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .Shifted, .Mf);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Rounding
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // round to nearest even
+    // round to zero
+    // round to -infinity
+    // round to infinity
+    // round to nearest max magnitude
+
+    // calculate result sign used in rounding unit
+    divremsqrtroundsign #(P) roundsign( .DivOp, .Sqrt, .Xs, .Ys, .Ms);
+
+    divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Qe, 
+                .Ms, .Mf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Sign calculation
+    ///////////////////////////////////////////////////////////////////////////////
+
+    /*resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard,
+                          .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs);*/
+    assign Rs = Ms; // the result sign is simply the sign produced by roundsign
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Flags
+    ///////////////////////////////////////////////////////////////////////////////
+
+    divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero, 
+                .Xs, .OutFmt, .Sqrt,
+                .NaNIn, .Round, .DivByZero,
+                .Guard, .Sticky, .UfPlus1,.DivOp, .FullRe, .Plus1,
+                .Me, .Invalid, .Overflow, .PostProcFlg);
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Select the result
+    ///////////////////////////////////////////////////////////////////////////////
+
+    //negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
+
+    divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero, 
+                .Frm, .OutFmt, .XNaN, .YNaN,  
+                .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
+                .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
new file mode 100644
index 000000000..2911bd920
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -0,0 +1,308 @@
+///////////////////////////////////////////
+// divremsqrtround.sv
+//
+// Written: kekim@hmc.edu, me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Rounder
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+module divremsqrtround import cvw::*;  #(parameter cvw_t P) (
+  input  logic [P.FMTBITS-1:0]     OutFmt,             // output format
+  input  logic [2:0]               Frm,                // rounding mode
+  input  logic                     Ms,                 // normalized sign
+  input  logic [P.CORRSHIFTSZ-1:0] Mf,                 // normalized fraction
+  // divsqrt
+  input  logic                     DivOp,              // is a division operation being done
+  input  logic                     DivSticky,          // divsqrt sticky bit
+  input  logic [P.NE+1:0]          Qe,                 // the divsqrt calculated exponent
+  // outputs
+  output logic [P.NE+1:0]          Me,                 // normalized exponent (a copy of Qe)
+  output logic                     UfPlus1,            // do you add one to the result if given an unbounded exponent
+  output logic [P.NE+1:0]          FullRe,             // Re with bits to determine sign and overflow
+  output logic [P.NE-1:0]          Re,                 // Result exponent
+  output logic [P.NF-1:0]          Rf,                 // Result fraction
+  output logic                     Sticky,             // sticky bit
+  output logic                     Plus1,              // do you add one to the final result
+  output logic                     Round, Guard        // bits needed to calculate rounding
+);
+
+  logic                            UfCalcPlus1;        // calculated plus one for unbounded exponent
+  logic                            NormSticky;         // sticky bit of the normalized fraction
+  logic [P.NF-1:0]                 RoundFrac;          // fraction before the rounding increment (top NF bits of Mf)
+  logic                            FpGuard, FpRound;   // floating point round/guard bits
+  logic                            FpLsbRes;           // least significant bit of floating point result
+  logic                            LsbRes;             // lsb of result
+  logic                            CalcPlus1;          // calculated plus1
+  logic                            FpPlus1;            // do you add one to the fp result
+  logic [P.FLEN:0]                 RoundAdd;           // how much to add to the result
+
+//  what position is XLEN in?
+//  options: 
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN
+  localparam XLENPOS = P.XLEN > P.NF ? 1 : P.XLEN > P.NF1 ? 2 : 3;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  //      {Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1 if result is odd  (LSBNormSum = 1)
+  //          - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
+  //          - plus 1 otherwise
+
+  // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  // round to -infinity
+  //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  // round to infinity
+  //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
+
+  // round to nearest max magnitude
+  //      {Guard, Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1
+  //          - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
+  //          - Plus 1 otherwise
+
+
+  // (this module only produces floating-point results; there is no int/fp result selection here)
+
+  // sticky bit calculation: OR of the Mf bits below the selected format's guard bit
+  if (P.FPSIZES == 1) begin
+
+    // 1: XLEN > NF
+    //      |         XLEN          |
+    //      |    NF     |1|1|
+    //                     ^    ^ if floating point result
+    //                     ^ if not an FMA result
+    if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
+                                         (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
+    // 2: NF > XLEN
+    if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 2) begin
+    // XLEN is either 64 or 32
+    // so half and single are always smaller than XLEN
+
+    // 1: XLEN > NF   > NF1
+    if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~OutFmt) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
+                                          (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
+    // 2: NF   > XLEN > NF1
+    if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~OutFmt) |
+                                          (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+    // 3: NF   > NF1  > XLEN
+    if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 3) begin
+    // 1: XLEN > NF   > NF1   NOTE(review): the NF2..NF1 range is gated by OutFmt==P.FMT1 although RoundAdd places P.FMT2 at the NF2 position - confirm
+    if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~(OutFmt==P.FMT)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
+                                          (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
+    // 2: NF   > XLEN > NF1
+    if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~(OutFmt==P.FMT)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT))) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+    // 3: NF   > NF1  > XLEN
+    if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&(OutFmt==P.FMT1)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1))) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT))) |
+                                          (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+
+  end else if (P.FPSIZES == 4) begin
+    // Quad precision will always be greater than XLEN (so there is no XLENPOS == 1 case here)
+    // 2: NF   > XLEN > NF1
+    if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
+                                          (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&~(OutFmt==P.Q_FMT)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT))) |
+                                          (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
+    // 3: NF   > NF1  > XLEN
+    // The XLEN boundary only splits a range whose two halves carry the same gating term
+    if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
+                                          (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
+                                          (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
+                                          (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT))) |
+                                          (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
+
+  end
+
+
+
+  // final sticky bit: the divider's sticky bit is included only for divsqrt operations
+  //      - it is ORed with the sticky bit collected from the normalized fraction
+  assign Sticky = DivSticky&DivOp | NormSticky;
+
+
+
+
+  // determine guard, round and LSB of the rounded value for the selected format
+  //      - the round bit is also used to determine the underflow flag (through UfPlus1)
+  if (P.FPSIZES == 1) begin
+    assign FpGuard  = Mf[P.CORRSHIFTSZ-P.NF-1];
+    assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
+    assign FpRound  = Mf[P.CORRSHIFTSZ-P.NF-2];
+
+  end else if (P.FPSIZES == 2) begin
+    assign FpGuard  = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
+    assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
+    assign FpRound  = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: begin
+          FpGuard  = Mf[P.CORRSHIFTSZ-P.NF-1];
+          FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
+          FpRound  = Mf[P.CORRSHIFTSZ-P.NF-2];
+        end
+        P.FMT1: begin
+          FpGuard  = Mf[P.CORRSHIFTSZ-P.NF1-1];
+          FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
+          FpRound  = Mf[P.CORRSHIFTSZ-P.NF1-2];
+        end
+        P.FMT2: begin
+          FpGuard  = Mf[P.CORRSHIFTSZ-P.NF2-1];
+          FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
+          FpRound  = Mf[P.CORRSHIFTSZ-P.NF2-2];
+        end
+        default: begin
+          FpGuard  = 1'bx;
+          FpLsbRes = 1'bx;
+          FpRound  = 1'bx;
+        end
+      endcase
+  end else if (P.FPSIZES == 4) begin
+    always_comb
+      case (OutFmt)
+        2'h3: begin // uses the Q_NF bit positions (presumably P.Q_FMT - confirm)
+          FpGuard  = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
+          FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
+          FpRound  = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
+        end
+        2'h1: begin // uses the D_NF bit positions (presumably P.D_FMT - confirm)
+          FpGuard  = Mf[P.CORRSHIFTSZ-P.D_NF-1];
+          FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
+          FpRound  = Mf[P.CORRSHIFTSZ-P.D_NF-2];
+        end
+        2'h0: begin // uses the S_NF bit positions (presumably P.S_FMT - confirm)
+          FpGuard  = Mf[P.CORRSHIFTSZ-P.S_NF-1];
+          FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
+          FpRound  = Mf[P.CORRSHIFTSZ-P.S_NF-2];
+        end
+        2'h2: begin // uses the H_NF bit positions (presumably P.H_FMT - confirm)
+          FpGuard  = Mf[P.CORRSHIFTSZ-P.H_NF-1];
+          FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
+          FpRound  = Mf[P.CORRSHIFTSZ-P.H_NF-2];
+        end
+      endcase
+  end
+
+
+  assign Guard  = FpGuard;
+  assign LsbRes = FpLsbRes;
+  assign Round  = FpRound;
+
+
+  always_comb begin
+    // Determine if you add 1
+    case (Frm)
+      3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
+      3'b001: CalcPlus1 = 0;//round to zero
+      3'b010: CalcPlus1 = Ms;//round down
+      3'b011: CalcPlus1 = ~Ms;//round up
+      3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
+      default: CalcPlus1 = 1'bx;
+    endcase
+    // Determine if you add 1 (for underflow flag)
+    case (Frm)
+      3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
+      3'b001: UfCalcPlus1 = 0;//round to zero
+      3'b010: UfCalcPlus1 = Ms;//round down
+      3'b011: UfCalcPlus1 = ~Ms;//round up
+      3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
+      default: UfCalcPlus1 = 1'bx;
+    endcase
+
+  end
+
+  // If an answer is exact don't round
+  assign Plus1   = CalcPlus1 & (Sticky|Round|Guard);
+  assign FpPlus1 = Plus1;
+  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
+
+
+
+
+  // place Plus1 into the proper position for the format
+  if (P.FPSIZES == 1) begin
+    assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
+
+  end else if (P.FPSIZES == 2) begin
+    //                                  \/FLEN+1
+    //                       | NE+2 |        NF      |
+    //                       '-NE+2-^----NF1----^
+    // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
+    assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
+
+  end else if (P.FPSIZES == 3) begin
+    assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
+
+  end else if (P.FPSIZES == 4)      
+    assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
+
+
+
+  // trim unneeded bits from fraction
+  assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
+
+
+
+  // select the exponent: always the divsqrt exponent in this module
+  assign Me = Qe;
+
+
+
+  // round the result
+  //      - the exponent sits directly above the fraction, so a carry out of the fraction increments the exponent
+  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
+  assign Re = FullRe[P.NE-1:0];
+
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
new file mode 100644
index 000000000..b0dd4270b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -0,0 +1,45 @@
+///////////////////////////////////////////
+// divremsqrtroundsign.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Sign calculation for rounding
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module divremsqrtroundsign import cvw::*; #(parameter cvw_t P) (
+  input  logic Xs,     // sign of operand X
+  input  logic Ys,     // sign of operand Y
+  input  logic Sqrt,   // high for a square root; Y's sign is then ignored
+  input  logic DivOp,  // high when a divsqrt operation is selected
+  output logic Ms      // sign handed to the rounding logic
+);
+
+  logic Qs; // sign of the divsqrt result
+
+  // Y's sign only contributes when the operation is not a square root
+  assign Qs = (~Sqrt & Ys) ^ Xs;
+
+  // the sign is forced low unless a divsqrt operation is selected
+  assign Ms = DivOp & Qs;
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
new file mode 100644
index 000000000..c03f1b5df
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
@@ -0,0 +1,92 @@
+///////////////////////////////////////////
+// divremsqrtshiftcorrection.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: shift correction
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtshiftcorrection import cvw::*; #(parameter cvw_t P) (
+  input  logic [P.NORMSHIFTSZ-1:0] Shifted,            // the normalization shifter output, before the one-bit correction
+  // divsqrt
+  input  logic                     DivOp,              // is it a divsqrt operation
+  input  logic                     DivResSubnorm,      // is the divsqrt result subnormal
+  input  logic [P.NE+1:0]          DivQe,              // the divsqrt result's exponent
+  input  logic                     DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
+  //fma
+  //input logic                     FmaOp,              // is it an fma operation
+  //input logic [P.NE+1:0]          NormSumExp,         // exponent of the normalized sum not taking into account Subnormal or zero results
+  //input logic                     FmaPreResultSubnorm, // is the result subnormal - calculated before LZA correction
+  //input logic                     FmaSZero,
+  // output
+  //output logic [P.NE+1:0]         FmaMe,              // exponent of the normalized sum
+  output logic [P.CORRSHIFTSZ-1:0] Mf,                 // the corrected significand: CorrQmShifted or the top bits of Shifted
+  output logic [P.NE+1:0]          Qe                  // corrected exponent for divider
+);
+
+  logic [P.NORMSHIFTSZ-3:0]        CorrSumShifted;     // output of lzacorrmux; sized to that mux's width (only read by the commented-out FMA path)
+  logic [P.CORRSHIFTSZ-1:0]        CorrQm0, CorrQm1;   // portions of Shifted to select for CorrQmShifted
+  logic [P.CORRSHIFTSZ-1:0]        CorrQmShifted;      // the shifted divsqrt result after one bit shift
+  logic                            ResSubnorm;         // is the result Subnormal (undriven here: its assignment below is commented out)
+  logic                            LZAPlus1;           // set when the MSB of Shifted is one
+  logic                            LeftShiftQm;        // selects CorrQm1 over CorrQm0 in divcorrmux
+
+  // LZA correction
+  assign LZAPlus1 = Shifted[P.NORMSHIFTSZ-1];
+
+  // correct the shifting error caused by the LZA
+  //  - the only possible mantissa for a plus two is all zeroes
+  //  - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
+  mux2 #(P.NORMSHIFTSZ-2) lzacorrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
+
+  // correct the shifting of the divsqrt caused by producing a result in [.5, 2) range
+  // condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
+  assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
+  assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
+  assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
+  mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
+
+  // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
+  always_comb
+    //if(FmaOp)                     Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}};
+    if (DivOp&~DivResSubnorm)       Mf = CorrQmShifted;
+    else                            Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ];
+
+  // Determine sum's exponent
+  //  main exponent issues:
+  //      - LZA was one too large
+  //      - LZA was two too large
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 1
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 2
+  //                          if plus1                     If plus2                                      kill if the result Zero or actually subnormal
+  //                                 |                           |                                       |
+  //assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
+
+  // recalculate if the result is subnormal after LZA correction
+  //assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZ-2]&~Shifted[P.NORMSHIFTSZ-1];
+
+  // the quotient is in the range [.5,2) if there is no early termination
+  // if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
+  assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
new file mode 100644
index 000000000..d7f569add
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -0,0 +1,240 @@
+///////////////////////////////////////////
+// divremsqrtspecialcase.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: special case selection
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module divremsqrtspecialcase import cvw::*; #(parameter cvw_t P) (
+  input  logic                 Xs,                // X sign
+  input  logic [P.NF:0]        Xm, Ym,            // input significands
+  input  logic                 XNaN, YNaN,        // are the inputs NaN
+  input  logic [2:0]           Frm,               // rounding mode
+  input  logic [P.FMTBITS-1:0] OutFmt,            // output format
+  input  logic                 InfIn,             // are any inputs infinity
+  input  logic                 NaNIn,             // are any inputs NaN
+  input  logic                 XInf, YInf,        // are X or Y infinity
+  input  logic                 XZero,             // is X zero
+  input  logic                 Plus1,             // do you add one for rounding
+  input  logic                 Rs,                // the result's sign
+  input  logic                 Invalid, Overflow, // flags to choose the result
+  input  logic [P.NE-1:0]      Re,                // Result exponent
+  input  logic [P.NE+1:0]      FullRe,            // Result full exponent
+  input  logic [P.NF-1:0]      Rf,                // Result fraction
+  // divsqrt
+  input  logic                 DivOp,             // is it a divsqrt operation
+  input  logic                 DivByZero,         // divide by zero flag
+  // outputs
+  output logic [P.FLEN-1:0]    PostProcRes        // final result
+);
+
+  logic [P.FLEN-1:0]           XNaNRes;           // result when X is NaN (only driven when P.IEEE754 is set)
+  logic [P.FLEN-1:0]           YNaNRes;           // result when Y is NaN (only driven when P.IEEE754 is set)
+  logic [P.FLEN-1:0]           InvalidRes;        // result for an invalid operation
+  logic [P.FLEN-1:0]           UfRes;             // underflowed result
+  logic [P.FLEN-1:0]           OfRes;             // overflowed result
+  logic [P.FLEN-1:0]           NormRes;           // normal result
+  logic                        OfResMax;          // does the overflow result output the maximum normalized fp number
+  logic                        KillRes;           // kill the result for underflow
+  logic                        SelOfRes;          // should the overflow result be selected
+
+
+  // does the overflow result output the maximum normalized floating point number
+  // output infinity if the input is infinity
+  assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
+
+  // select correct outputs for special cases
+  if (P.FPSIZES == 1) begin
+    // NaN result selection depending on standard: the input NaN results are only built when P.IEEE754 is set
+    if(P.IEEE754) begin
+      assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+      assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+      assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+    end else begin
+      assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+    end
+
+    assign OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+    assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+    assign NormRes = {Rs, Re, Rf};
+
+  end else if (P.FPSIZES == 2) begin
+    if(P.IEEE754) begin
+      assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+      assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+      assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+    end else begin
+      assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+    end
+
+    always_comb
+      if(OutFmt)
+        if(OfResMax) OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
+        else         OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+      else
+        if(OfResMax) OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
+        else         OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+    assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+    assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+
+  end else if (P.FPSIZES == 3) begin
+    always_comb
+      case (OutFmt)
+        P.FMT: begin
+          if(P.IEEE754) begin
+            XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+            YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end else begin
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end
+
+          OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+          UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {Rs, Re, Rf};
+        end
+        P.FMT1: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+            YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+            InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
+          end
+          OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+          UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
+        end
+        P.FMT2: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
+            YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
+            InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
+          UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
+        end
+        default: begin
+          if(P.IEEE754) begin
+            XNaNRes = (P.FLEN)'(0);
+            YNaNRes = (P.FLEN)'(0);
+            InvalidRes = (P.FLEN)'(0);
+          end else begin
+            InvalidRes = (P.FLEN)'(0);
+          end
+          OfRes = (P.FLEN)'(0);
+          UfRes = (P.FLEN)'(0);
+          NormRes = (P.FLEN)'(0);
+        end
+      endcase
+
+  end else if (P.FPSIZES == 4) begin
+    always_comb
+      case (OutFmt)
+        2'h3: begin
+          if(P.IEEE754) begin
+            XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+            YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end else begin
+            InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
+          end
+
+          OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+          UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {Rs, Re, Rf};
+        end
+        2'h1: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
+            YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
+            InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
+          end
+          OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
+          UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
+        end
+        2'h0: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
+            YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
+            InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
+          UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
+        end
+        2'h2: begin
+          if(P.IEEE754) begin
+            XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
+            YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
+            InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+          end else begin
+            InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
+          end
+
+          OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};
+          // zero is exact if dividing by infinity so don't add 1
+          UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+          NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
+        end
+      endcase
+  end
+
+  // determine if you should kill the result
+  //      - do so if the top bit of the full exponent is set, or for a divsqrt op when X is zero or Y (but not X) is infinity
+  //      - NOTE(review): earlier wording here mentioned Cvt and integer inputs, apparently inherited from the
+  //        shared postprocessor; this module has no integer or convert inputs - confirm and keep in sync
+  assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
+
+  // calculate if the overflow result should be selected
+  assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
+
+  // output infinity with result sign if divide by zero
+  if(P.IEEE754)
+    always_comb
+      if(XNaN)              PostProcRes = XNaNRes;
+      else if(YNaN)         PostProcRes = YNaNRes;
+      else if(Invalid)      PostProcRes = InvalidRes;
+      else if(SelOfRes)     PostProcRes = OfRes;
+      else if(KillRes)      PostProcRes = UfRes;
+      else                  PostProcRes = NormRes;
+  else
+    always_comb
+      if(NaNIn|Invalid)     PostProcRes = InvalidRes;
+      else if(SelOfRes)     PostProcRes = OfRes;
+      else if(KillRes)      PostProcRes = UfRes;
+      else                  PostProcRes = NormRes;
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
new file mode 100644
index 000000000..a9fb58860
--- /dev/null
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -0,0 +1,96 @@
+///////////////////////////////////////////
+// drsu.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+//
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module drsu import cvw::*; #(parameter cvw_t P) (
+  input  logic                 clk,
+  input  logic                 reset,
+  input  logic [P.FMTBITS-1:0] FmtE,                     // format; drives both divremsqrt (FmtE) and the postprocessor (Fmt)
+  input  logic                 XsE, YsE,                 // operand signs; YsE goes only to the postprocessor
+  input  logic [P.NF:0]        XmE, YmE,                 // operand significands
+  input  logic [P.NE-1:0]      XeE, YeE,                 // operand exponents
+  input  logic                 XInfE, YInfE,
+  input  logic                 XZeroE, YZeroE,
+  input  logic                 XNaNE, YNaNE,
+  input  logic                 XSNaNE, YSNaNE,           // forwarded to the postprocessor's XSNaN/YSNaN ports only
+  input  logic                 FDivStartE, IDivStartE,
+  input  logic                 StallM,
+  input  logic                 FlushE,
+  input  logic                 SqrtE, SqrtM,
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]           Funct3E, Funct3M,
+  input  logic                 IntDivE, W64E,
+  input  logic [2:0]           Frm,                      // forwarded to the postprocessor's Frm port
+  input  logic [2:0]           OpCtrl,                   // forwarded to the postprocessor's OpCtrl port
+  input  logic [1:0]           PostProcSel,              // forwarded to the postprocessor's PostProcSel port
+  output logic                 FDivBusyE, IFDivStartE, FDivDoneE,
+  output logic [P.FLEN-1:0]    FResM,                    // driven by the postprocessor's PostProcRes
+  output logic [P.XLEN-1:0]    FIntDivResultM,           // driven by divremsqrt
+  output logic [4:0]           FlgM                      // driven by the postprocessor's PostProcFlg
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B
+  // NOTE(review): of the locals below only QmM, QeM and DivStickyM are referenced in this module; the rest appear to be leftovers
+  logic [P.DIVb+3:0]           WS, WC;                   // Partial remainder components
+  logic [P.DIVb+3:0]           X;                        // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]           D;                        // Iterator Divisor
+  logic [P.DIVb:0]             FirstU, FirstUM;          // Intermediate result values
+  logic [P.DIVb+1:0]           FirstC;                   // Step tracker
+  logic                        Firstun;                  // Quotient selection
+  logic                        WZeroE;                   // Early termination flag
+  logic [P.DURLEN-1:0]         CyclesE;                  // FSM cycles
+  logic                        SpecialCaseM;             // Divide by zero, square root of negative, etc.
+  logic                        DivStartE;                // Enable signal for flops during stall
+
+  // Integer div/rem signals
+  logic                        BZeroM;                   // Denominator is zero
+  logic                        IntDivM;                  // Integer operation
+  logic [P.DIVBLEN:0]          nM, mM;                   // Shift amounts
+  logic                        NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor
+  logic [P.XLEN-1:0]           AM;                       // Original Numerator for postprocessor
+  logic                        ISpecialCaseE;            // Integer div/remainder special cases
+  logic [P.DIVb:0]             QmM;                      // divremsqrt QmM output, fed to the postprocessor's DivQm
+  logic [P.NE+1:0]             QeM;                      // divremsqrt QeM output, fed to the postprocessor's DivQe
+  logic                        DivStickyM;               // divremsqrt DivStickyM output, fed to the postprocessor's DivSticky
+
+  divremsqrt #(P) divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE,
+                  .XeE, .YeE, .SqrtE, .SqrtM,
+                  .XInfE, .YInfE, .XZeroE, .YZeroE,
+                  .XNaNE, .YNaNE,
+                  .FDivStartE, .IDivStartE, .W64E,
+                  .StallM, .DivStickyM, .FDivBusyE, .QeM,
+                  .QmM,
+                  .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
+                  .Funct3E, .IntDivE, .FIntDivResultM,
+                  .FDivDoneE, .IFDivStartE);
+  divremsqrtpostprocess #(P) divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl,
+                  .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE),
+                  .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivQe(QeM), .DivQm(QmM), .PostProcRes(FResM), .PostProcFlg(FlgM));
+endmodule
+
diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv
index effa4ff29..3584a05c8 100644
--- a/src/lsu/lsu.sv
+++ b/src/lsu/lsu.sv
@@ -232,7 +232,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
       // **** create config to support DTIM with floating point.
dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), - .ReadDataWordM(DTIMReadDataWordM[P.XLEN-1:0]), .ByteMaskM(ByteMaskM[P.XLEN/8-1:0])); + .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM[P.LLEN/8-1:0])); end else begin end if (P.BUS_SUPPORTED) begin : bus @@ -308,11 +308,11 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ahbinterface #(P.XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), - .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM), + .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM[P.XLEN/8-1:0]), .WriteData(LSUWriteDataM[P.XLEN-1:0]), .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); // Mux between the 2 sources of read data, 0: Bus, 1: DTIM - if(P.DTIM_SUPPORTED) mux2 #(P.XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM); + if(P.DTIM_SUPPORTED) mux2 #(P.XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM[P.XLEN-1:0], SelDTIM, ReadDataWordMuxM[P.XLEN-1:0]); else assign ReadDataWordMuxM = FetchBuffer[P.XLEN-1:0]; assign LSUHBURST = 3'b0; assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0; diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv index 08d674f1d..057ab9eab 100644 --- a/testbench/testbench-fp.sv +++ b/testbench/testbench-fp.sv @@ -1,6 +1,6 @@ /////////////////////////////////////////// // -// Written: me@KatherineParry.com, james.stine@okstate.edu +// Written: me@KatherineParry.com, james.stine@okstate.edu, kekim@hmc.edu // // Purpose: Testbench for UCB Testfloat on Wally // @@ -56,7 +56,8 @@ module testbenchfp; logic WriteIntVal; // value of the current WriteInt logic [P.FLEN-1:0] X, Y, Z; // inputs read from TestFloat logic [P.FLEN-1:0] XPostBox; // inputs read from TestFloat - logic [P.XLEN-1:0] SrcA; // integer input + logic 
[P.XLEN-1:0] SrcA, SrcB; // integer input + logic W64; // is W64 instruction logic [P.FLEN-1:0] Ans; // correct answer from TestFloat logic [P.FLEN-1:0] Res; // result from other units logic [4:0] AnsFlg; // correct flags read from testfloat @@ -84,6 +85,7 @@ module testbenchfp; logic [P.DIVb:0] Quot; logic CvtResSubnormUfE; logic DivStart; + logic IDivStart; logic FDivBusyE; logic OldFDivBusyE; logic reset = 1'b0; @@ -118,11 +120,14 @@ module testbenchfp; logic [P.NE+1:0] QeM; logic [P.DIVb:0] QmM; logic [P.XLEN-1:0] FIntDivResultM; + logic IntDivE; logic ResMatch; // Check if result match logic FlagMatch; // Check if IEEE flags match logic CheckNow; // Final check logic FMAop; // Is this a FMA operation? + logic sqrtop; // Is this a SQRT operation? + flopen #(3) funct3reg(.clk, .en(IFDivStartE), .d(Funct3E), .q(Funct3M)); /////////////////////////////////////////////////////////////////////////////////////////////// // ||||||||| |||||||| ||||||| ||||||||| ||||||| |||||||| ||| @@ -149,7 +154,7 @@ module testbenchfp; $display("This simulation for TEST is %s", TEST); $display("This simulation for TEST is of the operand size of %s", TEST_SIZE); if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported - if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion + if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion // add the 128-bit cvtint tests to the to-be-tested list Tests = {Tests, f128rv32cvtint}; // add the op-codes for these tests to the op-code list @@ -167,13 +172,13 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; // add what unit is used and the fmt to their lists (one for each test) for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b11}; + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b11}; end end - end - // if the floating-point conversions are being tested - if (TEST === "cvtfp" | TEST === "all") begin + end + // if 
the floating-point conversions are being tested + if (TEST === "cvtfp" | TEST === "all") begin if (P.D_SUPPORTED) begin // if double precision is supported // add the 128 <-> 64 bit conversions to the to-be-tested list Tests = {Tests, f128f64cvt}; @@ -182,12 +187,12 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b11}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b01}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; end end if (P.F_SUPPORTED) begin // if single precision is supported @@ -198,12 +203,12 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b11}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b00}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; end end if (P.ZFH_SUPPORTED) begin // if half precision is supported @@ -214,16 +219,16 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b11}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b11}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b10}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; end end - end - if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested + end + if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested // add the compare tests/op-ctrls/unit/fmt Tests = {Tests, f128cmp}; OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; @@ -232,8 +237,8 @@ module testbenchfp; Unit = {Unit, `CMPUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === 
"add" | TEST === "all") begin // if addition is being tested + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested // add the addition tests/op-ctrls/unit/fmt Tests = {Tests, f128add}; OpCtrl = {OpCtrl, `ADD_OPCTRL}; @@ -242,8 +247,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested // add the subtraction tests/op-ctrls/unit/fmt Tests = {Tests, f128sub}; OpCtrl = {OpCtrl, `SUB_OPCTRL}; @@ -252,8 +257,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested // add the multiply tests/op-ctrls/unit/fmt Tests = {Tests, f128mul}; OpCtrl = {OpCtrl, `MUL_OPCTRL}; @@ -262,8 +267,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "div" | TEST === "all") begin // if division is being tested + end + if (TEST === "div" | TEST === "all") begin // if division is being tested // add the divide tests/op-ctrls/unit/fmt Tests = {Tests, f128div}; OpCtrl = {OpCtrl, `DIV_OPCTRL}; @@ -272,8 +277,8 @@ module testbenchfp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested + end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested // add the square-root tests/op-ctrls/unit/fmt Tests = {Tests, f128sqrt}; OpCtrl = {OpCtrl, `SQRT_OPCTRL}; @@ -282,8 +287,8 @@ module testbenchfp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b11}; end - end - if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested + end + if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested Tests = {Tests, f128fma}; 
OpCtrl = {OpCtrl, `FMA_OPCTRL}; WriteInt = {WriteInt, 1'b0}; @@ -291,10 +296,19 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b11}; end - end + end + if (TEST === "fdivremsqrt") begin // if unified div sqrt is being tested + Tests = {Tests, f128div, f128sqrt}; + OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + for(int i = 0; i<10; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end end if (P.D_SUPPORTED & (TEST_SIZE == "DP" | TEST_SIZE == "all")) begin // if double precision is supported - if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested + if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested Tests = {Tests, f64rv32cvtint}; // add the op-codes for these tests to the op-code list OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; @@ -311,12 +325,12 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; // add what unit is used and the fmt to their lists (one for each test) for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b01}; + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b01}; end end - end - if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested if (P.F_SUPPORTED) begin // if single precision is supported // add the 64 <-> 32 bit conversions to the to-be-tested list Tests = {Tests, f64f32cvt}; @@ -325,12 +339,12 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b01}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b00}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; end end if 
(P.ZFH_SUPPORTED) begin // if half precision is supported @@ -341,16 +355,16 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b01}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b01}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b10}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; end end - end - if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64cmp}; OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; @@ -359,8 +373,8 @@ module testbenchfp; Unit = {Unit, `CMPUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "add" | TEST === "all") begin // if addition is being tested + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64add}; OpCtrl = {OpCtrl, `ADD_OPCTRL}; @@ -369,8 +383,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64sub}; OpCtrl = {OpCtrl, `SUB_OPCTRL}; @@ -379,8 +393,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64mul}; OpCtrl = {OpCtrl, `MUL_OPCTRL}; @@ -389,8 +403,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b01}; end - end - if 
(TEST === "div" | TEST === "all") begin // if division is being tested + end + if (TEST === "div" | TEST === "all") begin // if division is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64div}; OpCtrl = {OpCtrl, `DIV_OPCTRL}; @@ -399,8 +413,8 @@ module testbenchfp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted + end + if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f64sqrt}; OpCtrl = {OpCtrl, `SQRT_OPCTRL}; @@ -409,8 +423,8 @@ module testbenchfp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b01}; end - end - if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested + end + if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested Tests = {Tests, f64fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; WriteInt = {WriteInt, 1'b0}; @@ -418,10 +432,19 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b01}; end - end + end + if (TEST === "fdivremsqrt") begin // if unified div sqrt is being tested + Tests = {Tests, f64div, f64sqrt}; + OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + for(int i = 0; i<10; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end end if (P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported - if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested + if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested Tests = {Tests, f32rv32cvtint}; // add the op-codes for these tests to the op-code list OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; @@ -438,12 +461,12 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; // add what unit is used and the fmt to their 
lists (one for each test) for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b00}; + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b00}; end end - end - if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested + end + if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversion is being tested if (P.ZFH_SUPPORTED) begin // add the 32 <-> 16 bit conversions to the to-be-tested list Tests = {Tests, f32f16cvt}; @@ -452,16 +475,16 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0}; // add the unit being tested and fmt (input format) for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b00}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b00}; end for(int i = 0; i<5; i++) begin - Unit = {Unit, `CVTFPUNIT}; - Fmt = {Fmt, 2'b10}; + Unit = {Unit, `CVTFPUNIT}; + Fmt = {Fmt, 2'b10}; end end - end - if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested + end + if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32cmp}; OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; @@ -470,8 +493,8 @@ module testbenchfp; Unit = {Unit, `CMPUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "add" | TEST === "all") begin // if addition is being tested + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32add}; OpCtrl = {OpCtrl, `ADD_OPCTRL}; @@ -480,8 +503,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "sub" | TEST === "all") begin // if subtration is being tested + end + if (TEST === "sub" | TEST === "all") begin // if subtration is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32sub}; OpCtrl = {OpCtrl, `SUB_OPCTRL}; @@ -490,8 +513,8 @@ module testbenchfp; Unit = {Unit, 
`FMAUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "mul" | TEST === "all") begin // if multiply is being tested + end + if (TEST === "mul" | TEST === "all") begin // if multiply is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32mul}; OpCtrl = {OpCtrl, `MUL_OPCTRL}; @@ -500,8 +523,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "div" | TEST === "all") begin // if division is being tested + end + if (TEST === "div" | TEST === "all") begin // if division is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32div}; OpCtrl = {OpCtrl, `DIV_OPCTRL}; @@ -510,8 +533,8 @@ module testbenchfp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f32sqrt}; OpCtrl = {OpCtrl, `SQRT_OPCTRL}; @@ -520,8 +543,8 @@ module testbenchfp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b00}; end - end - if (TEST === "fma" | TEST === "all") begin // if fma is being tested + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested Tests = {Tests, f32fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; WriteInt = {WriteInt, 1'b0}; @@ -529,10 +552,19 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b00}; end - end + end + if (TEST === "fdivremsqrt") begin // if unified div sqrt is being tested + Tests = {Tests, f32div, f32sqrt}; + OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + for(int i = 0; i<10; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end end if (P.ZFH_SUPPORTED & (TEST_SIZE == "HP" | TEST_SIZE == "all")) begin // if half precision supported - if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested + if (TEST === "cvtint" | TEST === 
"all") begin // if in conversions are being tested Tests = {Tests, f16rv32cvtint}; // add the op-codes for these tests to the op-code list OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL}; @@ -549,12 +581,12 @@ module testbenchfp; WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1}; // add what unit is used and the fmt to their lists (one for each test) for(int i = 0; i<20; i++) begin - Unit = {Unit, `CVTINTUNIT}; - Fmt = {Fmt, 2'b10}; + Unit = {Unit, `CVTINTUNIT}; + Fmt = {Fmt, 2'b10}; end end - end - if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested + end + if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16cmp}; OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL}; @@ -563,8 +595,8 @@ module testbenchfp; Unit = {Unit, `CMPUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "add" | TEST === "all") begin // if addition is being tested + end + if (TEST === "add" | TEST === "all") begin // if addition is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16add}; OpCtrl = {OpCtrl, `ADD_OPCTRL}; @@ -573,8 +605,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested + end + if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16sub}; OpCtrl = {OpCtrl, `SUB_OPCTRL}; @@ -583,8 +615,8 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested + end + if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16mul}; OpCtrl = {OpCtrl, `MUL_OPCTRL}; @@ -593,8 +625,8 @@ module testbenchfp; 
Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "div" | TEST === "all") begin // if division is being tested + end + if (TEST === "div" | TEST === "all") begin // if division is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16div}; OpCtrl = {OpCtrl, `DIV_OPCTRL}; @@ -603,8 +635,8 @@ module testbenchfp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested + end + if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // add the correct tests/op-ctrls/unit/fmt to their lists Tests = {Tests, f16sqrt}; OpCtrl = {OpCtrl, `SQRT_OPCTRL}; @@ -613,8 +645,8 @@ module testbenchfp; Unit = {Unit, `DIVUNIT}; Fmt = {Fmt, 2'b10}; end - end - if (TEST === "fma" | TEST === "all") begin // if fma is being tested + end + if (TEST === "fma" | TEST === "all") begin // if fma is being tested Tests = {Tests, f16fma}; OpCtrl = {OpCtrl, `FMA_OPCTRL}; WriteInt = {WriteInt, 1'b0}; @@ -622,12 +654,79 @@ module testbenchfp; Unit = {Unit, `FMAUNIT}; Fmt = {Fmt, 2'b10}; end - end + end + if (TEST === "fdivremsqrt") begin // if unified div sqrt is being tested + Tests = {Tests, f16div, f16sqrt}; + OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL}; + WriteInt = {WriteInt, 1'b0, 1'b0}; + for(int i = 0; i<10; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end end + if (TEST === "intrem" | TEST === "intdivrem" ) begin // if integer remainder is being tested + Tests = {Tests, intrem}; + OpCtrl = {OpCtrl, `INTREM_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdiv" | TEST ==="intdivrem") begin // if integer division is being tested + Tests = {Tests, intdiv}; + OpCtrl = {OpCtrl, `INTDIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremu"| TEST ==="intdivrem") begin // if unsigned integer remainder is 
being tested + Tests = {Tests, intremu}; + OpCtrl = {OpCtrl, `INTREMU_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivu"| TEST ==="intdivrem") begin // if unsigned integer division is being tested + Tests = {Tests, intdivu}; + OpCtrl = {OpCtrl, `INTDIVU_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremw"| TEST ==="intdivrem") begin // if w-type integer remainder is being tested + Tests = {Tests, intremw}; + OpCtrl = {OpCtrl, `INTREMW_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intremuw"| TEST ==="intdivrem") begin // if unsigned w-type integer remainder is being tested + Tests = {Tests, intremuw}; + OpCtrl = {OpCtrl, `INTREMUW_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivw"| TEST ==="intdivrem") begin // if w-type integer division is being tested + Tests = {Tests, intdivw}; + OpCtrl = {OpCtrl, `INTDIVW_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + if (TEST === "intdivuw"| TEST ==="intdivrem") begin // if unsigned w-type integer divison is being tested + Tests = {Tests, intdivuw}; + OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + Unit = {Unit, `INTDIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + + end + // check if nothing is being tested if (Tests.size() == 0) begin - $display("TEST %s not supported in this configuration", TEST); - $stop; + $display("TEST %s not supported in this configuration", TEST); + $stop; end end @@ -673,10 +772,10 @@ module testbenchfp; // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector readvectors #(P) readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), - .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, + .VectorNum, 
.Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .SrcB, .Xs, .Ys, .Zs, .Unit(UnitVal), - .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), - .Xm, .Ym, .Zm, .DivStart, + .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), .Funct3E, .W64, + .Xm, .Ym, .Zm, .DivStart, .IDivStart, .IntDivE, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, .XSubnorm, .ZSubnorm, @@ -697,29 +796,29 @@ module testbenchfp; // instantiate devices under test if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma fma #(P) fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), - .Xe(Xe), .Ye(Ye), .Ze(Ze), - .Xm(Xm), .Ym(Ym), .Zm(Zm), - .XZero, .YZero, .ZZero, .Ss, .Se, - .OpCtrl(OpCtrlVal), .Sm, .InvA, .SCnt, .As, .Ps, - .ASticky); + .Xe(Xe), .Ye(Ye), .Ze(Ze), + .Xm(Xm), .Ym(Ym), .Zm(Zm), + .XZero, .YZero, .ZZero, .Ss, .Se, + .OpCtrl(OpCtrlVal), .Sm, .InvA, .SCnt, .As, .Ps, + .ASticky); end - postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), - .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), - .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss), - .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE), - .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE), - .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), - .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, - .FmaASticky(ASticky), .FmaSe(Se), - .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), - .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); + /*postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), + .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss), + .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE), + .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE), + .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), 
+ .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, + .FmaASticky(ASticky), .FmaSe(Se), + .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));*/ if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt - fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), - .XZero(XZero), .OpCtrl(OpCtrlVal), .IntZero, - .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), - .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE)); + fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), + .XZero(XZero), .OpCtrl(OpCtrlVal), .IntZero, + .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), + .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE)); end if (TEST === "cmp" | TEST === "all") begin: fcmp @@ -730,16 +829,39 @@ module testbenchfp; if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), - .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]), - .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), - .XNaNE(XNaN), .YNaNE(YNaN), - .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0), - .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .QeM(DivCalcExp), - .QmM(Quot), - .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M), - .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM), - .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE)); + .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL), .SqrtM(OpCtrlVal===`SQRT_OPCTRL), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), + .XNaNE(XNaN), .YNaNE(YNaN), + .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0), + .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .QeM(DivCalcExp), + .QmM(Quot), + .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M), + 
.Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM), + .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE)); end + if (TEST === "fdivremsqrt" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" | TEST ==="intdivrem") begin: divremsqrt + drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), + .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT), .SqrtM(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .PostProcSel(UnitVal[1:0]), + .XNaNE(XNaN), .YNaNE(YNaN), .OpCtrl(OpCtrlVal), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .Frm(FrmVal), + .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64), + .StallM(1'b0), .FDivBusyE, + .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M), + .Funct3E(Funct3E), .IntDivE(IntDivE), + .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg)); + end + else begin: postprocess + postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), + .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss), + .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE), + .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE), + .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), + .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, + .FmaASticky(ASticky), .FmaSe(Se), + .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); + end assign CmpFlg[3:0] = 0; @@ -754,8 +876,8 @@ module testbenchfp; // the IDLE state. 
initial begin - #0 reset = 1'b1; - #25 reset = 1'b0; + #0 reset = 1'b1; + #25 reset = 1'b0; end /////////////////////////////////////////////////////////////////////////////////////////////// @@ -771,13 +893,13 @@ module testbenchfp; // Check if the correct answer and result is a NaN always_comb begin - if (UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin - // an integer output can't be a NaN - AnsNaN = 1'b0; - ResNaN = 1'b0; + if (UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT | UnitVal === `INTDIVUNIT | (UnitVal === `DIVREMSQRTUNIT && WriteIntVal == 1'b1)) begin + // an integer output can't be a NaN + AnsNaN = 1'b0; + ResNaN = 1'b0; end else if (UnitVal === `CVTFPUNIT) begin - case (OpCtrlVal[1:0]) + case (OpCtrlVal[1:0]) 4'b11: begin // quad AnsNaN = &Ans[P.Q_LEN-2:P.NF]&(|Ans[P.Q_NF-1:0]); ResNaN = &Res[P.Q_LEN-2:P.NF]&(|Res[P.Q_NF-1:0]); @@ -794,10 +916,10 @@ module testbenchfp; AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]); ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]); end - endcase + endcase end else begin - case (FmtVal) + case (FmtVal) 4'b11: begin // quad AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]); ResNaN = &Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]); @@ -814,27 +936,29 @@ module testbenchfp; AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]); ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]); end - endcase + endcase end end always_comb begin // select the result to check case (UnitVal) - `FMAUNIT: Res = FpRes; - `DIVUNIT: Res = FpRes; - `CMPUNIT: Res = CmpRes; - `CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes; - `CVTFPUNIT: Res = FpRes; + `FMAUNIT: Res = FpRes; + `DIVUNIT: Res = FpRes; + `CMPUNIT: Res = CmpRes; + `CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes; + `CVTFPUNIT: Res = FpRes; + `INTDIVUNIT: Res = IntRes; endcase // select the flag to check case (UnitVal) - `FMAUNIT: ResFlg = Flg; - `DIVUNIT: ResFlg = Flg; - `CMPUNIT: ResFlg = CmpFlg; - `CVTINTUNIT: ResFlg = Flg; - `CVTFPUNIT: ResFlg = Flg; + `FMAUNIT: ResFlg = Flg; +
`DIVUNIT: ResFlg = Flg; + `CMPUNIT: ResFlg = CmpFlg; + `CVTINTUNIT: ResFlg = Flg; + `CVTFPUNIT: ResFlg = Flg; + `INTDIVUNIT: ResFlg = Flg; endcase end @@ -846,33 +970,33 @@ module testbenchfp; always @(posedge clk) begin // Add extra clock cycles in beginning for fdivsqrt to adequate reset state if (~(FDivBusyE|DivStart)|(UnitVal != `DIVUNIT)) begin - // This allows specific number of clocks to allow each vector - // to complete for division or square root. It is an - // arbitrary value and can be changed, if needed. - case (FmtVal) - // QP - 4'b11: begin - repeat (20) - @(posedge clk); - end - // HP - 4'b10: begin - repeat (14) - @(posedge clk); - end - // DP - 4'b01: begin - repeat (18) - @(posedge clk); - end - // SP - 4'b00: begin - repeat (16) - @(posedge clk); - end - endcase // case (FmtVal) - if (reset != 1'b1) - VectorNum += 1; // increment the vector + // This allows specific number of clocks to allow each vector + // to complete for division or square root. It is an + // arbitrary value and can be changed, if needed. 
+ case (FmtVal) + // QP + 4'b11: begin + repeat (20) + @(posedge clk); + end + // HP + 4'b10: begin + repeat (14) + @(posedge clk); + end + // DP + 4'b01: begin + repeat (18) + @(posedge clk); + end + // SP + 4'b00: begin + repeat (16) + @(posedge clk); + end + endcase // case (FmtVal) + if (reset != 1'b1) + VectorNum += 1; // increment the vector end end @@ -882,7 +1006,7 @@ module testbenchfp; // - the sign of the NaN does not matter for the opperations being tested // - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT) - case (FmtVal) + case (FmtVal) 4'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | @@ -903,9 +1027,9 @@ module testbenchfp; (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})) | (ZNaN&(Res[P.H_LEN-2:0] === {Z[P.H_LEN-2:P.H_NF],1'b1,Z[P.H_NF-2:0]}))); - endcase + endcase else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format - case (OpCtrlVal[1:0]) + case (OpCtrlVal[1:0]) 2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) | (AnsNaN&(Res[P.Q_LEN-2:0] === Ans[P.Q_LEN-2:0])) | @@ -926,7 +1050,7 @@ module testbenchfp; (AnsNaN&(Res[P.H_LEN-2:0] === Ans[P.H_LEN-2:0])) | (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]}))); - endcase + endcase else NaNGood = 1'b0; // integers can't be NaNs @@ -944,18 +1068,22 @@ module testbenchfp; // wait till the division result is done or one extra cylcle for early termination (to simulate 
the EM pipline stage) assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx)); assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx)); - assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL); + assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal == `INTDIVU_OPCTRL) | (OpCtrlVal ==`INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMW_OPCTRL) | (OpCtrlVal ==`INTREMUW_OPCTRL) | (OpCtrlVal == `INTDIVW_OPCTRL) | (OpCtrlVal == `INTDIVUW_OPCTRL); assign FMAop = (OpCtrlVal == `FMAUNIT); assign DivDone = OldFDivBusyE & ~FDivBusyE; // Maybe change OpCtrl but for now just look at TEST for fma test assign CheckNow = ((DivDone | ~divsqrtop) | (TEST == "add" | TEST == "fma" | TEST == "sub")) & (UnitVal !== `CVTINTUNIT) & (UnitVal !== `CMPUNIT); if (~(ResMatch & FlagMatch) & CheckNow) begin - errors += 1; - $display("\nError in %s", Tests[TestNum]); - $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); - $stop; + integer fd; + fd = $fopen("fperr.out","a"); + $fwrite(fd, "%h_%h_%h_%2h\n",X,Y,Ans,AnsFlg); + $fclose(fd); + errors += 1; + $display("\nError in %s", Tests[TestNum]); + $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n SrcB: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, SrcB, Res, ResFlg, Ans, AnsFlg); + $display("time: %t", $realtime); end // TestFloat sets the result to all 1's when there is an invalid result, however in @@ -965,36 +1093,36 @@ module testbenchfp; // Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but // the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff...
else if ((UnitVal === `CVTINTUNIT) & - ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[P.XLEN-1:0] === (P.XLEN)'(0))) | - (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[P.XLEN-1:0] === {1'b0, {P.XLEN-1{1'b1}}})) | - (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[P.XLEN-1:0] === {{P.XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | - (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin - errors += 1; - $display("There is an error in %s", Tests[TestNum]); - $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); - $stop; + ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[P.XLEN-1:0] === (P.XLEN)'(0))) | + (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[P.XLEN-1:0] === {1'b0, {P.XLEN-1{1'b1}}})) | + (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[P.XLEN-1:0] === {{P.XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | + (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin + errors += 1; + $display("There is an error in %s", Tests[TestNum]); + $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); + $stop; end if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the eof - // increment the test - TestNum += 1; - // clear the vectors - for(int i=0; i<6133248; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}}; - // read next files - $readmemh({`PATH, Tests[TestNum]}, TestVectors); - // set the vector index back to 0 - VectorNum = 0; - // incemet the operation if all the rounding modes have been tested - if (FrmNum === 4) OpCtrlNum += 1; - // increment the rounding mode or loop back to rne - if (FrmNum < 4) FrmNum += 1; - else FrmNum = 0; - // if no more Tests - finish - if (Tests[TestNum] === "") begin + // increment the test + 
TestNum += 1; + // clear the vectors + for(int i=0; i<6133248; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}}; + // read next files + $readmemh({`PATH, Tests[TestNum]}, TestVectors); + // set the vector index back to 0 + VectorNum = 0; + // incemet the operation if all the rounding modes have been tested + if (FrmNum === 4 | TEST === "intdivrem") OpCtrlNum += 1; + // increment the rounding mode or loop back to rne + if (FrmNum < 4) FrmNum += 1; + else FrmNum = 0; + // if no more Tests - finish + if (Tests[TestNum] === "") begin $display("\nAll Tests completed with %d errors\n", errors); $stop; - end - $display("Running %s vectors", Tests[TestNum]); + end + $display("Running %s vectors", Tests[TestNum]); end end endmodule @@ -1011,6 +1139,7 @@ module readvectors ( input logic [2:0] OpCtrl, output logic [P.FLEN-1:0] Ans, output logic [P.XLEN-1:0] SrcA, + output logic [P.XLEN-1:0] SrcB, output logic [4:0] AnsFlg, output logic Xs, Ys, Zs, // sign bits of XYZ output logic [P.NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) @@ -1022,6 +1151,10 @@ module readvectors ( output logic XInf, YInf, ZInf, // is XYZ infinity output logic XExpMax, output logic DivStart, + output logic IDivStart, + output logic IntDivE, + output logic [2:0] Funct3E, + output logic W64, output logic [P.FLEN-1:0] X, Y, Z, XPostBox ); @@ -1101,8 +1234,14 @@ module readvectors ( Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]}; end endcase - `DIVUNIT: - if (OpCtrl[0]) + `DIVUNIT: begin + IDivStart=1'b0; + IntDivE=1'b0; + SrcA={P.XLEN{1'b0}}; + SrcB={P.XLEN{1'b0}}; + W64=1'b0; + Funct3E=3'b0; + if (OpCtrl === `SQRT_OPCTRL) case (Fmt) 2'b11: begin // quad #20; @@ -1176,6 +1315,57 @@ module readvectors ( DivStart = 1'b0; end endcase + end + `INTDIVUNIT: begin + #20; + X = {P.FLEN{1'bx}}; + SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)]; + SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN]; + Ans = TestVector[P.D_LEN-1:0]; + AnsFlg = 5'bx; + if (~clk) #5; + 
IDivStart = 1'b1; + IntDivE = 1'b1; + case (OpCtrl) + `INTDIV_OPCTRL: begin + Funct3E = 3'b100; + W64 = 1'b0; + end + `INTREM_OPCTRL: begin + Funct3E = 3'b110; + W64 = 1'b0; + end + `INTREMU_OPCTRL: begin + Funct3E = 3'b111; + W64 = 1'b0; + end + `INTDIVU_OPCTRL: begin + Funct3E = 3'b101; + W64 = 1'b0; + end + `INTDIVW_OPCTRL: begin + Funct3E = 3'b100; + W64 = 1'b1; + end + `INTDIVUW_OPCTRL: begin + Funct3E = 3'b101; + W64 = 1'b1; + end + `INTREMW_OPCTRL: begin + Funct3E = 3'b110; + W64 = 1'b1; + end + `INTREMUW_OPCTRL: begin + Funct3E = 3'b111; + W64 = 1'b1; + end + endcase + #10 // one clk cycle + IDivStart = 1'b0; + IntDivE = 1'b0; + W64 = 1'b0; + end + `CMPUNIT: case (Fmt) 2'b11: begin // quad @@ -1402,4 +1592,4 @@ module readvectors ( .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf, .XEn, .YEn, .ZEn, .XExpMax, .XPostBox); -endmodule +endmodule \ No newline at end of file diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh index e29cc8447..3633094f4 100644 --- a/testbench/tests-fp.vh +++ b/testbench/tests-fp.vh @@ -42,6 +42,14 @@ `define FROM_I_OPCTRL 3'b101 `define FROM_UL_OPCTRL 3'b110 `define FROM_L_OPCTRL 3'b111 +`define INTREMU_OPCTRL 3'b001 +`define INTREM_OPCTRL 3'b010 +`define INTDIV_OPCTRL 3'b011 +`define INTDIVW_OPCTRL 3'b100 +`define INTDIVU_OPCTRL 3'b101 +`define INTREMW_OPCTRL 3'b110 +`define INTREMUW_OPCTRL 3'b111 +`define INTDIVUW_OPCTRL 3'b000 `define RNE 3'b000 `define RZ 3'b001 `define RU 3'b011 @@ -52,6 +60,8 @@ `define CVTINTUNIT 0 `define CVTFPUNIT 4 `define CMPUNIT 3 +`define DIVREMSQRTUNIT 5 +`define INTDIVUNIT 6 string f16rv32cvtint[] = '{ "ui32_to_f16_rne.tv", @@ -580,5 +590,39 @@ string f128fma[] = '{ "f128_mulAdd_rnm.tv" }; +string intrem[] = '{ + "cvw_64_rem-01.tv" +}; + +string intdiv[] = '{ + "cvw_64_div-01.tv" +}; + +string intremu[] = '{ + "cvw_64_remu-01.tv" +}; + +string intdivu[] = '{ + "cvw_64_divu-01.tv" +}; + +string intremw[] = '{ + 
"cvw_64_remw-01.tv" +}; + +string intremuw[] = '{ + "cvw_64_remuw-01.tv" +}; + +string intdivuw[] = '{ + "cvw_64_divuw-01.tv" +}; + +string intdivw[] = '{ + "cvw_64_divw-01.tv" +}; + + + diff --git a/testbench/tests.vh b/testbench/tests.vh index 116d39424..0b029676c 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -2076,10 +2076,11 @@ string arch64zbs[] = '{ }; string custom[] = '{ - `CUSTOM, - "simple", + `RISCVARCHTEST, + "rv64i_m/M/src/div-01.S" + /*"simple", "debug", - "cacheTest" + "cacheTest"*/ }; string testsBP64[] = '{ `IMPERASTEST, diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors.py b/tests/fp/combined_IF_vectors/extract_arch_vectors.py index 12669bc58..6fe63d0c7 100755 --- a/tests/fp/combined_IF_vectors/extract_arch_vectors.py +++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py @@ -128,7 +128,7 @@ def create_vectors(my_config): done = True # put it all together if not done: - translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode) + translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip())) dest_file.write(translation + "\n") else: # print("read false") @@ -174,7 +174,7 @@ def create_vectors(my_config): flags = "XX" # put it all together if not done: - translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode) + translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip())) dest_file.write(translation + "\n") else: # print("read false") @@ -217,7 +217,7 @@ def create_vectors(my_config): flags = "XX" # put it all together if not done: - translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode) + translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip())) 
dest_file.write(translation + "\n") else: # print("read false") @@ -261,7 +261,7 @@ def create_vectors(my_config): # put it all together if not done: - translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode) + translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip())) dest_file.write(translation + "\n") else: # print("read false") @@ -272,22 +272,22 @@ def create_vectors(my_config): src_file2.close() config_list = [ -Config(32, "M", "div", "div-", 0), +Config(32, "M", "div", "div-", 4), Config(32, "F", "fdiv", "fdiv", 1), Config(32, "F", "fsqrt", "fsqrt", 2), -Config(32, "M", "rem", "rem-", 3), -Config(32, "M", "divu", "divu-", 4), -Config(32, "M", "remu", "remu-", 5), -Config(64, "M", "div", "div-", 0), +Config(32, "M", "rem", "rem-", 6), +Config(32, "M", "divu", "divu-", 5), +Config(32, "M", "remu", "remu-", 7), +Config(64, "M", "div", "div-", 4), Config(64, "F", "fdiv", "fdiv", 1), Config(64, "F", "fsqrt", "fsqrt", 2), -Config(64, "M", "rem", "rem-", 3), -Config(64, "M", "divu", "divu-", 4), -Config(64, "M", "remu", "remu-", 5), -Config(64, "M", "divw", "divw-", 6), -Config(64, "M", "divuw", "divuw-", 7), -Config(64, "M", "remw", "remw-", 8), -Config(64, "M", "remuw", "remuw-", 9) +Config(64, "M", "rem", "rem-", 6), +Config(64, "M", "divu", "divu-", 5), +Config(64, "M", "remu", "remu-", 7), +Config(64, "M", "divw", "divw-", 4), +Config(64, "M", "divuw", "divuw-", 5), +Config(64, "M", "remw", "remw-", 6), +Config(64, "M", "remuw", "remuw-", 7) ] for c in config_list: