From e17cfe96223eb4fb7b203e8264a52d758d79711f Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Fri, 19 May 2023 14:20:22 -0700
Subject: [PATCH 01/40] began divremsqrt specific postprocessing

---
 src/fpu/divremsqrt/divremsqrt.sv              | 104 ++++++
 src/fpu/divremsqrt/divremsqrtpostprocess.sv   | 231 ++++++++++++
 src/fpu/divremsqrt/divremsqrtround.sv         | 339 ++++++++++++++++++
 src/fpu/divremsqrt/divremsqrtroundsign.sv     |  46 +++
 .../divremsqrt/divremsqrtshiftcorrection.sv   |  93 +++++
 5 files changed, 813 insertions(+)
 create mode 100644 src/fpu/divremsqrt/divremsqrt.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtpostprocess.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtround.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtroundsign.sv
 create mode 100644 src/fpu/divremsqrt/divremsqrtshiftcorrection.sv

diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv
new file mode 100644
index 000000000..3ca85cfb4
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrt.sv
@@ -0,0 +1,104 @@
+///////////////////////////////////////////
+// divremsqrt.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module fdivsqrt(                                            // NOTE(review): this file is divremsqrt.sv but the module is named fdivsqrt - confirm intended name
+  input  logic                clk,                          // connected to all four submodules
+  input  logic                reset,                        // connected to the FSM and the postprocessor
+  input  logic [`FMTBITS-1:0] FmtE,                         // connected to the preprocessor
+  input  logic                XsE,                          // connected to the FSM
+  input  logic [`NF:0]        XmE, YmE,                     // connected to the preprocessor's Xm, Ym
+  input  logic [`NE-1:0]      XeE, YeE,                     // connected to the preprocessor's Xe, Ye
+  input  logic                XInfE, YInfE,                 // connected to the FSM
+  input  logic                XZeroE, YZeroE,               // XZeroE: preprocessor and FSM; YZeroE: FSM only
+  input  logic                XNaNE, YNaNE,                 // connected to the FSM
+  input  logic                FDivStartE, IDivStartE,       // connected to the FSM
+  input  logic                StallM,                       // connected to the FSM and the postprocessor
+  input  logic                FlushE,                       // connected to the FSM
+  input  logic                SqrtE, SqrtM,                 // SqrtE: preprocessor, FSM, iterator, postprocessor; SqrtM: postprocessor
+  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]          Funct3E, Funct3M,             // Funct3E: preprocessor; only Funct3M[1] is used, as the postprocessor's RemOpM
+  input  logic                IntDivE, W64E,                // IntDivE: preprocessor and FSM; W64E: preprocessor
+  output logic                DivStickyM,                   // connected to the postprocessor
+  output logic                FDivBusyE, IFDivStartE, FDivDoneE, // connected to the FSM; IFDivStartE also goes to the preprocessor and iterator
+  output logic [`NE+1:0]      QeM,                          // connected to the preprocessor
+  output logic [`DIVb:0]      QmM,                          // connected to the postprocessor
+  output logic [`XLEN-1:0]    FIntDivResultM                // connected to the postprocessor
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B; built from a preprocessor, an FSM, an iterator and a postprocessor
+
+  logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [`DIVb+3:0]           D;                            // Iterator Divisor
+  logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [`DIVb+1:0]           FirstC;                       // Step tracker
+  logic                       Firstun;                      // Quotient selection
+  logic                       WZeroE;                       // Early termination flag
+  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
+  logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+  logic                       DivStartE;                    // Enable signal for flops during stall; NOTE(review): not referenced in this module
+                                                            
+  // Integer div/rem signals                                
+  logic                       BZeroM;                       // Denominator is zero
+  logic                       IntDivM;                      // Integer operation
+  logic [`DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
+  logic [`XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic                       ISpecialCaseE;                // Integer div/remainder special cases
+
+  fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
+    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
+    // Int-specific 
+    .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
+    .BZeroM, .nM, .mM, .AM, 
+    .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
+
+  fdivsqrtfsm fdivsqrtfsm(                                  // FSM
+    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
+    .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
+    // Int-specific 
+    .IDivStartE, .ISpecialCaseE, .IntDivE);
+
+  fdivsqrtiter fdivsqrtiter(                                // CSA Iterator
+    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, 
+    .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
+
+  fdivsqrtpostproc fdivsqrtpostproc(                        // Postprocessor
+    .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, 
+    .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, 
+    .QmM, .WZeroE, .DivStickyM, 
+    // Int-specific; RemOpM is taken from bit 1 of Funct3M
+    .nM, .mM, .ALTBM, .AsM, .BZeroM, .NegQuotM, .W64M, .RemOpM(Funct3M[1]), .AM, 
+    .FIntDivResultM);
+endmodule
+
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
new file mode 100644
index 000000000..698e38a3a
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -0,0 +1,231 @@
+///////////////////////////////////////////
+// divremsqrtpostprocess.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Post-Processing: normalization, rounding, sign, flags, special cases
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module divremsqrtpostprocess (
+  // general signals
+  input logic                             Xs, Ys,     // input signs
+  input logic  [`NF:0]                    Xm, Ym, Zm, // input mantissas
+  input logic  [2:0]                      Frm,        // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+  input logic  [`FMTBITS-1:0]             Fmt,        // precision 1 = double 0 = single
+  input logic  [2:0]                      OpCtrl,     // choose which operation (only bit 0 is read here, as Sqrt)
+  input logic                             XZero, YZero,        // inputs are zero
+  input logic                             XInf, YInf, ZInf,    // inputs are infinity
+  input logic                             XNaN, YNaN, ZNaN,    // inputs are NaN
+  input logic                             XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
+  input logic  [1:0]                      PostProcSel,         // select result to be written to fp register (2'b01 = divsqrt)
+  //fma signals
+  input logic                             FmaAs,      // the modified Z sign - depends on instruction
+  input logic                             FmaPs,      // the product's sign
+  input logic                             FmaSs,      // Sum sign
+  input logic  [`NE+1:0]                  FmaSe,      // the sum's exponent
+  input logic  [3*`NF+3:0]                FmaSm,      // the positive sum
+  input logic                             FmaASticky, // sticky bit that is calculated during alignment
+  input logic  [$clog2(3*`NF+5)-1:0]      FmaSCnt,    // the normalization shift count
+  //divide signals
+  input logic                             DivSticky,  // divider sticky bit
+  input logic  [`NE+1:0]                  DivQe,      // divsqrt exponent
+  input logic  [`DIVb:0]                  DivQm,      // divsqrt significand
+  // conversion signals
+  input logic                             CvtCs,      // the result's sign
+  input logic  [`NE:0]                    CvtCe,      // the calculated exponent
+  input logic                             CvtResSubnormUf, // the convert result is subnormal or underflows
+  input logic  [`LOGCVTLEN-1:0]           CvtShiftAmt,// how much to shift by
+  input logic                             ToInt,      // is fp->int (since it's writing to the integer register)
+  input logic  [`CVTLEN-1:0]              CvtLzcIn,   // input to the Leading Zero Counter (without msb)
+  input logic                             IntZero,    // is the integer input zero
+  // final results
+  output logic [`FLEN-1:0]                PostProcRes,// postprocessor final result
+  output logic [4:0]                      PostProcFlg,// postprocessor flags
+  output logic [`XLEN-1:0]                FCvtIntRes  // the integer conversion result
+  );
+  
+  // general signals
+  logic                       Rs;         // result sign
+  logic [`NF-1:0]             Rf;         // Result fraction
+  logic [`NE-1:0]             Re;         // Result exponent
+  logic                       Ms;         // norMalized sign
+  logic [`CORRSHIFTSZ-1:0]    Mf;         // norMalized fraction
+  logic [`NE+1:0]             Me;         // normalized exponent
+  logic [`NE+1:0]             FullRe;     // Re with bits to determine sign and overflow
+  logic                       UfPlus1;    // do you add one (for determining underflow flag)
+  logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt;   // normalization shift amount
+  logic [`NORMSHIFTSZ-1:0]    ShiftIn;    // input to normalization shift
+  logic [`NORMSHIFTSZ-1:0]    Shifted;    // the output of the normalized shifter (before shift correction)
+  logic                       Plus1;      // add one to the final result?
+  logic                       Overflow;   // overflow flag used to select results
+  logic                       Invalid;    // invalid flag used to select results
+  logic                       Guard, Round, Sticky; // bits needed to determine rounding
+  logic [`FMTBITS-1:0]        OutFmt;     // output format
+  // fma signals
+  logic [`NE+1:0]             FmaMe;      // exponent of the normalized sum
+  logic                       FmaSZero;   // is the sum zero
+  logic [3*`NF+5:0]           FmaShiftIn; // fma shift input
+  logic [`NE+1:0]             NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
+  logic                       FmaPreResultSubnorm; // is the result subnormal - calculated before LZA correction
+  logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
+  // division signals
+  logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt;        // divsqrt shift amount
+  logic [`NORMSHIFTSZ-1:0]    DivShiftIn;         // divsqrt shift input
+  logic [`NE+1:0]             Qe;                 // divsqrt corrected exponent after correction shift
+  logic                       DivByZero;          // divide by zero flag
+  logic                       DivResSubnorm;      // is the divsqrt result subnormal
+  logic                       DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
+  // conversion signals
+  logic [`CVTLEN+`NF:0]       CvtShiftIn;         // number to be shifted for converter
+  logic [1:0]                 CvtNegResMsbs;      // most significant bits of possibly negated int result
+  logic [`XLEN+1:0]           CvtNegRes;          // possibly negated integer result
+  logic                       CvtResUf;           // did the convert result underflow
+  logic                       IntInvalid;         // invalid integer flag
+  // readability signals
+  logic                       Mult;       // multiply operation
+  logic                       Sqrt;       // is the divsqrt operation sqrt
+  logic                       Int64;      // is the integer 64 bits?
+  logic                       Signed;     // is the operation with a signed integer?
+  logic                       IntToFp;    // is the operation an int->fp conversion?
+  logic                       CvtOp;      // conversion operation
+  logic                       FmaOp;      // fma operation
+  logic                       DivOp;      // divider operation
+  logic                       InfIn;      // are any of the inputs infinity
+  logic                       NaNIn;      // are any of the inputs NaN
+  // NOTE(review): Signed, Int64, IntToFp, Mult, CvtOp and FmaOp have no driver here (assigns commented out) yet are connected to instances below
+  // signals to help readability
+  //assign Signed =  OpCtrl[0];
+  //assign Int64 =   OpCtrl[1];
+  //assign IntToFp = OpCtrl[2];
+  //assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
+  //assign CvtOp = (PostProcSel == 2'b00);
+  //assign FmaOp = (PostProcSel == 2'b10);
+  assign DivOp = (PostProcSel == 2'b01);
+  assign Sqrt =  OpCtrl[0];
+
+  // is there an input of infinity or NaN being used
+  assign InfIn = XInf|YInf|ZInf;
+  assign NaNIn = XNaN|YNaN|ZNaN;
+
+  // choose the output format depending on the operation (currently always Fmt; the cvt-dependent selects are commented out)
+  //      - fp -> fp: OpCtrl contains the precision of the output
+  //      - otherwise: Fmt contains the precision of the output; NOTE(review): no branch assigns OutFmt when FPSIZES == 1 - confirm
+  if (`FPSIZES == 2) 
+      //assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); 
+      assign OutFmt = Fmt;
+  else if (`FPSIZES == 3 | `FPSIZES == 4) 
+      //assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; 
+      assign OutFmt = Fmt;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Normalization
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // final calculations before shifting (only the divsqrt calculation is instantiated)
+  /*cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,  
+      .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);*/
+
+  /*fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
+      .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);*/
+
+  divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+
+  assign ShiftAmt = DivShiftAmt;
+  assign ShiftIn = DivShiftIn;
+  /*
+  // select which unit's output to shift
+  always_comb
+    case(PostProcSel)
+      2'b10: begin // fma
+        ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
+        ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
+      end
+      2'b00: begin // cvt
+        ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
+        ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
+      end
+      2'b01: begin //divsqrt
+        ShiftAmt = DivShiftAmt;
+        ShiftIn =  DivShiftIn;
+      end
+      default: begin 
+        ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}}; 
+        ShiftIn = {`NORMSHIFTSZ{1'bx}}; 
+      end
+    endcase
+  */
+  
+  // main normalization shift
+  normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
+
+  // correct for LZA/divsqrt error
+  divremsqrtshiftcorrection shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .Shifted, .Mf);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  // round to zero
+  // round to -infinity
+  // round to infinity
+  // round to nearest max magnitude
+  // NOTE(review): divremsqrtroundsign.sv in this patch declares module "roundsign" with only Xs, Ys, Sqrt, DivOp, Ms - confirm this instance binds
+  // calculate result sign used in rounding unit
+  divremsqrtroundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
+  // NOTE(review): the round module in divremsqrtround.sv of this patch comments out FmaASticky, PostProcSel, FmaMe, FmaOp and DivOp - confirm this instance binds
+  round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
+      .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt,  .CvtResUf,
+      .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Sign calculation
+  ///////////////////////////////////////////////////////////////////////////////
+  // NOTE(review): resultsign is commented out, so Rs has no driver in this module but is still passed to specialcase
+  /*resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard,
+      .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs);*/
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
+              .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
+              .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
+              .Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
+              .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Select the result
+  ///////////////////////////////////////////////////////////////////////////////
+
+  negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
+
+  specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
+      .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, 
+      .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
+      .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes);
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
new file mode 100644
index 000000000..396948915
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -0,0 +1,339 @@
+///////////////////////////////////////////
+// divremsqrtround.sv
+//
+// Written: kekim@hmc.edu
+// Modified: 19 May 2023
+//
+// Purpose: Rounder
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// XLENPOS: what position is XLEN in, relative to the fraction widths NF and NF1?
+//  options: 
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN (NOTE(review): a later comment in this file says "half and single" - confirm which is meant)
+`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
+
+module round(                                                // divsqrt rounder: picks guard/round/sticky from Mf, decides Plus1 and adds it to {Me, fraction}
+  input  logic [`FMTBITS-1:0]     OutFmt,             // output format
+  input  logic [2:0]              Frm,                // rounding mode
+  //input  logic [1:0]              PostProcSel,        // select the postprocessor output
+  input  logic                    Ms,                 // normalized sign
+  input  logic [`CORRSHIFTSZ-1:0] Mf,                 // normalized fraction
+  // fma
+  //input  logic                    FmaOp,              // is an fma opperation being done?
+  //input  logic [`NE+1:0]          FmaMe,              // exponent of the normalized sum for fma
+  //input  logic                    FmaASticky,         // addend's sticky bit
+
+  // divsqrt
+  //input  logic                    DivOp,              // is a division opperation being done
+  input  logic                    DivSticky,          // divsqrt sticky bit
+  input  logic [`NE+1:0]          Qe,                 // the divsqrt calculated exponent
+  // cvt
+  input  logic                    CvtOp,              // is a convert operation being done (not read in this module)
+  input  logic                    ToInt,              // is the cvt op a cvt to integer
+  input  logic                    CvtResSubnormUf,    // is the cvt result subnormal or underflow (not read in this module)
+  input  logic                    CvtResUf,           // does the cvt result underflow (not read in this module)
+  input  logic [`NE:0]            CvtCe,              // the cvt calculated exponent (not read in this module)
+  // outputs
+  output logic [`NE+1:0]          Me,                 // normalized exponent (copy of Qe)
+  output logic                    UfPlus1,            // do you add one to the result if given an unbounded exponent
+  output logic [`NE+1:0]          FullRe,             // Re with bits to determine sign and overflow
+  output logic [`NE-1:0]          Re,                 // Result exponent
+  output logic [`NF-1:0]          Rf,                 // Result fraction
+  output logic                    Sticky,             // sticky bit
+  output logic                    Plus1,              // do you add one to the final result
+  output logic                    Round, Guard        // bits needed to calculate rounding
+);
+
+  logic           UfCalcPlus1;        // calculated plus one for unbounded exponent
+  logic           NormSticky;         // normalized sum's sticky bit
+  logic [`NF-1:0] RoundFrac;          // rounded fraction
+  logic           FpRes;              // is the result a floating point
+  logic           IntRes;             // is the result an integer
+  logic           FpGuard, FpRound;   // floating point round/guard bits
+  logic           FpLsbRes;           // least significant bit of floating point result
+  logic           LsbRes;             // lsb of result
+  logic           CalcPlus1;          // calculated plus1
+  logic           FpPlus1;            // do you add one to the fp result 
+  logic [`FLEN:0] RoundAdd;           // how much to add to the result
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Rounding
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // round to nearest even
+  //      {Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1 if result is odd  (LSBNormSum = 1)
+  //          - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
+  //         - plus 1 otherwise
+
+  //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  //  round to -infinity
+  //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
+
+  //  round to infinity
+  //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
+  //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
+
+  //  round to nearest max magnitude
+  //      {Guard, Round, Sticky}
+  //      0x - do nothing
+  //      10 - tie - Plus1
+  //          - don't add 1 if a small number was supposed to be subtracted
+  //      11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
+  //         - Plus 1 otherwise
+
+
+  // determine what format the final result is in: int or fp
+  assign IntRes = ToInt;
+  assign FpRes = ~IntRes;
+
+  // sticky bit calculation: OR of the Mf bits below the guard position of the selected output width
+  if (`FPSIZES == 1) begin
+
+      //     1: XLEN > NF
+      //      |         XLEN          |
+      //      |    NF     |1|1|
+      //                     ^    ^ if floating point result
+      //                     ^ if not an FMA result
+      if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      //     2: NF > XLEN
+      if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+
+  end else if (`FPSIZES == 2) begin
+      // XLEN is either 64 or 32
+      // so half and single are always smaller than XLEN
+
+      // 1: XLEN > NF   > NF1
+      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      // 2: NF   > XLEN > NF1
+      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      // 3: NF   > NF1  > XLEN
+      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
+                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+
+  end else if (`FPSIZES == 3) begin
+      // 1: XLEN > NF   > NF1
+      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      // 2: NF   > XLEN > NF1
+      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      // 3: NF   > NF1  > XLEN
+      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+
+  end else if (`FPSIZES == 4) begin
+      // Quad precision will always be greater than XLEN
+      // 2: NF   > XLEN > NF1
+      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
+                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
+      // 3: NF   > NF1  > XLEN
+      // The extra XLEN bit will be ored later when calculating the final sticky bit - the ufplus1 not needed for integer
+      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
+                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
+
+  end
+  
+
+
+  // final sticky bit: the Mf bits below the rounding position (NormSticky) ORed with the divider's sticky bit
+  //      - NormSticky has to be included, otherwise fraction bits discarded for the selected format are ignored when rounding
+  //assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp;
+  assign Sticky = NormSticky | DivSticky;
+  
+
+
+
+  // determine round and LSB of the rounded value
+  //      - underflow round bit is used to determine the underflow flag
+  if (`FPSIZES == 1) begin
+      assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
+      assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
+      assign FpRound = Mf[`CORRSHIFTSZ-`NF-2];
+
+  end else if (`FPSIZES == 2) begin
+      assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
+      assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
+      assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
+
+  end else if (`FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)
+              `FMT: begin
+                  FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
+                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
+                  FpRound = Mf[`CORRSHIFTSZ-`NF-2];
+              end
+              `FMT1: begin
+                  FpGuard = Mf[`CORRSHIFTSZ-`NF1-1];
+                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF1];
+                  FpRound = Mf[`CORRSHIFTSZ-`NF1-2];
+              end
+              `FMT2: begin
+                  FpGuard = Mf[`CORRSHIFTSZ-`NF2-1];
+                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF2];
+                  FpRound = Mf[`CORRSHIFTSZ-`NF2-2];
+              end
+              default: begin
+                  FpGuard = 1'bx;
+                  FpLsbRes = 1'bx;
+                  FpRound = 1'bx;
+              end
+          endcase
+  end else if (`FPSIZES == 4) begin
+      always_comb
+          case (OutFmt)
+              2'h3: begin
+                  FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1];
+                  FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF];
+                  FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
+              end
+              2'h1: begin
+                  FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1];
+                  FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF];
+                  FpRound = Mf[`CORRSHIFTSZ-`D_NF-2];
+              end
+              2'h0: begin
+                  FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1];
+                  FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF];
+                  FpRound = Mf[`CORRSHIFTSZ-`S_NF-2];
+              end
+              2'h2: begin
+                  FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1];
+                  FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF];
+                  FpRound = Mf[`CORRSHIFTSZ-`H_NF-2];
+              end
+          endcase
+  end
+
+  /*assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard;
+  assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes;
+  assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound;*/
+  // the integer-conversion selects above are disabled: the floating point bits are always used
+  assign Guard =  FpGuard;
+  assign LsbRes = FpLsbRes;
+  assign Round =  FpRound;
+
+
+  always_comb begin
+      // Determine if you add 1
+      case (Frm)
+          3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
+          3'b001: CalcPlus1 = 0;//round to zero
+          3'b010: CalcPlus1 = Ms;//round down
+          3'b011: CalcPlus1 = ~Ms;//round up
+          3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
+          default: CalcPlus1 = 1'bx;
+      endcase
+      // Determine if you add 1 (for underflow flag)
+      case (Frm)
+          3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
+          3'b001: UfCalcPlus1 = 0;//round to zero
+          3'b010: UfCalcPlus1 = Ms;//round down
+          3'b011: UfCalcPlus1 = ~Ms;//round up
+          3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
+          default: UfCalcPlus1 = 1'bx;
+      endcase
+  
+  end
+
+  // If an answer is exact don't round
+  assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
+  //assign FpPlus1 = Plus1&~(ToInt&CvtOp);
+  assign FpPlus1 = Plus1;
+  assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
+
+
+
+
+  // place Plus1 into the proper position for the format
+  if (`FPSIZES == 1) begin
+      assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
+
+  end else if (`FPSIZES == 2) begin
+      // \/FLEN+1
+      //  | NE+2 |        NF      |
+      //  '-NE+2-^----NF1----^
+      // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
+      assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
+
+  end else if (`FPSIZES == 3) begin
+      assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
+
+  end else if (`FPSIZES == 4)      
+      assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
+
+
+
+  // trim unneeded bits from fraction
+  assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+  
+
+
+  // select the exponent (always the divsqrt exponent; the multi-unit select is kept below for reference)
+  assign Me = Qe;
+  /*always_comb
+      case(PostProcSel)
+          2'b10: Me = FmaMe; // fma
+          2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
+          // 2'b01: Me = DivDone ? Qe : '0; // divide
+          2'b01: Me = Qe; // divide
+          default: Me = '0; 
+      endcase*/
+
+
+
+  // round the result
+  //      - if the fraction overflows one should be added to the exponent
+  assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
+  assign Re = FullRe[`NE-1:0];
+
+
+endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
new file mode 100644
index 000000000..87b72ba48
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -0,0 +1,46 @@
+///////////////////////////////////////////
+// divremsqrtroundsign.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 19 May 2023
+//
+// Purpose: Sign calculation for rounding
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+`include "wally-config.vh"
+
+module roundsign(             // computes the result sign handed to the divsqrt rounder
+  input logic         Xs,     // x sign
+  input logic         Ys,     // y sign
+  input logic         Sqrt,   // sqrt operation? (Ys is ignored when set)
+  input logic         DivOp,  // is divsqrt operation (Ms is forced to 0 when clear)
+  output logic        Ms      // normalized result sign
+);
+  // NOTE(review): the file is divremsqrtroundsign.sv and the post-processor instantiates `divremsqrtroundsign`, but this module is named roundsign - confirm
+  logic               Qs;     // divsqrt result sign
+
+  // calculate divsqrt sign: Xs XOR Ys for a divide, Xs alone for a square root
+  assign Qs = Xs^(Ys&~Sqrt);
+
+  // select the sign for the rounding calculation (only valid for a divsqrt operation)
+  assign Ms = (Qs&DivOp);
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
new file mode 100644
index 000000000..da21e928b
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
@@ -0,0 +1,93 @@
+///////////////////////////////////////////
+// divremsqrtshiftcorrection.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: shift correction
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module divremsqrtshiftcorrection(
+  input logic  [`NORMSHIFTSZ-1:0] Shifted,                // the shifted result before the one-bit correction
+  // divsqrt
+  input logic                     DivOp,                  // is it a divsqrt operation
+  input logic                     DivResSubnorm,          // is the divsqrt result subnormal
+  input logic  [`NE+1:0]          DivQe,                  // the divsqrt result's exponent
+  input logic                     DivSubnormShiftPos,     // is the subnorm divider shift amount positive (ie not underflowed)
+  //fma
+  //input logic                     FmaOp,                  // is it an fma opperation
+  //input logic  [`NE+1:0]          NormSumExp,             // exponent of the normalized sum not taking into account Subnormal or zero results
+  //input logic                     FmaPreResultSubnorm,    // is the result subnormal - calculated before LZA corection
+  //input logic                     FmaSZero,
+  // output
+  //output logic [`NE+1:0]          FmaMe,                  // exponent of the normalized sum
+  output logic [`CORRSHIFTSZ-1:0] Mf,                     // the shifted result after correction (selected in the always_comb below)
+  output logic [`NE+1:0]          Qe                      // corrected exponent for divider
+);
+
+  logic [3*`NF+3:0]           CorrSumShifted;             // output of lzacorrmux; not read anywhere in this module
+  logic [`CORRSHIFTSZ-1:0]    CorrQm0, CorrQm1;           // portions of Shifted to select for CorrQmShifted
+  logic [`CORRSHIFTSZ-1:0]    CorrQmShifted;              // the shifted divsqrt result after one bit shift
+  logic                       ResSubnorm;                 // only referenced by the commented-out FMA code below; never driven here
+  logic                       LZAPlus1;                   // msb of Shifted; drives the one-bit mantissa/exponent correction
+  logic                       LeftShiftQm;                // should the divsqrt result be shifted one to the left
+
+  // the msb of Shifted selects the correction (signal name presumably inherited from the FMA LZA logic)
+  assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
+
+  // correct the shifting error caused by the LZA (the result, CorrSumShifted, is unused in this module)
+  //  - the only possible mantissa for a plus two is all zeroes
+  //      - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
+  mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
+
+  // correct the shifting of the divsqrt caused by producing a result in the [.5, 2) range
+  //    condition: if the msb is 1, or the exponent was one but the shifted quotient was < 1 (Subnorm)
+  assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
+  assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
+  assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1];
+  mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
+  
+  // if the divider result was calculated to be subnormal it is already correctly normalized, so select the top shifted bits; otherwise use the corrected quotient
+  always_comb
+    //if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
+    if (DivOp&~DivResSubnorm)  Mf = CorrQmShifted;
+    else                       Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    
+  // Determine sum's exponent (FMA only - the assignment described here is commented out)
+  //  main exponent issues: 
+  //      - LZA was one too large
+  //      - LZA was two too large
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 1
+  //      - if the result was calculated to be subnorm but it's norm and the LZA was off by 2
+  //                          if plus1                    If plus2                               kill if the result Zero or actually subnormal
+  //                          |                           |                                      |
+  //assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}};
+  
+  // recalculate if the result is subnormal after LZA correction
+  //assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
+
+  // the quotient is in the range [.5,2) if there is no early termination
+  // exponent is zeroed for a subnormal result with a positive shift; otherwise subtract 1 when the msb of Shifted is 0 (quotient < 1)
+  assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1};
+endmodule
\ No newline at end of file

From 159a994475d04a89984dcf3eab1a0eb72fc2b825 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Fri, 19 May 2023 14:42:52 -0700
Subject: [PATCH 02/40] divremsqrtflags first pass

---
 src/fpu/divremsqrt/divremsqrtflags.sv | 184 ++++++++++++++++++++++++++
 1 file changed, 184 insertions(+)
 create mode 100644 src/fpu/divremsqrt/divremsqrtflags.sv

diff --git a/src/fpu/divremsqrt/divremsqrtflags.sv b/src/fpu/divremsqrt/divremsqrtflags.sv
new file mode 100644
index 000000000..7924e0624
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtflags.sv
@@ -0,0 +1,184 @@
+
+///////////////////////////////////////////
+// divremsqrtflags.sv
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Post-Processing flag calculation
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+`include "wally-config.vh"
+
+module flags(
+  input  logic                Xs,                     // X sign
+  input  logic [`FMTBITS-1:0] OutFmt,                 // output format
+  input  logic                InfIn,                  // is an Inf input being used
+  input  logic                XInf, YInf, ZInf,       // inputs are infinity (ZInf is not read in this module)
+  input  logic                NaNIn,                  // is a NaN input being used
+  input  logic                XSNaN, YSNaN, ZSNaN,    // inputs are signaling NaNs (ZSNaN is not read in this module)
+  input  logic                XZero, YZero,           // inputs are zero
+  input  logic [`NE+1:0]      FullRe,                 // Re with bits to determine sign and overflow
+  input  logic [`NE+1:0]      Me,                     // exponent of the normalized result
+  // rounding
+  input  logic                Plus1,                  // do you add one for rounding (not read in this module)
+  input  logic                Round, Guard, Sticky,   // bits used to determine rounding
+  input  logic                UfPlus1,                // do you add one for rounding for the unbounded exponent result
+  // divsqrt
+  input  logic                DivOp,                  // divsqrt operation?
+  input  logic                Sqrt,                   // Sqrt?
+  // flags
+  output logic                DivByZero,              // divide by zero flag
+  output logic                Overflow,               // overflow flag to select result
+  output logic                Invalid,                // invalid flag to select the result
+  output logic                IntInvalid,             // invalid integer result to select - NOTE(review): never driven in this module
+  output logic [4:0]          PostProcFlg             // flags
+);
+  // NOTE(review): the file is divremsqrtflags.sv and the post-processor instantiates `divremsqrtflags`, but this module is named flags - confirm
+  logic               SigNaN;         // is an input a signaling NaN
+  logic               Inexact;        // final inexact flag
+  logic               FpInexact;      // floating point inexact flag
+  logic               DivInvalid;     // divsqrt invalid flag
+  logic               Underflow;      // Underflow flag
+  logic               ResExpGteMax;   // is the result greater than or equal to the maximum floating point exponent
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Overflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // determine if the result exponent is greater than or equal to the maximum exponent of the output format
+  // NOTE(review): the rest of this comment describes a ShiftGtIntSz calculation (for cvt to int)
+  //      that this module does not contain - confirm it is a leftover and remove it
+  //      a left shift of intlen+1 is still in range but any more than that is an overflow
+  //              initial: |      64 0's         |    XLEN     |
+  //                      |      64 0's         |    XLEN     | << 64
+  //                      |      XLEN           |    00000... |
+  //      65 = ...0 0 0 0   0 1 0 0   0 0 0 1
+  //          |     or      | |     or      |
+  //      33 = ...0 0 0 0   0 0 1 0   0 0 0 1
+  //          |     or        | |     or    |
+  //      larger or equal if:
+  //          - any of the bits after the most significant 1 is one
+  //          - the most significant in 65 or 33 is still a one in the number and
+  //            one of the later bits is one
+  if (`FPSIZES == 1) begin
+      assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+
+  end else if (`FPSIZES == 2) begin    
+      assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
+
+  end else if (`FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)
+              `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+              `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
+              `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
+              default: ResExpGteMax = 1'bx;
+          endcase
+
+  end else if (`FPSIZES == 4) begin        
+      always_comb
+          case (OutFmt)
+              `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
+              `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
+              `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
+              `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
+          endcase
+  end
+
+
+  // calculate overflow flag:
+  //                 if the result is greater than or equal to the max exponent(not taking into account sign)
+  //                 |           and the exponent isn't negative
+  //                 |           |                   if no input is infinity or NaN and it is not a divide by zero
+  //                 |           |                   |            
+  assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Underflow
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // calculate underflow flag: detecting tininess after rounding
+  //                  the exponent is negative
+  //                  |                    the result is subnormal
+  //                  |                    |                    the result is normal and rounded from a Subnorm
+  //                  |                    |                    |                                      and if given an unbounded exponent the result does not round
+  //                  |                    |                    |                                      |                     and if the result is not exact
+  //                  |                    |                    |                                      |                     |               and no input is infinity or NaN, no divide by zero, not invalid
+  //                  |                    |                    |                                      |                     |               |
+  assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Inexact
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Inexact flag if the result is different from what would be output given infinite precision
+  //      - an overflow also sets it; suppressed for Inf/NaN inputs, divide by zero and invalid operations
+  assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
+  //assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
+
+  // NOTE(review): the annotation that used to sit here described an integer-inexact
+  //      term (result too small / outside the integer bounds); no such term exists in
+  //      this module - confirm it is a leftover and remove it
+
+  // select the inexact flag to output
+  assign Inexact = FpInexact;
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Invalid
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Set Invalid flag for following cases:
+  //   1) X or Y is a signaling NaN
+  //   2) Inf / Inf or 0 / 0 (divide only, gated by DivOp)
+  //   3) sqrt of a negative, non-zero X when no NaN input is being used (gated by DivOp)
+
+  
+  assign SigNaN = (XSNaN) | (YSNaN) ;
+  
+  // invalid flag for division and square root
+  assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
+
+  assign Invalid = SigNaN | (DivInvalid&DivOp);
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // Divide by Zero
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // if dividing by zero and not 0/0 (never set for a square root)
+  //  - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
+  assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);  
+
+
+  ///////////////////////////////////////////////////////////////////////////////
+  // final flags
+  ///////////////////////////////////////////////////////////////////////////////
+
+  // Combine flags
+  //      - PostProcFlg[4:0] = {Invalid, DivByZero, Overflow, Underflow, Inexact}
+  assign PostProcFlg = {Invalid, DivByZero, Overflow, Underflow, Inexact};
+
+endmodule
+
+
+
+

From 74b5fe1f6bc5f8632859ddab38c4fe7bc35540d5 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Fri, 19 May 2023 15:37:29 -0700
Subject: [PATCH 03/40] started special case

---
 src/fpu/divremsqrt/divremsqrtpostprocess.sv |  33 +--
 src/fpu/divremsqrt/divremsqrtround.sv       |  85 ++----
 src/fpu/divremsqrt/divremsqrtspecialcase.sv | 308 ++++++++++++++++++++
 3 files changed, 339 insertions(+), 87 deletions(-)
 create mode 100644 src/fpu/divremsqrt/divremsqrtspecialcase.sv

diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
index 698e38a3a..d0e5a618b 100644
--- a/src/fpu/divremsqrt/divremsqrtpostprocess.sv
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -87,7 +87,6 @@ module divremsqrtpostprocess (
   logic [`NE+1:0]             FmaMe;      // exponent of the normalized sum
   logic                       FmaSZero;   // is the sum zero
   logic [3*`NF+5:0]           FmaShiftIn; // fma shift input
-  logic [`NE+1:0]             NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
   logic                       FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
   logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
   // division singals
@@ -147,35 +146,10 @@ module divremsqrtpostprocess (
   /*cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,  
       .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);*/
 
-  /*fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
-      .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);*/
-
   divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
 
   assign ShiftAmt = DivShiftAmt;
   assign ShiftIn = DivShiftIn;
-  /*
-  // select which unit's output to shift
-  always_comb
-    case(PostProcSel)
-      2'b10: begin // fma
-        ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
-        ShiftIn =  {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
-      end
-      2'b00: begin // cvt
-        ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
-        ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
-      end
-      2'b01: begin //divsqrt
-        ShiftAmt = DivShiftAmt;
-        ShiftIn =  DivShiftIn;
-      end
-      default: begin 
-        ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}}; 
-        ShiftIn = {`NORMSHIFTSZ{1'bx}}; 
-      end
-    endcase
-  */
   
   // main normalization shift
   normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
@@ -196,7 +170,7 @@ module divremsqrtpostprocess (
   // calulate result sign used in rounding unit
   divremsqrtroundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
 
-  round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
+  divremsqrtround round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
       .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt,  .CvtResUf,
       .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
 
@@ -206,12 +180,13 @@ module divremsqrtpostprocess (
 
   /*resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard,
       .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs);*/
+  assign Rs = Ms;
 
   ///////////////////////////////////////////////////////////////////////////////
   // Flags
   ///////////////////////////////////////////////////////////////////////////////
 
-  flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
+  divremsqrtflags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
               .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
               .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
               .Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
@@ -221,7 +196,7 @@ module divremsqrtpostprocess (
   // Select the result
   ///////////////////////////////////////////////////////////////////////////////
 
-  negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
+  //negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
 
   specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
       .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, 
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
index 396948915..004a6694d 100644
--- a/src/fpu/divremsqrt/divremsqrtround.sv
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -1,7 +1,7 @@
 ///////////////////////////////////////////
 // divremsqrtround.sv
 //
-// Written: kekim@hmc.edu
+// Written: kekim@hmc.edu, me@KatherineParry.com
 // Modified: 19 May 2023
 //
 // Purpose: Rounder
@@ -39,24 +39,12 @@
 module round(
   input  logic [`FMTBITS-1:0]     OutFmt,             // output format
   input  logic [2:0]              Frm,                // rounding mode
-  //input  logic [1:0]              PostProcSel,        // select the postprocessor output
   input  logic                    Ms,                 // normalized sign
   input  logic [`CORRSHIFTSZ-1:0] Mf,                 // normalized fraction
-  // fma
-  //input  logic                    FmaOp,              // is an fma opperation being done?
-  //input  logic [`NE+1:0]          FmaMe,              // exponent of the normalized sum for fma
-  //input  logic                    FmaASticky,         // addend's sticky bit
-
   // divsqrt
-  //input  logic                    DivOp,              // is a division opperation being done
+  input  logic                    DivOp,              // is a division opperation being done
   input  logic                    DivSticky,          // divsqrt sticky bit
   input  logic [`NE+1:0]          Qe,                 // the divsqrt calculated expoent
-  // cvt
-  input  logic                    CvtOp,              // is a convert opperation being done
-  input  logic                    ToInt,              // is the cvt op a cvt to integer
-  input  logic                    CvtResSubnormUf,    // is the cvt result subnormal or underflow
-  input  logic                    CvtResUf,           // does the cvt result underflow
-  input  logic [`NE:0]            CvtCe,              // the cvt calculated expoent
   // outputs
   output logic [`NE+1:0]          Me,                 // normalied fraction
   output logic                    UfPlus1,            // do you add one to the result if given an unbounded exponent
@@ -71,8 +59,6 @@ module round(
   logic           UfCalcPlus1;        // calculated plus one for unbounded exponent
   logic           NormSticky;         // normalized sum's sticky bit
   logic [`NF-1:0] RoundFrac;          // rounded fraction
-  logic           FpRes;              // is the result a floating point
-  logic           IntRes;             // is the result an integer
   logic           FpGuard, FpRound;   // floating point round/guard bits
   logic           FpLsbRes;           // least significant bit of floating point result
   logic           LsbRes;             // lsb of result
@@ -112,8 +98,6 @@ module round(
 
 
   // determine what format the final result is in: int or fp
-  assign IntRes = ToInt;
-  assign FpRes = ~IntRes;
 
   // sticky bit calculation
   if (`FPSIZES == 1) begin
@@ -123,60 +107,58 @@ module round(
       //      |    NF     |1|1|
       //                     ^    ^ if floating point result
       //                     ^ if not an FMA result
-      if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+      if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]) |
                                                 (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
       //     2: NF > XLEN
-      if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
   end else if (`FPSIZES == 2) begin
       // XLEN is either 64 or 32
       // so half and single are always smaller then XLEN
 
       // 1: XLEN > NF   > NF1
-      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&~OutFmt) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]) |
                                                 (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
       // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
+      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&~OutFmt) | 
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt)) |
                                                 (|Mf[`CORRSHIFTSZ-`NF-2:0]);
       // 3: NF   > NF1  > XLEN
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
+      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt)) |
                                                 (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
   end else if (`FPSIZES == 3) begin
       // 1: XLEN > NF   > NF1
-      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&(OutFmt==`FMT1)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&~(OutFmt==`FMT)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]) |
                                                 (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
       // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
+      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&(OutFmt==`FMT1)) |
+                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&~(OutFmt==`FMT)) | 
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT))) |
                                                 (|Mf[`CORRSHIFTSZ-`NF-2:0]);
       // 3: NF   > NF1  > XLEN
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
+      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&(OutFmt==`FMT1)) |
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1))) |
+                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT))) |
                                                 (|Mf[`CORRSHIFTSZ-`NF-2:0]);
 
   end else if (`FPSIZES == 4) begin
       // Quad precision will always be greater than XLEN
       // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
-                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&(OutFmt==`H_FMT)) |
+                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
+                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&~(OutFmt==`Q_FMT)) | 
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT))) |
                                                 (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
       // 3: NF   > NF1  > XLEN
       // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
-                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&(OutFmt==`H_FMT)) |
+                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT))) |
                                                 (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
 
   end
@@ -185,8 +167,7 @@ module round(
 
   // only add the Addend sticky if doing an FMA opperation
   //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-  //assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp;
-  assign Sticky = DivSticky;
+  assign Sticky = DivSticky&DivOp;
   
 
 
@@ -253,9 +234,6 @@ module round(
           endcase
   end
 
-  /*assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard;
-  assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes;
-  assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound;*/
   
   assign Guard =  FpGuard;
   assign LsbRes = FpLsbRes;
@@ -286,7 +264,6 @@ module round(
 
   // If an answer is exact don't round
   assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
-  //assign FpPlus1 = Plus1&~(ToInt&CvtOp);
   assign FpPlus1 = Plus1;
   assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
 
@@ -319,14 +296,6 @@ module round(
 
   // select the exponent
   assign Me = Qe;
-  /*always_comb
-      case(PostProcSel)
-          2'b10: Me = FmaMe; // fma
-          2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
-          // 2'b01: Me = DivDone ? Qe : '0; // divide
-          2'b01: Me = Qe; // divide
-          default: Me = '0; 
-      endcase*/
 
 
 
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
new file mode 100644
index 000000000..1d0a7193f
--- /dev/null
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -0,0 +1,308 @@
+///////////////////////////////////////////
+// divremsqrtspecialcase.sv
+//
+// Written: kekim@hmc.edu,me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: special case selection
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module specialcase(
+  input  logic                Xs,         // X sign
+  input  logic [`NF:0]        Xm, Ym, Zm, // input significand's
+  input  logic                XNaN, YNaN, ZNaN, // are the inputs NaN
+  input  logic [2:0]          Frm,        // rounding mode
+  input  logic [`FMTBITS-1:0] OutFmt,     // output format
+  input  logic                InfIn,      // are any inputs infinity
+  input  logic                NaNIn,      // are any input NaNs
+  input  logic                XInf, YInf, // are X or Y inifnity
+  input  logic                XZero,      // is X zero
+  input  logic                Plus1,      // do you add one for rounding
+  input  logic                Rs,         // the result's sign
+  input  logic                Invalid, Overflow,  // flags to choose the result
+  input  logic [`NE-1:0]      Re,         // Result exponent
+  input  logic [`NE+1:0]      FullRe,     // Result full exponent
+  input  logic [`NF-1:0]      Rf,         // Result fraction
+  // fma
+  input  logic                FmaOp,      // is it a fma opperation
+  // divsqrt
+  input  logic                DivOp,      // is it a divsqrt opperation
+  input  logic                DivByZero,  // divide by zero flag
+  // cvt
+  input  logic                CvtOp,      // is it a conversion opperation
+  input  logic                IntZero,    // is the integer input zero
+  input  logic                IntToFp,    // is cvt int -> fp opperation
+  input  logic                Int64,      // is the integer 64 bits
+  input  logic                Signed,     // is the integer signed
+  input  logic [`NE:0]        CvtCe,      // the calculated expoent for cvt
+  input  logic                IntInvalid, // integer invalid flag to choose the result
+  input  logic                CvtResUf,   // does the convert result underflow
+  input  logic [`XLEN+1:0]    CvtNegRes,  // the possibly negated of the integer result
+  // outputs
+  output logic [`FLEN-1:0]    PostProcRes,// final result
+  output logic [`XLEN-1:0]    FCvtIntRes  // final integer result
+);
+
+  logic [`FLEN-1:0]   XNaNRes;    // X is NaN result
+  logic [`FLEN-1:0]   YNaNRes;    // Y is NaN result
+  logic [`FLEN-1:0]   ZNaNRes;    // Z is NaN result
+  logic [`FLEN-1:0]   InvalidRes; // Invalid result result
+  logic [`FLEN-1:0]   UfRes;      // underflowed result result
+  logic [`FLEN-1:0]   OfRes;      // overflowed result result
+  logic [`FLEN-1:0]   NormRes;    // normal result
+  logic [`XLEN-1:0]   OfIntRes;   // the overflow result for integer output
+  logic               OfResMax;   // does the of result output maximum norm fp number
+  logic               KillRes;    // kill the result for underflow
+  logic               SelOfRes;   // should the overflow result be selected
+
+
+  // does the overflow result output the maximum normalized floating point number
+  //                output infinity if the input is infinity
+  assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
+
+  // select correct outputs for special cases
+  if (`FPSIZES == 1) begin
+      //NaN res selection depending on standard
+      if(`IEEE754) begin
+          assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+          assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+          assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
+          assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+      end else begin
+          assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+      end
+
+      assign OfRes =  OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
+      assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+      assign NormRes = {Rs, Re, Rf};
+
+  end else if (`FPSIZES == 2) begin
+      if(`IEEE754) begin
+          assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
+          assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
+          assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
+          assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+      end else begin 
+          assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+      end
+
+      always_comb
+          if(OutFmt)
+              if(OfResMax)    OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
+              else            OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
+          else
+              if(OfResMax)    OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
+              else            OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
+      assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+      assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
+
+  end else if (`FPSIZES == 3) begin
+      always_comb
+          case (OutFmt)
+              `FMT: begin  
+                  if(`IEEE754) begin
+                      XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+                      YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+                      ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
+                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                  end else begin 
+                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                  end
+                  
+                  OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
+                  UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {Rs, Re, Rf};
+              end
+              `FMT1: begin  
+                  if(`IEEE754) begin
+                      XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
+                      YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
+                      ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
+                      InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                  end
+                  OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
+                  UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
+              end
+              `FMT2: begin  
+                  if(`IEEE754) begin
+                      XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
+                      YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
+                      ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
+                      InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)};
+                  UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
+              end
+              default: begin
+                  if(`IEEE754) begin
+                      XNaNRes = (`FLEN)'(0);
+                      YNaNRes = (`FLEN)'(0);
+                      ZNaNRes = (`FLEN)'(0);
+                      InvalidRes = (`FLEN)'(0);
+                  end else begin 
+                      InvalidRes = (`FLEN)'(0);
+                  end
+                  OfRes = (`FLEN)'(0);
+                  UfRes = (`FLEN)'(0);
+                  NormRes = (`FLEN)'(0);
+              end
+          endcase
+
+  end else if (`FPSIZES == 4) begin 
+      always_comb
+          case (OutFmt)
+              2'h3: begin  
+                  if(`IEEE754) begin
+                      XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+                      YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+                      ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
+                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                  end else begin 
+                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                  end
+                  
+                  OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
+                  UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {Rs, Re, Rf};
+              end
+              2'h1: begin  
+                  if(`IEEE754) begin
+                      XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
+                      YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
+                      ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
+                      InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                  end
+                  OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)};
+                  UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
+              end
+              2'h0: begin  
+                  if(`IEEE754) begin
+                      XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
+                      YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
+                      ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
+                      InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)};
+                  UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
+              end
+              2'h2: begin  
+                  if(`IEEE754) begin
+                      XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
+                      YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
+                      ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
+                      InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                  end else begin 
+                      InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                  end
+                  
+                  OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)};      
+                // zero is exact if dividing by infinity so don't add 1
+                  UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
+              end
+          endcase
+  end
+
+  // determine if you shoould kill the res - Cvt
+  //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
+  //      - dont set to zero if fp input is zero but not using the fp input
+  //      - dont set to zero if int input is zero but not using the int input
+  assign KillRes = FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
+  
+  // calculate if the overflow result should be selected
+  assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
+  
+  // output infinity with result sign if divide by zero
+  if(`IEEE754)
+    always_comb
+      if(XNaN)   PostProcRes = XNaNRes;
+      else if(YNaN)        PostProcRes = YNaNRes;
+      else if(Invalid)            PostProcRes = InvalidRes;
+      else if(SelOfRes)           PostProcRes = OfRes;
+      else if(KillRes)            PostProcRes = UfRes;
+      else                        PostProcRes = NormRes;
+  else
+    always_comb
+      if(NaNIn|Invalid)           PostProcRes = InvalidRes;
+      else if(SelOfRes)           PostProcRes = OfRes;
+      else if(KillRes)            PostProcRes = UfRes;
+      else                        PostProcRes = NormRes;
+
+  ///////////////////////////////////////////////////////////////////////////////////////
+  // integer result selection        
+  ///////////////////////////////////////////////////////////////////////////////////////        
+
+  // select the overflow integer res
+  //      - negitive infinity and out of range negitive input
+  //                 |  int  |  long  |
+  //          signed | -2^31 | -2^63  |
+  //        unsigned |   0   |    0   |
+  //
+  //      - positive infinity and out of range positive input and NaNs
+  //                 |   int  |  long  |
+  //          signed | 2^31-1 | 2^63-1 |
+  //        unsigned | 2^32-1 | 2^64-1 |
+  //
+  //      other: 32 bit unsinged res should be sign extended as if it were a signed number
+  always_comb
+    if(Signed)
+      if(Xs&~NaNIn) // signed negitive
+        if(Int64)   OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
+        else        OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
+      else          // signed positive
+        if(Int64)   OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
+        else        OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
+    else
+      if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
+      else          OfIntRes = {`XLEN{1'b1}}; // unsigned positive
+
+
+  // select the integer output
+  //      - if the input is invalid (out of bounds NaN or Inf) then output overflow res
+  //      - if the input underflows
+  //          - if rounding and signed opperation and negitive input, output -1
+  //          - otherwise output a rounded 0
+  //      - otherwise output the normal res (trmined and sign extended if nessisary)
+  always_comb
+    if(IntInvalid)          FCvtIntRes = OfIntRes;
+    else if(CvtCe[`NE]) 
+      if(Xs&Signed&Plus1)   FCvtIntRes = {{`XLEN{1'b1}}};
+      else                  FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
+    else if(Int64)          FCvtIntRes = CvtNegRes[`XLEN-1:0];
+    else                    FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
+endmodule
\ No newline at end of file

From a43ce924784b078881e0a945a1daaddc33877280 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Fri, 19 May 2023 15:48:15 -0700
Subject: [PATCH 04/40] special case first pass

---
 src/fpu/divremsqrt/divremsqrtspecialcase.sv | 60 +--------------------
 1 file changed, 2 insertions(+), 58 deletions(-)

diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
index 1d0a7193f..ade2ccf3e 100644
--- a/src/fpu/divremsqrt/divremsqrtspecialcase.sv
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -44,24 +44,11 @@ module specialcase(
   input  logic [`NE-1:0]      Re,         // Result exponent
   input  logic [`NE+1:0]      FullRe,     // Result full exponent
   input  logic [`NF-1:0]      Rf,         // Result fraction
-  // fma
-  input  logic                FmaOp,      // is it a fma opperation
   // divsqrt
   input  logic                DivOp,      // is it a divsqrt opperation
   input  logic                DivByZero,  // divide by zero flag
-  // cvt
-  input  logic                CvtOp,      // is it a conversion opperation
-  input  logic                IntZero,    // is the integer input zero
-  input  logic                IntToFp,    // is cvt int -> fp opperation
-  input  logic                Int64,      // is the integer 64 bits
-  input  logic                Signed,     // is the integer signed
-  input  logic [`NE:0]        CvtCe,      // the calculated expoent for cvt
-  input  logic                IntInvalid, // integer invalid flag to choose the result
-  input  logic                CvtResUf,   // does the convert result underflow
-  input  logic [`XLEN+1:0]    CvtNegRes,  // the possibly negated of the integer result
   // outputs
   output logic [`FLEN-1:0]    PostProcRes,// final result
-  output logic [`XLEN-1:0]    FCvtIntRes  // final integer result
 );
 
   logic [`FLEN-1:0]   XNaNRes;    // X is NaN result
@@ -71,7 +58,6 @@ module specialcase(
   logic [`FLEN-1:0]   UfRes;      // underflowed result result
   logic [`FLEN-1:0]   OfRes;      // overflowed result result
   logic [`FLEN-1:0]   NormRes;    // normal result
-  logic [`XLEN-1:0]   OfIntRes;   // the overflow result for integer output
   logic               OfResMax;   // does the of result output maximum norm fp number
   logic               KillRes;    // kill the result for underflow
   logic               SelOfRes;   // should the overflow result be selected
@@ -250,8 +236,8 @@ module specialcase(
   // output infinity with result sign if divide by zero
   if(`IEEE754)
     always_comb
-      if(XNaN)   PostProcRes = XNaNRes;
-      else if(YNaN)        PostProcRes = YNaNRes;
+      if(XNaN)                    PostProcRes = XNaNRes;
+      else if(YNaN)               PostProcRes = YNaNRes;
       else if(Invalid)            PostProcRes = InvalidRes;
       else if(SelOfRes)           PostProcRes = OfRes;
       else if(KillRes)            PostProcRes = UfRes;
@@ -263,46 +249,4 @@ module specialcase(
       else if(KillRes)            PostProcRes = UfRes;
       else                        PostProcRes = NormRes;
 
-  ///////////////////////////////////////////////////////////////////////////////////////
-  // integer result selection        
-  ///////////////////////////////////////////////////////////////////////////////////////        
-
-  // select the overflow integer res
-  //      - negitive infinity and out of range negitive input
-  //                 |  int  |  long  |
-  //          signed | -2^31 | -2^63  |
-  //        unsigned |   0   |    0   |
-  //
-  //      - positive infinity and out of range positive input and NaNs
-  //                 |   int  |  long  |
-  //          signed | 2^31-1 | 2^63-1 |
-  //        unsigned | 2^32-1 | 2^64-1 |
-  //
-  //      other: 32 bit unsinged res should be sign extended as if it were a signed number
-  always_comb
-    if(Signed)
-      if(Xs&~NaNIn) // signed negitive
-        if(Int64)   OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
-        else        OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
-      else          // signed positive
-        if(Int64)   OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
-        else        OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
-    else
-      if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
-      else          OfIntRes = {`XLEN{1'b1}}; // unsigned positive
-
-
-  // select the integer output
-  //      - if the input is invalid (out of bounds NaN or Inf) then output overflow res
-  //      - if the input underflows
-  //          - if rounding and signed opperation and negitive input, output -1
-  //          - otherwise output a rounded 0
-  //      - otherwise output the normal res (trmined and sign extended if nessisary)
-  always_comb
-    if(IntInvalid)          FCvtIntRes = OfIntRes;
-    else if(CvtCe[`NE]) 
-      if(Xs&Signed&Plus1)   FCvtIntRes = {{`XLEN{1'b1}}};
-      else                  FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
-    else if(Int64)          FCvtIntRes = CvtNegRes[`XLEN-1:0];
-    else                    FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
 endmodule
\ No newline at end of file

From ab4fbcdf79eefcf5a834fa270fa7d28e7e211f05 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 21 May 2023 13:21:57 -0700
Subject: [PATCH 05/40] added top-level drsu.sv (divremsqrt unit)

---
 src/fpu/divremsqrt/divremsqrt.sv            |  2 +-
 src/fpu/divremsqrt/divremsqrtflags.sv       |  7 +-
 src/fpu/divremsqrt/divremsqrtpostprocess.sv | 60 ++++---------
 src/fpu/divremsqrt/divremsqrtspecialcase.sv | 15 +---
 src/fpu/divremsqrt/drsu.sv                  | 95 +++++++++++++++++++++
 5 files changed, 119 insertions(+), 60 deletions(-)
 create mode 100644 src/fpu/divremsqrt/drsu.sv

diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv
index 3ca85cfb4..caaa45ab2 100644
--- a/src/fpu/divremsqrt/divremsqrt.sv
+++ b/src/fpu/divremsqrt/divremsqrt.sv
@@ -28,7 +28,7 @@
 
 `include "wally-config.vh"
 
-module fdivsqrt(
+module divremsqrt(
   input  logic                clk, 
   input  logic                reset, 
   input  logic [`FMTBITS-1:0] FmtE,
diff --git a/src/fpu/divremsqrt/divremsqrtflags.sv b/src/fpu/divremsqrt/divremsqrtflags.sv
index 7924e0624..c2f0f356c 100644
--- a/src/fpu/divremsqrt/divremsqrtflags.sv
+++ b/src/fpu/divremsqrt/divremsqrtflags.sv
@@ -28,13 +28,13 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"
 
-module flags(
+module divremsqrtflags(
   input  logic                Xs,                     // X sign
   input  logic [`FMTBITS-1:0] OutFmt,                 // output format
   input  logic                InfIn,                  // is a Inf input being used
-  input  logic                XInf, YInf, ZInf,       // inputs are infinity
+  input  logic                XInf, YInf,             // inputs are infinity
   input  logic                NaNIn,                  // is a NaN input being used
-  input  logic                XSNaN, YSNaN, ZSNaN,    // inputs are signaling NaNs
+  input  logic                XSNaN, YSNaN,           // inputs are signaling NaNs
   input  logic                XZero, YZero,           // inputs are zero
   input  logic [`NE+1:0]      FullRe,                 // Re with bits to determine sign and overflow
   input  logic [`NE+1:0]      Me,                     // exponent of the normalized sum
@@ -49,7 +49,6 @@ module flags(
   output logic                DivByZero,              // divide by zero flag
   output logic                Overflow,               // overflow flag to select result
   output logic                Invalid,                // invalid flag to select the result
-  output logic                IntInvalid,             // invalid integer result to select
   output logic [4:0]          PostProcFlg             // flags
 );
 
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
index d0e5a618b..c210a4817 100644
--- a/src/fpu/divremsqrt/divremsqrtpostprocess.sv
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -31,39 +31,23 @@
 module divremsqrtpostprocess (
   // general signals
   input logic                             Xs, Ys,     // input signs
-  input logic  [`NF:0]                    Xm, Ym, Zm, // input mantissas
+  input logic  [`NF:0]                    Xm, Ym,     // input mantissas
   input logic  [2:0]                      Frm,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
   input logic  [`FMTBITS-1:0]             Fmt,        // precision 1 = double 0 = single
   input logic  [2:0]                      OpCtrl,     // choose which opperation (look below for values)
   input logic                             XZero, YZero,        // inputs are zero
-  input logic                             XInf, YInf, ZInf,    // inputs are infinity
-  input logic                             XNaN, YNaN, ZNaN,    // inputs are NaN
-  input logic                             XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
+  input logic                             XInf, YInf,          // inputs are infinity
+  input logic                             XNaN, YNaN,          // inputs are NaN
+  input logic                             XSNaN, YSNaN,        // inputs are signaling NaNs
   input logic  [1:0]                      PostProcSel,         // select result to be written to fp register
   //fma signals
-  input logic                             FmaAs,      // the modified Z sign - depends on instruction
-  input logic                             FmaPs,      // the product's sign
-  input logic                             FmaSs,      // Sum sign
-  input logic  [`NE+1:0]                  FmaSe,      // the sum's exponent
-  input logic  [3*`NF+3:0]                FmaSm,      // the positive sum
-  input logic                             FmaASticky, // sticky bit that is calculated during alignment
-  input logic  [$clog2(3*`NF+5)-1:0]      FmaSCnt,    // the normalization shift count
   //divide signals
   input logic                             DivSticky,  // divider sticky bit
   input logic  [`NE+1:0]                  DivQe,      // divsqrt exponent
   input logic  [`DIVb:0]                  DivQm,      // divsqrt significand
-  // conversion signals
-  input logic                             CvtCs,      // the result's sign
-  input logic  [`NE:0]                    CvtCe,      // the calculated expoent
-  input logic                             CvtResSubnormUf, // the convert result is subnormal or underflows
-  input logic  [`LOGCVTLEN-1:0]           CvtShiftAmt,// how much to shift by
-  input logic                             ToInt,      // is fp->int (since it's writting to the integer register)
-  input logic  [`CVTLEN-1:0]              CvtLzcIn,   // input to the Leading Zero Counter (without msb)
-  input logic                             IntZero,    // is the integer input zero
   // final results
   output logic [`FLEN-1:0]                PostProcRes,// postprocessor final result
   output logic [4:0]                      PostProcFlg,// postprocesser flags
-  output logic [`XLEN-1:0]                FCvtIntRes  // the integer conversion result
   );
   
   // general signals
@@ -83,12 +67,6 @@ module divremsqrtpostprocess (
   logic                       Invalid;    // invalid flag used to select results
   logic                       Guard, Round, Sticky; // bits needed to determine rounding
   logic [`FMTBITS-1:0]        OutFmt;     // output format
-  // fma signals
-  logic [`NE+1:0]             FmaMe;      // exponent of the normalized sum
-  logic                       FmaSZero;   // is the sum zero
-  logic [3*`NF+5:0]           FmaShiftIn; // fma shift input
-  logic                       FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
-  logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
   // division singals
   logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt;        // divsqrt shif amount
   logic [`NORMSHIFTSZ-1:0]    DivShiftIn;         // divsqrt shift input
@@ -109,7 +87,6 @@ module divremsqrtpostprocess (
   logic                       Signed;     // is the opperation with a signed integer?
   logic                       IntToFp;    // is the opperation an int->fp conversion?
   logic                       CvtOp;      // convertion opperation
-  logic                       FmaOp;      // fma opperation
   logic                       DivOp;      // divider opperation
   logic                       InfIn;      // are any of the inputs infinity
   logic                       NaNIn;      // are any of the inputs NaN
@@ -125,8 +102,8 @@ module divremsqrtpostprocess (
   assign Sqrt =  OpCtrl[0];
 
   // is there an input of infinity or NaN being used
-  assign InfIn = XInf|YInf|ZInf;
-  assign NaNIn = XNaN|YNaN|ZNaN;
+  assign InfIn = XInf|YInf;
+  assign NaNIn = XNaN|YNaN;
 
   // choose the ouptut format depending on the opperation
   //      - fp -> fp: OpCtrl contains the percision of the output
@@ -168,11 +145,10 @@ module divremsqrtpostprocess (
   // round to nearest max magnitude
 
   // calulate result sign used in rounding unit
-  divremsqrtroundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
+  divremsqrtroundsign roundsign( .DivOp, .Sqrt, .Xs, .Ys, , .Ms);
 
-  divremsqrtround round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
-      .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt,  .CvtResUf,
-      .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
+  divremsqrtround round(.OutFmt, .Frm, .Plus1, .Qe,
+      .Ms, .Mf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
 
   ///////////////////////////////////////////////////////////////////////////////
   // Sign calculation
@@ -186,11 +162,11 @@ module divremsqrtpostprocess (
   // Flags
   ///////////////////////////////////////////////////////////////////////////////
 
-  divremsqrtflags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
-              .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
-              .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
-              .Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
-              .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
+  divremsqrtflags flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero, 
+              .Xs, .Sqrt,
+              .NaNIn, .Round, .DivByZero,
+              .Guard, .Sticky, .UfPlus1,.DivOp, .FullRe, .Plus1,
+              .Me, .Invalid, .Overflow, .PostProcFlg);
 
   ///////////////////////////////////////////////////////////////////////////////
   // Select the result
@@ -198,9 +174,9 @@ module divremsqrtpostprocess (
 
   //negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
 
-  specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
-      .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, 
-      .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
-      .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes);
+  divremsqrtspecialcase specialcase(.Xs, .Xm, .Ym, .XZero, 
+      .Frm, .OutFmt, .XNaN, .YNaN,  
+      .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
+      .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
 
 endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
index ade2ccf3e..1172705f0 100644
--- a/src/fpu/divremsqrt/divremsqrtspecialcase.sv
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -30,8 +30,8 @@
 
 module specialcase(
   input  logic                Xs,         // X sign
-  input  logic [`NF:0]        Xm, Ym, Zm, // input significand's
-  input  logic                XNaN, YNaN, ZNaN, // are the inputs NaN
+  input  logic [`NF:0]        Xm, Ym, // input significand's
+  input  logic                XNaN, YNaN, // are the inputs NaN
   input  logic [2:0]          Frm,        // rounding mode
   input  logic [`FMTBITS-1:0] OutFmt,     // output format
   input  logic                InfIn,      // are any inputs infinity
@@ -53,7 +53,6 @@ module specialcase(
 
   logic [`FLEN-1:0]   XNaNRes;    // X is NaN result
   logic [`FLEN-1:0]   YNaNRes;    // Y is NaN result
-  logic [`FLEN-1:0]   ZNaNRes;    // Z is NaN result
   logic [`FLEN-1:0]   InvalidRes; // Invalid result result
   logic [`FLEN-1:0]   UfRes;      // underflowed result result
   logic [`FLEN-1:0]   OfRes;      // overflowed result result
@@ -73,7 +72,6 @@ module specialcase(
       if(`IEEE754) begin
           assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
           assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-          assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
           assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
       end else begin
           assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
@@ -87,7 +85,6 @@ module specialcase(
       if(`IEEE754) begin
           assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
           assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-          assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
           assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
       end else begin 
           assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
@@ -110,7 +107,6 @@ module specialcase(
                   if(`IEEE754) begin
                       XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
                       YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                      ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
                       InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                   end else begin 
                       InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
@@ -124,7 +120,6 @@ module specialcase(
                   if(`IEEE754) begin
                       XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
                       YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-                      ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
                       InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                   end else begin 
                       InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
@@ -137,7 +132,6 @@ module specialcase(
                   if(`IEEE754) begin
                       XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
                       YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
-                      ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
                       InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
                   end else begin 
                       InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
@@ -151,7 +145,6 @@ module specialcase(
                   if(`IEEE754) begin
                       XNaNRes = (`FLEN)'(0);
                       YNaNRes = (`FLEN)'(0);
-                      ZNaNRes = (`FLEN)'(0);
                       InvalidRes = (`FLEN)'(0);
                   end else begin 
                       InvalidRes = (`FLEN)'(0);
@@ -169,7 +162,6 @@ module specialcase(
                   if(`IEEE754) begin
                       XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
                       YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                      ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
                       InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                   end else begin 
                       InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
@@ -183,7 +175,6 @@ module specialcase(
                   if(`IEEE754) begin
                       XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
                       YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
-                      ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
                       InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                   end else begin 
                       InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
@@ -196,7 +187,6 @@ module specialcase(
                   if(`IEEE754) begin
                       XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
                       YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
-                      ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
                       InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
                   end else begin 
                       InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
@@ -210,7 +200,6 @@ module specialcase(
                   if(`IEEE754) begin
                       XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
                       YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
-                      ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
                       InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
                   end else begin 
                       InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
new file mode 100644
index 000000000..14445441d
--- /dev/null
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -0,0 +1,95 @@
+///////////////////////////////////////////
+// drsu.sv
+//
+// Written: kekim@hmc.edu
+// Modified:19 May 2023
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit with postprocessing
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module drsu(
+  input  logic                clk, 
+  input  logic                reset, 
+  input  logic [`FMTBITS-1:0] FmtE,
+  input  logic                XsE, YsE,
+  input  logic [`NF:0]        XmE, YmE,
+  input  logic [`NE-1:0]      XeE, YeE,
+  input  logic                XInfE, YInfE, 
+  input  logic                XZeroE, YZeroE, 
+  input  logic                XNaNE, YNaNE, 
+  input  logic                XSNaNE, YSNaNE,
+  input  logic                FDivStartE, IDivStartE,
+  input  logic                StallM,
+  input  logic                FlushE,
+  input  logic                SqrtE, SqrtM,
+  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [2:0]          Funct3E, Funct3M,
+  input  logic                IntDivE, W64E,
+  input  logic [2:0]          Frm,
+  input  logic [2:0]          OpCtrl,
+  input  logic [`FMTBits:0]   Fmt,
+  input  logic [1:0]          PostProcSel,
+  output logic                FDivBusyE, IFDivStartE, FDivDoneE,
+  output logic [`FLEN-1:0]    FResM,
+  output logic [`XLEN-1:0]    FIntDivResultM,
+  output logic [4:0]          FlgM
+);
+
+  // Floating-point division and square root module, with optional integer division and remainder
+  // Computes X/Y, sqrt(X), A/B, or A%B
+
+  logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [`DIVb+3:0]           D;                            // Iterator Divisor
+  logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [`DIVb+1:0]           FirstC;                       // Step tracker
+  logic                       Firstun;                      // Quotient selection
+  logic                       WZeroE;                       // Early termination flag
+  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
+  logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
+  logic                       DivStartE;                    // Enable signal for flops during stall
+                                                            
+  // Integer div/rem signals                                
+  logic                       BZeroM;                       // Denominator is zero
+  logic                       IntDivM;                      // Integer operation
+  logic [`DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
+  logic [`XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic                       ISpecialCaseE;                // Integer div/remainder special cases
+
+  divremsqrt divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE, 
+            .XeE, .YeE, .SqrtE, .SqrtM,
+                    .XInfE, .YInfE, .XZeroE, .YZeroE, 
+            .XNaNE, .YNaNE, 
+                    .FDivStartE, .IDivStartE, .W64E,
+                    .StallM, .DivStickyM, .FDivBusyE, .QeM,
+                    .QmM,
+                    .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
+                    .Funct3E, .IntDivE, .FIntDivResultM,
+                    .FDivDoneE, .IFDivStartE);
+  divremsqrtpostprocess divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Frm(Frm), .Fmt(Fmt), .OpCtrl,
+    .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), 
+    .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivQe(QeM), .DivQm(QmM), .PostProcRes(FResM), .PostProcFlg(FlgM));
+endmodule
+

From c9f758b240bca7d01fd083800d2ed47f9700b653 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 21 May 2023 13:35:38 -0700
Subject: [PATCH 06/40] divremsqrt directory passes lint

---
 src/fpu/divremsqrt/divremsqrtpostprocess.sv | 4 ++--
 src/fpu/divremsqrt/divremsqrtround.sv       | 2 +-
 src/fpu/divremsqrt/divremsqrtroundsign.sv   | 2 +-
 src/fpu/divremsqrt/divremsqrtspecialcase.sv | 4 ++--
 src/fpu/divremsqrt/drsu.sv                  | 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
index c210a4817..6ab1a54e5 100644
--- a/src/fpu/divremsqrt/divremsqrtpostprocess.sv
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -47,7 +47,7 @@ module divremsqrtpostprocess (
   input logic  [`DIVb:0]                  DivQm,      // divsqrt significand
   // final results
   output logic [`FLEN-1:0]                PostProcRes,// postprocessor final result
-  output logic [4:0]                      PostProcFlg,// postprocesser flags
+  output logic [4:0]                      PostProcFlg // postprocesser flags
   );
   
   // general signals
@@ -163,7 +163,7 @@ module divremsqrtpostprocess (
   ///////////////////////////////////////////////////////////////////////////////
 
   divremsqrtflags flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero, 
-              .Xs, .Sqrt,
+              .Xs, .OutFmt, .Sqrt,
               .NaNIn, .Round, .DivByZero,
               .Guard, .Sticky, .UfPlus1,.DivOp, .FullRe, .Plus1,
               .Me, .Invalid, .Overflow, .PostProcFlg);
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
index 004a6694d..7b5dcc9da 100644
--- a/src/fpu/divremsqrt/divremsqrtround.sv
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -36,7 +36,7 @@
 //  single and double will always be smaller than XLEN
 `define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
 
-module round(
+module divremsqrtround(
   input  logic [`FMTBITS-1:0]     OutFmt,             // output format
   input  logic [2:0]              Frm,                // rounding mode
   input  logic                    Ms,                 // normalized sign
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
index 87b72ba48..83f82eeac 100644
--- a/src/fpu/divremsqrt/divremsqrtroundsign.sv
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -27,7 +27,7 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"
 
-module roundsign(
+module divremsqrtroundsign(
   input logic         Xs,     // x sign
   input logic         Ys,     // y sign
   input logic         Sqrt,   // sqrt oppertion? (when using divsqrt unit)
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
index 1172705f0..9bfd74721 100644
--- a/src/fpu/divremsqrt/divremsqrtspecialcase.sv
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -28,7 +28,7 @@
 
 `include "wally-config.vh"
 
-module specialcase(
+module divremsqrtspecialcase(
   input  logic                Xs,         // X sign
   input  logic [`NF:0]        Xm, Ym, // input significand's
   input  logic                XNaN, YNaN, // are the inputs NaN
@@ -48,7 +48,7 @@ module specialcase(
   input  logic                DivOp,      // is it a divsqrt opperation
   input  logic                DivByZero,  // divide by zero flag
   // outputs
-  output logic [`FLEN-1:0]    PostProcRes,// final result
+  output logic [`FLEN-1:0]    PostProcRes // final result
 );
 
   logic [`FLEN-1:0]   XNaNRes;    // X is NaN result
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
index 14445441d..938dcbc15 100644
--- a/src/fpu/divremsqrt/drsu.sv
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -48,7 +48,7 @@ module drsu(
   input  logic                IntDivE, W64E,
   input  logic [2:0]          Frm,
   input  logic [2:0]          OpCtrl,
-  input  logic [`FMTBits:0]   Fmt,
+  input  logic [`FMTBITS:0]   Fmt,
   input  logic [1:0]          PostProcSel,
   output logic                FDivBusyE, IFDivStartE, FDivDoneE,
   output logic [`FLEN-1:0]    FResM,
@@ -88,7 +88,7 @@ module drsu(
                     .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
                     .Funct3E, .IntDivE, .FIntDivResultM,
                     .FDivDoneE, .IFDivStartE);
-  divremsqrtpostprocess divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Frm(Frm), .Fmt(Fmt), .OpCtrl,
+  divremsqrtpostprocess divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(Fmt), .OpCtrl,
     .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), 
     .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivQe(QeM), .DivQm(QmM), .PostProcRes(FResM), .PostProcFlg(FlgM));
 endmodule

From 70fc32104e0aa2290bb8d0fdea9fbc4536967ecd Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 21 May 2023 14:05:57 -0700
Subject: [PATCH 07/40] more lint fixes

---
 src/fpu/divremsqrt/divremsqrtpostprocess.sv | 2 +-
 src/fpu/divremsqrt/drsu.sv                  | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
index 6ab1a54e5..43aba0c3e 100644
--- a/src/fpu/divremsqrt/divremsqrtpostprocess.sv
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -145,7 +145,7 @@ module divremsqrtpostprocess (
   // round to nearest max magnitude
 
   // calulate result sign used in rounding unit
-  divremsqrtroundsign roundsign( .DivOp, .Sqrt, .Xs, .Ys, , .Ms);
+  divremsqrtroundsign roundsign( .DivOp, .Sqrt, .Xs, .Ys, Ms);
 
   divremsqrtround round(.OutFmt, .Frm, .Plus1, .Qe,
       .Ms, .Mf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
index 938dcbc15..6e23c548b 100644
--- a/src/fpu/divremsqrt/drsu.sv
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -77,6 +77,9 @@ module drsu(
   logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
   logic [`XLEN-1:0]           AM;                           // Original Numerator for postprocessor
   logic                       ISpecialCaseE;                // Integer div/remainder special cases
+  logic [`DIVb:0]             QmM;
+  logic [`NE+1:0]             QeM;
+  logic                       DivStickyM;
 
   divremsqrt divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE, 
             .XeE, .YeE, .SqrtE, .SqrtM,

From f3190823c8f14aa5563baba0a9bcf36618a50d28 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 21 May 2023 14:09:17 -0700
Subject: [PATCH 08/40] lint fixes

---
 src/fpu/divremsqrt/divremsqrtpostprocess.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
index 43aba0c3e..2041ffe4a 100644
--- a/src/fpu/divremsqrt/divremsqrtpostprocess.sv
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -145,7 +145,7 @@ module divremsqrtpostprocess (
   // round to nearest max magnitude
 
   // calulate result sign used in rounding unit
-  divremsqrtroundsign roundsign( .DivOp, .Sqrt, .Xs, .Ys, Ms);
+  divremsqrtroundsign roundsign( .DivOp, .Sqrt, .Xs, .Ys, .Ms);
 
   divremsqrtround round(.OutFmt, .Frm, .Plus1, .Qe,
       .Ms, .Mf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);

From 9b3a8766564ae82364e34808a55666f9f105c7ab Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 28 May 2023 11:40:51 -0700
Subject: [PATCH 09/40] fixed bug in rv32M test vector generation code - prior
 code skipped every other line in the reference file, so it generated only
 half the test vectors, and half of those had the wrong answer - prior code
 also opened the test vector file in "append" mode; changed to "write" mode
 (so that the script overwrites an existing file instead of adding to it)

---
 .../extract_arch_vectors.py                   | 61 +++++++++++++++++--
 1 file changed, 55 insertions(+), 6 deletions(-)

diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors.py b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
index c80c7a843..09b57dc1e 100755
--- a/tests/fp/combined_IF_vectors/extract_arch_vectors.py
+++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
@@ -77,7 +77,7 @@ def create_vectors(my_config):
         rounding_mode = "X"
         flags = "XX"
         # use name to create our new tv
-        dest_file = open("{}cvw_{}_{}.tv".format(dest_dir, my_config.bits, vector1[:-2]), 'a')
+        dest_file = open("{}cvw_{}_{}.tv".format(dest_dir, my_config.bits, vector1[:-2]), 'w')
         # open vectors
         src_file1 = open(source_dir1 + vector1,'r')
         src_file2 = open(source_dir2 + vector2,'r')
@@ -144,7 +144,7 @@ def create_vectors(my_config):
                 answer2 = src_file2.readline().strip()
                 answer1 = src_file2.readline().strip()
                 answer = answer1 + answer2
-                # print(answer1,answer2)
+                #print(answer1,answer2)
                 if not (answer2 == "e7d4b281" and answer1 == "6f5ca309"): # if there is still stuff to read
                     # parse through .S file
                     detected = False
@@ -179,13 +179,57 @@ def create_vectors(my_config):
                 else:
                     # print("read false")
                     reading = False
+        elif my_config.letter == "M" and my_config.bits == 32:
+            reading = True
+            #skip first 2 lines bc junk
+            src_file2.readline()
+            while reading:
+                # print("trigger 64M")
+                # get answer from Ref...signature
+                # answers span two lines and are reversed
+                answer = src_file2.readline().strip()
+                #print(answer1,answer2)
+                if not (answer == "6f5ca309"): # if there is still stuff to read
+                    # parse through .S file
+                    detected = False
+                    done = False
+                    op1val = "0"
+                    op2val = "0"
+                    while not (detected or done):
+                        # print("det1")
+                        line = src_file1.readline()
+                        # print(line)
+                        if "op1val" in line:
+                            # print("det2")
+                            # parse line
+                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
+                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
+                                op1val = twos_comp(my_config.bits, op1val)
+                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later
+                                op2val = line.split("op2val")[1].split("x")[1].strip()
+                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
+                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
+                                    op2val = twos_comp(my_config.bits, op2val)
+                            # go to next test in vector
+                            detected = True
+                        elif "RVTEST_CODE_END" in line:
+                            done = True
+                    # ints don't have flags
+                    flags = "XX"
+                    # put it all together
+                    if not done:
+                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode)
+                        dest_file.write(translation + "\n")
+                else:
+                    # print("read false")
+                    reading = False 
         else:
             while reading:
                 # get answer and flags from Ref...signature
                 answer = src_file2.readline()
-                # print(answer)
+                print(answer)
                 packed = src_file2.readline()[6:]
-                # print(packed)
+                print("Packed: ", packed)
                 if len(packed.strip())>0: # if there is still stuff to read
                     # print("packed")
                     # parse through .S file
@@ -229,7 +273,7 @@ def create_vectors(my_config):
         src_file2.close()
 
 config_list = [
-Config(32, "M", "div", "div_", 0),
+Config(32, "M", "div", "div-", 0),
 Config(32, "F", "fdiv", "fdiv", 1),
 Config(32, "F", "fsqrt", "fsqrt", 2),
 Config(32, "M", "rem", "rem-", 3),
@@ -247,5 +291,10 @@ Config(64, "M", "remw", "remw-", 8),
 Config(64, "M", "remuw", "remuw-", 9)
 ]
 
+"""
 for c in config_list:
-    create_vectors(c)
\ No newline at end of file
+    create_vectors(c)
+"""
+create_vectors(config_list[0])
+#create_vectors(config_list[6])
+#create_vectors(config_list[5])
\ No newline at end of file

From 43f6b7cfa3a0bab3112fbe650a97bb9bff263de6 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 28 May 2023 11:48:41 -0700
Subject: [PATCH 10/40] fixed bug in testvector extract script - old script
 skipped the first 2 lines in the rv32m case; new script skips only the first line

---
 tests/fp/combined_IF_vectors/extract_arch_vectors.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors.py b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
index 09b57dc1e..c069603b6 100755
--- a/tests/fp/combined_IF_vectors/extract_arch_vectors.py
+++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
@@ -180,14 +180,14 @@ def create_vectors(my_config):
                     # print("read false")
                     reading = False
         elif my_config.letter == "M" and my_config.bits == 32:
+            print("REEEEE")
             reading = True
-            #skip first 2 lines bc junk
-            src_file2.readline()
             while reading:
                 # print("trigger 64M")
                 # get answer from Ref...signature
                 # answers span two lines and are reversed
                 answer = src_file2.readline().strip()
+                print(f"Answer: {answer}")
                 #print(answer1,answer2)
                 if not (answer == "6f5ca309"): # if there is still stuff to read
                     # parse through .S file
@@ -291,10 +291,5 @@ Config(64, "M", "remw", "remw-", 8),
 Config(64, "M", "remuw", "remuw-", 9)
 ]
 
-"""
 for c in config_list:
-    create_vectors(c)
-"""
-create_vectors(config_list[0])
-#create_vectors(config_list[6])
-#create_vectors(config_list[5])
\ No newline at end of file
+    create_vectors(c)
\ No newline at end of file

From 9307f8b7d53978fc2a5414c01a1b4b0da8c0d6da Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 28 May 2023 15:19:01 -0700
Subject: [PATCH 11/40] added divremsqrtunit macro

---
 testbench/tests-fp.vh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index e29cc8447..3008e3fe0 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -52,6 +52,7 @@
 `define CVTINTUNIT     0
 `define CVTFPUNIT      4
 `define CMPUNIT        3
+`define DIVREMSQRTUNIT 5
 
 string f16rv32cvtint[] = '{
 	"ui32_to_f16_rne.tv",

From bca32af002d432904c83d8bdea3b1914a012fda7 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 28 May 2023 15:19:32 -0700
Subject: [PATCH 12/40] added initial test selection for unified fdiv, fsqrt

---
 testbench/testbench-fp.sv | 64 ++++++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 11 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index b7fcc237f..db5861402 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -274,6 +274,15 @@ module testbenchfp;
           Fmt = {Fmt, 2'b11};
         end
       end
+      if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+        Tests = {Tests, f128div, f128sqrt};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0, 1'b0};
+        for(int i = 0; i<10; i++) begin
+            Unit = {Unit, `DIVUNIT};
+            Fmt = {Fmt, 2'b11};
+        end
+      end
     end
     if (`D_SUPPORTED) begin // if double precision is supported
       if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
@@ -401,6 +410,15 @@ module testbenchfp;
           Fmt = {Fmt, 2'b01};
         end
       end
+      if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+        Tests = {Tests, f128div, f128sqrt};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0, 1'b0};
+        for(int i = 0; i<10; i++) begin
+            Unit = {Unit, `DIVUNIT};
+            Fmt = {Fmt, 2'b01};
+        end
+      end
     end
     if (`F_SUPPORTED) begin // if single precision being supported
       if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
@@ -512,6 +530,15 @@ module testbenchfp;
           Fmt = {Fmt, 2'b00};
         end
       end
+      if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+        Tests = {Tests, f128div, f128sqrt};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0, 1'b0};
+        for(int i = 0; i<10; i++) begin
+            Unit = {Unit, `DIVUNIT};
+            Fmt = {Fmt, 2'00};
+        end
+      end
     end
     if (`ZFH_SUPPORTED) begin // if half precision supported
       if (TEST === "cvtint"| TEST === "all") begin // if in conversions are being tested
@@ -605,6 +632,16 @@ module testbenchfp;
           Fmt = {Fmt, 2'b10};
         end
       end
+      if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+        Tests = {Tests, f128div, f128sqrt};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
+        WriteInt = {WriteInt, 1'b0, 1'b0};
+        for(int i = 0; i<10; i++) begin
+            Unit = {Unit, `DIVUNIT};
+            Fmt = {Fmt, 2'10};
+        end
+      end
+
     end
 
     // check if nothing is being tested
@@ -681,16 +718,18 @@ module testbenchfp;
             .ASticky); 
   end
               
-  postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
-              .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
-              .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
-              .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
-              .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
-              .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
-              .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-              .FmaASticky(ASticky), .FmaSe(Se),
-              .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-              .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
+  if (TEST !=== "divremsqrt") begin : fpostprocess
+    postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+                .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
+                .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
+                .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
+                .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
+                .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
+                .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
+                .FmaASticky(ASticky), .FmaSe(Se),
+                .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+                .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
+  end
   
   if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt
     fcvt fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
@@ -715,6 +754,9 @@ module testbenchfp;
                        .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
                        .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
   end
+  if (TEST === "divremsqrt") begin: divremsqrt
+    drsu drsu();
+  end
 
   assign CmpFlg[3:0] = 0;
 
@@ -746,7 +788,7 @@ module testbenchfp;
 
   // Check if the correct answer and result is a NaN
   always_comb begin
-    if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
+    if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT | (UnitVal === `DIVREMSQRTUNIT && WriteIntVal == 1'b1)) begin
       // an integer output can't be a NaN
       AnsNaN = 1'b0;
       ResNaN = 1'b0;

From caf6840211d433f681ff46afd1a0c166ead606a0 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 28 May 2023 21:18:37 -0700
Subject: [PATCH 13/40] connected drsu in/out in fp-testbench testbench gives
 spurious successful messages... check sqrt. result and answer signals are
 mismatched in waveforms but tb says ok...

---
 sim/wave-fpu.do            | 20 ++++++------
 src/fpu/divremsqrt/drsu.sv |  3 +-
 testbench/testbench-fp.sv  | 67 ++++++++++++++++++++++++++------------
 testbench/tests-fp.vh      |  4 +++
 4 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/sim/wave-fpu.do b/sim/wave-fpu.do
index 05ccb2154..55c2d485b 100644
--- a/sim/wave-fpu.do
+++ b/sim/wave-fpu.do
@@ -9,15 +9,15 @@ add wave -noupdate /testbenchfp/Res
 add wave -noupdate /testbenchfp/Ans
 add wave -noupdate /testbenchfp/DivStart
 add wave -noupdate /testbenchfp/FDivBusyE
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/specialcase/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/flags/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/normshift/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/shiftcorrection/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/resultsign/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/round/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/fmashiftcalc/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/divshiftcalc/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/postprocess/cvtshiftcalc/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
index 6e23c548b..82e68beda 100644
--- a/src/fpu/divremsqrt/drsu.sv
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -48,7 +48,6 @@ module drsu(
   input  logic                IntDivE, W64E,
   input  logic [2:0]          Frm,
   input  logic [2:0]          OpCtrl,
-  input  logic [`FMTBITS:0]   Fmt,
   input  logic [1:0]          PostProcSel,
   output logic                FDivBusyE, IFDivStartE, FDivDoneE,
   output logic [`FLEN-1:0]    FResM,
@@ -91,7 +90,7 @@ module drsu(
                     .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
                     .Funct3E, .IntDivE, .FIntDivResultM,
                     .FDivDoneE, .IFDivStartE);
-  divremsqrtpostprocess divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(Fmt), .OpCtrl,
+  divremsqrtpostprocess divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl,
     .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), 
     .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivQe(QeM), .DivQm(QmM), .PostProcRes(FResM), .PostProcFlg(FlgM));
 endmodule
diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index db5861402..c9f346d43 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -411,7 +411,7 @@ module testbenchfp;
         end
       end
       if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
-        Tests = {Tests, f128div, f128sqrt};
+        Tests = {Tests, f64div, f64sqrt};
         OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0, 1'b0};
         for(int i = 0; i<10; i++) begin
@@ -531,12 +531,12 @@ module testbenchfp;
         end
       end
       if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
-        Tests = {Tests, f128div, f128sqrt};
+        Tests = {Tests, f32div, f32sqrt};
         OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0, 1'b0};
         for(int i = 0; i<10; i++) begin
             Unit = {Unit, `DIVUNIT};
-            Fmt = {Fmt, 2'00};
+            Fmt = {Fmt, 2'b00};
         end
       end
     end
@@ -633,14 +633,30 @@ module testbenchfp;
         end
       end
       if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
-        Tests = {Tests, f128div, f128sqrt};
+        Tests = {Tests, f16div, f16sqrt};
         OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0, 1'b0};
         for(int i = 0; i<10; i++) begin
             Unit = {Unit, `DIVUNIT};
-            Fmt = {Fmt, 2'10};
+            Fmt = {Fmt, 2'b10};
         end
       end
+      if (TEST === "divremsqrttest") begin // if unified div sqrt is being tested
+        Tests = {Tests, f16sqrt};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        for(int i = 0; i<5; i++) begin
+            Unit = {Unit, `DIVUNIT};
+            Fmt = {Fmt, 2'b10};
+        end
+      end
+      if (TEST === "custom") begin // if unified div sqrt is being tested
+        Tests = {Tests, custom};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `DIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
 
     end
 
@@ -718,19 +734,6 @@ module testbenchfp;
             .ASticky); 
   end
               
-  if (TEST !=== "divremsqrt") begin : fpostprocess
-    postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
-                .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
-                .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
-                .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
-                .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
-                .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
-                .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-                .FmaASticky(ASticky), .FmaSe(Se),
-                .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-                .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
-  end
-  
   if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt
     fcvt fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
               .XZero(XZero), .OpCtrl(OpCtrlVal), .IntZero,
@@ -742,7 +745,7 @@ module testbenchfp;
                 .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes),
                 .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
   end
-  if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt
+  if (TEST === "div" | TEST === "sqrt" | TEST === "all" | TEST === "custom") begin: fdivsqrt
      fdivsqrt fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
 		       .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
                        .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
@@ -754,8 +757,30 @@ module testbenchfp;
                        .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
                        .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
   end
-  if (TEST === "divremsqrt") begin: divremsqrt
-    drsu drsu();
+  if (TEST === "divremsqrt" | TEST === "divremsqrttest") begin: divremsqrt
+    drsu drsu(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
+		       .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+           .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
+           .PostProcSel(UnitVal[1:0]),
+		       .XNaNE(XNaN), .YNaNE(YNaN), 
+           .Frm(FrmVal), 
+                       .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
+                       .StallM(1'b0), .FDivBusyE,
+                       .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
+                       .Funct3E(Funct3E), .IntDivE(1'b0), 
+                       .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
+  end
+  else begin: postprocess
+    postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+                .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
+                .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
+                .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
+                .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
+                .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
+                .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
+                .FmaASticky(ASticky), .FmaSe(Se),
+                .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+                .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
   end
 
   assign CmpFlg[3:0] = 0;
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index 3008e3fe0..e32da9d29 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -581,5 +581,9 @@ string f128fma[] = '{
 	"f128_mulAdd_rnm.tv"
 };
 
+string custom[] = '{
+	"f16_sqrt_rne.tv"
+};
+
 
 

From 6df19e460fd0fd919d22815d431675093ab03afd Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Wed, 31 May 2023 17:16:05 -0700
Subject: [PATCH 14/40] added custom fp tests

---
 testbench/testbench-fp.sv | 9 ++++++++-
 testbench/tests-fp.vh     | 5 +++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index b7fcc237f..a1cc91760 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -605,6 +605,13 @@ module testbenchfp;
           Fmt = {Fmt, 2'b10};
         end
       end
+      if (TEST === "custom"   | TEST === "all") begin // if fma is being tested
+        Tests = {Tests, custom};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `DIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
     end
 
     // check if nothing is being tested
@@ -703,7 +710,7 @@ module testbenchfp;
                 .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes),
                 .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
   end
-  if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt
+  if (TEST === "div" | TEST === "sqrt" | TEST === "custom" | TEST === "all") begin: fdivsqrt
      fdivsqrt fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
 		       .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
                        .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index e29cc8447..5653f42f5 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -580,5 +580,6 @@ string f128fma[] = '{
 	"f128_mulAdd_rnm.tv"
 };
 
-
-
+string custom[] = '{
+	"f16_div_rne.tv"
+};
\ No newline at end of file

From 001f1c7dc30f8d787e27e3838ef8b60d2f1e4cf7 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sat, 10 Jun 2023 11:18:27 -0700
Subject: [PATCH 15/40] add custom tests

---
 testbench/testbench-fp.sv | 6 +++---
 testbench/tests-fp.vh     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index e8b16a10d..f70f55067 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -740,7 +740,7 @@ module testbenchfp;
             .ASticky); 
   end
               
-  postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+  /*postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
               .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
               .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
               .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
@@ -749,7 +749,7 @@ module testbenchfp;
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
               .FmaASticky(ASticky), .FmaSe(Se),
               .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-              .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
+              .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));*/
   
   if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt
     fcvt fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
@@ -788,7 +788,7 @@ module testbenchfp;
                        .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
   end
   else begin: postprocess
-    postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+    postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
                 .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
                 .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
                 .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index e32da9d29..3c7e0cbf9 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -582,7 +582,7 @@ string f128fma[] = '{
 };
 
 string custom[] = '{
-	"f16_sqrt_rne.tv"
+	"f16_div_rne.tv"
 };
 
 

From 07bfceed364867638ab24f4c2c334960e48be735 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Mon, 26 Jun 2023 17:45:57 -0700
Subject: [PATCH 16/40] - accounted for cvw path change - funky verilog imports

---
 src/fpu/divremsqrt/divremsqrt.sv              |  33 ++-
 src/fpu/divremsqrt/divremsqrtflags.sv         |  39 ++--
 src/fpu/divremsqrt/divremsqrtpostprocess.sv   |  47 ++--
 src/fpu/divremsqrt/divremsqrtround.sv         | 200 ++++++++---------
 src/fpu/divremsqrt/divremsqrtroundsign.sv     |   3 +-
 .../divremsqrt/divremsqrtshiftcorrection.sv   |  41 ++--
 src/fpu/divremsqrt/divremsqrtspecialcase.sv   | 205 +++++++++---------
 src/fpu/divremsqrt/drsu.sv                    |  35 ++-
 8 files changed, 298 insertions(+), 305 deletions(-)

diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv
index caaa45ab2..e45e383f9 100644
--- a/src/fpu/divremsqrt/divremsqrt.sv
+++ b/src/fpu/divremsqrt/divremsqrt.sv
@@ -26,15 +26,14 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-`include "wally-config.vh"
 
-module divremsqrt(
+ module divremsqrt import cvw::*;  #(parameter cvw_t P) (
   input  logic                clk, 
   input  logic                reset, 
-  input  logic [`FMTBITS-1:0] FmtE,
+  input  logic [P.FMTBITS-1:0] FmtE,
   input  logic                XsE,
-  input  logic [`NF:0]        XmE, YmE,
-  input  logic [`NE-1:0]      XeE, YeE,
+  input  logic [P.NF:0]        XmE, YmE,
+  input  logic [P.NE-1:0]      XeE, YeE,
   input  logic                XInfE, YInfE, 
   input  logic                XZeroE, YZeroE, 
   input  logic                XNaNE, YNaNE, 
@@ -42,36 +41,36 @@ module divremsqrt(
   input  logic                StallM,
   input  logic                FlushE,
   input  logic                SqrtE, SqrtM,
-  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
   input  logic [2:0]          Funct3E, Funct3M,
   input  logic                IntDivE, W64E,
   output logic                DivStickyM,
   output logic                FDivBusyE, IFDivStartE, FDivDoneE,
-  output logic [`NE+1:0]      QeM,
-  output logic [`DIVb:0]      QmM,
-  output logic [`XLEN-1:0]    FIntDivResultM
+  output logic [P.NE+1:0]      QeM,
+  output logic [P.DIVb:0]      QmM,
+  output logic [P.XLEN-1:0]    FIntDivResultM
 );
 
   // Floating-point division and square root module, with optional integer division and remainder
   // Computes X/Y, sqrt(X), A/B, or A%B
 
-  logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
-  logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
-  logic [`DIVb+3:0]           D;                            // Iterator Divisor
-  logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
-  logic [`DIVb+1:0]           FirstC;                       // Step tracker
+  logic [P.DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]           D;                            // Iterator Divisor
+  logic [P.DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]           FirstC;                       // Step tracker
   logic                       Firstun;                      // Quotient selection
   logic                       WZeroE;                       // Early termination flag
-  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
+  logic [P.DURLEN-1:0]         CyclesE;                      // FSM cycles
   logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
   logic                       DivStartE;                    // Enable signal for flops during stall
                                                             
   // Integer div/rem signals                                
   logic                       BZeroM;                       // Denominator is zero
   logic                       IntDivM;                      // Integer operation
-  logic [`DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic [P.DIVBLEN:0]          nM, mM;                       // Shift amounts
   logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
-  logic [`XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic [P.XLEN-1:0]           AM;                           // Original Numerator for postprocessor
   logic                       ISpecialCaseE;                // Integer div/remainder special cases
 
   fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
diff --git a/src/fpu/divremsqrt/divremsqrtflags.sv b/src/fpu/divremsqrt/divremsqrtflags.sv
index c2f0f356c..522d1d597 100644
--- a/src/fpu/divremsqrt/divremsqrtflags.sv
+++ b/src/fpu/divremsqrt/divremsqrtflags.sv
@@ -26,18 +26,17 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"
 
-module divremsqrtflags(
+module divremsqrtflags import cvw::*;  #(parameter cvw_t P) (
   input  logic                Xs,                     // X sign
-  input  logic [`FMTBITS-1:0] OutFmt,                 // output format
+  input  logic [P.FMTBITS-1:0] OutFmt,                 // output format
   input  logic                InfIn,                  // is a Inf input being used
   input  logic                XInf, YInf,             // inputs are infinity
   input  logic                NaNIn,                  // is a NaN input being used
   input  logic                XSNaN, YSNaN,           // inputs are signaling NaNs
   input  logic                XZero, YZero,           // inputs are zero
-  input  logic [`NE+1:0]      FullRe,                 // Re with bits to determine sign and overflow
-  input  logic [`NE+1:0]      Me,                     // exponent of the normalized sum
+  input  logic [P.NE+1:0]      FullRe,                 // Re with bits to determine sign and overflow
+  input  logic [P.NE+1:0]      Me,                     // exponent of the normalized sum
   // rounding
   input  logic                Plus1,                  // do you add one for rounding
   input  logic                Round, Guard, Sticky,   // bits used to determine rounding
@@ -78,28 +77,28 @@ module divremsqrtflags(
   //          - any of the bits after the most significan 1 is one
   //          - the most signifcant in 65 or 33 is still a one in the number and
   //            one of the later bits is one
-  if (`FPSIZES == 1) begin
-      assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
+  if (P.FPSIZES == 1) begin
+      assign ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
 
-  end else if (`FPSIZES == 2) begin    
-      assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
+  end else if (P.FPSIZES == 2) begin    
+      assign ResExpGteMax = OutFmt ? &FullRe[P.NE-1:0] | FullRe[P.NE] : &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
 
-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
       always_comb
           case (OutFmt)
-              `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
-              `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
-              `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
+              P.FMT: ResExpGteMax = &FullRe[P.NE-1:0] | FullRe[P.NE];
+              P.FMT1: ResExpGteMax = &FullRe[P.NE1-1:0] | (|FullRe[P.NE:P.NE1]);
+              P.FMT2: ResExpGteMax = &FullRe[P.NE2-1:0] | (|FullRe[P.NE:P.NE2]);
               default: ResExpGteMax = 1'bx;
           endcase
 
-  end else if (`FPSIZES == 4) begin        
+  end else if (P.FPSIZES == 4) begin        
       always_comb
           case (OutFmt)
-              `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
-              `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
-              `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
-              `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
+              P.Q_FMT: ResExpGteMax = &FullRe[P.Q_NE-1:0] | FullRe[P.Q_NE];
+              P.D_FMT: ResExpGteMax = &FullRe[P.D_NE-1:0] | (|FullRe[P.Q_NE:P.D_NE]);
+              P.S_FMT: ResExpGteMax = &FullRe[P.S_NE-1:0] | (|FullRe[P.Q_NE:P.S_NE]);
+              P.H_FMT: ResExpGteMax = &FullRe[P.H_NE-1:0] | (|FullRe[P.Q_NE:P.H_NE]);
           endcase
   end
 
@@ -109,7 +108,7 @@ module divremsqrtflags(
   //                 |           and the exponent isn't negitive
   //                 |           |                   if the input isnt infinity or NaN
   //                 |           |                   |            
-  assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
+  assign Overflow = ResExpGteMax & ~FullRe[P.NE+1]&~(InfIn|NaNIn|DivByZero);
 
   ///////////////////////////////////////////////////////////////////////////////
   // Underflow
@@ -123,7 +122,7 @@ module divremsqrtflags(
   //                  |                    |                    |                                      |                     and if the result is not exact
   //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
   //                  |                    |                    |                                      |                     |               |
-  assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
+  assign Underflow = ((FullRe[P.NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
 
 
   ///////////////////////////////////////////////////////////////////////////////
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
index 2041ffe4a..0307121c0 100644
--- a/src/fpu/divremsqrt/divremsqrtpostprocess.sv
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -26,14 +26,13 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-`include "wally-config.vh"
 
-module divremsqrtpostprocess (
+module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P)  (
   // general signals
   input logic                             Xs, Ys,     // input signs
-  input logic  [`NF:0]                    Xm, Ym,     // input mantissas
+  input logic  [P.NF:0]                    Xm, Ym,     // input mantissas
   input logic  [2:0]                      Frm,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-  input logic  [`FMTBITS-1:0]             Fmt,        // precision 1 = double 0 = single
+  input logic  [P.FMTBITS-1:0]             Fmt,        // precision 1 = double 0 = single
   input logic  [2:0]                      OpCtrl,     // choose which opperation (look below for values)
   input logic                             XZero, YZero,        // inputs are zero
   input logic                             XInf, YInf,          // inputs are infinity
@@ -43,41 +42,41 @@ module divremsqrtpostprocess (
   //fma signals
   //divide signals
   input logic                             DivSticky,  // divider sticky bit
-  input logic  [`NE+1:0]                  DivQe,      // divsqrt exponent
-  input logic  [`DIVb:0]                  DivQm,      // divsqrt significand
+  input logic  [P.NE+1:0]                  DivQe,      // divsqrt exponent
+  input logic  [P.DIVb:0]                  DivQm,      // divsqrt significand
   // final results
-  output logic [`FLEN-1:0]                PostProcRes,// postprocessor final result
+  output logic [P.FLEN-1:0]                PostProcRes,// postprocessor final result
   output logic [4:0]                      PostProcFlg // postprocesser flags
   );
   
   // general signals
   logic                       Rs;         // result sign
-  logic [`NF-1:0]             Rf;         // Result fraction
-  logic [`NE-1:0]             Re;         // Result exponent
+  logic [P.NF-1:0]             Rf;         // Result fraction
+  logic [P.NE-1:0]             Re;         // Result exponent
   logic                       Ms;         // norMalized sign
-  logic [`CORRSHIFTSZ-1:0]    Mf;         // norMalized fraction
-  logic [`NE+1:0]             Me;         // normalized exponent
-  logic [`NE+1:0]             FullRe;     // Re with bits to determine sign and overflow
+  logic [P.CORRSHIFTSZ-1:0]    Mf;         // norMalized fraction
+  logic [P.NE+1:0]             Me;         // normalized exponent
+  logic [P.NE+1:0]             FullRe;     // Re with bits to determine sign and overflow
   logic                       UfPlus1;    // do you add one (for determining underflow flag)
-  logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt;   // normalization shift amount
-  logic [`NORMSHIFTSZ-1:0]    ShiftIn;    // input to normalization shift
-  logic [`NORMSHIFTSZ-1:0]    Shifted;    // the ouput of the normalized shifter (before shift correction)
+  logic [P.LOGNORMSHIFTSZ-1:0] ShiftAmt;   // normalization shift amount
+  logic [P.NORMSHIFTSZ-1:0]    ShiftIn;    // input to normalization shift
+  logic [P.NORMSHIFTSZ-1:0]    Shifted;    // the ouput of the normalized shifter (before shift correction)
   logic                       Plus1;      // add one to the final result?
   logic                       Overflow;   // overflow flag used to select results
   logic                       Invalid;    // invalid flag used to select results
   logic                       Guard, Round, Sticky; // bits needed to determine rounding
-  logic [`FMTBITS-1:0]        OutFmt;     // output format
+  logic [P.FMTBITS-1:0]        OutFmt;     // output format
   // division singals
-  logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt;        // divsqrt shif amount
-  logic [`NORMSHIFTSZ-1:0]    DivShiftIn;         // divsqrt shift input
-  logic [`NE+1:0]             Qe;                 // divsqrt corrected exponent after corretion shift
+  logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt;        // divsqrt shif amount
+  logic [P.NORMSHIFTSZ-1:0]    DivShiftIn;         // divsqrt shift input
+  logic [P.NE+1:0]             Qe;                 // divsqrt corrected exponent after corretion shift
   logic                       DivByZero;          // divide by zero flag
   logic                       DivResSubnorm;      // is the divsqrt result subnormal
   logic                       DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
   // conversion signals
-  logic [`CVTLEN+`NF:0]       CvtShiftIn;         // number to be shifted for converter
+  logic [P.CVTLEN+P.NF:0]       CvtShiftIn;         // number to be shifted for converter
   logic [1:0]                 CvtNegResMsbs;      // most significant bits of possibly negated int result
-  logic [`XLEN+1:0]           CvtNegRes;          // possibly negated integer result
+  logic [P.XLEN+1:0]           CvtNegRes;          // possibly negated integer result
   logic                       CvtResUf;           // did the convert result underflow
   logic                       IntInvalid;         // invalid integer flag
   // readability signals
@@ -108,10 +107,10 @@ module divremsqrtpostprocess (
   // choose the ouptut format depending on the opperation
   //      - fp -> fp: OpCtrl contains the percision of the output
   //      - otherwise: Fmt contains the percision of the output
-  if (`FPSIZES == 2) 
-      //assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); 
+  if (P.FPSIZES == 2) 
+      //assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == P.FMT); 
       assign OutFmt = Fmt;
-  else if (`FPSIZES == 3 | `FPSIZES == 4) 
+  else if (P.FPSIZES == 3 | P.FPSIZES == 4) 
       //assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; 
       assign OutFmt = Fmt;
 
diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
index 7b5dcc9da..3d530b384 100644
--- a/src/fpu/divremsqrt/divremsqrtround.sv
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -26,31 +26,23 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-`include "wally-config.vh"
 
-// what position is XLEN in?
-//  options: 
-//     1: XLEN > NF   > NF1
-//     2: NF   > XLEN > NF1
-//     3: NF   > NF1  > XLEN
-//  single and double will always be smaller than XLEN
-`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
 
-module divremsqrtround(
-  input  logic [`FMTBITS-1:0]     OutFmt,             // output format
+module divremsqrtround import cvw::*;  #(parameter cvw_t P)  (
+  input  logic [P.FMTBITS-1:0]     OutFmt,             // output format
   input  logic [2:0]              Frm,                // rounding mode
   input  logic                    Ms,                 // normalized sign
-  input  logic [`CORRSHIFTSZ-1:0] Mf,                 // normalized fraction
+  input  logic [P.CORRSHIFTSZ-1:0] Mf,                 // normalized fraction
   // divsqrt
   input  logic                    DivOp,              // is a division opperation being done
   input  logic                    DivSticky,          // divsqrt sticky bit
-  input  logic [`NE+1:0]          Qe,                 // the divsqrt calculated expoent
+  input  logic [P.NE+1:0]          Qe,                 // the divsqrt calculated exponent
   // outputs
-  output logic [`NE+1:0]          Me,                 // normalied fraction
+  output logic [P.NE+1:0]          Me,                 // normalized exponent
   output logic                    UfPlus1,            // do you add one to the result if given an unbounded exponent
-  output logic [`NE+1:0]          FullRe,             // Re with bits to determine sign and overflow
-  output logic [`NE-1:0]          Re,                 // Result exponent
-  output logic [`NF-1:0]          Rf,                 // Result fractionNormS
+  output logic [P.NE+1:0]          FullRe,             // Re with bits to determine sign and overflow
+  output logic [P.NE-1:0]          Re,                 // Result exponent
+  output logic [P.NF-1:0]          Rf,                 // Result fraction
   output logic                    Sticky,             // sticky bit
   output logic                    Plus1,              // do you add one to the final result
   output logic                    Round, Guard        // bits needed to calculate rounding
@@ -58,13 +50,21 @@ module divremsqrtround(
 
   logic           UfCalcPlus1;        // calculated plus one for unbounded exponent
   logic           NormSticky;         // normalized sum's sticky bit
-  logic [`NF-1:0] RoundFrac;          // rounded fraction
+  logic [P.NF-1:0] RoundFrac;          // rounded fraction
   logic           FpGuard, FpRound;   // floating point round/guard bits
   logic           FpLsbRes;           // least significant bit of floating point result
   logic           LsbRes;             // lsb of result
   logic           CalcPlus1;          // calculated plus1
   logic           FpPlus1;            // do you add one to the fp result 
-  logic [`FLEN:0] RoundAdd;           // how much to add to the result
+  logic [P.FLEN:0] RoundAdd;           // how much to add to the result
+
+// what position is XLEN in?
+//  options: 
+//     1: XLEN > NF   > NF1
+//     2: NF   > XLEN > NF1
+//     3: NF   > NF1  > XLEN
+//  single and double will always be smaller than XLEN
+  localparam XLENPOS = P.XLEN > P.NF ? 1 : P.XLEN > P.NF1 ? 2 : 3;
 
   ///////////////////////////////////////////////////////////////////////////////
   // Rounding
@@ -100,66 +100,66 @@ module divremsqrtround(
   // determine what format the final result is in: int or fp
 
   // sticky bit calculation
-  if (`FPSIZES == 1) begin
+  if (P.FPSIZES == 1) begin
 
       //     1: XLEN > NF
       //      |         XLEN          |
       //      |    NF     |1|1|
       //                     ^    ^ if floating point result
       //                     ^ if not an FMA result
-      if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
       //     2: NF > XLEN
-      if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
 
-  end else if (`FPSIZES == 2) begin
+  end else if (P.FPSIZES == 2) begin
       // XLEN is either 64 or 32
       // so half and single are always smaller then XLEN
 
       // 1: XLEN > NF   > NF1
-      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&~OutFmt) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~OutFmt) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
       // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&~OutFmt) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~OutFmt) | 
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
       // 3: NF   > NF1  > XLEN
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
 
-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
       // 1: XLEN > NF   > NF1
-      if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&~(OutFmt==`FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
+      if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~(OutFmt==P.FMT)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
       // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&~(OutFmt==`FMT)) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~(OutFmt==P.FMT)) | 
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
       // 3: NF   > NF1  > XLEN
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&(OutFmt==`FMT1)) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1))) |
-                                                (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&(OutFmt==P.FMT1)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
 
-  end else if (`FPSIZES == 4) begin
+  end else if (P.FPSIZES == 4) begin
       // Quad precision will always be greater than XLEN
       // 2: NF   > XLEN > NF1
-      if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&(OutFmt==`H_FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
-                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&~(OutFmt==`Q_FMT)) | 
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | 
+                                                (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&~(OutFmt==P.Q_FMT)) | 
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
       // 3: NF   > NF1  > XLEN
       // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
-      if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&(OutFmt==`H_FMT)) |
-                                                (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT))) |
-                                                (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
+                                                (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT))) |
+                                                (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
 
   end
   
@@ -174,33 +174,33 @@ module divremsqrtround(
 
   // determine round and LSB of the rounded value
   //      - underflow round bit is used to determint the underflow flag
-  if (`FPSIZES == 1) begin
-      assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
-      assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
-      assign FpRound = Mf[`CORRSHIFTSZ-`NF-2];
+  if (P.FPSIZES == 1) begin
+      assign FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
+      assign FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
+      assign FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
 
-  end else if (`FPSIZES == 2) begin
-      assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
-      assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
-      assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
+  end else if (P.FPSIZES == 2) begin
+      assign FpGuard = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-1] : Mf[P.CORRSHIFTSZ-P.NF1-1];
+      assign FpLsbRes = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF] : Mf[P.CORRSHIFTSZ-P.NF1];
+      assign FpRound = OutFmt ? Mf[P.CORRSHIFTSZ-P.NF-2] : Mf[P.CORRSHIFTSZ-P.NF1-2];
 
-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
       always_comb
           case (OutFmt)
-              `FMT: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`NF-2];
+              P.FMT: begin
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.NF-2];
               end
-              `FMT1: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`NF1-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF1];
-                  FpRound = Mf[`CORRSHIFTSZ-`NF1-2];
+              P.FMT1: begin
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.NF1-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF1];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.NF1-2];
               end
-              `FMT2: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`NF2-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`NF2];
-                  FpRound = Mf[`CORRSHIFTSZ-`NF2-2];
+              P.FMT2: begin
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.NF2-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.NF2];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.NF2-2];
               end
               default: begin
                   FpGuard = 1'bx;
@@ -208,28 +208,28 @@ module divremsqrtround(
                   FpRound = 1'bx;
               end
           endcase
-  end else if (`FPSIZES == 4) begin
+  end else if (P.FPSIZES == 4) begin
       always_comb
           case (OutFmt)
               2'h3: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.Q_NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.Q_NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.Q_NF-2];
               end
               2'h1: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`D_NF-2];
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.D_NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.D_NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.D_NF-2];
               end
               2'h0: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`S_NF-2];
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.S_NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.S_NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.S_NF-2];
               end
               2'h2: begin
-                  FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1];
-                  FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF];
-                  FpRound = Mf[`CORRSHIFTSZ-`H_NF-2];
+                  FpGuard = Mf[P.CORRSHIFTSZ-P.H_NF-1];
+                  FpLsbRes = Mf[P.CORRSHIFTSZ-P.H_NF];
+                  FpRound = Mf[P.CORRSHIFTSZ-P.H_NF-2];
               end
           endcase
   end
@@ -271,26 +271,26 @@ module divremsqrtround(
 
 
   // place Plus1 into the proper position for the format
-  if (`FPSIZES == 1) begin
-      assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
+  if (P.FPSIZES == 1) begin
+      assign RoundAdd = {{P.FLEN{1'b0}}, FpPlus1};
 
-  end else if (`FPSIZES == 2) begin
+  end else if (P.FPSIZES == 2) begin
       // \/FLEN+1
       //  | NE+2 |        NF      |
       //  '-NE+2-^----NF1----^
-      // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
-      assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
+      // P.FLEN+1-P.NE-2-P.NF1 = FLEN-1-NE-NF1
+      assign RoundAdd = {(P.NE+1+P.NF1)'(0), FpPlus1&~OutFmt, (P.NF-P.NF1-1)'(0), FpPlus1&OutFmt};
 
-  end else if (`FPSIZES == 3) begin
-      assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
+  end else if (P.FPSIZES == 3) begin
+      assign RoundAdd = {(P.NE+1+P.NF2)'(0), FpPlus1&(OutFmt==P.FMT2), (P.NF1-P.NF2-1)'(0), FpPlus1&(OutFmt==P.FMT1), (P.NF-P.NF1-1)'(0), FpPlus1&(OutFmt==P.FMT)};
 
-  end else if (`FPSIZES == 4)      
-      assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
+  end else if (P.FPSIZES == 4)      
+      assign RoundAdd = {(P.Q_NE+1+P.H_NF)'(0), FpPlus1&(OutFmt==P.H_FMT), (P.S_NF-P.H_NF-1)'(0), FpPlus1&(OutFmt==P.S_FMT), (P.D_NF-P.S_NF-1)'(0), FpPlus1&(OutFmt==P.D_FMT), (P.Q_NF-P.D_NF-1)'(0), FpPlus1&(OutFmt==P.Q_FMT)};
 
 
 
   // trim unneeded bits from fraction
-  assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+  assign RoundFrac = Mf[P.CORRSHIFTSZ-1:P.CORRSHIFTSZ-P.NF];
   
 
 
@@ -302,7 +302,7 @@ module divremsqrtround(
   // round the result
   //      - if the fraction overflows one should be added to the exponent
   assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
-  assign Re = FullRe[`NE-1:0];
+  assign Re = FullRe[P.NE-1:0];
 
 
 endmodule
diff --git a/src/fpu/divremsqrt/divremsqrtroundsign.sv b/src/fpu/divremsqrt/divremsqrtroundsign.sv
index 83f82eeac..b0dd4270b 100644
--- a/src/fpu/divremsqrt/divremsqrtroundsign.sv
+++ b/src/fpu/divremsqrt/divremsqrtroundsign.sv
@@ -25,9 +25,8 @@
 // either express or implied. See the License for the specific language governing permissions 
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
-`include "wally-config.vh"
 
-module divremsqrtroundsign(
+module divremsqrtroundsign import cvw::*;  #(parameter cvw_t P) (
   input logic         Xs,     // x sign
   input logic         Ys,     // y sign
   input logic         Sqrt,   // sqrt oppertion? (when using divsqrt unit)
diff --git a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
index da21e928b..c03f1b5df 100644
--- a/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
+++ b/src/fpu/divremsqrt/divremsqrtshiftcorrection.sv
@@ -26,53 +26,52 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-`include "wally-config.vh"
 
-module divremsqrtshiftcorrection(
-  input logic  [`NORMSHIFTSZ-1:0] Shifted,                // the shifted sum before LZA correction
+module divremsqrtshiftcorrection import cvw::*;  #(parameter cvw_t P) (
+  input logic  [P.NORMSHIFTSZ-1:0] Shifted,                // the shifted sum before LZA correction
   // divsqrt
   input logic                     DivOp,                  // is it a divsqrt opperation
   input logic                     DivResSubnorm,          // is the divsqrt result subnormal
-  input logic  [`NE+1:0]          DivQe,                  // the divsqrt result's exponent
+  input logic  [P.NE+1:0]          DivQe,                  // the divsqrt result's exponent
   input logic                     DivSubnormShiftPos,     // is the subnorm divider shift amount positive (ie not underflowed)
   //fma
   //input logic                     FmaOp,                  // is it an fma opperation
-  //input logic  [`NE+1:0]          NormSumExp,             // exponent of the normalized sum not taking into account Subnormal or zero results
+  //input logic  [P.NE+1:0]          NormSumExp,             // exponent of the normalized sum not taking into account Subnormal or zero results
   //input logic                     FmaPreResultSubnorm,    // is the result subnormal - calculated before LZA corection
   //input logic                     FmaSZero,
   // output
-  //output logic [`NE+1:0]          FmaMe,                  // exponent of the normalized sum
-  output logic [`CORRSHIFTSZ-1:0] Mf,                     // the shifted sum before LZA correction
-  output logic [`NE+1:0]          Qe                      // corrected exponent for divider
+  //output logic [P.NE+1:0]          FmaMe,                  // exponent of the normalized sum
+  output logic [P.CORRSHIFTSZ-1:0] Mf,                     // the shifted sum before LZA correction
+  output logic [P.NE+1:0]          Qe                      // corrected exponent for divider
 );
 
-  logic [3*`NF+3:0]           CorrSumShifted;             // the shifted sum after LZA correction
-  logic [`CORRSHIFTSZ-1:0]    CorrQm0, CorrQm1;           // portions of Shifted to select for CorrQmShifted
-  logic [`CORRSHIFTSZ-1:0]    CorrQmShifted;              // the shifted divsqrt result after one bit shift
+  logic [3*P.NF+3:0]           CorrSumShifted;             // the shifted sum after LZA correction
+  logic [P.CORRSHIFTSZ-1:0]    CorrQm0, CorrQm1;           // portions of Shifted to select for CorrQmShifted
+  logic [P.CORRSHIFTSZ-1:0]    CorrQmShifted;              // the shifted divsqrt result after one bit shift
   logic                       ResSubnorm;                 // is the result Subnormal
   logic                       LZAPlus1;                   // add one or two to the sum's exponent due to LZA correction
   logic                       LeftShiftQm;                // should the divsqrt result be shifted one to the left
 
   // LZA correction
-  assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
+  assign LZAPlus1 = Shifted[P.NORMSHIFTSZ-1];
 
   // correct the shifting error caused by the LZA
   //  - the only possible mantissa for a plus two is all zeroes 
   //      - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
-  mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
+  mux2 #(P.NORMSHIFTSZ-2) lzacorrmux(Shifted[P.NORMSHIFTSZ-3:0], Shifted[P.NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
 
   // correct the shifting of the divsqrt caused by producing a result in (2, .5] range
   //    condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm)
   assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
-  assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
-  assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1];
-  mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
+  assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
+  assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
+  mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
   
   // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
   always_comb
-    //if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
+    //if(FmaOp)                       Mf = {CorrSumShifted, {P.CORRSHIFTSZ-(3*P.NF+4){1'b0}}};
     if (DivOp&~DivResSubnorm)  Mf = CorrQmShifted;
-    else                       Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    else                       Mf = Shifted[P.NORMSHIFTSZ-1:P.NORMSHIFTSZ-P.CORRSHIFTSZ];
     
   // Determine sum's exponent
   //  main exponent issues: 
@@ -82,12 +81,12 @@ module divremsqrtshiftcorrection(
   //      - if the result was calulated to be subnorm but it's norm and the LZA was off by 2
   //                          if plus1                    If plus2                               kill if the result Zero or actually subnormal
   //                          |                           |                                      |
-  //assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}};
+  //assign FmaMe = (NormSumExp+{{P.NE+1{1'b0}}, LZAPlus1} +{{P.NE+1{1'b0}}, FmaPreResultSubnorm}) & {P.NE+2{~(FmaSZero|ResSubnorm)}};
   
   // recalculate if the result is subnormal after LZA correction
-  //assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
+  //assign ResSubnorm = FmaPreResultSubnorm&~Shifted[P.NORMSHIFTSZ-2]&~Shifted[P.NORMSHIFTSZ-1];
 
   // the quotent is in the range [.5,2) if there is no early termination
   // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift
-  assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1};
+  assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
 endmodule
\ No newline at end of file
diff --git a/src/fpu/divremsqrt/divremsqrtspecialcase.sv b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
index 9bfd74721..d7f569add 100644
--- a/src/fpu/divremsqrt/divremsqrtspecialcase.sv
+++ b/src/fpu/divremsqrt/divremsqrtspecialcase.sv
@@ -26,14 +26,13 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-`include "wally-config.vh"
 
-module divremsqrtspecialcase(
+module divremsqrtspecialcase import cvw::*;  #(parameter cvw_t P) (
   input  logic                Xs,         // X sign
-  input  logic [`NF:0]        Xm, Ym, // input significand's
+  input  logic [P.NF:0]        Xm, Ym, // input significand's
   input  logic                XNaN, YNaN, // are the inputs NaN
   input  logic [2:0]          Frm,        // rounding mode
-  input  logic [`FMTBITS-1:0] OutFmt,     // output format
+  input  logic [P.FMTBITS-1:0] OutFmt,     // output format
   input  logic                InfIn,      // are any inputs infinity
   input  logic                NaNIn,      // are any input NaNs
   input  logic                XInf, YInf, // are X or Y inifnity
@@ -41,22 +40,22 @@ module divremsqrtspecialcase(
   input  logic                Plus1,      // do you add one for rounding
   input  logic                Rs,         // the result's sign
   input  logic                Invalid, Overflow,  // flags to choose the result
-  input  logic [`NE-1:0]      Re,         // Result exponent
-  input  logic [`NE+1:0]      FullRe,     // Result full exponent
-  input  logic [`NF-1:0]      Rf,         // Result fraction
+  input  logic [P.NE-1:0]      Re,         // Result exponent
+  input  logic [P.NE+1:0]      FullRe,     // Result full exponent
+  input  logic [P.NF-1:0]      Rf,         // Result fraction
   // divsqrt
   input  logic                DivOp,      // is it a divsqrt opperation
   input  logic                DivByZero,  // divide by zero flag
   // outputs
-  output logic [`FLEN-1:0]    PostProcRes // final result
+  output logic [P.FLEN-1:0]    PostProcRes // final result
 );
 
-  logic [`FLEN-1:0]   XNaNRes;    // X is NaN result
-  logic [`FLEN-1:0]   YNaNRes;    // Y is NaN result
-  logic [`FLEN-1:0]   InvalidRes; // Invalid result result
-  logic [`FLEN-1:0]   UfRes;      // underflowed result result
-  logic [`FLEN-1:0]   OfRes;      // overflowed result result
-  logic [`FLEN-1:0]   NormRes;    // normal result
+  logic [P.FLEN-1:0]   XNaNRes;    // X is NaN result
+  logic [P.FLEN-1:0]   YNaNRes;    // Y is NaN result
+  logic [P.FLEN-1:0]   InvalidRes; // Invalid result result
+  logic [P.FLEN-1:0]   UfRes;      // underflowed result result
+  logic [P.FLEN-1:0]   OfRes;      // overflowed result result
+  logic [P.FLEN-1:0]   NormRes;    // normal result
   logic               OfResMax;   // does the of result output maximum norm fp number
   logic               KillRes;    // kill the result for underflow
   logic               SelOfRes;   // should the overflow result be selected
@@ -67,148 +66,148 @@ module divremsqrtspecialcase(
   assign OfResMax = (~InfIn)&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
 
   // select correct outputs for special cases
-  if (`FPSIZES == 1) begin
+  if (P.FPSIZES == 1) begin
       //NaN res selection depending on standard
-      if(`IEEE754) begin
-          assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-          assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-          assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+      if(P.IEEE754) begin
+          assign XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+          assign YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+          assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
       end else begin
-          assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+          assign InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
       end
 
-      assign OfRes =  OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
-      assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
+      assign OfRes =  OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+      assign UfRes = {Rs, {P.FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
       assign NormRes = {Rs, Re, Rf};
 
-  end else if (`FPSIZES == 2) begin
-      if(`IEEE754) begin
-          assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-          assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-          assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+  end else if (P.FPSIZES == 2) begin
+      if(P.IEEE754) begin
+          assign XNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+          assign YNaNRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+          assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
       end else begin 
-          assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+          assign InvalidRes = OutFmt ? {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}} : {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
       end
 
       always_comb
           if(OutFmt)
-              if(OfResMax)    OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
-              else            OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
+              if(OfResMax)    OfRes = {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}};
+              else            OfRes = {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
           else
-              if(OfResMax)    OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
-              else            OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
-      assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-      assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
+              if(OfResMax)    OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}};
+              else            OfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+      assign UfRes = OutFmt ? {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+      assign NormRes = OutFmt ? {Rs, Re, Rf} : {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
 
-  end else if (`FPSIZES == 3) begin
+  end else if (P.FPSIZES == 3) begin
       always_comb
           case (OutFmt)
-              `FMT: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                      YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+              P.FMT: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                      YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
                   end else begin 
-                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
                   end
                   
-                  OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
-                  UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+                  UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
                   NormRes = {Rs, Re, Rf};
               end
-              `FMT1: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
-                      YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
-                      InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+              P.FMT1: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF1]};
+                      YNaNRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF1]};
+                      InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
                   end else begin 
-                      InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.LEN1{1'b1}}, 1'b0, {P.NE1{1'b1}}, 1'b1, (P.NF1-1)'(0)};
                   end
-                  OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
-                  UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
+                  OfRes = OfResMax ? {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1-1{1'b1}}, 1'b0, {P.NF1{1'b1}}} : {{P.FLEN-P.LEN1{1'b1}}, Rs, {P.NE1{1'b1}}, (P.NF1)'(0)};
+                  UfRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, (P.LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.LEN1{1'b1}}, Rs, Re[P.NE1-1:0], Rf[P.NF-1:P.NF-P.NF1]};
               end
-              `FMT2: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
-                      YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
-                      InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+              P.FMT2: begin  
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.NF2]};
+                      YNaNRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.NF2]};
+                      InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
                   end else begin 
-                      InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.LEN2{1'b1}}, 1'b0, {P.NE2{1'b1}}, 1'b1, (P.NF2-1)'(0)};
                   end
                   
-                  OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)};
-                  UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
+                  OfRes = OfResMax ? {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2-1{1'b1}}, 1'b0, {P.NF2{1'b1}}} : {{P.FLEN-P.LEN2{1'b1}}, Rs, {P.NE2{1'b1}}, (P.NF2)'(0)};
+                  UfRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, (P.LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.LEN2{1'b1}}, Rs, Re[P.NE2-1:0], Rf[P.NF-1:P.NF-P.NF2]};
               end
               default: begin
-                  if(`IEEE754) begin
-                      XNaNRes = (`FLEN)'(0);
-                      YNaNRes = (`FLEN)'(0);
-                      InvalidRes = (`FLEN)'(0);
+                  if(P.IEEE754) begin
+                      XNaNRes = (P.FLEN)'(0);
+                      YNaNRes = (P.FLEN)'(0);
+                      InvalidRes = (P.FLEN)'(0);
                   end else begin 
-                      InvalidRes = (`FLEN)'(0);
+                      InvalidRes = (P.FLEN)'(0);
                   end
-                  OfRes = (`FLEN)'(0);
-                  UfRes = (`FLEN)'(0);
-                  NormRes = (`FLEN)'(0);
+                  OfRes = (P.FLEN)'(0);
+                  UfRes = (P.FLEN)'(0);
+                  NormRes = (P.FLEN)'(0);
               end
           endcase
 
-  end else if (`FPSIZES == 4) begin 
+  end else if (P.FPSIZES == 4) begin 
       always_comb
           case (OutFmt)
               2'h3: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
-                      YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
-                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                  if(P.IEEE754) begin
+                      XNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Xm[P.NF-2:0]};
+                      YNaNRes = {1'b0, {P.NE{1'b1}}, 1'b1, Ym[P.NF-2:0]};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
                   end else begin 
-                      InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                      InvalidRes = {1'b0, {P.NE{1'b1}}, 1'b1, {P.NF-1{1'b0}}};
                   end
                   
-                  OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
-                  UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  OfRes = OfResMax ? {Rs, {P.NE-1{1'b1}}, 1'b0, {P.NF{1'b1}}} : {Rs, {P.NE{1'b1}}, {P.NF{1'b0}}};
+                  UfRes = {Rs, (P.FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
                   NormRes = {Rs, Re, Rf};
               end
               2'h1: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
-                      YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
-                      InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.D_NF]};
+                      YNaNRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.D_NF]};
+                      InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
                   end else begin 
-                      InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.D_LEN{1'b1}}, 1'b0, {P.D_NE{1'b1}}, 1'b1, (P.D_NF-1)'(0)};
                   end
-                  OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                  UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
+                  OfRes = OfResMax ? {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE-1{1'b1}}, 1'b0, {P.D_NF{1'b1}}} : {{P.FLEN-P.D_LEN{1'b1}}, Rs, {P.D_NE{1'b1}}, (P.D_NF)'(0)};
+                  UfRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, (P.D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.D_LEN{1'b1}}, Rs, Re[P.D_NE-1:0], Rf[P.NF-1:P.NF-P.D_NF]};
               end
               2'h0: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
-                      YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
-                      InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.S_NF]};
+                      YNaNRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.S_NF]};
+                      InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
                   end else begin 
-                      InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.S_LEN{1'b1}}, 1'b0, {P.S_NE{1'b1}}, 1'b1, (P.S_NF-1)'(0)};
                   end
                   
-                  OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                  UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
+                  OfRes = OfResMax ? {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE-1{1'b1}}, 1'b0, {P.S_NF{1'b1}}} : {{P.FLEN-P.S_LEN{1'b1}}, Rs, {P.S_NE{1'b1}}, (P.S_NF)'(0)};
+                  UfRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, (P.S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.S_LEN{1'b1}}, Rs, Re[P.S_NE-1:0], Rf[P.NF-1:P.NF-P.S_NF]};
               end
               2'h2: begin  
-                  if(`IEEE754) begin
-                      XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
-                      YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
-                      InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                  if(P.IEEE754) begin
+                      XNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Xm[P.NF-2:P.NF-P.H_NF]};
+                      YNaNRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, Ym[P.NF-2:P.NF-P.H_NF]};
+                      InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
                   end else begin 
-                      InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                      InvalidRes = {{P.FLEN-P.H_LEN{1'b1}}, 1'b0, {P.H_NE{1'b1}}, 1'b1, (P.H_NF-1)'(0)};
                   end
                   
-                  OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)};      
+                  OfRes = OfResMax ? {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE-1{1'b1}}, 1'b0, {P.H_NF{1'b1}}} : {{P.FLEN-P.H_LEN{1'b1}}, Rs, {P.H_NE{1'b1}}, (P.H_NF)'(0)};      
                 // zero is exact if dividing by infinity so don't add 1
-                  UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
-                  NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
+                  UfRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, (P.H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
+                  NormRes = {{P.FLEN-P.H_LEN{1'b1}}, Rs, Re[P.H_NE-1:0], Rf[P.NF-1:P.NF-P.H_NF]};
               end
           endcase
   end
@@ -217,13 +216,13 @@ module divremsqrtspecialcase(
   //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
   //      - dont set to zero if fp input is zero but not using the fp input
   //      - dont set to zero if int input is zero but not using the int input
-  assign KillRes = FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
+  assign KillRes = FullRe[P.NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
   
   // calculate if the overflow result should be selected
   assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
   
   // output infinity with result sign if divide by zero
-  if(`IEEE754)
+  if(P.IEEE754)
     always_comb
       if(XNaN)                    PostProcRes = XNaNRes;
       else if(YNaN)               PostProcRes = YNaNRes;
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
index 82e68beda..fc04c0d31 100644
--- a/src/fpu/divremsqrt/drsu.sv
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -26,15 +26,14 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-`include "wally-config.vh"
 
-module drsu(
+module drsu import cvw::*;  #(parameter cvw_t P) (
   input  logic                clk, 
   input  logic                reset, 
-  input  logic [`FMTBITS-1:0] FmtE,
+  input  logic [P.FMTBITS-1:0] FmtE,
   input  logic                XsE, YsE,
-  input  logic [`NF:0]        XmE, YmE,
-  input  logic [`NE-1:0]      XeE, YeE,
+  input  logic [P.NF:0]        XmE, YmE,
+  input  logic [P.NE-1:0]      XeE, YeE,
   input  logic                XInfE, YInfE, 
   input  logic                XZeroE, YZeroE, 
   input  logic                XNaNE, YNaNE, 
@@ -43,41 +42,41 @@ module drsu(
   input  logic                StallM,
   input  logic                FlushE,
   input  logic                SqrtE, SqrtM,
-  input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+  input  logic [P.XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B
   input  logic [2:0]          Funct3E, Funct3M,
   input  logic                IntDivE, W64E,
   input  logic [2:0]          Frm,
   input  logic [2:0]          OpCtrl,
   input  logic [1:0]          PostProcSel,
   output logic                FDivBusyE, IFDivStartE, FDivDoneE,
-  output logic [`FLEN-1:0]    FResM,
-  output logic [`XLEN-1:0]    FIntDivResultM,
+  output logic [P.FLEN-1:0]    FResM,
+  output logic [P.XLEN-1:0]    FIntDivResultM,
   output logic [4:0]          FlgM
 );
 
   // Floating-point division and square root module, with optional integer division and remainder
   // Computes X/Y, sqrt(X), A/B, or A%B
 
-  logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
-  logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
-  logic [`DIVb+3:0]           D;                            // Iterator Divisor
-  logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
-  logic [`DIVb+1:0]           FirstC;                       // Step tracker
+  logic [P.DIVb+3:0]           WS, WC;                       // Partial remainder components
+  logic [P.DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
+  logic [P.DIVb+3:0]           D;                            // Iterator Divisor
+  logic [P.DIVb:0]             FirstU, FirstUM;              // Intermediate result values
+  logic [P.DIVb+1:0]           FirstC;                       // Step tracker
   logic                       Firstun;                      // Quotient selection
   logic                       WZeroE;                       // Early termination flag
-  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
+  logic [P.DURLEN-1:0]         CyclesE;                      // FSM cycles
   logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
   logic                       DivStartE;                    // Enable signal for flops during stall
                                                             
   // Integer div/rem signals                                
   logic                       BZeroM;                       // Denominator is zero
   logic                       IntDivM;                      // Integer operation
-  logic [`DIVBLEN:0]          nM, mM;                       // Shift amounts
+  logic [P.DIVBLEN:0]          nM, mM;                       // Shift amounts
   logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
-  logic [`XLEN-1:0]           AM;                           // Original Numerator for postprocessor
+  logic [P.XLEN-1:0]           AM;                           // Original Numerator for postprocessor
   logic                       ISpecialCaseE;                // Integer div/remainder special cases
-  logic [`DIVb:0]             QmM;
-  logic [`NE+1:0]             QeM;
+  logic [P.DIVb:0]             QmM;
+  logic [P.NE+1:0]             QeM;
   logic                       DivStickyM;
 
   divremsqrt divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE, 

From f87e1232a4aac46f849b1f7ec11c2c899681c4a4 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Tue, 27 Jun 2023 20:05:30 -0700
Subject: [PATCH 17/40] added custom test support

---
 testbench/testbench-fp.sv | 3 +--
 testbench/tests-fp.vh     | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index 247953360..ddeed27ce 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -770,7 +770,7 @@ module testbenchfp;
                    .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
    end
    
-   if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt
+   if (TEST === "div" | TEST === "sqrt" | TEST === "all"| TEST === "custom") begin: fdivsqrt
       fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
 			     .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
 			     .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
@@ -995,7 +995,6 @@ module testbenchfp;
 	 $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);
 	 $display("Error in %s", Tests[TestNum]);
 	 $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
-	 $stop;
       end
       
       // TestFloat sets the result to all 1's when there is an invalid result, however in 
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index 3c7e0cbf9..e32da9d29 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -582,7 +582,7 @@ string f128fma[] = '{
 };
 
 string custom[] = '{
-	"f16_div_rne.tv"
+	"f16_sqrt_rne.tv"
 };
 
 

From 1b33913e3831df7e530f0108958ce385d2d25f12 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Wed, 28 Jun 2023 08:28:20 -0700
Subject: [PATCH 18/40] lint fixes

---
 src/fpu/divremsqrt/divremsqrtround.sv | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
index 3d530b384..c199f25b6 100644
--- a/src/fpu/divremsqrt/divremsqrtround.sv
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -107,40 +107,40 @@ module divremsqrtround import cvw::*;  #(parameter cvw_t P)  (
       //      |    NF     |1|1|
       //                     ^    ^ if floating point result
       //                     ^ if not an FMA result
-      if (P.XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
+      if (XLENPOS == 1)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
                                                 (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
       //     2: NF > XLEN
-      if (P.XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
+      if (XLENPOS == 2)assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
 
   end else if (P.FPSIZES == 2) begin
       // XLEN is either 64 or 32
       // so half and single are always smaller then XLEN
 
       // 1: XLEN > NF   > NF1
-      if (P.XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~OutFmt) |
+      if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~OutFmt) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
                                                 (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
       // 2: NF   > XLEN > NF1
-      if (P.XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~OutFmt) | 
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~OutFmt) | 
                                                 (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
       // 3: NF   > NF1  > XLEN
-      if (P.XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~OutFmt)) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
 
   end else if (P.FPSIZES == 3) begin
       // 1: XLEN > NF   > NF1
-      if (P.XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
+      if (XLENPOS == 1) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&~(OutFmt==P.FMT)) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF-2:P.CORRSHIFTSZ-P.XLEN-1]) |
                                                 (|Mf[P.CORRSHIFTSZ-P.XLEN-2:0]);
       // 2: NF   > XLEN > NF1
-      if (P.XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.NF1-1]&(OutFmt==P.FMT1)) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.XLEN-1]&~(OutFmt==P.FMT)) | 
                                                 (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT))) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
       // 3: NF   > NF1  > XLEN
-      if (P.XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&(OutFmt==P.FMT1)) |
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.NF2-2:P.CORRSHIFTSZ-P.XLEN-1]&(OutFmt==P.FMT1)) |
                                                 (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.NF1-1]&((OutFmt==P.FMT1))) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF1-2:P.CORRSHIFTSZ-P.NF-1]&(~(OutFmt==P.FMT))) |
                                                 (|Mf[P.CORRSHIFTSZ-P.NF-2:0]);
@@ -148,14 +148,14 @@ module divremsqrtround import cvw::*;  #(parameter cvw_t P)  (
   end else if (P.FPSIZES == 4) begin
       // Quad precision will always be greater than XLEN
       // 2: NF   > XLEN > NF1
-      if (P.XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
+      if (XLENPOS == 2) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
                                                 (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) | 
                                                 (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&~(OutFmt==P.Q_FMT)) | 
                                                 (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT))) |
                                                 (|Mf[P.CORRSHIFTSZ-P.Q_NF-2:0]);
       // 3: NF   > NF1  > XLEN
       // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
-      if (P.XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
+      if (XLENPOS == 3) assign NormSticky = (|Mf[P.CORRSHIFTSZ-P.H_NF-2:P.CORRSHIFTSZ-P.S_NF-1]&(OutFmt==P.H_FMT)) |
                                                 (|Mf[P.CORRSHIFTSZ-P.S_NF-2:P.CORRSHIFTSZ-P.XLEN-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
                                                 (|Mf[P.CORRSHIFTSZ-P.XLEN-2:P.CORRSHIFTSZ-P.D_NF-1]&((OutFmt==P.S_FMT)|(OutFmt==P.H_FMT))) |
                                                 (|Mf[P.CORRSHIFTSZ-P.D_NF-2:P.CORRSHIFTSZ-P.Q_NF-1]&(~(OutFmt==P.Q_FMT))) |

From 24576f5b0c8dd8142c3dc4c31a301b7a560c0143 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Wed, 28 Jun 2023 08:32:13 -0700
Subject: [PATCH 19/40] more lints

---
 src/fpu/divremsqrt/divremsqrt.sv            |  8 ++++----
 src/fpu/divremsqrt/divremsqrtpostprocess.sv | 14 +++++++-------
 src/fpu/divremsqrt/drsu.sv                  |  4 ++--
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/fpu/divremsqrt/divremsqrt.sv b/src/fpu/divremsqrt/divremsqrt.sv
index e45e383f9..bf5b1d782 100644
--- a/src/fpu/divremsqrt/divremsqrt.sv
+++ b/src/fpu/divremsqrt/divremsqrt.sv
@@ -73,7 +73,7 @@
   logic [P.XLEN-1:0]           AM;                           // Original Numerator for postprocessor
   logic                       ISpecialCaseE;                // Integer div/remainder special cases
 
-  fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
+  fdivsqrtpreproc #(P) fdivsqrtpreproc(                          // Preprocessor
     .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
     .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
     // Int-specific 
@@ -81,18 +81,18 @@
     .BZeroM, .nM, .mM, .AM, 
     .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
 
-  fdivsqrtfsm fdivsqrtfsm(                                  // FSM
+  fdivsqrtfsm #(P) fdivsqrtfsm(                                  // FSM
     .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
     .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
     .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
     // Int-specific 
     .IDivStartE, .ISpecialCaseE, .IntDivE);
 
-  fdivsqrtiter fdivsqrtiter(                                // CSA Iterator
+  fdivsqrtiter #(P) fdivsqrtiter(                                // CSA Iterator
     .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, 
     .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
 
-  fdivsqrtpostproc fdivsqrtpostproc(                        // Postprocessor
+  fdivsqrtpostproc #(P) fdivsqrtpostproc(                        // Postprocessor
     .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, 
     .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, 
     .QmM, .WZeroE, .DivStickyM, 
diff --git a/src/fpu/divremsqrt/divremsqrtpostprocess.sv b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
index 0307121c0..02981ea2d 100644
--- a/src/fpu/divremsqrt/divremsqrtpostprocess.sv
+++ b/src/fpu/divremsqrt/divremsqrtpostprocess.sv
@@ -122,16 +122,16 @@ module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P)  (
   /*cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,  
       .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);*/
 
-  divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
+  divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
 
   assign ShiftAmt = DivShiftAmt;
   assign ShiftIn = DivShiftIn;
   
   // main normalization shift
-  normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
+  normshift #(P) normshift (.ShiftIn, .ShiftAmt, .Shifted);
 
   // correct for LZA/divsqrt error
-  divremsqrtshiftcorrection shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .Shifted, .Mf);
+  divremsqrtshiftcorrection #(P) shiftcorrection(.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .Shifted, .Mf);
 
   ///////////////////////////////////////////////////////////////////////////////
   // Rounding
@@ -144,9 +144,9 @@ module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P)  (
   // round to nearest max magnitude
 
   // calulate result sign used in rounding unit
-  divremsqrtroundsign roundsign( .DivOp, .Sqrt, .Xs, .Ys, .Ms);
+  divremsqrtroundsign #(P) roundsign( .DivOp, .Sqrt, .Xs, .Ys, .Ms);
 
-  divremsqrtround round(.OutFmt, .Frm, .Plus1, .Qe,
+  divremsqrtround #(P) round(.OutFmt, .Frm, .Plus1, .Qe,
       .Ms, .Mf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
 
   ///////////////////////////////////////////////////////////////////////////////
@@ -161,7 +161,7 @@ module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P)  (
   // Flags
   ///////////////////////////////////////////////////////////////////////////////
 
-  divremsqrtflags flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero, 
+  divremsqrtflags #(P) flags(.XSNaN, .YSNaN, .XInf, .YInf, .InfIn, .XZero, .YZero, 
               .Xs, .OutFmt, .Sqrt,
               .NaNIn, .Round, .DivByZero,
               .Guard, .Sticky, .UfPlus1,.DivOp, .FullRe, .Plus1,
@@ -173,7 +173,7 @@ module divremsqrtpostprocess import cvw::*;  #(parameter cvw_t P)  (
 
   //negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
 
-  divremsqrtspecialcase specialcase(.Xs, .Xm, .Ym, .XZero, 
+  divremsqrtspecialcase #(P) specialcase(.Xs, .Xm, .Ym, .XZero, 
       .Frm, .OutFmt, .XNaN, .YNaN,  
       .NaNIn, .Plus1, .Invalid, .Overflow, .InfIn,
       .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .Rs, .Re, .Rf, .PostProcRes );
diff --git a/src/fpu/divremsqrt/drsu.sv b/src/fpu/divremsqrt/drsu.sv
index fc04c0d31..a9fb58860 100644
--- a/src/fpu/divremsqrt/drsu.sv
+++ b/src/fpu/divremsqrt/drsu.sv
@@ -79,7 +79,7 @@ module drsu import cvw::*;  #(parameter cvw_t P) (
   logic [P.NE+1:0]             QeM;
   logic                       DivStickyM;
 
-  divremsqrt divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE, 
+  divremsqrt #(P) divremsqrt(.clk, .reset, .XsE, .FmtE, .XmE, .YmE, 
             .XeE, .YeE, .SqrtE, .SqrtM,
                     .XInfE, .YInfE, .XZeroE, .YZeroE, 
             .XNaNE, .YNaNE, 
@@ -89,7 +89,7 @@ module drsu import cvw::*;  #(parameter cvw_t P) (
                     .FlushE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3M,
                     .Funct3E, .IntDivE, .FIntDivResultM,
                     .FDivDoneE, .IFDivStartE);
-  divremsqrtpostprocess divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl,
+  divremsqrtpostprocess #(P) divremsqrtpostprocess(.Xs(XsE), .Ys(YsE), .Xm(XmE), .Ym(YmE), .Frm(Frm), .Fmt(FmtE), .OpCtrl,
     .XZero(XZeroE), .YZero(YZeroE), .XInf(XInfE), .YInf(YInfE), .XNaN(XNaNE), .YNaN(YNaNE), .XSNaN(XSNaNE), 
     .YSNaN(YSNaNE), .PostProcSel,.DivSticky(DivStickyM), .DivQe(QeM), .DivQm(QmM), .PostProcRes(FResM), .PostProcFlg(FlgM));
 endmodule

From 8094c12c80f0b79478743ac08f274f51c5500f6d Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Wed, 28 Jun 2023 08:37:12 -0700
Subject: [PATCH 20/40] fixed port mismatch and conditional postprocessing

---
 testbench/testbench-fp.sv | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index ddeed27ce..a12fbd883 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -651,7 +651,7 @@ module testbenchfp;
       end
       if (TEST === "divremsqrttest") begin // if unified div sqrt is being tested
         Tests = {Tests, f16sqrt};
-        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         for(int i = 0; i<5; i++) begin
             Unit = {Unit, `DIVUNIT};
@@ -660,7 +660,7 @@ module testbenchfp;
       end
       if (TEST === "custom") begin // if unified div sqrt is being tested
         Tests = {Tests, custom};
-        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `DIVUNIT};
         Fmt = {Fmt, 2'b10};
@@ -746,7 +746,7 @@ module testbenchfp;
 		   .ASticky); 
    end
    
-   postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+   /*postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
 				.OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
 				.Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
 				.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
@@ -755,7 +755,7 @@ module testbenchfp;
 				.XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
 				.FmaASticky(ASticky), .FmaSe(Se),
 				.FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-				.PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
+				.PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));*/
    
    if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt
       fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
@@ -782,6 +782,33 @@ module testbenchfp;
 			     .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
 			     .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
+   if (TEST === "divremsqrt" | TEST === "divremsqrttest") begin: divremsqrt
+    drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
+		       .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+           .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
+           .PostProcSel(UnitVal[1:0]),
+		       .XNaNE(XNaN), .YNaNE(YNaN), 
+             .OpCtrl(OpCtrlVal),
+             .XSNaNE(XSNaN), .YSNaNE(YSNaN),
+           .Frm(FrmVal), 
+                       .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
+                       .StallM(1'b0), .FDivBusyE,
+                       .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
+                       .Funct3E(Funct3E), .IntDivE(1'b0), 
+                       .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
+  end
+  else begin: postprocess
+    postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+                .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
+                .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
+                .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
+                .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
+                .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
+                .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
+                .FmaASticky(ASticky), .FmaSe(Se),
+                .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+                .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
+  end
 
    assign CmpFlg[3:0] = 0;
 

From 654f6dee3f0c51efa71ec9dddf31e736d96733e2 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Thu, 29 Jun 2023 16:28:01 -0700
Subject: [PATCH 21/40] debug: run customdiv test on drsu, drop PostProc waves

---
 sim/wave-fpu copy.do      | 29 +++++++++++++++++++++++++++++
 sim/wave-fpu.do           | 11 +----------
 testbench/testbench-fp.sv |  6 +++---
 testbench/tests-fp.vh     |  2 +-
 4 files changed, 34 insertions(+), 14 deletions(-)
 create mode 100644 sim/wave-fpu copy.do

diff --git a/sim/wave-fpu copy.do b/sim/wave-fpu copy.do
new file mode 100644
index 000000000..a1cfa8731
--- /dev/null
+++ b/sim/wave-fpu copy.do	
@@ -0,0 +1,29 @@
+
+add wave -noupdate /testbenchfp/clk
+add wave -noupdate -radix decimal /testbenchfp/VectorNum
+add wave -noupdate /testbenchfp/FrmNum
+add wave -noupdate /testbenchfp/X
+add wave -noupdate /testbenchfp/Y
+add wave -noupdate /testbenchfp/Z
+add wave -noupdate /testbenchfp/Res
+add wave -noupdate /testbenchfp/Ans
+add wave -noupdate /testbenchfp/DivStart
+add wave -noupdate /testbenchfp/FDivBusyE
+add wave -noupdate /testbenchfp/CheckNow
+add wave -noupdate /testbenchfp/DivDone
+add wave -noupdate /testbenchfp/ResMatch
+add wave -noupdate /testbenchfp/FlagMatch
+add wave -noupdate /testbenchfp/CheckNow
+add wave -noupdate /testbenchfp/NaNGood
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
+add wave -group {Testbench} -noupdate /testbenchfp/*
+add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/sim/wave-fpu.do b/sim/wave-fpu.do
index a1cfa8731..85eccac27 100644
--- a/sim/wave-fpu.do
+++ b/sim/wave-fpu.do
@@ -15,15 +15,6 @@ add wave -noupdate /testbenchfp/ResMatch
 add wave -noupdate /testbenchfp/FlagMatch
 add wave -noupdate /testbenchfp/CheckNow
 add wave -noupdate /testbenchfp/NaNGood
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
+
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index a12fbd883..a485a9f5d 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -658,9 +658,9 @@ module testbenchfp;
             Fmt = {Fmt, 2'b10};
         end
       end
-      if (TEST === "custom") begin // if unified div sqrt is being tested
+      if (TEST === "customdiv") begin // if unified div sqrt is being tested
         Tests = {Tests, custom};
-        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `DIVUNIT};
         Fmt = {Fmt, 2'b10};
@@ -782,7 +782,7 @@ module testbenchfp;
 			     .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
 			     .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
-   if (TEST === "divremsqrt" | TEST === "divremsqrttest") begin: divremsqrt
+   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
 		       .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index e32da9d29..3c7e0cbf9 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -582,7 +582,7 @@ string f128fma[] = '{
 };
 
 string custom[] = '{
-	"f16_sqrt_rne.tv"
+	"f16_div_rne.tv"
 };
 
 

From 8d898b16c7d97b924294d6fb010c82b70828a4ad Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Thu, 6 Jul 2023 19:48:25 -0700
Subject: [PATCH 22/40] fixed sticky bit logic bug

---
 src/fpu/divremsqrt/divremsqrtround.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fpu/divremsqrt/divremsqrtround.sv b/src/fpu/divremsqrt/divremsqrtround.sv
index c199f25b6..2911bd920 100644
--- a/src/fpu/divremsqrt/divremsqrtround.sv
+++ b/src/fpu/divremsqrt/divremsqrtround.sv
@@ -167,7 +167,7 @@ module divremsqrtround import cvw::*;  #(parameter cvw_t P)  (
 
   // only add the Addend sticky if doing an FMA opperation
   //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-  assign Sticky = DivSticky&DivOp;
+  assign Sticky = DivSticky&DivOp | NormSticky;
   
 
 

From 6851233303a1103a9b0e4675568648c3510f961c Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Thu, 6 Jul 2023 19:49:13 -0700
Subject: [PATCH 23/40] extraneous files: customdivcorrect test, fperr.out log, Questa path

---
 setup.sh                  |  3 ++-
 testbench/testbench-fp.sv | 15 +++++++++++++--
 testbench/tests-fp.vh     |  8 ++++++--
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/setup.sh b/setup.sh
index 95026beb1..a92cc84a2 100755
--- a/setup.sh
+++ b/setup.sh
@@ -16,7 +16,8 @@ echo \$WALLY set to ${WALLY}
 # Must edit these based on your local environment.  Ask your sysadmin.
 export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu                   # Change this to your Siemens license server
 export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu                # Change this to your Synopsys license server
-export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_2/questasim        # Change this for your path to Questa, excluding bin
+#export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_2/questasim        # Change this for your path to Questa, excluding bin
+export QUESTA_HOME=/cad/mentor/questa_sim-2021.4_4/questasim        # Change this for your path to Questa, excluding bin
 #export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_3/questasim        # Change this for your path to Questa, excluding bin
 export SNPS_HOME=/cad/synopsys/SYN                                  # Change this for your path to Design Compiler, excluding bin
 
diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index a485a9f5d..416b7239a 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -659,7 +659,14 @@ module testbenchfp;
         end
       end
       if (TEST === "customdiv") begin // if unified div sqrt is being tested
-        Tests = {Tests, custom};
+        Tests = {Tests, customdiv};
+        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `DIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
+      if (TEST === "customdivcorrect") begin // if unified div sqrt is being tested
+        Tests = {Tests, customdivcorrect};
         OpCtrl = {OpCtrl, `DIV_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `DIVUNIT};
@@ -770,7 +777,7 @@ module testbenchfp;
                    .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
    end
    
-   if (TEST === "div" | TEST === "sqrt" | TEST === "all"| TEST === "custom") begin: fdivsqrt
+   if (TEST === "div" | TEST === "sqrt" | TEST === "all"| TEST === "custom" | TEST ==="customdivcorrect") begin: fdivsqrt
       fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
 			     .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
 			     .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
@@ -1018,6 +1025,10 @@ module testbenchfp;
 
       assign CheckNow = (DivDone | ~divsqrtop) & (UnitVal !== `CVTINTUNIT) & (UnitVal !== `CMPUNIT);
       if (~(ResMatch & FlagMatch) & CheckNow) begin
+            integer fd;
+            fd = $fopen("fperr.out","a");
+            $fwrite(fd, "%h_%h_%h_%2h\n",X[15:0],Y[15:0],Ans[15:0],AnsFlg);
+            $fclose(fd);
 	 errors += 1;
 	 $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);
 	 $display("Error in %s", Tests[TestNum]);
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index 3c7e0cbf9..500a65d4b 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -581,8 +581,12 @@ string f128fma[] = '{
 	"f128_mulAdd_rnm.tv"
 };
 
-string custom[] = '{
-	"f16_div_rne.tv"
+string customdiv[] = '{
+	"f16_custom.tv"
+};
+
+string customdivcorrect[] = '{
+	"f16_custom.tv"
 };
 
 

From 23bb96f8573dadb96b5cfb3e568e3704cc2e2d61 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Thu, 6 Jul 2023 21:23:46 -0700
Subject: [PATCH 24/40] divremsqrt now includes f64

---
 testbench/testbench-fp.sv | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index 48fe3ec2c..a0993b6da 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -428,6 +428,15 @@ module testbenchfp;
                Fmt = {Fmt, 2'b01};
             end
 	 end
+   if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+      Tests = {Tests, f64div, f64sqrt};
+      OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
+      WriteInt = {WriteInt, 1'b0, 1'b0};
+      for(int i = 0; i<10; i++) begin
+         Unit = {Unit, `DIVUNIT};
+         Fmt = {Fmt, 2'b01};
+      end
+   end
       end
       if (P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported
 	 if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
@@ -651,12 +660,12 @@ module testbenchfp;
         end
       end
       if (TEST === "divremsqrttest") begin // if unified div sqrt is being tested
-        Tests = {Tests, f16sqrt};
+        Tests = {Tests, f128sqrt};
         OpCtrl = {OpCtrl, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         for(int i = 0; i<5; i++) begin
             Unit = {Unit, `DIVUNIT};
-            Fmt = {Fmt, 2'b10};
+            Fmt = {Fmt, 2'b11};
         end
       end
       if (TEST === "customdiv") begin // if unified div sqrt is being tested
@@ -1030,7 +1039,7 @@ module testbenchfp;
       if (~(ResMatch & FlagMatch) & CheckNow) begin
             integer fd;
             fd = $fopen("fperr.out","a");
-            $fwrite(fd, "%h_%h_%h_%2h\n",X[15:0],Y[15:0],Ans[15:0],AnsFlg);
+            $fwrite(fd, "%h_%h_%h_%2h\n",X,Y,Ans,AnsFlg);
             $fclose(fd);
 	 errors += 1;
 	 $display("\nError in %s", Tests[TestNum]);

From 9343c2296ee2961928413c538231e68426c2854e Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sat, 8 Jul 2023 20:42:16 -0700
Subject: [PATCH 25/40] testbench now compiles with basic infrastructure to do
 int64rem test on drsu

---
 testbench/testbench-fp.sv                     | 483 ++++++++++--------
 testbench/tests-fp.vh                         |   8 +
 .../extract_arch_vectors.py                   |  24 +-
 .../extract_arch_vectors_v2.py                | 294 +++++++++++
 4 files changed, 572 insertions(+), 237 deletions(-)
 create mode 100755 tests/fp/combined_IF_vectors/extract_arch_vectors_v2.py

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index a0993b6da..dd0774b3b 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -56,7 +56,8 @@ module testbenchfp;
    logic                        WriteIntVal;                // value of the current WriteInt
    logic [P.FLEN-1:0] 		X, Y, Z;                    // inputs read from TestFloat
    logic [P.FLEN-1:0] 		XPostBox;                   // inputs read from TestFloat
-   logic [P.XLEN-1:0] 		SrcA;                       // integer input
+   logic [P.XLEN-1:0] 		SrcA, SrcB;                       // integer input
+   logic                   W64;                              // is W64 instruction
    logic [P.FLEN-1:0] 		Ans;                        // correct answer from TestFloat
    logic [P.FLEN-1:0] 		Res;                        // result from other units
    logic [4:0] 			AnsFlg;                     // correct flags read from testfloat
@@ -84,6 +85,7 @@ module testbenchfp;
    logic [P.DIVb:0] 		Quot;
    logic                        CvtResSubnormUfE;
    logic                        DivStart;
+   logic                        IDivStart;
    logic 			FDivBusyE;
    logic 			OldFDivBusyE;
    logic                        reset = 1'b0;
@@ -118,11 +120,13 @@ module testbenchfp;
    logic [P.NE+1:0] 		QeM;
    logic [P.DIVb:0] 		QmM;
    logic [P.XLEN-1:0] 		FIntDivResultM;
+   logic  		IntDivE;
    logic 			ResMatch;                   // Check if result match
    logic 			FlagMatch;                  // Check if IEEE flags match
    logic 			CheckNow;                   // Final check
    logic 			FMAop;                      // Is this a FMA operation?   
    
+	 flop #(3) funct3reg(.clk, .d(Funct3E), .q(Funct3M));
    ///////////////////////////////////////////////////////////////////////////////////////////////
 
    //     ||||||||| |||||||| ||||||| |||||||||   ||||||| |||||||| |||
@@ -149,7 +153,7 @@ module testbenchfp;
       $display("This simulation for TEST is %s", TEST);
       $display("This simulation for TEST is of the operand size of %s", TEST_SIZE);      
       if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported
-	 if (TEST === "cvtint" | TEST === "all") begin  // if testing integer conversion
+   if (TEST === "cvtint" | TEST === "all") begin  // if testing integer conversion
             // add the 128-bit cvtint tests to the to-be-tested list
             Tests = {Tests, f128rv32cvtint};
             // add the op-codes for these tests to the op-code list
@@ -167,13 +171,13 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                // add what unit is used and the fmt to their lists (one for each test)
                for(int i = 0; i<20; i++) begin
-		  Unit = {Unit, `CVTINTUNIT};
-		  Fmt = {Fmt, 2'b11};
+      Unit = {Unit, `CVTINTUNIT};
+      Fmt = {Fmt, 2'b11};
                end
             end
-	 end 
-	 // if the floating-point conversions are being tested          
-	 if (TEST === "cvtfp" | TEST === "all") begin  
+   end 
+   // if the floating-point conversions are being tested          
+   if (TEST === "cvtfp" | TEST === "all") begin  
             if (P.D_SUPPORTED) begin // if double precision is supported
                // add the 128 <-> 64 bit conversions to the to-be-tested list
                Tests = {Tests, f128f64cvt};
@@ -182,12 +186,12 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b11};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b11};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b01};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b01};
                end
             end
             if (P.F_SUPPORTED) begin // if single precision is supported
@@ -198,12 +202,12 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b11};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b11};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b00};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b00};
                end
             end
             if (P.ZFH_SUPPORTED) begin // if half precision is supported
@@ -214,16 +218,16 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b11};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b11};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b10};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b10};
                end
             end
-	 end
-	 if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested
+   end
+   if (TEST === "cmp" | TEST === "all") begin// if comparisons are being tested
             // add the compare tests/op-ctrls/unit/fmt
             Tests = {Tests, f128cmp};
             OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
@@ -232,8 +236,8 @@ module testbenchfp;
                Unit = {Unit, `CMPUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "add" | TEST === "all") begin // if addition is being tested
+   end
+   if (TEST === "add" | TEST === "all") begin // if addition is being tested
             // add the addition tests/op-ctrls/unit/fmt
             Tests = {Tests, f128add};
             OpCtrl = {OpCtrl, `ADD_OPCTRL};
@@ -242,8 +246,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
+   end
+   if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
             // add the subtraction tests/op-ctrls/unit/fmt
             Tests = {Tests, f128sub};
             OpCtrl = {OpCtrl, `SUB_OPCTRL};
@@ -252,8 +256,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+   end
+   if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
             // add the multiply tests/op-ctrls/unit/fmt
             Tests = {Tests, f128mul};
             OpCtrl = {OpCtrl, `MUL_OPCTRL};
@@ -262,8 +266,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "div" | TEST === "all") begin // if division is being tested
+   end
+   if (TEST === "div" | TEST === "all") begin // if division is being tested
             // add the divide tests/op-ctrls/unit/fmt
             Tests = {Tests, f128div};
             OpCtrl = {OpCtrl, `DIV_OPCTRL};
@@ -272,8 +276,8 @@ module testbenchfp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
+   end
+   if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
             // add the square-root tests/op-ctrls/unit/fmt
             Tests = {Tests, f128sqrt};
             OpCtrl = {OpCtrl, `SQRT_OPCTRL};
@@ -282,8 +286,8 @@ module testbenchfp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
-	 if (TEST === "fma" | TEST === "all") begin  // if fused-mutliply-add is being tested
+   end
+   if (TEST === "fma" | TEST === "all") begin  // if fused-mutliply-add is being tested
             Tests = {Tests, f128fma};
             OpCtrl = {OpCtrl, `FMA_OPCTRL};
             WriteInt = {WriteInt, 1'b0};
@@ -291,7 +295,7 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b11};
             end
-	 end
+   end
    if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
       Tests = {Tests, f128div, f128sqrt};
       OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
@@ -303,7 +307,7 @@ module testbenchfp;
    end
       end
       if (P.D_SUPPORTED & (TEST_SIZE == "DP" | TEST_SIZE == "all")) begin // if double precision is supported
-	 if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested
+   if (TEST === "cvtint" | TEST === "all") begin // if integer conversion is being tested
             Tests = {Tests, f64rv32cvtint};
             // add the op-codes for these tests to the op-code list
             OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
@@ -320,12 +324,12 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                // add what unit is used and the fmt to their lists (one for each test)
                for(int i = 0; i<20; i++) begin
-		  Unit = {Unit, `CVTINTUNIT};
-		  Fmt = {Fmt, 2'b01};
+      Unit = {Unit, `CVTINTUNIT};
+      Fmt = {Fmt, 2'b01};
                end
             end
-	 end
-	 if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested
+   end
+   if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested
             if (P.F_SUPPORTED) begin // if single precision is supported
                // add the 64 <-> 32 bit conversions to the to-be-tested list
                Tests = {Tests, f64f32cvt};
@@ -334,12 +338,12 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b01};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b01};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b00};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b00};
                end
             end
             if (P.ZFH_SUPPORTED) begin // if half precision is supported
@@ -350,16 +354,16 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b01};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b01};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b10};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b10};
                end
             end
-	 end
-	 if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
+   end
+   if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64cmp};
             OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
@@ -368,8 +372,8 @@ module testbenchfp;
                Unit = {Unit, `CMPUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "add" | TEST === "all") begin // if addition is being tested
+   end
+   if (TEST === "add" | TEST === "all") begin // if addition is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64add};
             OpCtrl = {OpCtrl, `ADD_OPCTRL};
@@ -378,8 +382,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
+   end
+   if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64sub};
             OpCtrl = {OpCtrl, `SUB_OPCTRL};
@@ -388,8 +392,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+   end
+   if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64mul};
             OpCtrl = {OpCtrl, `MUL_OPCTRL};
@@ -398,8 +402,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "div" | TEST === "all") begin // if division is being tested
+   end
+   if (TEST === "div" | TEST === "all") begin // if division is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64div};
             OpCtrl = {OpCtrl, `DIV_OPCTRL};
@@ -408,8 +412,8 @@ module testbenchfp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
+   end
+   if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f64sqrt};
             OpCtrl = {OpCtrl, `SQRT_OPCTRL};
@@ -418,8 +422,8 @@ module testbenchfp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
-	 if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
+   end
+   if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
             Tests = {Tests, f64fma};
             OpCtrl = {OpCtrl, `FMA_OPCTRL};
             WriteInt = {WriteInt, 1'b0};
@@ -427,7 +431,7 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b01};
             end
-	 end
+   end
    if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
       Tests = {Tests, f64div, f64sqrt};
       OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
@@ -439,7 +443,7 @@ module testbenchfp;
    end
       end
       if (P.F_SUPPORTED & (TEST_SIZE == "SP" | TEST_SIZE == "all")) begin // if single precision being supported
-	 if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
+   if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
             Tests = {Tests, f32rv32cvtint};
             // add the op-codes for these tests to the op-code list
             OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
@@ -456,12 +460,12 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                // add what unit is used and the fmt to their lists (one for each test)
                for(int i = 0; i<20; i++) begin
-		  Unit = {Unit, `CVTINTUNIT};
-		  Fmt = {Fmt, 2'b00};
+      Unit = {Unit, `CVTINTUNIT};
+      Fmt = {Fmt, 2'b00};
                end
             end
-	 end
-	 if (TEST === "cvtfp" | TEST === "all") begin  // if floating point conversion is being tested
+   end
+   if (TEST === "cvtfp" | TEST === "all") begin  // if floating point conversion is being tested
             if (P.ZFH_SUPPORTED) begin 
                // add the 32 <-> 16 bit conversions to the to-be-tested list
                Tests = {Tests, f32f16cvt};
@@ -470,16 +474,16 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0};
                // add the unit being tested and fmt (input format)
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b00};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b00};
                end
                for(int i = 0; i<5; i++) begin
-		  Unit = {Unit, `CVTFPUNIT};
-		  Fmt = {Fmt, 2'b10};
+      Unit = {Unit, `CVTFPUNIT};
+      Fmt = {Fmt, 2'b10};
                end
             end
-	 end
-	 if (TEST === "cmp" | TEST === "all") begin // if comparision is being tested
+   end
+   if (TEST === "cmp" | TEST === "all") begin // if comparison is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32cmp};
             OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
@@ -488,8 +492,8 @@ module testbenchfp;
                Unit = {Unit, `CMPUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "add" | TEST === "all") begin // if addition is being tested
+   end
+   if (TEST === "add" | TEST === "all") begin // if addition is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32add};
             OpCtrl = {OpCtrl, `ADD_OPCTRL};
@@ -498,8 +502,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
+   end
+   if (TEST === "sub" | TEST === "all") begin // if subtration is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32sub};
             OpCtrl = {OpCtrl, `SUB_OPCTRL};
@@ -508,8 +512,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "mul" | TEST === "all") begin // if multiply is being tested
+   end
+   if (TEST === "mul" | TEST === "all") begin // if multiply is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32mul};
             OpCtrl = {OpCtrl, `MUL_OPCTRL};
@@ -518,8 +522,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "div" | TEST === "all") begin // if division is being tested
+   end
+   if (TEST === "div" | TEST === "all") begin // if division is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32div};
             OpCtrl = {OpCtrl, `DIV_OPCTRL};
@@ -528,8 +532,8 @@ module testbenchfp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
+   end
+   if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f32sqrt};
             OpCtrl = {OpCtrl, `SQRT_OPCTRL};
@@ -538,8 +542,8 @@ module testbenchfp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
-	 if (TEST === "fma" | TEST === "all")  begin // if fma is being tested
+   end
+   if (TEST === "fma" | TEST === "all")  begin // if fma is being tested
             Tests = {Tests, f32fma};
             OpCtrl = {OpCtrl, `FMA_OPCTRL};
             WriteInt = {WriteInt, 1'b0};
@@ -547,7 +551,7 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b00};
             end
-	 end
+   end
     if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
          Tests = {Tests, f32div, f32sqrt};
          OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
@@ -559,7 +563,7 @@ module testbenchfp;
       end
       end
       if (P.ZFH_SUPPORTED & (TEST_SIZE == "HP" | TEST_SIZE == "all")) begin // if half precision supported
-	 if (TEST === "cvtint" | TEST === "all") begin // if in conversions are being tested
+   if (TEST === "cvtint" | TEST === "all") begin // if integer conversions are being tested
             Tests = {Tests, f16rv32cvtint};
             // add the op-codes for these tests to the op-code list
             OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
@@ -576,12 +580,12 @@ module testbenchfp;
                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
                // add what unit is used and the fmt to their lists (one for each test)
                for(int i = 0; i<20; i++) begin
-		  Unit = {Unit, `CVTINTUNIT};
-		  Fmt = {Fmt, 2'b10};
+      Unit = {Unit, `CVTINTUNIT};
+      Fmt = {Fmt, 2'b10};
                end
             end
-	 end
-	 if (TEST === "cmp" | TEST === "all") begin // if comparisions are being tested
+   end
+   if (TEST === "cmp" | TEST === "all") begin // if comparisons are being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16cmp};
             OpCtrl = {OpCtrl, `EQ_OPCTRL, `LE_OPCTRL, `LT_OPCTRL};
@@ -590,8 +594,8 @@ module testbenchfp;
                Unit = {Unit, `CMPUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "add" | TEST === "all") begin //  if addition is being tested
+   end
+   if (TEST === "add" | TEST === "all") begin //  if addition is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16add};
             OpCtrl = {OpCtrl, `ADD_OPCTRL};
@@ -600,8 +604,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
+   end
+   if (TEST === "sub" | TEST === "all") begin // if subtraction is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16sub};
             OpCtrl = {OpCtrl, `SUB_OPCTRL};
@@ -610,8 +614,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
+   end
+   if (TEST === "mul" | TEST === "all") begin // if multiplication is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16mul};
             OpCtrl = {OpCtrl, `MUL_OPCTRL};
@@ -620,8 +624,8 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "div" | TEST === "all") begin // if division is being tested
+   end
+   if (TEST === "div" | TEST === "all") begin // if division is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16div};
             OpCtrl = {OpCtrl, `DIV_OPCTRL};
@@ -630,8 +634,8 @@ module testbenchfp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
-	 if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
+   end
+   if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
             // add the correct tests/op-ctrls/unit/fmt to their lists
             Tests = {Tests, f16sqrt};
             OpCtrl = {OpCtrl, `SQRT_OPCTRL};
@@ -640,8 +644,8 @@ module testbenchfp;
                Unit = {Unit, `DIVUNIT};
                Fmt = {Fmt, 2'b10};
             end 
-	 end
-	 if (TEST === "fma" | TEST === "all") begin // if fma is being tested
+   end
+   if (TEST === "fma" | TEST === "all") begin // if fma is being tested
             Tests = {Tests, f16fma};
             OpCtrl = {OpCtrl, `FMA_OPCTRL};
             WriteInt = {WriteInt, 1'b0};
@@ -649,7 +653,7 @@ module testbenchfp;
                Unit = {Unit, `FMAUNIT};
                Fmt = {Fmt, 2'b10};
             end
-	 end
+   end
    if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
         Tests = {Tests, f16div, f16sqrt};
         OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
@@ -660,12 +664,12 @@ module testbenchfp;
         end
       end
       if (TEST === "divremsqrttest") begin // if unified div sqrt is being tested
-        Tests = {Tests, f128sqrt};
+        Tests = {Tests, f64sqrt};
         OpCtrl = {OpCtrl, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         for(int i = 0; i<5; i++) begin
             Unit = {Unit, `DIVUNIT};
-            Fmt = {Fmt, 2'b11};
+            Fmt = {Fmt, 2'b01};
         end
       end
       if (TEST === "customdiv") begin // if unified div sqrt is being tested
@@ -682,11 +686,19 @@ module testbenchfp;
         Unit = {Unit, `DIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
+      if (TEST === "intdiv") begin // if integer division/remainder is being tested
+        Tests = {Tests, intdiv};
+        OpCtrl = {OpCtrl, `INTREM_OPCTRL}; // single op-ctrl entry for the single file listed in intdiv
+        WriteInt = {WriteInt, 1'b0}; // NOTE(review): 0 although the checked result for INTDIVUNIT is IntRes - confirm
+        Unit = {Unit, `INTDIVUNIT}; // selects the INTDIVUNIT case in readvectors
+        Fmt = {Fmt, 2'b10}; // NOTE(review): 2'b10 is the half-precision code used elsewhere in this file; presumably a placeholder here - confirm
       end
+      end
+      
       // check if nothing is being tested
       if (Tests.size() == 0) begin
-	 $display("TEST %s not supported in this configuration", TEST);
-	 $stop;
+   $display("TEST %s not supported in this configuration", TEST);
+   $stop;
       end
    end
 
@@ -732,10 +744,10 @@ module testbenchfp;
 
    // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
    readvectors #(P) readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), 
-                                 .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
+                                 .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .SrcB, 
                                  .Xs, .Ys, .Zs, .Unit(UnitVal),
-                                 .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal),
-                                 .Xm, .Ym, .Zm, .DivStart,
+                                 .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), .Funct3E,
+                                 .Xm, .Ym, .Zm, .DivStart, .IDivStart, .IntDivE,
                                  .XNaN, .YNaN, .ZNaN,
                                  .XSNaN, .YSNaN, .ZSNaN, 
                                  .XSubnorm, .ZSubnorm, 
@@ -756,29 +768,29 @@ module testbenchfp;
    // instantiate devices under test
    if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma
       fma #(P) fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), 
-		   .Xe(Xe), .Ye(Ye), .Ze(Ze), 
-		   .Xm(Xm), .Ym(Ym), .Zm(Zm),
-		   .XZero, .YZero, .ZZero, .Ss, .Se,
-		   .OpCtrl(OpCtrlVal), .Sm, .InvA, .SCnt, .As, .Ps,
-		   .ASticky); 
+       .Xe(Xe), .Ye(Ye), .Ze(Ze), 
+       .Xm(Xm), .Ym(Ym), .Zm(Zm),
+       .XZero, .YZero, .ZZero, .Ss, .Se,
+       .OpCtrl(OpCtrlVal), .Sm, .InvA, .SCnt, .As, .Ps,
+       .ASticky); 
    end
    
    /*postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
-				.OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
-				.Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
-				.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
-				.XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
-				.XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
-				.XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-				.FmaASticky(ASticky), .FmaSe(Se),
-				.FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
-				.PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));*/
+        .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
+        .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
+        .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
+        .XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
+        .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
+        .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
+        .FmaASticky(ASticky), .FmaSe(Se),
+        .FmaSm(Sm), .FmaSCnt(SCnt), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+        .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));*/
    
    if (TEST === "cvtfp" | TEST === "cvtint" | TEST === "all") begin : fcvt
-      fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
-		      .XZero(XZero), .OpCtrl(OpCtrlVal), .IntZero,
-		      .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), 
-		      .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE));
+ 			fcvt #(P) fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
+          .XZero(XZero), .OpCtrl(OpCtrlVal), .IntZero,
+          .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), 
+          .ResSubnormUf(CvtResSubnormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE));
    end
 
    if (TEST === "cmp" | TEST === "all") begin: fcmp
@@ -789,29 +801,29 @@ module testbenchfp;
    
    if (TEST === "div" | TEST === "sqrt" | TEST === "all"| TEST === "custom" | TEST ==="customdivcorrect") begin: fdivsqrt
       fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
-			     .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
-			     .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
-			     .XNaNE(XNaN), .YNaNE(YNaN), 
-			     .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
-			     .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .QeM(DivCalcExp),
-			     .QmM(Quot),
-			     .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
-			     .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
-			     .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
+           .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+           .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
+           .XNaNE(XNaN), .YNaNE(YNaN), 
+           .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
+           .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .QeM(DivCalcExp),
+           .QmM(Quot),
+           .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
+           .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
+           .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
-   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv") begin: divremsqrt
+   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
-		       .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+           .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
            .PostProcSel(UnitVal[1:0]),
-		       .XNaNE(XNaN), .YNaNE(YNaN), 
+           .XNaNE(XNaN), .YNaNE(YNaN), 
              .OpCtrl(OpCtrlVal),
              .XSNaNE(XSNaN), .YSNaNE(YSNaN),
            .Frm(FrmVal), 
-                       .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
+                       .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(1'b0),
                        .StallM(1'b0), .FDivBusyE,
-                       .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
-                       .Funct3E(Funct3E), .IntDivE(1'b0), 
+                       .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M),
+                       .Funct3E(Funct3E), .IntDivE(IntDivE), 
                        .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
   end
   else begin: postprocess
@@ -840,8 +852,8 @@ module testbenchfp;
    // the IDLE state.
    initial
      begin
-	#0 reset = 1'b1;
-	#25 reset = 1'b0;     
+  #0 reset = 1'b1;
+  #25 reset = 1'b0;     
      end  
    
    ///////////////////////////////////////////////////////////////////////////////////////////////
@@ -858,12 +870,12 @@ module testbenchfp;
    // Check if the correct answer and result is a NaN
    always_comb begin
       if (UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT | (UnitVal === `DIVREMSQRTUNIT && WriteIntVal == 1'b1)) begin
-	 // an integer output can't be a NaN
-	 AnsNaN = 1'b0;
-	 ResNaN = 1'b0;
+   // an integer output can't be a NaN
+   AnsNaN = 1'b0;
+   ResNaN = 1'b0;
       end
       else if (UnitVal === `CVTFPUNIT) begin
-	 case (OpCtrlVal[1:0])
+   case (OpCtrlVal[1:0])
            4'b11: begin // quad             
               AnsNaN = &Ans[P.Q_LEN-2:P.NF]&(|Ans[P.Q_NF-1:0]);
               ResNaN = &Res[P.Q_LEN-2:P.NF]&(|Res[P.Q_NF-1:0]);
@@ -880,10 +892,10 @@ module testbenchfp;
               AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]);
               ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]);
            end
-	 endcase
+   endcase
       end
       else begin
-	 case (FmtVal)
+   case (FmtVal)
            4'b11: begin // quad             
               AnsNaN = &Ans[P.Q_LEN-2:P.Q_NF]&(|Ans[P.Q_NF-1:0]);
               ResNaN = &Res[P.Q_LEN-2:P.Q_NF]&(|Res[P.Q_NF-1:0]);
@@ -900,27 +912,29 @@ module testbenchfp;
               AnsNaN = &Ans[P.H_LEN-2:P.H_NF]&(|Ans[P.H_NF-1:0]);
               ResNaN = &Res[P.H_LEN-2:P.H_NF]&(|Res[P.H_NF-1:0]);
            end
-	 endcase
+   endcase
       end
    end 
    
    always_comb begin
       // select the result to check
       case (UnitVal)
-	`FMAUNIT: Res = FpRes;
-	`DIVUNIT: Res = FpRes;
-	`CMPUNIT: Res = CmpRes;
-	`CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes;
-	`CVTFPUNIT: Res = FpRes;
+  `FMAUNIT: Res = FpRes;
+  `DIVUNIT: Res = FpRes;
+  `CMPUNIT: Res = CmpRes;
+  `CVTINTUNIT: if (WriteIntVal) Res = IntRes; else Res = FpRes;
+  `CVTFPUNIT: Res = FpRes;
+	`INTDIVUNIT: Res = IntRes;
       endcase
 
       // select the flag to check
       case (UnitVal)
-	`FMAUNIT: ResFlg = Flg;
-	`DIVUNIT: ResFlg = Flg;
-	`CMPUNIT: ResFlg = CmpFlg;
-	`CVTINTUNIT: ResFlg = Flg;
-	`CVTFPUNIT: ResFlg = Flg;
+  `FMAUNIT: ResFlg = Flg;
+  `DIVUNIT: ResFlg = Flg;
+  `CMPUNIT: ResFlg = CmpFlg;
+  `CVTINTUNIT: ResFlg = Flg;
+  `CVTFPUNIT: ResFlg = Flg;
+  `INTDIVUNIT: ResFlg = Flg;
       endcase
    end
 
@@ -932,33 +946,33 @@ module testbenchfp;
    always @(posedge clk) begin
       // Add extra clock cycles in beginning for fdivsqrt to adequate reset state
       if (~(FDivBusyE|DivStart)|(UnitVal != `DIVUNIT)) begin
-	 // This allows specific number of clocks to allow each vector
-	 // to complete for division or square root.  It is an
-	 // arbitrary value and can be changed, if needed.
-	 case (FmtVal)
-	   // QP
-	   4'b11: begin
-	      repeat (20)
-		@(posedge clk);
-	   end
-	   // HP
-	   4'b10: begin
-	      repeat (14)
-		@(posedge clk);
-	   end
-	   // DP
-	   4'b01: begin
-	      repeat (18)
-		@(posedge clk);
-	   end
-	   // SP
-	   4'b00: begin
-	      repeat (16)
-		@(posedge clk);
-	   end
-	 endcase // case (FmtVal)	 
-	 if (reset != 1'b1)
-	   VectorNum += 1; // increment the vector
+   // This allows specific number of clocks to allow each vector
+   // to complete for division or square root.  It is an
+   // arbitrary value and can be changed, if needed.
+   case (FmtVal)
+     // QP
+     4'b11: begin
+        repeat (20)
+    @(posedge clk);
+     end
+     // HP
+     4'b10: begin
+        repeat (14)
+    @(posedge clk);
+     end
+     // DP
+     4'b01: begin
+        repeat (18)
+    @(posedge clk);
+     end
+     // SP
+     4'b00: begin
+        repeat (16)
+    @(posedge clk);
+     end
+   endcase // case (FmtVal)	 
+   if (reset != 1'b1)
+     VectorNum += 1; // increment the vector
       end
    end
 
@@ -968,7 +982,7 @@ module testbenchfp;
       //    - the sign of the NaN does not matter for the opperations being tested
       //    - when 2 or more NaNs are inputed the NaN that is propigated doesn't matter
       if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
-	case (FmtVal)
+  case (FmtVal)
           4'b11: NaNGood =  (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
                              (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
                              (XNaN&(Res[P.Q_LEN-2:0] === {X[P.Q_LEN-2:P.Q_NF],1'b1,X[P.Q_NF-2:0]})) | 
@@ -989,9 +1003,9 @@ module testbenchfp;
                              (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | 
                              (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})) |
                              (ZNaN&(Res[P.H_LEN-2:0] === {Z[P.H_LEN-2:P.H_NF],1'b1,Z[P.H_NF-2:0]})));
-	endcase
+  endcase
       else if (UnitVal === `CVTFPUNIT) // if converting from floating point to floating point OpCtrl contains the final FP format
-	case (OpCtrlVal[1:0]) 
+  case (OpCtrlVal[1:0]) 
           2'b11: NaNGood = (((P.IEEE754==0)&AnsNaN&(Res === {1'b0, {P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
                             (AnsFlg[4]&(Res[P.Q_LEN-2:0] === {{P.Q_NE+1{1'b1}}, {P.Q_NF-1{1'b0}}})) |
                             (AnsNaN&(Res[P.Q_LEN-2:0] === Ans[P.Q_LEN-2:0])) | 
@@ -1012,7 +1026,7 @@ module testbenchfp;
                             (AnsNaN&(Res[P.H_LEN-2:0] === Ans[P.H_LEN-2:0])) | 
                             (XNaN&(Res[P.H_LEN-2:0] === {X[P.H_LEN-2:P.H_NF],1'b1,X[P.H_NF-2:0]})) | 
                             (YNaN&(Res[P.H_LEN-2:0] === {Y[P.H_LEN-2:P.H_NF],1'b1,Y[P.H_NF-2:0]})));
-	endcase
+  endcase
       else NaNGood = 1'b0; // integers can't be NaNs
 
       
@@ -1030,7 +1044,7 @@ module testbenchfp;
       //  wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
       assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx));
       assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx));
-      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL);
+      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL);
       assign FMAop = (OpCtrlVal == `FMAUNIT);  
       assign DivDone = OldFDivBusyE & ~FDivBusyE;
 
@@ -1041,10 +1055,10 @@ module testbenchfp;
             fd = $fopen("fperr.out","a");
             $fwrite(fd, "%h_%h_%h_%2h\n",X,Y,Ans,AnsFlg);
             $fclose(fd);
-	 errors += 1;
-	 $display("\nError in %s", Tests[TestNum]);
-	 $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);	 
-	 $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
+   errors += 1;
+   $display("\nError in %s", Tests[TestNum]);
+   $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);	 
+   $display("inputs: %h %h %h\nSrcA: %h\n SrcB: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, SrcB, Res, ResFlg, Ans, AnsFlg);
       end
       
       // TestFloat sets the result to all 1's when there is an invalid result, however in 
@@ -1054,36 +1068,36 @@ module testbenchfp;
       // Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but 
       // the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff...
       else if ((UnitVal === `CVTINTUNIT) & 
-	       ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[P.XLEN-1:0] === (P.XLEN)'(0))) | 
-		  (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[P.XLEN-1:0] === {1'b0, {P.XLEN-1{1'b1}}})) | 
-		  (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[P.XLEN-1:0] === {{P.XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | 
-		  (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
-	 errors += 1;
-	 $display("There is an error in %s", Tests[TestNum]);
-	 $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
-	 $stop;
+         ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[P.XLEN-1:0] === (P.XLEN)'(0))) | 
+      (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[P.XLEN-1:0] === {1'b0, {P.XLEN-1{1'b1}}})) | 
+      (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[P.XLEN-1:0] === {{P.XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | 
+      (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
+   errors += 1;
+   $display("There is an error in %s", Tests[TestNum]);
+   $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
+   $stop;
       end
 
       if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the eof
-	 // increment the test
-	 TestNum += 1;
-	 // clear the vectors
-	 for(int i=0; i<6133248; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}};
-	 // read next files
-	 $readmemh({`PATH, Tests[TestNum]}, TestVectors);
-	 // set the vector index back to 0
-	 VectorNum = 0;
-	 // incemet the operation if all the rounding modes have been tested
-	 if (FrmNum === 4) OpCtrlNum += 1;
-	 // increment the rounding mode or loop back to rne 
-	 if (FrmNum < 4) FrmNum += 1;
-	 else FrmNum = 0; 
-	 // if no more Tests - finish
-	 if (Tests[TestNum] === "") begin
+   // increment the test
+   TestNum += 1;
+   // clear the vectors
+   for(int i=0; i<6133248; i++) TestVectors[i] = {P.FLEN*4+8{1'bx}};
+   // read next files
+   $readmemh({`PATH, Tests[TestNum]}, TestVectors);
+   // set the vector index back to 0
+   VectorNum = 0;
+   // increment the operation if all the rounding modes have been tested
+   if (FrmNum === 4) OpCtrlNum += 1;
+   // increment the rounding mode or loop back to rne 
+   if (FrmNum < 4) FrmNum += 1;
+   else FrmNum = 0; 
+   // if no more Tests - finish
+   if (Tests[TestNum] === "") begin
             $display("\nAll Tests completed with %d errors\n", errors);
             $stop;
-	 end 
-	 $display("Running %s vectors", Tests[TestNum]);
+   end 
+   $display("Running %s vectors", Tests[TestNum]);
       end
    end
 endmodule
@@ -1100,6 +1114,7 @@ module readvectors (
 		    input logic [2:0] 	        OpCtrl,
 		    output logic [P.FLEN-1:0]   Ans,
 		    output logic [P.XLEN-1:0]   SrcA,
+		    output logic [P.XLEN-1:0]   SrcB,
 		    output logic [4:0] 	        AnsFlg,
 		    output logic 	        Xs, Ys, Zs, // sign bits of XYZ
 		    output logic [P.NE-1:0]     Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
@@ -1111,6 +1126,9 @@ module readvectors (
 		    output logic 	        XInf, YInf, ZInf, // is XYZ infinity
 		    output logic 	        XExpMax,
 		    output logic 	        DivStart,
+		    output logic 	        IDivStart,
+				output logic          IntDivE,
+				output logic [2:0]    Funct3E,
 		    output logic [P.FLEN-1:0]   X, Y, Z, XPostBox
 		    );
 
@@ -1265,6 +1283,21 @@ module readvectors (
 		   DivStart = 1'b0;
               end
             endcase
+	`INTDIVUNIT: begin // integer divide/remainder vectors: load SrcA/SrcB/Ans, then pulse IDivStart and IntDivE
+		#20;
+	  X = {P.FLEN{1'bx}}; // X is explicitly driven unknown for this unit
+		SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)]; // D_LEN bits starting at bit 2*Q_LEN; NOTE(review): SrcA is XLEN wide - assumes XLEN == D_LEN, confirm
+		SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN]; // D_LEN bits starting at bit Q_LEN
+		Ans = TestVector[P.D_LEN-1:0]; // expected result: low D_LEN bits of the vector
+		if (~clk) #5; // if clk is currently low, wait 5 more time units before asserting the start signals
+		IDivStart = 1'b1;
+		IntDivE = 1'b1;
+		Funct3E = 3'b110; // hard-coded to the same value as INTREM_OPCTRL; NOTE(review): confirm it should not follow OpCtrl
+		#10 // one clk cycle
+		IDivStart = 1'b0;
+		IntDivE = 1'b0;
+	end
+	  
 	`CMPUNIT:
           case (Fmt)        
             2'b11: begin // quad
@@ -1491,4 +1524,4 @@ module readvectors (
                       .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,
                       .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf,
                       .XEn, .YEn, .ZEn, .XExpMax, .XPostBox);
-endmodule
+endmodule
\ No newline at end of file
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index 500a65d4b..6924ab65c 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -42,6 +42,10 @@
 `define FROM_I_OPCTRL  3'b101
 `define FROM_UL_OPCTRL 3'b110
 `define FROM_L_OPCTRL  3'b111
+`define INTREMU_OPCTRL 3'b000
+`define INTREM_OPCTRL  3'b110
+`define INTDIV_OPCTRL  3'b010
+`define INTDIVU_OPCTRL 3'b011
 `define RNE            3'b000
 `define RZ             3'b001
 `define RU             3'b011
@@ -53,6 +57,7 @@
 `define CVTFPUNIT      4
 `define CMPUNIT        3
 `define DIVREMSQRTUNIT 5
+`define INTDIVUNIT     6
 
 string f16rv32cvtint[] = '{
 	"ui32_to_f16_rne.tv",
@@ -589,5 +594,8 @@ string customdivcorrect[] = '{
 	"f16_custom.tv"
 };
 
+string intdiv[] = '{
+	"f16_kevin.tv"
+};
 
 
diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors.py b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
index 12669bc58..8baa0939b 100755
--- a/tests/fp/combined_IF_vectors/extract_arch_vectors.py
+++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
@@ -272,22 +272,22 @@ def create_vectors(my_config):
         src_file2.close()
 
 config_list = [
-Config(32, "M", "div", "div-", 0),
+Config(32, "M", "div", "div-", 4),
 Config(32, "F", "fdiv", "fdiv", 1),
 Config(32, "F", "fsqrt", "fsqrt", 2),
-Config(32, "M", "rem", "rem-", 3),
-Config(32, "M", "divu", "divu-", 4),
-Config(32, "M", "remu", "remu-", 5),
-Config(64, "M", "div", "div-", 0),
+Config(32, "M", "rem", "rem-", 6),
+Config(32, "M", "divu", "divu-", 5),
+Config(32, "M", "remu", "remu-", 7),
+Config(64, "M", "div", "div-", 4),
 Config(64, "F", "fdiv", "fdiv", 1),
 Config(64, "F", "fsqrt", "fsqrt", 2),
-Config(64, "M", "rem", "rem-", 3),
-Config(64, "M", "divu", "divu-", 4),
-Config(64, "M", "remu", "remu-", 5),
-Config(64, "M", "divw", "divw-", 6),
-Config(64, "M", "divuw", "divuw-", 7),
-Config(64, "M", "remw", "remw-", 8),
-Config(64, "M", "remuw", "remuw-", 9)
+Config(64, "M", "rem", "rem-", 6),
+Config(64, "M", "divu", "divu-", 5),
+Config(64, "M", "remu", "remu-", 7),
+Config(64, "M", "divw", "divw-", 4),
+Config(64, "M", "divuw", "divuw-", 5),
+Config(64, "M", "remw", "remw-", 6),
+Config(64, "M", "remuw", "remuw-", 7)
 ]
 
 for c in config_list:
diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors_v2.py b/tests/fp/combined_IF_vectors/extract_arch_vectors_v2.py
new file mode 100755
index 000000000..6fe63d0c7
--- /dev/null
+++ b/tests/fp/combined_IF_vectors/extract_arch_vectors_v2.py
@@ -0,0 +1,294 @@
+#! /usr/bin/python3
+
+# author: Alessandro Maiuolo, Kevin Kim
+# contact: amaiuolo@g.hmc.edu, kekim@hmc.edu
+# date created: 3-29-2023
+
+# extract all arch test vectors
+import os
+wally = os.popen('echo $WALLY').read().strip()
+
+def ext_bits(my_string):
+    target_len = 32 # we want 128 bits, div by 4 bc hex notation
+    zeroes_to_add = target_len - len(my_string)
+    return zeroes_to_add*"0" + my_string
+
+def twos_comp(b, x):  # negate hex string x as a b-bit two's complement value; returns hex digits without "0x"
+    if b == 32:
+        return hex((-int(x,16)) & 0xFFFFFFFF)[2:]  # mask so x == "0" gives "0" instead of overflowing to 9 digits
+    elif b == 64:
+        return hex((-int(x,16)) & 0xFFFFFFFFFFFFFFFF)[2:]  # same masking at 64 bits
+    else:
+        return "UNEXPECTED_BITSIZE"
+
+def unpack_rf(packed):
+    bin_u = bin(int(packed, 16))[2:].zfill(8) # translate to binary
+    flags = hex(int(bin_u[3:],2))[2:].zfill(2)
+    rounding_mode = hex(int(bin_u[:3],2))[2:]
+    return flags, rounding_mode
+
+# rounding mode dictionary
+round_dict = {
+    "rne":"0",
+    "rnm":"4",
+    "ru":"3",
+    "rz":"1",
+    "rd":"2",
+    "dyn":"7"
+}
+
+# fcsr dictionary
+fcsr_dict = {
+    "0":"rne",
+    "128":"rnm",
+    "96":"ru",
+    "32":"rz",
+    "64":"rd",
+    "224":"dyn"
+}
+
+print("creating arch test vectors")
+
+class Config:
+  def __init__(self, bits, letter, op, filt, op_code):
+    self.bits = bits
+    self.letter = letter
+    self.op = op
+    self.filt = filt
+    self.op_code = op_code
+
+def create_vectors(my_config):
+    suite_folder_num = my_config.bits
+    if my_config.bits == 64 and my_config.letter == "F": suite_folder_num = 32
+    source_dir1 = "{}/addins/riscv-arch-test/riscv-test-suite/rv{}i_m/{}/src/".format(wally, suite_folder_num, my_config.letter)
+    source_dir2 = "{}/tests/riscof/work/riscv-arch-test/rv{}i_m/{}/src/".format(wally, my_config.bits, my_config.letter)
+    dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally)
+    all_vectors1 = os.listdir(source_dir1)
+
+    filt_vectors1 = [v for v in all_vectors1 if my_config.filt in v]
+    # print(filt_vectors1)
+    filt_vectors2 = [v + "/ref/Reference-sail_c_simulator.signature" for v in all_vectors1 if my_config.filt in v]
+
+    # iterate through all vectors
+    for i in range(len(filt_vectors1)):
+        vector1 = filt_vectors1[i]
+        vector2 = filt_vectors2[i]
+        operation = my_config.op_code
+        rounding_mode = "X"
+        flags = "XX"
+        # use name to create our new tv
+        dest_file = open("{}cvw_{}_{}.tv".format(dest_dir, my_config.bits, vector1[:-2]), 'w')
+        # open vectors
+        src_file1 = open(source_dir1 + vector1,'r')
+        src_file2 = open(source_dir2 + vector2,'r')
+        # for each test in the vector
+        reading = True
+        src_file2.readline() #skip first bc junk
+        # print(my_config.bits, my_config.letter)
+        if my_config.letter == "F" and my_config.bits == 64:
+            reading = True
+            # print("trigger 64F")
+            #skip first 2 lines bc junk
+            src_file2.readline()
+            while reading:
+                # get answer and flags from Ref...signature
+                # answers are before deadbeef (first line of 4)
+                # flags are after deadbeef (third line of 4)
+                answer = src_file2.readline().strip()
+                deadbeef = src_file2.readline().strip()
+                # print(answer)
+                if not (answer == "e7d4b281" and deadbeef == "6f5ca309"): # if there is still stuff to read
+                    # get flags
+                    packed = src_file2.readline().strip()[6:]
+                    flags, rounding_mode = unpack_rf(packed)
+                    # skip 00000000 buffer
+                    src_file2.readline()
+
+                    # parse through .S file
+                    detected = False
+                    done = False
+                    op1val = "0"
+                    op2val = "0"
+                    while not (detected or done):
+                        # print("det1")
+                        line = src_file1.readline()
+                        # print(line)
+                        if "op1val" in line:
+                            # print("det2")
+                            # parse line
+                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
+                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals
+                                op2val = line.split("op2val")[1].split("x")[1].strip()
+                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
+                            else:
+                                op2val = 32*"X"
+                            # go to next test in vector
+                            detected = True
+                        elif "RVTEST_CODE_END" in line:
+                            done = True
+                    # put it all together
+                    if not done:
+                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
+                        dest_file.write(translation + "\n")
+                else:
+                    # print("read false")
+                    reading = False
+        elif my_config.letter == "M" and my_config.bits == 64:
+            reading = True
+            #skip first 2 lines bc junk
+            src_file2.readline()
+            while reading:
+                # print("trigger 64M")
+                # get answer from Ref...signature
+                # answers span two lines and are reversed
+                answer2 = src_file2.readline().strip()
+                answer1 = src_file2.readline().strip()
+                answer = answer1 + answer2
+                #print(answer1,answer2)
+                if not (answer2 == "e7d4b281" and answer1 == "6f5ca309"): # if there is still stuff to read
+                    # parse through .S file
+                    detected = False
+                    done = False
+                    op1val = "0"
+                    op2val = "0"
+                    while not (detected or done):
+                        # print("det1")
+                        line = src_file1.readline()
+                        # print(line)
+                        if "op1val" in line:
+                            # print("det2")
+                            # parse line
+                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
+                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
+                                op1val = twos_comp(my_config.bits, op1val)
+                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later
+                                op2val = line.split("op2val")[1].split("x")[1].strip()
+                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
+                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
+                                    op2val = twos_comp(my_config.bits, op2val)
+                            # go to next test in vector
+                            detected = True
+                        elif "RVTEST_CODE_END" in line:
+                            done = True
+                    # ints don't have flags
+                    flags = "XX"
+                    # put it all together
+                    if not done:
+                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
+                        dest_file.write(translation + "\n")
+                else:
+                    # print("read false")
+                    reading = False
+        elif my_config.letter == "M" and my_config.bits == 32:
+            reading = True
+            while reading:
+                # print("trigger 64M")
+                # get answer from Ref...signature
+                # each answer is read from a single line in this (32-bit) branch
+                answer = src_file2.readline().strip()
+                print(f"Answer: {answer}")
+                #print(answer1,answer2)
+                if not (answer == "6f5ca309"): # if there is still stuff to read
+                    # parse through .S file
+                    detected = False
+                    done = False
+                    op1val = "0"
+                    op2val = "0"
+                    while not (detected or done):
+                        # print("det1")
+                        line = src_file1.readline()
+                        # print(line)
+                        if "op1val" in line:
+                            # print("det2")
+                            # parse line
+                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
+                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
+                                op1val = twos_comp(my_config.bits, op1val)
+                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later
+                                op2val = line.split("op2val")[1].split("x")[1].strip()
+                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
+                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
+                                    op2val = twos_comp(my_config.bits, op2val)
+                            # go to next test in vector
+                            detected = True
+                        elif "RVTEST_CODE_END" in line:
+                            done = True
+                    # ints don't have flags
+                    flags = "XX"
+                    # put it all together
+                    if not done:
+                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
+                        dest_file.write(translation + "\n")
+                else:
+                    # print("read false")
+                    reading = False 
+        else:
+            while reading:
+                # get answer and flags from Ref...signature
+                answer = src_file2.readline()
+                print(answer)
+                packed = src_file2.readline()[6:]
+                print("Packed: ", packed)
+                if len(packed.strip())>0: # if there is still stuff to read
+                    # print("packed")
+                    # parse through .S file
+                    detected = False
+                    done = False
+                    op1val = "0"
+                    op2val = "0"
+                    while not (detected or done):
+                        # print("det1")
+                        line = src_file1.readline()
+                        # print(line)
+                        if "op1val" in line:
+                            # print("det2")
+                            # parse line
+                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
+                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
+                                op1val = twos_comp(my_config.bits, op1val)
+                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals
+                                op2val = line.split("op2val")[1].split("x")[1].strip()
+                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
+                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
+                                    op2val = twos_comp(my_config.bits, op2val)
+                            # go to next test in vector
+                            detected = True
+                        elif "RVTEST_CODE_END" in line:
+                            done = True
+                    # rounding mode for float
+                    if not done and (my_config.op == "fsqrt" or my_config.op == "fdiv"):
+                        flags, rounding_mode = unpack_rf(packed)
+                    
+                    # put it all together
+                    if not done:
+                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
+                        dest_file.write(translation + "\n")
+                else:
+                    # print("read false")
+                    reading = False
+        # print("out")
+        dest_file.close()
+        src_file1.close()
+        src_file2.close()
+
+config_list = [
+Config(32, "M", "div", "div-", 4),
+Config(32, "F", "fdiv", "fdiv", 1),
+Config(32, "F", "fsqrt", "fsqrt", 2),
+Config(32, "M", "rem", "rem-", 6),
+Config(32, "M", "divu", "divu-", 5),
+Config(32, "M", "remu", "remu-", 7),
+Config(64, "M", "div", "div-", 4),
+Config(64, "F", "fdiv", "fdiv", 1),
+Config(64, "F", "fsqrt", "fsqrt", 2),
+Config(64, "M", "rem", "rem-", 6),
+Config(64, "M", "divu", "divu-", 5),
+Config(64, "M", "remu", "remu-", 7),
+Config(64, "M", "divw", "divw-", 4),
+Config(64, "M", "divuw", "divuw-", 5),
+Config(64, "M", "remw", "remw-", 6),
+Config(64, "M", "remuw", "remuw-", 7)
+]
+
+for c in config_list:
+    create_vectors(c)
\ No newline at end of file

From 8ea98c52cfd8bcf920bd26f64828d245ef4e1564 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 9 Jul 2023 21:00:47 -0700
Subject: [PATCH 26/40] divremsqrt passes int64 and rem64!!

---
 testbench/testbench-fp.sv | 70 ++++++++++++++++++++++++++++++---------
 testbench/tests-fp.vh     | 21 +++++++++---
 testbench/tests.vh        |  7 ++--
 3 files changed, 76 insertions(+), 22 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index dd0774b3b..c0e7a0691 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -686,9 +686,30 @@ module testbenchfp;
         Unit = {Unit, `DIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
+      if (TEST === "intrem") begin // if unified div sqrt is being tested
+        Tests = {Tests, intrem};
+        OpCtrl = {OpCtrl, `INTREM_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `INTDIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
       if (TEST === "intdiv") begin // if unified div sqrt is being tested
         Tests = {Tests, intdiv};
-        OpCtrl = {OpCtrl, `INTREM_OPCTRL};
+        OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `INTDIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
+      if (TEST === "intremu") begin // if unified div sqrt is being tested
+        Tests = {Tests, intremu};
+        OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `INTDIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
+      if (TEST === "intdivu") begin // if unified div sqrt is being tested
+        Tests = {Tests, intdiv};
+        OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
@@ -811,7 +832,7 @@ module testbenchfp;
            .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
            .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
-   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv") begin: divremsqrt
+   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
            .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
@@ -1044,7 +1065,7 @@ module testbenchfp;
       //  wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
       assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx));
       assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx));
-      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL);
+      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal == `INTDIVU_OPCTRL) | (OpCtrlVal ==`INTREMU_OPCTRL);
       assign FMAop = (OpCtrlVal == `FMAUNIT);  
       assign DivDone = OldFDivBusyE & ~FDivBusyE;
 
@@ -1059,6 +1080,7 @@ module testbenchfp;
    $display("\nError in %s", Tests[TestNum]);
    $display("TestNum %d OpCtrl %d", TestNum, OpCtrl[TestNum]);	 
    $display("inputs: %h %h %h\nSrcA: %h\n SrcB: %h\n Res: %h %h\n Expected: %h %h", X, Y, Z, SrcA, SrcB, Res, ResFlg, Ans, AnsFlg);
+   $display("time: %t", $realtime); // %t is the time format specifier; "$t" would be printed literally
       end
       
       // TestFloat sets the result to all 1's when there is an invalid result, however in 
@@ -1284,18 +1306,36 @@ module readvectors (
               end
             endcase
 	`INTDIVUNIT: begin
-		#20;
-	  X = {P.FLEN{1'bx}};
-		SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
-		SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
-		Ans = TestVector[P.D_LEN-1:0];
-		if (~clk) #5;
-		IDivStart = 1'b1;
-		IntDivE = 1'b1;
-		Funct3E = 3'b110;
-		#10 // one clk cycle
-		IDivStart = 1'b0;
-		IntDivE = 1'b0;
+   //***NOTE: remove redundancies in code. Conditionals toggle the Funct3E variable only, so we can intialize a new funct3 variable and set Funct3 equal to that.
+	  #20;
+    if (OpCtrl === `INTDIV_OPCTRL) begin
+      X = {P.FLEN{1'bx}};
+      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
+      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
+      Ans = TestVector[P.D_LEN-1:0];
+      AnsFlg = 5'bx;
+      if (~clk) #5;
+      IDivStart = 1'b1;
+      IntDivE = 1'b1;
+      Funct3E = 3'b100;
+      #10 // one clk cycle
+      IDivStart = 1'b0;
+      IntDivE = 1'b0;
+    end
+    else if (OpCtrl == `INTREM_OPCTRL) begin
+      X = {P.FLEN{1'bx}};
+      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
+      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
+      Ans = TestVector[P.D_LEN-1:0];
+      AnsFlg = 5'bx;
+      if (~clk) #5;
+      IDivStart = 1'b1;
+      IntDivE = 1'b1;
+      Funct3E = 3'b110;
+      #10 // one clk cycle
+      IDivStart = 1'b0;
+      IntDivE = 1'b0;
+    end
 	end
 	  
 	`CMPUNIT:
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index 6924ab65c..b22e37350 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -42,10 +42,10 @@
 `define FROM_I_OPCTRL  3'b101
 `define FROM_UL_OPCTRL 3'b110
 `define FROM_L_OPCTRL  3'b111
-`define INTREMU_OPCTRL 3'b000
+`define INTREMU_OPCTRL 3'b111
 `define INTREM_OPCTRL  3'b110
 `define INTDIV_OPCTRL  3'b010
-`define INTDIVU_OPCTRL 3'b011
+`define INTDIVU_OPCTRL 3'b101
 `define RNE            3'b000
 `define RZ             3'b001
 `define RU             3'b011
@@ -594,8 +594,21 @@ string customdivcorrect[] = '{
 	"f16_custom.tv"
 };
 
-string intdiv[] = '{
-	"f16_kevin.tv"
+string intrem[] = '{
+	"cvw_64_rem-01.tv"
 };
 
+string intdiv[] = '{
+	"cvw_64_div-01.tv"
+};
+
+string intremu[] = '{
+	"cvw_64_remu-01.tv"
+}
+
+string intdivu[] = '{
+	"cvw_64_divu-01.tv"
+}
+
+
 
diff --git a/testbench/tests.vh b/testbench/tests.vh
index f38f28056..0745155ce 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -2076,10 +2076,11 @@ string arch64zbs[] = '{
  };
 
  string custom[] = '{
-    `CUSTOM,
-    "simple",
+    `RISCVARCHTEST,
+     "rv64i_m/M/src/rem-01.S"
+    /*"simple",
     "debug",
-    "cacheTest"
+    "cacheTest"*/
  };
   string testsBP64[] = '{
     `IMPERASTEST,

From 2a68e4a065a7a96e51d75b35d099238f09bc6964 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 9 Jul 2023 22:23:39 -0700
Subject: [PATCH 27/40] bug fixes in opctrl and test selection

---
 testbench/testbench-fp.sv | 83 +++++++++++++++++++++++++++++++++++----
 testbench/tests-fp.vh     | 12 +++---
 2 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index c0e7a0691..237998bac 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -125,6 +125,7 @@ module testbenchfp;
    logic 			FlagMatch;                  // Check if IEEE flags match
    logic 			CheckNow;                   // Final check
    logic 			FMAop;                      // Is this a FMA operation?   
+   logic      sqrtop;                     // Is this a SQRT operation?
    
 	 flop #(3) funct3reg(.clk, .d(Funct3E), .q(Funct3M));
    ///////////////////////////////////////////////////////////////////////////////////////////////
@@ -708,7 +709,7 @@ module testbenchfp;
         Fmt = {Fmt, 2'b10};
       end
       if (TEST === "intdivu") begin // if unified div sqrt is being tested
-        Tests = {Tests, intdiv};
+        Tests = {Tests, intdivu};
         OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
@@ -767,7 +768,7 @@ module testbenchfp;
    readvectors #(P) readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), 
                                  .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .SrcB, 
                                  .Xs, .Ys, .Zs, .Unit(UnitVal),
-                                 .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), .Funct3E,
+                                 .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), .Funct3E, .W64,
                                  .Xm, .Ym, .Zm, .DivStart, .IDivStart, .IntDivE,
                                  .XNaN, .YNaN, .ZNaN,
                                  .XSNaN, .YSNaN, .ZSNaN, 
@@ -822,7 +823,7 @@ module testbenchfp;
    
    if (TEST === "div" | TEST === "sqrt" | TEST === "all"| TEST === "custom" | TEST ==="customdivcorrect") begin: fdivsqrt
       fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
-           .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+           .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL), .SqrtM(OpCtrlVal===`SQRT_OPCTRL),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
            .XNaNE(XNaN), .YNaNE(YNaN), 
            .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
@@ -832,16 +833,16 @@ module testbenchfp;
            .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
            .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
-   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem") begin: divremsqrt
+   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
-           .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal[0]), .SqrtM(OpCtrlVal[0]),
+           .XeE(Xe), .YeE(Ye), .SqrtE(TEST === "sqrt"), .SqrtM(TEST === "sqrt"),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
            .PostProcSel(UnitVal[1:0]),
            .XNaNE(XNaN), .YNaNE(YNaN), 
              .OpCtrl(OpCtrlVal),
              .XSNaNE(XSNaN), .YSNaNE(YSNaN),
            .Frm(FrmVal), 
-                       .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(1'b0),
+                       .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64),
                        .StallM(1'b0), .FDivBusyE,
                        .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M),
                        .Funct3E(Funct3E), .IntDivE(IntDivE), 
@@ -1151,6 +1152,7 @@ module readvectors (
 		    output logic 	        IDivStart,
 				output logic          IntDivE,
 				output logic [2:0]    Funct3E,
+        output logic          W64,
 		    output logic [P.FLEN-1:0]   X, Y, Z, XPostBox
 		    );
 
@@ -1231,7 +1233,7 @@ module readvectors (
             end
           endcase
 	`DIVUNIT:
-          if (OpCtrl[0])
+          if (OpCtrl === `SQRT_OPCTRL)
             case (Fmt)
               2'b11: begin // quad
 		 #20;		 
@@ -1306,7 +1308,6 @@ module readvectors (
               end
             endcase
 	`INTDIVUNIT: begin
-   //***NOTE: remove redundancies in code. Conditionals toggle the Funct3E variable only, so we can intialize a new funct3 variable and set Funct3 equal to that.
 	  #20;
     if (OpCtrl === `INTDIV_OPCTRL) begin
       X = {P.FLEN{1'bx}};
@@ -1318,9 +1319,11 @@ module readvectors (
       IDivStart = 1'b1;
       IntDivE = 1'b1;
       Funct3E = 3'b100;
+      W64 = 1'b0;
       #10 // one clk cycle
       IDivStart = 1'b0;
       IntDivE = 1'b0;
+      W64 = 1'b0;
     end
     else if (OpCtrl == `INTREM_OPCTRL) begin
       X = {P.FLEN{1'bx}};
@@ -1332,9 +1335,73 @@ module readvectors (
       IDivStart = 1'b1;
       IntDivE = 1'b1;
       Funct3E = 3'b110;
+      W64 = 1'b0;
       #10 // one clk cycle
       IDivStart = 1'b0;
       IntDivE = 1'b0;
+      W64 = 1'b0;
+    end
+    else if (OpCtrl == `INTREMU_OPCTRL) begin
+      X = {P.FLEN{1'bx}};
+      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
+      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
+      Ans = TestVector[P.D_LEN-1:0];
+      AnsFlg = 5'bx;
+      if (~clk) #5;
+      IDivStart = 1'b1;
+      IntDivE = 1'b1;
+      Funct3E = 3'b111;
+      W64 = 1'b0;
+      #10 // one clk cycle
+      IDivStart = 1'b0;
+      IntDivE = 1'b0;
+      W64 = 1'b0;
+    end
+    else if (OpCtrl == `INTDIVU_OPCTRL) begin
+      X = {P.FLEN{1'bx}};
+      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
+      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
+      Ans = TestVector[P.D_LEN-1:0];
+      AnsFlg = 5'bx;
+      if (~clk) #5;
+      IDivStart = 1'b1;
+      IntDivE = 1'b1;
+      Funct3E = 3'b101;
+      W64 = 1'b0;
+      #10 // one clk cycle
+      IDivStart = 1'b0;
+      IntDivE = 1'b0;
+      W64 = 1'b0;
+    end
+    else if (OpCtrl == `INTDIVW_OPCTRL) begin
+      X = {P.FLEN{1'bx}};
+      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
+      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
+      Ans = TestVector[P.D_LEN-1:0];
+      AnsFlg = 5'bx;
+      if (~clk) #5;
+      IDivStart = 1'b1;
+      IntDivE = 1'b1;
+      Funct3E = 3'b101;
+      #10 // one clk cycle
+      IDivStart = 1'b0;
+      IntDivE = 1'b0;
+    end
+  else if (OpCtrl == `INTREMW_OPCTRL) begin
+      X = {P.FLEN{1'bx}};
+      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
+      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
+      Ans = TestVector[P.D_LEN-1:0];
+      AnsFlg = 5'bx;
+      if (~clk) #5;
+      IDivStart = 1'b1;
+      IntDivE = 1'b1;
+      Funct3E = 3'b110;
+      W64 = 1'b1;
+      #10 // one clk cycle
+      IDivStart = 1'b0;
+      IntDivE = 1'b0;
+      W64 = 1'b0;
     end
 	end
 	  
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index b22e37350..dec1dbee0 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -42,10 +42,12 @@
 `define FROM_I_OPCTRL  3'b101
 `define FROM_UL_OPCTRL 3'b110
 `define FROM_L_OPCTRL  3'b111
-`define INTREMU_OPCTRL 3'b111
-`define INTREM_OPCTRL  3'b110
-`define INTDIV_OPCTRL  3'b010
+`define INTREMU_OPCTRL 3'b001
+`define INTREM_OPCTRL  3'b010
+`define INTDIV_OPCTRL  3'b011
+`define INTDIVW_OPCTRL 3'b100
 `define INTDIVU_OPCTRL 3'b101
+`define INTREMW_OPCTRL 3'b110
 `define RNE            3'b000
 `define RZ             3'b001
 `define RU             3'b011
@@ -604,11 +606,11 @@ string intdiv[] = '{
 
 string intremu[] = '{
 	"cvw_64_remu-01.tv"
-}
+};
 
 string intdivu[] = '{
 	"cvw_64_divu-01.tv"
-}
+};
 
 
 

From 05c2bd88df68ad2824397118cc8c68e0dfe8726c Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Sun, 9 Jul 2023 23:20:18 -0700
Subject: [PATCH 28/40] remw works

---
 testbench/testbench-fp.sv | 24 ++++++++++++++++++++++--
 testbench/tests-fp.vh     | 18 ++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index 237998bac..fe3365201 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -715,6 +715,24 @@ module testbenchfp;
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
+      if (TEST === "intremw") begin // if unified div sqrt is being tested
+        Tests = {Tests, intremw};
+        OpCtrl = {OpCtrl, `INTREMW_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `INTDIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
+      //TODO:REMUWm DIVW, DIVUW
+      if (TEST === "intremuw") begin // if unified div sqrt is being tested
+        Tests = {Tests, intremw};
+        OpCtrl = {OpCtrl, `INTREMW_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `INTDIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
+
+
+
       end
       
       // check if nothing is being tested
@@ -833,7 +851,7 @@ module testbenchfp;
            .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
            .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
-   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu") begin: divremsqrt
+   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
            .XeE(Xe), .YeE(Ye), .SqrtE(TEST === "sqrt"), .SqrtM(TEST === "sqrt"),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
@@ -1066,7 +1084,7 @@ module testbenchfp;
       //  wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
       assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx));
       assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx));
-      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal == `INTDIVU_OPCTRL) | (OpCtrlVal ==`INTREMU_OPCTRL);
+      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal == `INTDIVU_OPCTRL) | (OpCtrlVal ==`INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMW_OPCTRL);
       assign FMAop = (OpCtrlVal == `FMAUNIT);  
       assign DivDone = OldFDivBusyE & ~FDivBusyE;
 
@@ -1383,9 +1401,11 @@ module readvectors (
       IDivStart = 1'b1;
       IntDivE = 1'b1;
       Funct3E = 3'b101;
+      W64 = 1'b1;
       #10 // one clk cycle
       IDivStart = 1'b0;
       IntDivE = 1'b0;
+      W64 = 1'b1;
     end
   else if (OpCtrl == `INTREMW_OPCTRL) begin
       X = {P.FLEN{1'bx}};
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index dec1dbee0..a7cc431e6 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -612,5 +612,23 @@ string intdivu[] = '{
 	"cvw_64_divu-01.tv"
 };
 
+string intremw[] = '{
+	"cvw_64_remw-01.tv"
+};
+
+string intremuw[] = '{
+	"cvw_64_remuw-01.tv"
+};
+
+string intdivuw[] = '{
+	"cvw_64_divuw-01.tv"
+};
+
+string intdivw[] = '{
+	"cvw_64_divw-01.tv"
+};
+
+
+
 
 

From 5fb862639d9c8130c40d46b1ea45f7e88b913243 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Mon, 10 Jul 2023 16:43:54 -0700
Subject: [PATCH 29/40] more bug fixes in testbench-fp

---
 testbench/testbench-fp.sv | 84 ++++++++++++++++++++++++++++++++++-----
 testbench/tests-fp.vh     |  2 +
 2 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index fe3365201..22fef17e1 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -675,14 +675,14 @@ module testbenchfp;
       end
       if (TEST === "customdiv") begin // if unified div sqrt is being tested
         Tests = {Tests, customdiv};
-        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `DIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
       if (TEST === "customdivcorrect") begin // if unified div sqrt is being tested
         Tests = {Tests, customdivcorrect};
-        OpCtrl = {OpCtrl, `DIV_OPCTRL};
+        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `DIVUNIT};
         Fmt = {Fmt, 2'b10};
@@ -722,10 +722,24 @@ module testbenchfp;
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      //TODO:REMUWm DIVW, DIVUW
+      //TODO:DIVW, DIVUW
       if (TEST === "intremuw") begin // if unified div sqrt is being tested
-        Tests = {Tests, intremw};
-        OpCtrl = {OpCtrl, `INTREMW_OPCTRL};
+        Tests = {Tests, intremuw};
+        OpCtrl = {OpCtrl, `INTREMUW_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `INTDIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
+      if (TEST === "intdivw") begin // if unified div sqrt is being tested
+        Tests = {Tests, intdivw};
+        OpCtrl = {OpCtrl, `INTDIVW_OPCTRL};
+        WriteInt = {WriteInt, 1'b0};
+        Unit = {Unit, `INTDIVUNIT};
+        Fmt = {Fmt, 2'b10};
+      end
+      if (TEST === "intdivuw") begin // if unified div sqrt is being tested
+        Tests = {Tests, intdivuw};
+        OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
@@ -853,7 +867,7 @@ module testbenchfp;
    end
    if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
-           .XeE(Xe), .YeE(Ye), .SqrtE(TEST === "sqrt"), .SqrtM(TEST === "sqrt"),
+           .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT), .SqrtM(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
            .PostProcSel(UnitVal[1:0]),
            .XNaNE(XNaN), .YNaNE(YNaN), 
@@ -865,6 +879,19 @@ module testbenchfp;
                        .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M),
                        .Funct3E(Funct3E), .IntDivE(IntDivE), 
                        .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
+    /*drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
+           .XeE(Xe), .YeE(Ye), .SqrtE(1'b1), .SqrtM(1'b1),
+           .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
+           .PostProcSel(UnitVal[1:0]),
+           .XNaNE(XNaN), .YNaNE(YNaN), 
+             .OpCtrl(OpCtrlVal),
+             .XSNaNE(XSNaN), .YSNaNE(YSNaN),
+           .Frm(FrmVal), 
+                       .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
+                       .StallM(1'b0), .FDivBusyE,
+                       .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(3'b0),
+                       .Funct3E(3'b0), .IntDivE(1'b0), 
+                       .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));*/
   end
   else begin: postprocess
     postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
@@ -1084,7 +1111,7 @@ module testbenchfp;
       //  wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
       assign ResMatch = ((Res === Ans) | NaNGood | (NaNGood === 1'bx));
       assign FlagMatch = ((ResFlg === AnsFlg) | (AnsFlg === 5'bx));
-      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal == `INTDIVU_OPCTRL) | (OpCtrlVal ==`INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMW_OPCTRL);
+      assign divsqrtop = (OpCtrlVal == `SQRT_OPCTRL) | (OpCtrlVal == `DIV_OPCTRL) | (OpCtrlVal == `INTREM_OPCTRL) | (OpCtrlVal == `INTDIV_OPCTRL) | (OpCtrlVal == `INTDIVU_OPCTRL) | (OpCtrlVal ==`INTREMU_OPCTRL) | (OpCtrlVal ==`INTREMW_OPCTRL) | (OpCtrlVal ==`INTREMUW_OPCTRL) | (OpCtrlVal == `INTDIVW_OPCTRL) | (OpCtrlVal == `INTDIVW_OPCTRL) | (OpCtrlVal == `INTDIVUW_OPCTRL);
       assign FMAop = (OpCtrlVal == `FMAUNIT);  
       assign DivDone = OldFDivBusyE & ~FDivBusyE;
 
@@ -1250,7 +1277,13 @@ module readvectors (
                Ans = {{P.FLEN-P.H_LEN{1'b1}}, TestVector[8+(P.H_LEN-1):8]};
             end
           endcase
-	`DIVUNIT:
+	`DIVUNIT: begin
+    IDivStart=1'b0;
+    IntDivE=1'b0;
+    SrcA={P.XLEN{1'b0}};
+    SrcB={P.XLEN{1'b0}};
+    W64=1'b0;
+    Funct3E=3'b0;
           if (OpCtrl === `SQRT_OPCTRL)
             case (Fmt)
               2'b11: begin // quad
@@ -1325,6 +1358,7 @@ module readvectors (
 		   DivStart = 1'b0;
               end
             endcase
+  end
 	`INTDIVUNIT: begin
 	  #20;
     if (OpCtrl === `INTDIV_OPCTRL) begin
@@ -1392,6 +1426,22 @@ module readvectors (
       W64 = 1'b0;
     end
     else if (OpCtrl == `INTDIVW_OPCTRL) begin
+      X = {P.FLEN{1'bx}};
+      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
+      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
+      Ans = TestVector[P.D_LEN-1:0];
+      AnsFlg = 5'bx;
+      if (~clk) #5;
+      IDivStart = 1'b1;
+      IntDivE = 1'b1;
+      Funct3E = 3'b100;
+      W64 = 1'b1;
+      #10 // one clk cycle
+      IDivStart = 1'b0;
+      IntDivE = 1'b0;
+      W64 = 1'b1;
+    end
+    else if (OpCtrl == `INTDIVUW_OPCTRL) begin
       X = {P.FLEN{1'bx}};
       SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
       SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
@@ -1407,7 +1457,7 @@ module readvectors (
       IntDivE = 1'b0;
       W64 = 1'b1;
     end
-  else if (OpCtrl == `INTREMW_OPCTRL) begin
+   else if (OpCtrl == `INTREMW_OPCTRL) begin
       X = {P.FLEN{1'bx}};
       SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
       SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
@@ -1422,6 +1472,22 @@ module readvectors (
       IDivStart = 1'b0;
       IntDivE = 1'b0;
       W64 = 1'b0;
+   end
+  else if (OpCtrl == `INTREMUW_OPCTRL) begin
+    X = {P.FLEN{1'bx}};
+    SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
+    SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
+    Ans = TestVector[P.D_LEN-1:0];
+    AnsFlg = 5'bx;
+    if (~clk) #5;
+    IDivStart = 1'b1;
+    IntDivE = 1'b1;
+    Funct3E = 3'b111;
+    W64 = 1'b1;
+    #10 // one clk cycle
+    IDivStart = 1'b0;
+    IntDivE = 1'b0;
+    W64 = 1'b0;
     end
 	end
 	  
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index a7cc431e6..4f63103cc 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -48,6 +48,8 @@
 `define INTDIVW_OPCTRL 3'b100
 `define INTDIVU_OPCTRL 3'b101
 `define INTREMW_OPCTRL 3'b110
+`define INTREMUW_OPCTRL 3'b111
+`define INTDIVUW_OPCTRL 3'b000
 `define RNE            3'b000
 `define RZ             3'b001
 `define RU             3'b011

From f91d8c20e4ed06017398af1ef66c70ffefabfc11 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Wed, 12 Jul 2023 19:26:05 -0700
Subject: [PATCH 30/40] fixed bug where opctrl was not changing when running
 several intdivrem tests

---
 testbench/testbench-fp.sv | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index 22fef17e1..78cdd9d4a 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -127,7 +127,7 @@ module testbenchfp;
    logic 			FMAop;                      // Is this a FMA operation?   
    logic      sqrtop;                     // Is this a SQRT operation?
    
-	 flop #(3) funct3reg(.clk, .d(Funct3E), .q(Funct3M));
+	 flopen #(3) funct3reg(.clk, .en(IFDivStartE), .d(Funct3E), .q(Funct3M));
    ///////////////////////////////////////////////////////////////////////////////////////////////
 
    //     ||||||||| |||||||| ||||||| |||||||||   ||||||| |||||||| |||
@@ -687,57 +687,56 @@ module testbenchfp;
         Unit = {Unit, `DIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intrem") begin // if unified div sqrt is being tested
+      if (TEST === "intrem" | TEST === "intdivrem" ) begin // if unified div sqrt is being tested
         Tests = {Tests, intrem};
         OpCtrl = {OpCtrl, `INTREM_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intdiv") begin // if unified div sqrt is being tested
+      if (TEST === "intdiv" | TEST ==="intdivrem") begin // if unified div sqrt is being tested
         Tests = {Tests, intdiv};
         OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intremu") begin // if unified div sqrt is being tested
+      if (TEST === "intremu"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
         Tests = {Tests, intremu};
         OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intdivu") begin // if unified div sqrt is being tested
+      if (TEST === "intdivu"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
         Tests = {Tests, intdivu};
         OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intremw") begin // if unified div sqrt is being tested
+      if (TEST === "intremw"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
         Tests = {Tests, intremw};
         OpCtrl = {OpCtrl, `INTREMW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      //TODO:DIVW, DIVUW
-      if (TEST === "intremuw") begin // if unified div sqrt is being tested
+      if (TEST === "intremuw"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
         Tests = {Tests, intremuw};
         OpCtrl = {OpCtrl, `INTREMUW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intdivw") begin // if unified div sqrt is being tested
+      if (TEST === "intdivw"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
         Tests = {Tests, intdivw};
         OpCtrl = {OpCtrl, `INTDIVW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intdivuw") begin // if unified div sqrt is being tested
+      if (TEST === "intdivuw"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
         Tests = {Tests, intdivuw};
         OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
@@ -745,8 +744,6 @@ module testbenchfp;
         Fmt = {Fmt, 2'b10};
       end
 
-
-
       end
       
       // check if nothing is being tested
@@ -865,7 +862,7 @@ module testbenchfp;
            .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
            .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
-   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw") begin: divremsqrt
+   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" | TEST ==="intdivrem") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
            .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT), .SqrtM(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
@@ -1156,7 +1153,7 @@ module testbenchfp;
    // set the vector index back to 0
    VectorNum = 0;
    // incemet the operation if all the rounding modes have been tested
-   if (FrmNum === 4) OpCtrlNum += 1;
+   if (FrmNum === 4 | TEST === "intdivrem") OpCtrlNum += 1;
    // increment the rounding mode or loop back to rne 
    if (FrmNum < 4) FrmNum += 1;
    else FrmNum = 0; 

From e37e989eded04fbc3e4f5a71df596af13c2b6e69 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Wed, 12 Jul 2023 19:39:11 -0700
Subject: [PATCH 31/40] testbench-fp code cleanup

---
 testbench/testbench-fp.sv | 156 +++++++++-----------------------------
 1 file changed, 37 insertions(+), 119 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index 78cdd9d4a..f4e55b713 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -1358,119 +1358,6 @@ module readvectors (
   end
 	`INTDIVUNIT: begin
 	  #20;
-    if (OpCtrl === `INTDIV_OPCTRL) begin
-      X = {P.FLEN{1'bx}};
-      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
-      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
-      Ans = TestVector[P.D_LEN-1:0];
-      AnsFlg = 5'bx;
-      if (~clk) #5;
-      IDivStart = 1'b1;
-      IntDivE = 1'b1;
-      Funct3E = 3'b100;
-      W64 = 1'b0;
-      #10 // one clk cycle
-      IDivStart = 1'b0;
-      IntDivE = 1'b0;
-      W64 = 1'b0;
-    end
-    else if (OpCtrl == `INTREM_OPCTRL) begin
-      X = {P.FLEN{1'bx}};
-      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
-      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
-      Ans = TestVector[P.D_LEN-1:0];
-      AnsFlg = 5'bx;
-      if (~clk) #5;
-      IDivStart = 1'b1;
-      IntDivE = 1'b1;
-      Funct3E = 3'b110;
-      W64 = 1'b0;
-      #10 // one clk cycle
-      IDivStart = 1'b0;
-      IntDivE = 1'b0;
-      W64 = 1'b0;
-    end
-    else if (OpCtrl == `INTREMU_OPCTRL) begin
-      X = {P.FLEN{1'bx}};
-      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
-      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
-      Ans = TestVector[P.D_LEN-1:0];
-      AnsFlg = 5'bx;
-      if (~clk) #5;
-      IDivStart = 1'b1;
-      IntDivE = 1'b1;
-      Funct3E = 3'b111;
-      W64 = 1'b0;
-      #10 // one clk cycle
-      IDivStart = 1'b0;
-      IntDivE = 1'b0;
-      W64 = 1'b0;
-    end
-    else if (OpCtrl == `INTDIVU_OPCTRL) begin
-      X = {P.FLEN{1'bx}};
-      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
-      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
-      Ans = TestVector[P.D_LEN-1:0];
-      AnsFlg = 5'bx;
-      if (~clk) #5;
-      IDivStart = 1'b1;
-      IntDivE = 1'b1;
-      Funct3E = 3'b101;
-      W64 = 1'b0;
-      #10 // one clk cycle
-      IDivStart = 1'b0;
-      IntDivE = 1'b0;
-      W64 = 1'b0;
-    end
-    else if (OpCtrl == `INTDIVW_OPCTRL) begin
-      X = {P.FLEN{1'bx}};
-      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
-      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
-      Ans = TestVector[P.D_LEN-1:0];
-      AnsFlg = 5'bx;
-      if (~clk) #5;
-      IDivStart = 1'b1;
-      IntDivE = 1'b1;
-      Funct3E = 3'b100;
-      W64 = 1'b1;
-      #10 // one clk cycle
-      IDivStart = 1'b0;
-      IntDivE = 1'b0;
-      W64 = 1'b1;
-    end
-    else if (OpCtrl == `INTDIVUW_OPCTRL) begin
-      X = {P.FLEN{1'bx}};
-      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
-      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
-      Ans = TestVector[P.D_LEN-1:0];
-      AnsFlg = 5'bx;
-      if (~clk) #5;
-      IDivStart = 1'b1;
-      IntDivE = 1'b1;
-      Funct3E = 3'b101;
-      W64 = 1'b1;
-      #10 // one clk cycle
-      IDivStart = 1'b0;
-      IntDivE = 1'b0;
-      W64 = 1'b1;
-    end
-   else if (OpCtrl == `INTREMW_OPCTRL) begin
-      X = {P.FLEN{1'bx}};
-      SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
-      SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
-      Ans = TestVector[P.D_LEN-1:0];
-      AnsFlg = 5'bx;
-      if (~clk) #5;
-      IDivStart = 1'b1;
-      IntDivE = 1'b1;
-      Funct3E = 3'b110;
-      W64 = 1'b1;
-      #10 // one clk cycle
-      IDivStart = 1'b0;
-      IntDivE = 1'b0;
-      W64 = 1'b0;
-   end
-  else if (OpCtrl == `INTREMUW_OPCTRL) begin
     X = {P.FLEN{1'bx}};
     SrcA = TestVector[2*(P.Q_LEN)+P.D_LEN-1:2*(P.Q_LEN)];
     SrcB = TestVector[(P.Q_LEN)+P.D_LEN-1:P.Q_LEN];
@@ -1479,13 +1366,44 @@ module readvectors (
     if (~clk) #5;
     IDivStart = 1'b1;
     IntDivE = 1'b1;
-    Funct3E = 3'b111;
-    W64 = 1'b1;
-    #10 // one clk cycle
-    IDivStart = 1'b0;
-    IntDivE = 1'b0;
-    W64 = 1'b0;
+    case (OpCtrl)
+      `INTDIV_OPCTRL: begin
+        Funct3E = 3'b100;
+        W64 = 1'b0;
+      end
+      `INTREM_OPCTRL: begin
+        Funct3E = 3'b110;
+        W64 = 1'b0;
+      end
+      `INTREMU_OPCTRL: begin
+        Funct3E = 3'b111;
+        W64 = 1'b0;
+      end
+      `INTDIVU_OPCTRL: begin
+        Funct3E = 3'b101;
+        W64 = 1'b0;
+      end
+      `INTDIVW_OPCTRL: begin
+        Funct3E = 3'b100;
+        W64 = 1'b1;
+      end
+      `INTDIVUW_OPCTRL: begin
+        Funct3E = 3'b101;
+        W64 = 1'b1;
+      end
+    `INTREMW_OPCTRL: begin
+        Funct3E = 3'b110;
+        W64 = 1'b1;
     end
+    `INTREMUW_OPCTRL: begin
+      Funct3E = 3'b111;
+      W64 = 1'b1;
+    end
+   endcase
+   #10 // one clk cycle
+   IDivStart = 1'b0;
+   IntDivE = 1'b0;
+   W64 = 1'b0;
 	end
 	  
 	`CMPUNIT:

From 93c94c89b1aa22cd58f93f9ccee5811a499acd99 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Wed, 12 Jul 2023 19:54:22 -0700
Subject: [PATCH 32/40] comments in testbench fp

---
 testbench/testbench-fp.sv | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index f4e55b713..c13113640 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -687,56 +687,56 @@ module testbenchfp;
         Unit = {Unit, `DIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intrem" | TEST === "intdivrem" ) begin // if unified div sqrt is being tested
+      if (TEST === "intrem" | TEST === "intdivrem" ) begin // if integer remainder is being tested
         Tests = {Tests, intrem};
         OpCtrl = {OpCtrl, `INTREM_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intdiv" | TEST ==="intdivrem") begin // if unified div sqrt is being tested
+      if (TEST === "intdiv" | TEST ==="intdivrem") begin // if integer division is being tested
         Tests = {Tests, intdiv};
         OpCtrl = {OpCtrl, `INTDIV_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intremu"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
+      if (TEST === "intremu"| TEST ==="intdivrem") begin // if unsigned integer remainder is being tested
         Tests = {Tests, intremu};
         OpCtrl = {OpCtrl, `INTREMU_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intdivu"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
+      if (TEST === "intdivu"| TEST ==="intdivrem") begin // if unsigned integer division is being tested
         Tests = {Tests, intdivu};
         OpCtrl = {OpCtrl, `INTDIVU_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intremw"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
+      if (TEST === "intremw"| TEST ==="intdivrem") begin // if w-type integer remainder is being tested
         Tests = {Tests, intremw};
         OpCtrl = {OpCtrl, `INTREMW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intremuw"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
+      if (TEST === "intremuw"| TEST ==="intdivrem") begin // if unsigned w-type integer remainder is being tested
         Tests = {Tests, intremuw};
         OpCtrl = {OpCtrl, `INTREMUW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intdivw"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
+      if (TEST === "intdivw"| TEST ==="intdivrem") begin // if w-type integer division is being tested
         Tests = {Tests, intdivw};
         OpCtrl = {OpCtrl, `INTDIVW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};
         Unit = {Unit, `INTDIVUNIT};
         Fmt = {Fmt, 2'b10};
       end
-      if (TEST === "intdivuw"| TEST ==="intdivrem") begin // if unified div sqrt is being tested
+      if (TEST === "intdivuw"| TEST ==="intdivrem") begin // if unsigned w-type integer divison is being tested
         Tests = {Tests, intdivuw};
         OpCtrl = {OpCtrl, `INTDIVUW_OPCTRL};
         WriteInt = {WriteInt, 1'b0};

From e8b856bc426657a8527a3c687340027fe5050c42 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Wed, 12 Jul 2023 19:58:39 -0700
Subject: [PATCH 33/40] divsqrt on unified unit is now called fdivremsqrt test

---
 testbench/testbench-fp.sv | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index c13113640..0b50941db 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -297,7 +297,7 @@ module testbenchfp;
                Fmt = {Fmt, 2'b11};
             end
    end
-   if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+   if (TEST === "fdivremsqrt") begin // if unified div sqrt is being tested
       Tests = {Tests, f128div, f128sqrt};
       OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
       WriteInt = {WriteInt, 1'b0, 1'b0};
@@ -433,7 +433,7 @@ module testbenchfp;
                Fmt = {Fmt, 2'b01};
             end
    end
-   if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+   if (TEST === "fdivremsqrt") begin // if unified div sqrt is being tested
       Tests = {Tests, f64div, f64sqrt};
       OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
       WriteInt = {WriteInt, 1'b0, 1'b0};
@@ -553,7 +553,7 @@ module testbenchfp;
                Fmt = {Fmt, 2'b00};
             end
    end
-    if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+    if (TEST === "fdivremsqrt") begin // if unified div sqrt is being tested
          Tests = {Tests, f32div, f32sqrt};
          OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
          WriteInt = {WriteInt, 1'b0, 1'b0};
@@ -655,7 +655,7 @@ module testbenchfp;
                Fmt = {Fmt, 2'b10};
             end
    end
-   if (TEST === "divremsqrt") begin // if unified div sqrt is being tested
+   if (TEST === "fdivremsqrt") begin // if unified div sqrt is being tested
         Tests = {Tests, f16div, f16sqrt};
         OpCtrl = {OpCtrl, `DIV_OPCTRL, `SQRT_OPCTRL};
         WriteInt = {WriteInt, 1'b0, 1'b0};
@@ -862,7 +862,7 @@ module testbenchfp;
            .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
            .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
-   if (TEST === "divremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" | TEST ==="intdivrem") begin: divremsqrt
+   if (TEST === "fdivremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" | TEST ==="intdivrem") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
            .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT), .SqrtM(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 

From dd79397b75650d7b2f7f12c8ca80fa9fbbfab284 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Thu, 13 Jul 2023 08:14:27 -0700
Subject: [PATCH 34/40] added name

---
 testbench/testbench-fp.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index 0b50941db..5b9c606ff 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -1,6 +1,6 @@
 ///////////////////////////////////////////
 //
-// Written: me@KatherineParry.com, james.stine@okstate.edu
+// Written: me@KatherineParry.com, james.stine@okstate.edu, kekim@hmc.edu
 //
 // Purpose: Testbench for UCB Testfloat on Wally
 // 

From 83983a4eb21f12b7e3df5a012d703800fd6c7931 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Thu, 13 Jul 2023 08:15:23 -0700
Subject: [PATCH 35/40] switched custom test from rem-01 to div-01

---
 testbench/tests.vh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testbench/tests.vh b/testbench/tests.vh
index 0745155ce..916c14686 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -2077,7 +2077,7 @@ string arch64zbs[] = '{
 
  string custom[] = '{
     `RISCVARCHTEST,
-     "rv64i_m/M/src/rem-01.S"
+     "rv64i_m/M/src/div-01.S"
     /*"simple",
     "debug",
     "cacheTest"*/

From db5a138a56dee36f69588fd42a9186a7c8af9f2e Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Thu, 13 Jul 2023 08:24:29 -0700
Subject: [PATCH 36/40] removed custom tests

---
 testbench/testbench-fp.sv | 60 +++++++--------------------------------
 testbench/tests-fp.vh     |  8 ------
 2 files changed, 10 insertions(+), 58 deletions(-)

diff --git a/testbench/testbench-fp.sv b/testbench/testbench-fp.sv
index 5b9c606ff..057ab9eab 100644
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@@ -664,29 +664,6 @@ module testbenchfp;
             Fmt = {Fmt, 2'b10};
         end
       end
-      if (TEST === "divremsqrttest") begin // if unified div sqrt is being tested
-        Tests = {Tests, f64sqrt};
-        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
-        WriteInt = {WriteInt, 1'b0};
-        for(int i = 0; i<5; i++) begin
-            Unit = {Unit, `DIVUNIT};
-            Fmt = {Fmt, 2'b01};
-        end
-      end
-      if (TEST === "customdiv") begin // if unified div sqrt is being tested
-        Tests = {Tests, customdiv};
-        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
-        WriteInt = {WriteInt, 1'b0};
-        Unit = {Unit, `DIVUNIT};
-        Fmt = {Fmt, 2'b10};
-      end
-      if (TEST === "customdivcorrect") begin // if unified div sqrt is being tested
-        Tests = {Tests, customdivcorrect};
-        OpCtrl = {OpCtrl, `SQRT_OPCTRL};
-        WriteInt = {WriteInt, 1'b0};
-        Unit = {Unit, `DIVUNIT};
-        Fmt = {Fmt, 2'b10};
-      end
       if (TEST === "intrem" | TEST === "intdivrem" ) begin // if integer remainder is being tested
         Tests = {Tests, intrem};
         OpCtrl = {OpCtrl, `INTREM_OPCTRL};
@@ -850,7 +827,7 @@ module testbenchfp;
                    .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
    end
    
-   if (TEST === "div" | TEST === "sqrt" | TEST === "all"| TEST === "custom" | TEST ==="customdivcorrect") begin: fdivsqrt
+   if (TEST === "div" | TEST === "sqrt" | TEST === "all") begin: fdivsqrt
       fdivsqrt #(P) fdivsqrt(.clk, .reset, .XsE(Xs), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
            .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL), .SqrtM(OpCtrlVal===`SQRT_OPCTRL),
            .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
@@ -862,33 +839,16 @@ module testbenchfp;
            .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
            .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
    end
-   if (TEST === "fdivremsqrt" | TEST === "divremsqrttest" | TEST === "customdiv" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" | TEST ==="intdivrem") begin: divremsqrt
+   if (TEST === "fdivremsqrt" | TEST === "intdiv" | TEST === "intrem" | TEST === "intdivu" | TEST ==="intremu" | TEST ==="intremw" | TEST ==="intremuw" | TEST ==="intdivw" | TEST ==="intdivuw" | TEST ==="intdivrem") begin: divremsqrt
     drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
-           .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT), .SqrtM(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT),
-           .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
-           .PostProcSel(UnitVal[1:0]),
-           .XNaNE(XNaN), .YNaNE(YNaN), 
-             .OpCtrl(OpCtrlVal),
-             .XSNaNE(XSNaN), .YSNaNE(YSNaN),
-           .Frm(FrmVal), 
-                       .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64),
-                       .StallM(1'b0), .FDivBusyE,
-                       .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M),
-                       .Funct3E(Funct3E), .IntDivE(IntDivE), 
-                       .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
-    /*drsu #(P) drsu(.clk, .reset, .XsE(Xs), .YsE(Ys), .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), 
-           .XeE(Xe), .YeE(Ye), .SqrtE(1'b1), .SqrtM(1'b1),
-           .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), 
-           .PostProcSel(UnitVal[1:0]),
-           .XNaNE(XNaN), .YNaNE(YNaN), 
-             .OpCtrl(OpCtrlVal),
-             .XSNaNE(XSNaN), .YSNaNE(YSNaN),
-           .Frm(FrmVal), 
-                       .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
-                       .StallM(1'b0), .FDivBusyE,
-                       .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(3'b0),
-                       .Funct3E(3'b0), .IntDivE(1'b0), 
-                       .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));*/
+      .XeE(Xe), .YeE(Ye), .SqrtE(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT), .SqrtM(OpCtrlVal===`SQRT_OPCTRL&UnitVal===`DIVUNIT),
+      .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .PostProcSel(UnitVal[1:0]),
+      .XNaNE(XNaN), .YNaNE(YNaN), .OpCtrl(OpCtrlVal), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .Frm(FrmVal), 
+      .FDivStartE(DivStart), .IDivStartE(IDivStart), .W64E(W64),
+      .StallM(1'b0), .FDivBusyE,
+      .FlushE(1'b0), .ForwardedSrcAE(SrcA), .ForwardedSrcBE(SrcB), .Funct3M(Funct3M),
+      .Funct3E(Funct3E), .IntDivE(IntDivE), 
+      .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE), .FResM(FpRes), .FIntDivResultM(IntRes), .FlgM(Flg));
   end
   else begin: postprocess
     postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
diff --git a/testbench/tests-fp.vh b/testbench/tests-fp.vh
index 4f63103cc..3633094f4 100644
--- a/testbench/tests-fp.vh
+++ b/testbench/tests-fp.vh
@@ -590,14 +590,6 @@ string f128fma[] = '{
 	"f128_mulAdd_rnm.tv"
 };
 
-string customdiv[] = '{
-	"f16_custom.tv"
-};
-
-string customdivcorrect[] = '{
-	"f16_custom.tv"
-};
-
 string intrem[] = '{
 	"cvw_64_rem-01.tv"
 };

From 71172b8608f154d510d17e4696775fe9013511a0 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Thu, 13 Jul 2023 08:28:51 -0700
Subject: [PATCH 37/40] revert setup.sh to old

---
 setup.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.sh b/setup.sh
index a92cc84a2..95026beb1 100755
--- a/setup.sh
+++ b/setup.sh
@@ -16,8 +16,7 @@ echo \$WALLY set to ${WALLY}
 # Must edit these based on your local environment.  Ask your sysadmin.
 export MGLS_LICENSE_FILE=27002@zircon.eng.hmc.edu                   # Change this to your Siemens license server
 export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu                # Change this to your Synopsys license server
-#export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_2/questasim        # Change this for your path to Questa, excluding bin
-export QUESTA_HOME=/cad/mentor/questa_sim-2021.4_4/questasim        # Change this for your path to Questa, excluding bin
+export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_2/questasim        # Change this for your path to Questa, excluding bin
 #export QUESTA_HOME=/cad/mentor/questa_sim-2022.4_3/questasim        # Change this for your path to Questa, excluding bin
 export SNPS_HOME=/cad/synopsys/SYN                                  # Change this for your path to Design Compiler, excluding bin
 

From 345ca6e1b976f24719bf003849fdd311e94e09bb Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Mon, 17 Jul 2023 15:59:36 -0700
Subject: [PATCH 38/40] combined int,fp test generation script update

---
 .../extract_arch_vectors.py                   |   8 +-
 .../extract_arch_vectors_v2.py                | 294 ------------------
 2 files changed, 4 insertions(+), 298 deletions(-)
 delete mode 100755 tests/fp/combined_IF_vectors/extract_arch_vectors_v2.py

diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors.py b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
index 8baa0939b..6fe63d0c7 100755
--- a/tests/fp/combined_IF_vectors/extract_arch_vectors.py
+++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
@@ -128,7 +128,7 @@ def create_vectors(my_config):
                             done = True
                     # put it all together
                     if not done:
-                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode)
+                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
                         dest_file.write(translation + "\n")
                 else:
                     # print("read false")
@@ -174,7 +174,7 @@ def create_vectors(my_config):
                     flags = "XX"
                     # put it all together
                     if not done:
-                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode)
+                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
                         dest_file.write(translation + "\n")
                 else:
                     # print("read false")
@@ -217,7 +217,7 @@ def create_vectors(my_config):
                     flags = "XX"
                     # put it all together
                     if not done:
-                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode)
+                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
                         dest_file.write(translation + "\n")
                 else:
                     # print("read false")
@@ -261,7 +261,7 @@ def create_vectors(my_config):
                     
                     # put it all together
                     if not done:
-                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode)
+                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
                         dest_file.write(translation + "\n")
                 else:
                     # print("read false")
diff --git a/tests/fp/combined_IF_vectors/extract_arch_vectors_v2.py b/tests/fp/combined_IF_vectors/extract_arch_vectors_v2.py
deleted file mode 100755
index 6fe63d0c7..000000000
--- a/tests/fp/combined_IF_vectors/extract_arch_vectors_v2.py
+++ /dev/null
@@ -1,294 +0,0 @@
-#! /usr/bin/python3
-
-# author: Alessandro Maiuolo, Kevin Kim
-# contact: amaiuolo@g.hmc.edu, kekim@hmc.edu
-# date created: 3-29-2023
-
-# extract all arch test vectors
-import os
-wally = os.popen('echo $WALLY').read().strip()
-
-def ext_bits(my_string):
-    target_len = 32 # we want 128 bits, div by 4 bc hex notation
-    zeroes_to_add = target_len - len(my_string)
-    return zeroes_to_add*"0" + my_string
-
-def twos_comp(b, x):
-    if b == 32:
-        return hex(0x100000000 - int(x,16))[2:]
-    elif b == 64:
-        return hex(0x10000000000000000 - int(x,16))[2:]
-    else:
-        return "UNEXPECTED_BITSIZE"
-
-def unpack_rf(packed):
-    bin_u = bin(int(packed, 16))[2:].zfill(8) # translate to binary
-    flags = hex(int(bin_u[3:],2))[2:].zfill(2)
-    rounding_mode = hex(int(bin_u[:3],2))[2:]
-    return flags, rounding_mode
-
-# rounding mode dictionary
-round_dict = {
-    "rne":"0",
-    "rnm":"4",
-    "ru":"3",
-    "rz":"1",
-    "rd":"2",
-    "dyn":"7"
-}
-
-# fcsr dictionary
-fcsr_dict = {
-    "0":"rne",
-    "128":"rnm",
-    "96":"ru",
-    "32":"rz",
-    "64":"rd",
-    "224":"dyn"
-}
-
-print("creating arch test vectors")
-
-class Config:
-  def __init__(self, bits, letter, op, filt, op_code):
-    self.bits = bits
-    self.letter = letter
-    self.op = op
-    self.filt = filt
-    self.op_code = op_code
-
-def create_vectors(my_config):
-    suite_folder_num = my_config.bits
-    if my_config.bits == 64 and my_config.letter == "F": suite_folder_num = 32
-    source_dir1 = "{}/addins/riscv-arch-test/riscv-test-suite/rv{}i_m/{}/src/".format(wally, suite_folder_num, my_config.letter)
-    source_dir2 = "{}/tests/riscof/work/riscv-arch-test/rv{}i_m/{}/src/".format(wally, my_config.bits, my_config.letter)
-    dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally)
-    all_vectors1 = os.listdir(source_dir1)
-
-    filt_vectors1 = [v for v in all_vectors1 if my_config.filt in v]
-    # print(filt_vectors1)
-    filt_vectors2 = [v + "/ref/Reference-sail_c_simulator.signature" for v in all_vectors1 if my_config.filt in v]
-
-    # iterate through all vectors
-    for i in range(len(filt_vectors1)):
-        vector1 = filt_vectors1[i]
-        vector2 = filt_vectors2[i]
-        operation = my_config.op_code
-        rounding_mode = "X"
-        flags = "XX"
-        # use name to create our new tv
-        dest_file = open("{}cvw_{}_{}.tv".format(dest_dir, my_config.bits, vector1[:-2]), 'w')
-        # open vectors
-        src_file1 = open(source_dir1 + vector1,'r')
-        src_file2 = open(source_dir2 + vector2,'r')
-        # for each test in the vector
-        reading = True
-        src_file2.readline() #skip first bc junk
-        # print(my_config.bits, my_config.letter)
-        if my_config.letter == "F" and my_config.bits == 64:
-            reading = True
-            # print("trigger 64F")
-            #skip first 2 lines bc junk
-            src_file2.readline()
-            while reading:
-                # get answer and flags from Ref...signature
-                # answers are before deadbeef (first line of 4)
-                # flags are after deadbeef (third line of 4)
-                answer = src_file2.readline().strip()
-                deadbeef = src_file2.readline().strip()
-                # print(answer)
-                if not (answer == "e7d4b281" and deadbeef == "6f5ca309"): # if there is still stuff to read
-                    # get flags
-                    packed = src_file2.readline().strip()[6:]
-                    flags, rounding_mode = unpack_rf(packed)
-                    # skip 00000000 buffer
-                    src_file2.readline()
-
-                    # parse through .S file
-                    detected = False
-                    done = False
-                    op1val = "0"
-                    op2val = "0"
-                    while not (detected or done):
-                        # print("det1")
-                        line = src_file1.readline()
-                        # print(line)
-                        if "op1val" in line:
-                            # print("det2")
-                            # parse line
-                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
-                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals
-                                op2val = line.split("op2val")[1].split("x")[1].strip()
-                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
-                            else:
-                                op2val = 32*"X"
-                            # go to next test in vector
-                            detected = True
-                        elif "RVTEST_CODE_END" in line:
-                            done = True
-                    # put it all together
-                    if not done:
-                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
-                        dest_file.write(translation + "\n")
-                else:
-                    # print("read false")
-                    reading = False
-        elif my_config.letter == "M" and my_config.bits == 64:
-            reading = True
-            #skip first 2 lines bc junk
-            src_file2.readline()
-            while reading:
-                # print("trigger 64M")
-                # get answer from Ref...signature
-                # answers span two lines and are reversed
-                answer2 = src_file2.readline().strip()
-                answer1 = src_file2.readline().strip()
-                answer = answer1 + answer2
-                #print(answer1,answer2)
-                if not (answer2 == "e7d4b281" and answer1 == "6f5ca309"): # if there is still stuff to read
-                    # parse through .S file
-                    detected = False
-                    done = False
-                    op1val = "0"
-                    op2val = "0"
-                    while not (detected or done):
-                        # print("det1")
-                        line = src_file1.readline()
-                        # print(line)
-                        if "op1val" in line:
-                            # print("det2")
-                            # parse line
-                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
-                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
-                                op1val = twos_comp(my_config.bits, op1val)
-                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later
-                                op2val = line.split("op2val")[1].split("x")[1].strip()
-                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
-                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
-                                    op2val = twos_comp(my_config.bits, op2val)
-                            # go to next test in vector
-                            detected = True
-                        elif "RVTEST_CODE_END" in line:
-                            done = True
-                    # ints don't have flags
-                    flags = "XX"
-                    # put it all together
-                    if not done:
-                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
-                        dest_file.write(translation + "\n")
-                else:
-                    # print("read false")
-                    reading = False
-        elif my_config.letter == "M" and my_config.bits == 32:
-            reading = True
-            while reading:
-                # print("trigger 64M")
-                # get answer from Ref...signature
-                # answers span two lines and are reversed
-                answer = src_file2.readline().strip()
-                print(f"Answer: {answer}")
-                #print(answer1,answer2)
-                if not (answer == "6f5ca309"): # if there is still stuff to read
-                    # parse through .S file
-                    detected = False
-                    done = False
-                    op1val = "0"
-                    op2val = "0"
-                    while not (detected or done):
-                        # print("det1")
-                        line = src_file1.readline()
-                        # print(line)
-                        if "op1val" in line:
-                            # print("det2")
-                            # parse line
-                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
-                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
-                                op1val = twos_comp(my_config.bits, op1val)
-                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later
-                                op2val = line.split("op2val")[1].split("x")[1].strip()
-                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
-                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
-                                    op2val = twos_comp(my_config.bits, op2val)
-                            # go to next test in vector
-                            detected = True
-                        elif "RVTEST_CODE_END" in line:
-                            done = True
-                    # ints don't have flags
-                    flags = "XX"
-                    # put it all together
-                    if not done:
-                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
-                        dest_file.write(translation + "\n")
-                else:
-                    # print("read false")
-                    reading = False 
-        else:
-            while reading:
-                # get answer and flags from Ref...signature
-                answer = src_file2.readline()
-                print(answer)
-                packed = src_file2.readline()[6:]
-                print("Packed: ", packed)
-                if len(packed.strip())>0: # if there is still stuff to read
-                    # print("packed")
-                    # parse through .S file
-                    detected = False
-                    done = False
-                    op1val = "0"
-                    op2val = "0"
-                    while not (detected or done):
-                        # print("det1")
-                        line = src_file1.readline()
-                        # print(line)
-                        if "op1val" in line:
-                            # print("det2")
-                            # parse line
-                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
-                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
-                                op1val = twos_comp(my_config.bits, op1val)
-                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals
-                                op2val = line.split("op2val")[1].split("x")[1].strip()
-                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
-                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
-                                    op2val = twos_comp(my_config.bits, op2val)
-                            # go to next test in vector
-                            detected = True
-                        elif "RVTEST_CODE_END" in line:
-                            done = True
-                    # rounding mode for float
-                    if not done and (my_config.op == "fsqrt" or my_config.op == "fdiv"):
-                        flags, rounding_mode = unpack_rf(packed)
-                    
-                    # put it all together
-                    if not done:
-                        translation = "{}_{}_{}".format(ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()))
-                        dest_file.write(translation + "\n")
-                else:
-                    # print("read false")
-                    reading = False
-        # print("out")
-        dest_file.close()
-        src_file1.close()
-        src_file2.close()
-
-config_list = [
-Config(32, "M", "div", "div-", 4),
-Config(32, "F", "fdiv", "fdiv", 1),
-Config(32, "F", "fsqrt", "fsqrt", 2),
-Config(32, "M", "rem", "rem-", 6),
-Config(32, "M", "divu", "divu-", 5),
-Config(32, "M", "remu", "remu-", 7),
-Config(64, "M", "div", "div-", 4),
-Config(64, "F", "fdiv", "fdiv", 1),
-Config(64, "F", "fsqrt", "fsqrt", 2),
-Config(64, "M", "rem", "rem-", 6),
-Config(64, "M", "divu", "divu-", 5),
-Config(64, "M", "remu", "remu-", 7),
-Config(64, "M", "divw", "divw-", 4),
-Config(64, "M", "divuw", "divuw-", 5),
-Config(64, "M", "remw", "remw-", 6),
-Config(64, "M", "remuw", "remuw-", 7)
-]
-
-for c in config_list:
-    create_vectors(c)
\ No newline at end of file

From 14f93c69587fc6e22a9a8f1047f4a3e2896e45b7 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kevindkim723@gmail.com>
Date: Tue, 25 Jul 2023 14:46:07 -0700
Subject: [PATCH 39/40] removed old wave do file for new fp testbench

---
 sim/wave-fpu copy.do | 29 -----------------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 sim/wave-fpu copy.do

diff --git a/sim/wave-fpu copy.do b/sim/wave-fpu copy.do
deleted file mode 100644
index a1cfa8731..000000000
--- a/sim/wave-fpu copy.do	
+++ /dev/null
@@ -1,29 +0,0 @@
-
-add wave -noupdate /testbenchfp/clk
-add wave -noupdate -radix decimal /testbenchfp/VectorNum
-add wave -noupdate /testbenchfp/FrmNum
-add wave -noupdate /testbenchfp/X
-add wave -noupdate /testbenchfp/Y
-add wave -noupdate /testbenchfp/Z
-add wave -noupdate /testbenchfp/Res
-add wave -noupdate /testbenchfp/Ans
-add wave -noupdate /testbenchfp/DivStart
-add wave -noupdate /testbenchfp/FDivBusyE
-add wave -noupdate /testbenchfp/CheckNow
-add wave -noupdate /testbenchfp/DivDone
-add wave -noupdate /testbenchfp/ResMatch
-add wave -noupdate /testbenchfp/FlagMatch
-add wave -noupdate /testbenchfp/CheckNow
-add wave -noupdate /testbenchfp/NaNGood
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
-add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
-add wave -group {Testbench} -noupdate /testbenchfp/*
-add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

From 52dc71507f6a15f83f7ec6132395ac1f2562c253 Mon Sep 17 00:00:00 2001
From: Ross Thompson <ross1728@gmail.com>
Date: Wed, 26 Jul 2023 15:08:01 -0500
Subject: [PATCH 40/40] Fixed lint errors for issue #368.  Does not fix
 simulation errors.  We made a design decision a long time ago to not support
 DTIM on the rv32gc config because LLEN was greater than XLEN.

---
 src/lsu/lsu.sv | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv
index effa4ff29..3584a05c8 100644
--- a/src/lsu/lsu.sv
+++ b/src/lsu/lsu.sv
@@ -232,7 +232,7 @@ module lsu import cvw::*;  #(parameter cvw_t P) (
     // **** create config to support DTIM with floating point.
     dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM),
               .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), 
-              .ReadDataWordM(DTIMReadDataWordM[P.XLEN-1:0]), .ByteMaskM(ByteMaskM[P.XLEN/8-1:0]));
+              .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM[P.LLEN/8-1:0]));
   end else begin
   end
   if (P.BUS_SUPPORTED) begin : bus              
@@ -308,11 +308,11 @@ module lsu import cvw::*;  #(parameter cvw_t P) (
 
       ahbinterface #(P.XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), 
         .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA),
-        .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM),
+        .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM[P.XLEN/8-1:0]), .WriteData(LSUWriteDataM[P.XLEN-1:0]),
         .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer));
 
     // Mux between the 2 sources of read data, 0: Bus, 1: DTIM
-      if(P.DTIM_SUPPORTED) mux2 #(P.XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM);
+      if(P.DTIM_SUPPORTED) mux2 #(P.XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM[P.XLEN-1:0], SelDTIM, ReadDataWordMuxM[P.XLEN-1:0]);
       else assign ReadDataWordMuxM = FetchBuffer[P.XLEN-1:0];
       assign LSUHBURST = 3'b0;
       assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0;