Make changes to fpdiv - still working on clock issue with fsm that was changed from posedge to negedge - also updated fpdivsqrt rounding to handle testfloat

2021-10-06 08:26:09 -05:00 · 2021-10-06 08:26:09 -05:00 · a91c0c8fc7
commit a91c0c8fc7
parent 5bcae393c9
13 changed files with 698 additions and 576 deletions
--- a/wally-pipelined/src/fpu/convert_inputs.sv
+++ b/wally-pipelined/src/fpu/convert_inputs.sv
@ -1,9 +1,26 @@
-// This module takes as inputs two operands (op1 and op2) 
+///////////////////////////////////////////
-// the operation type (op_type) and the result precision (P). 
+//
-// Based on the operation and precision , it conditionally
+// Written: James Stine
-// converts single precision values to double precision values
+// Modified: 8/1/2018
-// and modifies the sign of op1. The converted operands are Float1
+//
-// and Float2.
+// Purpose: Floating point divider/square root top unit (Goldschmidt)
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module convert_inputs(
   input [63:0]  op1,      // 1st input operand (A)
--- a/wally-pipelined/src/fpu/exception_div.sv
+++ b/wally-pipelined/src/fpu/exception_div.sv
@ -23,9 +23,10 @@ module exception_div (
   logic 	      BNaN; 		// '1' if B is a not-a-number
   logic 	      ASNaN;	 	// '1' if A is a signalling not-a-number
   logic 	      BSNaN;	 	// '1' if B is a signalling not-a-number
-   logic 	      ZQNaN;	 	// '1' if result Z is a quiet NaN
+   logic 	      ZSNaN;	 	// '1' if result Z is a quiet NaN
   logic 	      ZInf;	 	// '1' if result Z is an infinity
   logic 	      Zero;             // '1' if result is zero
   logic              NegSqrt;          // '1' if sqrt and operand is negative   
   //***take this module out and add more registers or just recalculate it all
   // Determine if mantissas are all zeros
@ -48,32 +49,34 @@ module exception_div (
   assign AZero = AzeroE & AzeroM;
   assign BZero = BzeroE & BzeroE;
   // Result is NaN if the operation is sqrt and the operand is negative and nonzero
   assign NegSqrt = (A[63] & op_type & ~AZero);
   // An "Invalid Operation" exception occurs if (A or B is a signalling NaN)
   // or (A and B are both Infinite)
   assign Invalid = ASNaN | BSNaN | (((AInf & BInf) | (AZero & BZero))&~op_type) | 
-		    (A[63] & op_type);
+		    NegSqrt;
   // The result is a quiet NaN if (an "Invalid Operation" exception occurs) 
   // or (A is a NaN) or (B is a NaN).
-   assign ZQNaN = Invalid | ANaN | BNaN;
+   assign ZSNaN = Invalid | ANaN | BNaN;
   //  The result is zero
   assign Zero = (AZero | BInf)&~op_type | AZero&op_type;   
   // The result is +Inf if ((A is Inf) or (B is 0)) and (the
   // result is not a quiet NaN).  
-   assign ZInf = (AInf | BZero)&~ZQNaN&~op_type | AInf&op_type&~ZQNaN;   
+   assign ZInf = (AInf | BZero)&~ZSNaN&~op_type | AInf&op_type&~ZSNaN;   
   // Set the type of the result as follows:
   // Ztype	Result 
   //  000     Normal
   //  001     Quiet NaN
   //  010     Infinity
   //  011     Zero
-   //  110     DivZero
+   //  110     Div by 0
-   assign Ztype[0] = ZQNaN | Zero;
+   //  111     SNaN
-   assign Ztype[1] = ZInf | Zero;
+   assign Ztype[2] = (ZSNaN);
-   assign Ztype[2] = BZero&~op_type;   
+   assign Ztype[1] = (ZSNaN) | (Zero) | (ZInf);
   assign Ztype[0] = (ZSNaN) | (Zero);
 endmodule // exception
--- a/wally-pipelined/src/fpu/fpdiv.sv
+++ b/wally-pipelined/src/fpu/fpdiv.sv
@ -1,44 +1,50 @@
 ///////////////////////////////////////////
 //
-// File name : fpdiv
+// Written: James Stine
-// Title     : Floating-Point Divider/Square-Root
+// Modified: 8/1/2018
 // project   : FPU
 // Library   : fpdiv
 // Author(s) : James E. Stine, Jr.
 // Purpose   : definition of main unit to floating-point div/sqrt
 // notes :   
 //
-// Copyright Oklahoma State University
+// Purpose: Floating point divider/square root top unit (Goldschmidt)
 // 
-// Basic Operations
+// A component of the Wally configurable RISC-V project.
 // 
-// Step 1: Load operands, set flags, and convert SP to DP
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 // Step 2: Check for special inputs ( +/- Infinity,  NaN)
 // Step 3: Exponent Logic
 // Step 4: Divide/Sqrt using Goldschmidt
 // Step 5: Normalize the result.//
 //   Shift left until normalized.  Normalized when the value to the 
 //   left of the binary point is 1.
 // Step 6: Round the result.// 
 // Step 7: Put quotient/remainder onto output.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 // `timescale 1ps/1ps
 module fpdiv (
  input logic 	      clk,
  input logic 	      reset,
  input logic 	      start,
-   input logic [63:0]   op1,		// 1st input operand (A)
+  input logic [63:0]  op1, 
-   input logic [63:0]   op2,		// 2nd input operand (B)
+  input logic [63:0]  op2, 
-   input logic [1:0]    rm,		// Rounding mode - specify values 
+  input logic [1:0]   rm, 
-   input logic 	      op_type,	// Function opcode
+  input logic 	      op_type, 
-   input logic 	      P,   		// Result Precision (0 for double, 1 for single)
+  input logic 	      P, 
-   input logic 	      OvEn,		// Overflow trap enabled
+  input logic 	      OvEn, 
-   input logic 	      UnEn,   	// Underflow trap enabled
+  input logic 	      UnEn,
  input logic 	      XNaNQ,
  input logic 	      YNaNQ,
  input logic 	      XZeroQ,
  input logic 	      YZeroQ,
  input logic 	      XInfQ,
  input logic 	      YInfQ, 
  output logic 	      done,
  output logic 	      FDivBusyE,
-   output logic [63:0]  AS_Result,	// Result of operation
+  output logic [63:0] AS_Result, 
-   output logic [4:0]   Flags);   	// IEEE exception flags 
+  output logic [4:0]  Flags);
   logic [63:0]       Float1; 
   logic [63:0]       Float2;
@ -46,10 +52,6 @@ module fpdiv (
   logic [12:0]       exp1, exp2, expF;
   logic [12:0]       exp_diff, bias;
   logic [13:0]       exp_sqrt;
   logic [12:0] 	exp_s;
   logic [12:0] 	exp_c;
   logic [10:0] 	exponent;
   logic [63:0]       Result;   
   logic [52:0]       mantissaA;
   logic [52:0]       mantissaB; 
@ -72,20 +74,12 @@ module fpdiv (
   logic 	      exp_cout1, exp_cout2;
   logic 	      exp_odd, open;
-   // div/sqrt
+   //  op_type : fdiv=0, fsqrt=1
         //  fdiv  = 0
         //  fsqrt = 1
   assign Float1 = op1;
   assign Float2 = op_type ? op1 : op2;   
-   // Test for exceptions and return the "Invalid Operation" and
+   // Exception detection
-   // "Denormalized" Input Flags. The "sel_inv" is used in
+   exception_div exc1 (.A(Float1), .B(Float2), .op_type, .Ztype(sel_inv), .Invalid);
   // the third pipeline stage to select the result. Also, op1_Norm
   // and op2_Norm are one if op1 and op2 are not zero or denormalized.
   // sub is one if the effective operation is subtraction.   
   exception_div exc1 (.A(Float1), .B(Float2), .op_type,
                     // output:
                     .Ztype(sel_inv), .Invalid);
   // Determine Sign/Mantissa
   assign signResult = (Float1[63]^Float2[63]);
@ -112,7 +106,6 @@ module fpdiv (
   // FSM : control divider   
   fsm control (.clk, .reset, .start, .op_type,
               // outputs:
 		.done, .load_rega, .load_regb, .load_regc, .load_regd, 
 		.load_regr, .load_regs, .sel_muxa, .sel_muxb, .sel_muxr, 
 		.divBusy(FDivBusyE));
@ -122,8 +115,10 @@ module fpdiv (
   // set the exception flags.   
   rounder_div round1 (.rm, .P, .OvEn, .UnEn, .exp_diff(expF), 
   		       .sel_inv, .Invalid, .SignR(signResult),
 		       .Float1(op1), .Float2(op2),
 		       .XNaNQ, .YNaNQ, .XZeroQ, .YZeroQ, 
 		       .XInfQ, .YInfQ, .op_type,		       
 		       .q1, .qm1, .qp1, .q0, .qm0, .qp0, .regr_out, 
                     // outputs:
                       .Result, .Flags(FlagsIn));
   // Store the final result and the exception flags in registers.
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -1,6 +1,6 @@
 ///////////////////////////////////////////
 //
-// Written: Katherine Parry, Bret Mathis
+// Written: Katherine Parry, James Stine, Brett Mathis
 // Modified: 6/23/2021
 //
 // Purpose: FPU
@ -99,35 +99,24 @@ module fpu (
     logic 		  XExpMaxE;                      // is the exponent all ones (max value)
     logic 		  XNormE;                 // is normal     
     // result and flag signals
     logic [63:0] 	  FDivResM, FDivResW; // divide/squareroot result
     logic [4:0] 	  FDivFlgM, FDivFlgW; // divide/squareroot flags  
     logic [63:0] 	  FMAResM, FMAResW;   // FMA/multiply result
     logic [4:0] 	  FMAFlgM, FMAFlgW;   // FMA/multiply flags	
     logic [63:0] 	  ReadResW;           // read result (load instruction)
     logic [63:0] 	  CvtFpResE, CvtFpResM, CvtFpResW; // add/FP -> FP convert result
     logic [4:0] 	  CvtFpFlgE, CvtFpFlgM, CvtFpFlgW; // add/FP -> FP convert flags
     logic [63:0] 	  CvtResE, CvtResM;   // FP <-> int convert result
     logic [4:0] 	  CvtFlgE, CvtFlgM;   // FP <-> int convert flags //*** trim this	
     logic [63:0] 	  ClassResE, ClassResM; // classify result
     logic [63:0] 	  CmpResE, CmpResM; // compare result
     logic 		  CmpNVE, CmpNVM;   // compare invalid flag (Not Valid)     
     logic [63:0] 	  SgnResE, SgnResM; // sign injection result
     logic 		  SgnNVE, SgnNVM;   // sign injection invalid flag (Not Valid)     
     logic [63:0] 	  FResE, FResM, FResW;     // selected result that is ready in the memory stage
     logic [4:0] 	  FFlgE, FFlgM;            // selected flag that is ready in the memory stage     
     logic [`XLEN-1:0] 	  FIntResE;     
     logic [63:0] 	  FPUResultW;    // final FP result being written to the FP register
     // other signals
@ -136,19 +125,9 @@ module fpu (
     logic 		  FDivClk;                // clock for divide/squareroot unit
     logic [63:0] 	  AlignedSrcAE;           // align SrcA to the floating point format
  ////////////////////////////////////////////////////////////////////////////////////////
     // DECODE STAGE
 	////////////////////////////////////////////////////////////////////////////////////////
     // calculate FP control signals
     fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
              // outputs:
 		  .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, 
 		  .FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
@ -157,17 +136,9 @@ module fpu (
     fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
 			.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), 
 			.wd4(FPUResultW),
         // outputs:
 			.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));	
 	////////////////////////////////////////////////////////////////////////////////////////
     // D/E pipeline registers
 	////////////////////////////////////////////////////////////////////////////////////////
     flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
     flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
     flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
@ -177,37 +148,27 @@ module fpu (
 			       {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
 			       {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
 	////////////////////////////////////////////////////////////////////////////////////////
     // EXECUTION STAGE
 	////////////////////////////////////////////////////////////////////////////////////////
     // Hazard unit for FPU  
     //    - determines if any forwarding or stalls are needed
     fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, 
                  // outputs:
                     .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
     // forwarding muxs
     mux3  #(64)  fxemux(FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
     mux3  #(64)  fyemux(FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
     mux3  #(64)  fzemux(FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
-	mux3  #(64)  fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, {2'b0, {10{1'b1}}, 52'b0}, {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, FSrcYE); // Force Z to be 0 for multiply instructions
+     mux3  #(64)  fyaddmux(FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, 
-	mux3  #(64)  fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); // Force Z to be 0 for multiply instructions
+			   {2'b0, {10{1'b1}}, 52'b0}, 
- 	
+			   {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==3'b01)}, 
 			   FSrcYE); // Force Z to be 0 for multiply instructions
     // Force Z to be 0 for multiply instructions     
     mux3  #(64)  fzmulmux(FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
     // unpacking unit
     //    - splits FP inputs into their various parts
     //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infinity)
     unpacking unpacking(.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, 
                      // outputs:
 			 .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
 			 .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
 			 .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
@ -217,8 +178,6 @@ module fpu (
     //   - execute stage - multiplication and addend shifting
     //   - memory stage  - addition and rounding
     //   - handles FMA and multiply instructions
  //    - contains some E/M pipeline registers
  // *** currently handles FLEN and 32 bits(dont know if 32 works with 128 - easy to fix) - change to handle only the supported formats
     fma fma (.clk, .reset, .FlushM, .StallM, 
 	      .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
 	      .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, 
@ -227,7 +186,6 @@ module fpu (
 	      .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
 	      .FOpCtrlE,
 	      .FmtE, .FmtM, .FrmM, 
     // outputs:
 	      .FMAFlgM, .FMAResM);
     // clock gater
@ -249,12 +207,15 @@ module fpu (
     flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E),
 				.en(1'b1), .clear(FDivSqrtDoneE),
 				.reset(reset),  .clk(FDivBusyE));
      flopenrc #(6) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE}), 
 				.q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ}),
 				.en(1'b1), .clear(FDivSqrtDoneE),
 				.reset(reset),  .clk(FDivBusyE));
-	// output for store instructions
+      // fpdivsqrt using Goldschmidt's iteration
  //*** change to use the unpacking unit if possible
      fpdiv fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlE[0]), 
 		      .reset, .clk(FDivClk), .start(FDivStartE), .P(~FmtE), .OvEn(1'b1), .UnEn(1'b1),
-                   // outputs:
+		      .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ,
 		      .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
     // convert from single to double and vice versa
@ -267,13 +228,11 @@ module fpu (
     fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), 
 		.FSrcXE, .FSrcYE, .FOpCtrlE, 
 		.FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, 
            // outputs:
 		.Invalid(CmpNVE), .CmpResE);
     // sign injection unit
     //    - computation is done in one stage
     fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE,
            // outputs:
 		.SgnNVE, .SgnResE);
     // classify
@ -281,11 +240,9 @@ module fpu (
     //    - most of the work is done in the unpacking unit
     //    - result is written to the integer register
     fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, 
                      // outputs:
 			  .XSNaNE, .ClassResE);
     fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .SrcAE, .FOpCtrlE, .FmtE, .FrmE,
            // outputs: 
 		.CvtResE, .CvtFlgE);
     // data to be stored in memory - to IEU
@ -293,7 +250,6 @@ module fpu (
     //        - if there are any unused bits, the most significant bits are filled with 1s
     assign FWriteDataE = FSrcYE[`XLEN-1:0];     
     // Align SrcA to MSB when single precision
     mux2  #(64)  SrcAMux({{32{1'b1}}, SrcAE[31:0]}, {{64-`XLEN{1'b1}}, SrcAE}, FmtE, AlignedSrcAE);
@ -302,14 +258,10 @@ module fpu (
     mux5  #(5)  FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE);
     // select the result that may be written to the integer register - to IEU
-	mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
+     mux4  #(`XLEN)  IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], 
 			       CvtResE[`XLEN-1:0], FIntResSelE, FIntResE);
  //***will synth remove registers of values that are always zero?
 	////////////////////////////////////////////////////////////////////////////////////////
     // E/M pipe registers
 	////////////////////////////////////////////////////////////////////////////////////////
     // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
     flopenrc #(65) EMFpReg2(clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM});
@ -318,45 +270,18 @@ module fpu (
     flopenrc #(12) EMFpReg5(clk, reset, FlushM, ~StallM, 
 			     {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
 			     {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});     
     flopenrc #(64) EMRegCmpRes(clk, reset, FlushM, ~StallM, FResE, FResM); 
     flopenrc #(5)  EMRegCmpFlg(clk, reset, FlushM, ~StallM, FFlgE, FFlgM);      
     flopenrc #(`XLEN) EMRegSgnRes(clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
 	// flopenrc #(1) EMRegSgnFlg(clk, reset, FlushM, ~StallM, SgnNVE, SgnNVM);
 	//flopenrc #(64) EMRegCvtFpRes(clk, reset, FlushM, ~StallM, CvtFpResE, CvtFpResM);
 	//flopenrc #(5) EMRegCvtFpFlg(clk, reset, FlushM, ~StallM, CvtFpFlgE, CvtFpFlgM);
 	// flopenrc #(64) EMRegCvtRes(clk, reset, FlushM, ~StallM, CvtResE, CvtResM);
 	// flopenrc #(5) EMRegCvtFlg(clk, reset, FlushM, ~StallM, CvtFlgE, CvtFlgM);
 	// flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResE, ClassResM);
     flopenrc #(11) EMCtrlReg(clk, reset, FlushM, ~StallM,
 			      {FRegWriteE, FResultSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
 			      {FRegWriteM, FResultSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
 	////////////////////////////////////////////////////////////////////////////////////////
     // BEGIN MEMORY STAGE
 	////////////////////////////////////////////////////////////////////////////////////////
     // FPU flag selection - to privileged
     mux4  #(5)  FPUFlgMux(5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelW, SetFflagsM);
 	////////////////////////////////////////////////////////////////////////////////////////
     // M/W pipe registers
 	////////////////////////////////////////////////////////////////////////////////////////
     flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); 
     flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); 
     flopenrc #(64) MWRegAdd(clk, reset, FlushW, ~StallW, CvtFpResM, CvtFpResW); 
@ -365,12 +290,7 @@ module fpu (
 			      {FRegWriteM, FResultSelM, FmtM, FWriteIntM},
 			      {FRegWriteW, FResultSelW, FmtW, FWriteIntW});
 	////////////////////////////////////////////////////////////////////////////////////////
     // BEGIN WRITEBACK STAGE
 	////////////////////////////////////////////////////////////////////////////////////////
     // put ReadData into NaN-blocking format
     //    - if there are any unused bits, the most significant bits are filled with 1s
@ -380,7 +300,6 @@ module fpu (
     // select the result to be written to the FP register
     mux4  #(64)  FPUResultMux(ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW);
  end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low
     assign FStallD = 0;
     assign FWriteIntE = 0; 
--- a/wally-pipelined/src/fpu/fregfile.sv
+++ b/wally-pipelined/src/fpu/fregfile.sv
@ -1,10 +1,9 @@
 ///////////////////////////////////////////
 // regfile.sv
 //
 // Written: David_Harris@hmc.edu 9 January 2021
-// Modified: 
+// Modified: James Stine 
 //
-// Purpose: 4-port register file
+// Purpose: 3-port output register file
 // 
 // A component of the Wally configurable RISC-V project.
 // 
@ -40,8 +39,6 @@ module fregfile (
   // write fourth port on falling edge of clock (A4/WD4/WE4)
   // write occurs on falling edge of clock   
  // reset is intended for simulation only, not synthesis
   always_ff @(negedge clk or posedge reset)
     if (reset) for(i=0; i<32; i++) rf[i] <= 0;
     else if (we4) rf[a4] <= wd4;	
--- a/wally-pipelined/src/fpu/fsm.sv
+++ b/wally-pipelined/src/fpu/fsm.sv
@ -1,49 +1,63 @@
-module fsm (
+///////////////////////////////////////////
 //
 // Written: James Stine
 // Modified: 9/28/2021
 //
 // Purpose: FSM for floating point divider/square root unit (Goldschmidt)
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module fsm (
   input logic 	      clk,
   input logic 	      reset,
   input logic 	      start,
   input logic 	      op_type,
-   output logic 		done,      // End of cycles
+   output logic       done, 
-   output logic 		load_rega, // enable for regA
+   output logic       load_rega, 
-   output logic 		load_regb, // enable for regB
+   output logic       load_regb, 
-   output logic 		load_regc, // enable for regC
+   output logic       load_regc, 
-   output logic 		load_regd, // enable for regD
+   output logic       load_regd,
-   output logic 		load_regr, // enable for rem
+   output logic       load_regr,
-   output logic 		load_regs, // enable for q,qm,qp 
+   output logic       load_regs,
-   output logic [2:0] 	sel_muxa,  // Select muxA
+   output logic [2:0] sel_muxa, 
-   output logic [2:0] 	sel_muxb,  // Select muxB
+   output logic [2:0] sel_muxb, 
-   output logic 		sel_muxr,  // Select rem mux
+   output logic       sel_muxr, 
-   output logic			divBusy	   // calculation is happening
+   output logic       divBusy	   
   );
   typedef enum       logic [4:0] {S0, S1, S2, S3, S4, S5, S6, S7, S8, S9,
 				   S10, S11, S12, S13, S14, S15, S16, S17, S18, S19,
 				   S20, S21, S22, S23, S24, S25, S26, S27, S28, S29,
 				   S30} statetype;
-   reg [4:0] 	CURRENT_STATE;
+   statetype current_state, next_state;
   reg [4:0] 	NEXT_STATE;   
   parameter [4:0] 
     S0=5'd0, S1=5'd1, S2=5'd2,
     S3=5'd3, S4=5'd4, S5=5'd5,
     S6=5'd6, S7=5'd7, S8=5'd8,
     S9=5'd9, S10=5'd10,
     S13=5'd13, S14=5'd14, S15=5'd15,     
     S16=5'd16, S17=5'd17, S18=5'd18,
     S19=5'd19, S20=5'd20, S21=5'd21,
     S22=5'd22, S23=5'd23, S24=5'd24,
     S25=5'd25, S26=5'd26, S27=5'd27,
     S28=5'd28, S29=5'd29, S30=5'd30;
   always @(negedge clk)
     begin
 	if (reset == 1'b1)
-	  CURRENT_STATE=S0;
+	  current_state = S0;
 	else
-	  CURRENT_STATE=NEXT_STATE;
+	  current_state = next_state;
     end
   always @(*)
     begin
- 	case(CURRENT_STATE)
+ 	case(current_state)
 	  S0:  // iteration 0
 	    begin
 	       if (start==1'b0)
@ -59,7 +73,7 @@ module fsm (
 		    sel_muxa = 3'b000;
 		    sel_muxb = 3'b000;
 		    sel_muxr = 1'b0;
-		    NEXT_STATE = S0;
+		    next_state = S0;
 		 end 
 	       else if (start==1'b1 && op_type==1'b0) 
 		 begin
@ -74,7 +88,7 @@ module fsm (
 		    sel_muxa = 3'b001;
 		    sel_muxb = 3'b001;		    
 		    sel_muxr = 1'b0;
-		    NEXT_STATE = S1;
+		    next_state = S1;
 		 end // if (start==1'b1 && op_type==1'b0)
 	       else if (start==1'b1 && op_type==1'b1) 
 		 begin
@ -89,7 +103,7 @@ module fsm (
 		    sel_muxa = 3'b010;
 		    sel_muxb = 3'b000;		    
 		    sel_muxr = 1'b0;
-		    NEXT_STATE = S13;
+		    next_state = S13;
 		 end 	   
 	       else
 		 begin
@ -104,7 +118,7 @@ module fsm (
 		    sel_muxa = 3'b000;
 		    sel_muxb = 3'b000;		    
 		    sel_muxr = 1'b0;
-		    NEXT_STATE = S0;
+		    next_state = S0;
 		 end
 	    end // case: S0
 	  S1:
@ -120,7 +134,7 @@ module fsm (
 	       sel_muxa = 3'b010;
 	       sel_muxb = 3'b000;		    
 	       sel_muxr = 1'b0;	
-	       NEXT_STATE = S2;
+	       next_state = S2;
 	    end	  
 	  S2: // iteration 1
 	    begin
@ -135,7 +149,7 @@ module fsm (
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S3;
+	       next_state = S3;
 	    end
 	  S3:
 	    begin
@ -150,7 +164,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S4;
+	       next_state = S4;
 	    end
 	  S4: // iteration 2
 	    begin
@ -165,7 +179,7 @@ module fsm (
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S5;
+	       next_state = S5;
 	    end
 	  S5:
 	    begin
@ -180,7 +194,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;  // add
-	       NEXT_STATE = S6;
+	       next_state = S6;
 	    end
 	  S6: // iteration 3
 	    begin
@ -195,7 +209,7 @@ module fsm (
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S8;
+	       next_state = S8;
 	    end
 	  S7:
 	    begin
@ -210,7 +224,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S8;
+	       next_state = S8;
 	    end // case: S7
 	  S8: // q,qm,qp
 	    begin
@ -225,7 +239,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S9;
+	       next_state = S9;
 	    end 
 	  S9:  // rem
 	    begin
@ -240,7 +254,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b1;
-	       NEXT_STATE = S10;
+	       next_state = S10;
 	    end 	  
 	  S10:  // done
 	    begin
@ -255,7 +269,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S0;
+	       next_state = S0;
 	    end 
 	  S13:  // start of sqrt path
 	    begin
@ -270,7 +284,7 @@ module fsm (
 	       sel_muxa = 3'b010;
 	       sel_muxb = 3'b001;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S14;
+	       next_state = S14;
 	    end
 	  S14:  
 	    begin
@ -285,7 +299,7 @@ module fsm (
 	       sel_muxa = 3'b001;
 	       sel_muxb = 3'b100;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S15;
+	       next_state = S15;
 	    end 
 	  S15:  // iteration 1
 	    begin
@ -300,7 +314,7 @@ module fsm (
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S16;
+	       next_state = S16;
 	    end
 	  S16:  
 	    begin
@ -315,7 +329,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S17;
+	       next_state = S17;
 	    end
 	  S17:  
 	    begin
@ -330,7 +344,7 @@ module fsm (
 	       sel_muxa = 3'b100;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S18;
+	       next_state = S18;
 	    end
 	  S18:  // iteration 2
 	    begin
@ -345,7 +359,7 @@ module fsm (
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S19;
+	       next_state = S19;
 	    end
 	  S19:  
 	    begin
@ -360,7 +374,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S20;
+	       next_state = S20;
 	    end
 	  S20:  
 	    begin
@ -375,7 +389,7 @@ module fsm (
 	       sel_muxa = 3'b100;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S21;
+	       next_state = S21;
 	    end
 	  S21:  // iteration 3
 	    begin
@ -390,7 +404,7 @@ module fsm (
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S22;
+	       next_state = S22;
 	    end
 	  S22:  
 	    begin
@ -405,7 +419,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b011;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S23;
+	       next_state = S23;
 	    end
 	  S23:  
 	    begin
@ -420,7 +434,7 @@ module fsm (
 	       sel_muxa = 3'b100;
 	       sel_muxb = 3'b010;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S24;
+	       next_state = S24;
 	    end 
 	  S24: // q,qm,qp
 	    begin
@ -435,7 +449,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S25;
+	       next_state = S25;
 	    end 	  
 	  S25:  // rem
 	    begin
@ -450,7 +464,7 @@ module fsm (
 	       sel_muxa = 3'b011;
 	       sel_muxb = 3'b110;
 	       sel_muxr = 1'b1;
-	       NEXT_STATE = S26;
+	       next_state = S26;
 	    end 
 	  S26:  // done
 	    begin
@ -465,7 +479,7 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S0;
+	       next_state = S0;
 	    end 
 	  default: 
 	    begin
@ -480,9 +494,9 @@ module fsm (
 	       sel_muxa = 3'b000;
 	       sel_muxb = 3'b000;
 	       sel_muxr = 1'b0;
-	       NEXT_STATE = S0;
+	       next_state = S0;
 	    end
-	endcase // case(CURRENT_STATE)	
+	endcase // case(current_state)	
-     end // always @ (CURRENT_STATE or X)   
+     end // always @ (current_state or X)   
 endmodule // fsm
--- a/wally-pipelined/src/fpu/rounder_div.sv
+++ b/wally-pipelined/src/fpu/rounder_div.sv
@ -1,16 +1,26 @@
 ///////////////////////////////////////////
 //
-// The rounder takes as inputs a 64-bit value to be rounded, A, the 
+// Written: James Stine
-// exponent of the value to be rounded, the sign of the final result, Sign, 
+// Modified: 8/1/2018
 // the precision of the results, P, and the two-bit rounding mode, rm. 
 // It produces a rounded 52-bit result, Z, the exponent of the rounded 
 // result, Z_exp, and a flag that indicates if the result was rounded,
 // Inexact. The rounding mode has the following values.
 //	    rm		Mode
 //      00 		round-to-nearest-even
 //	    01 		round-toward-zero
 //      10 		round-toward-plus infinity
 //      11  	round-toward-minus infinity
 //
 // Purpose: Floating point divider/square root rounder unit (Goldschmidt)
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module rounder_div (
    input logic [1:0] 	rm,
@ -21,7 +31,15 @@ module rounder_div (
    input logic [2:0] 	sel_inv,
    input logic 	Invalid,
    input logic 	SignR,
-   
+    input logic [63:0] 	Float1,
    input logic [63:0] 	Float2,
    input logic 	XNaNQ,
    input logic 	YNaNQ,
    input logic 	XZeroQ,
    input logic 	YZeroQ, 
    input logic 	XInfQ,
    input logic 	YInfQ,
    input logic 	op_type, 
    input logic [63:0] 	q1,
    input logic [63:0] 	qm1,
    input logic [63:0] 	qp1,
@ -62,6 +80,10 @@ module rounder_div (
   logic [63:0]        q, qm, qp;
   logic 	       exp_ovf;
   logic [50:0]        NaN_out;
   logic 	       NaN_Sign_out;   
   logic 	       Sign_out;     
   // Remainder = 0?
   assign zero_rem = ~(|regr_out);
   // Remainder Sign
@ -117,12 +139,11 @@ module rounder_div (
   // the input was infinite or NaN or the output of the adder is zero.
   // 00 = Valid
   // 10 = NaN
-   assign Valid = (~sel_inv[2]&~sel_inv[1]&~sel_inv[0]);
+   assign Valid = ~sel_inv[2]&~sel_inv[1]&~sel_inv[0];
-   assign NaN = ~sel_inv[1]& sel_inv[0];
+   assign NaN = sel_inv[2]&sel_inv[1]&sel_inv[0]; 
   assign UnderFlow = (P & UnFlow_SP | UnFlow_DP) & Valid;
   assign OverFlow  = (P & OvFlow_SP | OvFlow_DP) & Valid;
-   assign Div0 = sel_inv[2]&sel_inv[1]&~sel_inv[0];
+   assign Div0 = YZeroQ&~XZeroQ&~op_type&~NaN;   
   // The final result is Inexact if any rounding occurred ((i.e., R or S 
   // is one), or (if the result overflows) or (if the result underflows and the 
@ -162,8 +183,16 @@ module rounder_div (
   // If the result is NaN, the mantissa is 10...0
   // If the result is the largest floating point number, the mantissa
   // is all ones. Otherwise, the mantissa is not changed.
   assign NaN_out = ~XNaNQ&YNaNQ ? Float2[50:0] : Float1[50:0];
   assign NaN_Sign_out = ~XNaNQ&YNaNQ ? Float2[63] : Float1[63];
   assign Sign_out = (XZeroQ&YZeroQ | XInfQ&YInfQ)&~op_type | Rsign&~XNaNQ&~YNaNQ | 
   		     NaN_Sign_out&(XNaNQ|YNaNQ);
   // FIXME (jes) - Imperas gives sNaN a Sign=0 where x86 gives Sign=1
   // | Float1[63]&op_type;
   assign Rmant[51] = Largest | NaN | (Smant[51]&~Infinite&~Rzero);
-   assign Rmant[50:0] = {51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}});
+   assign Rmant[50:0] = ({51{Largest}} | (Smant[50:0]&{51{~Infinite&Valid&~Rzero}}) |
 			(NaN_out&{51{NaN}}))&({51{~(op_type&Float1[63]&~XZeroQ)}});
   // For single precision, the 8 least significant bits of the exponent
   // and 23 most significant bits of the mantissa contain bits used 
@ -171,8 +200,8 @@ module rounder_div (
   // overflow has occurred, the overflow trap is enabled, and a conversion
   // is being performed. 
   assign OvCon = OverFlow & OvEn;
-   assign Result = (P&~OvCon) ? { {32{1'b1}}, Rsign, Rexp[7:0], Rmant[51:29]}
+   assign Result = (P&~OvCon) ? { {32{1'b1}}, Sign_out, Rexp[7:0], Rmant[51:29]}
-	           : {Rsign, Rexp, Rmant};
+	           : {Sign_out, Rexp, Rmant};
 endmodule // rounder_div
--- a/wally-pipelined/src/fpu/sbtm_a0.sv
+++ b/wally-pipelined/src/fpu/sbtm_a0.sv
@ -1,5 +1,30 @@
 ///////////////////////////////////////////
 //
 // Written: James Stine
 // Modified: 8/1/2018
 //
 // Purpose: Bipartite Lookup
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module sbtm_a0 (input  logic [6:0] a,
 		output logic [12:0] y);
   always_comb
     case(a)
       7'b0000000: y = 13'b1111111100010;
--- a/wally-pipelined/src/fpu/sbtm_a1.sv
+++ b/wally-pipelined/src/fpu/sbtm_a1.sv
@ -1,5 +1,30 @@
 ///////////////////////////////////////////
 //
 // Written: James Stine
 // Modified: 8/1/2018
 //
 // Purpose: Bipartite Lookup
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module sbtm_a1 (input  logic [6:0] a,
 		output logic [4:0] y);
   always_comb
     case(a)
       7'b0000000: y = 5'b11100;
--- a/wally-pipelined/src/fpu/sbtm_a2.sv
+++ b/wally-pipelined/src/fpu/sbtm_a2.sv
@ -1,5 +1,30 @@
 ///////////////////////////////////////////
 //
 // Written: James Stine
 // Modified: 8/1/2018
 //
 // Purpose: Bipartite Lookup
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module sbtm_a2 (input  logic [7:0] a,
 		output logic [13:0] y);
   always_comb
     case(a)
       8'b01000000: y = 14'b10110100010111;
--- a/wally-pipelined/src/fpu/sbtm_a3.sv
+++ b/wally-pipelined/src/fpu/sbtm_a3.sv
@ -1,5 +1,30 @@
 ///////////////////////////////////////////
 //
 // Written: James Stine
 // Modified: 8/1/2018
 //
 // Purpose: Bipartite Lookup
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module sbtm_a3 (input  logic [7:0] a,
 		output logic [5:0] y);
   always_comb
     case(a)
       8'b01000000: y = 6'b100110;
--- a/wally-pipelined/src/fpu/sbtm_div.sv
+++ b/wally-pipelined/src/fpu/sbtm_div.sv
@ -1,3 +1,27 @@
 ///////////////////////////////////////////
 //
 // Written: James Stine
 // Modified: 8/1/2018
 //
 // Purpose: Bipartite Lookup for divide portion of fpdivsqrt
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module sbtm_div (input logic [11:0] a, output logic [10:0] ia_out);
   // bit partitions
--- a/wally-pipelined/src/fpu/sbtm_sqrt.sv
+++ b/wally-pipelined/src/fpu/sbtm_sqrt.sv
@ -1,3 +1,27 @@
 ///////////////////////////////////////////
 //
 // Written: James Stine
 // Modified: 8/1/2018
 //
 // Purpose: Bipartite Lookup for sqrt part of fpdivsqrt
 // 
 // A component of the Wally configurable RISC-V project.
 // 
 // Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
 // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
 // is furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 module sbtm_sqrt (input logic [11:0] a, output logic [10:0] y);
   // bit partitions