srt divider merged into fpu

2025-02-11 06:05:49 +00:00 · 2022-07-07 16:01:33 -07:00 · 2022-07-07 16:01:33 -07:00 · 41c16be012
commit 41c16be012
parent b41a6f069b
33 changed files with 1183 additions and 505 deletions
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@ -39,7 +39,7 @@

 // MISA RISC-V configuration per specification
 //                    ZYXWVUTSRQPONMLKJIHGFEDCBA
-`define MISA 32'b0000000000101000001000100101101
+`define MISA 32'b0000000000101000001000100100101
 `define ZICSR_SUPPORTED 1
 `define ZIFENCEI_SUPPORTED 1
 `define COUNTERS 32
--- a/pipelined/regression/testfloat.do
+++ b/pipelined/regression/testfloat.do
@ -32,7 +32,7 @@ vlib work
 # start and run simulation
 # remove +acc flag for faster sim during regressions if there is no need to access internal signals
 # $num = the added words after the call
-vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv  ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 
+vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv  ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 

 vsim -voptargs=+acc work.testbenchfp -G TEST=$2

--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@ -8,7 +8,8 @@ add wave -noupdate /testbenchfp/Z
 add wave -noupdate /testbenchfp/Res
 add wave -noupdate /testbenchfp/Ans
 add wave -noupdate /testbenchfp/DivStart
-add wave -noupdate /testbenchfp/DivDone
+add wave -noupdate /testbenchfp/DivBusy
+add wave -noupdate /testbenchfp/srtfsm/state
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
@ -17,12 +18,13 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
+add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/*
 add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
-add wave -group {Divide} -noupdate /testbenchfp/srtradix4/earlytermination/*
+add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
+add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
--- a/pipelined/src/fpu/cvtshiftcalc.sv
+++ b/pipelined/src/fpu/cvtshiftcalc.sv
@ -1,11 +1,39 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Conversion shift calculation
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module cvtshiftcalc(
-    input logic                    XZeroM,
+    input logic                    XZero,
    input logic                    ToInt,
    input logic                    IntToFp,
    input logic  [`NE:0]           CvtCalcExpM,    // the calculated expoent
-    input logic  [`NF:0]           XManM,          // input mantissas
+    input logic  [`NF:0]           Xm,          // input mantissas
    input logic     [`FMTBITS-1:0]  OutFmt,       // output format
    input logic  [`CVTLEN-1:0]      CvtLzcInM,      // input to the Leading Zero Counter (priority encoder)
    input logic CvtResDenormUfM,
@ -32,8 +60,8 @@ module cvtshiftcalc(
    //          - otherwise:
    //              |     LzcInM      | 0's if nessisary | 
    // change to int shift to the left one
-    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : 
-                     CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : 
+    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCalcExpM[`NE], Xm[`NF-1]|(CvtCalcExpM[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : 
+                     CvtResDenormUfM ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} : 
                                   {CvtLzcInM, {`NF+1{1'b0}}};
    
    
@ -65,6 +93,6 @@ module cvtshiftcalc(
    // determine if the result underflows ??? -> fp
    //      - if the first 1 is shifted out of the result then the result underflows
    //      - can't underflow an integer to fp conversions
-    assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;
+    assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
   
 endmodule
--- a/pipelined/src/fpu/divconv_pipe.sv
+++ b/pipelined/src/fpu/divconv_pipe.sv
@ -97,7 +97,7 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r
   // R4 Booth TDM multiplier (carry/save)
   redundantmul #(60) bigmul(.a(mcand), .b(mplier), .out0(Sum), .out1(Carry));   
   // Q*D - N (reversed but changed in rounder.v to account for sign reversal)
-   csa #(120) csa1 (Sum, Carry, constant, Sum2, Carry2);
+   csa #(120) csa1 (Sum, Carry, constant, 1'b0, Sum2, Carry2);
   // Add ulp for subtraction in remainder
   mux2 #(1) mx7 (1'b0, 1'b1, sel_muxr, muxr_out);

@ -181,18 +181,18 @@ module divconv_pipe (q1, qm1, qp1, q0, qm0, qp0, rega_out, regb_out, regc_out, r
 endmodule // divconv

 // *** rewrote behaviorally dh 5 Jan 2021 for speed
-module csa #(parameter WIDTH=8) (
-   input logic [WIDTH-1:0] a, b, c,
-	output logic [WIDTH-1:0] sum, carry);
+// module csa #(parameter WIDTH=8) (
+//    input logic [WIDTH-1:0] a, b, c,
+// 	output logic [WIDTH-1:0] sum, carry);

-   assign sum = a ^ b ^ c;
-   assign carry = (a & (b | c)) | (b & c);
-/*
-   logic [WIDTH:0] 					  carry_temp;   
-   genvar 						  i;
-       for (i=0;i<WIDTH;i=i+1) begin : genbit
-	    fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
-	  end
-   assign carry = {carry_temp[WIDTH-1:1], 1'b0};     
-*/
-endmodule // csa
+//    assign sum = a ^ b ^ c;
+//    assign carry = (a & (b | c)) | (b & c);
+// /*
+//    logic [WIDTH:0] 					  carry_temp;   
+//    genvar 						  i;
+//        for (i=0;i<WIDTH;i=i+1) begin : genbit
+// 	    fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
+// 	  end
+//    assign carry = {carry_temp[WIDTH-1:1], 1'b0};     
+// */
+// endmodule // csa
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@ -2,9 +2,9 @@

 module divshiftcalc(
    input logic  [`DIVLEN+2:0] Quot,
-    input logic  [`NE+1:0] DivCalcExpM,
-    input logic  [`FMTBITS-1:0] FmtM,
+    input logic  [`FMTBITS-1:0] Fmt,
    input logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M,
+    input logic [`NE+1:0] DivCalcExpM,
    output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
    output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
    output logic DivResDenorm,
@ -17,44 +17,44 @@ module divshiftcalc(
    // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
    assign DivResDenorm = DivCalcExpM[`NE+1]|(~|DivCalcExpM[`NE+1:0]);
    // select the proper fraction lengnth
-    if (`FPSIZES == 1) begin
-        assign Nf = (`NE+2)'(`NF);
+    // if (`FPSIZES == 1) begin
+    //     assign Nf = (`NE+2)'(`NF);

-    end else if (`FPSIZES == 2) begin
-        assign Nf = FmtM ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);
+    // end else if (`FPSIZES == 2) begin
+    //     assign Nf = Fmt ? (`NE+2)'(`NF) : (`NE+2)'(`NF1);

-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (FmtM)
-                `FMT: Nf = (`NE+2)'(`NF);
-                `FMT1: Nf = (`NE+2)'(`NF1);
-                `FMT2: Nf = (`NE+2)'(`NF2);
-                default: Nf = 1'bx;
-            endcase
-    end else if (`FPSIZES == 4) begin
-        always_comb
-            case (FmtM)
-                2'h3: Nf = (`NE+2)'(`Q_NF);
-                2'h1: Nf = (`NE+2)'(`D_NF);
-                2'h0: Nf = (`NE+2)'(`S_NF);
-                2'h2: Nf = (`NE+2)'(`H_NF);
-            endcase
-    end
+    // end else if (`FPSIZES == 3) begin
+    //     always_comb
+    //         case (Fmt)
+    //             `FMT: Nf = (`NE+2)'(`NF);
+    //             `FMT1: Nf = (`NE+2)'(`NF1);
+    //             `FMT2: Nf = (`NE+2)'(`NF2);
+    //             default: Nf = 1'bx;
+    //         endcase
+    // end else if (`FPSIZES == 4) begin
+    //     always_comb
+    //         case (Fmt)
+    //             2'h3: Nf = (`NE+2)'(`Q_NF);
+    //             2'h1: Nf = (`NE+2)'(`D_NF);
+    //             2'h0: Nf = (`NE+2)'(`S_NF);
+    //             2'h2: Nf = (`NE+2)'(`H_NF);
+    //         endcase
+    // end
    // if the result is denormalized
-    //  00000000x.xxxxxx...                     Exp = DivCalcExp
-    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  .00xxxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = +1
+    //  00000000x.xxxxxx...                     Exp = DivCalcExpM
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExpM+NF+1
+    //  .00xxxxxxxxxxxxx... << DivCalcExpM+NF+1  Exp = +1
    //  .0000xxxxxxxxxxx... >> 1                Exp = 1
-    // Left shift amount  = DivCalcExp+NF+1-1
-    assign DivDenormShift = Nf+DivCalcExpM;
+    // Left shift amount  = DivCalcExpM+NF+1-1
+    assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExpM;
    // if the result is normalized
-    //  00000000x.xxxxxx...                     Exp = DivCalcExp
-    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  00000000.xxxxxxx... << NF               Exp = DivCalcExp+1
-    //  00000000x.xxxxxx... << NF               Exp = DivCalcExp (extra shift done afterwards)
-    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1 (determined after)
+    //  00000000x.xxxxxx...                     Exp = DivCalcExpM
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExpM+NF+1
+    //  00000000.xxxxxxx... << NF               Exp = DivCalcExpM+1
+    //  00000000x.xxxxxx... << NF               Exp = DivCalcExpM (extra shift done afterwards)
+    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExpM-1 (determined after)
    // inital Left shift amount  = NF
-    assign NormShift = Nf;
+    assign NormShift = (`NE+2)'(`NF);
    // if the shift amount is negitive then dont shift (keep sticky bit)
    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, EarlyTermShiftDiv2M&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0};

--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@ -0,0 +1,68 @@
+///////////////////////////////////////////
+// divsqrt.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
+// Modified:13 January 2022
+//
+// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module divsqrt( // structural wrapper: instantiates srtpreproc, srtfsm and srtradix4 and wires them together
+  input  logic clk, // passed to srtfsm and srtradix4
+  input  logic reset, // passed to srtfsm only
+  input  logic [`FMTBITS-1:0] FmtE, // passed to srtradix4
+  input  logic [`NF:0] XManE, YManE, // passed to srtpreproc
+  input  logic [`NE-1:0] XExpE, YExpE, // passed to srtradix4
+  input  logic XInfE, YInfE, // passed to srtfsm
+  input  logic XZeroE, YZeroE, // passed to srtfsm and srtradix4
+  input  logic XNaNE, YNaNE, // passed to srtfsm
+  input  logic DivStartE, // wired to the DivStart port of both srtfsm and srtradix4
+  input  logic StallM, // passed to srtfsm
+  input logic StallE, // passed to srtfsm
+  output logic DivStickyM, // wired to srtfsm port DivStickyE - NOTE(review): E/M suffix mismatch, confirm intended stage
+  output logic DivNegStickyM, // wired to srtfsm port DivNegStickyE - NOTE(review): E/M suffix mismatch, confirm intended stage
+  output logic DivBusy, // connected to both srtfsm and srtradix4; which one drives it is not visible in this file
+  output logic DivDone, // connected to srtfsm
+  output logic [`NE+1:0] DivCalcExpM, // connected to srtradix4
+  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, // wired to srtfsm port EarlyTermShiftDiv2E - NOTE(review): E/M suffix mismatch
+  output logic [`DIVLEN+2:0] QuotM // wired to srtradix4 port Quot
+//   output logic [`XLEN-1:0] RemM,
+);
+
+  logic [`DIVLEN+3:0]  WSN, WCN; // shared between srtfsm and srtradix4 (direction not visible here)
+  logic [`DIVLEN+3:0]  WS, WC; // shared between srtfsm and srtradix4 (direction not visible here)
+  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; // connect srtpreproc and srtradix4
+  logic [`DIVLEN-1:0] X; // connects srtpreproc and srtradix4
+  logic [`DIVLEN-1:0] Dpreproc; // connects srtpreproc and srtradix4
+  logic [$clog2(`DIVLEN/2+3)-1:0] Dur; // connects srtpreproc and srtfsm
+
+  srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); // implicit .name connections to the mantissa inputs and the locals above
+
+  srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
+                .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M)); // the E-named srtfsm ports are tied to this module's M-named outputs
+  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
+                .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); // the Rem port is deliberately left unconnected
+endmodule
--- a/pipelined/src/fpu/fclassify.sv
+++ b/pipelined/src/fpu/fclassify.sv
@ -1,4 +1,31 @@
-
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: classify unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module fclassify (
--- a/pipelined/src/fpu/fcmp.sv
+++ b/pipelined/src/fpu/fcmp.sv
@ -1,4 +1,32 @@

+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Comparison unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 // FOpCtrlE values
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@ -1,3 +1,31 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: control unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module fctrl (
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@ -1,4 +1,33 @@

+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Floating point conversions of configurable size
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
 `include "wally-config.vh"

 module fcvt (
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@ -1,12 +1,40 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Post-Processing flag calculation
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module flags(
-    input logic                 XSgnM,
+    input logic                 Xs,
    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                 Plus1,
    input logic                 InfIn,                  // is a Inf input being used
-    input logic                 XZeroM, YZeroM,         // inputs are zero
+    input logic                 XZero, YZero,         // inputs are zero
    input logic                 XNaNM, YNaNM,           // inputs are NaN
    input logic                 NaNIn,                  // is a NaN input being used
    input logic                 Sqrt,                   // Sqrt?
@ -108,7 +136,7 @@ module flags(
    //                  if the res is too small to be represented and not 0
    //                  |                                     and if the res is not invalid (outside the integer bounds)
    //                  |                                     |
-    assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~IntInvalid;
+    assign IntInexact = ((CvtCalcExpM[`NE]&~XZero)|Sticky|Round)&~IntInvalid;

    // select the inexact flag to output
    assign Inexact = ToInt ? IntInexact : FpInexact;
@ -125,18 +153,18 @@ module flags(
    //                  |           |                                  |                    |               or the res rounds up out of bounds
    //                  |           |                                  |                    |                       and the res didn't underflow
    //                  |           |                                  |                    |                       |
-    assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
+    assign IntInvalid = XNaNM|XInfM|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
    //                                                                                                     |
    //                                                                                                     or when the positive res rounds up out of range
    assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);
-    assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
-    assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt);
+    assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZero & YInfM) | (YZero & XInfM);
+    assign DivInvalid = ((XInfM & YInfM) | (XZero & YZero))&~Sqrt | (Xs&Sqrt);

    assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);

    // if dividing by zero and not 0/0
    //  - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
-    assign DivByZero = YZeroM&DivOp&~(XZeroM|NaNIn|InfIn);  
+    assign DivByZero = YZero&DivOp&~(XZero|NaNIn|InfIn);  

    // Combine flags
    //      - to integer results do not set the underflow or overflow flags
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -1,6 +1,6 @@
 ///////////////////////////////////////////
 //
-// Written: Katherine Parry, David Harris
+// Written: me@KatherineParry.com, David Harris
 // Modified: 6/23/2021
 //
 // Purpose: Floating point multiply-accumulate of configurable size
@ -33,23 +33,23 @@ module fma(
    input logic                 Xs, Ys, Zs,    // input's signs
    input logic  [`NE-1:0]      Xe, Ye, Ze,    // input's biased exponents in B(NE.0) format
    input logic  [`NF:0]        Xm, Ym, Zm,    // input's significands in U(0.NF) format
-    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
-    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
-    input logic  [`FMTBITS-1:0] FmtE,       // precision 1 = double 0 = single
+    input logic                 XZero, YZero, ZZero, // is the input zero
+    input logic  [2:0]          FOpCtrl,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
+    input logic  [`FMTBITS-1:0] Fmt,       // format of the result single double half or quad
    output logic [`NE+1:0]      Pe,       // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
-    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
-    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
-    output logic [3*`NF+5:0]    Sm,           // the positive sum
-    output logic                NegSumE,        // was the sum negitive
-    output logic                InvA,          // intert Z
-    output logic                ZSgnEffE,       // the modified Z sign
+    output logic                ZmSticky,  // sticky bit that is calculated during alignment
+    output logic                KillProd,  // set the product to zero before addition if the product is too small to matter
+    output logic [3*`NF+5:0]    Sm,           // the positive sum's significand
+    output logic                NegSum,        // was the sum negative
+    output logic                InvA,          // Was A inverted for effective subtraction (P-A or -P+A)
+    output logic                As,       // the aligned addend's sign (modified Z sign for other operations)
    output logic                Ps,          // the product's sign
-    output logic [$clog2(3*`NF+7)-1:0]          FmaNormCntE        // normalization shift cnt
+    output logic [$clog2(3*`NF+7)-1:0]          NCnt        // normalization shift count
    );

    logic [2*`NF+1:0]   Pm;           // the product's significand in U(2.2Nf) format
-    logic [3*`NF+5:0]   Am;     // Z aligned for addition in U(NF+5.2NF+1)
-    logic [3*`NF+6:0]   AmInv;   // aligned addend possibly inverted
+    logic [3*`NF+5:0]   Am;     // aligned addend's mantissa for addition in U(NF+5.2NF+1)
+    logic [3*`NF+6:0]   AmInv;   // aligned addend's mantissa possibly inverted
    logic [2*`NF+1:0]   PmKilled;      // the product's mantissa possibly killed
    logic [3*`NF+6:0]   PreSum, NegPreSum;  // positive and negitve versions of the sum
    ///////////////////////////////////////////////////////////////////////////////
@ -62,7 +62,7 @@ module fma(
   

   // calculate the product's exponent 
-    expadd expadd(.FmtE, .Xe, .Ye, .XZeroE, .YZeroE, .Pe);
+    expadd expadd(.Fmt, .Xe, .Ye, .XZero, .YZero, .Pe);

    // multiplication of the mantissa's
    mult mult(.Xm, .Ym, .Pm);
@ -71,31 +71,31 @@ module fma(
    // Alignment shifter
    ///////////////////////////////////////////////////////////////////////////////

-    align align(.Ze, .Zm, .XZeroE, .YZeroE, .ZZeroE, .Xe, .Ye,
-                        .Am, .AddendStickyE, .KillProdE);
+    align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
+                        .Am, .ZmSticky, .KillProd);
                        
    // calculate the signs and take the opperation into account
-    sign sign(.FOpCtrlE, .Xs, .Ys, .Zs, .Ps, .ZSgnEffE);
+    sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);

    // ///////////////////////////////////////////////////////////////////////////////
    // // Addition/LZA
    // ///////////////////////////////////////////////////////////////////////////////
        
-    add add(.Am, .Pm, .Ps, .ZSgnEffE, .KillProdE, .AmInv, .PmKilled, .NegSumE, .PreSum, .NegPreSum, .InvA, .XZeroE, .YZeroE, .Sm);
+    add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm);
    
-    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .FmaNormCntE);
+    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt);
 endmodule


 module expadd(    
-    input  logic [`FMTBITS-1:0] FmtE,          // precision
-    input  logic [`NE-1:0]      Xe, Ye,  // input exponents
-    input  logic                XZeroE, YZeroE,        // are the inputs zero
+    input  logic [`FMTBITS-1:0] Fmt,          // format of the output: single double half quad
+    input  logic [`NE-1:0]      Xe, Ye,  // input's exponents
+    input  logic                XZero, YZero,        // are the inputs zero
    output logic [`NE+1:0]      Pe       // product's exponent B^(1023)NE+2
 );

    // kill the exponent if the product is zero - either X or Y is 0
-    assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};
+    assign Pe = ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZero|YZero)}};

 endmodule

@ -118,19 +118,19 @@ endmodule


 module sign(    
-    input  logic [2:0]  FOpCtrlE,               // precision
-    input  logic        Xs, Ys, Zs,    // are the inputs denormalized
+    input  logic [2:0]  FOpCtrl,               // operation control
+    input  logic        Xs, Ys, Zs,    // sign of the inputs
    output logic        Ps,     // the product's sign - takes opperation into account
-    output logic        ZSgnEffE   // Z sign used in fma - takes opperation into account
+    output logic        As   // aligned addend sign used in fma - takes operation into account
 );

    // Calculate the product's sign
    //      Negate product's sign if FNMADD or FNMSUB
    
    // flip is negation opperation
-    assign Ps = Xs ^ Ys ^ (FOpCtrlE[1]&~FOpCtrlE[2]);
+    assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]);
    // flip if subtraction
-    assign ZSgnEffE = Zs^FOpCtrlE[0];
+    assign As = Zs^FOpCtrl[0];

 endmodule

@ -143,16 +143,16 @@ endmodule

 module align(
    input logic  [`NE-1:0]      Xe, Ye, Ze,      // biased exponents in B(NE.0) format
-    input logic  [`NF:0]        Zm,      // fractions in U(0.NF) format]
-    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
-    output logic [3*`NF+5:0]    Am, // Z aligned for addition in U(NF+5.2NF+1)
-    output logic                AddendStickyE,  // Sticky bit calculated from the aliged addend
-    output logic                KillProdE       // should the product be set to zero
+    input logic  [`NF:0]        Zm,      // significand in U(0.NF) format
+    input logic                 XZero, YZero, ZZero, // is the input zero
+    output logic [3*`NF+5:0]    Am, // addend aligned for addition in U(NF+5.2NF+1)
+    output logic                ZmSticky,  // Sticky bit calculated from the aligned addend
+    output logic                KillProd       // should the product be set to zero
 );

-    logic [`NE+1:0]     AlignCnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
-    logic [4*`NF+5:0]   ZManShifted;        // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
-    logic [4*`NF+5:0]   ZManPreShifted;     // input to the alignment shifter U(NF+5.3NF+1)
+    logic [`NE+1:0]     ACnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
+    logic [4*`NF+5:0]   ZmShifted;        // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
+    logic [4*`NF+5:0]   ZmPreshifted;     // input to the alignment shifter U(NF+5.3NF+1)
    logic KillZ;

    ///////////////////////////////////////////////////////////////////////////////
@ -162,18 +162,18 @@ module align(
    // determine the shift count for alignment
    //      - negitive means Z is larger, so shift Z left
    //      - positive means the product is larger, so shift Z right
-    // This could have been done using Pe, but AlignCnt is on the critical path so we replicate logic for speed
-    assign AlignCnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};
+    // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
+    assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+3) - {2'b0, Ze};

    // Defualt Addition without shifting
    //          |   54'b0    |  106'b(product)  | 2'b0 |
    //          | addnend |

    // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
-    assign ZManPreShifted = {Zm,(3*`NF+5)'(0)};
+    assign ZmPreshifted = {Zm,(3*`NF+5)'(0)};
    
-    assign KillProdE = AlignCnt[`NE+1]|XZeroE|YZeroE;
-    assign KillZ = $signed(AlignCnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));
+    assign KillProd = ACnt[`NE+1]|XZero|YZero;
+    assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5));

    always_comb
        begin
@ -182,9 +182,9 @@ module align(

        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //  | addnend |
-        if (KillProdE) begin
-            ZManShifted = ZManPreShifted;
-            AddendStickyE = ~(XZeroE|YZeroE);
+        if (KillProd) begin
+            ZmShifted = ZmPreshifted;
+            ZmSticky = ~(XZero|YZero);

        // If the addend is too small to effect the addition        
        //      - The addend has to shift two past the end of the addend to be considered too small
@ -193,20 +193,20 @@ module align(
        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //                                                      | addnend |
        end else if (KillZ)  begin
-            ZManShifted = 0;
-            AddendStickyE = ~ZZeroE;
+            ZmShifted = 0;
+            ZmSticky = ~ZZero;

        // If the Addend is shifted right
        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //                                  | addnend |
        end else begin
-            ZManShifted = ZManPreShifted >> AlignCnt;
-            AddendStickyE = |(ZManShifted[`NF-1:0]);
+            ZmShifted = ZmPreshifted >> ACnt;
+            ZmSticky = |(ZmShifted[`NF-1:0]);

        end
    end

-    assign Am = ZManShifted[4*`NF+5:`NF];
+    assign Am = ZmShifted[4*`NF+5:`NF];

 endmodule

@ -217,15 +217,15 @@ endmodule


 module add(
-    input logic  [3*`NF+5:0]    Am, // Z aligned for addition in U(NF+5.2NF+1)
+    input logic  [3*`NF+5:0]    Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
    input logic  [2*`NF+1:0]    Pm,       // the product's mantissa
-    input logic                 Ps, ZSgnEffE,// the product and modified Z signs
-    input logic                 KillProdE,      // should the product be set to 0
-    input logic                 XZeroE, YZeroE, // is the input zero
+    input logic                 Ps, As,// the product's sign and the aligned addend's sign (modified Z sign for other operations)
+    input logic                 KillProd,      // should the product be set to 0
+    input logic                 XZero, YZero, // is the input zero
    output logic [3*`NF+6:0]    AmInv,  // aligned addend possibly inverted
    output logic [2*`NF+1:0]    PmKilled,     // the product's mantissa possibly killed
-    output logic                NegSumE,        // was the sum negitive
-    output logic                InvA,          // do you invert Z
+    output logic                NegSum,        // was the sum negative
+    output logic                InvA,          // do you invert the aligned addend
    output logic [3*`NF+5:0]    Sm,           // the positive sum
    output logic [3*`NF+6:0]    PreSum, NegPreSum// possibly negitive sum
 );
@ -237,12 +237,12 @@ module add(
    // Negate Z  when doing one of the following opperations:
    //      -prod +  Z
    //       prod -  Z
-    assign InvA = ZSgnEffE ^ Ps;
+    assign InvA = As ^ Ps;

    // Choose an inverted or non-inverted addend - the one has to be added now for the LZA
    assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am};
    // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
-    assign PmKilled = Pm&{2*`NF+2{~KillProdE}};
+    assign PmKilled = Pm&{2*`NF+2{~KillProd}};



@ -252,17 +252,17 @@ module add(
    assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)};
     
    // Is the sum negitive
-    assign NegSumE = PreSum[3*`NF+6];
+    assign NegSum = PreSum[3*`NF+6];

    // Choose the positive sum and accompanying LZA result.
-    assign Sm = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
+    assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
 endmodule


 module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
    input logic  [3*`NF+6:0] A,     // addend
    input logic  [2*`NF+1:0] P,     // product
-    output logic [$clog2(3*`NF+7)-1:0]       FmaNormCntE   // normalization shift count for the positive result
+    output logic [$clog2(3*`NF+7)-1:0]       NCnt   // normalization shift count for the positive result
    ); 
    
    logic [3*`NF+6:0] T;
@ -290,6 +290,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE



-    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
+    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt));
  
 endmodule
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@ -1,11 +1,39 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Fma shift calculation
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module fmashiftcalc(
    input logic  [3*`NF+5:0]            SumM,       // the positive sum
-    input logic  [`NE-1:0]              ZExpM,      // exponent of Z
+    input logic  [`NE-1:0]              Ze,      // exponent of Z
    input logic  [`NE+1:0]              ProdExpM,   // X exponent + Y exponent - bias
    input logic  [$clog2(3*`NF+7)-1:0]  FmaNormCntM,   // normalization shift count
-    input logic  [`FMTBITS-1:0]         FmtM,       // precision 1 = double 0 = single
+    input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
    input logic                         KillProdM,  // is the product set to zero
    input logic 			            ZDenormM,
    output logic [`NE+1:0]              ConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
@ -25,18 +53,18 @@ module fmashiftcalc(
    assign SumZero = ~(|SumM);

    // calculate the sum's exponent
-    assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4);
+    assign NormSumExp = KillProdM ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenormM} : ProdExpM + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} - 1 + (`NE+2)'(`NF+4);

    //convert the sum's exponent into the proper percision
    if (`FPSIZES == 1) begin
        assign ConvNormSumExp = NormSumExp;

    end else if (`FPSIZES == 2) begin
-        assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+        assign ConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};

    end else if (`FPSIZES == 3) begin
        always_comb begin
-            case (FmtM)
+            case (Fmt)
                `FMT: ConvNormSumExp = NormSumExp;
                `FMT1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
                `FMT2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
@ -46,7 +74,7 @@ module fmashiftcalc(

    end else if (`FPSIZES == 4) begin
        always_comb begin
-            case (FmtM)
+            case (Fmt)
                2'h3: ConvNormSumExp = NormSumExp;
                2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
                2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
@ -70,7 +98,7 @@ module fmashiftcalc(
        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
-        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
+        assign PreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~SumZero;

    end else if (`FPSIZES == 3) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
@ -81,7 +109,7 @@ module fmashiftcalc(
        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
        always_comb begin
-            case (FmtM)
+            case (Fmt)
                `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
                `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
                `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
@ -100,7 +128,7 @@ module fmashiftcalc(
        assign Sum3LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
        assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
        always_comb begin
-            case (FmtM)
+            case (Fmt)
                2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
                2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
                2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -1,6 +1,6 @@
 ///////////////////////////////////////////
 //
-// Written: Katherine Parry, James Stine, Brett Mathis
+// Written: me@KatherineParry.com, James Stine, Brett Mathis
 // Modified: 6/23/2021
 //
 // Purpose: FPU
@ -125,11 +125,12 @@ module fpu (
   logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
   
   //divide signals
-   logic [`DIVLEN+2:0] Quot;
-   logic [`NE+1:0] DivCalcExpM;
-   logic DivNegStickyM;
-   logic DivStickyM;
-   logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M;
+   logic [`DIVLEN+2:0] QuotE, QuotM;
+   logic [`NE+1:0] DivCalcExpE, DivCalcExpM; 
+   logic DivNegStickyE, DivNegStickyM;
+   logic DivStickyE, DivStickyM;
+   logic DivDoneM;
+   logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M;

   // result and flag signals
   logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
@ -185,9 +186,10 @@ module fpu (
   flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
                           {Adr1E, Adr2E, Adr3E});
-   flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE, 
-               {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
-               {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
+   flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+               {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
+               {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
+   flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);

   // EXECUTION STAGE
   
@ -249,37 +251,45 @@ module fpu (
         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
   
   // fma - does multiply, add, and multiply-add instructions 
-   fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), 
-            .Xm(XManE), .Ym(YManE), .Zm(ZManE), .XZeroE, .YZeroE, .ZZeroE, 
-            .FOpCtrlE, .FmtE, .Sm(SumE), .NegSumE, .InvA(InvAE), .FmaNormCntE, 
-            .ZSgnEffE, .Ps(PSgnE), .Pe(ProdExpE), .AddendStickyE, .KillProdE); 
-
-   // fpdivsqrt using Goldschmidt's iteration
-   if(`FLEN == 64) begin 
-   flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
-         .clear(FDivSqrtDoneE), .en(load_preload),
-         .reset(reset),  .clk(clk));
-   flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
-            .clear(FDivSqrtDoneE), .en(load_preload),
-            .reset(reset),  .clk(clk));
-   end
-   else if (`FLEN == 32) begin 
-   flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
-         .clear(FDivSqrtDoneE), .en(load_preload),
-         .reset(reset),  .clk(clk));
-   flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
-            .clear(FDivSqrtDoneE), .en(load_preload),
-            .reset(reset),  .clk(clk));
-   end
-   flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), 
-            .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
-            .clear(FDivSqrtDoneE), .en(load_preload),
-            .reset(reset),  .clk(clk));
-   fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), 
-         .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
-         .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
-         .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
+   fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), 
+            .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), 
+            .Xm(XManE), .Ym(YManE), .Zm(ZManE), 
+            .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), 
+            .FOpCtrl(FOpCtrlE), .Fmt(FmtE), 
+            .As(ZSgnEffE), .Ps(PSgnE),
+            .Sm(SumE), .Pe(ProdExpE), 
+            .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), 
+            .ZmSticky(AddendStickyE), .KillProd(KillProdE)); 

+   // // fpdivsqrt using Goldschmidt's iteration
+   // if(`FLEN == 64) begin 
+   // flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
+   //       .clear(FDivSqrtDoneE), .en(load_preload),
+   //       .reset(reset),  .clk(clk));
+   // flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
+   //          .clear(FDivSqrtDoneE), .en(load_preload),
+   //          .reset(reset),  .clk(clk));
+   // end
+   // else if (`FLEN == 32) begin 
+   // flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
+   //       .clear(FDivSqrtDoneE), .en(load_preload),
+   //       .reset(reset),  .clk(clk));
+   // flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
+   //          .clear(FDivSqrtDoneE), .en(load_preload),
+   //          .reset(reset),  .clk(clk));
+   // end
+   // flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), 
+   //          .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
+   //          .clear(FDivSqrtDoneE), .en(load_preload),
+   //          .reset(reset),  .clk(clk));
+   // fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), 
+   //       .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
+   //       .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
+   //       .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
+   divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
+                  .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
+                  .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
+                  .EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM));
   // other FP execution units
   fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
@ -371,10 +381,10 @@ module fpu (

   assign FpLoadStoreM = FResSelM[1];

-   postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .EarlyTermShiftDiv2M,
-                           .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
-                           .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
-                           .NegSumM, .InvZM(InvAM), .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .DivNegStickyM,
+   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .ProdExpM, .EarlyTermShiftDiv2M,
+                           .AddendStickyM, .KillProdM, .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInfM, .YInfM, .Quot(QuotM),
+                           .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, .DivDone(DivDoneM),
+                           .NegSumM, .InvZM(InvAM), .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrl(FOpCtrlM), .FmaNormCntM, .DivNegStickyM,
                           .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .DivStickyM,
                           .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);

--- a/pipelined/src/fpu/fsgninj.sv
+++ b/pipelined/src/fpu/fsgninj.sv
@ -1,6 +1,6 @@
 ///////////////////////////////////////////
 //
-// Written: Katherine Parry
+// Written: me@KatherineParry.com
 // Modified: 6/23/2021
 //
 // Purpose: FPU Sign Injection instructions
--- a/pipelined/src/fpu/lzacorrection.sv
+++ b/pipelined/src/fpu/lzacorrection.sv
@ -1,3 +1,31 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: shift correction
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module lzacorrection(
--- a/pipelined/src/fpu/negateintres.sv
+++ b/pipelined/src/fpu/negateintres.sv
@ -1,7 +1,35 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Negate integer result
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module negateintres(
-    input logic         XSgnM,
+    input logic         Xs,
    input logic [`NORMSHIFTSZ-1:0]  Shifted,
    input logic         Signed,
    input logic         Int64,
@ -12,7 +40,7 @@ module negateintres(

    
    // round and negate the positive res if needed
-    assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
+    assign NegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
    
    assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
 			              Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
--- a/pipelined/src/fpu/normshift.sv
+++ b/pipelined/src/fpu/normshift.sv
@ -1,3 +1,31 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: normalization shifter
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"


--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@ -1,9 +1,9 @@
 ///////////////////////////////////////////
 //
-// Written: Katherine Parry, David Harris
-// Modified: 6/23/2021
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
 //
-// Purpose: Floating point multiply-accumulate of configurable size
+// Purpose: Post-Processing
 // 
 // A component of the Wally configurable RISC-V project.
 // 
@ -31,13 +31,13 @@

 module postprocess(
    // general signals
-    input logic                             XSgnM, YSgnM,  // input signs
-    input logic  [`NE-1:0]                  ZExpM, // input exponents
-    input logic  [`NF:0]                    XManM, YManM, ZManM, // input mantissas
-    input logic  [2:0]                      FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic  [`FMTBITS-1:0]             FmtM,       // precision 1 = double 0 = single
-    input logic  [2:0]                      FOpCtrlM,       // choose which opperation (look below for values)
-    input logic                             XZeroM, YZeroM, ZZeroM, // inputs are zero
+    input logic                             Xs, Ys,  // input signs
+    input logic  [`NE-1:0]                  Ze, // exponent of the Z input
+    input logic  [`NF:0]                    Xm, Ym, Zm, // input mantissas
+    input logic  [2:0]                      Frm,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic  [`FMTBITS-1:0]             Fmt,       // precision 1 = double 0 = single
+    input logic  [2:0]                      FOpCtrl,       // choose which operation (look below for values)
+    input logic                             XZero, YZero, ZZero, // inputs are zero
    input logic                             XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic                             XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
@ -55,9 +55,10 @@ module postprocess(
    input logic  [$clog2(3*`NF+7)-1:0]      FmaNormCntM,   // the normalization shift count
    //divide signals
    input logic  [$clog2(`DIVLEN/2+3)-1:0]  EarlyTermShiftDiv2M,
-    input logic  [`NE+1:0]                  DivCalcExpM,    // the calculated expoent
    input logic                             DivStickyM,
    input logic                             DivNegStickyM,
+    input logic                             DivDone,
+    input logic  [`NE+1:0]                  DivCalcExpM,
    input logic  [`DIVLEN+2:0]              Quot,
    // conversion signals
    input logic  [`NE:0]                    CvtCalcExpM,    // the calculated expoent
@ -125,15 +126,15 @@ module postprocess(
    logic Sqrt;

    // signals to help readability
-    assign Signed =  FOpCtrlM[0];
-    assign Int64 =   FOpCtrlM[1];
-    assign IntToFp = FOpCtrlM[2];
+    assign Signed =  FOpCtrl[0];
+    assign Int64 =   FOpCtrl[1];
+    assign IntToFp = FOpCtrl[2];
    assign ToInt =   FWriteIntM;
-    assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
+    assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0];
    assign CvtOp = (PostProcSelM == 2'b00);
    assign FmaOp = (PostProcSelM == 2'b10);
-    assign DivOp = (PostProcSelM == 2'b01);
-    assign Sqrt =  FOpCtrlM[0];
+    assign DivOp = (PostProcSelM == 2'b01)&DivDone;
+    assign Sqrt =  FOpCtrl[0];

    // is there an input of infinity or NaN being used
    assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
@ -141,21 +142,21 @@ module postprocess(

    // choose the ouptut format depending on the opperation
    //      - fp -> fp: OpCtrl contains the percision of the output
-    //      - otherwise: FmtM contains the percision of the output
+    //      - otherwise: Fmt contains the precision of the output
    if (`FPSIZES == 2) 
-        assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT); 
+        assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); 
    else if (`FPSIZES == 3 | `FPSIZES == 4) 
-        assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0]; 
+        assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; 

    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
    ///////////////////////////////////////////////////////////////////////////////

-    cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,  
-                              .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
-    fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
+    cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .Xm, .CvtLzcInM,  
+                              .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
+    fmashiftcalc fmashiftcalc(.SumM, .Ze, .ProdExpM, .FmaNormCntM, .Fmt, .KillProdM, .ConvNormSumExp,
                          .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.FmtM, .Quot, .DivCalcExpM, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
+    divshiftcalc divshiftcalc(.Fmt, .DivCalcExpM, .Quot, .EarlyTermShiftDiv2M, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);

    always_comb
        case(PostProcSelM)
@ -168,8 +169,13 @@ module postprocess(
                ShiftIn =  {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
            end
            2'b01: begin //div
-                ShiftAmt = DivShiftAmt;
-                ShiftIn =  DivShiftIn;
+                if(DivDone) begin
+                    ShiftAmt = DivShiftAmt;
+                    ShiftIn =  DivShiftIn;
+                end else begin
+                    ShiftAmt = '0;
+                    ShiftIn =  '0;
+                end
            end
            default: begin 
                ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; 
@ -193,27 +199,28 @@ module postprocess(
    // round to infinity
    // round to nearest max magnitude

-    round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
+                          
+    roundsign roundsign(.PSgnM, .ZSgnEffM, .InvZM, .FmaOp, .DivOp, .CvtOp, .NegSumM, 
+                          .Xs, .Ys, .CvtResSgnM, .RoundSgn);
+
+    round round(.OutFmt, .Frm, .Sticky, .AddendStickyM, .ZZero, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
                .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt,  .CvtResUf,
-                .DivStickyM, .DivNegStickyM,
+                .DivStickyM, .DivNegStickyM, .DivDone,
                .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);

    ///////////////////////////////////////////////////////////////////////////////
    // Sign calculation
    ///////////////////////////////////////////////////////////////////////////////

-    resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .SumExp, .Round, .Sticky,
+    resultsign resultsign(.Frm, .PSgnM, .ZSgnEffM, .SumExp, .Round, .Sticky,
                          .FmaOp, .ZInfM, .InfIn, .SumZero, .Mult, .RoundSgn, .ResSgn);
-                          
-    roundsign roundsign(.PSgnM, .ZSgnEffM, .InvZM, .FmaOp, .DivOp, .CvtOp, .NegSumM, 
-                          .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn);

    ///////////////////////////////////////////////////////////////////////////////
    // Flags
    ///////////////////////////////////////////////////////////////////////////////

-    flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM, 
-                .XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
+    flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZero, .YZero, 
+                .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
                .XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round, .IntInvalid, .DivByZero,
                .UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
                .RoundExp, .NegResMSBS, .Invalid, .Overflow, .PostProcFlgM);
@ -222,9 +229,9 @@ module postprocess(
    // Select the result
    ///////////////////////////////////////////////////////////////////////////////

-    negateintres negateintres(.XSgnM, .Shifted, .Signed, .Int64, .Plus1, .NegResMSBS, .NegRes);
-    resultselect resultselect(.XSgnM, .XManM, .YManM, .ZManM, .XZeroM, .IntInvalid,
-        .IntZeroM, .FrmM, .OutFmt, .XNaNM, .YNaNM, .ZNaNM, .CvtResUf, 
+    negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .NegResMSBS, .NegRes);
+    resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
+        .IntZeroM, .Frm, .OutFmt, .XNaNM, .YNaNM, .ZNaNM, .CvtResUf, 
        .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegRes,
        .XInfM, .YInfM, .DivOp,
        .DivByZero, .FullResExp, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
--- a/pipelined/src/fpu/resultselect.sv
+++ b/pipelined/src/fpu/resultselect.sv
@ -1,14 +1,43 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: special case selection
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
 `include "wally-config.vh"

 module resultselect(
-    input logic                 XSgnM,        // input signs
-    input logic  [`NF:0]        XManM, YManM, ZManM, // input mantissas
+    input logic                 Xs,        // input signs
+    input logic  [`NF:0]        Xm, Ym, Zm, // input mantissas
    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic  [2:0]          FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic  [2:0]          Frm,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic  [`FMTBITS-1:0] OutFmt,       // output format
    input logic                 InfIn,
    input logic                 XInfM, YInfM,
-    input logic                 XZeroM,
+    input logic                 XZero,
    input logic                 IntZeroM,
    input logic                 NaNIn,
    input logic                 IntToFp,
@ -39,29 +68,29 @@ module resultselect(

    // does the overflow result output the maximum normalized floating point number
    //                output infinity if the input is infinity
-    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
+    assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~ResSgn) | (Frm[1:0]==2'b11&ResSgn));

    if (`FPSIZES == 1) begin

        //NaN res selection depending on standard
        if(`IEEE754) begin
-            assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-            assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-            assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+            assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+            assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+            assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
        end else begin
            assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
        end

        assign OfRes =  OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-        assign UfRes = {ResSgn, {`FLEN-1{1'b0}}, Plus1&FrmM[1]&~(DivOp&YInfM)};
+        assign UfRes = {ResSgn, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInfM)};
        assign NormRes = {ResSgn, ResExp, ResFrac};

    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
        if(`IEEE754) begin
-            assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
-            assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
-            assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+            assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
+            assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
+            assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
        end else begin 
            assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
@ -69,7 +98,7 @@ module resultselect(
        
        assign OfRes =  OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
                               OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-        assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
+        assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
        assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};

    end else if (`FPSIZES == 3) begin
@ -77,43 +106,43 @@ module resultselect(
            case (OutFmt)
                `FMT: begin  
                    if(`IEEE754) begin
-                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                    end else begin 
                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                    end
                    
                    OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
+                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
                    NormRes = {ResSgn, ResExp, ResFrac};
                end
                `FMT1: begin  
                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
-                        YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
-                        ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+                        XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
+                        YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
+                        ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                    end else begin 
                        InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                    end
                    OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
-                    UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
+                    UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
                    NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
                end
                `FMT2: begin  
                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
-                        YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
-                        ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
+                        XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
+                        YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
+                        ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
                    end else begin 
                        InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
                    end
                    
                    OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
-                    UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
+                    UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
                    NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
                end
                default: begin
@ -136,50 +165,50 @@ module resultselect(
            case (OutFmt)
                2'h3: begin  
                    if(`IEEE754) begin
-                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
-                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
-                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
+                        YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
+                        ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                    end else begin 
                        InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                    end
                    
                    OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
-                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
+                    UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
                    NormRes = {ResSgn, ResExp, ResFrac};
                end
                2'h1: begin  
                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
-                        YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
-                        ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
+                        XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
+                        YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
+                        ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                    end else begin 
                        InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                    end
                    OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
-                    UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
+                    UfRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
                    NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
                end
                2'h0: begin  
                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
-                        YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
-                        ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
+                        XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
+                        YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
+                        ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
                    end else begin 
                        InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
                    end
                    
                    OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
-                    UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
+                    UfRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
                    NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
                end
                2'h2: begin  
                    if(`IEEE754) begin
-                        XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
-                        YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
-                        ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
+                        XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
+                        YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
+                        ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
                    end else begin 
                        InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
@ -187,7 +216,7 @@ module resultselect(
                    
                    OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};      
 	            // zero is exact fi dividing by infinity so don't add 1
-                    UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmM[1]&~(DivOp&YInfM)};
+                    UfRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInfM)};
                    NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
                end
            endcase
@ -202,7 +231,7 @@ module resultselect(
    //      - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
    //      - dont set to zero if fp input is zero but not using the fp input
    //      - dont set to zero if int input is zero but not using the int input
-    assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZeroM)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
+    assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1] | (((YInfM&~XInfM)|XZero)&DivOp);//Underflow & ~ResDenorm & (ResExp!=1);
    assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInfM&DivOp));
    // output infinity with result sign if divide by zero
    if(`IEEE754) begin
@ -243,9 +272,9 @@ module resultselect(
    //        unsigned | 2^32-1 | 2^64-1 |
    //
    //      other: 32 bit unsinged res should be sign extended as if it were a signed number
-    assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
+    assign OfIntRes = Signed ? Xs&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negative
                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
-                               XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negitive
+                               Xs&~XNaNM ? {`XLEN{1'b0}} : // unsigned negative
                                              {`XLEN{1'b1}};// unsigned positive


@ -256,6 +285,6 @@ module resultselect(
    //          - otherwise output a rounded 0
    //      - otherwise output the normal res (trmined and sign extended if nessisary)
    assign FCvtIntResM = IntInvalid ?  OfIntRes :
-			            CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
+			            CvtCalcExpM[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
                        Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
 endmodule
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@ -1,7 +1,35 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: calculating the result's sign
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module resultsign(
-    input logic [2:0]   FrmM,
+    input logic [2:0]   Frm,
    input logic         PSgnM, ZSgnEffM,
    input logic         ZInfM,
    input logic         InfIn,
@ -25,7 +53,7 @@ module resultsign(
    //      if multiply then Psgn
    //      otherwise psign
    assign Underflow = SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky));
-    assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? FrmM[1:0] == 2'b10 : PSgnM;
+    assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : PSgnM;


    // is the result negitive
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@ -1,3 +1,31 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Rounder
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"
 // what position is XLEN in?
 //  options: 
@ -9,17 +37,18 @@

 module round(
    input logic  [`FMTBITS-1:0]     OutFmt,       // precision 1 = double 0 = single
-    input logic  [2:0]              FrmM,       // rounding mode
+    input logic  [2:0]              Frm,       // rounding mode
    input logic                     FmaOp,
    input logic                     DivOp,
    input logic                     CvtOp,
    input logic                     ToInt,
+    input logic                     DivDone,
    input logic  [1:0]              PostProcSelM,
    input logic                     CvtResDenormUfM,
    input logic                     CvtResUf,
    input logic  [`CORRSHIFTSZ-1:0] CorrShifted,
    input logic                     AddendStickyM,  // addend's sticky bit
-    input logic                     ZZeroM,         // is Z zero
+    input logic                     ZZero,         // is Z zero
    input logic                     InvZM,          // invert Z
    input logic  [`NE+1:0]          SumExp,         // exponent of the normalized sum
    input logic                     RoundSgn,      // the result's sign
@ -227,13 +256,13 @@ module round(

    // Deterimine if a small number was supposed to be subtrated
    //  - for FMA or if division has a negitive sticky bit
-    assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound);
-    assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZeroM&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky;
+    assign SubBySmallNum = ((AddendStickyM&FmaOp&~ZZero&InvZM) | (DivNegStickyM&DivOp)) & ~(NormSumSticky|UfRound);
+    assign UfSubBySmallNum = ((AddendStickyM&FmaOp&~ZZero&InvZM) | (DivNegStickyM&DivOp)) & ~NormSumSticky;


    always_comb begin
        // Determine if you add 1
-        case (FrmM)
+        case (Frm)
            3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
            3'b010: CalcPlus1 = RoundSgn & ~(SubBySmallNum & ~Round);//round down
@ -242,7 +271,7 @@ module round(
            default: CalcPlus1 = 1'bx;
        endcase
        // Determine if you add 1 (for underflow flag)
-        case (FrmM)
+        case (Frm)
            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
            3'b001: UfCalcPlus1 = 0;//round to zero
            3'b010: UfCalcPlus1 = RoundSgn & ~(UfSubBySmallNum & ~UfRound);//round down
@ -251,7 +280,7 @@ module round(
            default: UfCalcPlus1 = 1'bx;
        endcase
        // Determine if you subtract 1
-        case (FrmM)
+        case (Frm)
            3'b000: CalcMinus1 = 0;//round to nearest even
            3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
            3'b010: CalcMinus1 = ~RoundSgn & ~Round & SubBySmallNum;//round down
@ -309,8 +338,8 @@ module round(
        case(PostProcSelM)
            2'b10: RoundExp = SumExp; // fma
            2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
-            2'b01: RoundExp = CorrDivExp; // divide
-            default: RoundExp = 0; 
+            2'b01: RoundExp = DivDone ? CorrDivExp : '0; // divide
+            default: RoundExp = '0; 
        endcase

    // round the result
--- a/pipelined/src/fpu/roundsign.sv
+++ b/pipelined/src/fpu/roundsign.sv
@ -1,10 +1,38 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Sign calculation for rounding
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module roundsign(
    input logic         PSgnM, ZSgnEffM,
    input logic         InvZM,
-    input logic         XSgnM,
-    input logic         YSgnM,
+    input logic         Xs,
+    input logic         Ys,
    input logic         NegSumM,
    input logic         FmaOp,
    input logic         DivOp,
@ -24,7 +52,7 @@ module roundsign(

    // assign FmaResSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | (ZSgnEffM&PSgnM);

-    assign DivSgn = XSgnM^YSgnM;
+    assign DivSgn = Xs^Ys;

    // Sign for rounding calulation
    assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
--- a/pipelined/src/fpu/srt-radix4.sv
+++ b/pipelined/src/fpu/srt-radix4.sv
@ -1,8 +1,8 @@
 ///////////////////////////////////////////
 // srt.sv
 //
-// Written: David_Harris@hmc.edu 13 January 2022
-// Modified: 
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
+// Modified:13 January 2022
 //
 // Purpose: Combined Divide and Square Root Floating Point and Integer Unit
 // 
@ -33,38 +33,27 @@
 module srtradix4 (
  input  logic clk,
  input  logic DivStart, 
+  input  logic DivBusy, 
+  input logic  [`FMTBITS-1:0] FmtE,
  input  logic [`NE-1:0] XExpE, YExpE,
-  input  logic [`NF:0] XManE, YManE,
-  input  logic [`XLEN-1:0] SrcA, SrcB,
-  input  logic XInfE, YInfE, 
  input  logic XZeroE, YZeroE, 
-  input  logic XNaNE, YNaNE, 
-  input  logic       W64, // 32-bit ints on XLEN=64
-  input  logic       Signed, // Interpret integers as signed 2's complement
-  input  logic       Int, // Choose integer inputs
-  input  logic       Sqrt, // perform square root, not divide
-  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
-  output logic       DivDone,
-  output logic       DivStickyE,
-  output logic       DivNegStickyE,
+  input logic [`DIVLEN-1:0] X,
+  input logic [`DIVLEN-1:0] Dpreproc,
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
  output logic [`DIVLEN+2:0] Quot,
-  output logic [`XLEN-1:0] Rem, // *** later handle integers
-  output logic [`NE+1:0] DivCalcExpE
+  output logic [`DIVLEN+3:0]  WSN, WCN,
+  output logic [`DIVLEN+3:0]  WS, WC,
+  output logic  [`NE+1:0] DivCalcExpM,
+  output logic [`XLEN-1:0] Rem
 );

  logic [3:0]     q;
-  logic [`NE+1:0] DivCalcExp;
-  logic [`DIVLEN-1:0]    X;
-  logic [`DIVLEN-1:0]  Dpreproc;
-  logic [`DIVLEN+3:0]  WS, WSA, WSN;
-  logic [`DIVLEN+3:0]  WC, WCA, WCN;
+  logic [`DIVLEN+3:0]  WSA;
+  logic [`DIVLEN+3:0]  WCA;
  logic [`DIVLEN+3:0]  D, DBar, D2, DBar2, Dsel;
+  logic [`NE+1:0] DivCalcExp;
  logic [$clog2(`XLEN+1)-1:0] intExp;
-  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
  logic           intSign;
- 
-  srtpreproc preproc(.SrcA, .SrcB, .XManE, .YManE, .W64, .Signed, .Int, .Sqrt, .X, 
-                    .XZeroCnt, .YZeroCnt, .Dpreproc, .intExp, .intSign);

  // Top Muxes and Registers
  // When start is asserted, the inputs are loaded into the divider.
@ -79,6 +68,7 @@ module srtradix4 (
  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
  flop   #(`DIVLEN+4) wcflop(clk, WCN, WC);
  flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
+  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);

  // Quotient Selection logic
  // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
@ -91,9 +81,6 @@ module srtradix4 (
 	// 0001 = -2
  qsel4 qsel4(.D, .WS, .WC, .q);

-  // Store the expoenent and sign until division is DivDone
-  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
-
  // Divisor Selection logic
  // *** radix 4 change to choose -2 to 2
  // - choose the negitive version of what's being selected
@ -116,12 +103,9 @@ module srtradix4 (
  csa    #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
  
  //*** change for radix 4
-  otfc4 otfc4(.clk, .DivStart, .q, .Quot);
+  otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot);

-  expcalc expcalc(.XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
-
-  earlytermination earlytermination(.clk, .WC, .WS, .XZeroE, .YZeroE, .XInfE, .EarlyTermShiftDiv2E,
-                  .YInfE, .XNaNE, .YNaNE, .DivStickyE, .DivNegStickyE, .DivStart, .DivDone);
+  expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);

 endmodule

@ -129,38 +113,7 @@ endmodule
 // Submodules //
 ////////////////

-module earlytermination(
-  input  logic clk, 
-	input logic [`DIVLEN+3:0] WS, WC,
-  input  logic XInfE, YInfE, 
-  input  logic XZeroE, YZeroE, 
-  input  logic XNaNE, YNaNE, 
-  input  logic DivStart, 
-  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
-  output logic DivStickyE,
-  output logic DivNegStickyE,
-  output logic DivDone);
- 
-   logic [$clog2(`DIVLEN/2+3)-1:0]  Count;
-   logic WZero;
-   logic [`DIVLEN+3:0] W;

-  assign WZero = ((WS^WC)=={WS[`DIVLEN+2:0]|WC[`DIVLEN+2:0], 1'b0})|XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE;
-  assign DivDone = (DivStickyE | WZero);
-  assign DivStickyE = ~|Count;
-  assign W = WC+WS;
-  assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
-  assign EarlyTermShiftDiv2E = Count;
-  // +1 for setup
-  // `DIVLEN/2 to get required number of bits
-  // +1 for possible .5 and round bit
-  // Count down Counter
-  always @(posedge clk)
-    begin
-      if (DivStart) Count <= #1 `DIVLEN/2+2;
-      else     Count <= #1 Count-1;
-    end
-endmodule

 module qsel4 (
 	input logic [`DIVLEN+3:0] D,
@ -234,58 +187,13 @@ module qsel4 (
 	
 endmodule

-///////////////////
-// Preprocessing //
-///////////////////
-module srtpreproc (
-  input  logic [`XLEN-1:0] SrcA, SrcB,
-  input  logic [`NF:0] XManE, YManE,
-  input  logic       W64, // 32-bit ints on XLEN=64
-  input  logic       Signed, // Interpret integers as signed 2's complement
-  input  logic       Int, // Choose integer inputs
-  input  logic       Sqrt, // perform square root, not divide
-  output logic [`DIVLEN-1:0] X,
-  output logic [`DIVLEN-1:0] Dpreproc,
-  output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
-  output logic       intSign // Quotient integer sign
-);
-  // logic  [`XLEN-1:0] PosA, PosB;
-  // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
-  logic  [`DIVLEN-1:0] PreprocA, PreprocX;
-  logic  [`DIVLEN-1:0] PreprocB, PreprocY;
-
-  // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
-  // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
-  // lzc #(`XLEN) lzcA (PosA, zeroCntA);
-  // lzc #(`XLEN) lzcB (PosB, zeroCntB);
-
-  // ***can probably merge X LZC with conversion
-  // cout the number of leading zeros
-  lzc #(`NF+1) lzcA (XManE, XZeroCnt);
-  lzc #(`NF+1) lzcB (YManE, YZeroCnt);
-
-  // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
-  // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
-
-  // assign PreprocA = ExtraA << zeroCntA;
-  // assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
-  assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
-
-  
-  assign X = Int ? PreprocA : PreprocX;
-  assign Dpreproc = Int ? PreprocB : PreprocY;
-  // assign intExp = zeroCntB - zeroCntA + 1;
-  // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
-endmodule
-
 ///////////////////////////////////
 // On-The-Fly Converter, Radix 2 //
 ///////////////////////////////////
 module otfc4 (
  input  logic         clk,
  input  logic         DivStart,
+  input  logic         DivBusy,
  input  logic [3:0]   q,
  output logic [`DIVLEN+2:0] Quot
 );
@ -307,7 +215,7 @@ module otfc4 (
  // if starting a new divison set Q to 0 and QM to -1
  mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
  mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
-  flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); // *** have to connect Quot directly to M stage
+  flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage
  flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);

  // shift Q (quotent) and QM (quotent-1)
@ -361,23 +269,44 @@ module csa #(parameter N=69) (
  // bit, leaving room in the least significant bit to 
  // insert cin.

-  assign #1 out1 = in1 ^ in2 ^ in3;
-  assign #1 out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | 
+  assign out1 = in1 ^ in2 ^ in3;
+  assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | 
 		    (in2[N-2:0] & in3[N-2:0]), cin};
 endmodule

-
-//////////////
-// expcalc  //
-//////////////
 module expcalc(
-  input logic  [`NE-1:0] XExpE, YExpE,
-  input logic XZeroE,
-  input logic  [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic [`NE+1:0] DivCalcExp
-);
+  input logic  [`FMTBITS-1:0] FmtE,
+  input  logic [`NE-1:0] XExpE, YExpE,
+  input logic XZeroE, 
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  output logic  [`NE+1:0] DivCalcExp
+  );
+    logic [`NE-2:0] Bias;
+    
+    if (`FPSIZES == 1) begin
+        assign Bias = (`NE-1)'(`BIAS); 

-  // correct exponent for denormalized input's normalization shifts
-  assign DivCalcExp = (XExpE - XZeroCnt - YExpE + YZeroCnt + (`NE)'(`BIAS))&{`NE+2{~XZeroE}};
+    end else if (`FPSIZES == 2) begin
+        assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 

-endmodule
+    end else if (`FPSIZES == 3) begin
+        always_comb
+            case (FmtE)
+                `FMT: Bias  =  (`NE-1)'(`BIAS);
+                `FMT1: Bias = (`NE-1)'(`BIAS1);
+                `FMT2: Bias = (`NE-1)'(`BIAS2);
+                default: Bias = 'x;
+            endcase
+
+    end else if (`FPSIZES == 4) begin        
+        always_comb
+            case (FmtE)
+                2'h3: Bias =  (`NE-1)'(`Q_BIAS);
+                2'h1: Bias =  (`NE-1)'(`D_BIAS);
+                2'h0: Bias =  (`NE-1)'(`S_BIAS);
+                2'h2: Bias =  (`NE-1)'(`H_BIAS);
+            endcase
+    end
+    // correct exponent for denormalized input's normalization shifts
+    assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
+    endmodule
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@ -0,0 +1,85 @@
+///////////////////////////////////////////
+// srtfsm.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
+// Modified:13 January 2022
+//
+// Purpose: Control FSM for the Combined Divide and Square Root Floating Point and Integer Unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module srtfsm(
+  input  logic clk, 
+  input  logic reset, 
+  input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
+  input  logic XInfE, YInfE, 
+  input  logic XZeroE, YZeroE, 
+  input  logic XNaNE, YNaNE, 
+  input  logic DivStart, 
+  input logic StallE,
+  input logic StallM,
+  input  logic [$clog2(`DIVLEN/2+3)-1:0] Dur,
+  output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E,
+  output logic DivStickyE,
+  output logic DivDone,
+  output logic DivNegStickyE,
+  output logic DivBusy
+  );
+  
+  typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
+  statetype state;
+
+  logic [$clog2(`DIVLEN/2+3)-1:0] step;
+  logic WZero;
+  //logic [$clog2(`DIVLEN/2+3)-1:0] Dur;
+  logic [`DIVLEN+3:0] W;
+
+  //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
+  assign DivBusy = (state == BUSY);
+  assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
+  assign DivStickyE = ~WZero;
+  assign DivDone = (state == DONE);
+  assign W = WC+WS;
+  assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
+  assign EarlyTermShiftDiv2E = step;
+
+  always_ff @(posedge clk) begin
+      if (reset) begin
+          state <= #1 IDLE; 
+      end else if (DivStart&~StallE) begin 
+          step <= Dur;
+          if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE;
+          else         state <= #1 BUSY;
+      end else if (state == BUSY) begin
+          if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin
+              state <= #1 DONE;
+          end
+          step <= step - 1;
+      end else if (state == DONE) begin
+        if (StallM) state <= #1 DONE;
+        else        state <= #1 IDLE;
+      end 
+  end
+endmodule
--- a/pipelined/src/fpu/srtpreproc.sv
+++ b/pipelined/src/fpu/srtpreproc.sv
@ -0,0 +1,72 @@
+///////////////////////////////////////////
+// srtpreproc.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek
+// Modified:13 January 2022
+//
+// Purpose: Operand preprocessing for the Combined Divide and Square Root Floating Point and Integer Unit
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module srtpreproc (
+  input  logic [`NF:0] XManE, YManE,
+  output logic [`DIVLEN-1:0] X,
+  output logic [`DIVLEN-1:0] Dpreproc,
+  output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  output logic [$clog2(`DIVLEN/2+3)-1:0] Dur
+);
+  // logic  [`XLEN-1:0] PosA, PosB;
+  // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
+  logic  [`DIVLEN-1:0] PreprocA, PreprocX;
+  logic  [`DIVLEN-1:0] PreprocB, PreprocY;
+
+  // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
+  // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
+  // lzc #(`XLEN) lzcA (PosA, zeroCntA);
+  // lzc #(`XLEN) lzcB (PosB, zeroCntB);
+
+  // ***can probably merge X LZC with conversion
+  // count the number of leading zeros
+  lzc #(`NF+1) lzcA (XManE, XZeroCnt);
+  lzc #(`NF+1) lzcB (YManE, YZeroCnt);
+
+  // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
+  // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
+
+  // assign PreprocA = ExtraA << zeroCntA;
+  // assign PreprocB = ExtraB << (zeroCntB + 1);
+  assign PreprocX = {XManE[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocY = {YManE[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
+
+  
+  assign X = PreprocX;
+  assign Dpreproc = PreprocY;
+
+  assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2);
+  // assign intExp = zeroCntB - zeroCntA + 1;
+  // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
+
+
+endmodule
--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@ -1,3 +1,31 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: unpack all inputs
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module unpack ( 
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@ -1,3 +1,31 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: unpack input
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"

 module unpackinput ( 
--- a/pipelined/src/generic/lzc.sv
+++ b/pipelined/src/generic/lzc.sv
@ -1,3 +1,31 @@
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Leading Zero Counter
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 //leading zero counter i.e. priority encoder
 module lzc #(parameter WIDTH = 1) (
    input logic  [WIDTH-1:0]            num,
--- a/pipelined/src/hazard/hazard.sv
+++ b/pipelined/src/hazard/hazard.sv
@ -64,9 +64,9 @@ module hazard(
  assign StallFCause = CSRWriteFencePendingDEM & ~(TrapM | RetM | BPPredWrongE);
  // stall in decode if instruction is a load/mul/csr dependent on previous
  assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE);    
-  assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM);  // *** can we move to decode stage (KP?)
+  assign StallECause = (DivBusyE) & ~(TrapM);  // *** can we move to decode stage (KP?)
  // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled.  It could also terminate with TW trap
-  assign StallMCause = wfiM & (~TrapM & ~IntPendingM);  
+  assign StallMCause = (wfiM & (~TrapM & ~IntPendingM)) | FDivBusyE;  
  assign StallWCause = LSUStallM | IFUStallF;

  assign #1 StallF = StallFCause | StallD;
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -1,4 +1,31 @@
-
+///////////////////////////////////////////
+//
+// Written: me@KatherineParry.com
+// Modified: 7/5/2022
+//
+// Purpose: Testbench for Testfloat
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// MIT LICENSE
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this 
+// software and associated documentation files (the "Software"), to deal in the Software 
+// without restriction, including without limitation the rights to use, copy, modify, merge, 
+// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
+// to whom the Software is furnished to do so, subject to the following conditions:
+//
+//   The above copyright notice and this permission notice shall be included in all copies or 
+//   substantial portions of the Software.
+//
+//   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
+//   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
+//   PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+//   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+//   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+//   OR OTHER DEALINGS IN THE SOFTWARE.
+////////////////////////////////////////////////////////////////////////////////////////////////
 `include "wally-config.vh"
 `include "tests-fp.vh"

@ -56,19 +83,25 @@ module testbenchfp;
 	logic [`DIVLEN+2:0] Quot;
  logic CvtResDenormUfE;
  logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2;
-  logic DivStart, DivDone;
-  
+  logic DivStart, DivBusy;
+  logic reset = 1'b0;
+  logic [`DIVLEN-1:0]    DivX;
+  logic [`DIVLEN-1:0]  Dpreproc;
+  logic [`DIVLEN+3:0]  WSN, WS;
+  logic [`DIVLEN+3:0]  WCN, WC;
+  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
+  logic [$clog2(`DIVLEN/2+3)-1:0] Dur;

  // in-between FMA signals
  logic                 Mult;
  logic [`NE+1:0]	      Pe;
-  logic 				        AddendStickyE;
-  logic 					      KillProdE; 
-  logic [$clog2(3*`NF+7)-1:0]	FmaNormCntE;
+  logic 				        ZmSticky;
+  logic 					      KillProd; 
+  logic [$clog2(3*`NF+7)-1:0]	NCnt;
  logic [3*`NF+5:0]	    Sm;       
-  logic 			          InvZE;
-  logic 			          NegSumE;
-  logic 			          ZSgnEffE;
+  logic 			          InvA;
+  logic 			          NegSum;
+  logic 			          As;
  logic 			          Ps;
  logic       DivSticky;
  logic       DivNegSticky;
@ -638,21 +671,21 @@ module testbenchfp;

  // instantiate devices under test
  fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), 
-              .Xe(XExp), .Ye(YExp), .Ze(ZExp), 
-              .Xm(XMan), .Ym(YMan), .Zm(ZMan),
-              .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
-              .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .Sm, .NegSumE, .InvA(InvZE), .FmaNormCntE, .ZSgnEffE, .Ps,
-              .Pe, .AddendStickyE, .KillProdE); 
+          .Xe(XExp), .Ye(YExp), .Ze(ZExp), 
+          .Xm(XMan), .Ym(YMan), .Zm(ZMan),
+          .XZero, .YZero, .ZZero,
+          .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps,
+          .Pe, .ZmSticky, .KillProd); 
              
-  postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
-              .ZExpM(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp),
-              .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .DivStickyM(DivSticky),
+  postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSelM(UnitVal[1:0]),
+              .Ze(ZExp),  .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp),
+              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCalcExpM(CvtCalcExpE), .DivStickyM(DivSticky),
              .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .DivNegStickyM(DivNegSticky),
              .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
              .XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
              .XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
-              .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(Pe), 
-              .SumM(Sm), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(ZSgnEffE), .PSgnM(Ps), .FmtM(ModFmt), .FrmM(FrmVal), 
+              .KillProdM(KillProd), .AddendStickyM(ZmSticky), .ProdExpM(Pe), 
+              .SumM(Sm), .NegSumM(NegSum), .InvZM(InvA), .FmaNormCntM(NCnt), .EarlyTermShiftDiv2M(EarlyTermShiftDiv2), .ZSgnEffM(As), .PSgnM(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
              .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
  
  fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), 
@ -661,10 +694,12 @@ module testbenchfp;
  fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky),
-                .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .XNaNE(XNaN), .YNaNE(YNaN),
-                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2), .DivDone, .Quot, .Rem());
-                
+  srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
+  srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
+                .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2));
+  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
+                .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
+
  assign CmpFlg[3:0] = 0;

  // produce clock
@ -818,15 +853,7 @@ end

    // check if result is correct
    //  - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage)
-    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&((UnitVal !== `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
-      errors += 1;
-      $display("There is an error in %s", Tests[TestNum]);
-      $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
-      $stop;
-    end
-
-  // division
-    else if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(~DivStart&DivDone)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
+    if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
      errors += 1;
      $display("There is an error in %s", Tests[TestNum]);
      $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -849,7 +876,7 @@ end
      $stop;
    end

-    if((~DivStart&DivDone)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
+    if(~(DivBusy|DivStart)|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector

    if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file

@ -930,7 +957,7 @@ module readvectors (
            end
            Ans = TestVector[8+(`Q_LEN-1):8];
          end
-          2'b01:	begin	  // double
+          2'b01:	if (`D_SUPPORTED)begin	  // double
            if(OpCtrl === `FMA_OPCTRL) begin
              X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
              Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
@ -945,7 +972,7 @@ module readvectors (
            end
            Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
          end
-          2'b00:	begin	  // single
+          2'b00:	if (`S_SUPPORTED)begin	  // single
            if(OpCtrl === `FMA_OPCTRL) begin
              X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
              Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
@ -986,7 +1013,7 @@ module readvectors (
            DivStart = 1'b1; #10 // one clk cycle
            DivStart = 1'b0;
          end
-          2'b01:	begin	  // double
+          2'b01:	if (`D_SUPPORTED)begin	  // double
            X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
            Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
            Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
@ -994,7 +1021,7 @@ module readvectors (
            DivStart = 1'b1; #10
            DivStart = 1'b0;
          end
-          2'b00:	begin	  // single
+          2'b00:	if (`S_SUPPORTED)begin	  // single
            X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
            Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
            Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
@ -1018,12 +1045,12 @@ module readvectors (
            Y = TestVector[12+(`Q_LEN)-1:12];
            Ans = TestVector[8];
          end
-          2'b01:	begin	  // double
+          2'b01:	if (`D_SUPPORTED)begin	  // double
            X = {{`FLEN-`D_LEN{1'b1}}, TestVector[12+2*(`D_LEN)-1:12+(`D_LEN)]};
            Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[12+(`D_LEN)-1:12]};
            Ans = TestVector[8];
          end
-          2'b00:	begin	  // single
+          2'b00:	if (`S_SUPPORTED)begin	  // single
            X = {{`FLEN-`S_LEN{1'b1}}, TestVector[12+2*(`S_LEN)-1:12+(`S_LEN)]};
            Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[12+(`S_LEN)-1:12]};
            Ans = TestVector[8];
@ -1042,7 +1069,7 @@ module readvectors (
              X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
              Ans = TestVector[8+(`Q_LEN-1):8];
            end
-            2'b01:	begin	  // double
+            2'b01:	if (`D_SUPPORTED)begin	  // double
              X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
              Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
            end
@ -1056,7 +1083,7 @@ module readvectors (
            end
          endcase
          end
-          2'b01:	begin	  // double
+          2'b01:	if (`D_SUPPORTED)begin	  // double
          case (OpCtrl[1:0])
            2'b11: begin       // quad
              X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+`D_LEN+`Q_LEN-1:8+(`Q_LEN)]};
@ -1076,13 +1103,13 @@ module readvectors (
            end
          endcase
          end
-          2'b00:	begin	  // single
+          2'b00:	if (`S_SUPPORTED)begin	  // single
          case (OpCtrl[1:0])
            2'b11: begin       // quad
              X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`S_LEN+`Q_LEN-1:8+(`Q_LEN)]};
              Ans = TestVector[8+(`Q_LEN-1):8];
            end
-            2'b01:	begin	  // double
+            2'b01:	if (`D_SUPPORTED)begin	  // double
              X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+`S_LEN+`D_LEN-1:8+(`D_LEN)]};
              Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
            end
@ -1102,11 +1129,11 @@ module readvectors (
              X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`Q_LEN-1:8+(`Q_LEN)]};
              Ans = TestVector[8+(`Q_LEN-1):8];
            end
-            2'b01:	begin	  // double
+            2'b01:	if (`D_SUPPORTED)begin	  // double
              X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`D_LEN-1:8+(`D_LEN)]};
              Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
            end
-            2'b00:	begin	  // single
+            2'b00:	if (`S_SUPPORTED)begin	  // single
              X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+`H_LEN+`S_LEN-1:8+(`S_LEN)]};
              Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
            end
@ -1146,7 +1173,7 @@ module readvectors (
              end
            endcase
          end
-          2'b01:	begin	  // double
+          2'b01:	if (`D_SUPPORTED)begin	  // double
            //     {Int->Fp?, is the integer a long}
            casex ({OpCtrl[2:1]})
              2'b11: begin       // long -> double
@ -1172,7 +1199,7 @@ module readvectors (
              end
            endcase
          end
-          2'b00:	begin	  // single
+          2'b00:	if (`S_SUPPORTED)begin	  // single
            //     {is the integer a long,     is the operation to an integer}
            casex ({OpCtrl[2:1]})
              2'b11: begin       // long -> single
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@ -203,16 +203,16 @@ string imperas32f[] = '{
    "rv32i_m/F/FCVT-WU-S-RNE-01",
    "rv32i_m/F/FCVT-WU-S-RTZ-01",
    "rv32i_m/F/FCVT-WU-S-RUP-01",
-    // "rv32i_m/F/FDIV-S-DYN-RDN-01",
-    // "rv32i_m/F/FDIV-S-DYN-RMM-01",
-    // "rv32i_m/F/FDIV-S-DYN-RNE-01",
-    // "rv32i_m/F/FDIV-S-DYN-RTZ-01",
-    // "rv32i_m/F/FDIV-S-DYN-RUP-01",
-    // "rv32i_m/F/FDIV-S-RDN-01",
-    // "rv32i_m/F/FDIV-S-RMM-01",
-    // "rv32i_m/F/FDIV-S-RNE-01",
-    // "rv32i_m/F/FDIV-S-RTZ-01",
-    // "rv32i_m/F/FDIV-S-RUP-01",
+    "rv32i_m/F/FDIV-S-DYN-RDN-01",
+    "rv32i_m/F/FDIV-S-DYN-RMM-01",
+    "rv32i_m/F/FDIV-S-DYN-RNE-01",
+    "rv32i_m/F/FDIV-S-DYN-RTZ-01",
+    "rv32i_m/F/FDIV-S-DYN-RUP-01",
+    "rv32i_m/F/FDIV-S-RDN-01",
+    "rv32i_m/F/FDIV-S-RMM-01",
+    "rv32i_m/F/FDIV-S-RNE-01",
+    "rv32i_m/F/FDIV-S-RTZ-01",
+    "rv32i_m/F/FDIV-S-RUP-01",
    "rv32i_m/F/FEQ-S-01",
    "rv32i_m/F/FLE-S-01",
    "rv32i_m/F/FLT-S-01",
@ -390,16 +390,16 @@ string imperas32f[] = '{
    "rv64i_m/F/FCVT-WU-S-RNE-01",
    "rv64i_m/F/FCVT-WU-S-RTZ-01",
    "rv64i_m/F/FCVT-WU-S-RUP-01",
-    // "rv64i_m/F/FDIV-S-DYN-RDN-01",
-    // "rv64i_m/F/FDIV-S-DYN-RMM-01",
-    // "rv64i_m/F/FDIV-S-DYN-RNE-01",
-    // "rv64i_m/F/FDIV-S-DYN-RTZ-01",
-    // "rv64i_m/F/FDIV-S-DYN-RUP-01",
-    // "rv64i_m/F/FDIV-S-RDN-01",
-    // "rv64i_m/F/FDIV-S-RMM-01",
-    // "rv64i_m/F/FDIV-S-RNE-01",
-    // "rv64i_m/F/FDIV-S-RTZ-01",
-    // "rv64i_m/F/FDIV-S-RUP-01",
+    "rv64i_m/F/FDIV-S-DYN-RDN-01",
+    "rv64i_m/F/FDIV-S-DYN-RMM-01",
+    "rv64i_m/F/FDIV-S-DYN-RNE-01",
+    "rv64i_m/F/FDIV-S-DYN-RTZ-01",
+    "rv64i_m/F/FDIV-S-DYN-RUP-01",
+    "rv64i_m/F/FDIV-S-RDN-01",
+    "rv64i_m/F/FDIV-S-RMM-01",
+    "rv64i_m/F/FDIV-S-RNE-01",
+    "rv64i_m/F/FDIV-S-RTZ-01",
+    "rv64i_m/F/FDIV-S-RUP-01",
    "rv64i_m/F/FEQ-S-01",
    "rv64i_m/F/FLE-S-01",
    "rv64i_m/F/FLT-S-01",
@ -570,16 +570,16 @@ string imperas32f[] = '{
    "rv64i_m/D/FCVT-WU-D-RNE-01",
    "rv64i_m/D/FCVT-WU-D-RTZ-01",
    "rv64i_m/D/FCVT-WU-D-RUP-01",
-    // "rv64i_m/D/FDIV-D-DYN-RDN-01",
-    // "rv64i_m/D/FDIV-D-DYN-RMM-01",
-    // "rv64i_m/D/FDIV-D-DYN-RNE-01",
-    // "rv64i_m/D/FDIV-D-DYN-RTZ-01",
-    // "rv64i_m/D/FDIV-D-DYN-RUP-01",
-    // "rv64i_m/D/FDIV-D-RDN-01",
-    // "rv64i_m/D/FDIV-D-RMM-01",
-    // "rv64i_m/D/FDIV-D-RNE-01",
-    // "rv64i_m/D/FDIV-D-RTZ-01",
-    // "rv64i_m/D/FDIV-D-RUP-01",
+    "rv64i_m/D/FDIV-D-DYN-RDN-01",
+    "rv64i_m/D/FDIV-D-DYN-RMM-01",
+    "rv64i_m/D/FDIV-D-DYN-RNE-01",
+    "rv64i_m/D/FDIV-D-DYN-RTZ-01",
+    "rv64i_m/D/FDIV-D-DYN-RUP-01",
+    "rv64i_m/D/FDIV-D-RDN-01",
+    "rv64i_m/D/FDIV-D-RMM-01",
+    "rv64i_m/D/FDIV-D-RNE-01",
+    "rv64i_m/D/FDIV-D-RTZ-01",
+    "rv64i_m/D/FDIV-D-RUP-01",
    "rv64i_m/D/FEQ-D-01",
    "rv64i_m/D/FLD-01",
    "rv64i_m/D/FLE-D-01",
@ -1119,17 +1119,17 @@ string imperas32f[] = '{
    "rv64i_m/D/src/d_fcvt.wu.d_b27-01.S/ref/Ref",
    "rv64i_m/D/src/d_fcvt.wu.d_b28-01.S/ref/Ref",
    "rv64i_m/D/src/d_fcvt.wu.d_b29-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b1-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b20-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b2-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b21-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b3-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b4-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b5-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b6-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b7-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b8-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fdiv_b9-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b1-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b20-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b2-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b21-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b3-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b4-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b5-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b6-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b7-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b8-01.S/ref/Ref",
+    "rv64i_m/D/src/d_fdiv_b9-01.S/ref/Ref",
    "rv64i_m/D/src/d_feq_b1-01.S/ref/Ref",
    "rv64i_m/D/src/d_feq_b19-01.S/ref/Ref",
    "rv64i_m/D/src/d_fle_b1-01.S/ref/Ref",
@ -1291,17 +1291,17 @@ string imperas32f[] = '{
    "rv32i_m/F/src/fcvt.wu.s_b27-01.S/ref/Ref",
    "rv32i_m/F/src/fcvt.wu.s_b28-01.S/ref/Ref",
    "rv32i_m/F/src/fcvt.wu.s_b29-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b1-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b20-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b2-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b21-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b3-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b4-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b5-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b6-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b7-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b8-01.S/ref/Ref",
-    // "rv32i_m/F/src/fdiv_b9-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b1-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b20-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b2-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b21-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b3-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b4-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b5-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b6-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b7-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b8-01.S/ref/Ref",
+    "rv32i_m/F/src/fdiv_b9-01.S/ref/Ref",
    "rv32i_m/F/src/feq_b1-01.S/ref/Ref",
    "rv32i_m/F/src/feq_b19-01.S/ref/Ref",
    "rv32i_m/F/src/fle_b1-01.S/ref/Ref",