From 2cd2fe0828c85c33a6b6ade80a37f331f52a599a Mon Sep 17 00:00:00 2001
From: kipmacsaigoren <kmacsaigoren@hmc.edu>
Date: Wed, 15 Sep 2021 12:15:53 -0500
Subject: [PATCH 01/11] Added git things to make it all a little nicer and
 synthesis work.

---
 .gitmodules | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitmodules b/.gitmodules
index e69de29b..65e1e71c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "sky130/sky130_osu_sc_t12"]
+	path = sky130/sky130_osu_sc_t12
+	url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/

From 97c474327c7d9c45b8ae682e3cf7c19ce9cf4f0c Mon Sep 17 00:00:00 2001
From: kipmacsaigoren <kmacsaigoren@hmc.edu>
Date: Wed, 15 Sep 2021 12:24:24 -0500
Subject: [PATCH 02/11] changed priority circuits for synthesis and light
 cleanup

---
 wally-pipelined/src/mmu/pmpadrdec.sv           |  2 +-
 wally-pipelined/src/mmu/priorityonehot.sv      | 17 ++++++++++-------
 wally-pipelined/src/mmu/prioritythermometer.sv |  8 ++++++--
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv
index 61816782..9fe28b26 100644
--- a/wally-pipelined/src/mmu/pmpadrdec.sv
+++ b/wally-pipelined/src/mmu/pmpadrdec.sv
@@ -82,7 +82,7 @@ module pmpadrdec (
   assign NAMask[1:0] = {2'b11};
 
   prioritythemometer #(`PA_BITS-2) namaskgen(
-    .a({PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}),
+    .a({~PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}), // *** confusing bit bussing to match the logic for the inside of the thermometer.
     .y(NAMask[`PA_BITS-1:2]));
 
   assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | NAMask);
diff --git a/wally-pipelined/src/mmu/priorityonehot.sv b/wally-pipelined/src/mmu/priorityonehot.sv
index 7a17f8d2..f02f8ccc 100644
--- a/wally-pipelined/src/mmu/priorityonehot.sv
+++ b/wally-pipelined/src/mmu/priorityonehot.sv
@@ -40,13 +40,16 @@ module priorityonehot #(parameter ENTRIES = 8) (
   logic [ENTRIES-1:0] nolower;
 
   // generate thermometer code mask
-  genvar i;
-  generate
-    assign nolower[0] = 1'b1;
-    for (i=1; i<ENTRIES; i++) begin:therm
-      assign nolower[i] = nolower[i-1] & ~a[i-1];
-    end
-  endgenerate
+  prioritythemometer #(ENTRIES) maskgen(.a({a[ENTRIES-2:0], 1'b1}), .y(nolower));
+  // genvar i;
+  // generate
+  //   assign nolower[0] = 1'b1;
+  //   for (i=1; i<ENTRIES; i++) begin:therm
+  //     assign nolower[i] = nolower[i-1] & ~a[i-1];
+  //   end
+  // endgenerate
+  // *** replace mask generation logic ^^^ with priority thermometer
+
 
   assign y = a & nolower;
 
diff --git a/wally-pipelined/src/mmu/prioritythermometer.sv b/wally-pipelined/src/mmu/prioritythermometer.sv
index a6374579..a3eab7e4 100644
--- a/wally-pipelined/src/mmu/prioritythermometer.sv
+++ b/wally-pipelined/src/mmu/prioritythermometer.sv
@@ -37,16 +37,20 @@ module prioritythemometer #(parameter N = 8) (
   output logic  [N-1:0] y
 );
 
+// Carefully crafted so design compiler would synthesize into a fast tree structure
+//  Rather than linear.
 
   // generate thermometer code mask
   genvar i;
   generate
     assign y[0] = a[0];
-    for (i=1; i<N; i++) begin
-      assign y[i] = y[i-1] & a[i];
+    for (i=1; i<N; i++) begin:therm
+      assign y[i] = y[i-1] & ~a[i]; // *** made to be the same as onehot (without the inverter) to see if the probelme is something weird with synthesis
+      // assign y[i] = y[i-1] & a[i];
     end
   endgenerate
 
+
 endmodule
 
 

From cc4ad218cb9a77bee22876d173166255222fcfee Mon Sep 17 00:00:00 2001
From: kipmacsaigoren <kmacsaigoren@hmc.edu>
Date: Fri, 17 Sep 2021 12:00:38 -0500
Subject: [PATCH 03/11] added new fun ways of putting inputs into the priority
 thermometer

---
 wally-pipelined/src/mmu/pmpadrdec.sv | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv
index 9fe28b26..46ee6bdc 100644
--- a/wally-pipelined/src/mmu/pmpadrdec.sv
+++ b/wally-pipelined/src/mmu/pmpadrdec.sv
@@ -81,8 +81,16 @@ module pmpadrdec (
 
   assign NAMask[1:0] = {2'b11};
 
+// *** BAD DELETE LATER ADDED for hopefully fixing synth
+  logic [`PA_BITS-3:0] maskInput;
+
+  assign maskInput = `PA_BITS'd39; // *** added to really just try anything with the inputs of the thermometer.
+
+  // *** maskinput used to be {~PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}
+// ****
+
   prioritythemometer #(`PA_BITS-2) namaskgen(
-    .a({~PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}), // *** confusing bit bussing to match the logic for the inside of the thermometer.
+    .a(maskInput), // *** confusing bit bussing to match the logic for the inside of the thermometer.
     .y(NAMask[`PA_BITS-1:2]));
 
   assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | NAMask);

From 4de4774a71f01f9cda381da13324b8a84eb12456 Mon Sep 17 00:00:00 2001
From: Kip Macsai-Goren <kipmacsaigoren@github.com>
Date: Fri, 17 Sep 2021 13:07:21 -0400
Subject: [PATCH 04/11] more input changes on priority thermometer. passes lint

---
 wally-pipelined/src/mmu/pmpadrdec.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv
index 46ee6bdc..584b18fb 100644
--- a/wally-pipelined/src/mmu/pmpadrdec.sv
+++ b/wally-pipelined/src/mmu/pmpadrdec.sv
@@ -84,7 +84,7 @@ module pmpadrdec (
 // *** BAD DELETE LATER ADDED for hopefully fixing synth
   logic [`PA_BITS-3:0] maskInput;
 
-  assign maskInput = `PA_BITS'd39; // *** added to really just try anything with the inputs of the thermometer.
+  assign maskInput = 'd39; // *** added to really just try anything with the inputs of the thermometer.
 
   // *** maskinput used to be {~PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}
 // ****

From f3058f94c62d6de43a3228491488d148e6d2775e Mon Sep 17 00:00:00 2001
From: Kip Macsai-Goren <kipmacsaigoren@github.com>
Date: Fri, 8 Oct 2021 15:33:18 -0700
Subject: [PATCH 06/11] removed loops and simplified mask generation logic.
 PMP's now pass my tests and linux tests up to around 300M instructions.

---
 wally-pipelined/src/mmu/pmpadrdec.sv  | 27 +++++++--------------------
 wally-pipelined/src/mmu/pmpchecker.sv |  7 ++++---
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/wally-pipelined/src/mmu/pmpadrdec.sv b/wally-pipelined/src/mmu/pmpadrdec.sv
index 61816782..4f747ffb 100644
--- a/wally-pipelined/src/mmu/pmpadrdec.sv
+++ b/wally-pipelined/src/mmu/pmpadrdec.sv
@@ -34,10 +34,8 @@ module pmpadrdec (
   input  logic [7:0]       PMPCfg,
   input  logic [`XLEN-1:0] PMPAdr,
   input  logic             PAgePMPAdrIn,
-//  input  logic             NoLowerMatchIn,
   input  logic             FirstMatch,
   output logic             PAgePMPAdrOut,
-//  output logic             NoLowerMatchOut,
   output logic             Match, Active, 
   output logic             L, X, W, R
 );
@@ -48,7 +46,6 @@ module pmpadrdec (
 
   logic TORMatch, NAMatch;
   logic PAltPMPAdr;
-//  logic FirstMatch;
   logic [`PA_BITS-1:0] CurrentAdrFull;
   logic [1:0] AdrMode;
 
@@ -67,25 +64,15 @@ module pmpadrdec (
   assign TORMatch = PAgePMPAdrIn && PAltPMPAdr;
 
   // Naturally aligned regions
-  logic [`PA_BITS-1:0] NAMask;
-  //genvar i;
-  
-  // create a mask of which bits to ignore
-  // generate
-  //   assign Mask[1:0] = 2'b11;
-  //   assign Mask[2] = (AdrMode == NAPOT); // mask has 0s in upper bis for NA4 region
-  //   for (i=3; i < `PA_BITS; i=i+1) begin:mask
-  //     assign Mask[i] = Mask[i-1] & PMPAdr[i-3]; // NAPOT mask: 1's indicate bits to ignore
-  //   end
-  // endgenerate
+  logic [`PA_BITS-1:0] NAMask, NABase;
 
   assign NAMask[1:0] = {2'b11};
-
-  prioritythemometer #(`PA_BITS-2) namaskgen(
-    .a({PMPAdr[`PA_BITS-4:0], (AdrMode == NAPOT)}),
-    .y(NAMask[`PA_BITS-1:2]));
-
-  assign NAMatch = &((PhysicalAddress ~^ CurrentAdrFull) | NAMask);
+  assign NAMask[`PA_BITS-1:2] = (PMPAdr[`PA_BITS-3:0] + {{(`PA_BITS-3){1'b0}}, (AdrMode == NAPOT)}) ^ PMPAdr[`PA_BITS-3:0];
+  // generates a mask where the bottom k bits are 1, corresponding to a size of 2^k bytes for this memory region. 
+  // This assumes we're using at least an NA4 region, but works for any size NAPOT region.
+  assign NABase = {(PMPAdr[`PA_BITS-3:0] & ~NAMask[`PA_BITS-1:2]), 2'b00}; // base physical address of the pmp. 
+  
+  assign NAMatch = &((NABase ~^ PhysicalAddress) | NAMask); // check if upper bits of base address match, ignore lower bits correspoonding to inside the memory range
 
   assign Match = (AdrMode == TOR) ? TORMatch : 
                  (AdrMode == NA4 || AdrMode == NAPOT) ? NAMatch :
diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv
index 7dc37163..11cb7ccb 100644
--- a/wally-pipelined/src/mmu/pmpchecker.sv
+++ b/wally-pipelined/src/mmu/pmpchecker.sv
@@ -54,8 +54,9 @@ module pmpchecker (
 
   // Bit i is high when the address falls in PMP region i
   logic                    EnforcePMP;
-  logic [7:0]              PMPCfg[`PMP_ENTRIES-1:0];
-  logic [`PMP_ENTRIES-1:0] Match, FirstMatch;      // PMP Entry matches
+//  logic [7:0]              PMPCfg[`PMP_ENTRIES-1:0];
+  logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges
+  logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address.
   logic [`PMP_ENTRIES-1:0] Active;     // PMP register i is non-null
   logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
   logic [`PMP_ENTRIES-1:0]   PAgePMPAdr;  // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
@@ -69,7 +70,7 @@ module pmpchecker (
     .PAgePMPAdrOut(PAgePMPAdr),
     .FirstMatch, .Match, .Active, .L, .X, .W, .R);
 
-  priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // Take the ripple gates/signals out of the pmpadrdec and into another unit.
+  priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches.
 
   // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
   assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; 

From 303beaa083b877f999e909732198a74263b58c08 Mon Sep 17 00:00:00 2001
From: Kip Macsai-Goren <kipmacsaigoren@github.com>
Date: Fri, 8 Oct 2021 15:40:18 -0700
Subject: [PATCH 07/11] updated pmp output to correspond to test changes,
 commented out execute tests until cache/fence interaction works fully.

---
 wally-pipelined/testbench/testbench-imperas.sv | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index 3f6d449e..40719351 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -46,14 +46,14 @@ module testbench();
 
   string tests32mmu[] = '{
     "rv32mmu/WALLY-MMU-SV32", "3000"
-    //"rv32mmu/WALLY-PMA", "3000",
+    //"rv32mmu/WALLY-PMP", "3000",
     //"rv32mmu/WALLY-PMA", "3000"
     };
 
   string tests64mmu[] = '{
     "rv64mmu/WALLY-MMU-SV48", "3000",
-    "rv64mmu/WALLY-MMU-SV39", "3000"
-    //"rv64mmu/WALLY-PMA", "3000",
+    "rv64mmu/WALLY-MMU-SV39", "3000",
+    "rv64mmu/WALLY-PMP", "3000"
     //"rv64mmu/WALLY-PMA", "3000"
   };
 
@@ -539,8 +539,8 @@ string tests32f[] = '{
         if (`F_SUPPORTED) tests = {tests64f, tests};
         if (`D_SUPPORTED) tests = {tests64d, tests};
         if (`MEM_VIRTMEM) tests = {tests64mmu, tests};
-        if (`A_SUPPORTED) tests = {tests64a, tests};
-        if (`M_SUPPORTED) tests = {tests64m, tests};
+        //if (`A_SUPPORTED) tests = {tests64a, tests};
+        //if (`M_SUPPORTED) tests = {tests64m, tests};
       end
       //tests = {tests64a, tests};
     end else begin // RV32
@@ -676,7 +676,7 @@ string tests32f[] = '{
               errors = errors+1;
               $display("  Error on test %s result %d: adr = %h sim (D$) %h sim (TIM) = %h, signature = %h", 
                     tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.uncore.dtim.RAM[testadr+i], signature[i]);
-              $stop;//***debug
+              //$stop;//***debug
             end
           end
           i = i + 1;

From 96565f9435ec29c4db5043834cacd8cfdd2ecb9a Mon Sep 17 00:00:00 2001
From: kipmacsaigoren <kmacsaigoren@hmc.edu>
Date: Fri, 8 Oct 2021 17:47:54 -0500
Subject: [PATCH 08/11] rename adder in fpu for synthesis

---
 wally-pipelined/src/fpu/{adder.sv => adderparts.sv} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename wally-pipelined/src/fpu/{adder.sv => adderparts.sv} (100%)

diff --git a/wally-pipelined/src/fpu/adder.sv b/wally-pipelined/src/fpu/adderparts.sv
similarity index 100%
rename from wally-pipelined/src/fpu/adder.sv
rename to wally-pipelined/src/fpu/adderparts.sv

From 6fce53d146aa62da728d7f5bd97eb3b8b56dcf17 Mon Sep 17 00:00:00 2001
From: bbracker <bbracker@hmc.edu>
Date: Sat, 9 Oct 2021 17:22:30 -0700
Subject: [PATCH 09/11] make testbench-linux halt on some discrepancies with
 QEMU

---
 wally-pipelined/testbench/testbench-linux.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv
index 73077e7d..0dda6ffb 100644
--- a/wally-pipelined/testbench/testbench-linux.sv
+++ b/wally-pipelined/testbench/testbench-linux.sv
@@ -27,7 +27,7 @@
 
 `include "wally-config.vh"
 
-`define DEBUG_TRACE 0
+`define DEBUG_TRACE 2
 // Debug Levels
 // 0: don't check against QEMU
 // 1: print disagreements with QEMU, but only halt on PCW disagreements

From 77fe00947e03d4704521cdb8f39a1adf4b7ad3f2 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Sat, 9 Oct 2021 17:38:10 -0700
Subject: [PATCH 10/11] FMA matches diagram and lint warnings fixed

---
 wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv |   4 +-
 wally-pipelined/src/fpu/fma.sv                | 486 +++++++++++-------
 wally-pipelined/src/fpu/fpu.sv                |   5 +-
 3 files changed, 299 insertions(+), 196 deletions(-)

diff --git a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
index 3ae751e5..de5e849d 100644
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@@ -139,12 +139,12 @@ assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[
     logic [8:0]			NormCntE, NormCntM;
     
     fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}),
-                .BiasE, .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
+                 .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
                 .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                 .ProdExpE, .AddendStickyE, .KillProdE); 
 fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
               //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
-               .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
+                .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
                .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);
 
 
diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv
index 7bd15030..22b5cbe0 100644
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@@ -23,7 +23,7 @@
 ///////////////////////////////////////////
 
 `include "wally-config.vh"
-//   `include "../../../config/rv64icfd/wally-config.vh"
+//    `include "../../../config/rv64icfd/wally-config.vh"
 
 module fma(
     input logic                 clk,
@@ -45,7 +45,6 @@ module fma(
     input logic                 XSNaNM, YSNaNM, ZSNaNM,     // is signaling NaN
     input logic                 XZeroM, YZeroM, ZZeroM,     // is zero - memory stage
     input logic                 XInfM, YInfM, ZInfM,        // is infinity
-    input logic [10:0]          BiasE,      // bias (max exponent/2) ***parameterize in unpacking unit
 	output logic [`FLEN-1:0]    FMAResM,    // FMA result
 	output logic [4:0]		    FMAFlgM);   // FMA flags
 	
@@ -70,7 +69,7 @@ module fma(
     logic [8:0]			NormCntE, NormCntM;
     
     fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-                .BiasE, .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
+                .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
                 .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                 .ProdExpE, .AddendStickyE, .KillProdE); 
                 
@@ -96,7 +95,6 @@ module fma1(
     input logic  [`NF:0]        XManE, YManE, ZManE,    // fractions in U(0.NF) format
     input logic                 XDenormE, YDenormE, ZDenormE, // is the input denormal
     input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
-    input logic  [`NE-1:0]      BiasE,      // bias (max exponent/2)
     input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
     input logic                 FmtE,       // precision 1 = double 0 = single
     output logic [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
@@ -111,25 +109,26 @@ module fma1(
     );
 
     logic [`NE-1:0]     Denorm;             // value of a denormaized number based on precision
-    logic [`NE-1:0]     XExpVal, YExpVal;   // Exponent value after taking into account denormals
     logic [2*`NF+1:0]   ProdManE;           // 1.X frac * 1.Y frac in U(2.2Nf) format
     logic [3*`NF+5:0]   AlignedAddendE;     // Z aligned for addition in U(NF+5.2NF+1)
+    logic [3*`NF+6:0]   AlignedAddendInv;   // aligned addend possibly inverted
+    logic [2*`NF+1:0]   ProdManKilled;      // the product's mantissa possibly killed
+    logic [3*`NF+6:0]   NegProdManKilled;   // a negated ProdManKilled
+    logic [8:0]         PNormCnt, NNormCnt; // the positive and nagitive LOA results
+    logic [3*`NF+6:0]   PreSum, NegPreSum;  // positive and negitve versions of the sum
 
     ///////////////////////////////////////////////////////////////////////////////
     // Calculate the product
     //      - When multipliying two fp numbers, add the exponents
     //      - Subtract the bias (XExp + YExp has two biases, one from each exponent)
-    //      - If the product is zero then kill the exponent - this is a problem 
+    //      - If the product is zero then kill the exponent
+    //      - Multiply the mantissas
     ///////////////////////////////////////////////////////////////////////////////
    
-    // denormalized numbers have diffrent values depending on which precison it is.
-    //      double - 1
-    //      single - 1024-128+1 = 897
-    assign Denorm = FmtE ? 1 : 897;
-    assign XExpVal = XDenormE ? Denorm : XExpE;
-    assign YExpVal = YDenormE ? Denorm : YExpE;
-    // take into account if the product is zero, the product's exponent does not compute properly if X or Y is zero
-    assign ProdExpE = (XExpVal + YExpVal - BiasE)&{`NE+2{~(XZeroE|YZeroE)}};
+
+   // calculate the product's exponent 
+    expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, 
+                    .Denorm, .ProdExpE);
 
     // multiplication of the mantissa's
     mult mult(.XManE, .YManE, .ProdManE);
@@ -138,174 +137,49 @@ module fma1(
     // Alignment shifter
     ///////////////////////////////////////////////////////////////////////////////
 
-    alignshift alignshift(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm,
+    align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm,
                         .AlignedAddendE, .AddendStickyE, .KillProdE);
-
                         
-    // Calculate the product's sign
-    //      Negate product's sign if FNMADD or FNMSUB
-
-    assign PSgnE = XSgnE ^ YSgnE ^ (FOpCtrlE[1]&~FOpCtrlE[2]);
-    assign ZSgnEffE = ZSgnE^FOpCtrlE[0]; // Swap sign of Z for subtract
-
+    // calculate the signs and take the operation into account
+    sign sign(.FOpCtrlE, .XSgnE, .YSgnE, .ZSgnE, .PSgnE, .ZSgnEffE);
 
     // ///////////////////////////////////////////////////////////////////////////////
     // // Addition/LZA
     // ///////////////////////////////////////////////////////////////////////////////
         
-    fmaadd fmaadd(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .SumE, .NegSumE, .InvZE, .NormCntE, .XZeroE, .YZeroE);
+    add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
     
+    loa loa(.AlignedAddendE, .AlignedAddendInv, .ProdManKilled, .NegProdManKilled, .PNormCnt, .NNormCnt);
+
+    // Choose the positive sum and accompanying LZA result.
+    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
+    assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
+
+
 endmodule
 
 
+module expadd(    
+    input  logic            FmtE,          // precision
+    input  logic [`NE-1:0]  XExpE, YExpE,  // input exponents
+    input  logic            XDenormE, YDenormE,    // are the inputs denormalized
+    input  logic            XZeroE, YZeroE,        // are the inputs zero
+    output logic [`NE-1:0]  Denorm,        // value of denormalized exponent
+    output logic [`NE+1:0]  ProdExpE       // product's exponent B^(1023)NE+2
+);
 
+    logic [`NE-1:0] XExpVal, YExpVal;       // Exponent value after taking into account denormals
 
+    // denormalized numbers have different values depending on which precision it is.
+    //      double - 1
+    //      single - 1024-128+1 = 897
+    assign Denorm = FmtE ? 1 : 897;
 
-
-
-
-
-
-
-
-
-
-
-
-
-module fma2(
-    
-    input logic                 XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
-    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic     [`NE+1:0]   ProdExpM,       // X exponent + Y exponent - bias
-    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                 XZeroM, YZeroM, ZZeroM, // inputs are zero
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic     [3*`NF+5:0] SumM,       // the positive sum
-    input logic                 NegSumM,    // was the sum negitive
-    input logic                 InvZM,      // do you invert Z
-    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                 PSgnM,      // the product's sign
-    input logic     [8:0]       NormCntM,   // the normalization shift count
-    output logic    [`FLEN-1:0] FMAResM,    // FMA final result
-    output logic    [4:0]       FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
-   
-
-
-    logic [`NF-1:0]     ResultFrac; // Result fraction
-    logic [`NE-1:0]     ResultExp;  // Result exponent
-    logic               ResultSgn;  // Result sign
-    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
-    logic [`NE+1:0]     FullResultExp;  // ResultExp with bits to determine sign and overflow
-    logic [`NF+2:0]     NormSum;        // normalized sum
-    logic               NormSumSticky;  // sticky bit calulated from the normalized sum
-    logic               SumZero;        // is the sum zero
-    logic               ResultDenorm;   // is the result denormalized
-    logic               Sticky, UfSticky;           // Sticky bit
-    logic               Plus1, Minus1, CalcPlus1;   // do you add or subtract one for rounding
-    logic               UfPlus1;                    // do you add one (for determining underflow flag)
-    logic               Invalid,Underflow,Overflow; // flags
-    logic               ZeroSgn;        // the result's sign if the sum is zero
-    logic               ResultSgnTmp;   // the result's sign assuming the result is not zero
-    logic               Guard, Round;   // bits needed to determine rounding
-    logic               UfRound, UfLSBNormSum;   // bits needed to determine rounding for underflow flag
-    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
-   
-    
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Normalization
-    ///////////////////////////////////////////////////////////////////////////////
-
-    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
-            .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Rounding
-    ///////////////////////////////////////////////////////////////////////////////
-
-    // round to nearest even
-    // round to zero
-    // round to -infinity
-    // round to infinity
-    // round to nearest max magnitude
-
-    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgn, .SumExp,
-        .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfRound, .UfLSBNormSum);
-
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Sign calculation
-    ///////////////////////////////////////////////////////////////////////////////
-
-    // Determine the sign if the sum is zero
-    //      if cancelation then 0 unless round to -infinity
-    //      otherwise psign
-    assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow ? FrmM[1:0] == 2'b10 : PSgnM;
-
-    // is the result negitive
-    //  if p - z is the Sum negitive
-    //  if -p + z is the Sum positive
-    //  if -p - z then the Sum is negitive
-    assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
-    assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
- 
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Flags
-    ///////////////////////////////////////////////////////////////////////////////
-
-    fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
-        .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfRound, .UfLSBNormSum, .Sticky, .UfPlus1,
-        .FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
-
-
-
-
-    ///////////////////////////////////////////////////////////////////////////////
-    // Select the result
-    ///////////////////////////////////////////////////////////////////////////////
-    assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
-    assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
-    assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
-    assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                          {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
-                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
-    assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
-    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
-    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + (CalcPlus1&(AddendStickyM|FrmM[1])) : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-    assign FMAResM = XNaNM ? XNaNResult :
-                        YNaNM ? YNaNResult :
-                        ZNaNM ? ZNaNResult :
-                        Invalid ? InvalidResult :
-                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
-                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
-                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
-                        KillProdM ? KillProdResult :  
-			            Overflow ? OverflowResult :
-                        Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-                        FmtM ? {ResultSgn, ResultExp, ResultFrac} :
-                               {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
-
-// *** use NF where needed
+    // pick denormalized value or exponent
+    assign XExpVal = XDenormE ? Denorm : XExpE;
+    assign YExpVal = YDenormE ? Denorm : YExpE;
+    // kill the exponent if the product is zero - either X or Y is 0
+    assign ProdExpE = (XExpVal + YExpVal - `NE'h3ff)&{`NE+2{~(XZeroE|YZeroE)}};
 
 endmodule
 
@@ -313,7 +187,6 @@ endmodule
 
 
 
-
 module mult(
     input logic [`NF:0] XManE, YManE,
     output logic [2*`NF+1:0] ProdManE
@@ -325,7 +198,34 @@ endmodule
 
 
 
-module alignshift(
+
+
+
+module sign(    
+    input  logic [2:0]  FOpCtrlE,               // operation select: bit 0 flips Z's sign, bit 1 (when bit 2 is clear) flips the product's sign
+    input  logic        XSgnE, YSgnE, ZSgnE,    // signs of the X, Y and Z inputs
+    output logic        PSgnE,     // the product's sign - takes operation into account
+    output logic        ZSgnEffE   // Z sign used in fma - takes operation into account
+);
+
+    // Calculate the product's sign
+    //      Negate product's sign if FNMADD or FNMSUB
+    
+    // flip if negation operation
+    assign PSgnE = XSgnE ^ YSgnE ^ (FOpCtrlE[1]&~FOpCtrlE[2]);
+    // flip if subtraction
+    assign ZSgnEffE = ZSgnE^FOpCtrlE[0];
+
+endmodule
+
+
+
+
+
+
+
+
+module align(
     input logic  [`NE-1:0]      ZExpE,      // biased exponents in B(NE.0) format
     input logic  [`NF:0]        ZManE,      // fractions in U(0.NF) format]
     input logic                 ZDenormE,   // is the input denormal
@@ -397,22 +297,25 @@ module alignshift(
 
 endmodule
 
-module fmaadd(
+
+
+
+
+
+
+module add(
     input logic  [3*`NF+5:0]    AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
     input logic  [2*`NF+1:0]    ProdManE,       // the product's mantissa
     input logic                 PSgnE, ZSgnEffE,// the product and modified Z signs
     input logic                 KillProdE,      // should the product be set to 0
     input logic                 XZeroE, YZeroE, // is the input zero
-    output logic [3*`NF+5:0]    SumE,           // the positive sum
+    output logic [3*`NF+6:0] AlignedAddendInv,  // aligned addend possibly inverted
+    output logic [2*`NF+1:0] ProdManKilled,     // the product's mantissa possibly killed
+    output logic [3*`NF+6:0] NegProdManKilled,  // a negated ProdManKilled
     output logic                NegSumE,        // was the sum negitive
     output logic                InvZE,          // do you invert Z
-    output logic [8:0]          NormCntE        // normalization shift count
+    output logic [3*`NF+6:0]   PreSum, NegPreSum// possibly negitive sum
 );
-    logic [3*`NF+6:0]   PreSum, NegPreSum;  // possibly negitive sum
-    logic [2*`NF+1:0]   ProdMan2;           // product being added
-    logic [3*`NF+6:0]   AlignedAddend2;     // possibly inverted aligned Z
-    logic [3*`NF+6:0]   NegProdMan2;        // a negated ProdMan2
-    logic [8:0]         PNormCnt, NNormCnt; // results from the LZA
 
     ///////////////////////////////////////////////////////////////////////////////
     // Addition
@@ -424,36 +327,42 @@ module fmaadd(
     assign InvZE = ZSgnEffE ^ PSgnE;
 
     // Choose an inverted or non-inverted addend - the one has to be added now for the LZA
-    assign AlignedAddend2 = InvZE ? -{1'b0, AlignedAddendE} : {1'b0, AlignedAddendE};
+    assign AlignedAddendInv = InvZE ? -{1'b0, AlignedAddendE} : {1'b0, AlignedAddendE};
     // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
-    assign ProdMan2 = ProdManE&{2*`NF+2{~KillProdE}};
+    assign ProdManKilled = ProdManE&{2*`NF+2{~KillProdE}};
     // Negate ProdMan for LZA and the negitive sum calculation
-    assign NegProdMan2 = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, -ProdMan2, 2'b0};
+    assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, -ProdManKilled, 2'b0};
 
-    // LZAs one for the positive result and one for the negitive
-    //      - the +1 from inverting causes problems for normalization
-    poslza poslza(AlignedAddend2, ProdMan2, PNormCnt);
-    neglza neglza({1'b0,AlignedAddendE}, NegProdMan2, NNormCnt);
 
 
     // Do the addition
     //      - calculate a positive and negitive sum in parallel
-    assign PreSum = AlignedAddend2 + {ProdMan2, 2'b0};
-    assign NegPreSum = AlignedAddendE + NegProdMan2;
+    assign PreSum = AlignedAddendInv + {ProdManKilled, 2'b0};
+    assign NegPreSum = AlignedAddendE + NegProdManKilled;
      
     // Is the sum negitive
     assign NegSumE = PreSum[3*`NF+6];
-    // Choose the positive sum and accompanying LZA result.
-    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
-    assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
 
 endmodule
 
 
+module loa(
+    input logic [3*`NF+5:0] AlignedAddendE,     // Z aligned for addition in U(NF+5.2NF+1)
+    input logic [3*`NF+6:0] AlignedAddendInv,   // aligned addend possibly inverted
+    input logic [2*`NF+1:0] ProdManKilled,      // the product's mantissa possibly killed
+    input logic [3*`NF+6:0] NegProdManKilled,   // a negated ProdManKilled
+    output logic [8:0]      PNormCnt, NNormCnt  // normalization shift counts for the positive and negative results
+);
+    // Wrapper around the two LOA blocks: posloa produces PNormCnt and negloa produces NNormCnt.
+    // LOAs: one for the positive result and one for the negative
+    //      - the +1 from inverting causes problems for normalization
+    posloa posloa(AlignedAddendInv, ProdManKilled, PNormCnt);
+    negloa negloa({1'b0,AlignedAddendE}, NegProdManKilled, NNormCnt); // addend zero-extended by one bit to match negloa's A port width
+
+endmodule
 
 
-
-module poslza(
+module posloa(
     input logic  [3*`NF+6:0] A,     // addend
     input logic  [2*`NF+1:0] P,     // product
     output logic [8:0]       PCnt   // normalization shift count for the positive result
@@ -484,7 +393,7 @@ module poslza(
   
 endmodule
 
-module neglza(
+module negloa(
     input logic  [3*`NF+6:0]    A,      // addend
     input logic  [3*`NF+6:0]    P,      // product
     output logic [8:0]          NCnt    // normalization shift count for the negitive result
@@ -512,6 +421,197 @@ endmodule
 
 
 
+
+
+
+
+
+
+
+
+module fma2(
+    // Normalizes and rounds the sum, then determines the result sign, flags and final result via the submodules below
+    input logic                 XSgnM, YSgnM,        // input signs
+    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]       FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic                 FmtM,       // precision 1 = double 0 = single
+    input logic     [`NE+1:0]   ProdExpM,       // X exponent + Y exponent - bias
+    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                 XZeroM, YZeroM, ZZeroM, // inputs are zero
+    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
+    input logic     [3*`NF+5:0] SumM,       // the positive sum
+    input logic                 NegSumM,    // was the sum negative
+    input logic                 InvZM,      // do you invert Z
+    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                 PSgnM,      // the product's sign
+    input logic     [8:0]       NormCntM,   // the normalization shift count
+    output logic    [`FLEN-1:0] FMAResM,    // FMA final result
+    output logic    [4:0]       FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
+   
+
+
+    logic [`NF-1:0]     ResultFrac; // Result fraction
+    logic [`NE-1:0]     ResultExp;  // Result exponent
+    logic               ResultSgn;  // Result sign
+    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
+    logic [`NE+1:0]     FullResultExp;  // ResultExp with bits to determine sign and overflow
+    logic [`NF+2:0]     NormSum;        // normalized sum
+    logic               NormSumSticky;  // sticky bit calculated from the normalized sum
+    logic               SumZero;        // is the sum zero
+    logic               ResultDenorm;   // is the result denormalized
+    logic               Sticky, UfSticky;           // Sticky bit
+    logic               Plus1, Minus1, CalcPlus1;   // do you add or subtract one for rounding
+    logic               UfPlus1;                    // do you add one (for determining underflow flag)
+    logic               Invalid,Underflow,Overflow; // flags
+    logic               ZeroSgn;        // the result's sign if the sum is zero - NOTE(review): not referenced in this module; resultsign declares its own
+    logic               ResultSgnTmp;   // the result's sign assuming the result is not zero - NOTE(review): not referenced in this module; resultsign declares its own
+    logic               Guard, Round;   // bits needed to determine rounding
+    logic               UfRound, UfLSBNormSum;   // bits needed to determine rounding for underflow flag
+   
+    
+
+
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Normalization
+    ///////////////////////////////////////////////////////////////////////////////
+
+    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
+            .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
+
+
+
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Rounding
+    ///////////////////////////////////////////////////////////////////////////////
+
+    // round to nearest even
+    // round to zero
+    // round to -infinity
+    // round to infinity
+    // round to nearest max magnitude
+
+    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgn, .SumExp,
+        .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfRound, .UfLSBNormSum);
+
+
+
+
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Sign calculation
+    ///////////////////////////////////////////////////////////////////////////////
+
+ 
+    resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .ResultSgn);
+
+
+
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Flags
+    ///////////////////////////////////////////////////////////////////////////////
+
+    fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
+        .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfRound, .UfLSBNormSum, .Sticky, .UfPlus1,
+        .FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
+
+
+
+
+    ///////////////////////////////////////////////////////////////////////////////
+    // Select the result
+    ///////////////////////////////////////////////////////////////////////////////
+
+    resultselect resultselect(.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
+        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, 
+        .ZSgnEffM, .PSgnM, .ResultSgn, .Minus1, .Plus1, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
+        .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
+
+// *** use NF where needed
+
+endmodule
+
+module resultsign(
+    input logic [2:0]   FrmM,               // rounding mode
+    input logic         PSgnM, ZSgnEffM,    // the product's sign and the modified Z sign
+    input logic         Underflow,          // underflow flag
+    input logic         InvZM,              // do you invert Z
+    input logic         NegSumM,            // was the sum negative
+    input logic         SumZero,            // is the sum zero
+    output logic        ResultSgn           // the result's sign
+);
+    // Picks the result's sign: ZeroSgn when the sum is zero, otherwise ResultSgnTmp.
+    logic ZeroSgn;          // the result's sign if the sum is zero
+    logic ResultSgnTmp;     // the result's sign assuming the sum is not zero
+
+    // Determine the sign if the sum is zero
+    //      if cancellation (signs differ and no underflow) then 0 unless round to -infinity (FrmM[1:0] == 2'b10)
+    //      otherwise psign
+    assign ZeroSgn = (PSgnM^ZSgnEffM)&~Underflow ? FrmM[1:0] == 2'b10 : PSgnM;
+
+    // is the result negative
+    //  if p - z (Z inverted, Z negative): negative when the sum is negative
+    //  if -p + z (Z inverted, product negative): negative when the sum is not negative
+    //  if -p - z (both negative): always negative
+    assign ResultSgnTmp = InvZM&(ZSgnEffM)&NegSumM | InvZM&PSgnM&~NegSumM | ((ZSgnEffM)&PSgnM);
+    assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
+
+endmodule
+
+module resultselect(
+    input logic                 XSgnM, YSgnM,        // input signs
+    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]       FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic                 FmtM,       // precision 1 = double 0 = single
+    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                 PSgnM,      // the product's sign
+    input logic                 ResultSgn,  // the result's sign
+    input logic                 Minus1, Plus1, CalcPlus1, // rounding bits
+    input logic                 Invalid, Overflow, Underflow,  // flags
+    input logic                 ResultDenorm,       // is the result denormalized
+    input logic     [`NE-1:0]   ResultExp,          // Result exponent
+    input logic     [`NF-1:0]   ResultFrac,         // Result fraction
+    output logic    [`FLEN-1:0] FMAResM     // FMA final result
+);
+    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
+    // Builds each special-case result, then selects FMAResM by priority. Single-precision (FmtM = 0) results sit in the low 32 bits with the upper 32 bits all ones.
+    assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]}; // X with the top fraction bit forced to 1
+    assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]}; // Y with the top fraction bit forced to 1
+    assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]}; // Z (effective sign) with the top fraction bit forced to 1
+    assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                                                                                                          {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
+                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
+                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
+    assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; // all-ones exponent with only the top fraction bit set
+    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - (Minus1&AddendStickyM) + (Plus1&AddendStickyM)} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; // Z adjusted by one unit when rounding applies and the addend sticky bit is set
+    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + (CalcPlus1&(AddendStickyM|FrmM[1])) : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; // signed zero, plus one unit when CalcPlus1 applies
+    assign FMAResM = XNaNM ? XNaNResult :
+                        YNaNM ? YNaNResult :
+                        ZNaNM ? ZNaNResult :
+                        Invalid ? InvalidResult :
+                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
+                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
+                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
+                        KillProdM ? KillProdResult :  
+			            Overflow ? OverflowResult :
+                        Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
+                        FmtM ? {ResultSgn, ResultExp, ResultFrac} :
+                               {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
+
+endmodule
+
+
 module normalize(
     input logic  [3*`NF+5:0]    SumM,       // the positive sum
     input logic  [`NE-1:0]      ZExpM,      // exponent of Z
diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index 34aa3edd..92fff23c 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -89,13 +89,16 @@ module fpu (
      logic [10:0] 	  BiasE;                   // bias based on precision (single=7f double=3ff - max expoent/2)
      logic 		  XNaNE, YNaNE, ZNaNE;           // is the input a NaN - execute stage
      logic 		  XNaNM, YNaNM, ZNaNM;           // is the input a NaN - memory stage
+     logic       XNaNQ, YNaNQ;                  // is the input a NaN - divide
      logic 		  XSNaNE, YSNaNE, ZSNaNE;        // is the input a signaling NaN - execute stage
      logic 		  XSNaNM, YSNaNM, ZSNaNM;        // is the input a signaling NaN - memory stage
      logic 		  XDenormE, YDenormE, ZDenormE;  // is the input denormalized
      logic 		  XZeroE, YZeroE, ZZeroE;        // is the input zero - execute stage
      logic 		  XZeroM, YZeroM, ZZeroM;        // is the input zero - memory stage
+     logic       XZeroQ, YZeroQ;                // is the input zero - divide
      logic 		  XInfE, YInfE, ZInfE;           // is the input infinity - execute stage
      logic 		  XInfM, YInfM, ZInfM;           // is the input infinity - memory stage
+     logic       XInfQ, YInfQ;                  // is the input infinity - divide
      logic 		  XExpMaxE;                      // is the exponent all ones (max value)
      logic 		  XNormE;                 // is normal     
      
@@ -180,7 +183,7 @@ module fpu (
      //   - handles FMA and multiply instructions
      fma fma (.clk, .reset, .FlushM, .StallM, 
 	      .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-	      .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .BiasE, 
+	      .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
 	      .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
 	      .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, 
 	      .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,

From a88ae5aaff229f2d28cdf8666fb913df81664bdb Mon Sep 17 00:00:00 2001
From: bbracker <bbracker@hmc.edu>
Date: Sun, 10 Oct 2021 10:09:59 -0700
Subject: [PATCH 11/11] use correct string formatting function

---
 wally-pipelined/testbench/testbench-linux.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv
index 0dda6ffb..7cc1cc9f 100644
--- a/wally-pipelined/testbench/testbench-linux.sv
+++ b/wally-pipelined/testbench/testbench-linux.sv
@@ -359,7 +359,7 @@ module testbench();
         end
         if (RegWriteW == "GPR") begin
           `checkEQ("Reg Write Address",dut.hart.ieu.dp.regf.a3,ExpectedRegAdrW)
-          $sprintf(name,"RF[%02d]",ExpectedRegAdrW);
+          $sformat(name,"RF[%02d]",ExpectedRegAdrW);
           `checkEQ(name, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW)
         end
         if (MemOpW.substr(0,2) == "Mem") begin