From 1d4ff095cfbdb5f40637a85ff1139ecfecfd9c61 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 30 Dec 2021 00:19:40 +0000
Subject: [PATCH 1/2] all FCVT imperas tests pass

---
 addins/riscv-arch-test          |   2 +-
 wally-pipelined/src/fpu/fcvt.sv |  18 ++--
 wally-pipelined/src/fpu/fma.sv  | 170 +++++++++++++++-----------------
 3 files changed, 90 insertions(+), 100 deletions(-)

diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test
index 307c77b26..be67c99bd 160000
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
diff --git a/wally-pipelined/src/fpu/fcvt.sv b/wally-pipelined/src/fpu/fcvt.sv
index 087de2630..f48b3fd93 100644
--- a/wally-pipelined/src/fpu/fcvt.sv
+++ b/wally-pipelined/src/fpu/fcvt.sv
@@ -1,6 +1,7 @@
 
 `include "wally-config.vh"
 // `include "../../config/rv64icfd/wally-config.vh"
+//  `define XLEN 64
 module fcvt (
 	input logic             XSgnE,      // X's sign
     input logic [10:0]      XExpE,      // X's exponent
@@ -59,7 +60,7 @@ module fcvt (
       //  fcvt.lu.d = 111
       //  fcvt.d.l  = 100
       //  fcvt.d.lu = 110
-      //  {long, unsigned, to int, from int}
+      //  {long, unsigned, to int}
    
     // calculate signals based off the input and output's size
     assign Res64 = (FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&~FOpCtrlE[0]);
@@ -158,19 +159,24 @@ module fcvt (
 
     // select the integer result
     assign CvtIntRes = Of ? FOpCtrlE[1] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : 
-                    Uf ? FOpCtrlE[1] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
+                    Uf ? FOpCtrlE[1] ? {63'b0, Plus1&~XSgnE} : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
 		            Rounded[64-1:0];
 
     // select the floating point result            
     assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]};
 
     // select the result
-    assign CvtResE = ~FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
+    assign CvtResE = FOpCtrlE[0] ? CvtIntRes : CvtFPRes;
 
     // calculate the flags
-    //      - to int only sets the invalid flag
-    //      - from int only sets the inexact flag
-    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]};
+    //      - only set the invalid flag for out-of-range values if it isn't already indicated by the inexact flag
+    //      - don't set inexact flag if converting a really large number (closest __ bit integer value is the max value)
+    //      - don't set inexact flag if converting negative or tiny number to unsigned (closest integer value is 0 or 1)
+    logic Invalid, Inexact;
+    assign Invalid = (Of | Uf)&FOpCtrlE[0];
+    assign Inexact = (Guard|Round|Sticky)&~((&FOpCtrlE[1:0]&Uf&~(Plus1&~XSgnE))|(FOpCtrlE[0]&Of));
+    assign CvtFlgE = {Invalid&~Inexact, 3'b0, Inexact};
+    // assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]};
 
 
 
diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv
index a90848f5d..32130ffe9 100644
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@@ -28,7 +28,6 @@
 // `define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
 // `define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
 // `define XLEN 64
-`define NANPAYLOAD 1
 module fma(
     input logic                 clk,
     input logic                 reset,
@@ -117,7 +116,6 @@ module fma1(
     logic [3*`NF+5:0]   AlignedAddendE;     // Z aligned for addition in U(NF+5.2NF+1)
     logic [3*`NF+6:0]   AlignedAddendInv;   // aligned addend possibly inverted
     logic [2*`NF+1:0]   ProdManKilled;      // the product's mantissa possibly killed
-    logic [3*`NF+4:0]   NegProdManKilled;   // a negated ProdManKilled
     logic [3*`NF+6:0]   PreSum, NegPreSum;  // positive and negitve versions of the sum
     logic [`NE-1:0]     XExpVal, YExpVal;   // exponent value after taking into accound denormals
     ///////////////////////////////////////////////////////////////////////////////
@@ -321,7 +319,6 @@ module add(
     output logic [3*`NF+6:0]    PreSum, NegPreSum// possibly negitive sum
 );
 
-    logic [3*`NF+4:0] NegProdManKilled;  // a negated ProdManKilled
     ///////////////////////////////////////////////////////////////////////////////
     // Addition
     ///////////////////////////////////////////////////////////////////////////////
@@ -335,15 +332,13 @@ module add(
     assign AlignedAddendInv = InvZE ? {1'b1, ~AlignedAddendE} : {1'b0, AlignedAddendE};
     // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
     assign ProdManKilled = ProdManE&{2*`NF+2{~KillProdE}};
-    // Negate ProdMan for LZA and the negitive sum calculation
-    assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, ~ProdManKilled&{2*`NF+2{~(XZeroE|YZeroE|KillProdE)}}};
 
 
 
     // Do the addition
     //      - calculate a positive and negitive sum in parallel
     assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE};
-    assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE|KillProdE),2'b0};
+    assign NegPreSum = XZeroE|YZeroE|KillProdE ? {1'b0, AlignedAddendE} : {1'b0, AlignedAddendE} + {{`NF+3{1'b1}}, ~ProdManKilled, 2'b0} + {(3*`NF+7)'(4)};
      
     // Is the sum negitive
     assign NegSumE = PreSum[3*`NF+6];
@@ -360,6 +355,8 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
     logic [3*`NF+6:0] T;
     logic [3*`NF+6:0] G;
     logic [3*`NF+6:0] Z;
+    logic [3*`NF+6:0] f;
+
     assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
     assign G[3*`NF+6:2*`NF+4] = 0;
     assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4];
@@ -375,7 +372,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
     //      - note: the paper linked above uses the numbering system where 0 is the most significant bit
     //f[n] = ~T[n]&T[n-1]           note: n is the MSB
     //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1]))
-    logic [3*`NF+6:0] f;
     assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5];
     assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1}));
 
@@ -440,11 +436,12 @@ module fma2(
     logic               SumZero;        // is the sum zero
     logic               ResultDenorm;   // is the result denormalized
     logic               Sticky, UfSticky;           // Sticky bit
-    logic               Plus1, Minus1, CalcPlus1;   // do you add or subtract one for rounding
+    logic               CalcPlus1;                  // do you add or subtract one for rounding
     logic               UfPlus1;                    // do you add one (for determining underflow flag)
     logic               Invalid,Underflow,Overflow; // flags
     logic               Guard, Round;   // bits needed to determine rounding
     logic               UfLSBNormSum;   // bits needed to determine rounding for underflow flag
+    logic [`FLEN:0]     RoundAdd;       // how much to add to the result
    
     
 
@@ -471,7 +468,7 @@ module fma2(
     // round to nearest max magnitude
 
     fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
-        .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfLSBNormSum);
+        .CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);
 
 
 
@@ -503,8 +500,8 @@ module fma2(
     ///////////////////////////////////////////////////////////////////////////////
 
     resultselect resultselect(.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
-        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, 
-        .ZSgnEffM, .PSgnM, .ResultSgn, .Minus1, .Plus1, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
+        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
+        .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
         .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
 
 // *** use NF where needed
@@ -539,61 +536,6 @@ module resultsign(
 
 endmodule
 
-module resultselect(
-    input logic                 XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
-    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                 PSgnM,      // the product's sign
-    input logic                 ResultSgn,  // the result's sign
-    input logic                 Minus1, Plus1, CalcPlus1, // rounding bits
-    input logic                 Invalid, Overflow, Underflow,  // flags
-    input logic                 ResultDenorm,       // is the result denormalized
-    input logic     [`NE-1:0]   ResultExp,          // Result exponent
-    input logic     [`NF-1:0]   ResultFrac,         // Result fraction
-    output logic    [`FLEN-1:0] FMAResM     // FMA final result
-);
-    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
-
-    generate if(`NANPAYLOAD) begin
-        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
-        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
-        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
-    end else begin
-        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, 22'b0};
-        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, 22'b0};
-        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, 22'b0};
-    end
-    endgenerate
-    
-    
-    assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                          {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
-                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
-    assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
-    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - {62'b0, (Minus1&AddendStickyM)} + {62'b0, (Plus1&AddendStickyM)}} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
-    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-    assign FMAResM = XNaNM ? XNaNResult :
-                        YNaNM ? YNaNResult :
-                        ZNaNM ? ZNaNResult :
-                        Invalid ? InvalidResult :
-                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
-                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
-                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
-                        KillProdM ? KillProdResult :  
-			            Overflow ? OverflowResult :
-                        Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-                        FmtM ? {ResultSgn, ResultExp, ResultFrac} :
-                               {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
-
-endmodule
 
 module normalize(
     input logic  [3*`NF+5:0]    SumM,       // the positive sum
@@ -624,19 +566,6 @@ module normalize(
     // Normalization
     ///////////////////////////////////////////////////////////////////////////////
 
-
-    // logic [8:0] supposedNormCnt;
-    // logic [8:0] i;
-    // always_comb begin
-    //         i = 0;
-    //         while (~SumM[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
-    //         supposedNormCnt = i;    // compute shift count
-    // end
-
-    // always_comb begin
-    //     assert (NormCntM == supposedNormCnt | NormCntM == supposedNormCnt+1 | NormCntM == supposedNormCnt+2) else $fatal ("normcnt not expected");
-    // end
-
     // Determine if the sum is zero
     assign SumZero = ~(|SumM);
 
@@ -644,18 +573,15 @@ module normalize(
     assign FracLen = FmtM ? `NF+1 : 13'd24;
 
     // calculate the sum's exponent
-    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage
-    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified
+    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
+    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
     
     logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL;
     assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-    assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1)));
+    assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd2)));
     assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127);
-    assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp;
-    assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good
-    // always_comb begin
-    //     assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal");
-    // end
+    assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd25+13'd1023-13'd127)) | ~|SumExpTmpTmp;
+    assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero;
 
     // 010. when should be 001.
     //      - shift left one
@@ -667,9 +593,9 @@ module normalize(
 
     // Determine the shift needed for denormal results
     //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
+    assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1;
     // Normalize the sum
-    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
+    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
     // LZA correction
     assign LZAPlus1 = SumShifted[3*`NF+7];
     assign LZAPlus2 = SumShifted[3*`NF+8];
@@ -699,18 +625,18 @@ module fmaround(
     input logic             InvZM,          // invert Z
     input logic  [`NE+1:0]  SumExp,         // exponent of the normalized sum
     input logic             ResultSgnTmp,      // the result's sign
-    output logic            CalcPlus1, Plus1, UfPlus1, Minus1,  // do you add or subtract on from the result
+    output logic            CalcPlus1, UfPlus1,  // do you add or subtract one from the result
     output logic [`NE+1:0]  FullResultExp,      // ResultExp with bits to determine sign and overflow
     output logic [`NF-1:0]  ResultFrac,         // Result fraction
     output logic [`NE-1:0]  ResultExp,          // Result exponent
     output logic            Sticky,             // sticky bit
+    output logic [`FLEN:0]  RoundAdd,           // how much to add to the result
     output logic            Round, Guard, UfLSBNormSum // bits needed to calculate rounding
 );
     logic           LSBNormSum;         // bit used for rounding - least significant bit of the normalized sum
     logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
-    logic           UfGuard;            // gaurd bit used to caluculate underflow
-    logic           UfCalcPlus1, CalcMinus1;    // do you add or subtract on from the result
-    logic [`FLEN:0] RoundAdd;           // how much to add to the result
+    logic           UfGuard;            // guard bit used to calculate underflow
+    logic           UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract one from the result
     logic [`NF-1:0] NormSumTruncated;   // the normalized sum trimed to fit the mantissa
     logic           UfRound;
 
@@ -857,4 +783,62 @@ module fmaflags(
     //      - Don't set the underflow flag if the result was rounded up to a normal number
     assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
 
+endmodule
+
+
+module resultselect( // picks the final FMA result from the special-case candidates and the rounded result
+    input logic                 XSgnM, YSgnM,        // input signs
+    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]       FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic                 FmtM,       // precision 1 = double 0 = single
+    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                 PSgnM,      // the product's sign
+    input logic                 ResultSgn,  // the result's sign
+    input logic                 CalcPlus1,  // rounding bit - used here only to build UnderflowResult
+    input logic     [`FLEN:0]   RoundAdd,   // how much to add to the result - used here only to build KillProdResult
+    input logic                 Invalid, Overflow, Underflow,  // flags
+    input logic                 ResultDenorm,       // is the result denormalized
+    input logic     [`NE-1:0]   ResultExp,          // Result exponent
+    input logic     [`NF-1:0]   ResultFrac,         // Result fraction
+    output logic    [`FLEN-1:0] FMAResM     // FMA final result
+);
+    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // candidate results
+    // NaN candidates - every single precision candidate below fills the upper 32 bits with ones
+    generate if(`IEEE754) begin // keep the NaN input's sign and lower fraction bits, force the fraction's MSB to 1
+        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
+        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
+        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
+    end else begin // otherwise force the sign to 0 and the fraction to a 1 followed by zeros
+        assign XNaNResult = FmtM ? {1'b0, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpM[7:0], 1'b1, 22'b0};
+        assign YNaNResult = FmtM ? {1'b0, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, YExpM[7:0], 1'b1, 22'b0};
+        assign ZNaNResult = FmtM ? {1'b0, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, ZExpM[7:0], 1'b1, 22'b0};
+    end
+    endgenerate
+    // overflow: round towards zero, round down with a positive result, or round up with a negative result gives the largest finite magnitude; otherwise infinity
+    
+    assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                                                                                                          {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
+                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
+                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
+    assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; // exponent all ones, only the fraction's MSB set
+    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} + (RoundAdd[59:29]&{31{AddendStickyM}})}; // Z's exponent and fraction plus RoundAdd, where RoundAdd is masked to zero unless AddendStickyM is set
+    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; // signed zero, plus one LSB when CalcPlus1&(AddendStickyM|FrmM[1])
+    assign FMAResM = XNaNM ? XNaNResult : // priority select - the first true condition wins
+                        YNaNM ? YNaNResult :
+                        ZNaNM ? ZNaNResult :
+                        Invalid ? InvalidResult :
+                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
+                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
+                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
+                        KillProdM ? KillProdResult :  
+			            Overflow ? OverflowResult :
+                        Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
+                        FmtM ? {ResultSgn, ResultExp, ResultFrac} :
+                               {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
+
 endmodule
\ No newline at end of file

From 866a5efc43ba8e1e8007160c4b9d06f4df4cd1d7 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Thu, 30 Dec 2021 00:53:39 +0000
Subject: [PATCH 2/2] rv32i regression and linting

---
 wally-pipelined/config/rv32ic/wally-config.vh | 14 ++---
 wally-pipelined/regression/lint-wally         |  2 +-
 .../regression/regression-wally.py            | 30 +++++++----
 wally-pipelined/regression/sim-wally-batch    |  2 +-
 wally-pipelined/src/lsu/lsu.sv                |  2 +-
 wally-pipelined/src/mmu/pmpchecker.sv         | 51 +++++++++----------
 wally-pipelined/src/sdc/SDC.sv                |  2 +-
 wally-pipelined/testbench/testbench.sv        | 24 ++++++---
 8 files changed, 73 insertions(+), 54 deletions(-)

diff --git a/wally-pipelined/config/rv32ic/wally-config.vh b/wally-pipelined/config/rv32ic/wally-config.vh
index 36146e655..b93d77bdc 100644
--- a/wally-pipelined/config/rv32ic/wally-config.vh
+++ b/wally-pipelined/config/rv32ic/wally-config.vh
@@ -40,10 +40,10 @@
 `define IEEE754 0
 
 `define MISA (32'h00000104)
-`define ZICSR_SUPPORTED 1
-`define ZIFENCEI_SUPPORTED 1
+`define ZICSR_SUPPORTED 0
+`define ZIFENCEI_SUPPORTED 0
 `define COUNTERS 32
-`define ZICOUNTERS_SUPPORTED 1
+`define ZICOUNTERS_SUPPORTED 0
 
 // Microarchitectural Features
 `define UARCH_PIPELINED 1
@@ -53,12 +53,12 @@
 `define MEM_DCACHE 1
 `define MEM_IROM 1
 `define MEM_ICACHE 1
-`define MEM_VIRTMEM 1
+`define MEM_VIRTMEM 0
 `define VECTORED_INTERRUPTS_SUPPORTED 1 
 
 // TLB configuration.  Entries should be a power of 2
-`define ITLB_ENTRIES 32
-`define DTLB_ENTRIES 32
+`define ITLB_ENTRIES 0
+`define DTLB_ENTRIES 0
 
 // Cache configuration.  Sizes should be a power of two
 // typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks
@@ -75,7 +75,7 @@
 `define DIV_BITSPERCYCLE 4
 
 // Legal number of PMP entries are 0, 16, or 64
-`define PMP_ENTRIES 16
+`define PMP_ENTRIES 0
 
 // Address space
 `define RESET_VECTOR 32'h80000000
diff --git a/wally-pipelined/regression/lint-wally b/wally-pipelined/regression/lint-wally
index 23f239c1a..5968cb4df 100755
--- a/wally-pipelined/regression/lint-wally
+++ b/wally-pipelined/regression/lint-wally
@@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/
 verilator=`which verilator`
 
 basepath=$(dirname $0)/..
-for config in rv64gc rv32gc; do
+for config in rv64gc rv32gc rv32ic; do
     echo "$config linting..."
     if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then
         echo "Exiting after $config lint due to errors or warnings"
diff --git a/wally-pipelined/regression/regression-wally.py b/wally-pipelined/regression/regression-wally.py
index 508dd3716..2c6850b62 100755
--- a/wally-pipelined/regression/regression-wally.py
+++ b/wally-pipelined/regression/regression-wally.py
@@ -15,7 +15,7 @@ import sys,os
 from collections import namedtuple
 regressionDir = os.path.dirname(os.path.abspath(__file__))
 os.chdir(regressionDir)
-TestCase = namedtuple("TestCase", ['name', 'cmd', 'grepstr'])
+TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
 # name:     the name of this test configuration (used in printing human-readable
 #           output and picking logfile names)
 # cmd:      the command to run to test (should include the logfile as '{}', and
@@ -28,6 +28,7 @@ TestCase = namedtuple("TestCase", ['name', 'cmd', 'grepstr'])
 configs = [
     TestCase(
         name="lints",
+        variant="all",
         cmd="./lint-wally &> {}",
         grepstr="All lints run with no errors or warnings"
     )
@@ -41,29 +42,40 @@ def getBuildrootTC(short):
     else:
         BRcmd="vsim > {} -c <<!\ndo wally-buildroot-batch.do 0 1 0\n!"
         BRgrepstr=str(MAX_EXPECTED)+" instructions"
-    return  TestCase(name="buildroot",cmd=BRcmd,grepstr=BRgrepstr)
+    return  TestCase(name="buildroot",variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)
 
 tc = TestCase(
       name="buildroot-checkpoint",
+      variant="rv64gc",  # same variant as getBuildrootTC; run_test_case uses it in the log name and status messages
       cmd="vsim > {} -c <<!\ndo wally-buildroot-batch.do 400100000 400000001 400000000\n!",
       grepstr="400100000 instructions")
 configs.append(tc)
 
-tests64 = ["wally64i", "arch64i", "arch64priv", "arch64c",  "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64p", "imperas64mmu", "imperas64m", "imperas64a",  "imperas64c"] #,  "testsBP64"] 
-for test in tests64:
+tests64gc = ["arch64i", "arch64priv", "arch64c",  "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64p", "imperas64mmu", "imperas64m", "imperas64a",  "imperas64c"] # "wally64i", #,  "testsBP64"] 
+for test in tests64gc:
   tc = TestCase(
         name=test,
+        variant="rv64gc",
         cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do rv64gc "+test+"\n!",
         grepstr="All tests ran without failures")
   configs.append(tc)
-tests32 = ["wally32i", "arch32i", "arch32priv", "arch32c",  "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32p", "imperas32mmu", "imperas32m", "imperas32a",  "imperas32c"] 
-for test in tests32:
+tests32gc = ["arch32i", "arch32priv", "arch32c",  "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32p", "imperas32mmu", "imperas32m", "imperas32a",  "imperas32c"]  #"wally32i", 
+for test in tests32gc:
   tc = TestCase(
         name=test,
+        variant="rv32gc",
         cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do rv32gc "+test+"\n!",
         grepstr="All tests ran without failures")
   configs.append(tc)
 
+tests32ic = ["arch32i", "arch32c"] 
+for test in tests32ic:
+  tc = TestCase(
+        name=test,
+        variant="rv32ic",
+        cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do rv32ic "+test+"\n!",
+        grepstr="All tests ran without failures")
+  configs.append(tc)
 
 
 import os
@@ -76,16 +88,16 @@ def search_log_for_text(text, logfile):
 
 def run_test_case(config):
     """Run the given test case, and return 0 if the test suceeds and 1 if it fails"""
-    logname = "logs/wally_"+config.name+".log"
+    logname = "logs/"+config.variant+"_"+config.name+".log"
     cmd = config.cmd.format(logname)
     print(cmd)
     os.chdir(regressionDir)
     os.system(cmd)
     if search_log_for_text(config.grepstr, logname):
-        print("%s: Success" % config.name)
+        print("%s_%s: Success" % (config.variant, config.name))
         return 0
     else:
-        print("%s: Failures detected in output" % config.name)
+        print("%s_%s: Failures detected in output" % (config.variant, config.name))
         print("  Check %s" % logname)
         return 1
 
diff --git a/wally-pipelined/regression/sim-wally-batch b/wally-pipelined/regression/sim-wally-batch
index 6af36b4f7..1dd1e68d2 100755
--- a/wally-pipelined/regression/sim-wally-batch
+++ b/wally-pipelined/regression/sim-wally-batch
@@ -1,3 +1,3 @@
 vsim -c <<!
-do wally-pipelined-batch.do rv64gc imperas64periph
+do wally-pipelined-batch.do rv32ic arch32c
 !
diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv
index d2207be02..077997d17 100644
--- a/wally-pipelined/src/lsu/lsu.sv
+++ b/wally-pipelined/src/lsu/lsu.sv
@@ -310,7 +310,7 @@ module lsu
 	  assign CacheableM = 1;
 	  assign DTLBPageFaultM = 0;
 	  assign LoadAccessFaultM = 0;
-	  assign StoreMisalignedFaultM = 0;
+	  assign StoreAccessFaultM = 0;
 	  assign LoadMisalignedFaultM = 0;
 	  assign StoreMisalignedFaultM = 0;
 	end
diff --git a/wally-pipelined/src/mmu/pmpchecker.sv b/wally-pipelined/src/mmu/pmpchecker.sv
index 06cc9ea8b..90ee0994d 100644
--- a/wally-pipelined/src/mmu/pmpchecker.sv
+++ b/wally-pipelined/src/mmu/pmpchecker.sv
@@ -47,34 +47,38 @@ module pmpchecker (
   output logic             PMPStoreAccessFaultM
 );
 
+  generate
+    if (`PMP_ENTRIES > 0) begin: pmpchecker
+      // The vectors below hold one bit per PMP entry: bit i belongs to PMP region i
+      logic                    EnforcePMP;
+      logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges
+      logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address.
+      logic [`PMP_ENTRIES-1:0] Active;     // PMP register i is non-null
+      logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
+      logic [`PMP_ENTRIES-1:0]   PAgePMPAdr;  // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
+
+      pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0](
+        .PhysicalAddress,
+        .PMPCfg(PMPCFG_ARRAY_REGW),
+        .PMPAdr(PMPADDR_ARRAY_REGW),
+        .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}),
+        .PAgePMPAdrOut(PAgePMPAdr),
+        .FirstMatch, .Match, .Active, .L, .X, .W, .R);
 
-  // Bit i is high when the address falls in PMP region i
-  logic                    EnforcePMP;
-//  logic [7:0]              PMPCfg[`PMP_ENTRIES-1:0];
-  logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges
-  logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address.
-  logic [`PMP_ENTRIES-1:0] Active;     // PMP register i is non-null
-  logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
-  logic [`PMP_ENTRIES-1:0]   PAgePMPAdr;  // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
-  genvar i,j;
+      priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the address decoders to find the first one that matches.
 
-  pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0](
-    .PhysicalAddress, 
-    .PMPCfg(PMPCFG_ARRAY_REGW),
-    .PMPAdr(PMPADDR_ARRAY_REGW),
-    .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}),
-    .PAgePMPAdrOut(PAgePMPAdr),
-    .FirstMatch, .Match, .Active, .L, .X, .W, .R);
-
-  priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches.
-
-  // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
-  assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; 
-
-  assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X;
-  assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM   && ~|W;
-  assign PMPLoadAccessFaultM  = EnforcePMP && ReadAccessM    && ~|R;
+      // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
+      assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active;
 
+      assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X;
+      assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM   && ~|W;
+      assign PMPLoadAccessFaultM  = EnforcePMP && ReadAccessM    && ~|R;
+    end else begin: nopmp
+      // No PMP entries are implemented, so there is nothing to enforce: drive the fault outputs low instead of leaving them undriven
+      assign PMPInstrAccessFaultF = 1'b0;
+      assign PMPStoreAccessFaultM = 1'b0;
+      assign PMPLoadAccessFaultM  = 1'b0;
+    end
+  endgenerate
   //assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM;
-
 endmodule
diff --git a/wally-pipelined/src/sdc/SDC.sv b/wally-pipelined/src/sdc/SDC.sv
index 424d45771..732141152 100644
--- a/wally-pipelined/src/sdc/SDC.sv
+++ b/wally-pipelined/src/sdc/SDC.sv
@@ -79,7 +79,7 @@ module SDC
   
   logic 		    SDCDataValid;
   logic [`XLEN-1:0] 	    SDCReadData;
-    logic [`XLEN-1:0] 	    SDCReadDataPreNibbleSwap;
+  logic [`XLEN-1:0] 	    SDCReadDataPreNibbleSwap;
   logic [`XLEN-1:0] 	    SDCWriteData;
   logic 		    FatalError;
   
diff --git a/wally-pipelined/testbench/testbench.sv b/wally-pipelined/testbench/testbench.sv
index 417bf7811..16e0b3f77 100644
--- a/wally-pipelined/testbench/testbench.sv
+++ b/wally-pipelined/testbench/testbench.sv
@@ -76,7 +76,7 @@ logic [3:0] dummy;
   // pick tests based on modes supported
   initial begin
     $display("TEST is %s", TEST);
-    tests = '{};
+    //tests = '{};
     if (`XLEN == 64) begin // RV64
       case (TEST)
         "arch64i":                        tests = arch64i;
@@ -291,7 +291,15 @@ logic [3:0] dummy;
   // or sw	gp,-56(t0) for new Imperas tests
   // or sw gp, -56(t0) 
   // or on a jump to self infinite loop (6f) for RISC-V Arch tests
-  assign DCacheFlushStart = dut.hart.priv.priv.EcallFaultM && 
+  logic ecf; // ecall-fault term of DCacheFlushStart; remove this once we don't rely on old Imperas tests with Ecalls
+  generate
+    if (`ZICSR_SUPPORTED) begin
+      assign ecf = dut.hart.priv.priv.EcallFaultM; // hierarchical reference is only made when ZICSR_SUPPORTED is set
+    end else begin
+      assign ecf = 0; // presumably dut.hart.priv.priv is absent without Zicsr -- TODO confirm
+    end
+  endgenerate
+  assign DCacheFlushStart = ecf && 
 			    (dut.hart.ieu.dp.regf.rf[3] == 1 || 
 			     (dut.hart.ieu.dp.regf.we3 && 
 			      dut.hart.ieu.dp.regf.a3 == 3 && 
@@ -330,12 +338,12 @@ module riscvassertions;
     assert (`ICACHE_WAYSIZEINBYTES <= 4096 || `MEM_ICACHE == 0 || `MEM_VIRTMEM == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
     assert (`ICACHE_BLOCKLENINBITS >= 32 || `MEM_ICACHE == 0) else $error("ICACHE_BLOCKLENINBITS must be at least 32 when caches are enabled");
     assert (`ICACHE_BLOCKLENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_BLOCKLENINBITS must be smaller than way size");
-    assert (2**$clog2(`DCACHE_BLOCKLENINBITS) == `DCACHE_BLOCKLENINBITS) else $error("DCACHE_BLOCKLENINBITS must be a power of 2");
-    assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
-    assert (2**$clog2(`ICACHE_BLOCKLENINBITS) == `ICACHE_BLOCKLENINBITS) else $error("ICACHE_BLOCKLENINBITS must be a power of 2");
-    assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
-    assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES) else $error("ITLB_ENTRIES must be a power of 2");
-    assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES) else $error("DTLB_ENTRIES must be a power of 2");
+    assert (2**$clog2(`DCACHE_BLOCKLENINBITS) == `DCACHE_BLOCKLENINBITS || `MEM_DCACHE==0) else $error("DCACHE_BLOCKLENINBITS must be a power of 2");
+    assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES || `MEM_DCACHE==0) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
+    assert (2**$clog2(`ICACHE_BLOCKLENINBITS) == `ICACHE_BLOCKLENINBITS || `MEM_ICACHE==0) else $error("ICACHE_BLOCKLENINBITS must be a power of 2");
+    assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES || `MEM_ICACHE==0) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
+    assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES || `MEM_VIRTMEM==0) else $error("ITLB_ENTRIES must be a power of 2");
+    assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES || `MEM_VIRTMEM==0) else $error("DTLB_ENTRIES must be a power of 2");
     assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF");
 	assert (`ZICSR_SUPPORTED == 1 || (`PMP_ENTRIES == 0 && `MEM_VIRTMEM == 0)) else $error("PMP_ENTRIES and MEM_VIRTMEM must be zero if ZICSR not supported.");
   end