Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2021-12-29 20:18:06 -06:00 · 2021-12-29 20:18:06 -06:00 · fd341eda04
commit fd341eda04
parent dd81076671 5ac170cb3a
11 changed files with 163 additions and 154 deletions
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
--- a/wally-pipelined/config/rv32ic/wally-config.vh
+++ b/wally-pipelined/config/rv32ic/wally-config.vh
@ -40,10 +40,10 @@
 `define IEEE754 0

 `define MISA (32'h00000104)
-`define ZICSR_SUPPORTED 1
-`define ZIFENCEI_SUPPORTED 1
+`define ZICSR_SUPPORTED 0
+`define ZIFENCEI_SUPPORTED 0
 `define COUNTERS 32
-`define ZICOUNTERS_SUPPORTED 1
+`define ZICOUNTERS_SUPPORTED 0

 // Microarchitectural Features
 `define UARCH_PIPELINED 1
@ -53,12 +53,12 @@
 `define MEM_DCACHE 1
 `define MEM_IROM 1
 `define MEM_ICACHE 1
-`define MEM_VIRTMEM 1
+`define MEM_VIRTMEM 0
 `define VECTORED_INTERRUPTS_SUPPORTED 1 

 // TLB configuration.  Entries should be a power of 2
-`define ITLB_ENTRIES 32
-`define DTLB_ENTRIES 32
+`define ITLB_ENTRIES 0
+`define DTLB_ENTRIES 0

 // Cache configuration.  Sizes should be a power of two
 // typical configuration 4 ways, 4096 bytes per way, 256 bit or more blocks
@ -75,7 +75,7 @@
 `define DIV_BITSPERCYCLE 4

 // Legal number of PMP entries are 0, 16, or 64
-`define PMP_ENTRIES 16
+`define PMP_ENTRIES 0

 // Address space
 `define RESET_VECTOR 32'h80000000
--- a/wally-pipelined/regression/lint-wally
+++ b/wally-pipelined/regression/lint-wally
@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/
 verilator=`which verilator`

 basepath=$(dirname $0)/..
-for config in rv64gc rv32gc; do
+for config in rv64gc rv32gc rv32ic; do
    echo "$config linting..."
    if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then
        echo "Exiting after $config lint due to errors or warnings"
--- a/wally-pipelined/regression/regression-wally.py
+++ b/wally-pipelined/regression/regression-wally.py
@ -15,7 +15,7 @@ import sys,os
 from collections import namedtuple
 regressionDir = os.path.dirname(os.path.abspath(__file__))
 os.chdir(regressionDir)
-TestCase = namedtuple("TestCase", ['name', 'cmd', 'grepstr'])
+TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
 # name:     the name of this test configuration (used in printing human-readable
 #           output and picking logfile names)
 # cmd:      the command to run to test (should include the logfile as '{}', and
@ -28,6 +28,7 @@ TestCase = namedtuple("TestCase", ['name', 'cmd', 'grepstr'])
 configs = [
    TestCase(
        name="lints",
+        variant="all",
        cmd="./lint-wally &> {}",
        grepstr="All lints run with no errors or warnings"
    )
@ -41,29 +42,40 @@ def getBuildrootTC(short):
    else:
        BRcmd="vsim > {} -c <<!\ndo wally-buildroot-batch.do 0 1 0\n!"
        BRgrepstr=str(MAX_EXPECTED)+" instructions"
-    return  TestCase(name="buildroot",cmd=BRcmd,grepstr=BRgrepstr)
+    return  TestCase(name="buildroot",variant="rv64gc",cmd=BRcmd,grepstr=BRgrepstr)

 tc = TestCase(
      name="buildroot-checkpoint",
+      variant="rv6gc",
      cmd="vsim > {} -c <<!\ndo wally-buildroot-batch.do 400100000 400000001 400000000\n!",
      grepstr="400100000 instructions")
 configs.append(tc)

-tests64 = ["wally64i", "arch64i", "arch64priv", "arch64c",  "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64p", "imperas64mmu", "imperas64m", "imperas64a",  "imperas64c"] #,  "testsBP64"] 
-for test in tests64:
+tests64gc = ["arch64i", "arch64priv", "arch64c",  "arch64m", "arch64d", "imperas64i", "imperas64f", "imperas64d", "imperas64p", "imperas64mmu", "imperas64m", "imperas64a",  "imperas64c"] # "wally64i", #,  "testsBP64"] 
+for test in tests64gc:
  tc = TestCase(
        name=test,
+        variant="rv64gc",
        cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do rv64gc "+test+"\n!",
        grepstr="All tests ran without failures")
  configs.append(tc)
-tests32 = ["wally32i", "arch32i", "arch32priv", "arch32c",  "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32p", "imperas32mmu", "imperas32m", "imperas32a",  "imperas32c"] 
-for test in tests32:
+tests32gc = ["arch32i", "arch32priv", "arch32c",  "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32p", "imperas32mmu", "imperas32m", "imperas32a",  "imperas32c"]  #"wally32i", 
+for test in tests32gc:
  tc = TestCase(
        name=test,
+        variant="rv32gc",
        cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do rv32gc "+test+"\n!",
        grepstr="All tests ran without failures")
  configs.append(tc)

+tests32ic = ["arch32i", "arch32c"] 
+for test in tests32ic:
+  tc = TestCase(
+        name=test,
+        variant="rv32ic",
+        cmd="vsim > {} -c <<!\ndo wally-pipelined-batch.do rv32ic "+test+"\n!",
+        grepstr="All tests ran without failures")
+  configs.append(tc)


 import os
@ -76,16 +88,16 @@ def search_log_for_text(text, logfile):

 def run_test_case(config):
    """Run the given test case, and return 0 if the test suceeds and 1 if it fails"""
-    logname = "logs/wally_"+config.name+".log"
+    logname = "logs/"+config.variant+"_"+config.name+".log"
    cmd = config.cmd.format(logname)
    print(cmd)
    os.chdir(regressionDir)
    os.system(cmd)
    if search_log_for_text(config.grepstr, logname):
-        print("%s: Success" % config.name)
+        print("%s_%s: Success" % (config.variant, config.name))
        return 0
    else:
-        print("%s: Failures detected in output" % config.name)
+        print("%s_%s: Failures detected in output" % (config.variant, config.name))
        print("  Check %s" % logname)
        return 1

--- a/wally-pipelined/regression/sim-wally-batch
+++ b/wally-pipelined/regression/sim-wally-batch
@ -1,3 +1,3 @@
 vsim -c <<!
-do wally-pipelined-batch.do rv64gc imperas64periph
+do wally-pipelined-batch.do rv32ic arch32c
 !
--- a/wally-pipelined/src/fpu/fcvt.sv
+++ b/wally-pipelined/src/fpu/fcvt.sv
@ -1,6 +1,7 @@

 `include "wally-config.vh"
 // `include "../../config/rv64icfd/wally-config.vh"
+//  `define XLEN 64
 module fcvt (
 	input logic             XSgnE,      // X's sign
    input logic [10:0]      XExpE,      // X's exponent
@ -59,7 +60,7 @@ module fcvt (
      //  fcvt.lu.d = 111
      //  fcvt.d.l  = 100
      //  fcvt.d.lu = 110
-      //  {long, unsigned, to int, from int}
+      //  {long, unsigned, to int}
   
    // calculate signals based off the input and output's size
    assign Res64 = (FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&~FOpCtrlE[0]);
@ -158,19 +159,24 @@ module fcvt (

    // select the integer result
    assign CvtIntRes = Of ? FOpCtrlE[1] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : 
-                    Uf ? FOpCtrlE[1] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
+                    Uf ? FOpCtrlE[1] ? {63'b0, Plus1&~XSgnE} : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} :
 		            Rounded[64-1:0];

    // select the floating point result            
    assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]};

    // select the result
-    assign CvtResE = ~FOpCtrlE[0] ? CvtFPRes : CvtIntRes;
+    assign CvtResE = FOpCtrlE[0] ? CvtIntRes : CvtFPRes;

    // calculate the flags
-    //      - to int only sets the invalid flag
-    //      - from int only sets the inexact flag
-    assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]};
+    //      - only set invalid flag for out-of-range vales if it isn't be indicated by the inexact
+    //      - don't set inexact flag if converting a really large number (closest __ bit integer value is the max value)
+    //      - don't set inexact flag if converting negitive or tiny number to unsigned (closest integer value is 0 or 1)
+    logic Invalid, Inexact;
+    assign Invalid = (Of | Uf)&FOpCtrlE[0];
+    assign Inexact = (Guard|Round|Sticky)&~((&FOpCtrlE[1:0]&Uf&~(Plus1&~XSgnE))|(FOpCtrlE[0]&Of));
+    assign CvtFlgE = {Invalid&~Inexact, 3'b0, Inexact};
+    // assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]};



--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@ -28,7 +28,6 @@
 // `define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
 // `define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
 // `define XLEN 64
-`define NANPAYLOAD 1
 module fma(
    input logic                 clk,
    input logic                 reset,
@ -117,7 +116,6 @@ module fma1(
    logic [3*`NF+5:0]   AlignedAddendE;     // Z aligned for addition in U(NF+5.2NF+1)
    logic [3*`NF+6:0]   AlignedAddendInv;   // aligned addend possibly inverted
    logic [2*`NF+1:0]   ProdManKilled;      // the product's mantissa possibly killed
-    logic [3*`NF+4:0]   NegProdManKilled;   // a negated ProdManKilled
    logic [3*`NF+6:0]   PreSum, NegPreSum;  // positive and negitve versions of the sum
    logic [`NE-1:0]     XExpVal, YExpVal;   // exponent value after taking into accound denormals
    ///////////////////////////////////////////////////////////////////////////////
@ -321,7 +319,6 @@ module add(
    output logic [3*`NF+6:0]    PreSum, NegPreSum// possibly negitive sum
 );

-    logic [3*`NF+4:0] NegProdManKilled;  // a negated ProdManKilled
    ///////////////////////////////////////////////////////////////////////////////
    // Addition
    ///////////////////////////////////////////////////////////////////////////////
@ -335,15 +332,13 @@ module add(
    assign AlignedAddendInv = InvZE ? {1'b1, ~AlignedAddendE} : {1'b0, AlignedAddendE};
    // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
    assign ProdManKilled = ProdManE&{2*`NF+2{~KillProdE}};
-    // Negate ProdMan for LZA and the negitive sum calculation
-    assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, ~ProdManKilled&{2*`NF+2{~(XZeroE|YZeroE|KillProdE)}}};



    // Do the addition
    //      - calculate a positive and negitive sum in parallel
    assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE};
-    assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE|KillProdE),2'b0};
+    assign NegPreSum = XZeroE|YZeroE|KillProdE ? {1'b0, AlignedAddendE} : {1'b0, AlignedAddendE} + {{`NF+3{1'b1}}, ~ProdManKilled, 2'b0} + {(3*`NF+7)'(4)};
     
    // Is the sum negitive
    assign NegSumE = PreSum[3*`NF+6];
@ -360,6 +355,8 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
    logic [3*`NF+6:0] T;
    logic [3*`NF+6:0] G;
    logic [3*`NF+6:0] Z;
+    logic [3*`NF+6:0] f;
+
    assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
    assign G[3*`NF+6:2*`NF+4] = 0;
    assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4];
@ -375,7 +372,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
    //      - note: the paper linked above uses the numbering system where 0 is the most significant bit
    //f[n] = ~T[n]&T[n-1]           note: n is the MSB
    //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1]))
-    logic [3*`NF+6:0] f;
    assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5];
    assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1}));

@ -440,11 +436,12 @@ module fma2(
    logic               SumZero;        // is the sum zero
    logic               ResultDenorm;   // is the result denormalized
    logic               Sticky, UfSticky;           // Sticky bit
-    logic               Plus1, Minus1, CalcPlus1;   // do you add or subtract one for rounding
+    logic               CalcPlus1;                  // do you add or subtract one for rounding
    logic               UfPlus1;                    // do you add one (for determining underflow flag)
    logic               Invalid,Underflow,Overflow; // flags
    logic               Guard, Round;   // bits needed to determine rounding
    logic               UfLSBNormSum;   // bits needed to determine rounding for underflow flag
+    logic [`FLEN:0]     RoundAdd;       // how much to add to the result
   
    

@ -471,7 +468,7 @@ module fma2(
    // round to nearest max magnitude

    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
-        .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfLSBNormSum);
+        .CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);



@ -503,8 +500,8 @@ module fma2(
    ///////////////////////////////////////////////////////////////////////////////

    resultselect resultselect(.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
-        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, 
-        .ZSgnEffM, .PSgnM, .ResultSgn, .Minus1, .Plus1, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
+        .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
+        .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, 
        .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);

 // *** use NF where needed
@ -539,61 +536,6 @@ module resultsign(

 endmodule

-module resultselect(
-    input logic                 XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
-    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                 PSgnM,      // the product's sign
-    input logic                 ResultSgn,  // the result's sign
-    input logic                 Minus1, Plus1, CalcPlus1, // rounding bits
-    input logic                 Invalid, Overflow, Underflow,  // flags
-    input logic                 ResultDenorm,       // is the result denormalized
-    input logic     [`NE-1:0]   ResultExp,          // Result exponent
-    input logic     [`NF-1:0]   ResultFrac,         // Result fraction
-    output logic    [`FLEN-1:0] FMAResM     // FMA final result
-);
-    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
-
-    generate if(`NANPAYLOAD) begin
-        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
-        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
-        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
-    end else begin
-        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, 22'b0};
-        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, 22'b0};
-        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, 22'b0};
-    end
-    endgenerate
-    
-    
-    assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                          {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
-                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
-    assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
-    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - {62'b0, (Minus1&AddendStickyM)} + {62'b0, (Plus1&AddendStickyM)}} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}};
-    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-    assign FMAResM = XNaNM ? XNaNResult :
-                        YNaNM ? YNaNResult :
-                        ZNaNM ? ZNaNResult :
-                        Invalid ? InvalidResult :
-                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
-                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
-                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
-                        KillProdM ? KillProdResult :  
-			            Overflow ? OverflowResult :
-                        Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-                        FmtM ? {ResultSgn, ResultExp, ResultFrac} :
-                               {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
-
-endmodule

 module normalize(
    input logic  [3*`NF+5:0]    SumM,       // the positive sum
@ -624,19 +566,6 @@ module normalize(
    // Normalization
    ///////////////////////////////////////////////////////////////////////////////

-
-    // logic [8:0] supposedNormCnt;
-    // logic [8:0] i;
-    // always_comb begin
-    //         i = 0;
-    //         while (~SumM[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
-    //         supposedNormCnt = i;    // compute shift count
-    // end
-
-    // always_comb begin
-    //     assert (NormCntM == supposedNormCnt | NormCntM == supposedNormCnt+1 | NormCntM == supposedNormCnt+2) else $fatal ("normcnt not expected");
-    // end
-
    // Determine if the sum is zero
    assign SumZero = ~(|SumM);

@ -644,18 +573,15 @@ module normalize(
    assign FracLen = FmtM ? `NF+1 : 13'd24;

    // calculate the sum's exponent
-    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage
-    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified
+    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
+    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
    
    logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL;
    assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-    assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1)));
+    assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd2)));
    assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127);
-    assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp;
-    assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good
-    // always_comb begin
-    //     assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal");
-    // end
+    assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd25+13'd1023-13'd127)) | ~|SumExpTmpTmp;
+    assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero;

    // 010. when should be 001.
    //      - shift left one
@ -667,9 +593,9 @@ module normalize(

    // Determine the shift needed for denormal results
    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
+    assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1;
    // Normalize the sum
-    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
+    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
    // LZA correction
    assign LZAPlus1 = SumShifted[3*`NF+7];
    assign LZAPlus2 = SumShifted[3*`NF+8];
@ -699,18 +625,18 @@ module fmaround(
    input logic             InvZM,          // invert Z
    input logic  [`NE+1:0]  SumExp,         // exponent of the normalized sum
    input logic             ResultSgnTmp,      // the result's sign
-    output logic            CalcPlus1, Plus1, UfPlus1, Minus1,  // do you add or subtract on from the result
+    output logic            CalcPlus1, UfPlus1,  // do you add or subtract on from the result
    output logic [`NE+1:0]  FullResultExp,      // ResultExp with bits to determine sign and overflow
    output logic [`NF-1:0]  ResultFrac,         // Result fraction
    output logic [`NE-1:0]  ResultExp,          // Result exponent
    output logic            Sticky,             // sticky bit
+    output logic [`FLEN:0]  RoundAdd,           // how much to add to the result
    output logic            Round, Guard, UfLSBNormSum // bits needed to calculate rounding
 );
    logic           LSBNormSum;         // bit used for rounding - least significant bit of the normalized sum
    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
-    logic           UfGuard;            // gaurd bit used to caluculate underflow
-    logic           UfCalcPlus1, CalcMinus1;    // do you add or subtract on from the result
-    logic [`FLEN:0] RoundAdd;           // how much to add to the result
+    logic           UfGuard;            // guard bit used to caluculate underflow
+    logic           UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract on from the result
    logic [`NF-1:0] NormSumTruncated;   // the normalized sum trimed to fit the mantissa
    logic           UfRound;

@ -857,4 +783,62 @@ module fmaflags(
    //      - Don't set the underflow flag if the result was rounded up to a normal number
    assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};

+endmodule
+
+
+module resultselect(
+    input logic                 XSgnM, YSgnM,        // input signs
+    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic                 FmtM,       // precision 1 = double 0 = single
+    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                 PSgnM,      // the product's sign
+    input logic                 ResultSgn,  // the result's sign
+    input logic                 CalcPlus1,  // rounding bits
+    input logic     [`FLEN:0]   RoundAdd,   // how much to add to the result
+    input logic                 Invalid, Overflow, Underflow,  // flags
+    input logic                 ResultDenorm,       // is the result denormalized
+    input logic     [`NE-1:0]   ResultExp,          // Result exponent
+    input logic     [`NF-1:0]   ResultFrac,         // Result fraction
+    output logic    [`FLEN-1:0] FMAResM     // FMA final result
+);
+    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
+
+    generate if(`IEEE754) begin
+        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
+        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
+        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
+    end else begin
+        assign XNaNResult = FmtM ? {1'b0, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpM[7:0], 1'b1, 22'b0};
+        assign YNaNResult = FmtM ? {1'b0, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, YExpM[7:0], 1'b1, 22'b0};
+        assign ZNaNResult = FmtM ? {1'b0, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, ZExpM[7:0], 1'b1, 22'b0};
+    end
+    endgenerate
+    
+    
+    assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                                                                                                          {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
+                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
+                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
+    assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
+    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} + (RoundAdd[59:29]&{31{AddendStickyM}})};
+    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+    assign FMAResM = XNaNM ? XNaNResult :
+                        YNaNM ? YNaNResult :
+                        ZNaNM ? ZNaNResult :
+                        Invalid ? InvalidResult :
+                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
+                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
+                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
+                        KillProdM ? KillProdResult :  
+			            Overflow ? OverflowResult :
+                        Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
+                        FmtM ? {ResultSgn, ResultExp, ResultFrac} :
+                               {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
+
 endmodule
--- a/wally-pipelined/src/lsu/lsu.sv
+++ b/wally-pipelined/src/lsu/lsu.sv
@ -245,7 +245,7 @@ module lsu
 	  assign CacheableM = 1;
 	  assign DTLBPageFaultM = 0;
 	  assign LoadAccessFaultM = 0;
-	  assign StoreMisalignedFaultM = 0;
+	  assign StoreAccessFaultM = 0;
 	  assign LoadMisalignedFaultM = 0;
 	  assign StoreMisalignedFaultM = 0;
 	end
--- a/wally-pipelined/src/mmu/pmpchecker.sv
+++ b/wally-pipelined/src/mmu/pmpchecker.sv
@ -47,34 +47,33 @@ module pmpchecker (
  output logic             PMPStoreAccessFaultM
 );

+  generate
+    if (`PMP_ENTRIES > 0) begin: pmpchecker
+      // Bit i is high when the address falls in PMP region i
+      logic                    EnforcePMP;
+      logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges
+      logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address.
+      logic [`PMP_ENTRIES-1:0] Active;     // PMP register i is non-null
+      logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
+      logic [`PMP_ENTRIES-1:0]   PAgePMPAdr;  // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
+ 
+      pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0](
+        .PhysicalAddress, 
+        .PMPCfg(PMPCFG_ARRAY_REGW),
+        .PMPAdr(PMPADDR_ARRAY_REGW),
+        .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}),
+        .PAgePMPAdrOut(PAgePMPAdr),
+        .FirstMatch, .Match, .Active, .L, .X, .W, .R);

-  // Bit i is high when the address falls in PMP region i
-  logic                    EnforcePMP;
-//  logic [7:0]              PMPCfg[`PMP_ENTRIES-1:0];
-  logic [`PMP_ENTRIES-1:0] Match; // physical address matches one of the pmp ranges
-  logic [`PMP_ENTRIES-1:0] FirstMatch; // onehot encoding for the first pmpaddr to match the current address.
-  logic [`PMP_ENTRIES-1:0] Active;     // PMP register i is non-null
-  logic [`PMP_ENTRIES-1:0] L, X, W, R; // PMP matches and has flag set
-  logic [`PMP_ENTRIES-1:0]   PAgePMPAdr;  // for TOR PMP matching, PhysicalAddress > PMPAdr[i]
-  genvar i,j;
+      priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches.

-  pmpadrdec pmpadrdecs[`PMP_ENTRIES-1:0](
-    .PhysicalAddress, 
-    .PMPCfg(PMPCFG_ARRAY_REGW),
-    .PMPAdr(PMPADDR_ARRAY_REGW),
-    .PAgePMPAdrIn({PAgePMPAdr[`PMP_ENTRIES-2:0], 1'b1}),
-    .PAgePMPAdrOut(PAgePMPAdr),
-    .FirstMatch, .Match, .Active, .L, .X, .W, .R);
-
-  priorityonehot #(`PMP_ENTRIES) pmppriority(.a(Match), .y(FirstMatch)); // combine the match signal from all the adress decoders to find the first one that matches.
-
-  // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
-  assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; 
-
-  assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X;
-  assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM   && ~|W;
-  assign PMPLoadAccessFaultM  = EnforcePMP && ReadAccessM    && ~|R;
+      // Only enforce PMP checking for S and U modes when at least one PMP is active or in Machine mode when L bit is set in selected region
+      assign EnforcePMP = (PrivilegeModeW == `M_MODE) ? |L : |Active; 

+      assign PMPInstrAccessFaultF = EnforcePMP && ExecuteAccessF && ~|X;
+      assign PMPStoreAccessFaultM = EnforcePMP && WriteAccessM   && ~|W;
+      assign PMPLoadAccessFaultM  = EnforcePMP && ReadAccessM    && ~|R;
+    end
+  endgenerate
  //assign PMPSquashBusAccess = PMPInstrAccessFaultF | PMPLoadAccessFaultM | PMPStoreAccessFaultM;
-
 endmodule
--- a/wally-pipelined/src/sdc/SDC.sv
+++ b/wally-pipelined/src/sdc/SDC.sv
@ -79,7 +79,7 @@ module SDC
  
  logic 		    SDCDataValid;
  logic [`XLEN-1:0] 	    SDCReadData;
-    logic [`XLEN-1:0] 	    SDCReadDataPreNibbleSwap;
+  logic [`XLEN-1:0] 	    SDCReadDataPreNibbleSwap;
  logic [`XLEN-1:0] 	    SDCWriteData;
  logic 		    FatalError;
  
--- a/wally-pipelined/testbench/testbench.sv
+++ b/wally-pipelined/testbench/testbench.sv
@ -76,7 +76,7 @@ logic [3:0] dummy;
  // pick tests based on modes supported
  initial begin
    $display("TEST is %s", TEST);
-    tests = '{};
+    //tests = '{};
    if (`XLEN == 64) begin // RV64
      case (TEST)
        "arch64i":                        tests = arch64i;
@ -291,7 +291,15 @@ logic [3:0] dummy;
  // or sw	gp,-56(t0) for new Imperas tests
  // or sw gp, -56(t0) 
  // or on a jump to self infinite loop (6f) for RISC-V Arch tests
-  assign DCacheFlushStart = dut.hart.priv.priv.EcallFaultM && 
+  logic ecf; // remove this once we don't rely on old Imperas tests with Ecalls
+  generate
+    if (`ZICSR_SUPPORTED) begin
+      assign ecf = dut.hart.priv.priv.EcallFaultM;
+    end else begin
+      assign ecf = 0;
+    end
+  endgenerate
+  assign DCacheFlushStart = ecf && 
 			    (dut.hart.ieu.dp.regf.rf[3] == 1 || 
 			     (dut.hart.ieu.dp.regf.we3 && 
 			      dut.hart.ieu.dp.regf.a3 == 3 && 
@ -330,12 +338,12 @@ module riscvassertions;
    assert (`ICACHE_WAYSIZEINBYTES <= 4096 || `MEM_ICACHE == 0 || `MEM_VIRTMEM == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
    assert (`ICACHE_BLOCKLENINBITS >= 32 || `MEM_ICACHE == 0) else $error("ICACHE_BLOCKLENINBITS must be at least 32 when caches are enabled");
    assert (`ICACHE_BLOCKLENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_BLOCKLENINBITS must be smaller than way size");
-    assert (2**$clog2(`DCACHE_BLOCKLENINBITS) == `DCACHE_BLOCKLENINBITS) else $error("DCACHE_BLOCKLENINBITS must be a power of 2");
-    assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
-    assert (2**$clog2(`ICACHE_BLOCKLENINBITS) == `ICACHE_BLOCKLENINBITS) else $error("ICACHE_BLOCKLENINBITS must be a power of 2");
-    assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
-    assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES) else $error("ITLB_ENTRIES must be a power of 2");
-    assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES) else $error("DTLB_ENTRIES must be a power of 2");
+    assert (2**$clog2(`DCACHE_BLOCKLENINBITS) == `DCACHE_BLOCKLENINBITS || `MEM_DCACHE==0) else $error("DCACHE_BLOCKLENINBITS must be a power of 2");
+    assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES || `MEM_DCACHE==0) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
+    assert (2**$clog2(`ICACHE_BLOCKLENINBITS) == `ICACHE_BLOCKLENINBITS || `MEM_ICACHE==0) else $error("ICACHE_BLOCKLENINBITS must be a power of 2");
+    assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES || `MEM_ICACHE==0) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
+    assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES || `MEM_VIRTMEM==0) else $error("ITLB_ENTRIES must be a power of 2");
+    assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES || `MEM_VIRTMEM==0) else $error("DTLB_ENTRIES must be a power of 2");
    assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF");
 	assert (`ZICSR_SUPPORTED == 1 || (`PMP_ENTRIES == 0 && `MEM_VIRTMEM == 0)) else $error("PMP_ENTRIES and MEM_VIRTMEM must be zero if ZICSR not supported.");
  end