Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally into main

2021-12-08 14:12:09 -08:00 · 2021-12-08 14:12:09 -08:00 · f8cffca2b2
commit f8cffca2b2
parent 5feccaec68 f1ea52cb2d
14 changed files with 308 additions and 107 deletions
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@@ -1 +1 @@
-Subproject commit 84d043817f75f752c9873326475e11f16e3a6f7c
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
--- a/benchmarks/riscv-coremark/Makefile
+++ b/benchmarks/riscv-coremark/Makefile
@@ -14,10 +14,11 @@ work/coremark.bare.riscv.objdump: work/coremark.bare.riscv

 work/coremark.bare.riscv: $(sources)
 #	make -C $(cmbase) PORT_DIR=/home/harris/riscv-wally/benchmarks/riscv-coremark/riscv64-baremetal compile RISCV=/courses/e190ax/riscvcompiler XCFLAGS="-march=rv64g"
-	make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/courses/e190ax/riscvcompiler XCFLAGS="-march=rv64im"
-	mv $(cmbase)/coremark.bare.riscv work
+	make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv XCFLAGS="-march=rv64imd"
+	mkdir -p work/
+	mv $(cmbase)/coremark.bare.riscv work/

 .PHONY: clean

 clean:
-	rm -f work/*
+	rm -f work/*
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@@ -1,5 +1,10 @@

-`include "../../../config/rv64icfd/wally-config.vh"
+//`include "../../../config/old/rv64icfd/wally-config.vh"
+
+`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
+`define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
+`define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
+`define XLEN 64
 module testbench3();

 logic [31:0] errors=0;
@@ -174,8 +179,9 @@ always @(posedge clk)
 // check results on falling edge of clk
  always @(negedge clk) begin
 
-  //  fp = $fopen("/home/kparry/riscv-wally/wally-pipelined/src/fpu/FMA/tbgen/results.dat","w");
 	if((FmtE==1'b1) & (FMAFlgM != flags[4:0] || (!wnan && (FMAResM != ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] == {XExpE,1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] == {YExpE,1'b1,Y[`NF-2:0]}))  || (ZNaNE && (FMAResM[`FLEN-2:0] == {ZExpE,1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] == ans[`FLEN-2:0]))))) begin
+  //  fp = $fopen("/home/kparry/riscv-wally/wally-pipelined/src/fpu/FMA/tbgen/results.dat","w");
+	// if((FmtE==1'b1) & (FMAFlgM != flags[4:0] || (FMAResM != ans))) begin
        $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
 		if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
 		if(XDenormE) $display( "xdenorm ");
@@ -193,7 +199,7 @@ always @(posedge clk)
 		if(ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] != 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
 		if(ans[`FLEN-2:`NF] == {`NE{1'b1}} && ans[`NF-1:0] != 0 && ans[`NF-1]) $display( "ans=qutNaN ");
        errors = errors + 1;
-
+	  //if (errors == 10)
 		$stop;
    end
    if((FmtE==1'b0)&(FMAFlgM != flags[4:0] || (!wnan && (FMAResM != ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[30:0] == {X[30:23],1'b1,X[21:0]})) || (YNaNE && (FMAResM[30:0] == {Y[30:23],1'b1,Y[21:0]}))  || (ZNaNE && (FMAResM[30:0] == {Z[30:23],1'b1,Z[21:0]})) || (FMAResM[30:0] == ans[30:0]))) ))) begin
--- a/wally-pipelined/regression/regression-wally.py
+++ b/wally-pipelined/regression/regression-wally.py
@@ -34,7 +34,7 @@ configs = [
 ]
 def getBuildrootTC(short):
    INSTR_LIMIT = 100000 # multiple of 100000
-    MAX_EXPECTED = 182000000
+    MAX_EXPECTED = 246000000
    if short:
        BRcmd="vsim > {} -c <<!\ndo wally-buildroot-batch.do "+str(INSTR_LIMIT)+" 1 0\n!"
        BRgrepstr=str(INSTR_LIMIT)+" instructions"
@@ -80,6 +80,7 @@ def run_test_case(config):
    logname = "logs/wally_"+config.name+".log"
    cmd = config.cmd.format(logname)
    print(cmd)
+    os.chdir(regressionDir)
    os.system(cmd)
    if search_log_for_text(config.grepstr, logname):
        print("%s: Success" % config.name)
@@ -93,11 +94,13 @@ def main():
    """Run the tests and count the failures"""
    global configs
    try:
+        os.chdir(regressionDir)
        os.mkdir("logs")
    except:
        pass

    if '-makeTests' in sys.argv:
+        os.chdir(regressionDir)
        os.system('./make-tests.sh | tee ./logs/make-tests.log')

    if '-all' in sys.argv:
--- a/wally-pipelined/regression/wally-coremark.do
+++ b/wally-pipelined/regression/wally-coremark.do
@@ -35,7 +35,7 @@ vlog +incdir+../config/coremark_bare +incdir+../config/shared ../testbench/testb
 vopt +acc work.testbench -o workopt 
 vsim workopt

-mem load -startaddress 268435456 -endaddress 268566527 -filltype value -fillradix hex -filldata 0 /testbench/dut/uncore/dtim/RAM
+mem load -startaddress 268435456 -endaddress 268566527 -filltype value -fillradix hex -filldata 0 /testbench/dut/uncore/dtim/dtim/RAM

 view wave

--- a/wally-pipelined/src/fpu/fcmp.sv
+++ b/wally-pipelined/src/fpu/fcmp.sv
@@ -76,7 +76,12 @@ module fcmp (

   // Determine final values based on output of magnitude comparison, 
   // sign bits, and special case testing. 
-   exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(XNaNE), .BNaN(YNaNE), .Azero(XZeroE), .Bzero(YZeroE), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .FSrcXE, .FSrcYE, .*);
+   exception_cmp_2 exc2 (
+      .invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), 
+      .ANaN(XNaNE), .BNaN(YNaNE), .Azero(XZeroE), .Bzero(YZeroE), 
+      .FOpCtrlE, .A(op1), .B(op2), .FSrcXE, .FSrcYE, 
+      .FmtE, .CmpResE
+      );

 endmodule // fpcomp

--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@@ -23,8 +23,11 @@
 ///////////////////////////////////////////

 `include "wally-config.vh"
-//    `include "../../../config/rv64icfd/wally-config.vh"

+// `define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
+// `define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
+// `define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
+// `define XLEN 64
 module fma(
    input logic                 clk,
    input logic                 reset,
@@ -113,7 +116,7 @@ module fma1(
    logic [3*`NF+5:0]   AlignedAddendE;     // Z aligned for addition in U(NF+5.2NF+1)
    logic [3*`NF+6:0]   AlignedAddendInv;   // aligned addend possibly inverted
    logic [2*`NF+1:0]   ProdManKilled;      // the product's mantissa possibly killed
-    logic [3*`NF+6:0]   NegProdManKilled;   // a negated ProdManKilled
+    logic [3*`NF+4:0]   NegProdManKilled;   // a negated ProdManKilled
    logic [8:0]         PNormCnt, NNormCnt; // the positive and nagitive LOA results
    logic [3*`NF+6:0]   PreSum, NegPreSum;  // positive and negitve versions of the sum

@@ -149,11 +152,11 @@ module fma1(
        
    add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
    
-    loa loa(.AlignedAddendE, .AlignedAddendInv, .ProdManKilled, .NegProdManKilled, .PNormCnt, .NNormCnt);
+    loa loa(.A(AlignedAddendInv+{162'b0,InvZE}), .P(ProdManKilled), .NegSumE, .NormCntE);

    // Choose the positive sum and accompanying LZA result.
    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
-    assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
+    // assign NormCntE = NegSumE ? NNormCnt : PNormCnt;


 endmodule
@@ -311,7 +314,7 @@ module add(
    input logic                 XZeroE, YZeroE, // is the input zero
    output logic [3*`NF+6:0] AlignedAddendInv,  // aligned addend possibly inverted
    output logic [2*`NF+1:0] ProdManKilled,     // the product's mantissa possibly killed
-    output logic [3*`NF+6:0] NegProdManKilled,  // a negated ProdManKilled
+    output logic [3*`NF+4:0] NegProdManKilled,  // a negated ProdManKilled
    output logic                NegSumE,        // was the sum negitive
    output logic                InvZE,          // do you invert Z
    output logic [3*`NF+6:0]   PreSum, NegPreSum// possibly negitive sum
@@ -327,99 +330,65 @@ module add(
    assign InvZE = ZSgnEffE ^ PSgnE;

    // Choose an inverted or non-inverted addend - the one has to be added now for the LZA
-    assign AlignedAddendInv = InvZE ? -{1'b0, AlignedAddendE} : {1'b0, AlignedAddendE};
+    assign AlignedAddendInv = InvZE ? {1'b1, ~AlignedAddendE} : {1'b0, AlignedAddendE};
    // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
    assign ProdManKilled = ProdManE&{2*`NF+2{~KillProdE}};
    // Negate ProdMan for LZA and the negitive sum calculation
-    assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, -ProdManKilled, 2'b0};
+    assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, ~ProdManKilled&{2*`NF+2{~(XZeroE|YZeroE)}}};


+    // Is the sum negative
+    assign NegSumE = (AlignedAddendE > {54'b0, ProdManKilled, 2'b0})&InvZE; //***use this to avoid addition and final muxing???

    // Do the addition
    //      - calculate a positive and negitive sum in parallel
-    assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0};
-    assign NegPreSum = AlignedAddendE + NegProdManKilled;
+    assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE};
+    assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE),2'b0};
     
-    // Is the sum negitive
-    assign NegSumE = PreSum[3*`NF+6];

 endmodule


-module loa(
-    input logic [3*`NF+5:0] AlignedAddendE,     // Z aligned for addition in U(NF+5.2NF+1)
-    input logic [3*`NF+6:0] AlignedAddendInv,   // aligned addend possibly inverted
-    input logic [2*`NF+1:0] ProdManKilled,      // the product's mantissa possibly killed
-    input logic [3*`NF+6:0] NegProdManKilled,   // a negated ProdManKilled
-    output logic [8:0]      PNormCnt, NNormCnt  // positive and negitive LOA result    
-);
-
-    // LZAs one for the positive result and one for the negitive
-    //      - the +1 from inverting causes problems for normalization
-    posloa posloa(AlignedAddendInv, ProdManKilled, PNormCnt);
-    negloa negloa({1'b0,AlignedAddendE}, NegProdManKilled, NNormCnt);
-
-endmodule
-
-
-module posloa(
+module loa( //https://ieeexplore.ieee.org/abstract/document/930098
    input logic  [3*`NF+6:0] A,     // addend
    input logic  [2*`NF+1:0] P,     // product
-    output logic [8:0]       PCnt   // normalization shift count for the positive result
+    input logic              NegSumE, // is the sum negative
+    output logic [8:0]       NormCntE   // normalization shift count (leading pattern chosen by NegSumE for a positive or negative sum)
    ); 
    

-    // calculate the propagate (T) and kill (Z) bits
    logic [3*`NF+6:0] T;
+    logic [3*`NF+5:0] G;
    logic [3*`NF+5:0] Z;
    assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
-    assign Z[3*`NF+5:2*`NF+4] = A[3*`NF+5:2*`NF+4];
+    assign G[3*`NF+5:2*`NF+4] = 0;
+    assign Z[3*`NF+5:2*`NF+4] = ~A[3*`NF+5:2*`NF+4];
    assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
-    assign Z[2*`NF+3:2] = A[2*`NF+3:2]|P;
+    assign G[2*`NF+3:2] = A[2*`NF+3:2]&P;
+    assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P;
    assign T[1:0] = A[1:0];
-    assign Z[1:0] = A[1:0];
+    assign G[1:0] = 0;
+    assign Z[1:0] = ~A[1:0];
    

    // Apply function to determine Leading pattern
    logic [3*`NF+6:0] f;
-    assign f = T^{Z[3*`NF+5:0], 1'b0};
+    assign f = NegSumE ? T^{~G[3*`NF+5:0],1'b1} : T^{~Z[3*`NF+5:0], 1'b1};

-    lzc lzc(.f, .Cnt(PCnt));
+    lzc lzc(.f, .NormCntE);
  
 endmodule

-module negloa(
-    input logic  [3*`NF+6:0]    A,      // addend
-    input logic  [3*`NF+6:0]    P,      // product
-    output logic [8:0]          NCnt    // normalization shift count for the negitive result
-    ); 
-    
-    // calculate the propagate (T) and kill (Z) bits
-    logic [3*`NF+6:0] T;
-    logic [3*`NF+5:0] Z;
-    assign T = A^P;
-    assign Z = ~(A[3*`NF+5:0]|P[3*`NF+5:0]);
-    
-
-    // Apply function to determine Leading pattern
-    logic [3*`NF+6:0] f;
-    assign f = T^{~Z, 1'b0};
-    
-    lzc lzc(.f, .Cnt(NCnt));
-  
-endmodule
-
-
 module lzc(
    input logic  [3*`NF+6:0]    f,
-    output logic [8:0]          Cnt    // normalization shift count for the negitive result
+    output logic [8:0]          NormCntE    // normalization shift
 );
    
    logic [8:0] i;
    always_comb begin
        i = 0;
        while (~f[3*`NF+6-i] && $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1;  // search for leading one
-        Cnt = i;
+        NormCntE = i;
    end
 endmodule

@@ -479,7 +448,7 @@ module fma2(
    // Normalization
    ///////////////////////////////////////////////////////////////////////////////

-    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
+    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum, .NegSumM,
            .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);


@@ -611,6 +580,80 @@ module resultselect(
 endmodule


+// module normalize(
+//     input logic  [3*`NF+5:0]    SumM,       // the positive sum
+//     input logic  [`NE-1:0]      ZExpM,      // exponent of Z
+//     input logic  [`NE+1:0]      ProdExpM,   // X exponent + Y exponent - bias
+//     input logic  [8:0]          NormCntM,   // normalization shift count
+//     input logic                 FmtM,       // precision 1 = double 0 = single
+//     input logic                 KillProdM,  // is the product set to zero
+//     input logic                 AddendStickyM,  // the sticky bit caclulated from the aligned addend
+//     input logic                 NegSumM,    // was the sum negitive
+//     output logic [`NF+2:0]      NormSum,        // normalized sum
+//     output logic                SumZero,        // is the sum zero
+//     output logic                NormSumSticky, UfSticky,    // sticky bits
+//     output logic [`NE+1:0]      SumExp,         // exponent of the normalized sum
+//     output logic                ResultDenorm    // is the result denormalized
+// );
+//     logic [`NE+1:0]     FracLen;            // length of the fraction
+//     logic [`NE+1:0]     SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
+//     logic [8:0]         DenormShift;        // right shift if the result is denormalized //***change this later
+//     logic [3*`NF+5:0]   CorrSumShifted;     // the shifted sum after LZA correction
+//     logic [3*`NF+7:0]   SumShifted;         // the shifted sum before LZA correction
+//     logic [`NE+1:0]     SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
+//     logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
+//     logic               PreResultDenorm2;    // is the result denormalized - calculated before LZA corection
+//     logic               LZAPlus1;           // add one to the sum's exponent due to LZA correction
+
+//     ///////////////////////////////////////////////////////////////////////////////
+//     // Normalization
+//     ///////////////////////////////////////////////////////////////////////////////
+
+//     // Determine if the sum is zero
+//     assign SumZero = ~(|SumM);
+
+//     // determine the length of the fraction based on precision
+//     assign FracLen = FmtM ? `NF+1 : 13'd24;
+
+//     // calculate the sum's exponent
+//     assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage
+//     assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified
+    
+//     logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL;
+//     assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
+//     assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1)));
+//     assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127);
+//     assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp;
+//     assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good
+//     // always_comb begin
+//     //     assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal");
+//     // end
+
+
+
+//     // Determine if the result is denormal
+//     // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
+
+//     // Determine the shift needed for denormal results
+//     //  - if not denorm add 1 to shift out the leading 1
+//     assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
+//     // Normalize the sum
+//     assign SumShifted = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
+//     // LZA correction
+//     assign LZAPlus1 = SumShifted[3*`NF+7];
+//     assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
+//     assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
+//     // Calculate the sticky bit
+//     assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM);
+//     assign UfSticky = AddendStickyM | NormSumSticky;
+
+//     // Determine sum's exponent
+//     assign SumExp = (SumExpTmp+{12'b0, LZAPlus1}+{12'b0, ~|SumExpTmp&SumShifted[3*`NF+6]}) & {`NE+2{~(SumZero|ResultDenorm)}};
+//     // recalculate if the result is denormalized
+//     assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
+
+// endmodule
+
 module normalize(
    input logic  [3*`NF+5:0]    SumM,       // the positive sum
    input logic  [`NE-1:0]      ZExpM,      // exponent of Z
@@ -619,6 +662,7 @@ module normalize(
    input logic                 FmtM,       // precision 1 = double 0 = single
    input logic                 KillProdM,  // is the product set to zero
    input logic                 AddendStickyM,  // the sticky bit caclulated from the aligned addend
+    input logic                 NegSumM,    // was the sum negative
    output logic [`NF+2:0]      NormSum,        // normalized sum
    output logic                SumZero,        // is the sum zero
    output logic                NormSumSticky, UfSticky,    // sticky bits
@@ -629,15 +673,29 @@ module normalize(
    logic [`NE+1:0]     SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
    logic [8:0]         DenormShift;        // right shift if the result is denormalized //***change this later
    logic [3*`NF+5:0]   CorrSumShifted;     // the shifted sum after LZA correction
-    logic [3*`NF+7:0]   SumShifted;         // the shifted sum before LZA correction
+    logic [3*`NF+8:0]   SumShifted;         // the shifted sum before LZA correction
    logic [`NE+1:0]     SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
    logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
-    logic               LZAPlus1;           // add one to the sum's exponent due to LZA correction
+    logic               PreResultDenorm2;   // is the result denormalized - calculated before LZA correction
+    logic               LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction

    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
    ///////////////////////////////////////////////////////////////////////////////

+
+    // logic [8:0] supposedNormCnt;
+    // logic [8:0] i;
+    // always_comb begin
+    //         i = 0;
+    //         while (~SumM[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
+    //         supposedNormCnt = i;    // compute shift count
+    // end
+
+    // always_comb begin
+    //     assert (NormCntM == supposedNormCnt | NormCntM == supposedNormCnt+1 | NormCntM == supposedNormCnt+2) else $fatal ("normcnt not expected");
+    // end
+
    // Determine if the sum is zero
    assign SumZero = ~(|SumM);

@@ -645,19 +703,36 @@ module normalize(
    assign FracLen = FmtM ? `NF+1 : 13'd24;

    // calculate the sum's exponent
-    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
-    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
+    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage
+    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified
+    
+    logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL;
+    assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
+    assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1)));
+    assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127);
+    assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp;
+    assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good
+    // always_comb begin
+    //     assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal");
+    // end
+
+    // 010. when should be 001.
+    //      - shift left one
+    //      - add one from exp
+    //      - if kill prod dont add to exp

    // Determine if the result is denormal
-    assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
+    // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;

    // Determine the shift needed for denormal results
    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = PreResultDenorm ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
+    assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
    // Normalize the sum
-    assign SumShifted = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
+    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
    // LZA correction
    assign LZAPlus1 = SumShifted[3*`NF+7];
+    assign LZAPlus2 = SumShifted[3*`NF+8];
+	// the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through the sum - so the CorrSumShifted selection below can be left alone
    assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
    // Calculate the sticky bit
@@ -665,9 +740,10 @@ module normalize(
    assign UfSticky = AddendStickyM | NormSumSticky;

    // Determine sum's exponent
-    assign SumExp = (SumExpTmp+{12'b0, LZAPlus1}+{12'b0, ~|SumExpTmp&SumShifted[3*`NF+6]}) & {`NE+2{~(SumZero|ResultDenorm)}};
+    //                          if plus1                     If plus2                                      if said denorm but norm plus 1     if said denorm (-1 val) but norm plus 2
+    assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~|SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}+{11'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM, 1'b0}) & {`NE+2{~(SumZero|ResultDenorm)}};
    // recalculate if the result is denormalized
-    assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
+    assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];

 endmodule

--- a/wally-pipelined/src/ieu/datapath.sv
+++ b/wally-pipelined/src/ieu/datapath.sv
@@ -96,7 +96,7 @@ module datapath (
  //Mux for writting floating point 
  
  regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D);
-  extend ext(.InstrD(InstrD[31:7]), .*);
+  extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ExtImmD);
 
  // Execute stage pipeline register and logic
  flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, RD1D, RD1E);
--- a/wally-pipelined/src/ieu/ieu.sv
+++ b/wally-pipelined/src/ieu/ieu.sv
@@ -90,9 +90,69 @@ module ieu (
  logic             MemReadE, CSRReadE;
  logic             JumpE;
           
-  controller c(.*);
-  datapath   dp(.*);             
-  forward    fw(.*);
+  controller c(
+    .clk, .reset,
+    // Decode stage control signals
+    .StallD, .FlushD, .InstrD, .ImmSrcD,
+    .IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD,
+    // Execute stage control signals
+    .StallE, .FlushE, .FlagsE, 
+    .PCSrcE,        // for datapath and Hazard Unit
+    .ALUControlE, .ALUSrcAE, .ALUSrcBE,
+    .TargetSrcE,
+    .MemReadE, .CSRReadE, // for Hazard Unit
+    .Funct3E, .MulDivE, .W64E,
+    .JumpE,	
+    // Memory stage control signals
+    .StallM, .FlushM, .MemRWM,
+    .CSRReadM, .CSRWriteM, .PrivilegedM,
+    .SCE, .AtomicE, .AtomicM, .Funct3M,
+    .RegWriteM,     // for Hazard Unit
+    .InvalidateICacheM, .FlushDCacheM, .InstrValidM, 
+    // Writeback stage control signals
+    .StallW, .FlushW,
+    .RegWriteW,     // for datapath and Hazard Unit
+    .ResultSrcW,
+    // Stall during CSRs
+    .CSRWritePendingDEM,
+    .StoreStallD
+  );
+
+  datapath   dp(
+    .clk, .reset,
+    // Decode stage signals
+    .ImmSrcD, .InstrD,
+    // Execute stage signals
+    .StallE, .FlushE, .ForwardAE, .ForwardBE,
+    .ALUControlE, .ALUSrcAE, .ALUSrcBE,
+    .TargetSrcE, .JumpE, .IllegalFPUInstrE,
+    .FWriteDataE, .PCE, .PCLinkE, .FlagsE,
+    .PCTargetE,
+    .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
+    .SrcAE, .SrcBE,
+    // Memory stage signals
+    .StallM, .FlushM, .FWriteIntM, .FIntResM, 
+    .SrcAM, .WriteDataM, .MemAdrM, .MemAdrE,
+    // Writeback stage signals
+    .StallW, .FlushW, .FWriteIntW, .RegWriteW, 
+    .SquashSCW, .ResultSrcW, .ReadDataW,
+    // input  logic [`XLEN-1:0] PCLinkW,
+    .CSRReadValW, .ReadDataM, .MulDivResultW, 
+    // Hazard Unit signals 
+    .Rs1D, .Rs2D, .Rs1E, .Rs2E,
+    .RdE, .RdM, .RdW 
+  );             
+  
+  forward    fw(
+    .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,
+    .MemReadE, .MulDivE, .CSRReadE,
+    .RegWriteM, .RegWriteW,
+    .FWriteIntE, .FWriteIntM, .FWriteIntW,
+    .SCE,
+    // Forwarding controls
+    .ForwardAE, .ForwardBE,
+    .FPUStallD, .LoadStallD, .MulDivStallD, .CSRRdStallD
+    );

 endmodule

--- a/wally-pipelined/src/lsu/lsu.sv
+++ b/wally-pipelined/src/lsu/lsu.sv
@@ -187,7 +187,9 @@ module lsu
 		 .DCacheStall(DCacheStall));
  
  mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0))
-  dmmu(.PAdr(MemPAdrMtoDCache),
+  dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
+       .PrivilegeModeW, .DisableTranslation(DisableTranslation),
+       .PAdr(MemPAdrMtoDCache),
       .VAdr(MemAdrM),
       .Size(Funct3MtoDCache[1:0]),
       .PTE(PTE),
@@ -196,18 +198,16 @@ module lsu
       .TLBFlush(DTLBFlushM),
       .PhysicalAddress(MemPAdrM),
       .TLBMiss(DTLBMissM),
-       .TLBPageFault(DTLBPageFaultM),
-       .ExecuteAccessF(1'b0),
-       //.AtomicAccessM(AtomicMaskedM[1]),
-       .AtomicAccessM(1'b0),
-       .WriteAccessM(MemRWMtoLRSC[0]),
-       .ReadAccessM(MemRWMtoLRSC[1]),
-       .DisableTranslation(DisableTranslation),
-       .InstrAccessFaultF(),
       .Cacheable(CacheableM),
       .Idempotent(),
       .AtomicAllowed(),
-       .*); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist?
+       .TLBPageFault(DTLBPageFaultM),
+       .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM,
+       .AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), 
+       .WriteAccessM(MemRWMtoLRSC[0]), .ReadAccessM(MemRWMtoLRSC[1]),
+       .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW
+       //.AtomicAccessM(AtomicMaskedM[1]),
+       ); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist?


  assign MemReadM = MemRWMtoLRSC[1] & ~(ExceptionM | PendingInterruptMtoDCache) & ~DTLBMissM; // & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED;
--- a/wally-pipelined/src/sdc/SDCcounter.sv
+++ b/wally-pipelined/src/sdc/SDCcounter.sv
--- a/wally-pipelined/src/uncore/dtim.sv
+++ b/wally-pipelined/src/uncore/dtim.sv
@@ -25,7 +25,7 @@

 `include "wally-config.vh"

-module dtim #(parameter BASE=0, RANGE = 65535, string PRELOAD="") (
+module dtim #(parameter BASE=0, RANGE = 65535) (
  input  logic             HCLK, HRESETn, 
  input  logic             HSELTim,
  input  logic [31:0]      HADDR,
--- a/wally-pipelined/src/uncore/uncore.sv
+++ b/wally-pipelined/src/uncore/uncore.sv
@@ -85,40 +85,85 @@ module uncore (
  assign {HSELEXT, HSELBootTim, HSELTim, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELSDC} = HSELRegions[7:0];

  // subword accesses: converts HWDATAIN to HWDATA
-  subwordwrite sww(.*);
+  subwordwrite sww(
+    .HRDATA, 
+    .HADDRD, .HSIZED, 
+    .HWDATAIN, .HWDATA);

  generate
    // tightly integrated memory
    if (`TIM_SUPPORTED) begin : dtim
-      dtim #(.BASE(`TIM_BASE), .RANGE(`TIM_RANGE)) dtim (.*);
+      dtim #(
+        .BASE(`TIM_BASE), .RANGE(`TIM_RANGE)) dtim (
+        .HCLK, .HRESETn, 
+        .HSELTim, .HADDR,
+        .HWRITE, .HREADY,
+        .HTRANS, .HWDATA, .HREADTim,
+        .HRESPTim, .HREADYTim);
    end

    if (`BOOTTIM_SUPPORTED) begin : bootdtim
-      dtim #(.BASE(`BOOTTIM_BASE), .RANGE(`BOOTTIM_RANGE), .PRELOAD("blink-led.mem"))
-      bootdtim(.HSELTim(HSELBootTim), .HREADTim(HREADBootTim), .HRESPTim(HRESPBootTim), .HREADYTim(HREADYBootTim), .*);
+      dtim #(.BASE(`BOOTTIM_BASE), .RANGE(`BOOTTIM_RANGE))
+      bootdtim(
+        .HCLK, .HRESETn, 
+        .HSELTim(HSELBootTim), .HADDR,
+        .HWRITE, .HREADY, .HTRANS,
+        .HWDATA,
+        .HREADTim(HREADBootTim), .HRESPTim(HRESPBootTim), .HREADYTim(HREADYBootTim));
    end

    // memory-mapped I/O peripherals
    if (`CLINT_SUPPORTED == 1) begin : clint
-      clint clint(.HADDR(HADDR[15:0]), .MTIME(MTIME_CLINT), .MTIMECMP(MTIMECMP_CLINT), .*);
+      clint clint(
+        .HCLK, .HRESETn,
+        .HSELCLINT, .HADDR(HADDR[15:0]), .HWRITE,
+        .HWDATA, .HREADY, .HTRANS,
+        .HREADCLINT,
+        .HRESPCLINT, .HREADYCLINT,
+        .MTIME(MTIME_CLINT), .MTIMECMP(MTIMECMP_CLINT),
+        .TimerIntM, .SwIntM);
+
    end else begin : clint
      assign MTIME_CLINT = 0; assign MTIMECMP_CLINT = 0;
      assign TimerIntM = 0; assign SwIntM = 0;
    end
    if (`PLIC_SUPPORTED == 1) begin : plic
-      plic plic(.HADDR(HADDR[27:0]), .*);
+      plic plic(
+        .HCLK, .HRESETn, 
+        .HSELPLIC, .HADDR(HADDR[27:0]),
+        .HWRITE, .HREADY, .HTRANS, .HWDATA,
+        .UARTIntr, .GPIOIntr,
+        .HREADPLIC, .HRESPPLIC, .HREADYPLIC,
+        .ExtIntM);
    end else begin : plic
      assign ExtIntM = 0;
    end
    if (`GPIO_SUPPORTED == 1) begin : gpio
-      gpio gpio(.HADDR(HADDR[7:0]), .*); 
+      gpio gpio(
+        .HCLK, .HRESETn, .HSELGPIO,
+        .HADDR(HADDR[7:0]), 
+        .HWDATA,
+        .HWRITE, .HREADY, 
+        .HTRANS,
+        .HREADGPIO,
+        .HRESPGPIO, .HREADYGPIO,
+        .GPIOPinsIn,
+        .GPIOPinsOut, .GPIOPinsEn,
+        .GPIOIntr);
+
    end else begin : gpio
      assign GPIOPinsOut = 0; assign GPIOPinsEn = 0; assign GPIOIntr = 0;
    end
    if (`UART_SUPPORTED == 1) begin : uart
-      uart uart(.HADDR(HADDR[2:0]), .TXRDYb(), .RXRDYb(), .INTR(UARTIntr), .SIN(UARTSin), .SOUT(UARTSout),
-                .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), 
-                .RTSb(), .DTRb(), .OUT1b(), .OUT2b(), .*);
+      uart uart(
+        .HCLK, .HRESETn, 
+        .HSELUART,
+        .HADDR(HADDR[2:0]), 
+        .HWRITE, .HWDATA,
+        .HREADUART, .HRESPUART, .HREADYUART,
+        .SIN(UARTSin), .DSRb(1'b1), .DCDb(1'b1), .CTSb(1'b0), .RIb(1'b1), // from E1A driver from RS232 interface
+        .SOUT(UARTSout), .RTSb(), .DTRb(),                                // to E1A driver to RS232 interface
+        .OUT1b(), .OUT2b(), .INTR(UARTIntr), .TXRDYb(), .RXRDYb());       // to CPU
    end else begin : uart
      assign UARTSout = 0; assign UARTIntr = 0; 
    end
--- a/wally-pipelined/testbench/testbench-coremark_bare.sv
+++ b/wally-pipelined/testbench/testbench-coremark_bare.sv
@@ -54,8 +54,13 @@ module testbench();
  logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
  logic UARTSin, UARTSout;
  logic SDCCLK;
-  tri1 SDCCmd;
-  tri1 [3:0] SDCDat;
+  logic      SDCCmdIn;
+  logic      SDCCmdOut;
+  logic      SDCCmdOE;
+  logic [3:0] SDCDatIn;
+
+  logic             HREADY;
+  logic 	    HSELEXT;

  assign SDCmd = 1'bz;
  assign SDCDat = 4'bz;
@@ -95,7 +100,7 @@ module testbench();
      totalerrors = 0;
      // read test vectors into memory
      memfilename = tests[0];
-      $readmemh(memfilename, dut.uncore.dtim.RAM);
+      $readmemh(memfilename, dut.uncore.dtim.dtim.RAM);
      //for(j=268437955; j < 268566528; j = j+1)
        //dut.uncore.dtim.RAM[j] = 64'b0;
 //      ProgramAddrMapFile = "../../imperas-riscv-tests/riscv-ovpsim-plus/examples/CoreMark/coremark.RV64IM.bare.elf.objdump.addr";