Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally

2022-07-18 23:11:12 +00:00 · 2022-07-18 23:11:12 +00:00 · 59eb11b73a
commit 59eb11b73a
parent ab08826b6a 86f0327f79
18 changed files with 211 additions and 225 deletions
--- a/benchmarks/coremark/riscv64-baremetal/core_portme.c
+++ b/benchmarks/coremark/riscv64-baremetal/core_portme.c
@ -114,7 +114,12 @@ void portable_free(void *p) {
    #define read_csr(reg) ({ unsigned long __tmp; \
       asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
       __tmp; })
-    #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8)
+	// #if (XLEN==64) 
+	// 	typedef unsigned long long ee_ptr_int;
+	// #else
+	// 	typedef unsigned long ee_ptr_int;
+	// #endif
+    #define GETMYTIME(_t) (_t = *(volatile ee_ptr_int*)0x0200BFF8)
 	#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
 	// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
 	#define TIMER_RES_DIVIDER 10000
@ -196,8 +201,8 @@ void stop_time(void) {
 CORE_TICKS get_time(void) {
 	CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
 	unsigned long instructions = minstretDiff();
-	long long cm100 = 1000000000 / elapsed;  // coremark score * 100
-	long long cpi100 = elapsed*100/instructions; // CPI * 100
+	ee_ptr_int cm100 = 1000000000 / elapsed;  // coremark score * 100
+	ee_ptr_int cpi100 = elapsed*100/instructions; // CPI * 100
 	ee_printf("   WALLY CoreMark Results (from get_time)\n");
 	ee_printf("    Elapsed MTIME: %u\n", elapsed);
 	ee_printf("    Elapsed MINSTRET: %lu\n", instructions);
--- a/benchmarks/coremark/riscv64-baremetal/core_portme.h
+++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h
@ -69,14 +69,16 @@ typedef clock_t CORE_TICKS;
 // #elif (XLEN==32) 
 // 	#include <sys/types.h>
 // 	typedef ee_u32 CORE_TICKS;
-#else
 /* Configuration: size_t and clock_t
     Note these need to match the size of the clock output and the xLen the processor supports
 */
+#elif (XLEN==64) 
 typedef unsigned long int size_t;
 typedef unsigned long int clock_t;
-typedef clock_t CORE_TICKS;
+#else
+#include <sys/types.h>
 #endif
+typedef clock_t CORE_TICKS;

 /* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
 	Initialize these strings per platform
--- a/pipelined/regression/sim-wally
+++ b/pipelined/regression/sim-wally
@ -1,2 +1,2 @@
-vsim -do "do wally-pipelined.do rv64gc arch64d"
+vsim -do "do wally-pipelined.do rv32gc wally32periph"

--- a/pipelined/regression/sim-wally-batch
+++ b/pipelined/regression/sim-wally-batch
@ -1 +1 @@
-vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d"
+vsim -c -do "do wally-pipelined-batch.do rv32gc wally32periph"
--- a/pipelined/src/fpu/divshiftcalc.sv
+++ b/pipelined/src/fpu/divshiftcalc.sv
@ -1,10 +1,10 @@
 `include "wally-config.vh"

 module divshiftcalc(
-    input logic  [`QLEN-1-(`RADIX/4):0] Quot,
+    input logic  [`QLEN-1-(`RADIX/4):0] DivQm,
    input logic  [`FMTBITS-1:0] Fmt,
    input logic [`DURLEN-1:0] DivEarlyTermShift,
-    input logic [`NE+1:0] DivCalcExp,
+    input logic [`NE+1:0] DivQe,
    output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
    output logic [`NORMSHIFTSZ-1:0] DivShiftIn,
    output logic DivResDenorm,
@ -14,21 +14,21 @@ module divshiftcalc(

    // is the result denromalized
    // if the exponent is 1 then the result needs to be normalized then the result is denormalizes
-    assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]);
+    assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);

    // if the result is denormalized
-    //  00000000x.xxxxxx...                     Exp = DivCalcExp
-    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  .00xxxxxxxxxxxxx... << DivCalcExp+NF+1  Exp = +1
+    //  00000000x.xxxxxx...                     Exp = DivQe
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivQe+NF+1
+    //  .00xxxxxxxxxxxxx... << DivQe+NF+1  Exp = +1
    //  .0000xxxxxxxxxxx... >> 1                Exp = 1
-    // Left shift amount  = DivCalcExp+NF+1-1
-    assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp;
+    // Left shift amount  = DivQe+NF+1-1
+    assign DivDenormShift = (`NE+2)'(`NF)+DivQe;
    // if the result is normalized
-    //  00000000x.xxxxxx...                     Exp = DivCalcExp
-    //  .00000000xxxxxxx... >> NF+1             Exp = DivCalcExp+NF+1
-    //  00000000.xxxxxxx... << NF               Exp = DivCalcExp+1
-    //  00000000x.xxxxxx... << NF               Exp = DivCalcExp (extra shift done afterwards)
-    //  00000000xx.xxxxx... << 1?               Exp = DivCalcExp-1 (determined after)
+    //  00000000x.xxxxxx...                     Exp = DivQe
+    //  .00000000xxxxxxx... >> NF+1             Exp = DivQe+NF+1
+    //  00000000.xxxxxxx... << NF               Exp = DivQe+1
+    //  00000000x.xxxxxx... << NF               Exp = DivQe (extra shift done afterwards)
+    //  00000000xx.xxxxx... << 1?               Exp = DivQe-1 (determined after)
    // inital Left shift amount  = NF
    // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit
    assign NormShift = (`NE+2)'(`NF);
@ -36,6 +36,6 @@ module divshiftcalc(
    // need to multiply the early termination shift by LOGR*DIVCOPIES =  left shift of log2(LOGR*DIVCOPIES)
    assign DivShiftAmt = (DivResDenorm ?  DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}};

-    assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};
+    assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}};

 endmodule
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@ -127,7 +127,7 @@ module fcvt (
    //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
    assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
                    ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : 
-                              (LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}};
+                              (LeadingZeros);
    
    ///////////////////////////////////////////////////////////////////////////
    // exp calculations
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@ -48,10 +48,10 @@ module flags(
    input logic                 DivOp,                  // conversion opperation?
    input logic                 FmaOp,                  // Fma opperation?
    input logic  [`NE+1:0]      FullRe,             // Re with bits to determine sign and overflow
-    input logic  [`NE+1:0]      Nexp,               // exponent of the normalized sum
+    input logic  [`NE+1:0]      Me,               // exponent of the normalized sum
    input logic  [1:0]          CvtNegResMsbs,             // the negitive integer result's most significant bits
    input logic                 FmaAs, FmaPs,        // the product and modified Z signs
-    input logic                 R, UfLSBRes, S, UfPlus1, // bits used to determine rounding
+    input logic                 R, UfL, S, UfPlus1, // bits used to determine rounding
    output logic                DivByZero,
    output logic                IntInvalid, Invalid, Overflow, // flags used to select the res
    output logic [4:0]          PostProcFlg // flags
@ -127,11 +127,11 @@ module flags(
    //                  |                    |                    |                                      |                     and if the result is not exact
    //                  |                    |                    |                                      |                     |               and if the input isnt infinity or NaN
    //                  |                    |                    |                                      |                     |               |
-    assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero);
+    assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero);

    // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision
    //      - Don't set the underflow flag if an underflowed res isn't outputed
-    assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero);
+    assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero);

    //                  if the res is too small to be represented and not 0
    //                  |                                     and if the res is not invalid (outside the integer bounds)
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -44,6 +44,7 @@ module fma(
    output logic                InvA,          // Was A inverted for effective subtraction (P-A or -P+A)
    output logic                As,       // the aligned addend's sign (modified Z sign for other opperations)
    output logic                Ps,          // the product's sign
+    output logic                Ss,          // the sum's sign
    output logic [$clog2(3*`NF+7)-1:0]          NCnt        // normalization shift count
    );

@ -81,7 +82,7 @@ module fma(
    // // Addition/LZA
    // ///////////////////////////////////////////////////////////////////////////////
        
-    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm);
+    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss);
    
    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
 endmodule
@ -226,6 +227,7 @@ module add(
    output logic [2*`NF+1:0]    PmKilled,     // the product's mantissa possibly killed
    output logic                NegSum,        // was the sum negitive
    output logic                InvA,          // do you invert the aligned addend
+    output logic                Ss,          
    output logic [3*`NF+5:0]    Sm           // the positive sum
 );
    logic [3*`NF+6:0]    PreSum, NegPreSum; // possibly negitive sum
@ -257,6 +259,11 @@ module add(

    // Choose the positive sum and accompanying LZA result.
    assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
+    // is the result negitive
+    //  if p - z is the Sum negitive
+    //  if -p + z is the Sum positive
+    //  if -p - z then the Sum is negitive
+    assign Ss = NegSum^Ps; //*** move to execute stage
 endmodule


--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@ -35,7 +35,7 @@ module fmashiftcalc(
    input logic  [$clog2(3*`NF+7)-1:0]  FmaNCnt,   // normalization shift count
    input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
    input logic                         FmaKillProd,  // is the product set to zero
-    output logic [`NE+1:0]              FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    output logic [`NE+1:0]              FmaNe,          // exponent of the normalized sum not taking into account denormal or zero results
    output logic                        FmaSZero,    // is the result denormalized - calculated before LZA corection
    output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
    output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
@ -57,28 +57,28 @@ module fmashiftcalc(

    //convert the sum's exponent into the proper percision
    if (`FPSIZES == 1) begin
-        assign FmaConvNormSumExp = NormSumExp;
+        assign FmaNe = NormSumExp;

    end else if (`FPSIZES == 2) begin
-        assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+        assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};

    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (Fmt)
-                `FMT: FmaConvNormSumExp = NormSumExp;
-                `FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
-                `FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
-                default: FmaConvNormSumExp = {`NE+2{1'bx}};
+                `FMT: FmaNe = NormSumExp;
+                `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
+                `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
+                default: FmaNe = {`NE+2{1'bx}};
            endcase
        end

    end else if (`FPSIZES == 4) begin
        always_comb begin
            case (Fmt)
-                2'h3: FmaConvNormSumExp = NormSumExp;
-                2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
-                2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
-                2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
+                2'h3: FmaNe = NormSumExp;
+                2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
+                2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
+                2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
            endcase
        end

@ -144,11 +144,11 @@ module fmashiftcalc(
    //      - if kill prod dont add to exp

    // Determine if the result is denormal
-    // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
+    // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero;

    // Determine the shift needed for denormal results
    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;
+    assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1;
    // set and calculate the shift input and amount
    //  - shift once if killing a product and the result is denormalized
    assign FmaShiftIn = {3'b0, FmaSm};
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -114,6 +114,7 @@ module fpu (
   logic 			    NegSumE, NegSumM;
   logic 			    ZSgnEffE, ZSgnEffM;
   logic 			    PSgnE, PSgnM;
+   logic 			    SsE, SsM;
   logic [$clog2(3*`NF+7)-1:0]			FmaNormCntE, FmaNormCntM;

   // Cvt Signals
@ -255,36 +256,11 @@ module fpu (
            .Xm(XManE), .Ym(YManE), .Zm(ZManE), 
            .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), 
            .FOpCtrl(FOpCtrlE), .Fmt(FmtE), 
-            .As(ZSgnEffE), .Ps(PSgnE),
+            .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE),
            .Sm(SumE), .Pe(ProdExpE), 
            .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), 
            .ZmSticky(AddendStickyE), .KillProd(KillProdE)); 

-   // // fpdivsqrt using Goldschmidt's iteration
-   // if(`FLEN == 64) begin 
-   // flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
-   //       .clear(FDivSqrtDoneE), .en(load_preload),
-   //       .reset(reset),  .clk(clk));
-   // flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
-   //          .clear(FDivSqrtDoneE), .en(load_preload),
-   //          .reset(reset),  .clk(clk));
-   // end
-   // else if (`FLEN == 32) begin 
-   // flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
-   //       .clear(FDivSqrtDoneE), .en(load_preload),
-   //       .reset(reset),  .clk(clk));
-   // flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
-   //          .clear(FDivSqrtDoneE), .en(load_preload),
-   //          .reset(reset),  .clk(clk));
-   // end
-   // flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), 
-   //          .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
-   //          .clear(FDivSqrtDoneE), .en(load_preload),
-   //          .reset(reset),  .clk(clk));
-   // fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), 
-   //       .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
-   //       .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
-   //       .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
   divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
                  .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
                  .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
@ -359,9 +335,9 @@ module fpu (
               {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
   flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
   flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-   flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                           {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
-                           {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
+   flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, 
+                           {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE},
+                           {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM});
   flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
                           {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
                           {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
@ -381,10 +357,10 @@ module fpu (
   assign FpLoadStoreM = FResSelM[1];

   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
-                           .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM),
-                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM),
+                           .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM),
+                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
-                           .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM),
+                           .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
                           .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));

   // FPU flag selection - to privileged
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@ -48,17 +48,18 @@ module postprocess (
    input logic                             FmaPs,      // the product's sign
    input logic  [`NE+1:0]                  FmaPe,       // Product exponent
    input logic  [3*`NF+5:0]                FmaSm,       // the positive sum
-    input logic                             FmaZmSticky,  // sticky bit that is calculated during alignment
+    input logic                             FmaZmS,  // sticky bit that is calculated during alignment
    input logic                             FmaKillProd,      // set the product to zero before addition if the product is too small to matter
    input logic                             FmaNegSum,    // was the sum negitive
    input logic                             FmaInvA,      // do you invert Z
+    input logic                             FmaSs,
    input logic  [$clog2(3*`NF+7)-1:0]      FmaNCnt,   // the normalization shift count
    //divide signals
    input logic  [`DURLEN-1:0]              DivEarlyTermShift,
-    input logic                             DivSticky,
+    input logic                             DivS,
    input logic                             DivDone,
-    input logic  [`NE+1:0]                  DivCalcExp,
-    input logic  [`QLEN-1-(`RADIX/4):0]                Quot,
+    input logic  [`NE+1:0]                  DivQe,
+    input logic  [`QLEN-1-(`RADIX/4):0]                DivQm,
    // conversion signals
    input logic                             CvtCs,     // the result's sign
    input logic  [`NE:0]                    CvtCe,    // the calculated expoent
@ -77,9 +78,9 @@ module postprocess (
    logic Ws;
    logic [`NF-1:0] Rf; // Result fraction
    logic [`NE-1:0] Re;  // Result exponent
-    logic Nsgn;
-    logic [`NE+1:0] Nexp;
-    logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction
+    logic Ms;
+    logic [`NE+1:0] Me;
+    logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction
    logic [`NE+1:0] FullRe;  // Re with bits to determine sign and overflow
    logic S;           // S bit
    logic UfPlus1;                    // do you add one (for determining underflow flag)
@ -89,19 +90,19 @@ module postprocess (
    logic [`NORMSHIFTSZ-1:0] Shifted;    // the shifted result
    logic Plus1;      // add one to the final result?
    logic IntInvalid, Overflow, Invalid; // flags
-    logic UfLSBRes;
+    logic UfL;
    logic [`FMTBITS-1:0] OutFmt;
    // fma signals
    logic [`NE+1:0] FmaSe;     // exponent of the normalized sum
    logic FmaSZero;        // is the sum zero
    logic [3*`NF+8:0] FmaShiftIn;        // shift input
-    logic [`NE+1:0] FmaConvNormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic [`NE+1:0] FmaNe;          // exponent of the normalized sum not taking into account denormal or zero results
    logic FmaPreResultDenorm;    // is the result denormalized - calculated before LZA corection
    logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt;   // normalization shift count
    // division singals
    logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
    logic [`NORMSHIFTSZ-1:0] DivShiftIn;
-    logic [`NE+1:0] DivCorrExp;
+    logic [`NE+1:0] Qe;
    logic DivByZero;
    logic DivResDenorm;
    logic [`NE+1:0] DivDenormShift;
@ -150,9 +151,9 @@ module postprocess (

    cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                              .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
-    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp,
+    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe,
                          .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
-    divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
+    divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);

    always_comb
        case(PostProcSel)
@ -181,9 +182,9 @@ module postprocess (
    
    normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);

-    shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp,
-                                .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp,
-                                .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac);
+    shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe,
+                                .DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
+                                .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf);

    ///////////////////////////////////////////////////////////////////////////////
    // Rounding
@ -197,19 +198,19 @@ module postprocess (

                          
    roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, 
-                          .Xs, .Ys, .CvtCs, .Nsgn);
+                        .FmaSs, .Xs, .Ys, .CvtCs, .Ms);

-    round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp,
-                .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt,  .CvtResUf,
-                .DivSticky, .DivDone,
-                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp);
+    round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
+                .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt,  .CvtResUf,
+                .DivS, .DivDone,
+                .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);

    ///////////////////////////////////////////////////////////////////////////////
    // Sign calculation
    ///////////////////////////////////////////////////////////////////////////////

    resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
-                          .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws);
+                          .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);

    ///////////////////////////////////////////////////////////////////////////////
    // Flags
@ -218,8 +219,8 @@ module postprocess (
    flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
                .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
                .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
-                .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
-                .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
+                .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
+                .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);

    ///////////////////////////////////////////////////////////////////////////////
    // Select the result
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@ -39,28 +39,25 @@ module resultsign(
    input logic         Mult,
    input logic         R,
    input logic         S,
-    input logic         Nsgn,
+    input logic         Ms,
    output logic        Ws
 );

-    logic ZeroSgn;
-    logic InfSgn;
-    logic Underflow;
-    // logic ResultSgnTmp;
+    logic Zeros;
+    logic Infs;

    // Determine the sign if the sum is zero
    //      if cancelation then 0 unless round to -infinity
    //      if multiply then Psgn
    //      otherwise psign
-    assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S));
-    assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
+    assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;


    // is the result negitive
    //  if p - z is the Sum negitive
    //  if -p + z is the Sum positive
    //  if -p - z then the Sum is negitive
-    assign InfSgn = ZInf ? FmaAs : FmaPs;
-    assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn;
+    assign Infs = ZInf ? FmaAs : FmaPs;
+    assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms;

 endmodule
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@ -46,29 +46,29 @@ module round(
    input logic  [1:0]              PostProcSel,
    input logic                     CvtResDenormUf,
    input logic                     CvtResUf,
-    input logic  [`CORRSHIFTSZ-1:0] Nfrac,
-    input logic                     FmaZmSticky,  // addend's sticky bit
+    input logic  [`CORRSHIFTSZ-1:0] Mf,
+    input logic                     FmaZmS,  // addend's sticky bit
    input logic  [`NE+1:0]          FmaSe,         // exponent of the normalized sum
-    input logic                     Nsgn,      // the result's sign
+    input logic                     Ms,      // the result's sign
    input logic  [`NE:0]            CvtCe,    // the calculated expoent
-    input logic  [`NE+1:0]          DivCorrExp,    // the calculated expoent
-    input logic                     DivSticky,             // sticky bit
+    input logic  [`NE+1:0]          Qe,    // the calculated expoent
+    input logic                     DivS,             // sticky bit
    output logic                    UfPlus1,  // do you add or subtract on from the result
    output logic [`NE+1:0]          FullRe,      // Re with bits to determine sign and overflow
    output logic [`NF-1:0]          Rf,         // Result fraction
    output logic [`NE-1:0]          Re,          // Result exponent
    output logic                    S,             // sticky bit
-    output logic [`NE+1:0]          Nexp,
+    output logic [`NE+1:0]          Me,
    output logic                    Plus1,
-    output logic                    R, UfLSBRes // bits needed to calculate rounding
+    output logic                    R, UfL // bits needed to calculate rounding
 );
-    logic           LSBRes;         // bit used for rounding - least significant bit of the normalized sum
+    logic           L;         // bit used for rounding - least significant bit of the normalized sum
    logic           UfCalcPlus1; 
-    logic           NormSumSticky;  // normalized sum's sticky bit
-    logic           UfSticky;   // sticky bit for underlow calculation
+    logic           NormS;  // normalized sum's sticky bit
+    logic           UfS;   // sticky bit for underlow calculation
    logic [`NF-1:0] RoundFrac;
    logic           FpRes, IntRes;
-    logic           UfRound;
+    logic           UfR;
    logic           FpRound, FpLSBRes, FpUfRound;
    logic           CalcPlus1, FpPlus1;
    logic [`FLEN:0] RoundAdd;           // how much to add to the result
@ -114,61 +114,61 @@ module round(
    //      |    NF     |1|1|
    //                     ^    ^ if floating point result
    //                     ^ if not an FMA result
-        if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                 (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                 (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
    //     2: NF > XLEN
-        if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
-                                                 (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
+                                                 (|Mf[`CORRSHIFTSZ-`NF-2:0]);

    end else if (`FPSIZES == 2) begin
        // XLEN is either 64 or 32
        // so half and single are always smaller then XLEN

        // 1: XLEN > NF   > NF1
-        if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
        // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
        // 3: NF   > NF1  > XLEN
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);

    end else if (`FPSIZES == 3) begin
        // 1: XLEN > NF   > NF1
-        if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]);
+        if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
        // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);
        // 3: NF   > NF1  > XLEN
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`NF-2:0]);

    end else if (`FPSIZES == 4) begin
        // Quad precision will always be greater than XLEN
        // 2: NF   > XLEN > NF1
-        if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
+        if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | 
+                                                  (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | 
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
        // 3: NF   > NF1  > XLEN
        // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
-        if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
-                                                  (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]);
+        if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
+                                                  (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
+                                                  (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
+                                                  (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);

    end
    
@ -176,37 +176,37 @@ module round(

    // only add the Addend sticky if doing an FMA opperation
    //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-    assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp;
+    assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp;
    
    // determine round and LSB of the rounded value
    //      - underflow round bit is used to determint the underflow flag
    if (`FPSIZES == 1) begin
-        assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
-        assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
-        assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
+        assign FpRound = Mf[`CORRSHIFTSZ-`NF-1];
+        assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
+        assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];

    end else if (`FPSIZES == 2) begin
-        assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1];
-        assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1];
-        assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2];
+        assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
+        assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
+        assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];

    end else if (`FPSIZES == 3) begin
        always_comb
            case (OutFmt)
                `FMT: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF-2];
                end
                `FMT1: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF1-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF1];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2];
                end
                `FMT2: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`NF2-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`NF2];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2];
                end
                default: begin
                    FpRound = 1'bx;
@ -218,55 +218,55 @@ module round(
        always_comb
            case (OutFmt)
                2'h3: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
                end
                2'h1: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`D_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2];
                end
                2'h0: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`S_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2];
                end
                2'h2: begin
-                    FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1];
-                    FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF];
-                    FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2];
+                    FpRound = Mf[`CORRSHIFTSZ-`H_NF-1];
+                    FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF];
+                    FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2];
                end
            endcase
    end

-    assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound;
-    assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
-    assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;
+    assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound;
+    assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
+    assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;

    // used to determine underflow flag
-    assign UfLSBRes = FpRound;
+    assign UfL = FpRound;
    // determine sticky
-    assign S = UfSticky | UfRound;
+    assign S = UfS | UfR;


    always_comb begin
        // Determine if you add 1
        case (Frm)
-            3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even
+            3'b000: CalcPlus1 = R & (S| L);//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
-            3'b010: CalcPlus1 = Nsgn;//round down
-            3'b011: CalcPlus1 = ~Nsgn;//round up
+            3'b010: CalcPlus1 = Ms;//round down
+            3'b011: CalcPlus1 = ~Ms;//round up
            3'b100: CalcPlus1 = R;//round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
        // Determine if you add 1 (for underflow flag)
        case (Frm)
-            3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even
+            3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even
            3'b001: UfCalcPlus1 = 0;//round to zero
-            3'b010: UfCalcPlus1 = Nsgn;//round down
-            3'b011: UfCalcPlus1 = ~Nsgn;//round up
-            3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude
+            3'b010: UfCalcPlus1 = Ms;//round down
+            3'b011: UfCalcPlus1 = ~Ms;//round up
+            3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude
            default: UfCalcPlus1 = 1'bx;
        endcase
   
@ -275,7 +275,7 @@ module round(
    // If an answer is exact don't round
    assign Plus1 = CalcPlus1 & (S | R);
    assign FpPlus1 = Plus1&~(ToInt&CvtOp);
-    assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky
+    assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky

    // Compute rounded result
    if (`FPSIZES == 1) begin
@ -295,19 +295,19 @@ module round(
        assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};

    // determine the result to be roundned
-    assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
+    assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
    
    always_comb
        case(PostProcSel)
-            2'b10: Nexp = FmaSe; // fma
-            2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
-            2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide
-            default: Nexp = '0; 
+            2'b10: Me = FmaSe; // fma
+            2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
+            2'b01: Me = DivDone ? Qe : '0; // divide
+            default: Me = '0; 
        endcase

    // round the result
    //      - if the fraction overflows one should be added to the exponent
-    assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd;
+    assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
    assign Re = FullRe[`NE-1:0];


--- a/pipelined/src/fpu/roundsign.sv
+++ b/pipelined/src/fpu/roundsign.sv
@ -38,23 +38,15 @@ module roundsign(
    input logic         DivOp,
    input logic         CvtOp,
    input logic         CvtCs,
-    output logic        Nsgn
+    input logic         FmaSs,
+    output logic        Ms
 );

-    logic FmaResSgnTmp;
-    logic DivSgn;
+    logic Qs;

-    // is the result negitive
-    //  if p - z is the Sum negitive
-    //  if -p + z is the Sum positive
-    //  if -p - z then the Sum is negitive
-    assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage
-
-    // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
-
-    assign DivSgn = Xs^Ys;
+    assign Qs = Xs^Ys;

    // Sign for rounding calulation
-    assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp);
+    assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);

 endmodule
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@ -33,13 +33,13 @@ module shiftcorrection(
    input logic                     FmaOp,
    input logic                     DivOp,
    input logic                     DivResDenorm,
-    input logic  [`NE+1:0]          DivCalcExp,
+    input logic  [`NE+1:0]          DivQe,
    input logic  [`NE+1:0]          DivDenormShift,
-    input logic  [`NE+1:0]          FmaConvNormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
+    input logic  [`NE+1:0]          FmaNe,          // exponent of the normalized sum not taking into account denormal or zero results
    input logic                     FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
    input logic                     FmaSZero,
-    output logic [`CORRSHIFTSZ-1:0] Nfrac,         // the shifted sum before LZA correction
-    output logic [`NE+1:0]          DivCorrExp,
+    output logic [`CORRSHIFTSZ-1:0] Mf,         // the shifted sum before LZA correction
+    output logic [`NE+1:0]          Qe,
    output logic [`NE+1:0]          FmaSe         // exponent of the normalized sum
 );
    logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
@ -53,16 +53,16 @@ module shiftcorrection(
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
    assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
    //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
-    assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
+    assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
    // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
-    assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
    // Determine sum's exponent
    //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
+    assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
    // recalculate if the result is denormalized
    assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];

    // the quotent is in the range [.5,2) if there is no early termination
    // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
-    assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2};
+    assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2};
 endmodule
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@ -94,6 +94,7 @@ module testbenchfp;

  // in-between FMA signals
  logic                 Mult;
+  logic                 Ss;
  logic [`NE+1:0]	      Pe;
  logic 				        ZmSticky;
  logic 					      KillProd; 
@ -674,18 +675,18 @@ module testbenchfp;
  fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), 
          .Xe(XExp), .Ye(YExp), .Ze(ZExp), 
          .Xm(XMan), .Ym(YMan), .Zm(ZMan),
-          .XZero, .YZero, .ZZero,
+          .XZero, .YZero, .ZZero, .Ss,
          .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps,
          .Pe, .ZmSticky, .KillProd); 
              
  postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
-              .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp),
-              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky),
+              .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
+              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss),
              .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
              .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
              .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
              .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-              .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone,
+              .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone,
              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
              .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
  
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@ -114,7 +114,7 @@ logic [3:0] dummy;
        "arch32f":      if (`F_SUPPORTED) tests = arch32f;
        "imperas32i":                     tests = imperas32i;
        "imperas32f":   if (`F_SUPPORTED) tests = imperas32f;
-        "wally32d":     if (`D_SUPPORTED) tests = wally32d;
+        // "wally32d":     if (`D_SUPPORTED) tests = wally32d;
        "imperas32m":   if (`M_SUPPORTED) tests = imperas32m;
        "wally32a":     if (`A_SUPPORTED) tests = wally32a;
        "imperas32c":   if (`C_SUPPORTED) tests = imperas32c;
--- a/tests/riscof/spike/riscof_spike.py
+++ b/tests/riscof/spike/riscof_spike.py
@ -108,7 +108,7 @@ class spike(pluginTemplate):

      #TODO: The following assumes you are using the riscv-gcc toolchain. If
      #      not please change appropriately
-      self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ')
+      self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 '))

    def runTests(self, testList):

@ -158,6 +158,11 @@ class spike(pluginTemplate):
 	  # echo statement.
          if self.target_run:
            # set up the simulation command. Template is for spike. Please change.
+            if ('NO_SAIL=True' in testentry['macros']):
+                # if the tests can't run on SAIL we copy the reference output to the src directory
+                reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test))
+                simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying
+            else:
                simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf)
          else:
            simcmd = 'echo "NO RUN"'