Merge branch 'openhwgroup:main' into cachesim

2023-04-03 14:10:43 -07:00 · 2023-04-03 14:10:43 -07:00 · 37f4443012
commit 37f4443012
parent a1ce7fe321 91803dc684
37 changed files with 897 additions and 398 deletions
--- a/src/cache/cache.sv
+++ b/src/cache/cache.sv
@ -96,8 +96,7 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
  logic [LINELEN-1:0]            ReadDataLine, ReadDataLineCache;
  logic                          SelFetchBuffer;
  logic                          CacheEn;
-  logic [CACHEWORDSPERLINE-1:0]  MemPAdrDecoded;
-  logic [LINELEN/8-1:0]          LineByteMask, DemuxedByteMask, FetchBufferByteSel;
+  logic [LINELEN/8-1:0]          LineByteMask;
  logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr;

  genvar                         index;
@ -161,21 +160,30 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGBWPL, WORDLEN, MUXINTE
  /////////////////////////////////////////////////////////////////////////////////////////////
  // Write Path
  /////////////////////////////////////////////////////////////////////////////////////////////
+  if(!READ_ONLY_CACHE) begin:WriteSelLogic
+    logic [CACHEWORDSPERLINE-1:0]  MemPAdrDecoded;
+    logic [LINELEN/8-1:0]          DemuxedByteMask, FetchBufferByteSel;

-  // Adjust byte mask from word to cache line
-  onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
-  for(index = 0; index < 2**LOGCWPL; index++) begin
-    assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
-  end
-  assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask;  // If load miss set all muxes to 1.
-  assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
+    // Adjust byte mask from word to cache line
+    onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded));
+    for(index = 0; index < 2**LOGCWPL; index++) begin
+       assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0;
+    end
+    assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask;  // If load miss set all muxes to 1.

-  // Merge write data into fetched cache line for store miss
-  for(index = 0; index < LINELEN/8; index++) begin
-    mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
-      .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index]));
+    // Merge write data into fetched cache line for store miss
+    for(index = 0; index < LINELEN/8; index++) begin
+       mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]),
+         .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index]), .y(LineWriteData[8*index+7:8*index]));
+    end
+    assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0;
  end
-   
+  else
+    begin:WriteSelLogic
+       // No need for this mux if the cache does not handle writes.
+       assign LineWriteData = FetchBuffer;
+       assign LineByteMask = '1;
+    end
  /////////////////////////////////////////////////////////////////////////////////////////////
  // Flush logic
  /////////////////////////////////////////////////////////////////////////////////////////////
--- a/src/cache/cacheLRU.sv
+++ b/src/cache/cacheLRU.sv
@ -98,7 +98,9 @@ module cacheLRU
    assign LRUUpdate[t1] = LRUUpdate[s] & WayEncoded[r];
  end

-  mux2 #(1) LRUMuxes[NUMWAYS-2:0](CurrLRU, ~WayExpanded, LRUUpdate, NextLRU);
+  // The root node of the LRU tree is always selected in LRUUpdate, so it needs no mux. With NUMWAYS == 2 the root is the only node: skip the mux array, whose [NUMWAYS-3:0] range would otherwise be [-1:0] and double-drive NextLRU[0].
+  assign NextLRU[NUMWAYS-2] = ~WayExpanded[NUMWAYS-2];
+  if (NUMWAYS > 2) mux2 #(1) LRUMuxes[NUMWAYS-3:0](CurrLRU[NUMWAYS-3:0], ~WayExpanded[NUMWAYS-3:0], LRUUpdate[NUMWAYS-3:0], NextLRU[NUMWAYS-3:0]);

  // Compute next victim way.
  for(s = NUMWAYS-2; s >= NUMWAYS/2; s--) begin
@ -128,8 +130,8 @@ module cacheLRU
  always_ff @(posedge clk) begin
    if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
    if(CacheEn) begin
-      if((InvalidateCache | FlushCache) & ~FlushStage) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
-      else if (LRUWriteEn & ~FlushStage) begin 
+      // if((InvalidateCache | FlushCache) & ~FlushStage) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0;
+      if (LRUWriteEn & ~FlushStage) begin 
        LRUMemory[PAdr] <= NextLRU;
      end
      if(LRUWriteEn & ~FlushStage & (PAdr == CacheSet))
--- a/src/fpu/fctrl.sv
+++ b/src/fpu/fctrl.sv
@ -75,109 +75,139 @@ module fctrl (
  logic [1:0]                 FResSelD;           // Select one of the results that finish in the memory stage
  logic [2:0]                 FrmD, FrmE;         // FP rounding mode
  logic [`FMTBITS-1:0]        FmtD;               // FP format
-  logic [1:0]                 Fmt;                // format - before possible reduction
+  logic [1:0]                 Fmt, Fmt2;          // format - before possible reduction
  logic                       SupportedFmt;       // is the format supported
+  logic                       SupportedFmt2;      // is the source format supported for fp -> fp
  logic                       FCvtIntD, FCvtIntM; // convert to integer opperation

  // FPU Instruction Decoder
  assign Fmt = Funct7D[1:0];
+  assign Fmt2 = Rs2D[1:0]; // source format for fcvt fp->fp

-  // Note: only Fmt is checked; fcvt does not check destination format
  assign SupportedFmt = (Fmt == 2'b00 | (Fmt == 2'b01 & `D_SUPPORTED) |
                         (Fmt == 2'b10 & `ZFH_SUPPORTED) | (Fmt == 2'b11 & `Q_SUPPORTED));
+  assign SupportedFmt2 = (Fmt2 == 2'b00 | (Fmt2 == 2'b01 & `D_SUPPORTED) |
+                         (Fmt2 == 2'b10 & `ZFH_SUPPORTED) | (Fmt2 == 2'b11 & `Q_SUPPORTED));

  // decode the instruction                       
+  // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt
  always_comb
    if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
      ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0;
    else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) 
      ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // for anything other than loads and stores, check for supported format
-    else case(OpD)
-    // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt
-      7'b0000111: case(Funct3D)
-                    3'b010:                      ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flw
-                    3'b011:  if (`D_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // fld
-                             else                ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fld not supported
-                    3'b100:  if (`Q_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flq
-                             else                ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // flq not supported
-                    3'b001:  if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flh
-                             else                ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // flh not supported
-                    default:                     ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction
-                  endcase
-      7'b0100111: case(Funct3D)
-                    3'b010:                      ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsw
-                    3'b011:  if (`D_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsd
-                             else                ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fsd not supported
-                    3'b100:  if (`Q_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsq
-                             else                ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fsq not supported
-                    3'b001:  if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsh
-                             else                ControlsD = `FCTRLW'b0_0_00_xx_0xx_0_1_0; // fsh not supported
-                    default:                     ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction
-                  endcase
-      7'b1000011:   ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd
-      7'b1000111:   ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub
-      7'b1001011:   ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0; // fnmsub
-      7'b1001111:   ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0; // fnmadd
-      7'b1010011: casez(Funct7D)
-                    7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0; // fadd
-                    7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub
-                    7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul
-                    7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv
-                    7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt
-                    7'b00100??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_00_xx_000_0_0_0; // fsgnj
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_00_xx_001_0_0_0; // fsgnjn
-                                  3'b010:  ControlsD = `FCTRLW'b1_0_00_xx_010_0_0_0; // fsgnjx
-                                  default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction
-                                endcase
-                    7'b00101??: case(Funct3D)
-                                  3'b000:  ControlsD = `FCTRLW'b1_0_00_xx_110_0_0_0; // fmin
-                                  3'b001:  ControlsD = `FCTRLW'b1_0_00_xx_101_0_0_0; // fmax
-                                  default: ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction
-                                endcase
-                    7'b10100??: case(Funct3D)
-                                  3'b010:  ControlsD = `FCTRLW'b0_1_00_xx_010_0_0_0; // feq
-                                  3'b001:  ControlsD = `FCTRLW'b0_1_00_xx_001_0_0_0; // flt
-                                  3'b000:  ControlsD = `FCTRLW'b0_1_00_xx_011_0_0_0; // fle
-                                  default: ControlsD = `FCTRLW'b0_0_00_xx_000__0_1_0; // non-implemented instruction
-                                endcase
-                    7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)          
-                                                                ControlsD = `FCTRLW'b0_1_10_xx_000_0_0_0; // fclass
-                                else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.w   to int reg
-                                else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.d   to int reg
-                                else                            ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction
-                    7'b1101000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w   w->s
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.s.l   l->s
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.s.lu lu->s
-                                endcase
-                    7'b1100000: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.s   s->w
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.s  s->wu
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.s   s->l
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.s  s->lu
-                                endcase
-                    7'b1111000: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0_0; // fmv.w.x   to fp reg
-                    7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.d
-                    7'b1101001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.d.w   w->d
-                                  2'b01:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.d.wu wu->d
-                                  2'b10:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.d.l   l->d
-                                  2'b11:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.d.lu lu->d
-                                endcase
-                    7'b1100001: case(Rs2D[1:0])
-                                  2'b00:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.d   d->w
-                                  2'b01:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.d  d->wu
-                                  2'b10:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.d   d->l
-                                  2'b11:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d  d->lu
-                                endcase
-                    7'b1111001: ControlsD = `FCTRLW'b1_0_00_xx_011_0_0_0; // fmv.d.x   to fp reg
-                    7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.s
-                    default:    ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction
-                  endcase
-      default:      ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // non-implemented instruction
-    endcase
+    else begin 
+      ControlsD = `FCTRLW'b0_0_00_xx_000_0_1_0; // default: non-implemented instruction
+      /* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed
+      case(OpD)
+        7'b0000111: case(Funct3D)
+                      3'b010:                      ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flw
+                      3'b011:  if (`D_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // fld
+                      3'b100:  if (`Q_SUPPORTED)   ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flq
+                      3'b001:  if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_xx_0xx_0_0_0; // flh
+                    endcase
+        7'b0100111: case(Funct3D)
+                      3'b010:                      ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsw
+                      3'b011:  if (`D_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsd
+                      3'b100:  if (`Q_SUPPORTED)   ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsq
+                      3'b001:  if (`ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_xx_0xx_0_0_0; // fsh
+                    endcase
+        7'b1000011:   ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0; // fmadd
+        7'b1000111:   ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0; // fmsub
+        7'b1001011:   ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0; // fnmsub
+        7'b1001111:   ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0; // fnmadd
+        7'b1010011: casez(Funct7D)
+                      7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0; // fadd
+                      7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0; // fsub
+                      7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0; // fmul
+                      7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0; // fdiv
+                      7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0; // fsqrt
+                      7'b00100??: case(Funct3D)
+                                    3'b000:  ControlsD = `FCTRLW'b1_0_00_xx_000_0_0_0; // fsgnj
+                                    3'b001:  ControlsD = `FCTRLW'b1_0_00_xx_001_0_0_0; // fsgnjn
+                                    3'b010:  ControlsD = `FCTRLW'b1_0_00_xx_010_0_0_0; // fsgnjx
+                                  endcase
+                      7'b00101??: case(Funct3D)
+                                    3'b000:  ControlsD = `FCTRLW'b1_0_00_xx_110_0_0_0; // fmin
+                                    3'b001:  ControlsD = `FCTRLW'b1_0_00_xx_101_0_0_0; // fmax
+                                  endcase
+                      7'b10100??: case(Funct3D)
+                                    3'b010:  ControlsD = `FCTRLW'b0_1_00_xx_010_0_0_0; // feq
+                                    3'b001:  ControlsD = `FCTRLW'b0_1_00_xx_001_0_0_0; // flt
+                                    3'b000:  ControlsD = `FCTRLW'b0_1_00_xx_011_0_0_0; // fle
+                                  endcase
+                      7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)          
+                                                ControlsD = `FCTRLW'b0_1_10_xx_000_0_0_0; // fclass
+                                  else if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
+                                                ControlsD = `FCTRLW'b0_1_11_xx_000_0_0_0; // fmv.x.w / fmv.x.d to int register
+                      7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
+                                                ControlsD = `FCTRLW'b1_0_00_xx_011_0_0_0; // fmv.w.x / fmv.d.x   to fp reg
+                      7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00)
+                                                ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h)
+                      7'b0100001: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b01)
+                                                ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0; // fcvt.d.(s/h/q)
+                      // coverage off
+                      // Not covered in testing because rv64gc does not support half or quad precision
+                      7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10)
+                                                ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0; // fcvt.h.(s/d/q)
+                      7'b0100011: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b11)
+                                                ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0; // fcvt.q.(s/h/d)
+                      // coverage on
+                      7'b1101000: case(Rs2D)
+                                    5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.s.w   w->s
+                                    5'b00001:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.s.wu wu->s
+                                    5'b00010:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.s.l   l->s
+                                    5'b00011:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.s.lu lu->s
+                                  endcase
+                      7'b1100000: case(Rs2D)
+                                    5'b00000:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.s   s->w
+                                    5'b00001:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.s  s->wu
+                                    5'b00010:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.s   s->l
+                                    5'b00011:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.s  s->lu
+                                  endcase
+                      7'b1101001: case(Rs2D)
+                                    5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.d.w   w->d
+                                    5'b00001:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.d.wu wu->d
+                                    5'b00010:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.d.l   l->d
+                                    5'b00011:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.d.lu lu->d
+                                  endcase
+                      7'b1100001: case(Rs2D)
+                                    5'b00000:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.d   d->w
+                                    5'b00001:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.d  d->wu
+                                    5'b00010:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.d   d->l
+                                    5'b00011:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.d  d->lu
+                                  endcase
+                      // coverage off
+                      // Not covered in testing because rv64gc does not support half or quad precision
+                      7'b1101010: case(Rs2D)
+                                    5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.h.w   w->h
+                                    5'b00001:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.h.wu wu->h
+                                    5'b00010:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.h.l   l->h
+                                    5'b00011:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.h.lu lu->h
+                                  endcase
+                      7'b1100010: case(Rs2D)
+                                    5'b00000:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.h   h->w
+                                    5'b00001:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.h  h->wu
+                                    5'b00010:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.h   h->l
+                                    5'b00011:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.h  h->lu
+                                  endcase
+                      7'b1101011: case(Rs2D)
+                                    5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0; // fcvt.q.w   w->q
+                                    5'b00001:    ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0; // fcvt.q.wu wu->q
+                                    5'b00010:    ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0; // fcvt.q.l   l->q
+                                    5'b00011:    ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0; // fcvt.q.lu lu->q
+                                  endcase
+                      7'b1100011: case(Rs2D)
+                                    5'b00000:    ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1; // fcvt.w.q   q->w
+                                    5'b00001:    ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1; // fcvt.wu.q  q->wu
+                                    5'b00010:    ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1; // fcvt.l.q   q->l
+                                    5'b00011:    ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1; // fcvt.lu.q  q->lu
+                                  endcase
+                      // coverage on
+                    endcase
+      endcase
+    end
+    /* verilator lint_on CASEINCOMPLETE */

  // unswizzle control bits
  assign #1 {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD} = ControlsD;
@ -303,7 +333,5 @@ module fctrl (
  flopenrc #(4)  MWCtrlReg(clk, reset, FlushW, ~StallW,
          {FRegWriteM, FResSelM, FCvtIntM},
          {FRegWriteW, FResSelW, FCvtIntW});
-  
-  //assign FCvtIntW = (FResSelW == 2'b01);
-
+ 
 endmodule
--- a/src/ieu/alu.sv
+++ b/src/ieu/alu.sv
@ -37,14 +37,13 @@ module alu #(parameter WIDTH=32) (
  input  logic [1:0]       BSelect,     // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
  input  logic [2:0]       ZBBSelect,   // ZBB mux select signal
  input  logic [2:0]       Funct3,      // For BMU decoding
-  input  logic [1:0]       CompFlags,   // Comparator flags
  input  logic [2:0]       BALUControl, // ALU Control signals for B instructions in Execute Stage
-  output logic [WIDTH-1:0] Result,      // ALU result
+  output logic [WIDTH-1:0] ALUResult,   // ALU result
  output logic [WIDTH-1:0] Sum);        // Sum of operands

  // CondInvB = ~B when subtracting, B otherwise. Shift = shift result. SLT/U = result of a slt/u instruction.
  // FullResult = ALU result before adjusting for a RV64 w-suffix instruction.
-  logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, ALUResult;                   // Intermediate Signals 
+  logic [WIDTH-1:0] CondMaskInvB, Shift, FullResult, PreALUResult;                // Intermediate Signals 
  logic [WIDTH-1:0] CondMaskB;                                                    // Result of B mask select mux
  logic [WIDTH-1:0] CondShiftA;                                                   // Result of A shifted select mux
  logic [WIDTH-1:0] CondExtA;                                                     // Result of Zero Extend A select mux
@ -84,16 +83,16 @@ module alu #(parameter WIDTH=32) (
  end

  // Support RV64I W-type addw/subw/addiw/shifts that discard upper 32 bits and sign-extend 32-bit result to 64 bits
-  if (WIDTH == 64)  assign ALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
-  else              assign ALUResult = FullResult;
+  if (WIDTH == 64)  assign PreALUResult = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult;
+  else              assign PreALUResult = FullResult;

  // Final Result B instruction select mux
  if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu
    bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, 
-      .Funct3, .CompFlags, .BALUControl, .ALUResult, .FullResult,
-      .CondMaskB, .CondShiftA, .Result);
+      .Funct3, .LT,.LTU, .BALUControl, .PreALUResult, .FullResult,
+      .CondMaskB, .CondShiftA, .ALUResult);
  end else begin
-    assign Result = ALUResult;
+    assign ALUResult = PreALUResult;
    assign CondMaskB = B;
    assign CondShiftA = A;
  end
--- a/src/ieu/bmu/bitmanipalu.sv
+++ b/src/ieu/bmu/bitmanipalu.sv
@ -35,12 +35,13 @@ module bitmanipalu #(parameter WIDTH=32) (
  input  logic [1:0]       BSelect,                 // Binary encoding of if it's a ZBA_ZBB_ZBC_ZBS instruction
  input  logic [2:0]       ZBBSelect,               // ZBB mux select signal
  input  logic [2:0]       Funct3,                  // Funct3 field of opcode indicates operation to perform
-  input  logic [1:0]       CompFlags,               // Comparator flags
+  input  logic             LT,                      // less than flag
+  input  logic             LTU,                     // less than unsigned flag
  input  logic [2:0]       BALUControl,             // ALU Control signals for B instructions in Execute Stage
-  input  logic [WIDTH-1:0] ALUResult, FullResult,   // ALUResult, FullResult signals
+  input  logic [WIDTH-1:0] PreALUResult, FullResult,// PreALUResult, FullResult signals
  output logic [WIDTH-1:0] CondMaskB,               // B is conditionally masked for ZBS instructions
  output logic [WIDTH-1:0] CondShiftA,              // A is conditionally shifted for ShAdd instructions
-  output logic [WIDTH-1:0] Result);                 // Result
+  output logic [WIDTH-1:0] ALUResult);              // Result

  logic [WIDTH-1:0] ZBBResult, ZBCResult;           // ZBB, ZBC Result
  logic [WIDTH-1:0] MaskB;                          // BitMask of B
@ -84,16 +85,16 @@ module bitmanipalu #(parameter WIDTH=32) (

  // ZBB Unit
  if (`ZBB_SUPPORTED) begin: zbb
-    zbb #(WIDTH) ZBB(.A, .RevA, .B, .ALUResult, .W64, .lt(CompFlags[0]), .ZBBSelect, .ZBBResult);
+    zbb #(WIDTH) ZBB(.A, .RevA, .B, .W64, .LT, .LTU, .BUnsigned(Funct3[0]), .ZBBSelect, .ZBBResult);
  end else assign ZBBResult = 0;

  // Result Select Mux
  always_comb
    case (BSelect)
      // 00: ALU, 01: ZBA/ZBS, 10: ZBB, 11: ZBC
-      2'b00: Result = ALUResult; 
-      2'b01: Result = FullResult;         // NOTE: We don't use ALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word.
-      2'b10: Result = ZBBResult; 
-      2'b11: Result = ZBCResult;
+      2'b00: ALUResult = PreALUResult; 
+      2'b01: ALUResult = FullResult;         // NOTE: We don't use PreALUResult because ZBA/ZBS instructions don't sign extend the MSB of the right-hand word.
+      2'b10: ALUResult = ZBBResult; 
+      2'b11: ALUResult = ZBCResult;
    endcase
 endmodule
--- a/src/ieu/bmu/bmuctrl.sv
+++ b/src/ieu/bmu/bmuctrl.sv
@ -48,7 +48,6 @@ module bmuctrl(
  output logic [1:0]  BSelectE,                // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding
  output logic [2:0]  ZBBSelectE,              // ZBB mux select signal
  output logic        BRegWriteE,              // Indicates if it is a R type B instruction in Execute
-  output logic        BComparatorSignedE,      // Indicates if comparator signed in Execute Stage
  output logic [2:0]  BALUControlE             // ALU Control signals for B instructions in Execute Stage
 );

@ -56,7 +55,6 @@ module bmuctrl(
  logic [2:0] Funct3D;                         // Funct3 field in Decode stage
  logic [6:0] Funct7D;                         // Funct7 field in Decode stage
  logic [4:0] Rs2D;                            // Rs2 source register in Decode stage
-  logic       BComparatorSignedD;              // Indicates if comparator signed (max, min instruction) in Decode Stage
  logic       RotateD;                         // Indicates if rotate instruction in Decode Stage
  logic       MaskD;                           // Indicates if zbs instruction in Decode Stage
  logic       PreShiftD;                       // Indicates if sh1add, sh2add, sh3add instruction in Decode Stage
@ -110,10 +108,10 @@ module bmuctrl(
                                  BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0;  // rev8
        17'b0010011_0010100_101: if (Rs2D[4:0] == 5'b00111)
                                  BMUControlsD = `BMUCTRLW'b000_10_010_1_1_0_1_0_0_0_0_0;  // orc.b
-        17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0;  // max
-        17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_0_0_0_0_0;  // maxu
-        17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0;  // min
-        17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_0_0_0_0_0;  // minu
+        17'b0110011_0000101_110: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0;  // max
+        17'b0110011_0000101_111: BMUControlsD = `BMUCTRLW'b000_10_111_1_0_0_1_1_0_0_0_0;  // maxu
+        17'b0110011_0000101_100: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0;  // min
+        17'b0110011_0000101_101: BMUControlsD = `BMUCTRLW'b000_10_011_1_0_0_1_1_0_0_0_0;  // minu
      endcase
      if (`XLEN==32)
        casez({OpD, Funct7D, Funct3D})
@ -172,12 +170,9 @@ module bmuctrl(
  // Pack BALUControl Signals
  assign BALUControlD = {RotateD, MaskD, PreShiftD};

-  // Comparator should perform signed comparison when min/max instruction. We have overlap in funct3 with some branch instructions so we use opcode to differentiate betwen min/max and branches
-  assign BComparatorSignedD = (Funct3D[2]^Funct3D[0]) & ~OpD[6];
-
  // Choose ALUSelect brom BMU for BMU operations, Funct3 for IEU operations, or 0 for addition
  assign ALUSelectD = BALUOpD ? BALUSelectD : (ALUOpD ? Funct3D : 3'b000);

  // BMU Execute stage pipieline control register
-  flopenrc#(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BComparatorSignedD,  BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BComparatorSignedE, BALUControlE});
+  flopenrc#(9) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BALUControlE});
 endmodule
--- a/src/ieu/bmu/clmul.sv
+++ b/src/ieu/bmu/clmul.sv
@ -30,20 +30,20 @@
 `include "wally-config.vh"

 module clmul #(parameter WIDTH=32) (
-  input  logic [WIDTH-1:0] A, B,             // Operands
+  input  logic [WIDTH-1:0] X, Y,             // Operands
  output logic [WIDTH-1:0] ClmulResult);     // carry-less multiply result

-  logic [(WIDTH*WIDTH)-1:0] s;               // intermediary signals for carry-less multiply
+  logic [(WIDTH*WIDTH)-1:0] S;               // intermediary signals for carry-less multiply
  
  integer i,j;

  always_comb begin
    for (i=0;i<WIDTH;i++) begin: outer
-      s[WIDTH*i]=A[0]&B[i];
+      S[WIDTH*i] = X[0] & Y[i];
      for (j=1;j<=i;j++) begin: inner
-        s[WIDTH*i+j] = (A[j]&B[i-j])^s[WIDTH*i+j-1];
+        S[WIDTH*i+j] = (X[j] & Y[i-j]) ^ S[WIDTH*i+j-1];
      end
-      ClmulResult[i] = s[WIDTH*i+j-1];
+      ClmulResult[i] = S[WIDTH*i+j-1];
    end
  end
 endmodule
--- a/src/ieu/bmu/cnt.sv
+++ b/src/ieu/bmu/cnt.sv
@ -32,7 +32,7 @@

 module cnt #(parameter WIDTH = 32) (
  input  logic [WIDTH-1:0] A, RevA,    // Operands
-  input  logic [4:0] B,                // Last 5 bits of immediate
+  input  logic [1:0] B,                // Last 2 bits of immediate
  input  logic W64,                    // Indicates word operation
  output logic [WIDTH-1:0] CntResult   // count result
 );
--- a/src/ieu/bmu/zbb.sv
+++ b/src/ieu/bmu/zbb.sv
@ -32,23 +32,26 @@

 module zbb #(parameter WIDTH=32) (
  input  logic [WIDTH-1:0] A, RevA, B,   // Operands
-  input  logic [WIDTH-1:0] ALUResult,    // ALU Result
  input  logic             W64,          // Indicates word operation
-  input  logic             lt,           // lt flag
-  input  logic [2:0]       ZBBSelect,    // Indicates word operation
+  input  logic             LT,           // lt flag
+  input  logic             LTU,          // ltu flag
+  input  logic             BUnsigned,      // selects between LT and LTU for min/max (unsigned variants)
+  input  logic [2:0]       ZBBSelect,    // ZBB Result select signal
  output logic [WIDTH-1:0] ZBBResult);   // ZBB result
-  
+
+  logic lt;                              // lt given signed/unsigned
  logic [WIDTH-1:0] CntResult;           // count result
-  logic [WIDTH-1:0] MinMaxResult;        // min,max result
+  logic [WIDTH-1:0] MinMaxResult;        // min, max result
  logic [WIDTH-1:0] ByteResult;          // byte results
  logic [WIDTH-1:0] ExtResult;           // sign/zero extend results

-  cnt #(WIDTH) cnt(.A, .RevA, .B(B[4:0]), .W64, .CntResult);
+  mux2 #(1) ltmux(LT, LTU, BUnsigned , lt);
+  cnt #(WIDTH) cnt(.A, .RevA, .B(B[1:0]), .W64, .CntResult);
  byteUnit #(WIDTH) bu(.A, .ByteSelect(B[0]), .ByteResult);
  ext #(WIDTH) ext(.A, .ExtSelect({~B[2], {B[2] & B[0]}}), .ExtResult);

  // ZBBSelect[2] differentiates between min(u) vs max(u) instruction
-  mux2 #(WIDTH) minmaxmux(B, A, lt^ZBBSelect[2], MinMaxResult);
+  mux2 #(WIDTH) minmaxmux(B, A, ZBBSelect[2]^lt, MinMaxResult);

  // ZBB Result select mux
  mux4 #(WIDTH) zbbresultmux(CntResult, ExtResult, ByteResult, MinMaxResult, ZBBSelect[1:0], ZBBResult);
--- a/src/ieu/bmu/zbc.sv
+++ b/src/ieu/bmu/zbc.sv
@ -36,19 +36,16 @@ module zbc #(parameter WIDTH=32) (

  logic [WIDTH-1:0] ClmulResult, RevClmulResult;
  logic [WIDTH-1:0] RevB;
-  logic [WIDTH-1:0] x,y;
-  logic [1:0] select;
+  logic [WIDTH-1:0] X, Y;

-  assign select = ~Funct3[1:0];
+  bitreverse #(WIDTH) brB(B, RevB);

-  bitreverse #(WIDTH) brB(.A(B), .RevA(RevB));
+  mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, ~Funct3[1:0], X);
+  mux3 #(WIDTH) ymux({{1'b0}, RevB[WIDTH-2:0]}, RevB, B, ~Funct3[1:0], Y);

-  mux3 #(WIDTH) xmux({RevA[WIDTH-2:0], {1'b0}}, RevA, A, select, x);
-  mux3 #(WIDTH) ymux({{1'b0},RevB[WIDTH-2:0]}, RevB, B,  select, y);
-
-  clmul #(WIDTH) clm(.A(x), .B(y), .ClmulResult(ClmulResult));
+  clmul #(WIDTH) clm(.X, .Y, .ClmulResult);
  
-  bitreverse  #(WIDTH) brClmulResult(.A(ClmulResult), .RevA(RevClmulResult));
+  bitreverse  #(WIDTH) brClmulResult(ClmulResult, RevClmulResult);

  mux2 #(WIDTH) zbcresultmux(ClmulResult, RevClmulResult, Funct3[1], ZBCResult);
 endmodule
--- a/src/ieu/controller.sv
+++ b/src/ieu/controller.sv
@ -125,12 +125,12 @@ module controller(
  logic        IntDivM;                        // Integer divide instruction
  logic [1:0]  BSelectD;                       // One-Hot encoding if it's ZBA_ZBB_ZBC_ZBS instruction in decode stage
  logic [2:0]  ZBBSelectD;                     // ZBB Mux Select Signal
-  logic        BComparatorSignedE;             // Indicates if max, min (signed comarison) instruction in Execute Stage
  logic        IFunctD, RFunctD, MFunctD;      // Detect I, R, and M-type RV32IM/Rv64IM instructions
  logic        LFunctD, SFunctD, BFunctD;      // Detect load, store, branch instructions
  logic        JFunctD;                        // detect jalr instruction
  logic        FenceM;                         // Fence.I or sfence.VMA instruction in memory stage
  logic [2:0]  ALUSelectD;                     // ALU Output selection mux control
+  logic        IWValidFunct3D;                 // Detects if Funct3 is valid for IW instructions

  // Extract fields
  assign OpD = InstrD[6:0];
@ -161,6 +161,7 @@ module controller(
                              ((`XLEN == 64) & (Funct3D == 3'b011));
    assign BFunctD          = (Funct3D[2:1] != 2'b01); // legal branches
    assign JFunctD          = (Funct3D == 3'b000);
+    assign IWValidFunct3D   = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b101;
  end else begin:legalcheck2
    assign IFunctD = 1; // Don't bother to separate out shift decoding
    assign RFunctD = ~Funct7D[0]; // Not a multiply
@ -168,7 +169,8 @@ module controller(
    assign LFunctD = 1; // don't bother to check Funct3 for loads
    assign SFunctD = 1; // don't bother to check Funct3 for stores
    assign BFunctD = 1; // don't bother to check Funct3 for branches
-    assign JFunctD = 1; // don't bother to check Funct3 for jumps    
+    assign JFunctD = 1; // don't bother to check Funct3 for jumps
+    assign IWValidFunct3D = 1;
  end

  // Main Instruction Decoder
@ -187,7 +189,7 @@ module controller(
      7'b0010011: if (IFunctD)    
                      ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_0_0_0_0_0_00_0; // I-type ALU
      7'b0010111:     ControlsD = `CTRLW'b1_100_11_00_000_0_0_0_0_0_0_0_0_0_00_0; // auipc
-      7'b0011011: if (IFunctD & `XLEN == 64)
+      7'b0011011: if (IFunctD & IWValidFunct3D & `XLEN == 64)
                      ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_1_0_0_0_0_00_0; // IW-type ALU for RV64i
      7'b0100011: if (SFunctD) 
                      ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_0; // stores
@ -254,7 +256,7 @@ module controller(

    bmuctrl bmuctrl(.clk, .reset, .StallD, .FlushD, .InstrD, .ALUOpD, .BSelectD, .ZBBSelectD, 
      .BRegWriteD, .BALUSrcBD, .BW64D, .BSubArithD, .IllegalBitmanipInstrD, .StallE, .FlushE, 
-      .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BComparatorSignedE, .BALUControlE);
+      .ALUSelectD, .BSelectE, .ZBBSelectE, .BRegWriteE, .BALUControlE);
    if (`ZBA_SUPPORTED) begin
      // ALU Decoding is more comprehensive when ZBA is supported. slt and slti conflicts with sh1add, sh1add.uw
      assign sltD = (Funct3D == 3'b010 & (~(Funct7D[4]) | ~OpD[5])) ;
@ -280,7 +282,6 @@ module controller(
    assign BSelectE = 2'b00;
    assign BSelectD = 2'b00;
    assign ZBBSelectE = 3'b000;
-    assign BComparatorSignedE = 1'b0;
    assign BALUControlE = 3'b0;
  end

@ -308,8 +309,7 @@ module controller(
  // Branch Logic
  //  The comparator handles both signed and unsigned branches using BranchSignedE
  //  Hence, only eq and lt flags are needed
-  //  We also want comparator to handle signed comparison on a max/min bitmanip instruction
-  assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE) | BComparatorSignedE;
+  assign BranchSignedE = (~(Funct3E[2:1] == 2'b11) & BranchE);
  assign {eqE, ltE} = FlagsE;
  mux2 #(1) branchflagmux(eqE, ltE, Funct3E[2], BranchFlagE);
  assign BranchTakenE = BranchFlagE ^ Funct3E[0];
--- a/src/ieu/datapath.sv
+++ b/src/ieu/datapath.sv
@ -114,7 +114,7 @@ module datapath (
  comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
  mux2  #(`XLEN)  srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
  mux2  #(`XLEN)  srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE);
-  alu   #(`XLEN)  alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, FlagsE, BALUControlE, ALUResultE, IEUAdrE);
+  alu   #(`XLEN)  alu(SrcAE, SrcBE, W64E, SubArithE, ALUSelectE, BSelectE, ZBBSelectE, Funct3E, BALUControlE, ALUResultE, IEUAdrE);
  mux2 #(`XLEN)   altresultmux(ImmExtE, PCLinkE, JumpE, AltResultE);
  mux2 #(`XLEN)   ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE);

--- a/src/privileged/csr.sv
+++ b/src/privileged/csr.sv
@ -55,7 +55,7 @@ module csr #(parameter
  input  logic [4:0]       SetFflagsM,                // Set floating point flag bits in FCSR
  input  logic [1:0]       NextPrivilegeModeM,        // STATUS bits updated based on next privilege mode
  input  logic [1:0]       PrivilegeModeW,            // current privilege mode
-  input  logic [`LOG_XLEN-1:0] CauseM,                // Trap cause
+  input  logic [3:0]       CauseM,                    // Trap cause
  input  logic             SelHPTW,                   // hardware page table walker active, so base endianness on supervisor mode
  // inputs for performance counters
  input  logic             LoadStallD,
@ -79,7 +79,7 @@ module csr #(parameter
  // outputs from CSRs
  output logic [1:0]       STATUS_MPP,
  output logic             STATUS_SPP, STATUS_TSR, STATUS_TVM,
-  output logic [`XLEN-1:0] MEDELEG_REGW, 
+  output logic [15:0] MEDELEG_REGW, 
  output logic [`XLEN-1:0] SATP_REGW,
  output logic [11:0]      MIP_REGW, MIE_REGW, MIDELEG_REGW,
  output logic             STATUS_MIE, STATUS_SIE,
@ -106,8 +106,10 @@ module csr #(parameter
  logic [31:0]             MCOUNTINHIBIT_REGW, MCOUNTEREN_REGW, SCOUNTEREN_REGW;
  logic                    WriteMSTATUSM, WriteMSTATUSHM, WriteSSTATUSM;
  logic                    CSRMWriteM, CSRSWriteM, CSRUWriteM;
+  logic                    UngatedCSRMWriteM;
  logic                    WriteFRMM, WriteFFLAGSM;
-  logic [`XLEN-1:0]        UnalignedNextEPCM, NextEPCM, NextCauseM, NextMtvalM;
+  logic [`XLEN-1:0]        UnalignedNextEPCM, NextEPCM, NextMtvalM;
+  logic [4:0]              NextCauseM;
  logic [11:0]             CSRAdrM;
  logic                    IllegalCSRCAccessM, IllegalCSRMAccessM, IllegalCSRSAccessM, IllegalCSRUAccessM;
  logic                    InsufficientCSRPrivilegeM;
@ -153,7 +155,7 @@ module csr #(parameter
    logic VectoredM;
    logic [`XLEN-1:0] TVecPlusCauseM;
    assign VectoredM = InterruptM & (TVecM[1:0] == 2'b01);
-    assign TVecPlusCauseM = {TVecAlignedM[`XLEN-1:6], CauseM[3:0], 2'b00}; // 64-byte alignment allows concatenation rather than addition
+    assign TVecPlusCauseM = {TVecAlignedM[`XLEN-1:6], CauseM, 2'b00}; // 64-byte alignment allows concatenation rather than addition
    mux2 #(`XLEN) trapvecmux(TVecAlignedM, TVecPlusCauseM, VectoredM, TrapVectorM);
  end else 
    assign TrapVectorM = TVecAlignedM;
@ -196,11 +198,12 @@ module csr #(parameter
  assign CSRAdrM = InstrM[31:20];
  assign UnalignedNextEPCM = TrapM ? ((wfiM & IntPendingM) ? PCM+4 : PCM) : CSRWriteValM;
  assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment
-  assign NextCauseM = TrapM ? {InterruptM, {(`XLEN-`LOG_XLEN-1){1'b0}}, CauseM}: CSRWriteValM;
+  assign NextCauseM = TrapM ? {InterruptM, CauseM}: {CSRWriteValM[`XLEN-1], CSRWriteValM[3:0]};
  assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM;
-  assign CSRMWriteM = CSRWriteM & (PrivilegeModeW == `M_MODE);
-  assign CSRSWriteM = CSRWriteM & (|PrivilegeModeW);
-  assign CSRUWriteM = CSRWriteM;  
+  assign UngatedCSRMWriteM = CSRWriteM & (PrivilegeModeW == `M_MODE);
+  assign CSRMWriteM = UngatedCSRMWriteM & InstrValidNotFlushedM;
+  assign CSRSWriteM = CSRWriteM & (|PrivilegeModeW)  & InstrValidNotFlushedM;
+  assign CSRUWriteM = CSRWriteM  & InstrValidNotFlushedM;
  assign MTrapM = TrapM & (NextPrivilegeModeM == `M_MODE);
  assign STrapM = TrapM & (NextPrivilegeModeM == `S_MODE) & `S_SUPPORTED;

@ -208,7 +211,7 @@ module csr #(parameter
  // CSRs
  ///////////////////////////////////////////

-  csri   csri(.clk, .reset, .InstrValidNotFlushedM,  
+  csri   csri(.clk, .reset,  
    .CSRMWriteM, .CSRSWriteM, .CSRWriteValM, .CSRAdrM, 
    .MExtInt, .SExtInt, .MTimerInt, .STimerInt, .MSwInt,
    .MIDELEG_REGW, .MIP_REGW, .MIE_REGW, .MIP_REGW_writeable);
@ -222,8 +225,8 @@ module csr #(parameter
    .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TVM,
    .STATUS_FS, .BigEndianM);

-  csrm  csrm(.clk, .reset, .InstrValidNotFlushedM, 
-    .CSRMWriteM, .MTrapM, .CSRAdrM,
+  csrm  csrm(.clk, .reset, 
+    .UngatedCSRMWriteM, .CSRMWriteM, .MTrapM, .CSRAdrM,
    .NextEPCM, .NextCauseM, .NextMtvalM, .MSTATUS_REGW, .MSTATUSH_REGW,
    .CSRWriteValM, .CSRMReadValM, .MTVEC_REGW,
    .MEPC_REGW, .MCOUNTEREN_REGW, .MCOUNTINHIBIT_REGW, 
@ -233,7 +236,7 @@ module csr #(parameter


  if (`S_SUPPORTED) begin:csrs
-    csrs  csrs(.clk, .reset,  .InstrValidNotFlushedM,
+    csrs  csrs(.clk, .reset,
      .CSRSWriteM, .STrapM, .CSRAdrM,
      .NextEPCM, .NextCauseM, .NextMtvalM, .SSTATUS_REGW, 
      .STATUS_TVM, .MCOUNTEREN_TM(MCOUNTEREN_REGW[1]),
--- a/src/privileged/csri.sv
+++ b/src/privileged/csri.sv
@ -35,7 +35,6 @@ module csri #(parameter
  SIE = 12'h104,
  SIP = 12'h144) (
  input  logic              clk, reset, 
-  input  logic              InstrValidNotFlushedM,
  input  logic              CSRMWriteM, CSRSWriteM,
  input  logic [`XLEN-1:0]  CSRWriteValM,
  input  logic [11:0]       CSRAdrM,
@ -50,10 +49,10 @@ module csri #(parameter
  logic                     STIP;

  // Interrupt Write Enables
-  assign WriteMIPM = CSRMWriteM & (CSRAdrM == MIP) & InstrValidNotFlushedM;
-  assign WriteMIEM = CSRMWriteM & (CSRAdrM == MIE) & InstrValidNotFlushedM;
-  assign WriteSIPM = CSRSWriteM & (CSRAdrM == SIP) & InstrValidNotFlushedM;
-  assign WriteSIEM = CSRSWriteM & (CSRAdrM == SIE) & InstrValidNotFlushedM;
+  assign WriteMIPM = CSRMWriteM & (CSRAdrM == MIP);
+  assign WriteMIEM = CSRMWriteM & (CSRAdrM == MIE);
+  assign WriteSIPM = CSRSWriteM & (CSRAdrM == SIP);
+  assign WriteSIEM = CSRSWriteM & (CSRAdrM == SIE);

  // Interrupt Pending and Enable Registers
  // MEIP, MTIP, MSIP are read-only
--- a/src/privileged/csrm.sv
+++ b/src/privileged/csrm.sv
@ -69,20 +69,20 @@ module csrm #(parameter
  DSCRATCH1 = 12'h7B3,
  // Constants
  ZERO = {(`XLEN){1'b0}},
-  MEDELEG_MASK = ~(ZERO | `XLEN'b1 << 11),
+  MEDELEG_MASK = 16'hB3FF,
  MIDELEG_MASK = 12'h222 // we choose to not make machine interrupts delegable
 ) (
  input  logic                    clk, reset, 
-  input  logic                    InstrValidNotFlushedM, 
-  input  logic                    CSRMWriteM, MTrapM,
+  input  logic                    UngatedCSRMWriteM, CSRMWriteM, MTrapM,
  input  logic [11:0]             CSRAdrM,
-  input  logic [`XLEN-1:0]        NextEPCM, NextCauseM, NextMtvalM, MSTATUS_REGW, MSTATUSH_REGW,
+  input  logic [`XLEN-1:0]        NextEPCM, NextMtvalM, MSTATUS_REGW, MSTATUSH_REGW,
+  input  logic [4:0]              NextCauseM,
  input  logic [`XLEN-1:0]        CSRWriteValM,
  input  logic [11:0]             MIP_REGW, MIE_REGW,
  output logic [`XLEN-1:0]        CSRMReadValM, MTVEC_REGW,
  output logic [`XLEN-1:0]        MEPC_REGW,    
  output logic [31:0]             MCOUNTEREN_REGW, MCOUNTINHIBIT_REGW, 
-  output logic [`XLEN-1:0]        MEDELEG_REGW,
+  output logic [15:0]             MEDELEG_REGW,
  output logic [11:0]             MIDELEG_REGW,
  output var logic [7:0]          PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
  output var logic [`PA_BITS-3:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0],
@ -91,8 +91,7 @@ module csrm #(parameter
 );

  logic [`XLEN-1:0]               MISA_REGW, MHARTID_REGW;
-  logic [`XLEN-1:0]               MSCRATCH_REGW;
-  logic [`XLEN-1:0]               MCAUSE_REGW, MTVAL_REGW;
+  logic [`XLEN-1:0]               MSCRATCH_REGW, MTVAL_REGW, MCAUSE_REGW;
  logic                           WriteMTVECM, WriteMEDELEGM, WriteMIDELEGM;
  logic                           WriteMSCRATCHM, WriteMEPCM, WriteMCAUSEM, WriteMTVALM;
  logic                           WriteMCOUNTERENM, WriteMCOUNTINHIBITM;
@ -112,13 +111,13 @@ module csrm #(parameter
      else
        assign ADDRLocked[i] = PMPCFG_ARRAY_REGW[i][7] | (PMPCFG_ARRAY_REGW[i+1][7] & PMPCFG_ARRAY_REGW[i+1][4:3] == 2'b01);
      
-      assign WritePMPADDRM[i] = (CSRMWriteM & (CSRAdrM == (PMPADDR0+i))) & InstrValidNotFlushedM & ~ADDRLocked[i];
+      assign WritePMPADDRM[i] = (CSRMWriteM & (CSRAdrM == (PMPADDR0+i))) & ~ADDRLocked[i];
      flopenr #(`PA_BITS-2) PMPADDRreg(clk, reset, WritePMPADDRM[i], CSRWriteValM[`PA_BITS-3:0], PMPADDR_ARRAY_REGW[i]);
      if (`XLEN==64) begin
-        assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+2*(i/8)))) & InstrValidNotFlushedM & ~CFGLocked[i];
+        assign WritePMPCFGM[i] = (CSRMWriteM & (CSRAdrM == (PMPCFG0+2*(i/8)))) & ~CFGLocked[i];
        flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%8)*8+7:(i%8)*8], PMPCFG_ARRAY_REGW[i]);
      end else begin
-        assign WritePMPCFGM[i]  = (CSRMWriteM & (CSRAdrM == (PMPCFG0+i/4))) & InstrValidNotFlushedM & ~CFGLocked[i];
+        assign WritePMPCFGM[i]  = (CSRMWriteM & (CSRAdrM == (PMPCFG0+i/4))) & ~CFGLocked[i];
        flopenr #(8) PMPCFGreg(clk, reset, WritePMPCFGM[i], CSRWriteValM[(i%4)*8+7:(i%4)*8], PMPCFG_ARRAY_REGW[i]);
      end
    end
@ -133,30 +132,30 @@ module csrm #(parameter
  assign MHARTID_REGW = 0;

  // Write machine Mode CSRs 
-  assign WriteMSTATUSM = CSRMWriteM & (CSRAdrM == MSTATUS) & InstrValidNotFlushedM;
-  assign WriteMSTATUSHM = CSRMWriteM & (CSRAdrM == MSTATUSH) & InstrValidNotFlushedM & (`XLEN==32);
-  assign WriteMTVECM = CSRMWriteM & (CSRAdrM == MTVEC) & InstrValidNotFlushedM;
-  assign WriteMEDELEGM = CSRMWriteM & (CSRAdrM == MEDELEG) & InstrValidNotFlushedM;
-  assign WriteMIDELEGM = CSRMWriteM & (CSRAdrM == MIDELEG) & InstrValidNotFlushedM;
-  assign WriteMSCRATCHM = CSRMWriteM & (CSRAdrM == MSCRATCH) & InstrValidNotFlushedM;
-  assign WriteMEPCM = MTrapM | (CSRMWriteM & (CSRAdrM == MEPC)) & InstrValidNotFlushedM;
-  assign WriteMCAUSEM = MTrapM | (CSRMWriteM & (CSRAdrM == MCAUSE)) & InstrValidNotFlushedM;
-  assign WriteMTVALM = MTrapM | (CSRMWriteM & (CSRAdrM == MTVAL)) & InstrValidNotFlushedM;
-  assign WriteMCOUNTERENM = CSRMWriteM & (CSRAdrM == MCOUNTEREN) & InstrValidNotFlushedM;
-  assign WriteMCOUNTINHIBITM = CSRMWriteM & (CSRAdrM == MCOUNTINHIBIT) & InstrValidNotFlushedM;
+  assign WriteMSTATUSM = CSRMWriteM & (CSRAdrM == MSTATUS);
+  assign WriteMSTATUSHM = CSRMWriteM & (CSRAdrM == MSTATUSH)& (`XLEN==32);
+  assign WriteMTVECM = CSRMWriteM & (CSRAdrM == MTVEC);
+  assign WriteMEDELEGM = CSRMWriteM & (CSRAdrM == MEDELEG);
+  assign WriteMIDELEGM = CSRMWriteM & (CSRAdrM == MIDELEG);
+  assign WriteMSCRATCHM = CSRMWriteM & (CSRAdrM == MSCRATCH);
+  assign WriteMEPCM = MTrapM | (CSRMWriteM & (CSRAdrM == MEPC));
+  assign WriteMCAUSEM = MTrapM | (CSRMWriteM & (CSRAdrM == MCAUSE));
+  assign WriteMTVALM = MTrapM | (CSRMWriteM & (CSRAdrM == MTVAL));
+  assign WriteMCOUNTERENM = CSRMWriteM & (CSRAdrM == MCOUNTEREN);
+  assign WriteMCOUNTINHIBITM = CSRMWriteM & (CSRAdrM == MCOUNTINHIBIT);

-  assign IllegalCSRMWriteReadonlyM = CSRMWriteM & (CSRAdrM == MVENDORID | CSRAdrM == MARCHID | CSRAdrM == MIMPID | CSRAdrM == MHARTID);
+  assign IllegalCSRMWriteReadonlyM = UngatedCSRMWriteM & (CSRAdrM == MVENDORID | CSRAdrM == MARCHID | CSRAdrM == MIMPID | CSRAdrM == MHARTID);

  // CSRs
  flopenr #(`XLEN) MTVECreg(clk, reset, WriteMTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, MTVEC_REGW); 
  if (`S_SUPPORTED) begin:deleg // DELEG registers should exist
-    flopenr #(`XLEN) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM & MEDELEG_MASK, MEDELEG_REGW);
-    flopenr #(12)    MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM[11:0] & MIDELEG_MASK, MIDELEG_REGW);
+    flopenr #(16) MEDELEGreg(clk, reset, WriteMEDELEGM, CSRWriteValM[15:0] & MEDELEG_MASK, MEDELEG_REGW);
+    flopenr #(12) MIDELEGreg(clk, reset, WriteMIDELEGM, CSRWriteValM[11:0] & MIDELEG_MASK, MIDELEG_REGW);
  end else assign {MEDELEG_REGW, MIDELEG_REGW} = 0;

  flopenr #(`XLEN) MSCRATCHreg(clk, reset, WriteMSCRATCHM, CSRWriteValM, MSCRATCH_REGW);
  flopenr #(`XLEN) MEPCreg(clk, reset, WriteMEPCM, NextEPCM, MEPC_REGW); 
-  flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, NextCauseM, MCAUSE_REGW);
+  flopenr #(`XLEN) MCAUSEreg(clk, reset, WriteMCAUSEM, {NextCauseM[4], {(`XLEN-5){1'b0}}, NextCauseM[3:0]}, MCAUSE_REGW);
  if(`QEMU) assign MTVAL_REGW = `XLEN'b0; // MTVAL tied to 0 in QEMU configuration
  else flopenr #(`XLEN) MTVALreg(clk, reset, WriteMTVALM, NextMtvalM, MTVAL_REGW);
  flopenr #(32)   MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], MCOUNTINHIBIT_REGW);
@ -192,7 +191,7 @@ module csrm #(parameter
      MSTATUS:   CSRMReadValM = MSTATUS_REGW;
      MSTATUSH:  CSRMReadValM = MSTATUSH_REGW; 
      MTVEC:     CSRMReadValM = MTVEC_REGW;
-      MEDELEG:   CSRMReadValM = MEDELEG_REGW;
+      MEDELEG:   CSRMReadValM = {{(`XLEN-16){1'b0}}, MEDELEG_REGW};
      MIDELEG:   CSRMReadValM = {{(`XLEN-12){1'b0}}, MIDELEG_REGW};
      MIP:       CSRMReadValM = {{(`XLEN-12){1'b0}}, MIP_REGW};
      MIE:       CSRMReadValM = {{(`XLEN-12){1'b0}}, MIE_REGW};
--- a/src/privileged/csrs.sv
+++ b/src/privileged/csrs.sv
@ -45,10 +45,10 @@ module csrs #(parameter
  STIMECMPH = 12'h15D,
  SATP = 12'h180) (
  input  logic             clk, reset, 
-  input  logic             InstrValidNotFlushedM, 
  input  logic             CSRSWriteM, STrapM,
  input  logic [11:0]      CSRAdrM,
-  input  logic [`XLEN-1:0] NextEPCM, NextCauseM, NextMtvalM, SSTATUS_REGW, 
+  input  logic [`XLEN-1:0] NextEPCM, NextMtvalM, SSTATUS_REGW, 
+  input  logic [4:0]       NextCauseM,
  input  logic             STATUS_TVM,
  input  logic             MCOUNTEREN_TM, // TM bit (1) of MCOUNTEREN; cause illegal instruction when trying to access STIMECMP if clear
  input  logic [`XLEN-1:0] CSRWriteValM,
@ -72,28 +72,26 @@ module csrs #(parameter
  logic                    WriteSSCRATCHM, WriteSEPCM;
  logic                    WriteSCAUSEM, WriteSTVALM, WriteSATPM, WriteSCOUNTERENM;
  logic                    WriteSTIMECMPM, WriteSTIMECMPHM;
-  logic [`XLEN-1:0]        SSCRATCH_REGW, STVAL_REGW;
-  logic [`XLEN-1:0]        SCAUSE_REGW;      
+  logic [`XLEN-1:0]        SSCRATCH_REGW, STVAL_REGW, SCAUSE_REGW;
  logic [63:0]             STIMECMP_REGW;
  
  // write enables
-  // *** can InstrValidNotFlushed be factored out of all these writes into CSRWriteM?
-  assign WriteSSTATUSM = CSRSWriteM & (CSRAdrM == SSTATUS)  & InstrValidNotFlushedM;
-  assign WriteSTVECM = CSRSWriteM & (CSRAdrM == STVEC) & InstrValidNotFlushedM;
-  assign WriteSSCRATCHM = CSRSWriteM & (CSRAdrM == SSCRATCH) & InstrValidNotFlushedM;
-  assign WriteSEPCM = STrapM | (CSRSWriteM & (CSRAdrM == SEPC)) & InstrValidNotFlushedM;
-  assign WriteSCAUSEM = STrapM | (CSRSWriteM & (CSRAdrM == SCAUSE)) & InstrValidNotFlushedM;
-  assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)) & InstrValidNotFlushedM;
-  assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM) & InstrValidNotFlushedM;
-  assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN) & InstrValidNotFlushedM;
-  assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & InstrValidNotFlushedM;
-  assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & (`XLEN == 32) & InstrValidNotFlushedM;
+  assign WriteSSTATUSM = CSRSWriteM & (CSRAdrM == SSTATUS);
+  assign WriteSTVECM = CSRSWriteM & (CSRAdrM == STVEC);
+  assign WriteSSCRATCHM = CSRSWriteM & (CSRAdrM == SSCRATCH);
+  assign WriteSEPCM = STrapM | (CSRSWriteM & (CSRAdrM == SEPC));
+  assign WriteSCAUSEM = STrapM | (CSRSWriteM & (CSRAdrM == SCAUSE));
+  assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL));
+  assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == `M_MODE | ~STATUS_TVM);
+  assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN);
+  assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM);
+  assign WriteSTIMECMPHM = CSRSWriteM & (CSRAdrM == STIMECMPH) & (PrivilegeModeW == `M_MODE | MCOUNTEREN_TM) & (`XLEN == 32);

  // CSRs
  flopenr #(`XLEN) STVECreg(clk, reset, WriteSTVECM, {CSRWriteValM[`XLEN-1:2], 1'b0, CSRWriteValM[0]}, STVEC_REGW); 
  flopenr #(`XLEN) SSCRATCHreg(clk, reset, WriteSSCRATCHM, CSRWriteValM, SSCRATCH_REGW);
  flopenr #(`XLEN) SEPCreg(clk, reset, WriteSEPCM, NextEPCM, SEPC_REGW); 
-  flopenr #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, NextCauseM, SCAUSE_REGW);
+  flopenr #(`XLEN) SCAUSEreg(clk, reset, WriteSCAUSEM, {NextCauseM[4], {(`XLEN-5){1'b0}}, NextCauseM[3:0]}, SCAUSE_REGW);
  flopenr #(`XLEN) STVALreg(clk, reset, WriteSTVALM, NextMtvalM, STVAL_REGW);
  if (`VIRTMEM_SUPPORTED)
    flopenr #(`XLEN) SATPreg(clk, reset, WriteSATPM, CSRWriteValM, SATP_REGW);
--- a/src/privileged/csru.sv
+++ b/src/privileged/csru.sv
@ -51,9 +51,8 @@ module csru #(parameter
  logic                    SetOrWriteFFLAGSM;
  
  // Write enables
-  //assign WriteFCSRM = CSRUWriteM & (CSRAdrM == FCSR)  & InstrValidNotFlushedM;
-  assign WriteFRMM = (CSRUWriteM & (STATUS_FS != 2'b00) & (CSRAdrM == FRM | CSRAdrM == FCSR))  & InstrValidNotFlushedM;
-  assign WriteFFLAGSM = (CSRUWriteM & (STATUS_FS != 2'b00) & (CSRAdrM == FFLAGS | CSRAdrM == FCSR)) & InstrValidNotFlushedM;
+  assign WriteFRMM =    CSRUWriteM & (STATUS_FS != 2'b00) & (CSRAdrM == FRM | CSRAdrM == FCSR);
+  assign WriteFFLAGSM = CSRUWriteM & (STATUS_FS != 2'b00) & (CSRAdrM == FFLAGS | CSRAdrM == FCSR);

  // Write Values
  assign NextFRMM = (CSRAdrM == FCSR) ? CSRWriteValM[7:5] : CSRWriteValM[2:0];
--- a/src/privileged/privileged.sv
+++ b/src/privileged/privileged.sv
@ -96,8 +96,8 @@ module privileged (
  output logic             WFIStallM                                       // Stall in Memory stage for WFI until interrupt or timeout
 );                                                                         
                                                                           
-  logic [`LOG_XLEN-1:0]    CauseM;                                         // trap cause
-  logic [`XLEN-1:0]        MEDELEG_REGW;                                   // exception delegation CSR
+  logic [3:0]              CauseM;                                         // trap cause
+  logic [15:0]             MEDELEG_REGW;                                   // exception delegation CSR
  logic [11:0]             MIDELEG_REGW;                                   // interrupt delegation CSR
  logic                    sretM, mretM;                                   // supervisor / machine return instruction
  logic                    IllegalCSRAccessM;                              // Illegal access to CSR
--- a/src/privileged/trap.sv
+++ b/src/privileged/trap.sv
@ -38,7 +38,7 @@ module trap (
  input  logic                 wfiM,                                            // wait for interrupt instruction
  input  logic [1:0]           PrivilegeModeW,                                  // current privilege mode
  input  logic [11:0]          MIP_REGW, MIE_REGW, MIDELEG_REGW,                // interrupt pending, enabled, and delegate CSRs
-  input  logic [`XLEN-1:0]     MEDELEG_REGW,                                    // exception delegation SR
+  input  logic [15:0]          MEDELEG_REGW,                                    // exception delegation CSR
  input  logic                 STATUS_MIE, STATUS_SIE,                          // machine/supervisor interrupt enables
  input  logic                 InstrValidM,                                     // current instruction is valid, not flushed
  input  logic                 CommittedM, CommittedF,                          // LSU/IFU has committed to a bus operation that can't be interrupted
@ -49,7 +49,7 @@ module trap (
  output logic                 IntPendingM,                                     // Interrupt is pending, might occur if enabled
  output logic                 DelegateM,                                       // Delegate trap to supervisor handler
  output logic                 WFIStallM,                                       // Stall due to WFI instruction
-  output logic [`LOG_XLEN-1:0] CauseM                                           // trap cause
+  output logic [3:0]           CauseM                                           // trap cause
 );

  logic                        MIntGlobalEnM, SIntGlobalEnM;                    // Global interrupt enables
@ -72,7 +72,7 @@ module trap (
  assign EnabledIntsM = ({12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW);
  assign ValidIntsM = {12{~Committed}} & EnabledIntsM;
  assign InterruptM = (|ValidIntsM) & InstrValidM; // suppress interrupt if the memory system has partially processed a request.
-  assign DelegateM = `S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM]) & 
+  assign DelegateM = `S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM] : MEDELEG_REGW[CauseM]) & 
                     (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE);
  assign WFIStallM = wfiM & ~IntPendingM;

@ -109,7 +109,7 @@ module trap (
    else if (IllegalInstrFaultM)       CauseM = 2;
    else if (InstrMisalignedFaultM)    CauseM = 0;
    else if (BreakpointFaultM)         CauseM = 3;
-    else if (EcallFaultM)              CauseM = {{(`LOG_XLEN-4){1'b0}}, {2'b10}, PrivilegeModeW};
+    else if (EcallFaultM)              CauseM = {2'b10, PrivilegeModeW};
    else if (LoadMisalignedFaultM)     CauseM = 4;
    else if (StoreAmoMisalignedFaultM) CauseM = 6;
    else if (LoadPageFaultM)           CauseM = 13;
--- a/testbench/testbench-fp.sv
+++ b/testbench/testbench-fp.sv
@ -1,7 +1,8 @@
-///////////////////////////////////////////
+///////////////////////////////////////////
 //
 // Written: me@KatherineParry.com
 // Modified: 7/5/2022
+// Modified: 4/2/2023
 //
 // Purpose: Testbench for Testfloat
 // 
@ -32,75 +33,74 @@
 module testbenchfp;
  parameter TEST="none";

-  string      Tests[];        // list of tests to be run
-  logic [2:0] OpCtrl[];       // list of op controls
-  logic [2:0] Unit[];         // list of units being tested
-  logic WriteInt[];           // Is being written to integer resgiter
-  logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
-  logic [1:0] Fmt[];          // list of formats for the other units
-  
+  string                       Tests[];        // list of tests to be run
+  logic [2:0]                  OpCtrl[];       // list of op controls
+  logic [2:0]                  Unit[];         // list of units being tested
+  logic                        WriteInt[];           // Is being written to integer register
+  logic [2:0]                  Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
+  logic [1:0]                  Fmt[];          // list of formats for the other units  

-  logic               clk=0;
-  logic [31:0]        TestNum=0;    // index for the test
-  logic [31:0]        OpCtrlNum=0;  // index for OpCtrl
-  logic [31:0]        errors=0;     // how many errors
-  logic [31:0]        VectorNum=0;  // index for test vector
-  logic [31:0]        FrmNum=0;     // index for rounding mode
-  logic [`FLEN*4+7:0] TestVectors[8388609:0];     // list of test vectors
+  logic                        clk=0;
+  logic [31:0]                 TestNum=0;    // index for the test
+  logic [31:0]                 OpCtrlNum=0;  // index for OpCtrl
+  logic [31:0]                 errors=0;     // how many errors
+  logic [31:0]                 VectorNum=0;  // index for test vector
+  logic [31:0]                 FrmNum=0;     // index for rounding mode
+  logic [`FLEN*4+7:0]          TestVectors[8388609:0];     // list of test vectors

-  logic [1:0]           FmtVal;          // value of the current Fmt
-  logic [2:0]           UnitVal, OpCtrlVal, FrmVal; // value of the currnet Unit/OpCtrl/FrmVal
-  logic                 WriteIntVal;                // value of the current WriteInt
-  logic [`FLEN-1:0]     X, Y, Z;                    // inputs read from TestFloat
-  logic [`XLEN-1:0]     SrcA;                       // integer input
-  logic [`FLEN-1:0]	    Ans;                        // correct answer from TestFloat
-  logic [`FLEN-1:0]	    Res;                        // result from other units
-  logic [4:0]	 	        AnsFlg;                     // correct flags read from testfloat
-  logic [4:0]	 	        ResFlg, Flg;                // Result flags
-  logic	[`FMTBITS-1:0]  ModFmt;                     // format - 10 = half, 00 = single, 01 = double, 11 = quad
-  logic [`FLEN-1:0]     FpRes, FpCmpRes;            // Results from each unit
-  logic [`XLEN-1:0]     IntRes, CmpRes;             // Results from each unit
-  logic [4:0]           FmaFlg, CvtFlg, DivFlg, CmpFlg;  // Outputed flags
-  logic                 AnsNaN, ResNaN, NaNGood;
-  logic                 Xs, Ys, Zs;                 // sign of the inputs
-  logic [`NE-1:0]       Xe, Ye, Ze;                 // exponent of the inputs
-  logic [`NF:0]         Xm, Ym, Zm;                 // mantissas of the inputs
-  logic                 XNaN, YNaN, ZNaN;           // is the input NaN
-  logic                 XSNaN, YSNaN, ZSNaN;        // is the input a signaling NaN
-  logic                 XSubnorm, ZSubnorm;           // is the input denormalized
-  logic                 XInf, YInf, ZInf;           // is the input infinity
-  logic                 XZero, YZero, ZZero;        // is the input zero
-  logic                 XExpMax, YExpMax, ZExpMax;  // is the input's exponent all ones  
-  logic  [`CVTLEN-1:0]  CvtLzcInE;                  // input to the Leading Zero Counter (priority encoder)
-  logic                 IntZero;
-  logic                 CvtResSgnE;
-  logic [`NE:0]         CvtCalcExpE;    // the calculated expoent
-	logic [`LOGCVTLEN-1:0] CvtShiftAmtE;  // how much to shift by
-	logic [`DIVb:0]       Quot;
-  logic                 CvtResSubnormUfE;
-  logic                 DivStart, FDivBusyE, OldFDivBusyE;
-  logic                 reset = 1'b0;
-  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
-  logic [`DURLEN-1:0]   Dur;
+  logic [1:0]                  FmtVal;          // value of the current Fmt
+  logic [2:0]                  UnitVal, OpCtrlVal, FrmVal; // value of the current Unit/OpCtrl/FrmVal
+  logic                        WriteIntVal;                // value of the current WriteInt
+  logic [`FLEN-1:0]            X, Y, Z;                    // inputs read from TestFloat
+  logic [`XLEN-1:0]            SrcA;                       // integer input
+  logic [`FLEN-1:0]	       Ans;                        // correct answer from TestFloat
+  logic [`FLEN-1:0]	       Res;                        // result from other units
+  logic [4:0]	 	       AnsFlg;                     // correct flags read from testfloat
+  logic [4:0]	 	       ResFlg, Flg;                // Result flags
+  logic	[`FMTBITS-1:0]         ModFmt;                     // format - 10 = half, 00 = single, 01 = double, 11 = quad
+  logic [`FLEN-1:0]            FpRes, FpCmpRes;            // Results from each unit
+  logic [`XLEN-1:0]            IntRes, CmpRes;             // Results from each unit
+  logic [4:0]                  FmaFlg, CvtFlg, DivFlg, CmpFlg;  // Outputed flags
+  logic                        AnsNaN, ResNaN, NaNGood;
+  logic                        Xs, Ys, Zs;                 // sign of the inputs
+  logic [`NE-1:0]              Xe, Ye, Ze;                 // exponent of the inputs
+  logic [`NF:0]                Xm, Ym, Zm;                 // mantissas of the inputs
+  logic                        XNaN, YNaN, ZNaN;           // is the input NaN
+  logic                        XSNaN, YSNaN, ZSNaN;        // is the input a signaling NaN
+  logic                        XSubnorm, ZSubnorm;           // is the input denormalized
+  logic                        XInf, YInf, ZInf;           // is the input infinity
+  logic                        XZero, YZero, ZZero;        // is the input zero
+  logic                        XExpMax, YExpMax, ZExpMax;  // is the input's exponent all ones  
+  logic  [`CVTLEN-1:0]         CvtLzcInE;                  // input to the Leading Zero Counter (priority encoder)
+  logic                        IntZero;
+  logic                        CvtResSgnE;
+  logic [`NE:0]                CvtCalcExpE;    // the calculated exponent
+  logic [`LOGCVTLEN-1:0]       CvtShiftAmtE;  // how much to shift by
+  logic [`DIVb:0]              Quot;
+  logic                        CvtResSubnormUfE;
+  logic                        DivStart, FDivBusyE, OldFDivBusyE;
+  logic                        reset = 1'b0;
+  logic [$clog2(`NF+2)-1:0]    XZeroCnt, YZeroCnt;
+  logic [`DURLEN-1:0]          Dur;

  // in-between FMA signals
-  logic                 Mult;
-  logic                 Ss;
-  logic [`NE+1:0]	      Pe;
-  logic [`NE+1:0]	      Se;
-  logic 				        ASticky;
-  logic 					      KillProd; 
-  logic [$clog2(3*`NF+5)-1:0]	SCnt;
-  logic [3*`NF+3:0]	    Sm;       
-  logic 			          InvA;
-  logic 			          NegSum;
-  logic 			          As;
-  logic 			          Ps;
-  logic                 DivSticky;
-  logic                 DivDone;
-  logic                 DivNegSticky;
-  logic [`NE+1:0]       DivCalcExp;
-  logic                 divsqrtop;
+  logic                        Mult;
+  logic                        Ss;
+  logic [`NE+1:0]	       Pe;
+  logic [`NE+1:0]	       Se;
+  logic 		       ASticky;
+  logic 		       KillProd; 
+  logic [$clog2(3*`NF+5)-1:0]  SCnt;
+  logic [3*`NF+3:0]	       Sm;       
+  logic 		       InvA;
+  logic 		       NegSum;
+  logic 		       As;
+  logic 		       Ps;
+  logic                        DivSticky;
+  logic                        DivDone;
+  logic                        DivNegSticky;
+  logic [`NE+1:0]              DivCalcExp;
+  logic                        divsqrtop;


  ///////////////////////////////////////////////////////////////////////////////////////////////
@ -126,28 +126,28 @@ module testbenchfp;
    $display("TEST is %s", TEST);
    if (`Q_SUPPORTED) begin // if Quad percision is supported
      if (TEST === "cvtint"| TEST === "all") begin  // if testing integer conversion
-                                              // add the 128-bit cvtint tests to the to-be-tested list
-                                              Tests = {Tests, f128rv32cvtint};
-                                              // add the op-codes for these tests to the op-code list
-                                              OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
-                                              WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
-                                              // add what unit is used and the fmt to their lists (one for each test)
-                                              for(int i = 0; i<20; i++) begin
-                                                Unit = {Unit, `CVTINTUNIT};
-                                                Fmt = {Fmt, 2'b11};
-                                              end
-                                              if (`XLEN == 64) begin // if 64-bit integers are supported add their conversions
-                                                Tests = {Tests, f128rv64cvtint};
-                                              // add the op-codes for these tests to the op-code list
-                                                OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
-                                                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
-                                              // add what unit is used and the fmt to their lists (one for each test)
-                                              for(int i = 0; i<20; i++) begin
-                                                Unit = {Unit, `CVTINTUNIT};
-                                                Fmt = {Fmt, 2'b11};
-                                              end
-                                              end
-                                            end
+         // add the 128-bit cvtint tests to the to-be-tested list
+         Tests = {Tests, f128rv32cvtint};
+         // add the op-codes for these tests to the op-code list
+         OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+         WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+         // add what unit is used and the fmt to their lists (one for each test)
+         for(int i = 0; i<20; i++) begin
+            Unit = {Unit, `CVTINTUNIT};
+            Fmt = {Fmt, 2'b11};
+         end
+         if (`XLEN == 64) begin // if 64-bit integers are supported add their conversions
+            Tests = {Tests, f128rv64cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b11};
+            end
+         end
+      end
      if (TEST === "cvtfp" | TEST === "all") begin  // if the floating-point conversions are being tested
        if(`D_SUPPORTED) begin // if double precision is supported
          // add the 128 <-> 64 bit conversions to the to-be-tested list
@ -270,27 +270,27 @@ module testbenchfp;
    end
    if (`D_SUPPORTED) begin // if double precision is supported
      if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
-                                              Tests = {Tests, f64rv32cvtint};
-                                              // add the op-codes for these tests to the op-code list
-                                              OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
-                                              WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
-                                              // add what unit is used and the fmt to their lists (one for each test)
-                                              for(int i = 0; i<20; i++) begin
-                                                Unit = {Unit, `CVTINTUNIT};
-                                                Fmt = {Fmt, 2'b01};
-                                              end
-                                              if (`XLEN == 64) begin // if 64-bit integers are being supported
-                                                Tests = {Tests, f64rv64cvtint};
-                                                // add the op-codes for these tests to the op-code list
-                                                OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
-                                                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
-                                                // add what unit is used and the fmt to their lists (one for each test)
-                                                for(int i = 0; i<20; i++) begin
-                                                  Unit = {Unit, `CVTINTUNIT};
-                                                  Fmt = {Fmt, 2'b01};
-                                                end
-                                              end
-                                            end
+         Tests = {Tests, f64rv32cvtint};
+         // add the op-codes for these tests to the op-code list
+         OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+         WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+         // add what unit is used and the fmt to their lists (one for each test)
+         for(int i = 0; i<20; i++) begin
+            Unit = {Unit, `CVTINTUNIT};
+            Fmt = {Fmt, 2'b01};
+         end
+         if (`XLEN == 64) begin // if 64-bit integers are being supported
+            Tests = {Tests, f64rv64cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b01};
+            end
+         end
+      end
      if (TEST === "cvtfp" | TEST === "all") begin // if floating point conversions are being tested
        if(`F_SUPPORTED) begin // if single precision is supported
          // add the 64 <-> 32 bit conversions to the to-be-tested list
@ -397,27 +397,27 @@ module testbenchfp;
    end
    if (`F_SUPPORTED) begin // if single precision being supported
      if (TEST === "cvtint"| TEST === "all") begin // if integer conversion is being tested
-                                              Tests = {Tests, f32rv32cvtint};
-                                              // add the op-codes for these tests to the op-code list
-                                              OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
-                                              WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
-                                              // add what unit is used and the fmt to their lists (one for each test)
-                                              for(int i = 0; i<20; i++) begin
-                                                Unit = {Unit, `CVTINTUNIT};
-                                                Fmt = {Fmt, 2'b00};
-                                              end
-                                              if (`XLEN == 64) begin // if 64-bit integers are supported
-                                                Tests = {Tests, f32rv64cvtint};
-                                                // add the op-codes for these tests to the op-code list
-                                                OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
-                                                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
-                                                // add what unit is used and the fmt to their lists (one for each test)
-                                              for(int i = 0; i<20; i++) begin
-                                                Unit = {Unit, `CVTINTUNIT};
-                                                Fmt = {Fmt, 2'b00};
-                                              end
-                                              end
-                                            end
+         Tests = {Tests, f32rv32cvtint};
+         // add the op-codes for these tests to the op-code list
+         OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+         WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+         // add what unit is used and the fmt to their lists (one for each test)
+         for(int i = 0; i<20; i++) begin
+            Unit = {Unit, `CVTINTUNIT};
+            Fmt = {Fmt, 2'b00};
+         end
+         if (`XLEN == 64) begin // if 64-bit integers are supported
+            Tests = {Tests, f32rv64cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b00};
+            end
+         end
+      end
      if (TEST === "cvtfp" | TEST === "all") begin  // if floating point conversion is being tested
        if(`ZFH_SUPPORTED) begin 
          // add the 32 <-> 16 bit conversions to the to-be-tested list
@ -508,27 +508,27 @@ module testbenchfp;
    end
    if (`ZFH_SUPPORTED) begin // if half precision supported
      if (TEST === "cvtint"| TEST === "all") begin // if in conversions are being tested
-                                              Tests = {Tests, f16rv32cvtint};
-                                              // add the op-codes for these tests to the op-code list
-                                              OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
-                                              WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
-                                              // add what unit is used and the fmt to their lists (one for each test)
-                                              for(int i = 0; i<20; i++) begin
-                                                Unit = {Unit, `CVTINTUNIT};
-                                                Fmt = {Fmt, 2'b10};
-                                              end
-                                              if (`XLEN == 64) begin // if 64-bit integers are supported
-                                                Tests = {Tests, f16rv64cvtint};
-                                                // add the op-codes for these tests to the op-code list
-                                                OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
-                                                WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
-                                                // add what unit is used and the fmt to their lists (one for each test)
-                                                for(int i = 0; i<20; i++) begin
-                                                  Unit = {Unit, `CVTINTUNIT};
-                                                  Fmt = {Fmt, 2'b10};
-                                                end
-                                              end
-                                            end
+         Tests = {Tests, f16rv32cvtint};
+         // add the op-codes for these tests to the op-code list
+         OpCtrl = {OpCtrl, `FROM_UI_OPCTRL, `FROM_I_OPCTRL, `TO_UI_OPCTRL, `TO_I_OPCTRL};
+         WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+         // add what unit is used and the fmt to their lists (one for each test)
+         for(int i = 0; i<20; i++) begin
+            Unit = {Unit, `CVTINTUNIT};
+            Fmt = {Fmt, 2'b10};
+         end
+         if (`XLEN == 64) begin // if 64-bit integers are supported
+            Tests = {Tests, f16rv64cvtint};
+            // add the op-codes for these tests to the op-code list
+            OpCtrl = {OpCtrl, `FROM_UL_OPCTRL, `FROM_L_OPCTRL, `TO_UL_OPCTRL, `TO_L_OPCTRL};
+            WriteInt = {WriteInt, 1'b0, 1'b0, 1'b1, 1'b1};
+            // add what unit is used and the fmt to their lists (one for each test)
+            for(int i = 0; i<20; i++) begin
+               Unit = {Unit, `CVTINTUNIT};
+               Fmt = {Fmt, 2'b10};
+            end
+         end
+      end
      if (TEST === "cmp"   | TEST === "all") begin // if comparisions are being tested
        // add the correct tests/op-ctrls/unit/fmt to their lists
        Tests = {Tests, f16cmp};
@ -656,7 +656,8 @@ module testbenchfp;
  end

  // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
-  readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
+  readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), 
+                                    .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
                                    .Xs, .Ys, .Zs, .Unit(UnitVal),
                                    .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal),
                                    .Xm, .Ym, .Zm, .DivStart,
@ -680,7 +681,7 @@ module testbenchfp;
  ///////////////////////////////////////////////////////////////////////////////////////////////

  // instantiate devices under test
-  if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "all") begin : fma
+  if (TEST === "fma"| TEST === "mul" | TEST === "add" | TEST === "sub" | TEST === "all") begin : fma
    fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), 
            .Xe(Xe), .Ye(Ye), .Ze(Ze), 
            .Xm(Xm), .Ym(Ym), .Zm(Zm),
@ -1331,4 +1332,4 @@ module readvectors (
                .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,
                .XSubnorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf,
                .XEn, .YEn, .ZEn, .XExpMax);
-endmodule
+endmodule
--- a/testbench/testbench.sv
+++ b/testbench/testbench.sv
@ -28,7 +28,7 @@
 `include "wally-config.vh"
 `include "tests.vh"

-`define PrintHPMCounters 1
+`define PrintHPMCounters 0
 `define BPRED_LOGGER 0
 `define I_CACHE_ADDR_LOGGER 0
 `define D_CACHE_ADDR_LOGGER 0
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@ -47,7 +47,9 @@ string tvpaths[] = '{
    "ieu",
    "ebu",
    "csrwrites",
-    "priv"
+    "priv",
+    "ifu",
+    "fpu"
  };

  string coremark[] = '{
--- a/tests/coverage/Makefile
+++ b/tests/coverage/Makefile
@ -17,7 +17,7 @@ all: $(OBJECTS)

 # Change many things if bit width isn't 64
 %.elf: $(SRCDIR)/%.$(SEXT) WALLY-init-lib.h Makefile
-	riscv64-unknown-elf-gcc -g -o $@ -march=rv64gc_zba_zbb_zbc_zbs -mabi=lp64 -mcmodel=medany \
+	riscv64-unknown-elf-gcc -g -o $@ -march=rv64gqc_zba_zbb_zbc_zbs_zfh -mabi=lp64 -mcmodel=medany \
 	    -nostartfiles -T../../examples/link/link.ld $<
 	riscv64-unknown-elf-objdump -S $@ > $@.objdump
 	riscv64-unknown-elf-elf2hex --bit-width 64 --input $@ --output $@.memfile
--- a/tests/coverage/fpu.S
+++ b/tests/coverage/fpu.S
@ -0,0 +1,73 @@
+///////////////////////////////////////////
+// fpu.S
+//
+// Written: David_Harris@hmc.edu 28 March 2023
+//
+// Purpose: Test coverage for FPU
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// load code to initialize stack, handle interrupts, terminate
+#include "WALLY-init-lib.h"
+
+main:
+
+    # Turn on the FPU: load 0x4000 (bit 14, the same value the previously
+    # disabled bseti t0, zero, 14 produced) so that t0 is defined before
+    # csrs sets those bits in mstatus.
+    li t0, 0x4000
+    csrs mstatus, t0
+
+    # Test legal instructions not covered elsewhere
+    # NOTE(review): a0 is used as the base address below but is not set in
+    # this file; presumably WALLY-init-lib.h leaves it pointing at usable
+    # memory -- confirm.
+    flq ft0, 0(a0)
+    flh ft0, 8(a0)
+    fsq ft0, 0(a0)
+    fsh ft0, 8(a0)
+
+    # Tests for fpu/fctrl.sv
+    fcvt.h.s ft1, ft0
+    fcvt.q.s ft2, ft0
+    fcvt.h.w ft3, a0
+    fcvt.h.wu ft3, a0
+    fcvt.h.l ft3, a0
+    fcvt.h.lu ft3, a0
+    fcvt.w.h a0, ft3
+    fcvt.wu.h a0, ft3
+    fcvt.l.h a0, ft3
+    fcvt.lu.h a0, ft3
+    fcvt.q.w ft3, a0
+    fcvt.q.wu ft3, a0
+    fcvt.q.l ft3, a0
+    fcvt.q.lu ft3, a0
+    fcvt.w.q a0, ft3
+    fcvt.wu.q a0, ft3
+    fcvt.l.q a0, ft3
+    fcvt.lu.q a0, ft3
+
+    # Test illegal instructions are detected
+    # (raw encodings are emitted with .word because they are not valid
+    # instructions that could be written as mnemonics)
+    .word 0x00000007 // illegal floating-point load (bad Funct3)
+    .word 0x00000027 // illegal floating-point store (bad Funct3)
+    .word 0x58F00053 // illegal fsqrt (bad Rs2D)
+    .word 0x20007053 // illegal fsgnj (bad Funct3)
+    .word 0x28007053 // illegal fmin/max (bad Funct3)
+    .word 0xA0007053 // illegal fcmp (bad Funct3)
+    .word 0xE0007053 // illegal fclass/fmv (bad Funct3)
+    .word 0xF0007053 // illegal fmv (bad Funct3)
+    .word 0x43007053 // illegal fcvt.d.* (bad Rs2D)
+    .word 0x42207053 // illegal fcvt.d.* (bad Rs2D[1])
+
+    j done
+
--- a/tests/coverage/ifu.S
+++ b/tests/coverage/ifu.S
@ -0,0 +1,40 @@
+///////////////////////////////////////////
+// ifu.S
+//
+// Written: sriley@g.hmc.edu 28 March 2023
+//
+// Purpose: Test coverage for IFU
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// load code to initialize stack, handle interrupts, terminate
+#include "WALLY-init-lib.h"
+
+main:
+    # turn floating point on by setting mstatus bit 13 (0x2000)
+    li t0, 0x2000
+    csrs mstatus, t0
+
+    # calling compressed floating point load double instruction
+    //.halfword 0x2000 // CL type compressed floating-point ld-->funct3,imm,rs1',imm,rd',op
+                        // binary version 0000 0000 0000 0000 0010 0000 0000 0000
+    mv s0, sp                   # s0 = sp, used as the base address of the load
+    c.fld fs0, 0(s0)            # compressed load of a double from 0(s0) into fs0
+
+    j done                      # done is not defined in this file; presumably provided by WALLY-init-lib.h
--- a/tests/coverage/priv.S
+++ b/tests/coverage/priv.S
@ -36,4 +36,11 @@ main:
    addi t0, zero, 0
    csrr t0, stimecmp 

+    # Test write to STVAL, SCAUSE, SEPC, and STIMECMP CSRs
+    li t0, 0
+    csrw stval, t0
+    csrw scause, t0
+    csrw sepc, t0
+    csrw stimecmp, t0
+    
    j done
--- a/tests/fp/combined_IF_vectors/IF_vectors/README
+++ b/tests/fp/combined_IF_vectors/IF_vectors/README
@ -0,0 +1,4 @@
+This folder holds the archtest and testfloat vectors necessary for evaluating the performance
+of standalone intdiv vs. combined IFdivsqrt.
+
+To generate the vectors, uncomment line 8 in create_all_vectors.sh.
--- a/tests/fp/combined_IF_vectors/create_IF_vectors.sh
+++ b/tests/fp/combined_IF_vectors/create_IF_vectors.sh
@ -0,0 +1,8 @@
+#!/bin/sh
+# create test vectors for stand alone int
+
+./extract_testfloat_vectors.py
+./extract_arch_vectors.py
+
+# to create tvs for evaluation of combined IFdivsqrt
+#./combined_IF_vectors/create_IF_vectors.sh
--- a/tests/fp/combined_IF_vectors/extract_arch_vectors.py
+++ b/tests/fp/combined_IF_vectors/extract_arch_vectors.py
@ -0,0 +1,251 @@
+#! /usr/bin/python3
+
+# author: Alessandro Maiuolo
+# contact: amaiuolo@g.hmc.edu
+# date created: 3-29-2023
+
+# extract all arch test vectors
+import os
+wally = os.popen('echo $WALLY').read().strip()
+
+def ext_bits(my_string):
+    target_len = 32 # we want 128 bits, div by 4 bc hex notation
+    zeroes_to_add = target_len - len(my_string)
+    return zeroes_to_add*"0" + my_string
+
+def twos_comp(b, x):
+    if b == 32:
+        return hex(0x100000000 - int(x,16))[2:]
+    elif b == 64:
+        return hex(0x10000000000000000 - int(x,16))[2:]
+    else:
+        return "UNEXPECTED_BITSIZE"
+
+def unpack_rf(packed):
+    bin_u = bin(int(packed, 16))[2:].zfill(8) # translate to binary
+    flags = hex(int(bin_u[3:],2))[2:].zfill(2)
+    rounding_mode = hex(int(bin_u[:3],2))[2:]
+    return flags, rounding_mode
+
+# rounding mode dictionary
+round_dict = {
+    "rne":"0",
+    "rnm":"4",
+    "ru":"3",
+    "rz":"1",
+    "rd":"2",
+    "dyn":"7"
+}
+
+# fcsr dictionary
+fcsr_dict = {
+    "0":"rne",
+    "128":"rnm",
+    "96":"ru",
+    "32":"rz",
+    "64":"rd",
+    "224":"dyn"
+}
+
+print("creating arch test vectors")
+
+class Config:
+  def __init__(self, bits, letter, op, filt, op_code):
+    self.bits = bits
+    self.letter = letter
+    self.op = op
+    self.filt = filt
+    self.op_code = op_code
+
+def create_vectors(my_config):
+    """Translate the arch-test files selected by my_config into cvw_*.tv vector files.
+
+    Every file in the riscv-arch-test source folder whose name contains
+    my_config.filt is processed. Operands are parsed from the "op1val"/"op2val"
+    text of the .S file and paired, in reading order, with the answers read from
+    the matching Reference-sail_c_simulator.signature file. Each pair is written
+    as one line "<op_code>_<op1>_<op2>_<answer>_<flags>_<rounding mode>" to
+    IF_vectors/cvw_<bits>_<source file name minus its last two characters>.tv.
+
+    my_config -- a Config; bits and letter pick the folders and the parsing
+                 branch, op controls the fsqrt/fdiv handling, op_code is written out.
+    Returns None. Output files are opened in append mode, so existing content is kept.
+    """
+    suite_folder_num = my_config.bits
+    # 64-bit F configs read their .S sources from the rv32i_m folder
+    if my_config.bits == 64 and my_config.letter == "F": suite_folder_num = 32
+    source_dir1 = "{}/addins/riscv-arch-test/riscv-test-suite/rv{}i_m/{}/src/".format(wally, suite_folder_num, my_config.letter)
+    source_dir2 = "{}/tests/riscof/work/riscv-arch-test/rv{}i_m/{}/src/".format(wally, my_config.bits, my_config.letter)
+    dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally)
+    all_vectors1 = os.listdir(source_dir1)
+
+    # .S files whose name contains the filter substring
+    filt_vectors1 = [v for v in all_vectors1 if my_config.filt in v]
+    # print(filt_vectors1)
+    # signature path for each selected test, built from the same list in the same order
+    filt_vectors2 = [v + "/ref/Reference-sail_c_simulator.signature" for v in all_vectors1 if my_config.filt in v]
+
+    # iterate through all vectors
+    for i in range(len(filt_vectors1)):
+        vector1 = filt_vectors1[i]
+        vector2 = filt_vectors2[i]
+        operation = my_config.op_code
+        # placeholders, overwritten only in the branches that read flags / rounding mode
+        rounding_mode = "X"
+        flags = "XX"
+        # use name to create our new tv
+        dest_file = open("{}cvw_{}_{}.tv".format(dest_dir, my_config.bits, vector1[:-2]), 'a')
+        # open vectors
+        src_file1 = open(source_dir1 + vector1,'r')
+        src_file2 = open(source_dir2 + vector2,'r')
+        # for each test in the vector
+        reading = True
+        src_file2.readline() #skip first bc junk
+        # print(my_config.bits, my_config.letter)
+        if my_config.letter == "F" and my_config.bits == 64:
+            reading = True
+            # print("trigger 64F")
+            #skip first 2 lines bc junk
+            src_file2.readline()
+            while reading:
+                # get answer and flags from Ref...signature
+                # answers are before deadbeef (first line of 4)
+                # flags are after deadbeef (third line of 4)
+                answer = src_file2.readline().strip()
+                deadbeef = src_file2.readline().strip()
+                # print(answer)
+                # the pair e7d4b281 / 6f5ca309 is treated as the end-of-data marker
+                if not (answer == "e7d4b281" and deadbeef == "6f5ca309"): # if there is still stuff to read
+                    # get flags
+                    # keep the characters from index 6 on (presumably the last two hex digits of an 8-digit word)
+                    packed = src_file2.readline().strip()[6:]
+                    flags, rounding_mode = unpack_rf(packed)
+                    # skip 00000000 buffer
+                    src_file2.readline()
+
+                    # parse through .S file
+                    # advance to the next line containing "op1val"; stop at RVTEST_CODE_END
+                    detected = False
+                    done = False
+                    op1val = "0"
+                    op2val = "0"
+                    while not (detected or done):
+                        # print("det1")
+                        line = src_file1.readline()
+                        # print(line)
+                        if "op1val" in line:
+                            # print("det2")
+                            # parse line
+                            # take the text between the first "x" after "op1val" and the next ";"
+                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
+                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals
+                                op2val = line.split("op2val")[1].split("x")[1].strip()
+                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
+                            else:
+                                op2val = 32*"X"
+                            # go to next test in vector
+                            detected = True
+                        elif "RVTEST_CODE_END" in line:
+                            done = True
+                    # put it all together
+                    # nothing is written once the end of the .S test code has been reached
+                    if not done:
+                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode)
+                        dest_file.write(translation + "\n")
+                else:
+                    # print("read false")
+                    reading = False
+        elif my_config.letter == "M" and my_config.bits == 64:
+            reading = True
+            #skip first 2 lines bc junk
+            src_file2.readline()
+            while reading:
+                # print("trigger 64M")
+                # get answer from Ref...signature
+                # answers span two lines and are reversed
+                answer2 = src_file2.readline().strip()
+                answer1 = src_file2.readline().strip()
+                answer = answer1 + answer2
+                # print(answer1,answer2)
+                # the pair e7d4b281 / 6f5ca309 is treated as the end-of-data marker
+                if not (answer2 == "e7d4b281" and answer1 == "6f5ca309"): # if there is still stuff to read
+                    # parse through .S file
+                    detected = False
+                    done = False
+                    op1val = "0"
+                    op2val = "0"
+                    while not (detected or done):
+                        # print("det1")
+                        line = src_file1.readline()
+                        # print(line)
+                        if "op1val" in line:
+                            # print("det2")
+                            # parse line
+                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
+                            # a "-" between "op1val" and the first "x" marks a negative literal
+                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
+                                op1val = twos_comp(my_config.bits, op1val)
+                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals, unnec here but keeping for later
+                                op2val = line.split("op2val")[1].split("x")[1].strip()
+                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
+                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
+                                    op2val = twos_comp(my_config.bits, op2val)
+                            # go to next test in vector
+                            detected = True
+                        elif "RVTEST_CODE_END" in line:
+                            done = True
+                    # ints don't have flags
+                    flags = "XX"
+                    # put it all together
+                    if not done:
+                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags.strip(), rounding_mode)
+                        dest_file.write(translation + "\n")
+                else:
+                    # print("read false")
+                    reading = False
+        else:
+            # every remaining config: all 32-bit ones and any 64-bit letter other than F or M
+            while reading:
+                # get answer and flags from Ref...signature
+                # two lines are consumed per test: the answer, then the packed line
+                answer = src_file2.readline()
+                # print(answer)
+                packed = src_file2.readline()[6:]
+                # print(packed)
+                # a second line with nothing after index 6 (including end of file) ends the loop
+                if len(packed.strip())>0: # if there is still stuff to read
+                    # print("packed")
+                    # parse through .S file
+                    detected = False
+                    done = False
+                    op1val = "0"
+                    op2val = "0"
+                    while not (detected or done):
+                        # print("det1")
+                        line = src_file1.readline()
+                        # print(line)
+                        if "op1val" in line:
+                            # print("det2")
+                            # parse line
+                            op1val = line.split("op1val")[1].split("x")[1].split(";")[0]
+                            if "-" in line.split("op1val")[1].split("x")[0]: # neg sign handling
+                                op1val = twos_comp(my_config.bits, op1val)
+                            if my_config.op != "fsqrt": # sqrt doesn't have two input vals
+                                op2val = line.split("op2val")[1].split("x")[1].strip()
+                                if op2val[-1] == ";": op2val = op2val[:-1] # remove ; if it's there
+                                if "-" in line.split("op2val")[1].split("x")[0]: # neg sign handling
+                                    op2val = twos_comp(my_config.bits, op2val)
+                            # go to next test in vector
+                            detected = True
+                        elif "RVTEST_CODE_END" in line:
+                            done = True
+                    # rounding mode for float
+                    # only fsqrt/fdiv unpack the second line; other ops keep the "XX"/"X" placeholders
+                    if not done and (my_config.op == "fsqrt" or my_config.op == "fdiv"):
+                        flags, rounding_mode = unpack_rf(packed)
+                    
+                    # put it all together
+                    if not done:
+                        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(op1val), ext_bits(op2val), ext_bits(answer.strip()), flags, rounding_mode)
+                        dest_file.write(translation + "\n")
+                else:
+                    # print("read false")
+                    reading = False
+        print("out")
+        dest_file.close()
+        src_file1.close()
+        src_file2.close()
+
+config_list = [
+Config(32, "M", "div", "div_", 0),
+Config(32, "F", "fdiv", "fdiv", 1),
+Config(32, "F", "fsqrt", "fsqrt", 2),
+Config(32, "M", "rem", "rem-", 3),
+Config(32, "M", "divu", "divu-", 4),
+Config(32, "M", "remu", "remu-", 5),
+Config(64, "M", "div", "div-", 0),
+Config(64, "F", "fdiv", "fdiv", 1),
+Config(64, "F", "fsqrt", "fsqrt", 2),
+Config(64, "M", "rem", "rem-", 3),
+Config(64, "M", "divu", "divu-", 4),
+Config(64, "M", "remu", "remu-", 5),
+Config(64, "M", "divw", "divw-", 6),
+Config(64, "M", "divuw", "divuw-", 7),
+Config(64, "M", "remw", "remw-", 8),
+Config(64, "M", "remuw", "remuw-", 9)
+]
+
+for c in config_list:
+    create_vectors(c)
--- a/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py
+++ b/tests/fp/combined_IF_vectors/extract_testfloat_vectors.py
@ -0,0 +1,79 @@
+#! /usr/bin/python3
+# extract sqrt and float div testfloat vectors
+
+# author: Alessandro Maiuolo
+# contact: amaiuolo@g.hmc.edu
+# date created: 3-29-2023
+
+import os
+wally = os.popen('echo $WALLY').read().strip()
+# print(wally)
+
+def ext_bits(my_string):
+    target_len = 32 # we want 128 bits, div by 4 bc hex notation
+    zeroes_to_add = target_len - len(my_string)
+    return zeroes_to_add*"0" + my_string
+
+# rounding mode dictionary
+round_dict = {
+    "rne":"0",
+    "rnm":"4",
+    "ru":"3",
+    "rz":"1",
+    "rd":"2",
+    "dyn":"7"
+}
+
+
+print("creating testfloat div test vectors")
+
+source_dir = "{}/tests/fp/vectors/".format(wally)
+dest_dir = "{}/tests/fp/combined_IF_vectors/IF_vectors/".format(wally)
+all_vectors = os.listdir(source_dir)
+
+div_vectors = [v for v in all_vectors if "div" in v]
+
+# iterate through all float div vectors
+for vector in div_vectors:
+    # use name to determine configs
+    config_list = vector.split(".")[0].split("_")
+    operation = "1" #float div
+    rounding_mode = round_dict[str(config_list[2])]
+    # use name to create our new tv
+    dest_file = open(dest_dir + "cvw_" + vector, 'a')
+    # open vector
+    src_file = open(source_dir + vector,'r')
+    # for each test in the vector
+    for i in src_file.readlines():
+        translation = "" # this stores the test that we are currently working on
+        [input_1, input_2, answer, flags] = i.split("_") # separate inputs, answer, and flags
+        # put it all together, strip nec for removing \n on the end of the flags
+        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(input_1), ext_bits(input_2), ext_bits(answer), flags.strip(), rounding_mode)
+        dest_file.write(translation + "\n")
+    dest_file.close()
+    src_file.close()
+
+
+print("creating testfloat sqrt test vectors")
+
+sqrt_vectors = [v for v in all_vectors if "sqrt" in v]
+
+# iterate through all float div vectors
+for vector in sqrt_vectors:
+    # use name to determine configs
+    config_list = vector.split(".")[0].split("_")
+    operation = "2" #sqrt
+    rounding_mode = round_dict[str(config_list[2])]
+    # use name to create our new tv
+    dest_file = open(dest_dir + "cvw_" + vector, 'a')
+    # open vector
+    src_file = open(source_dir + vector,'r')
+    # for each test in the vector
+    for i in src_file.readlines():
+        translation = "" # this stores the test that we are currently working on
+        [input_1, answer, flags] = i.split("_") # separate inputs, answer, and flags
+        # put it all together, strip nec for removing \n on the end of the flags
+        translation = "{}_{}_{}_{}_{}_{}".format(operation, ext_bits(input_1), "X"*32, ext_bits(answer), flags.strip(), rounding_mode)
+        dest_file.write(translation + "\n")
+    dest_file.close()
+    src_file.close()
--- a/tests/fp/create_all_vectors.sh
+++ b/tests/fp/create_all_vectors.sh
@ -3,3 +3,6 @@
 mkdir -p vectors
 ./create_vectors.sh
 ./remove_spaces.sh
+
+# to create tvs for evaluation of combined IFdivsqrt
+#./combined_IF_vectors/create_IF_vectors.sh
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-01.reference_output
@ -53,7 +53,7 @@
 8000000b # mcause value from m ext interrupt
 00000000 # mtval for mext interrupt (0x0)
 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0
-fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable)
+0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable)
 00000222 # mideleg after attempted write of all 1's (only some bits are writeable) # skipping instruction address fault since they're impossible with compressed instrs enabled
 00000001 # mcause from an instruction access fault
 00000000 # mtval of faulting instruction address (0x0)
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-s-01.reference_output
@ -48,7 +48,7 @@
 00000009 # scause from S mode ecall
 00000000 # stval of ecall (*** defined to be zero for now)
 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0
-fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable)
+0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable)
 00000222 # mideleg after attempted write of all 1's (only some bits are writeable)
 0000000b # scause from M mode ecall
 00000000 # stval of ecall (*** defined to be zero for now)
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-trap-u-01.reference_output
@ -45,7 +45,7 @@
 00000008 # scause from U mode ecall
 00000000 # stval of ecall (*** defined to be zero for now)
 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0
-fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable)
+0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable)
 00000222 # mideleg after attempted write of all 1's (only some bits are writeable)
 0000000b # scause from M mode ecall 
 00000000 # stval of ecall (*** defined to be zero for now)
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output
@ -108,8 +108,8 @@
 00000000
 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0
 00000000
-fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable)
-ffffffff
+0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable)
+00000000
 00000222 # mideleg after attempted write of all 1's (only some bits are writeable)
 00000000 # skipping instruction address fault since they're impossible with compressed instrs enabled
 00000001 # mcause from an instruction access fault
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output
@ -98,8 +98,8 @@
 00000000
 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0
 00000000
-fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable)
-ffffffff
+0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable)
+00000000
 00000222 # mideleg after attempted write of all 1's (only some bits are writeable)
 00000000
 0000000b # scause from M mode ecall 
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output
@ -92,8 +92,8 @@
 00000000
 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0
 00000000
-fffff7ff # medeleg after attempted write of all 1's (only some bits are writeable)
-ffffffff
+0000b3ff # medeleg after attempted write of all 1's (only some bits are writeable)
+00000000
 00000222 # mideleg after attempted write of all 1's (only some bits are writeable)
 00000000
 0000000b # scause from M mode ecall