diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally
index a05e21924..59ab11ddf 100755
--- a/pipelined/regression/regression-wally
+++ b/pipelined/regression/regression-wally
@@ -154,13 +154,13 @@ def main():
         os.system('./make-tests.sh | tee ./logs/make-tests.log')
 
     if '-all' in sys.argv:
-        TIMEOUT_DUR = 20*3600 # seconds
+        TIMEOUT_DUR = 30*3600 # seconds
         configs.append(getBuildrootTC(short=False))
     elif '-buildroot' in sys.argv:
-        TIMEOUT_DUR = 20*3600 # seconds
+        TIMEOUT_DUR = 30*3600 # seconds
         configs=[getBuildrootTC(short=False)]
     else:
-        TIMEOUT_DUR = 5*60 # seconds
+        TIMEOUT_DUR = 10*60 # seconds
         configs.append(getBuildrootTC(short=True))
 
     # Scale the number of concurrent processes to the number of test cases, but
diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv
index ed7a12725..2374b4938 100644
--- a/pipelined/src/cache/cache.sv
+++ b/pipelined/src/cache/cache.sv
@@ -119,8 +119,8 @@ module cache #(parameter LINELEN,  NUMLINES,  NUMWAYS, LOGWPL, WORDLEN, MUXINTER
     .s({SelFlush, SelAdr}), .y(RAdr));
 
   // Array of cache ways, along with victim, hit, dirty, and read merging logic
-  cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) CacheWays[NUMWAYS-1:0](
-    .clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask,
+  cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) 
+    CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask,
     .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay,
     .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, 
     .Invalidate(InvalidateCacheM));
diff --git a/pipelined/src/fma/fma16.v b/pipelined/src/fma/fma16.v
new file mode 100644
index 000000000..475457a23
--- /dev/null
+++ b/pipelined/src/fma/fma16.v
@@ -0,0 +1,268 @@
+// fma16.sv
+// David_Harris@hmc.edu 26 February 2022
+// 16-bit floating-point multiply-accumulate
+
+// Operation: general purpose multiply, add, fma, with optional negation
+//   If mul=1, p = x * y.  Else p = x.
+//   If add=1, result = p + z.  Else result = p.
+//   If negr or negz = 1, negate result or z to handle negations and subtractions
+//   fadd: mul = 0, add = 1, negr = negz = 0
+//   fsub: mul = 0, add = 1, negr = 0, negz = 1
+//   fmul: mul = 1, add = 0, negr = 0, negz = 0
+//   fmadd:  mul = 1, add = 1, negr = 0, negz = 0
+//   fmsub:  mul = 1, add = 1, negr = 0, negz = 1
+//   fnmadd: mul = 1, add = 1, negr = 1, negz = 0
+//   fnmsub: mul = 1, add = 1, negr = 1, negz = 1
+
+`define FFLEN 16
+`define Nf 10
+`define Ne 5
+`define BIAS 15
+`define EMIN (-(2**(`Ne-1)-1))
+`define EMAX (2**(`Ne-1)-1)
+// NaN is a complete 16-bit result; INF is only 15 bits because a sign bit is concatenated in front at each use
+`define NaN 16'h7E00
+`define INF 15'h7C00
+// Rounding-mode literals are 2 bits wide to match the 2-bit roundmode port of fma16.
+// rounding modes *** update   NOTE(review): fma16's port comment lists 10 as rp and 11 as rn; reconcile with RM/RP here
+`define RZ  2'b00
+`define RNE 2'b01
+`define RM  2'b10
+`define RP  2'b11
+
+module fma16(
+  input  logic [`FFLEN-1:0] x, y, z,        // packed operands
+  input  logic        mul, add, negr, negz, // operation selects; negr is accepted but not yet applied anywhere
+  input  logic [1:0]  roundmode,  // 00: rz, 01: rne, 10: rp, 11: rn
+  output logic [`FFLEN-1:0] result);
+  // Top level: unpack the operands, form p = x*y (or x), add z, then post-process into a packed result.
+  logic [`Nf:0] xm, ym, zm; // U1.Nf
+  logic [`Ne-1:0]  xe, ye, ze; // B_Ne
+  logic        xs, ys, zs;  // zs: sign of z after optional negation by negz
+  logic        zs1; // sign before optional negation
+  logic [2*`Nf+1:0] pm; // U2.2Nf
+  logic [`Ne:0]  pe; // B_Ne+1
+  logic        ps;  // sign of product
+  logic [22:0] rm;
+  logic [`Ne+1:0]  re;
+  logic        rs;
+  logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
+  logic [`Ne+1:0]  re2;
+  // zs must be driven here because signadj16 is not instantiated; add16 gets the raw sign zs1 since it folds negz in itself
+  unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);  // unpack inputs
+  assign zs = zs1 ^ negz;                                                       // effective sign of the addend
+  mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps);                       // p = x * y
+  add16 add16(add, pm, zm, pe, ze, ps, zs1, negz, rm, re, re2, rs);            // r = z + p
+  postproc16 post(roundmode,  xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result);                 // normalize, round, pack
+endmodule
+
+module mult16(
+  input  logic        mul,            // 1: p = x * y; 0: p = x
+  input  logic [`Nf:0] xm, ym,        // significands, U1.Nf
+  input  logic [`Ne-1:0]  xe, ye,     // exponents
+  input  logic        xs, ys,         // signs
+  output logic [2*`Nf+1:0] pm,        // product significand, U2.2Nf
+  output logic [`Ne:0]  pe,           // product exponent, one bit wider than the inputs
+  output logic        ps);            // product sign
+  // Product stage of the FMA: when mul = 0 every output must describe x alone, so y is ignored entirely.
+  // only multiply if mul = 1
+  assign pm = mul ? xm * ym : {1'b0, xm, 10'b0};       // multiply mantissas, or place x in the U2.2Nf format
+  assign pe = mul ? xe + ye - `BIAS : {1'b0, xe};      // add exponents, account for bias
+  assign ps = mul ? xs ^ ys : xs;                      // negative if exactly one of X, Y is negative; sign of X on bypass
+endmodule
+
+module add16(
+  input  logic        add, // not referenced while the pass-through muxes at the bottom are commented out
+  input  logic [2*`Nf+1:0] pm,  // U2.2Nf
+  input  logic [`Nf:0] zm, // U1.Nf
+  input  logic [`Ne:0]  pe, // B_Ne+1
+  input  logic [`Ne-1:0]  ze, // B_Ne
+  input  logic        ps, zs, 
+  input  logic        negz,
+  output logic [22:0] rm,
+  output logic [`Ne+1:0]  re, // B_Ne+2
+  output logic [`Ne+1:0]  re2,
+  output logic        rs);
+  // Adder stage of the FMA: aligns z against the product, adds or subtracts, and sets up normalization (work in progress).
+  logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
+  logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
+  logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.  
+  logic [`Nf-1:0] prezsticky;
+  logic           zsticky;
+  logic          effectivesub;
+  logic           rs0;
+  logic [`Ne:0]     leadingzeros, NormCnt; // *** should parameterize size
+  logic [`Ne:0]   re1;
+
+  // Alignment shift
+  assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
+  assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
+  always_comb // AlignCount mux; see Muller page 254
+    if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7;         re = {1'b0, pe}; end
+    else if (ExpDiff <= 2)       begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
+    else if (ExpDiff <= `Nf+3)   begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
+    else                         begin AlignCnt = 0;                 re = {2'b0, ze}; end
+  // Shift Zm right by AlignCnt.  Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
+  assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
+  assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
+  
+  // Effective subtraction
+  assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
+  assign zalignedaddsub = effectivesub ? ~zaligned : zaligned;  // invert zaligned for subtraction
+
+  // Adder
+  assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
+  assign rs0 = r[`Nf*3+7]; // sign of the initial result
+  assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
+
+  // Sign Logic
+  assign rs = ps ^ rs0; // flip the sign if necessary
+
+  // Leading zero counter
+  lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
+  assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
+
+  // Normalization shift
+  always_comb // NormCount mux; NOTE(review): re1 is unsigned while EMIN is negative -- confirm the comparison below is intended
+    if (ExpDiff < 3) begin 
+      if (re1 >= `EMIN) begin  NormCnt = `Nf + 3 + leadingzeros;  re2 = {1'b0, re1}; end
+      else              begin  NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN;    end
+    end else            begin  NormCnt = AlignCnt; re2 = {2'b00, ze};                 end // re2, not re: re belongs to the alignment block and re2 needs a value on every path
+  assign rnormed = r2 << NormCnt; // *** update sticky
+  /* temporarily comment out to start synth
+
+  // One-bit secondary normalization
+  if (ExpDiff <= 2)          begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
+  else begin // *** handle sticky
+    if (rnormed[***])        begin rnormed2 = rnormed >> 1; re2 = re+1; end
+    else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re;        end
+    else                     begin rnormed2 = rnormed << 1; re2 = re-1; end
+  end
+
+  // round
+  assign l = rnormed2[***]; // least significant bit 
+  assign r = rnormed2[***-1]; // rounding bit
+  assign s = ***; // sticky bit
+  always_comb
+    case (roundmode)
+      RZ: roundup = 0;
+      RP: roundup = ~rs & (r | s); 
+      RM: roundup = rs & (r | s);
+      RNE: roundup = r & (s | l);
+      default: roundup = 0;
+    endcase
+  assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
+*/
+
+  // *** need to handle rounding to MAXNUM vs. INFINITY
+  
+  // add or pass product through (currently commented out, so rm is not driven by this module)
+ /* assign rm = add ? arm : {1'b0, pm};
+  assign re = add ? are : {1'b0, pe};
+  assign rs = add ? ars : ps; */
+endmodule
+
+module lzc(
+  input  logic [`Nf*3+7:0] r2,
+  output logic [`Ne:0]   leadingzeros
+);
+  // Placeholder: the body is empty, so leadingzeros is not driven by this module yet.
+endmodule
+
+
+module postproc16(
+  input  logic [1:0] roundmode,
+  input  logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
+  input  logic [22:0] rm, 
+  input  logic [`Nf:0] zm, // U1.Nf
+  input  logic [6:0]  re, 
+  input  logic [`Ne-1:0]  ze, // B_Ne
+  input  logic        rs, zs, ps,
+  input  logic [`Ne+1:0]  re2,
+  output logic [15:0] result);
+  // Packs the final result: the special-case block below is the only driver of result; roundmode is not referenced yet.
+  logic [9:0] uf, uff;
+  logic [6:0] ue;
+  logic [6:0] ueb, uebiased;
+  logic       invalid; // set by the special-case block but not exported from this module
+
+    // Special cases
+  // *** not handling signaling NaN
+  // *** also add overflow/underflow/inexact
+  always_comb begin
+    if (xnan | ynan | znan)                    begin result = `NaN; invalid = 0; end // propagate NANs
+    else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
+    else if (xzero & yinf | xinf & yzero)      begin result = `NaN; invalid = 1; end // zero times infinity
+    else if (xinf | yinf)                      begin result = {ps, `INF}; invalid = 0; end // X or Y
+    else if (zinf)                             begin result = {zs, `INF}; invalid = 0; end // infinite Z
+    else if (xzero | yzero)                    begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end
+    else if (re2 >= `EMAX)                     begin result = {rs, `INF}; invalid = 0; end
+    else                                       begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
+  end
+  
+  always_comb 
+    if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
+        ue = re + 7'b1;
+        uf = rm[20:11];
+    end else begin // no normalization shift needed
+        ue = re;
+        uf = rm[19:10];
+    end
+
+  // overflow   NOTE(review): the overflow branch assigns nothing, so uebiased and uff are incompletely assigned
+  always_comb begin
+    ueb = ue-7'd15;
+    if (ue >= 7'd46) begin // overflow
+/*      uebiased = 7'd30;
+      uff = 10'h3ff; */
+    end else begin
+      uebiased = ue-7'd15;
+      uff = uf;
+    end
+  end
+  
+  // assign result = {rs, uebiased[4:0], uff}; // disabled: second driver of result, which the special-case always_comb above already assigns on every path
+
+  // add special case handling for zeros, NaN, Infinity
+endmodule
+
+module signadj16(
+  input  logic negr, negz,
+  input  logic xs, ys, zs1,
+  output logic ps, zs);
+  // Sign adjustment: the product sign is the XOR of the operand signs; the addend sign flips when negz is set.
+  // negr is part of the interface but does not affect either output.
+  assign {ps, zs} = {xs ^ ys, zs1 ^ negz};
+endmodule
+
+module unpack16(
+  input  logic [15:0] x, y, z,
+  output logic [10:0] xm, ym, zm,
+  output logic [4:0]  xe, ye, ze,
+  output logic        xs, ys, zs,
+  output logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);
+  // One unpacknum16 per operand; ports are bound by name so the wiring does not depend on port order.
+  unpacknum16 upx(.num(x), .m(xm), .e(xe), .s(xs), .zero(xzero), .inf(xinf), .nan(xnan));
+  unpacknum16 upy(.num(y), .m(ym), .e(ye), .s(ys), .zero(yzero), .inf(yinf), .nan(ynan));
+  unpacknum16 upz(.num(z), .m(zm), .e(ze), .s(zs), .zero(zzero), .inf(zinf), .nan(znan));
+endmodule
+
+module unpacknum16(
+  input logic  [15:0] num,
+  output logic [10:0] m,
+  output logic [4:0]  e,
+  output logic        s, 
+  output logic        zero, inf, nan);
+  // Splits a 16-bit value into sign, exponent and significand, and flags zero / infinity / NaN encodings.
+  logic [9:0] frac;                        // fraction field, num[9:0]
+  logic [4:0] expfield;                    // exponent field, num[14:10], bias left in
+  logic       expzero, expones, fraczero;  // field classifications shared by the three flags
+  assign s        = num[15];
+  assign expfield = num[14:10];
+  assign frac     = num[9:0];
+  assign {m, e}   = {1'b1, frac, expfield}; // a leading 1 is always prepended to the fraction
+  assign {expzero, expones, fraczero} = {expfield == 5'd0, expfield == 5'd31, frac == 10'd0};
+  assign {zero, inf, nan} = {expzero & fraczero, expones & fraczero, expones & ~fraczero};
+endmodule
+
+
diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv
index fd32d379e..153b97ef7 100755
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@@ -5,6 +5,7 @@ module fctrl (
   input  logic [4:0] Rs2D,      // bits 24:20 of instruction
   input  logic [2:0] Funct3D,   // bits 14:12 of instruction - may contain rounding mode
   input  logic [2:0] FRM_REGW,  // rounding mode from CSR
+  input  logic [1:0] STATUS_FS, // is FPU enabled?
   output logic       IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
   output logic       FRegWriteD,  // FP register write enable
   output logic       FDivStartD,  // Start division or squareroot
@@ -21,7 +22,9 @@ module fctrl (
   logic [`FCTRLW-1:0] ControlsD;
   // FPU Instruction Decoder
   always_comb
-    case(OpD)
+    if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
+      ControlsD = `FCTRLW'b0_0_00_000_000_00_0_1;
+    else case(OpD)
     // FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
       7'b0000111: case(Funct3D)
                     3'b010:  ControlsD = `FCTRLW'b1_0_00_000_000_00_0_0; // flw
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 2ffcb1264..0fc9e8635 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -39,6 +39,7 @@ module fpu (
   input logic 		   StallE, StallM, StallW, // stall signals from HZU
   input logic 		   FlushE, FlushM, FlushW, // flush signals from HZU
   input logic [4:0] 	   RdM, RdW, // which FP register to write to (from IEU)
+  input logic [1:0]        STATUS_FS, // Is floating-point enabled?
   output logic 		   FRegWriteM, // FP register write enable
   output logic 		   FStallD, // Stall the decode stage
   output logic 		   FWriteIntE, // integer register write enables
@@ -134,7 +135,7 @@ module fpu (
    // DECODE STAGE
 
    // calculate FP control signals
-   fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW,
+   fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
       .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, 
       .FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
 
diff --git a/pipelined/src/ieu/comparator.sv b/pipelined/src/ieu/comparator.sv
index 26ea6d868..a466675fb 100644
--- a/pipelined/src/ieu/comparator.sv
+++ b/pipelined/src/ieu/comparator.sv
@@ -30,14 +30,16 @@
 
 `include "wally-config.vh"
 
-module comparator #(parameter WIDTH=32) (
+module comparator_sub #(parameter WIDTH=64) (
   input  logic [WIDTH-1:0] a, b,
   output logic [2:0]       flags);
 
-  logic [WIDTH-1:0] bbar, diff;
-  logic             carry, eq, neg, overflow, lt, ltu;
-/*
+  logic eq, lt, ltu;
+
+
   // Subtractor implementation
+  logic [WIDTH-1:0] bbar, diff;
+  logic             carry, neg, overflow;
 
   // subtraction
   assign bbar = ~b;
@@ -52,7 +54,85 @@ module comparator #(parameter WIDTH=32) (
   assign lt = neg ^ overflow;
   assign ltu = ~carry;
   assign flags = {eq, lt, ltu};
-*/
+endmodule
+
+// *** eventually substitute comparator_flip, which gives slightly better synthesis
+module comparator #(parameter WIDTH=64) (
+  input  logic [WIDTH-1:0] a, b,
+  output logic [2:0]       flags);
+
+  // flags = {a == b, a < b (signed), a < b (unsigned)}.
+  // Written behaviorally, which is reported to give the best results.
+  always_comb begin
+    flags[2] = (a == b);                    // equal
+    flags[1] = ($signed(a) < $signed(b));   // less than, operands taken as signed
+    flags[0] = (a < b);                     // less than, operands taken as unsigned
+  end
+
+endmodule
+
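+// This comparator flips the msb of both operands when sgnd is set, so one unsigned compare serves signed and unsigned inputs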
+module comparator_flip #(parameter WIDTH=16) (
+  input  logic [WIDTH-1:0] a, b,
+  input  logic             sgnd,
+  output logic [1:0]       flags);
+
+  // flags = {equal, less-than}; the most significant bit of each operand is inverted when sgnd is 1.
+  logic [WIDTH-1:0] msbmask;            // sgnd in the top bit, zeros below
+  logic [WIDTH-1:0] aflipped, bflipped; // operands after the optional msb inversion
+
+  // For signed numbers, toggle the most significant bit of both operands
+  assign msbmask  = {sgnd, {(WIDTH-1){1'b0}}};
+  assign aflipped = a ^ msbmask;
+  assign bflipped = b ^ msbmask;
+
+  // behavioral comparison of the adjusted operands
+  assign flags = {aflipped == bflipped, aflipped < bflipped};
+endmodule
+
+module comparator2 #(parameter WIDTH=64) (
+  input  logic             clk, reset, // not referenced in the body
+  input  logic [WIDTH-1:0] a, b,
+  output logic [2:0]       flags);
+  // Prefix-tree comparator: flags = {equal, less-than signed, less-than unsigned}, built by pairwise merging of per-bit results.
+  logic eq, lt, ltu; // not referenced below; the outputs come from eq2, lt2, ltu2
+
+  /* verilator lint_off UNOPTFLAT */
+  // prefix implementation
+  localparam levels=$clog2(WIDTH);
+  genvar i;
+  genvar level;
+  logic [WIDTH-1:0] e[levels:0];
+  logic [WIDTH-1:0] l[levels:0];
+  logic eq2, lt2, ltu2;
+
+  // Bitwise logic
+  assign e[0] = a ~^ b; // bitwise equality
+  assign l[0] = ~a & b; // bitwise less than unsigned: A=0 and B=1
+  // Each level merges adjacent pairs from the level below; only the low WIDTH/(2**level) entries of a level are driven.
+  // Recursion   NOTE(review): the final entry [levels][0] is only reached when WIDTH is a power of two -- confirm callers
+  for (level = 1; level<=levels; level++) begin
+    for (i=0; i<WIDTH/(2**level); i++) begin
+      assign e[level][i] = e[level-1][i*2+1] & e[level-1][i*2];  // group equal if both parts equal
+      assign l[level][i] = l[level-1][i*2+1] | e[level-1][i*2+1] & l[level-1][i*2]; // group less if upper is less or upper equal and lower less
+    end
+  end
+
+  // Output logic
+  assign eq2 = e[levels][0];  // A = B if all bits are equal
+  assign ltu2 = l[levels][0]; // A < B if group is less (unsigned)
+  // A < B signed if less than unsigned and msb is not < unsigned, or if A negative and B positive
+  assign lt2 = ltu2 & ~l[0][WIDTH-1] | a[WIDTH-1] & ~b[WIDTH-1]; 
+  assign flags = {eq2, lt2, ltu2};
+  /* verilator lint_on UNOPTFLAT */
+endmodule
+
+
+module comparator_prefix #(parameter WIDTH=64) (
+  input  logic [WIDTH-1:0] a, b,
+  output logic [2:0]       flags);
+
+  logic eq, lt, ltu;
 
   /* verilator lint_off UNOPTFLAT */
   // prefix implementation
diff --git a/pipelined/src/ieu/controller.sv b/pipelined/src/ieu/controller.sv
index c26551620..cb7e3f2fd 100644
--- a/pipelined/src/ieu/controller.sv
+++ b/pipelined/src/ieu/controller.sv
@@ -120,7 +120,7 @@ module controller(
     // RegWrite_ImmSrc_ALUSrc_MemRW_ResultSrc_Branch_ALUOp_Jump_ALUResultSrc_W64_CSRRead_Privileged_Fence_MDU_Atomic_Illegal
       7'b0000000:   ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // illegal instruction
       7'b0000011:   ControlsD = `CTRLW'b1_000_01_10_001_0_0_0_0_0_0_0_0_0_00_0; // lw
-      7'b0000111:   ControlsD = `CTRLW'b0_000_01_10_001_0_0_0_0_0_0_0_0_0_00_0; // flw
+      7'b0000111:   ControlsD = `CTRLW'b0_000_01_10_001_0_0_0_0_0_0_0_0_0_00_0; // flw - only legal if FP supported
       7'b0001111:   ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_1_0_00_0; // fence
       7'b0010011:   ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_0_0_0_0_0_00_0; // I-type ALU
       7'b0010111:   ControlsD = `CTRLW'b1_100_11_00_000_0_0_0_0_0_0_0_0_0_00_0; // auipc
@@ -129,7 +129,7 @@ module controller(
                   else
                     ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_1; // non-implemented instruction
       7'b0100011:   ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_0; // sw
-      7'b0100111:   ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_0; // fsw
+      7'b0100111:   ControlsD = `CTRLW'b0_001_01_01_000_0_0_0_0_0_0_0_0_0_00_0; // fsw - only legal if FP supported
       7'b0101111: if (`A_SUPPORTED) begin
                     if (InstrD[31:27] == 5'b00010)
                       ControlsD = `CTRLW'b1_000_00_10_001_0_0_0_0_0_0_0_0_0_01_0; // lr
diff --git a/pipelined/src/privileged/csr.sv b/pipelined/src/privileged/csr.sv
index fd8e67762..4a7e06868 100644
--- a/pipelined/src/privileged/csr.sv
+++ b/pipelined/src/privileged/csr.sv
@@ -64,6 +64,7 @@ module csr #(parameter
   output logic [11:0]      MIP_REGW, MIE_REGW, SIP_REGW, SIE_REGW, MIDELEG_REGW,
   output logic             STATUS_MIE, STATUS_SIE,
   output logic             STATUS_MXR, STATUS_SUM, STATUS_MPRV, STATUS_TW,
+  output logic [1:0]       STATUS_FS,
   output var logic [7:0]      PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
   output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0],
   
@@ -141,7 +142,8 @@ module csr #(parameter
               .mretM, .sretM, .WriteFRMM, .WriteFFLAGSM, .CSRWriteValM,
               .MSTATUS_REGW, .SSTATUS_REGW, .MSTATUSH_REGW,
               .STATUS_MPP, .STATUS_SPP, .STATUS_TSR, .STATUS_TW,
-              .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TVM);
+              .STATUS_MIE, .STATUS_SIE, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TVM,
+              .STATUS_FS);
   csrc  counters(.clk, .reset,
               .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW,   
               .InstrValidM, .LoadStallD, .CSRMWriteM,
diff --git a/pipelined/src/privileged/csrsr.sv b/pipelined/src/privileged/csrsr.sv
index 088d1e95b..0d1912c16 100644
--- a/pipelined/src/privileged/csrsr.sv
+++ b/pipelined/src/privileged/csrsr.sv
@@ -44,11 +44,12 @@ module csrsr (
   output logic             STATUS_SPP, STATUS_TSR, STATUS_TW,
   output logic             STATUS_MIE, STATUS_SIE,
   output logic             STATUS_MXR, STATUS_SUM,
-  output logic             STATUS_MPRV, STATUS_TVM
+  output logic             STATUS_MPRV, STATUS_TVM,
+  output logic [1:0]       STATUS_FS
 );
   
   logic STATUS_SD, STATUS_TW_INT, STATUS_TSR_INT, STATUS_TVM_INT, STATUS_MXR_INT, STATUS_SUM_INT, STATUS_MPRV_INT;
-  logic [1:0] STATUS_SXL, STATUS_UXL, STATUS_XS, STATUS_FS, STATUS_FS_INT, STATUS_MPP_NEXT;
+  logic [1:0] STATUS_SXL, STATUS_UXL, STATUS_XS, STATUS_FS_INT, STATUS_MPP_NEXT;
   logic STATUS_MPIE, STATUS_SPIE, STATUS_UBE, STATUS_SBE, STATUS_MBE;
 
   // STATUS REGISTER FIELD
diff --git a/pipelined/src/privileged/privileged.sv b/pipelined/src/privileged/privileged.sv
index f75b21e67..a5997a991 100644
--- a/pipelined/src/privileged/privileged.sv
+++ b/pipelined/src/privileged/privileged.sv
@@ -74,6 +74,7 @@ module privileged (
   output logic [`XLEN-1:0] SATP_REGW,
   output logic             STATUS_MXR, STATUS_SUM, STATUS_MPRV,
   output logic  [1:0]      STATUS_MPP,
+  output logic [1:0]       STATUS_FS,
   output var logic [7:0]   PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0],
   output var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW [`PMP_ENTRIES-1:0], 
   output logic [2:0]       FRM_REGW,
@@ -172,7 +173,7 @@ module privileged (
           .SATP_REGW,
           .MIP_REGW, .MIE_REGW, .SIP_REGW, .SIE_REGW, .MIDELEG_REGW,
           .STATUS_MIE, .STATUS_SIE,
-          .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW,
+          .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_TW, .STATUS_FS,
           .PMPCFG_ARRAY_REGW,
           .PMPADDR_ARRAY_REGW,
           .SetFflagsM,
diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv
index 5f1fc71b1..a136608ef 100644
--- a/pipelined/src/wally/wallypipelinedcore.sv
+++ b/pipelined/src/wally/wallypipelinedcore.sv
@@ -108,7 +108,7 @@ module wallypipelinedcore (
   logic             ITLBMissF;
   logic [`XLEN-1:0]         SATP_REGW;
   logic              STATUS_MXR, STATUS_SUM, STATUS_MPRV;
-  logic  [1:0]       STATUS_MPP;
+  logic  [1:0]       STATUS_MPP, STATUS_FS;
   logic [1:0]             PrivilegeModeW;
   logic [`XLEN-1:0]     PTE;
   logic [1:0]             PageType;
@@ -338,7 +338,7 @@ module wallypipelinedcore (
          .InstrAccessFaultF, .LoadAccessFaultM, .StoreAmoAccessFaultM,
          .ExceptionM, .IllegalFPUInstrE,
          .PrivilegeModeW, .SATP_REGW,
-         .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP,
+         .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .STATUS_FS,
          .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, 
          .FRM_REGW,.BreakpointFaultM, .EcallFaultM
       );
@@ -373,6 +373,7 @@ module wallypipelinedcore (
          .StallE, .StallM, .StallW, // stall signals from HZU
          .FlushE, .FlushM, .FlushW, // flush signals from HZU
          .RdM, .RdW, // which FP register to write to (from IEU)
+         .STATUS_FS, // is floating-point enabled?
          .FRegWriteM, // FP register write enable
          .FStallD, // Stall the decode stage
          .FWriteIntE, // integer register write enable