From 1c20bb9313d39ae3e955d2786cd57544baedc44f Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Tue, 14 May 2024 11:32:21 -0700
Subject: [PATCH 01/30] Added riscv-isac for test vector generation

---
 bin/wally-tool-chain-install.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bin/wally-tool-chain-install.sh b/bin/wally-tool-chain-install.sh
index 3b08aea54..44ff08aee 100755
--- a/bin/wally-tool-chain-install.sh
+++ b/bin/wally-tool-chain-install.sh
@@ -48,7 +48,8 @@ sudo apt update -y
 sudo apt upgrade -y
 sudo apt install -y git gawk make texinfo bison flex build-essential python3 libz-dev libexpat-dev autoconf device-tree-compiler ninja-build libpixman-1-dev ncurses-base ncurses-bin libncurses5-dev dialog curl wget ftp libgmp-dev libglib2.0-dev python3-pip pkg-config opam z3 zlib1g-dev automake autotools-dev libmpc-dev libmpfr-dev  gperf libtool patchutils bc mutt ssmtp
 # Other python libraries used through the book.
-sudo -H pip3 install sphinx sphinx_rtd_theme matplotlib scipy scikit-learn adjustText lief markdown pyyaml
+sudo -H pip3 install sphinx sphinx_rtd_theme matplotlib scipy scikit-learn adjustText lief markdown pyyaml 
+sudo -H pip3 install riscv_isac # to generate new tests, such as quads with fp_dataset.py
 
 # needed for Ubuntu 22.04, gcc cross compiler expects python not python2 or python3.
 if ! command -v python &> /dev/null

From 506973c27a9d8cfde3ea7a88bf2f03a3a12c2c83 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Wed, 15 May 2024 19:29:42 -0700
Subject: [PATCH 02/30] Added gfmul example

---
 .gitignore                     |  1 +
 examples/crypto/gfmul/Makefile | 16 ++++++++
 examples/crypto/gfmul/gfmul.c  | 72 ++++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+)
 create mode 100644 examples/crypto/gfmul/Makefile
 create mode 100644 examples/crypto/gfmul/gfmul.c

diff --git a/.gitignore b/.gitignore
index 3a7513d12..e085dcb33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -231,6 +231,7 @@ examples/verilog/fulladder/simprofile_dir/
 examples/verilog/fulladder/simv.daidir/
 examples/verilog/fulladder/ucli.key
 examples/verilog/fulladder/verdi_config_file
+examples/crypto/gfmul/gfmul
 tests/functcov
 tests/functcov/*
 tests/functcov/*/*
diff --git a/examples/crypto/gfmul/Makefile b/examples/crypto/gfmul/Makefile
new file mode 100644
index 000000000..a501c3775
--- /dev/null
+++ b/examples/crypto/gfmul/Makefile
@@ -0,0 +1,16 @@
+# Makefile - build each .c file here into a same-named executable (IFLAGS is not set in this file)
+
+CC     = gcc
+CFLAGS = -O3
+LIBS   =
+SRCS   = $(wildcard *.c)
+PROGS  = $(patsubst %.c,%,$(SRCS))
+
+.PHONY: all clean  # not files: always run, even if a file named all or clean exists
+all:	$(PROGS)
+
+%: %.c
+	$(CC) $(CFLAGS) $(IFLAGS) -o $@ $< $(LIBS)
+
+clean:
+	rm -f $(PROGS)
diff --git a/examples/crypto/gfmul/gfmul.c b/examples/crypto/gfmul/gfmul.c
new file mode 100644
index 000000000..3c4f585f1
--- /dev/null
+++ b/examples/crypto/gfmul/gfmul.c
@@ -0,0 +1,72 @@
+// gfmul.c - Galois Field multiplication
+// James Stine and David Harris 16 May 2024
+
+#include <stdio.h>
+
+/* gfmul: return a*b mod m(x) by shift-and-add (long) multiplication in GF(2^n).
+ * a, b are field elements (< 2^n); m is the reduction polynomial and must have bit n set. */
+int gfmul(int a, int b, int n, int m) {
+   /* work in unsigned so every shift is well defined and the loop ends even for negative b */
+   unsigned ua = (unsigned)a, ub = (unsigned)b, result = 0;
+   while (ub) {
+     if (ub & 1u) result ^= ua;      /* if low bit of b is set, add (xor) a */
+     ua <<= 1;                       /* multiply a by x */
+     if (ua & (1u << n))
+       ua ^= (unsigned)m;            /* degree reached n: reduce by subtracting m(x) */
+     ub >>= 1;                       /* get next bit of b */
+   }
+   return (int)result;
+}
+
+/* Print the 16x16 table of multiplicative inverses in GF(2^8), found by brute-force search. */
+void inverses(void) {
+    const int poly = 0b100011011;       /* reduction polynomial m(x) passed to gfmul */
+    int elem, cand;
+    printf("\nTable of inverses in GF(2^8) with polynomial m(x) = 100011011\n");
+    for (elem = 0; elem < 256; elem++) {
+        if (elem == 0) {
+            printf ("00 ");             /* 0 is special-cased: printed without a search */
+        } else {
+            for (cand = 1; cand < 256; cand++)
+                if (gfmul(elem, cand, 8, poly) == 1) {   /* elem * cand == 1: cand is the inverse */
+                    printf("%02x ", cand);
+                    break;
+                }
+        }
+        if (elem % 16 == 15) printf("\n");   /* close each row of 16 entries */
+    }
+}
+
+/* Print the multiplicative inverse of every element of GF(2^3) with m(x) = 1011, on one line. */
+void inverses3(void) {
+    int k, num;
+    printf("\nTable of inverses in GF(2^3) with polynomial m(x) = 1011\n");
+    for (num=0; num<8; num++) {
+        if (num == 0) printf ("0 ");      /* 0 is special-cased: printed without a search */
+        else for (k=1; k<8; k++) {        /* brute-force search for k with num * k == 1 */
+            if (gfmul(num, k, 3, 0b1011) == 1) {
+                printf("%d ", k);
+                break;
+            }
+        }
+    }
+    printf("\n");
+}
+
+
+/* Demo driver: print sample GF(2^8) products, both inverse tables, then GF(2^3) worked examples. */
+int main(void) {
+  const int x = 0xC5, y = 0xA1;
+
+  printf("The GF(2^8) result is %x\n", gfmul(x, y, 8, 0b100011011));
+  printf("The GF(2^8) result is %x\n", gfmul(0xC1, 0x28, 8, 0b100011011));
+  inverses();
+
+  // tabulate inverses for GF(2^3)
+  inverses3();
+  // check worked examples in GF(2^3) with m(x) = 1011
+  printf("The GF(2^3) result is %d\n", gfmul(0b101,0b011, 3, 0b1011));
+  printf("The GF(2^3) result is %d\n", gfmul(0b101,0b010, 3, 0b1011));
+  printf("The GF(2^3) result is %d\n", gfmul(0b101,0b100, 3, 0b1011));
+  printf("The GF(2^3) result is %d\n", gfmul(0b101,0b011, 3, 0b1011));  // same operands as the first example
+  return 0;
+}

From ad568e9d25081efc028ca39b25c74d3a043d1991 Mon Sep 17 00:00:00 2001
From: Rose Thompson <ross1728@gmail.com>
Date: Mon, 20 May 2024 15:46:26 -0500
Subject: [PATCH 03/30] Updated readme.

---
 README.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 2444ff99a..89dc350f3 100644
--- a/README.md
+++ b/README.md
@@ -41,19 +41,14 @@ Clone your fork of the repo and run the setup script. Change <yourgithubid> to y
 	$ git remote add upstream https://github.com/openhwgroup/cvw
 	$ source ./setup.sh
 
+If you are installing on a new system without any tools installed, please jump to the next section, Toolchain Installation, then come back here.
+
 Add the following lines to your .bashrc or .bash_profile to run the setup script each time you log in.
 
 	if [ -f ~/cvw/setup.sh ]; then
 		source ~/cvw/setup.sh
 	fi
 
-Edit setup.sh and change the following lines to point to the path and license server for your Siemens Questa and Synopsys Design Compiler installation and license server.  If you only have Questa, you can still simulate but cannot run logic synthesis.
-
-	export MGLS_LICENSE_FILE=..         # Change this to your Siemens license server
-	export SNPSLMD_LICENSE_FILE=..      # Change this to your Synopsys license server
-	export QUESTAPATH=..     			# Change this for your path to Questa
-	export SNPSPATH=..                  # Change this for your path to Design Compiler
-
 If the tools are not yet installed on your server, follow the Toolchain Installation instructions in the section below.
 
 Build the tests and run a regression simulation with Questa to prove everything is installed.  Building tests will take a while.
@@ -73,6 +68,17 @@ Ubuntu users can install the tools by running
 
 	$ sudo $WALLY/bin/wally-tool-chain-install.sh
 
+The default installation directory is /opt/riscv defined by the environment variable RISCV. You must copy and edit ~/cvw/site-setup.sh to $RISCV/  ~/cvw/setup.sh sources $RISCV/site-setup.sh.
+This allows for customization of the site specific information such as commerical licenses and PATH variables.
+
+Change the following lines to point to the path and license server for your Siemens Questa and Synopsys Design Compiler installation and license server.  If you only have Questa, you can still simulate but cannot run logic synthesis.  If Questa or Design Compiler are already setup on this system then don't set these variables.
+
+	export MGLS_LICENSE_FILE=..         # Change this to your Siemens license server
+	export SNPSLMD_LICENSE_FILE=..      # Change this to your Synopsys license server
+	export QUESTAPATH=..                # Change this for your path to Questa
+	export SNPSPATH=..                  # Change this for your path to Design Compiler
+
+
 See wally-tool-chain-install.sh for a detailed description of each component,
 or to issue the commands one at a time to install on the command line.
 ## Installing EDA Tools
@@ -138,3 +144,40 @@ If you want to add a cronjob you can do the following:
 30 21 * * * bash -l -c "source ~/PATH/TO/CVW/setup.sh; PATH_TO_CVW/cvw/bin/wrapper_nightly_runs.sh --path {PATH_TO_TEST_LOCATION} --target all --tests nightly --send_email harris@hmc.edu,kaitlin.verilog@gmail.com"
 ```
 
+# Example wsim commands
+
+wsim runs one of several simulators (Questa, VCS, or Verilator) using a specific configuration and either a suite of tests or a specific elf file.
+The general syntax is
+wsim <config> <suite or elf file> [--options]
+
+Options:
+  -h, --help            show this help message and exit
+  --elf, -e             Elf file
+  --sim {questa,verilator,vcs}, -s {questa,verilator,vcs}
+                        Simulator
+  --tb {testbench,testbench_fp}, -t {testbench,testbench_fp}
+                        Testbench
+  --gui, -g             Simulate with GUI
+  --coverage, -c        Code & Functional Coverage
+  --args ARGS, -a ARGS  Optional arguments passed to simulator via $value$plusargs
+  --vcd, -v             Generate testbench.vcd
+  --lockstep, -l        Run ImperasDV lock, step, and compare.
+  --locksteplog LOCKSTEPLOG, -b LOCKSTEPLOG
+                        Retired instruction number to be begin logging.
+
+Run basic test with questa
+
+wsim rv64gc arch64i
+
+Run Questa with gui
+
+wsim rv64gc wally64priv --gui
+
+Run lockstep against ImperasDV with a single elf file in the --gui.  Lockstep requires single elf.
+
+wsim rv64gc ../../tests/riscof/work/riscv-arch-test/rv64i_m/I/src/add-01.S/ref/ref.elf --elf --lockstep --gui
+
+Run lockstep against ImperasDV with a single elf file.  Compute coverage.
+
+wsim rv64gc ../../tests/riscof/work/riscv-arch-test/rv64i_m/I/src/add-01.S/ref/ref.elf --elf --lockstep --coverage
+

From 55008e98c95f20c29fc2170fee1f6639533a2a51 Mon Sep 17 00:00:00 2001
From: Rose Thompson <ross1728@gmail.com>
Date: Mon, 20 May 2024 15:50:17 -0500
Subject: [PATCH 04/30] Formated readme.

---
 README.md | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 89dc350f3..ea6c04424 100644
--- a/README.md
+++ b/README.md
@@ -151,33 +151,32 @@ The general syntax is
 wsim <config> <suite or elf file> [--options]
 
 Options:
-  -h, --help            show this help message and exit
-  --elf, -e             Elf file
-  --sim {questa,verilator,vcs}, -s {questa,verilator,vcs}
-                        Simulator
-  --tb {testbench,testbench_fp}, -t {testbench,testbench_fp}
-                        Testbench
-  --gui, -g             Simulate with GUI
-  --coverage, -c        Code & Functional Coverage
-  --args ARGS, -a ARGS  Optional arguments passed to simulator via $value$plusargs
-  --vcd, -v             Generate testbench.vcd
-  --lockstep, -l        Run ImperasDV lock, step, and compare.
-  --locksteplog LOCKSTEPLOG, -b LOCKSTEPLOG
+	-h, --help            show this help message and exit
+	--elf, -e             Elf file
+	--sim {questa,verilator,vcs}, -s {questa,verilator,vcs}
+                      Simulator
+	--tb {testbench,testbench_fp}, -t {testbench,testbench_fp}
+                      Testbench
+	--gui, -g             Simulate with GUI
+	--coverage, -c        Code & Functional Coverage
+	--args ARGS, -a ARGS  Optional arguments passed to simulator via $value$plusargs
+	--vcd, -v             Generate testbench.vcd
+	--lockstep, -l        Run ImperasDV lock, step, and compare.
+	--locksteplog LOCKSTEPLOG, -b LOCKSTEPLOG
                         Retired instruction number to be begin logging.
 
 Run basic test with questa
 
-wsim rv64gc arch64i
+	wsim rv64gc arch64i
 
 Run Questa with gui
 
-wsim rv64gc wally64priv --gui
+	wsim rv64gc wally64priv --gui
 
 Run lockstep against ImperasDV with a single elf file in the --gui.  Lockstep requires single elf.
 
-wsim rv64gc ../../tests/riscof/work/riscv-arch-test/rv64i_m/I/src/add-01.S/ref/ref.elf --elf --lockstep --gui
+	wsim rv64gc ../../tests/riscof/work/riscv-arch-test/rv64i_m/I/src/add-01.S/ref/ref.elf --elf --lockstep --gui
 
 Run lockstep against ImperasDV with a single elf file.  Compute coverage.
 
-wsim rv64gc ../../tests/riscof/work/riscv-arch-test/rv64i_m/I/src/add-01.S/ref/ref.elf --elf --lockstep --coverage
-
+	wsim rv64gc ../../tests/riscof/work/riscv-arch-test/rv64i_m/I/src/add-01.S/ref/ref.elf --elf --lockstep --coverage

From 7cc1fcbd49579e1cf99f6d390676ef569c73910f Mon Sep 17 00:00:00 2001
From: Rose Thompson <ross1728@gmail.com>
Date: Mon, 20 May 2024 15:52:36 -0500
Subject: [PATCH 05/30] More formating.

---
 README.md | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index ea6c04424..88bd2e5fa 100644
--- a/README.md
+++ b/README.md
@@ -151,19 +151,16 @@ The general syntax is
 wsim <config> <suite or elf file> [--options]
 
 Options:
-	-h, --help            show this help message and exit
-	--elf, -e             Elf file
-	--sim {questa,verilator,vcs}, -s {questa,verilator,vcs}
-                      Simulator
-	--tb {testbench,testbench_fp}, -t {testbench,testbench_fp}
-                      Testbench
-	--gui, -g             Simulate with GUI
-	--coverage, -c        Code & Functional Coverage
-	--args ARGS, -a ARGS  Optional arguments passed to simulator via $value$plusargs
-	--vcd, -v             Generate testbench.vcd
-	--lockstep, -l        Run ImperasDV lock, step, and compare.
-	--locksteplog LOCKSTEPLOG, -b LOCKSTEPLOG
-                        Retired instruction number to be begin logging.
+	-h, --help                                                 show this help message and exit
+	--elf, -e                                                  Elf file
+	--sim {questa,verilator,vcs}, -s {questa,verilator,vcs}    Simulator
+	--tb {testbench,testbench_fp}, -t {testbench,testbench_fp} Testbench
+	--gui, -g                                                  Simulate with GUI
+	--coverage, -c                                             Code & Functional Coverage
+	--args ARGS, -a ARGS                                       Optional arguments passed to simulator via $value$plusargs
+	--vcd, -v                                                  Generate testbench.vcd
+	--lockstep, -l                                             Run ImperasDV lock, step, and compare.
+	--locksteplog LOCKSTEPLOG, -b LOCKSTEPLOG                  Retired instruction number at which to begin logging.
 
 Run basic test with questa
 

From 33eb5980e7c8660298dd366ce02b3e7baacf06f0 Mon Sep 17 00:00:00 2001
From: Rose Thompson <ross1728@gmail.com>
Date: Mon, 20 May 2024 15:57:45 -0500
Subject: [PATCH 06/30] More readme formating.

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 88bd2e5fa..89ec18424 100644
--- a/README.md
+++ b/README.md
@@ -150,7 +150,8 @@ wsim runs one of multiple simulators, Questa, VCS, or Verilator using a specific
 The general syntax is
 wsim <config> <suite or elf file> [--options]
 
-Options:
+Parameters and options:
+
 	-h, --help                                                 show this help message and exit
 	--elf, -e                                                  Elf file
 	--sim {questa,verilator,vcs}, -s {questa,verilator,vcs}    Simulator

From d025bd0aff5265f9838aaa02b64599b33d4aba81 Mon Sep 17 00:00:00 2001
From: Rose Thompson <ross1728@gmail.com>
Date: Mon, 20 May 2024 16:23:25 -0500
Subject: [PATCH 07/30] More improvements to the readme.

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 89ec18424..c2e0f1d39 100644
--- a/README.md
+++ b/README.md
@@ -68,7 +68,9 @@ Ubuntu users can install the tools by running
 
 	$ sudo $WALLY/bin/wally-tool-chain-install.sh
 
-The default installation directory is /opt/riscv defined by the environment variable RISCV. You must copy and edit ~/cvw/site-setup.sh to $RISCV/  ~/cvw/setup.sh sources $RISCV/site-setup.sh.
+The default installation directory is /opt/riscv, defined by the environment variable RISCV. You must copy ~/cvw/site-setup.sh to $RISCV/site-setup.sh and edit the copy.
+
+~/cvw/setup.sh sources $RISCV/site-setup.sh.
 This allows for customization of the site specific information such as commerical licenses and PATH variables.
 
 Change the following lines to point to the path and license server for your Siemens Questa and Synopsys Design Compiler installation and license server.  If you only have Questa, you can still simulate but cannot run logic synthesis.  If Questa or Design Compiler are already setup on this system then don't set these variables.

From 88eb7bd04564ab49f29bf3653171e906b45def41 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Wed, 22 May 2024 00:22:53 -0700
Subject: [PATCH 08/30] Pulled brev8 out of byteop so redundant byteop logic is
 not needed in zbkb

---
 src/ieu/bmu/bitmanipalu.sv |  2 +-
 src/ieu/bmu/byteop.sv      | 10 ++++------
 src/ieu/bmu/zbb.sv         |  2 +-
 src/ieu/kmu/zbkb.sv        | 13 +++++++++----
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv
index fec96883e..7748b8627 100644
--- a/src/ieu/bmu/bitmanipalu.sv
+++ b/src/ieu/bmu/bitmanipalu.sv
@@ -103,7 +103,7 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) (
 
   // ZBKB Unit
   if (P.ZBKB_SUPPORTED) begin: zbkb
-    zbkb #(P.XLEN) ZBKB(.A(ABMU), .B(BBMU), .RevA, .W64, .Funct3, .ZBKBSelect(ZBBSelect[2:0]), .ZBKBResult);
+    zbkb #(P.XLEN) ZBKB(.A(ABMU), .B(BBMU), .W64, .Funct3, .ZBKBSelect(ZBBSelect[2:0]), .ZBKBResult);
   end else assign ZBKBResult = '0;
 
   // ZBKX Unit
diff --git a/src/ieu/bmu/byteop.sv b/src/ieu/bmu/byteop.sv
index 980c6d586..913c852b1 100644
--- a/src/ieu/bmu/byteop.sv
+++ b/src/ieu/bmu/byteop.sv
@@ -30,24 +30,22 @@
 
 module byteop #(parameter WIDTH=32) (
   input  logic [WIDTH-1:0] A,             // Operands
-  input  logic [WIDTH-1:0] RevA,          // Reversed A
   input  logic [1:0]       ByteSelect,    // LSB of Immediate
   output logic [WIDTH-1:0] ByteResult);   // rev8, orcb result
 
-  logic [WIDTH-1:0] OrcBResult, Rev8Result, Brev8Result;
+  logic [WIDTH-1:0] OrcBResult, Rev8Result;
   genvar i;
 
   for (i=0;i<WIDTH;i+=8) begin:loop
     assign OrcBResult[i+7:i] = {8{|A[i+7:i]}};
     assign Rev8Result[WIDTH-i-1:WIDTH-i-8] = A[i+7:i];
-    assign Brev8Result[i+7:i] = RevA[WIDTH-1-i:WIDTH-i-8];
   end
 
   // ByteOp Result Mux
+//  mux3 #(WIDTH) byteresultmux(Rev8Result, Brev8Result, OrcBResult, ByteSelect, ByteResult);
   always_comb begin
     if (ByteSelect[0] == 1'b0)      ByteResult = Rev8Result;
-    else if (ByteSelect[1] == 1'b0) ByteResult = OrcBResult;
-    else                            ByteResult = Brev8Result;
-  end
+    else /*if (ByteSelect[1] == 1'b0) */ ByteResult = OrcBResult;
+  end 
   
 endmodule
diff --git a/src/ieu/bmu/zbb.sv b/src/ieu/bmu/zbb.sv
index e96ed7acd..bdfcb0426 100644
--- a/src/ieu/bmu/zbb.sv
+++ b/src/ieu/bmu/zbb.sv
@@ -45,7 +45,7 @@ module zbb #(parameter WIDTH=32) (
 
   mux2 #(1) ltmux(LT, LTU, BUnsigned , lt);
   cnt #(WIDTH) cnt(.A, .RevA, .B(B[1:0]), .W64, .CntResult);
-  byteop #(WIDTH) bu(.A, .RevA, .ByteSelect({B[10], B[0]}), .ByteResult);
+  byteop #(WIDTH) bu(.A, .ByteSelect({B[10], B[0]}), .ByteResult);
   ext #(WIDTH) ext(.A, .ExtSelect({~B[2], {B[2] & B[0]}}), .ExtResult);
 
   // ZBBSelect[2] differentiates between min(u) vs max(u) instruction
diff --git a/src/ieu/kmu/zbkb.sv b/src/ieu/kmu/zbkb.sv
index 90b774042..8d437f62f 100644
--- a/src/ieu/kmu/zbkb.sv
+++ b/src/ieu/kmu/zbkb.sv
@@ -26,21 +26,26 @@
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
 module zbkb #(parameter WIDTH=32) (
-   input  logic [WIDTH-1:0] A, B, RevA,
+   input  logic [WIDTH-1:0] A, B,
    input  logic 	          W64,
    input  logic [2:0] 	    Funct3,
    input  logic [2:0] 	    ZBKBSelect,
    output logic [WIDTH-1:0] ZBKBResult
 );
    
-   logic [WIDTH-1:0] 	     ByteResult;   // rev8, brev8
+   logic [WIDTH-1:0] 	     Brev8Result;  // brev8
    logic [WIDTH-1:0] 	     PackResult;   // pack, packh, packw (RB64 only)
    logic [WIDTH-1:0] 	     ZipResult;    // zip, unzip
+
+   // brev8 reverses the bit order within each byte of A; this is pure rewiring
+   genvar i, j;
+   for (i=0;i<WIDTH/8;i=i+1) 
+      for (j=0; j<8; j=j+1) 
+         assign Brev8Result[i*8+j] = A[i*8+7-j];
    
-   byteop #(WIDTH) rev(.A, .RevA, .ByteSelect({B[10], B[0]}), .ByteResult);
    packer #(WIDTH) pack(.A, .B, .PackSelect({ZBKBSelect[2], Funct3[1:0]}), .PackResult);
    zipper #(WIDTH) zip(.A, .ZipSelect(Funct3[2]), .ZipResult);
    
    // ZBKB Result Select Mux
-   mux3 #(WIDTH) zbkbresultmux(ByteResult, PackResult, ZipResult, ZBKBSelect[1:0], ZBKBResult);   
+   mux3 #(WIDTH) zbkbresultmux(Brev8Result, PackResult, ZipResult, ZBKBSelect[1:0], ZBKBResult);   
 endmodule

From a17204b0fe7c60e566dae64caa8bc7c8297ddd49 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Wed, 22 May 2024 00:48:04 -0700
Subject: [PATCH 09/30] Continued bmu cleanup

---
 src/ieu/bmu/byteop.sv | 12 +++---------
 src/ieu/bmu/zbb.sv    |  2 +-
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/ieu/bmu/byteop.sv b/src/ieu/bmu/byteop.sv
index 913c852b1..263680aea 100644
--- a/src/ieu/bmu/byteop.sv
+++ b/src/ieu/bmu/byteop.sv
@@ -30,22 +30,16 @@
 
 module byteop #(parameter WIDTH=32) (
   input  logic [WIDTH-1:0] A,             // Operands
-  input  logic [1:0]       ByteSelect,    // LSB of Immediate
+  input  logic             ByteSelect,    // LSB of Immediate
   output logic [WIDTH-1:0] ByteResult);   // rev8, orcb result
 
   logic [WIDTH-1:0] OrcBResult, Rev8Result;
   genvar i;
 
-  for (i=0;i<WIDTH;i+=8) begin:loop
+  for (i=0;i<WIDTH;i+=8) begin:byteloop
     assign OrcBResult[i+7:i] = {8{|A[i+7:i]}};
     assign Rev8Result[WIDTH-i-1:WIDTH-i-8] = A[i+7:i];
   end
 
-  // ByteOp Result Mux
-//  mux3 #(WIDTH) byteresultmux(Rev8Result, Brev8Result, OrcBResult, ByteSelect, ByteResult);
-  always_comb begin
-    if (ByteSelect[0] == 1'b0)      ByteResult = Rev8Result;
-    else /*if (ByteSelect[1] == 1'b0) */ ByteResult = OrcBResult;
-  end 
-  
+  mux2 #(WIDTH) byteresultmux(Rev8Result, OrcBResult, ByteSelect, ByteResult);
 endmodule
diff --git a/src/ieu/bmu/zbb.sv b/src/ieu/bmu/zbb.sv
index bdfcb0426..f9957c787 100644
--- a/src/ieu/bmu/zbb.sv
+++ b/src/ieu/bmu/zbb.sv
@@ -45,7 +45,7 @@ module zbb #(parameter WIDTH=32) (
 
   mux2 #(1) ltmux(LT, LTU, BUnsigned , lt);
   cnt #(WIDTH) cnt(.A, .RevA, .B(B[1:0]), .W64, .CntResult);
-  byteop #(WIDTH) bu(.A, .ByteSelect({B[10], B[0]}), .ByteResult);
+  byteop #(WIDTH) bu(.A, .ByteSelect(B[0]), .ByteResult);
   ext #(WIDTH) ext(.A, .ExtSelect({~B[2], {B[2] & B[0]}}), .ExtResult);
 
   // ZBBSelect[2] differentiates between min(u) vs max(u) instruction

From 3ad815ce34199ed3a8b25a90f23f9739c24c5329 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Wed, 22 May 2024 08:29:08 -0700
Subject: [PATCH 10/30] Reordered Zicond support in ALU

---
 src/ieu/alu.sv | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/ieu/alu.sv b/src/ieu/alu.sv
index 783d39495..e1cae73a6 100644
--- a/src/ieu/alu.sv
+++ b/src/ieu/alu.sv
@@ -60,7 +60,22 @@ module alu import cvw::*; #(parameter cvw_t P) (
   // CondShiftA is A for add/sub or a shifted version of A for shift-and-add BMU instructions
   assign CondMaskInvB = SubArith ? ~CondMaskB : CondMaskB;
   assign {Carry, Sum} = CondShiftA + CondMaskInvB + {{(P.XLEN-1){1'b0}}, SubArith};
-  
+
+  // Zicond block conditionally zeros B
+  if (P.ZICOND_SUPPORTED) begin: zicond
+    logic  BZero;
+    
+    assign BZero = (B == 0); // check if rs2 = 0
+    // Create a signal that is 0 when czero.* instruction should clear result
+    // If B = 0 for czero.eqz or if B != 0 for czero.nez
+    always_comb 
+     case (CZero)
+        2'b01:   ZeroCondMaskInvB = {P.XLEN{~BZero}}; // czero.eqz: kill if B = 0
+        2'b10:   ZeroCondMaskInvB = {P.XLEN{BZero}};  // czero.nez: kill if B != 0
+        default: ZeroCondMaskInvB = CondMaskInvB;     // otherwise normal behavior
+      endcase
+  end else assign ZeroCondMaskInvB = CondMaskInvB; // no masking if Zicond is not supported
+
   // Shifts (configurable for rotation)
   shifter #(P) sh(.A, .Amt(B[P.LOG_XLEN-1:0]), .Right(Funct3[2]), .W64, .SubArith, .Y(Shift), .Rotate(BALUControl[2]));
 
@@ -105,18 +120,4 @@ module alu import cvw::*; #(parameter cvw_t P) (
     assign CondShiftA = A;
   end
 
-  // Zicond block
-  if (P.ZICOND_SUPPORTED) begin: zicond
-    logic  BZero;
-    
-    assign BZero = (B == 0); // check if rs2 = 0
-    // Create a signal that is 0 when czero.* instruction should clear result
-    // If B = 0 for czero.eqz or if B != 0 for czero.nez
-    always_comb 
-     case (CZero)
-        2'b01:   ZeroCondMaskInvB = {P.XLEN{~BZero}}; // czero.eqz: kill if B = 0
-        2'b10:   ZeroCondMaskInvB = {P.XLEN{BZero}};  // czero.nez: kill if B != 0
-        default: ZeroCondMaskInvB = CondMaskInvB;     // otherwise normal behavior
-      endcase
-  end else assign ZeroCondMaskInvB = CondMaskInvB; // no masking if Zicond is not supported
 endmodule

From c160ced2d2257df95d3333bd48805f740501775a Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Wed, 22 May 2024 15:01:20 -0700
Subject: [PATCH 11/30] Zk* cleanup

---
 src/ieu/bmu/bitmanipalu.sv | 4 ++--
 src/ieu/kmu/zbkb.sv        | 3 +--
 src/ieu/kmu/zbkx.sv        | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv
index 7748b8627..b0af3e347 100644
--- a/src/ieu/bmu/bitmanipalu.sv
+++ b/src/ieu/bmu/bitmanipalu.sv
@@ -103,12 +103,12 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) (
 
   // ZBKB Unit
   if (P.ZBKB_SUPPORTED) begin: zbkb
-    zbkb #(P.XLEN) ZBKB(.A(ABMU), .B(BBMU), .W64, .Funct3, .ZBKBSelect(ZBBSelect[2:0]), .ZBKBResult);
+    zbkb #(P.XLEN) ZBKB(.A(ABMU), .B(BBMU), .Funct3, .ZBKBSelect(ZBBSelect[2:0]), .ZBKBResult);
   end else assign ZBKBResult = '0;
 
   // ZBKX Unit
   if (P.ZBKX_SUPPORTED) begin: zbkx
-    zbkx #(P.XLEN) ZBKX(.A(ABMU), .B(BBMU), .ZBKXSelect(ZBBSelect[2:0]), .ZBKXResult);
+    zbkx #(P.XLEN) ZBKX(.A(ABMU), .B(BBMU), .ZBKXSelect(ZBBSelect[0]), .ZBKXResult);
   end else assign ZBKXResult = '0;
 
   // ZKND and ZKNE AES decryption and encryption
diff --git a/src/ieu/kmu/zbkb.sv b/src/ieu/kmu/zbkb.sv
index 8d437f62f..61173e982 100644
--- a/src/ieu/kmu/zbkb.sv
+++ b/src/ieu/kmu/zbkb.sv
@@ -27,8 +27,7 @@
 
 module zbkb #(parameter WIDTH=32) (
    input  logic [WIDTH-1:0] A, B,
-   input  logic 	          W64,
-   input  logic [2:0] 	    Funct3,
+  input  logic [2:0] 	    Funct3,
    input  logic [2:0] 	    ZBKBSelect,
    output logic [WIDTH-1:0] ZBKBResult
 );
diff --git a/src/ieu/kmu/zbkx.sv b/src/ieu/kmu/zbkx.sv
index dbbaf3d2d..18fe9a657 100644
--- a/src/ieu/kmu/zbkx.sv
+++ b/src/ieu/kmu/zbkx.sv
@@ -27,7 +27,7 @@
 
 module zbkx #(parameter WIDTH=32) (
    input  logic [WIDTH-1:0] A, B,
-   input  logic [2:0] 	    ZBKXSelect,
+   input  logic  	          ZBKXSelect,
    output logic [WIDTH-1:0] ZBKXResult
 );
    
@@ -46,5 +46,5 @@ module zbkx #(parameter WIDTH=32) (
       end   
    end
 
-   assign ZBKXResult = ZBKXSelect[0] ? xperm4 : xperm8;
+   assign ZBKXResult = ZBKXSelect ? xperm4 : xperm8;
 endmodule

From d9a1691c8323f5ca0c8ec3a1e7086ad0f4f74efd Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Thu, 23 May 2024 05:39:50 -0700
Subject: [PATCH 12/30] Simplified sha512_32

---
 src/ieu/sha/sha512_32.sv | 65 ++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 43 deletions(-)

diff --git a/src/ieu/sha/sha512_32.sv b/src/ieu/sha/sha512_32.sv
index 679d6d740..4ce8e31ef 100644
--- a/src/ieu/sha/sha512_32.sv
+++ b/src/ieu/sha/sha512_32.sv
@@ -31,67 +31,46 @@ module sha512_32 (
    output logic [31:0] result
 );
 
-   logic [31:0] x[6][6];
-   logic [31:0] y[6];
+   logic [31:0] x[6][3];
+   logic [31:0] y[3];
 
    // sha512{sig0h/sig0l/sig1h/sig1l/sum0r/sum1r} select shifted operands for 32-bit xor6
 
    // sha512sig0h
-   assign x[0][0] = A >> 1;
-   assign x[0][1] = A >> 7;
-   assign x[0][2] = A >> 8;
-   assign x[0][3] = B << 31;
-   assign x[0][4] = B << 24;
-   assign x[0][5] = '0;   
+   assign x[0][0] = {B[0], A[31:1]};
+   assign x[0][1] = {B[7:0], A[31:8]}; 
+   assign x[0][2] = {7'b0, A[31:7]};
 
    // sha512sig0l
-   assign x[1][0] = A >> 1; 
-   assign x[1][1] = A >> 7; 
-   assign x[1][2] = A >> 8; 
-   assign x[1][3] = B << 31;
-   assign x[1][4] = B << 25;
-   assign x[1][5] = B << 24;
+   assign x[1][0] = x[0][0];
+   assign x[1][1] = x[0][1]; 
+   assign x[1][2] = {B[6:0], A[31:7]};
 
    // sha512sig1h
-   assign x[2][0] = A << 3;
-   assign x[2][1] = A >> 6;
-   assign x[2][2] = A >> 19;
-   assign x[2][3] = B >> 29;
-   assign x[2][4] = B << 13;
-   assign x[2][5] = '0;      
+   assign x[2][0] = {A[28:0], B[31:29]};
+   assign x[2][1] = {B[18:0], A[31:19]};
+   assign x[2][2] = {6'b0, A[31:6]};  
 
    // sha512sig1l
-   assign x[3][0] = A << 3; 
-   assign x[3][1] = A >> 6; 
-   assign x[3][2] = A >> 19; 
-   assign x[3][3] = B >> 29;
-   assign x[3][4] = B << 26;
-   assign x[3][5] = B << 13;
+   assign x[3][0] = x[2][0];
+   assign x[3][1] = x[2][1];
+   assign x[3][2] = {B[5:0], A[31:6]};    
 
    // sha512sum0r
-   assign x[4][0] = A << 25; 
-   assign x[4][1] = A << 30; 
-   assign x[4][2] = A >> 28; 
-   assign x[4][3] = B >> 7;
-   assign x[4][4] = B >> 2;
-   assign x[4][5] = B << 4;
+   assign x[4][0] = {A[6:0], B[31:7]}; 
+   assign x[4][1] = {A[1:0], B[31:2]};
+   assign x[4][2] = {B[27:0], A[31:28]};    
 
    // sha512sum1r
-   assign x[5][0] = A << 23; 
-   assign x[5][1] = A >> 14; 
-   assign x[5][2] = A >> 18; 
-   assign x[5][3] = B >> 9;
-   assign x[5][4] = B << 18;
-   assign x[5][5] = B << 14;   
+   assign x[5][0] = {A[8:0], B[31:9]}; 
+   assign x[5][1] = {B[13:0], A[31:14]};
+   assign x[5][2] = {B[17:0], A[31:18]}; 
 
    // 32-bit muxes to select inputs to xor6 for sha512
    assign y[0] = x[ZKNHSelect[2:0]][0]; 
    assign y[1] = x[ZKNHSelect[2:0]][1]; 
    assign y[2] = x[ZKNHSelect[2:0]][2];
-   assign y[3] = x[ZKNHSelect[2:0]][3]; 
-   assign y[4] = x[ZKNHSelect[2:0]][4]; 
-   assign y[5] = x[ZKNHSelect[2:0]][5];    
-
+ 
    // sha512 32-bit xor6
-   assign result = y[0] ^ y[1] ^ y[2] ^ y[3] ^ y[4] ^ y[5];
+   assign result = y[0] ^ y[1] ^ y[2];
 endmodule

From ac153bc4ed75c83983e5953fc763c6a09723e376 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Thu, 23 May 2024 05:46:56 -0700
Subject: [PATCH 13/30] More simplifying sha512_32

---
 src/ieu/bmu/bmuctrl.sv   |  2 +-
 src/ieu/sha/sha512_32.sv | 46 +++++++++++++++++-----------------------
 2 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/src/ieu/bmu/bmuctrl.sv b/src/ieu/bmu/bmuctrl.sv
index 62376865f..76e08aba8 100644
--- a/src/ieu/bmu/bmuctrl.sv
+++ b/src/ieu/bmu/bmuctrl.sv
@@ -264,7 +264,7 @@ module bmuctrl import cvw::*;  #(parameter cvw_t P) (
           17'b0110011_0101111_000:     BMUControlsD = `BMUCTRLW'b000_1000_1010_1_0_0_1_0_0_0_0_0;  // sha512sig1h
           17'b0110011_0101011_000:     BMUControlsD = `BMUCTRLW'b000_1000_1011_1_0_0_1_0_0_0_0_0;  // sha512sig1l
           17'b0110011_0101000_000:     BMUControlsD = `BMUCTRLW'b000_1000_1100_1_0_0_1_0_0_0_0_0;  // sha512sum0r
-          17'b0110011_0101001_000:     BMUControlsD = `BMUCTRLW'b000_1000_1101_1_0_0_1_0_0_0_0_0;  // sha512sum1r
+          17'b0110011_0101001_000:     BMUControlsD = `BMUCTRLW'b000_1000_1110_1_0_0_1_0_0_0_0_0;  // sha512sum1r
         endcase
 
       else if (P.XLEN==64)
diff --git a/src/ieu/sha/sha512_32.sv b/src/ieu/sha/sha512_32.sv
index 4ce8e31ef..484ce3586 100644
--- a/src/ieu/sha/sha512_32.sv
+++ b/src/ieu/sha/sha512_32.sv
@@ -31,45 +31,37 @@ module sha512_32 (
    output logic [31:0] result
 );
 
-   logic [31:0] x[6][3];
+   logic [31:0] x[4][3];
    logic [31:0] y[3];
 
-   // sha512{sig0h/sig0l/sig1h/sig1l/sum0r/sum1r} select shifted operands for 32-bit xor6
+   // sha512{sig0h/sig0l/sig1h/sig1l/sum0r/sum1r} select shifted operands for 32-bit xor
 
-   // sha512sig0h
+   // The l flavors differ from h by using low bits of B instead of zeros in x[0/1][2]
+
+   // sha512sig0h/l
    assign x[0][0] = {B[0], A[31:1]};
    assign x[0][1] = {B[7:0], A[31:8]}; 
-   assign x[0][2] = {7'b0, A[31:7]};
+   assign x[0][2] = {B[6:0] & {7{ZKNHSelect[0]}}, A[31:7]};
 
-   // sha512sig0l
-   assign x[1][0] = x[0][0];
-   assign x[1][1] = x[0][1]; 
-   assign x[1][2] = {B[6:0], A[31:7]};
-
-   // sha512sig1h
-   assign x[2][0] = {A[28:0], B[31:29]};
-   assign x[2][1] = {B[18:0], A[31:19]};
-   assign x[2][2] = {6'b0, A[31:6]};  
-
-   // sha512sig1l
-   assign x[3][0] = x[2][0];
-   assign x[3][1] = x[2][1];
-   assign x[3][2] = {B[5:0], A[31:6]};    
+   // sha512sig1h/l
+   assign x[1][0] = {A[28:0], B[31:29]};
+   assign x[1][1] = {B[18:0], A[31:19]};
+   assign x[1][2] = {B[5:0] & {6{ZKNHSelect[0]}}, A[31:6]};  
 
    // sha512sum0r
-   assign x[4][0] = {A[6:0], B[31:7]}; 
-   assign x[4][1] = {A[1:0], B[31:2]};
-   assign x[4][2] = {B[27:0], A[31:28]};    
+   assign x[2][0] = {A[6:0], B[31:7]};
+   assign x[2][1] = {A[1:0], B[31:2]};
+   assign x[2][2] = {B[27:0], A[31:28]};
 
    // sha512sum1r
-   assign x[5][0] = {A[8:0], B[31:9]}; 
-   assign x[5][1] = {B[13:0], A[31:14]};
-   assign x[5][2] = {B[17:0], A[31:18]}; 
+   assign x[3][0] = {A[8:0], B[31:9]}; 
+   assign x[3][1] = {B[13:0], A[31:14]};
+   assign x[3][2] = {B[17:0], A[31:18]}; 
 
    // 32-bit muxes to select inputs to xor6 for sha512
-   assign y[0] = x[ZKNHSelect[2:0]][0]; 
-   assign y[1] = x[ZKNHSelect[2:0]][1]; 
-   assign y[2] = x[ZKNHSelect[2:0]][2];
+   assign y[0] = x[ZKNHSelect[2:1]][0]; 
+   assign y[1] = x[ZKNHSelect[2:1]][1]; 
+   assign y[2] = x[ZKNHSelect[2:1]][2];
  
    // sha512 32-bit xor6
    assign result = y[0] ^ y[1] ^ y[2];

From fb8e97dd04eac8e981b0f48ec99f03ca520f99a5 Mon Sep 17 00:00:00 2001
From: Jordan Carlin <jordanmcarlin@gmail.com>
Date: Thu, 23 May 2024 13:17:24 -0700
Subject: [PATCH 14/30] Remove existing derived configs before creating new
 ones

---
 sim/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sim/Makefile b/sim/Makefile
index 09d417124..79a3042e4 100644
--- a/sim/Makefile
+++ b/sim/Makefile
@@ -62,6 +62,7 @@ coveragetests:
 	make -C ../tests/coverage/ --jobs
 
 deriv:
+	rm -rf ../config/deriv
 	derivgen.pl
 
 benchmarks:

From 6a2192db6e0201a521c33e91705814040a91ecee Mon Sep 17 00:00:00 2001
From: Jordan Carlin <jordanmcarlin@gmail.com>
Date: Thu, 23 May 2024 13:56:38 -0700
Subject: [PATCH 15/30] Revert "Remove existing derived configs before creating
 new ones"

---
 sim/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sim/Makefile b/sim/Makefile
index 79a3042e4..09d417124 100644
--- a/sim/Makefile
+++ b/sim/Makefile
@@ -62,7 +62,6 @@ coveragetests:
 	make -C ../tests/coverage/ --jobs
 
 deriv:
-	rm -rf ../config/deriv
 	derivgen.pl
 
 benchmarks:

From a1e22adc1ed3f66ce0bf7a288a678922de9dbb20 Mon Sep 17 00:00:00 2001
From: Jordan Carlin <jordanmcarlin@gmail.com>
Date: Thu, 23 May 2024 14:01:13 -0700
Subject: [PATCH 16/30] Delete deriv directory in derivgen.pl before remaking
 derived configs

---
 bin/derivgen.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/derivgen.pl b/bin/derivgen.pl
index 630962ca8..442455c53 100755
--- a/bin/derivgen.pl
+++ b/bin/derivgen.pl
@@ -70,10 +70,10 @@ foreach my $line (<$fh>) {
 }
 &terminateDeriv();
 close($fh);
+system("rm -rf $ENV{WALLY}/config/deriv");
 #foreach my $key (keys %derivs) {
 foreach my $key (@derivnames) {
     my $dir = "$ENV{WALLY}/config/deriv/$key";
-    system("rm -rf $dir");
     system("mkdir -p $dir");
     my $configunmod = "$dir/config_unmod.vh";
     my $config = "$dir/config.vh";

From b0d13441217c50ae744b03f9162ec5a46d599e92 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Thu, 23 May 2024 22:06:37 -0700
Subject: [PATCH 17/30] Commented sha instructions

---
 src/ieu/sha/sha256.sv    | 30 +++++++++++++++---------------
 src/ieu/sha/sha512_32.sv | 29 +++++++++++++++--------------
 src/ieu/sha/sha512_64.sv | 34 +++++++++++++++++-----------------
 3 files changed, 47 insertions(+), 46 deletions(-)

diff --git a/src/ieu/sha/sha256.sv b/src/ieu/sha/sha256.sv
index 118f9e24b..12d08f2ce 100644
--- a/src/ieu/sha/sha256.sv
+++ b/src/ieu/sha/sha256.sv
@@ -37,29 +37,29 @@ module sha256 (
    // sha256{sig0/sig1/sum0/sum1} select shifted operands for 32-bit xor3 and then sign-extend
 
    // sha256sig0
-   assign x[0][0] = {A[6:0], A[31:7]};
-   assign x[0][1] = {A[17:0], A[31:18]};
-   assign x[0][2] = {3'b0, A[31:3]};
+   assign x[0][0] = {A[6:0], A[31:7]};    // ror 7
+   assign x[0][1] = {A[17:0], A[31:18]};  // ror 18
+   assign x[0][2] = {3'b0, A[31:3]};      // >> 3
 
    // sha256sig1
-   assign x[1][0] = {A[16:0], A[31:17]};
-   assign x[1][1] = {A[18:0], A[31:19]};
-   assign x[1][2] = {10'b0, A[31:10]};
+   assign x[1][0] = {A[16:0], A[31:17]};  // ror 17
+   assign x[1][1] = {A[18:0], A[31:19]};  // ror 19
+   assign x[1][2] = {10'b0, A[31:10]};    // >> 10
 
    // sha256sum0
-   assign x[2][0] = {A[1:0],  A[31:2]};
-   assign x[2][1] = {A[12:0], A[31:13]};
-   assign x[2][2] = {A[21:0], A[31:22]};
+   assign x[2][0] = {A[1:0],  A[31:2]};   // ror 2
+   assign x[2][1] = {A[12:0], A[31:13]};  // ror 13
+   assign x[2][2] = {A[21:0], A[31:22]};  // ror 22
 
    // sha256sum1
-   assign x[3][0] = {A[5:0], A[31:6]};
-   assign x[3][1] ={A[10:0], A[31:11]};
-   assign x[3][2] = {A[24:0], A[31:25]};
+   assign x[3][0] = {A[5:0], A[31:6]};    // ror 6
+   assign x[3][1] ={ A[10:0], A[31:11]};  // ror 11
+   assign x[3][2] = {A[24:0], A[31:25]};  // ror 25
 
    // 32-bit muxes to select inputs to xor3 for sha256 
-   assign y[0] = x[ZKNHSelect[1:0]][0]; 
-   assign y[1] = x[ZKNHSelect[1:0]][1]; 
-   assign y[2] = x[ZKNHSelect[1:0]][2]; 
+   assign y[0] = x[ZKNHSelect[1:0]][0];
+   assign y[1] = x[ZKNHSelect[1:0]][1];
+   assign y[2] = x[ZKNHSelect[1:0]][2];
 
    // sha256 32-bit xor3
    assign result = y[0] ^ y[1] ^ y[2];
diff --git a/src/ieu/sha/sha512_32.sv b/src/ieu/sha/sha512_32.sv
index 484ce3586..ce205172c 100644
--- a/src/ieu/sha/sha512_32.sv
+++ b/src/ieu/sha/sha512_32.sv
@@ -34,33 +34,34 @@ module sha512_32 (
    logic [31:0] x[4][3];
    logic [31:0] y[3];
 
+   // rotate/shift a 64-bit value contained in {B, A} and select 32 bits
    // sha512{sig0h/sig0l/sig1h/sig1l/sum0r/sum1r} select shifted operands for 32-bit xor
 
    // The l flavors differ from h by using low bits of B instead of zeros in x[0/1][2]
 
    // sha512sig0h/l
-   assign x[0][0] = {B[0], A[31:1]};
-   assign x[0][1] = {B[7:0], A[31:8]}; 
-   assign x[0][2] = {B[6:0] & {7{ZKNHSelect[0]}}, A[31:7]};
+   assign x[0][0] = {B[0], A[31:1]};                           // ror 1
+   assign x[0][1] = {B[7:0], A[31:8]};                         // ror 8
+   assign x[0][2] = {B[6:0] & {7{ZKNHSelect[0]}}, A[31:7]};    // ror/srl 7
 
    // sha512sig1h/l
-   assign x[1][0] = {A[28:0], B[31:29]};
-   assign x[1][1] = {B[18:0], A[31:19]};
-   assign x[1][2] = {B[5:0] & {6{ZKNHSelect[0]}}, A[31:6]};  
+   assign x[1][0] = {A[28:0], B[31:29]};                       // ror 61
+   assign x[1][1] = {B[18:0], A[31:19]};                       // ror 19
+   assign x[1][2] = {B[5:0] & {6{ZKNHSelect[0]}}, A[31:6]};    // ror/srl 6
 
    // sha512sum0r
-   assign x[2][0] = {A[6:0], B[31:7]};
-   assign x[2][1] = {A[1:0], B[31:2]};
-   assign x[2][2] = {B[27:0], A[31:28]};
+   assign x[2][0] = {A[6:0], B[31:7]};                         // ror 39
+   assign x[2][1] = {A[1:0], B[31:2]};                         // ror 34
+   assign x[2][2] = {B[27:0], A[31:28]};                       // ror 28
 
    // sha512sum1r
-   assign x[3][0] = {A[8:0], B[31:9]}; 
-   assign x[3][1] = {B[13:0], A[31:14]};
-   assign x[3][2] = {B[17:0], A[31:18]}; 
+   assign x[3][0] = {A[8:0], B[31:9]};                         // ror 41
+   assign x[3][1] = {B[13:0], A[31:14]};                       // ror 14
+   assign x[3][2] = {B[17:0], A[31:18]};                       // ror 18
 
    // 32-bit muxes to select inputs to xor6 for sha512
-   assign y[0] = x[ZKNHSelect[2:1]][0]; 
-   assign y[1] = x[ZKNHSelect[2:1]][1]; 
+   assign y[0] = x[ZKNHSelect[2:1]][0];
+   assign y[1] = x[ZKNHSelect[2:1]][1];
    assign y[2] = x[ZKNHSelect[2:1]][2];
  
    // sha512 32-bit xor6
diff --git a/src/ieu/sha/sha512_64.sv b/src/ieu/sha/sha512_64.sv
index 8707311e8..47fefce04 100644
--- a/src/ieu/sha/sha512_64.sv
+++ b/src/ieu/sha/sha512_64.sv
@@ -33,33 +33,33 @@ module sha512_64 (
 
    logic [63:0] x[4][3];
    logic [63:0] y[3];
-   
-   // sha512{sig0/sig1/sum0/sum1} select shifted operands for 64-bit xor3
+
+   // sha512{sig0/sig1/sum0/sum1} select rotated/shifted operands for 64-bit xor3
 
    // sha512sig0
-   assign x[0][0] = {A[0],   A[63:1]};
-   assign x[0][1] = {A[7:0], A[63:8]};
-   assign x[0][2] = A >> 7;
+   assign x[0][0] = {A[0],   A[63:1]};    // ror 1
+   assign x[0][1] = {A[7:0], A[63:8]};    // ror 8
+   assign x[0][2] = {7'b0,   A[63:7]};    // >> 7
 
    // sha512sig1
-   assign x[1][0] = {A[18:0], A[63:19]};
-   assign x[1][1] = {A[60:0], A[63:61]};
-   assign x[1][2] = A >> 6;
+   assign x[1][0] = {A[18:0], A[63:19]};  // ror 19
+   assign x[1][1] = {A[60:0], A[63:61]};  // ror 61
+   assign x[1][2] = {6'b0,    A[63:6]};   // >> 6
 
    // sha512sum0
-   assign x[2][0] = {A[27:0], A[63:28]};
-   assign x[2][1] = {A[33:0], A[63:34]};
-   assign x[2][2] = {A[38:0], A[63:39]};
+   assign x[2][0] = {A[27:0], A[63:28]};  // ror 28
+   assign x[2][1] = {A[33:0], A[63:34]};  // ror 34
+   assign x[2][2] = {A[38:0], A[63:39]};  // ror 39
 
    // sha512sum1
-   assign x[3][0] = {A[13:0], A[63:14]};
-   assign x[3][1] = {A[17:0], A[63:18]};
-   assign x[3][2] = {A[40:0], A[63:41]};
+   assign x[3][0] = {A[13:0], A[63:14]};  // ror 14
+   assign x[3][1] = {A[17:0], A[63:18]};  // ror 18
+   assign x[3][2] = {A[40:0], A[63:41]};  // ror 41
 
    // 64-bit muxes to select inputs to xor3 for sha512
-   assign y[0] = x[ZKNHSelect[1:0]][0]; 
-   assign y[1] = x[ZKNHSelect[1:0]][1]; 
-   assign y[2] = x[ZKNHSelect[1:0]][2]; 
+   assign y[0] = x[ZKNHSelect[1:0]][0];
+   assign y[1] = x[ZKNHSelect[1:0]][1];
+   assign y[2] = x[ZKNHSelect[1:0]][2];
 
    // sha512 64-bit xor3
    assign result = y[0] ^ y[1] ^ y[2];

From e626052ec93c5a402d7109ce01538556b304cf53 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Thu, 23 May 2024 22:30:25 -0700
Subject: [PATCH 18/30] simplified AES32de mixcolumns because input is only one
 byte

---
 src/ieu/aes/aes32d.sv            |  6 ++--
 src/ieu/aes/aes32e.sv            |  8 +++---
 src/ieu/aes/aes64d.sv            |  2 +-
 src/ieu/aes/aes64e.sv            |  2 +-
 src/ieu/aes/aesinvmixcolumns8.sv | 47 ++++++++++++++++++++++++++++++++
 src/ieu/aes/aesmixcolumns8.sv    | 39 ++++++++++++++++++++++++++
 6 files changed, 95 insertions(+), 9 deletions(-)
 create mode 100644 src/ieu/aes/aesinvmixcolumns8.sv
 create mode 100644 src/ieu/aes/aesmixcolumns8.sv

diff --git a/src/ieu/aes/aes32d.sv b/src/ieu/aes/aes32d.sv
index e3eb61cec..f761b5060 100644
--- a/src/ieu/aes/aes32d.sv
+++ b/src/ieu/aes/aes32d.sv
@@ -34,8 +34,8 @@ module aes32d(
    logic [7:0] 			  SboxOut;
    logic [31:0] 		     so, mixed;
    
-   aesinvsbox8 inv_sbox(SboxIn, SboxOut);         // Apply inverse sbox to si
-   assign so = {24'h0, SboxOut};                  // Pad output of inverse substitution box
-   aesinvmixcolumns32 mix(so, mixed);             // Run so through the mixword AES function
+   aesinvsbox8 inv_sbox(SboxIn, SboxOut);          // Apply inverse sbox to si
+   aesinvmixcolumns8 mix(SboxOut, mixed);          // Run SboxOut through the InvMixColumns AES function
+   assign so = {24'h0, SboxOut};                   // Pad output of inverse substitution box
    mux2 #(32) rmux(mixed, so, finalround, result); // on final round, skip mixcolumns
 endmodule
diff --git a/src/ieu/aes/aes32e.sv b/src/ieu/aes/aes32e.sv
index ca00afdd3..ab28db196 100644
--- a/src/ieu/aes/aes32e.sv
+++ b/src/ieu/aes/aes32e.sv
@@ -34,8 +34,8 @@ module aes32e(
    logic [7:0] 			  SboxOut;
    logic [31:0] 		     so, mixed;
    
-   aessbox8 sbox(SboxIn, SboxOut);                // Substitute
-   assign so = {24'h0, SboxOut};                  // Pad sbox output
-   aesmixcolumns32 mwd(so, mixed);                // Mix Word using aesmixword component
-   mux2 #(32) rmux(mixed, so, finalround, result); // on final round, skip mixcolumns
+   aessbox8 sbox(SboxIn, SboxOut);                 // Substitute
+   assign so = {24'h0, SboxOut};                   // Pad sbox output
+   aesmixcolumns32 mb(so, mixed);                  // Mix using MixColumns component
+   mux2 #(32) rmux(mixed, so, finalround, result); // on final round, skip MixColumns
 endmodule
diff --git a/src/ieu/aes/aes64d.sv b/src/ieu/aes/aes64d.sv
index 96355a566..a9e6feb75 100644
--- a/src/ieu/aes/aes64d.sv
+++ b/src/ieu/aes/aes64d.sv
@@ -42,7 +42,7 @@ module aes64d(
    
    mux2 #(64) mixcolmux(SboxOut, rs1, aes64im, MixcolIn);
    
-   // Apply inverse mixword to sbox outputs
+   // Apply inverse MixColumns to sbox outputs
    aesinvmixcolumns32 invmw0(MixcolIn[31:0], MixcolOut[31:0]);
    aesinvmixcolumns32 invmw1(MixcolIn[63:32], MixcolOut[63:32]);
    
diff --git a/src/ieu/aes/aes64e.sv b/src/ieu/aes/aes64e.sv
index b37d8787c..7435f4327 100644
--- a/src/ieu/aes/aes64e.sv
+++ b/src/ieu/aes/aes64e.sv
@@ -46,7 +46,7 @@ module aes64e(
 
     aessbox32 sbox1(ShiftRowOut[63:32], SboxOut[63:32]); // instantiate second sbox
 
-    // Apply mix columns operations
+    // Apply MixColumns operations
     aesmixcolumns32 mw0(SboxOut[31:0],  MixcolOut[31:0]);
     aesmixcolumns32 mw1(SboxOut[63:32], MixcolOut[63:32]);    
 
diff --git a/src/ieu/aes/aesinvmixcolumns8.sv b/src/ieu/aes/aesinvmixcolumns8.sv
new file mode 100644
index 000000000..134ceeb11
--- /dev/null
+++ b/src/ieu/aes/aesinvmixcolumns8.sv
@@ -0,0 +1,47 @@
+///////////////////////////////////////////
+// aesinvmixcolumns8.sv
+//
+// Written: kelvin.tran@okstate.edu, james.stine@okstate.edu
+// Created: 05 March 2024
+//
+// Purpose: AES Inverse MixColumns function for a single input byte (used by aes32d)
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-24 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module aesinvmixcolumns8(
+   input  logic [7:0] a, 
+   output logic [31:0] y
+);
+
+   logic [10:0] t, x0, x1, x2, x3;
+
+   // 11-bit carryless products a*9 (t, x1), a*0xE (x0), a*0xD (x2), a*0xB (x3) built from shifted copies of a
+   assign t  = {a, 3'b0} ^ {3'b0, a};
+   assign x0 = {a, 3'b0} ^ {1'b0, a, 2'b0} ^ {2'b0, a, 1'b0};
+   assign x1 = t;
+   assign x2 = t ^ {1'b0, a, 2'b0};
+   assign x3 = t ^ {2'b0, a, 1'b0};
+
+   galoismultinverse8 gm0 (x0, y[7:0]);
+   galoismultinverse8 gm1 (x1, y[15:8]);
+   galoismultinverse8 gm2 (x2, y[23:16]);
+   galoismultinverse8 gm3 (x3, y[31:24]);
+
+ endmodule 
diff --git a/src/ieu/aes/aesmixcolumns8.sv b/src/ieu/aes/aesmixcolumns8.sv
new file mode 100644
index 000000000..66ab6534f
--- /dev/null
+++ b/src/ieu/aes/aesmixcolumns8.sv
@@ -0,0 +1,39 @@
+///////////////////////////////////////////
+// aesmixcolumns8.sv
+//
+// Written: ryan.swann@okstate.edu, james.stine@okstate.edu, David_Harris@hmc.edu
+// Created: 20 February 2024
+//
+// Purpose: AES MixColumns function for a single input byte, producing a 32-bit word
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-24 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+module aesmixcolumns8(
+   input  logic [7:0] a, 
+   output logic [31:0] y
+);
+
+   logic [7:0] xa, xapa;
+
+   galoismultforward8 gm(a, xa); // xa
+   assign xapa = a ^ xa;         // a ^ xa
+   assign y = {xapa, a, a, xa};
+endmodule

From ec5c67a5c12b52435686c2c7df03fe42d467ec3b Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Fri, 24 May 2024 13:48:53 -0700
Subject: [PATCH 19/30] AES cleanup

---
 src/ieu/aes/aes64e.sv         | 2 +-
 src/ieu/aes/aesmixcolumns8.sv | 2 +-
 src/ieu/bmu/bitmanipalu.sv    | 4 ++--
 src/ieu/kmu/zknde32.sv        | 7 ++++---
 src/ieu/kmu/zknde64.sv        | 7 ++++---
 5 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/ieu/aes/aes64e.sv b/src/ieu/aes/aes64e.sv
index 7435f4327..c1ca9013e 100644
--- a/src/ieu/aes/aes64e.sv
+++ b/src/ieu/aes/aes64e.sv
@@ -48,7 +48,7 @@ module aes64e(
 
     // Apply MixColumns operations
     aesmixcolumns32 mw0(SboxOut[31:0],  MixcolOut[31:0]);
-    aesmixcolumns32 mw1(SboxOut[63:32], MixcolOut[63:32]);    
+    aesmixcolumns32 mw1(SboxOut[63:32], MixcolOut[63:32]);
 
     // Skip mixcolumns on last round
     mux2 #(64) resultmux(MixcolOut, SboxOut, finalround, result);
diff --git a/src/ieu/aes/aesmixcolumns8.sv b/src/ieu/aes/aesmixcolumns8.sv
index 66ab6534f..256f728d0 100644
--- a/src/ieu/aes/aesmixcolumns8.sv
+++ b/src/ieu/aes/aesmixcolumns8.sv
@@ -27,7 +27,7 @@
 
 
 module aesmixcolumns8(
-   input  logic [7:0] a, 
+   input  logic [7:0]  a, 
    output logic [31:0] y
 );
 
diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv
index b0af3e347..36feff63e 100644
--- a/src/ieu/bmu/bitmanipalu.sv
+++ b/src/ieu/bmu/bitmanipalu.sv
@@ -113,8 +113,8 @@ module bitmanipalu import cvw::*; #(parameter cvw_t P) (
 
   // ZKND and ZKNE AES decryption and encryption
   if (P.ZKND_SUPPORTED | P.ZKNE_SUPPORTED) begin: zknde
-    if (P.XLEN == 32) zknde32 #(P) ZKN32(.A(ABMU), .B(BBMU), .Funct7, .round(Rs2E[3:0]), .ZKNSelect(ZBBSelect[3:0]), .ZKNDEResult); 
-    else              zknde64 #(P) ZKN64(.A(ABMU), .B(BBMU), .Funct7, .round(Rs2E[3:0]), .ZKNSelect(ZBBSelect[3:0]), .ZKNDEResult); 
+    if (P.XLEN == 32) zknde32 #(P) ZKN32(.A(ABMU), .B(BBMU), .bs(Funct7[6:5]), .round(Rs2E[3:0]), .ZKNSelect(ZBBSelect[3:0]), .ZKNDEResult); 
+    else              zknde64 #(P) ZKN64(.A(ABMU), .B(BBMU),                   .round(Rs2E[3:0]), .ZKNSelect(ZBBSelect[3:0]), .ZKNDEResult); 
   end else assign ZKNDEResult = '0;
  
   // ZKNH Unit
diff --git a/src/ieu/kmu/zknde32.sv b/src/ieu/kmu/zknde32.sv
index 4c845599c..7e482d757 100644
--- a/src/ieu/kmu/zknde32.sv
+++ b/src/ieu/kmu/zknde32.sv
@@ -28,7 +28,7 @@
 
 module zknde32 import cvw::*; #(parameter cvw_t P) (
    input  logic [31:0] A, B,
-   input  logic [6:0]  Funct7,
+   input  logic [1:0]  bs,
    input  logic [3:0]  round,
    input  logic [3:0]  ZKNSelect,
    output logic [31:0] ZKNDEResult
@@ -39,7 +39,7 @@ module zknde32 import cvw::*; #(parameter cvw_t P) (
     logic [31:0]    ZKNEResult, ZKNDResult, rotin, rotout;             
 
     // Initial shamt and Sbox input selection steps shared between encrypt and decrypt
-    assign shamt = {Funct7[6:5], 3'b0};          // shamt = bs * 8 (convert bytes to bits)
+    assign shamt = {bs, 3'b0};          // shamt = bs * 8 (convert bytes to bits)
     assign SboxIn = B[shamt +: 8];               // select byte bs of rs2
 
     // Handle logic specific to encrypt or decrypt
@@ -55,6 +55,7 @@ module zknde32 import cvw::*; #(parameter cvw_t P) (
         assign rotin = ZKNEResult;
 
     // final rotate and XOR steps shared between encrypt and decrypt
-    rotate #(32) mrot(rotin, shamt, rotout);       // Rotate the mixcolumns output left by shamt (bs * 8)
+    mux4 #(32) mrotmux(rotin, {rotin[23:0], rotin[31:24]}, 
+                       {rotin[15:0], rotin[31:16]}, {rotin[7:0], rotin[31:8]},  bs, rotout); // Rotate the mixcolumns output left by shamt (bs * 8)
     assign ZKNDEResult = A ^ rotout;               // xor with running value (A = rs1)
 endmodule
diff --git a/src/ieu/kmu/zknde64.sv b/src/ieu/kmu/zknde64.sv
index 2a2b6cc10..9c2566718 100644
--- a/src/ieu/kmu/zknde64.sv
+++ b/src/ieu/kmu/zknde64.sv
@@ -28,7 +28,6 @@
 
 module zknde64 import cvw::*; #(parameter cvw_t P) (
    input  logic [63:0] A, B,
-   input  logic [6:0]  Funct7,
    input  logic [3:0]  round,
    input  logic [3:0]  ZKNSelect,
    output logic [63:0] ZKNDEResult
@@ -39,11 +38,13 @@ module zknde64 import cvw::*; #(parameter cvw_t P) (
    
     if (P.ZKND_SUPPORTED) // ZKND supports aes64ds, aes64dsm, aes64im
         aes64d    aes64d(.rs1(A), .rs2(B), .finalround(ZKNSelect[2]), .aes64im(ZKNSelect[3]), .result(aes64dRes)); // decode AES
-    if (P.ZKNE_SUPPORTED) // ZKNE supports aes64es, aes64esm
+    if (P.ZKNE_SUPPORTED) begin // ZKNE supports aes64es, aes64esm
         aes64e    aes64e(.rs1(A), .rs2(B), .finalround(ZKNSelect[2]), .Sbox0Out, .SboxEIn, .result(aes64eRes));
+        mux2 #(32) sboxmux(SboxEIn, SboxKIn, ZKNSelect[1], Sbox0In);
+    end else    
+        assign Sbox0In = SboxKIn;
 
     // One S Box is always needed for aes64ks1i and is also needed for aes64e if that is supported.  Put it at the top level to allow sharing
-    mux2 #(32) sboxmux(SboxEIn, SboxKIn, ZKNSelect[1], Sbox0In);
     aessbox32 sbox(Sbox0In, Sbox0Out);                       // Substitute bytes of value obtained for tmp2 using Rijndael sbox
 
     // Both ZKND and ZKNE support aes64ks1i and aes64ks2 instructions

From b2689b4f01620e8f4dfb22a2e9ae085a5cbfec26 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Fri, 24 May 2024 14:13:57 -0700
Subject: [PATCH 20/30] AES cleanup

---
 src/ieu/aes/aes64d.sv                         |  6 ++--
 src/ieu/aes/aes64e.sv                         |  8 ++---
 ...sinvshiftrow64.sv => aesinvshiftrows64.sv} |  6 ++--
 .../{aesshiftrow64.sv => aesshiftrows64.sv}   |  4 +--
 src/ieu/aes/aesshiftrows64.xv                 | 35 +++++++++++++++++++
 5 files changed, 47 insertions(+), 12 deletions(-)
 rename src/ieu/aes/{aesinvshiftrow64.sv => aesinvshiftrows64.sv} (94%)
 rename src/ieu/aes/{aesshiftrow64.sv => aesshiftrows64.sv} (96%)
 create mode 100644 src/ieu/aes/aesshiftrows64.xv

diff --git a/src/ieu/aes/aes64d.sv b/src/ieu/aes/aes64d.sv
index a9e6feb75..8934f74c3 100644
--- a/src/ieu/aes/aes64d.sv
+++ b/src/ieu/aes/aes64d.sv
@@ -32,13 +32,13 @@ module aes64d(
    output logic [63:0] result
 );
    
-   logic [63:0] 		    ShiftRowOut, SboxOut, MixcolIn, MixcolOut;
+   logic [63:0] 		    ShiftRowsOut, SboxOut, MixcolIn, MixcolOut;
    
    // Apply inverse shiftrows to rs2 and rs1
-   aesinvshiftrow64 srow({rs2, rs1}, ShiftRowOut);
+   aesinvshiftrows64 srow({rs2, rs1}, ShiftRowsOut);
    
    // Apply full word inverse substitution to lower doubleord of shiftrow out
-   aesinvsbox64 invsbox(ShiftRowOut,  SboxOut);
+   aesinvsbox64 invsbox(ShiftRowsOut,  SboxOut);
    
    mux2 #(64) mixcolmux(SboxOut, rs1, aes64im, MixcolIn);
    
diff --git a/src/ieu/aes/aes64e.sv b/src/ieu/aes/aes64e.sv
index c1ca9013e..f40535d8d 100644
--- a/src/ieu/aes/aes64e.sv
+++ b/src/ieu/aes/aes64e.sv
@@ -34,17 +34,17 @@ module aes64e(
     output logic [63:0] result
 );
   
-    logic [63:0]  ShiftRowOut, SboxOut, MixcolOut;
+    logic [63:0]  ShiftRowsOut, SboxOut, MixcolOut;
                 
     // AES shiftrow unit
-    aesshiftrow64 srow({rs2,rs1}, ShiftRowOut);
+    aesshiftrows64 srow({rs2,rs1}, ShiftRowsOut);
    
     // Apply substitution box to 2 lower words
     // Use the shared sbox in zknde64.sv for the first sbox
-    assign SboxEIn = ShiftRowOut[31:0];
+    assign SboxEIn = ShiftRowsOut[31:0];
     assign SboxOut[31:0] = Sbox0Out;
 
-    aessbox32 sbox1(ShiftRowOut[63:32], SboxOut[63:32]); // instantiate second sbox
+    aessbox32 sbox1(ShiftRowsOut[63:32], SboxOut[63:32]); // instantiate second sbox
 
     // Apply MixColumns operations
     aesmixcolumns32 mw0(SboxOut[31:0],  MixcolOut[31:0]);
diff --git a/src/ieu/aes/aesinvshiftrow64.sv b/src/ieu/aes/aesinvshiftrows64.sv
similarity index 94%
rename from src/ieu/aes/aesinvshiftrow64.sv
rename to src/ieu/aes/aesinvshiftrows64.sv
index c6d355b63..c934116ac 100644
--- a/src/ieu/aes/aesinvshiftrow64.sv
+++ b/src/ieu/aes/aesinvshiftrows64.sv
@@ -1,5 +1,5 @@
 ///////////////////////////////////////////
-// aesinvshiftrow.sv
+// aesinvshiftrows64.sv
 //
 // Written: ryan.swann@okstate.edu, james.stine@okstate.edu
 // Created: 20 February 2024
@@ -25,9 +25,9 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-module aesinvshiftrow64(
+module aesinvshiftrows64(
    input  logic [127:0] a, 
-   output logic [63:0] y
+   output logic [63:0]  y
 );
 
    assign y = {a[95:88],   a[119:112], a[15:8],    a[39:32],
diff --git a/src/ieu/aes/aesshiftrow64.sv b/src/ieu/aes/aesshiftrows64.sv
similarity index 96%
rename from src/ieu/aes/aesshiftrow64.sv
rename to src/ieu/aes/aesshiftrows64.sv
index 8691a9946..7c8a68120 100644
--- a/src/ieu/aes/aesshiftrow64.sv
+++ b/src/ieu/aes/aesshiftrows64.sv
@@ -1,5 +1,5 @@
 ///////////////////////////////////////////
-// aesshiftrow.sv
+// aesshiftrows64.sv
 //
 // Written: ryan.swann@okstate.edu, james.stine@okstate.edu
 // Created: 20 February 2024
@@ -25,7 +25,7 @@
 // and limitations under the License.
 ////////////////////////////////////////////////////////////////////////////////////////////////
 
-module aesshiftrow64(
+module aesshiftrows64(
    input  logic [127:0] a, 
    output logic [63:0] y
 );
diff --git a/src/ieu/aes/aesshiftrows64.xv b/src/ieu/aes/aesshiftrows64.xv
new file mode 100644
index 000000000..58638cea5
--- /dev/null
+++ b/src/ieu/aes/aesshiftrows64.xv
@@ -0,0 +1,35 @@
+///////////////////////////////////////////
+// aesshiftrows64.sv
+//
+// Written: ryan.swann@okstate.edu, james.stine@okstate.edu
+// Created: 20 February 2024
+//
+// Purpose: aesshiftrow for taking in first Data line
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+// 
+// Copyright (C) 2021-24 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module aesshiftrows64(
+   input  logic [127:0] a, 
+   output logic [63:0]  y
+);
+		    
+   assign y = {a[31:24],   a[119:112], a[79:72],   a[39:32],
+               a[127:120], a[87:80],   a[47:40],   a[7:0]};   
+endmodule

From a95977590dceb76120dc2240b6810f421664f998 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Fri, 24 May 2024 14:28:30 -0700
Subject: [PATCH 21/30] AES cleanup

---
 src/ieu/aes/aes64d.sv | 10 +++++-----
 src/ieu/aes/aes64e.sv |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/ieu/aes/aes64d.sv b/src/ieu/aes/aes64d.sv
index 8934f74c3..517dd4bf0 100644
--- a/src/ieu/aes/aes64d.sv
+++ b/src/ieu/aes/aes64d.sv
@@ -32,7 +32,7 @@ module aes64d(
    output logic [63:0] result
 );
    
-   logic [63:0] 		    ShiftRowsOut, SboxOut, MixcolIn, MixcolOut;
+   logic [63:0] 		    ShiftRowsOut, SboxOut, MixcolsIn, MixcolsOut;
    
    // Apply inverse shiftrows to rs2 and rs1
    aesinvshiftrows64 srow({rs2, rs1}, ShiftRowsOut);
@@ -40,12 +40,12 @@ module aes64d(
    // Apply full word inverse substitution to lower doubleord of shiftrow out
    aesinvsbox64 invsbox(ShiftRowsOut,  SboxOut);
    
-   mux2 #(64) mixcolmux(SboxOut, rs1, aes64im, MixcolIn);
+   mux2 #(64) mixcolmux(SboxOut, rs1, aes64im, MixcolsIn);
    
    // Apply inverse MixColumns to sbox outputs
-   aesinvmixcolumns32 invmw0(MixcolIn[31:0], MixcolOut[31:0]);
-   aesinvmixcolumns32 invmw1(MixcolIn[63:32], MixcolOut[63:32]);
+   aesinvmixcolumns32 invmw0(MixcolsIn[31:0], MixcolsOut[31:0]);
+   aesinvmixcolumns32 invmw1(MixcolsIn[63:32], MixcolsOut[63:32]);
    
    // Final round skips mixcolumns.
-   mux2 #(64) resultmux(MixcolOut, SboxOut, finalround, result);
+   mux2 #(64) resultmux(MixcolsOut, SboxOut, finalround, result);
 endmodule
diff --git a/src/ieu/aes/aes64e.sv b/src/ieu/aes/aes64e.sv
index f40535d8d..f4b59178a 100644
--- a/src/ieu/aes/aes64e.sv
+++ b/src/ieu/aes/aes64e.sv
@@ -34,7 +34,7 @@ module aes64e(
     output logic [63:0] result
 );
   
-    logic [63:0]  ShiftRowsOut, SboxOut, MixcolOut;
+    logic [63:0]  ShiftRowsOut, SboxOut, MixcolsOut;
                 
     // AES shiftrow unit
     aesshiftrows64 srow({rs2,rs1}, ShiftRowsOut);
@@ -47,9 +47,9 @@ module aes64e(
     aessbox32 sbox1(ShiftRowsOut[63:32], SboxOut[63:32]); // instantiate second sbox
 
     // Apply MixColumns operations
-    aesmixcolumns32 mw0(SboxOut[31:0],  MixcolOut[31:0]);
-    aesmixcolumns32 mw1(SboxOut[63:32], MixcolOut[63:32]);
+    aesmixcolumns32 mw0(SboxOut[31:0],  MixcolsOut[31:0]);
+    aesmixcolumns32 mw1(SboxOut[63:32], MixcolsOut[63:32]);
 
     // Skip mixcolumns on last round
-    mux2 #(64) resultmux(MixcolOut, SboxOut, finalround, result);
+    mux2 #(64) resultmux(MixcolsOut, SboxOut, finalround, result);
 endmodule

From dcafe4793ed6ec8cdf674eb6de3124e38c207ebd Mon Sep 17 00:00:00 2001
From: Jordan Carlin <jordanmcarlin@gmail.com>
Date: Fri, 24 May 2024 15:16:35 -0700
Subject: [PATCH 22/30] Add froundnx and fround.d tests

---
 testbench/tests.vh                            |  10 +-
 .../rv32i_m/D_Zfa/src/froundnx.d_b1-01.S      | 353 ++++++++++++++++++
 .../rv32i_m/D_Zfa/src/froundnx_b1-01.S        | 353 ++++++++++++++++++
 .../rv32i_m/F_Zfa/src/froundnx_b1-01.S        | 353 ++++++++++++++++++
 .../rv64i_m/D_Zfa/src/froundnx.d_b1-01.S      | 353 ++++++++++++++++++
 .../rv64i_m/D_Zfa/src/froundnx_b1-01.S        | 353 ++++++++++++++++++
 .../rv64i_m/F_Zfa/src/froundnx_b1-01.S        | 353 ++++++++++++++++++
 7 files changed, 2127 insertions(+), 1 deletion(-)
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D_Zfa/src/froundnx.d_b1-01.S
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D_Zfa/src/froundnx_b1-01.S
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/F_Zfa/src/froundnx_b1-01.S
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/D_Zfa/src/froundnx.d_b1-01.S
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/D_Zfa/src/froundnx_b1-01.S
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/F_Zfa/src/froundnx_b1-01.S

diff --git a/testbench/tests.vh b/testbench/tests.vh
index 0386dba6e..44e72d53d 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -2279,6 +2279,7 @@ string arch64zknh[] = '{
     //`RISCVARCHTEST,
     `WALLYTEST,
     "rv32i_m/F_Zfa/src/fround_b1-01.S",
+    "rv32i_m/F_Zfa/src/froundnx_b1-01.S",
     "rv32i_m/F_Zfa/src/fleq_b1-01.S",
     "rv32i_m/F_Zfa/src/fleq_b19-01.S", 
     "rv32i_m/F_Zfa/src/fli.s-01.S",
@@ -2296,6 +2297,9 @@ string arch64zknh[] = '{
     //`RISCVARCHTEST,
     `WALLYTEST,
     "rv32i_m/D_Zfa/src/fround_b1-01.S",
+    "rv32i_m/D_Zfa/src/froundnx_b1-01.S",
+    "rv32i_m/D_Zfa/src/fround.d_b1-01.S",
+    "rv32i_m/D_Zfa/src/froundnx.d_b1-01.S",
     "rv32i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S",
     "rv32i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S",
     "rv32i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S",
@@ -2333,6 +2337,7 @@ string arch64zknh[] = '{
     //`RISCVARCHTEST,
     `WALLYTEST,
     "rv64i_m/F_Zfa/src/fround_b1-01.S",
+    "rv64i_m/F_Zfa/src/froundnx_b1-01.S",
     "rv64i_m/F_Zfa/src/fleq_b1-01.S",
     "rv64i_m/F_Zfa/src/fleq_b19-01.S", 
     "rv64i_m/F_Zfa/src/fli.s-01.S",
@@ -2347,7 +2352,10 @@ string arch64zknh[] = '{
   string arch64zfad[] = '{
     //`RISCVARCHTEST,
     `WALLYTEST,
-     "rv64i_m/D_Zfa/src/fround_b1-01.S",
+    "rv64i_m/D_Zfa/src/fround_b1-01.S",
+    "rv64i_m/D_Zfa/src/froundnx_b1-01.S",
+    "rv64i_m/D_Zfa/src/fround.d_b1-01.S",
+    "rv64i_m/D_Zfa/src/froundnx.d_b1-01.S",
     "rv64i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S",
     "rv64i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S", 
     "rv64i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S",
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D_Zfa/src/froundnx.d_b1-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D_Zfa/src/froundnx.d_b1-01.S
new file mode 100644
index 000000000..8e7afb48b
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D_Zfa/src/froundnx.d_b1-01.S
@@ -0,0 +1,353 @@
+
+// -----------
+// This file was generated by riscv_ctg (https://github.com/riscv-software-src/riscv-ctg)
+// version   : 0.12.1
+// timestamp : Mon Apr  1 19:41:20 2024 GMT
+// usage     : riscv_ctg \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/dataset.cgf \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/zfa/froundnx.d.cgf \
+//                  \
+//                  -- xlen 32  \
+// -----------
+//
+// -----------
+// Copyright (c) 2020. RISC-V International. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// -----------
+//
+// This assembly file tests the froundnx.d instruction of the RISC-V RV32FD_Zicsr_Zfa,RV64FD_Zicsr_Zfa extension for the froundnx.d_b1 covergroup.
+// 
+#include "model_test.h"
+#include "arch_test.h"
+RVTEST_ISA("RV32IFD_Zicsr_Zfa,RV64IFD_Zicsr_Zfa")
+
+.section .text.init
+.globl rvtest_entry_point
+rvtest_entry_point:
+RVMODEL_BOOT
+RVTEST_CODE_BEGIN
+// Entry point is exported from section .text.init; the test body below is assembled only when TEST_CASE_1 is defined.
+#ifdef TEST_CASE_1
+// The condition string below ties TEST_CASE_1 to an ISA string matching its regex; the last argument names the covergroup.
+RVTEST_CASE(0,"//check ISA:=regex(.*I.*D.*Zfa.*);def TEST_CASE_1=True;",froundnx.d_b1)
+// Setup: x3 is paired with test_dataset_0 (valaddr_reg of every case) and x1 with signature_x1_1 (presumably the signature pointer - confirm in arch_test.h).
+RVTEST_FP_ENABLE()
+RVTEST_VALBASEUPD(x3,test_dataset_0)
+RVTEST_SIGBASE(x1,signature_x1_1)
+
+inst_0: // macro argument order, matched against the generated fields of the cases: opcode, dest, op1, rmval, 0, 0 (not identified by the fields; one is presumably fcsr_val - confirm), valaddr_reg, val_offset, x4 (not named in the fields), x1 (register set up with signature_x1_1), testreg
+// rs1 == rd, rs1==f31, rd==f31,fs1 == 0 and fe1 == 0x000 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f31; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:0*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f31, f31, dyn, 0, 0, x3, 0*FLEN/8, x4, x1, x2)
+
+inst_1:
+// rs1 != rd, rs1==f29, rd==f30,fs1 == 1 and fe1 == 0x000 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f29; dest:f30; op1val:0x8000000000000000; valaddr_reg:x3;
+val_offset:1*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f30, f29, dyn, 0, 0, x3, 1*FLEN/8, x4, x1, x2)
+
+inst_2:
+// rs1==f30, rd==f29,fs1 == 0 and fe1 == 0x000 and fm1 == 0x0000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f30; dest:f29; op1val:0x1; valaddr_reg:x3;
+val_offset:2*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f29, f30, dyn, 0, 0, x3, 2*FLEN/8, x4, x1, x2)
+
+inst_3:
+// rs1==f27, rd==f28,fs1 == 1 and fe1 == 0x000 and fm1 == 0x0000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f27; dest:f28; op1val:0x8000000000000001; valaddr_reg:x3;
+val_offset:3*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f28, f27, dyn, 0, 0, x3, 3*FLEN/8, x4, x1, x2)
+
+inst_4:
+// rs1==f28, rd==f27,fs1 == 0 and fe1 == 0x000 and fm1 == 0x0000000000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f28; dest:f27; op1val:0x2; valaddr_reg:x3;
+val_offset:4*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f27, f28, dyn, 0, 0, x3, 4*FLEN/8, x4, x1, x2)
+
+inst_5:
+// rs1==f25, rd==f26,fs1 == 1 and fe1 == 0x000 and fm1 == 0x0000000000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f25; dest:f26; op1val:0x8000000000000002; valaddr_reg:x3;
+val_offset:5*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f26, f25, dyn, 0, 0, x3, 5*FLEN/8, x4, x1, x2)
+
+inst_6:
+// rs1==f26, rd==f25,fs1 == 0 and fe1 == 0x000 and fm1 == 0xfffffffffffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f26; dest:f25; op1val:0xfffffffffffff; valaddr_reg:x3;
+val_offset:6*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f25, f26, dyn, 0, 0, x3, 6*FLEN/8, x4, x1, x2)
+
+inst_7:
+// rs1==f23, rd==f24,fs1 == 1 and fe1 == 0x000 and fm1 == 0xfffffffffffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f23; dest:f24; op1val:0x800fffffffffffff; valaddr_reg:x3;
+val_offset:7*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f24, f23, dyn, 0, 0, x3, 7*FLEN/8, x4, x1, x2)
+
+inst_8:
+// rs1==f24, rd==f23,fs1 == 0 and fe1 == 0x001 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f24; dest:f23; op1val:0x10000000000000; valaddr_reg:x3;
+val_offset:8*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f23, f24, dyn, 0, 0, x3, 8*FLEN/8, x4, x1, x2)
+
+inst_9:
+// rs1==f21, rd==f22,fs1 == 1 and fe1 == 0x001 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f21; dest:f22; op1val:0x8010000000000000; valaddr_reg:x3;
+val_offset:9*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f22, f21, dyn, 0, 0, x3, 9*FLEN/8, x4, x1, x2)
+
+inst_10:
+// rs1==f22, rd==f21,fs1 == 0 and fe1 == 0x001 and fm1 == 0x0000000000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f22; dest:f21; op1val:0x10000000000002; valaddr_reg:x3;
+val_offset:10*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f21, f22, dyn, 0, 0, x3, 10*FLEN/8, x4, x1, x2)
+
+inst_11:
+// rs1==f19, rd==f20,fs1 == 1 and fe1 == 0x001 and fm1 == 0x0000000000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f19; dest:f20; op1val:0x8010000000000002; valaddr_reg:x3;
+val_offset:11*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f20, f19, dyn, 0, 0, x3, 11*FLEN/8, x4, x1, x2)
+
+inst_12:
+// rs1==f20, rd==f19,fs1 == 0 and fe1 == 0x7fe and fm1 == 0xfffffffffffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f20; dest:f19; op1val:0x7fefffffffffffff; valaddr_reg:x3;
+val_offset:12*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f19, f20, dyn, 0, 0, x3, 12*FLEN/8, x4, x1, x2)
+
+inst_13:
+// rs1==f17, rd==f18,fs1 == 1 and fe1 == 0x7fe and fm1 == 0xfffffffffffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f17; dest:f18; op1val:0xffefffffffffffff; valaddr_reg:x3;
+val_offset:13*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f18, f17, dyn, 0, 0, x3, 13*FLEN/8, x4, x1, x2)
+
+inst_14:
+// rs1==f18, rd==f17,fs1 == 0 and fe1 == 0x7ff and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f18; dest:f17; op1val:0x7ff0000000000000; valaddr_reg:x3;
+val_offset:14*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f17, f18, dyn, 0, 0, x3, 14*FLEN/8, x4, x1, x2)
+
+inst_15:
+// rs1==f15, rd==f16,fs1 == 1 and fe1 == 0x7ff and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f15; dest:f16; op1val:0xfff0000000000000; valaddr_reg:x3;
+val_offset:15*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f16, f15, dyn, 0, 0, x3, 15*FLEN/8, x4, x1, x2)
+
+inst_16:
+// rs1==f16, rd==f15,fs1 == 0 and fe1 == 0x7ff and fm1 == 0x8000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f16; dest:f15; op1val:0x7ff8000000000000; valaddr_reg:x3;
+val_offset:16*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f15, f16, dyn, 0, 0, x3, 16*FLEN/8, x4, x1, x2)
+
+inst_17:
+// rs1==f13, rd==f14,fs1 == 1 and fe1 == 0x7ff and fm1 == 0x8000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f13; dest:f14; op1val:0xfff8000000000000; valaddr_reg:x3;
+val_offset:17*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f14, f13, dyn, 0, 0, x3, 17*FLEN/8, x4, x1, x2)
+
+inst_18:
+// rs1==f14, rd==f13,fs1 == 0 and fe1 == 0x7ff and fm1 == 0x8000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f14; dest:f13; op1val:0x7ff8000000000001; valaddr_reg:x3;
+val_offset:18*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f13, f14, dyn, 0, 0, x3, 18*FLEN/8, x4, x1, x2)
+
+inst_19:
+// rs1==f11, rd==f12,fs1 == 1 and fe1 == 0x7ff and fm1 == 0x8000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f11; dest:f12; op1val:0xfff8000000000001; valaddr_reg:x3;
+val_offset:19*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f12, f11, dyn, 0, 0, x3, 19*FLEN/8, x4, x1, x2)
+
+inst_20:
+// rs1==f12, rd==f11,fs1 == 0 and fe1 == 0x7ff and fm1 == 0x0000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f12; dest:f11; op1val:0x7ff0000000000001; valaddr_reg:x3;
+val_offset:20*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f11, f12, dyn, 0, 0, x3, 20*FLEN/8, x4, x1, x2)
+
+inst_21:
+// rs1==f9, rd==f10,fs1 == 1 and fe1 == 0x7ff and fm1 == 0x0000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f9; dest:f10; op1val:0xfff0000000000001; valaddr_reg:x3;
+val_offset:21*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f10, f9, dyn, 0, 0, x3, 21*FLEN/8, x4, x1, x2)
+
+inst_22:
+// rs1==f10, rd==f9,fs1 == 0 and fe1 == 0x3ff and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f10; dest:f9; op1val:0x3ff0000000000000; valaddr_reg:x3;
+val_offset:22*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f9, f10, dyn, 0, 0, x3, 22*FLEN/8, x4, x1, x2)
+
+inst_23:
+// rs1==f7, rd==f8,fs1 == 1 and fe1 == 0x3f8 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f7; dest:f8; op1val:0xbf80000000000000; valaddr_reg:x3;
+val_offset:23*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f8, f7, dyn, 0, 0, x3, 23*FLEN/8, x4, x1, x2)
+
+inst_24:
+// rs1==f8, rd==f7,
+/* opcode: froundnx.d ; op1:f8; dest:f7; op1val:0x0; valaddr_reg:x3;
+val_offset:24*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f7, f8, dyn, 0, 0, x3, 24*FLEN/8, x4, x1, x2)
+
+inst_25:
+// rs1==f5, rd==f6,
+/* opcode: froundnx.d ; op1:f5; dest:f6; op1val:0x0; valaddr_reg:x3;
+val_offset:25*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f6, f5, dyn, 0, 0, x3, 25*FLEN/8, x4, x1, x2)
+
+inst_26:
+// rs1==f6, rd==f5,
+/* opcode: froundnx.d ; op1:f6; dest:f5; op1val:0x0; valaddr_reg:x3;
+val_offset:26*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f5, f6, dyn, 0, 0, x3, 26*FLEN/8, x4, x1, x2)
+
+inst_27:
+// rs1==f3, rd==f4,
+/* opcode: froundnx.d ; op1:f3; dest:f4; op1val:0x0; valaddr_reg:x3;
+val_offset:27*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f4, f3, dyn, 0, 0, x3, 27*FLEN/8, x4, x1, x2)
+
+inst_28:
+// rs1==f4, rd==f3,
+/* opcode: froundnx.d ; op1:f4; dest:f3; op1val:0x0; valaddr_reg:x3;
+val_offset:28*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f3, f4, dyn, 0, 0, x3, 28*FLEN/8, x4, x1, x2)
+
+inst_29:
+// rs1==f1, rd==f2,
+/* opcode: froundnx.d ; op1:f1; dest:f2; op1val:0x0; valaddr_reg:x3;
+val_offset:29*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f2, f1, dyn, 0, 0, x3, 29*FLEN/8, x4, x1, x2)
+
+inst_30:
+// rs1==f2, rd==f1,
+/* opcode: froundnx.d ; op1:f2; dest:f1; op1val:0x0; valaddr_reg:x3;
+val_offset:30*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f1, f2, dyn, 0, 0, x3, 30*FLEN/8, x4, x1, x2)
+
+inst_31:
+// rs1==f0,
+/* opcode: froundnx.d ; op1:f0; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:31*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f31, f0, dyn, 0, 0, x3, 31*FLEN/8, x4, x1, x2)
+
+inst_32:
+// rd==f0,
+/* opcode: froundnx.d ; op1:f31; dest:f0; op1val:0x0; valaddr_reg:x3;
+val_offset:32*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f0, f31, dyn, 0, 0, x3, 32*FLEN/8, x4, x1, x2)
+#endif
+
+
+RVTEST_CODE_END
+RVMODEL_HALT
+
+RVTEST_DATA_BEGIN
+.align 4
+rvtest_data: // four fixed marker words precede the operand table
+.word 0xbabecafe
+.word 0xabecafeb
+.word 0xbecafeba
+.word 0xecafebab
+test_dataset_0: // 33 rows in case order: row k holds the op1val of inst_k, which uses val_offset k*FLEN/8 with valaddr_reg x3
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(9223372036854775808,64,FLEN)
+NAN_BOXED(1,64,FLEN)
+NAN_BOXED(9223372036854775809,64,FLEN)
+NAN_BOXED(2,64,FLEN)
+NAN_BOXED(9223372036854775810,64,FLEN)
+NAN_BOXED(4503599627370495,64,FLEN)
+NAN_BOXED(9227875636482146303,64,FLEN)
+NAN_BOXED(4503599627370496,64,FLEN)
+NAN_BOXED(9227875636482146304,64,FLEN)
+NAN_BOXED(4503599627370498,64,FLEN)
+NAN_BOXED(9227875636482146306,64,FLEN)
+NAN_BOXED(9218868437227405311,64,FLEN)
+NAN_BOXED(18442240474082181119,64,FLEN)
+NAN_BOXED(9218868437227405312,64,FLEN)
+NAN_BOXED(18442240474082181120,64,FLEN)
+NAN_BOXED(9221120237041090560,64,FLEN)
+NAN_BOXED(18444492273895866368,64,FLEN)
+NAN_BOXED(9221120237041090561,64,FLEN)
+NAN_BOXED(18444492273895866369,64,FLEN)
+NAN_BOXED(9218868437227405313,64,FLEN)
+NAN_BOXED(18442240474082181121,64,FLEN)
+NAN_BOXED(4607182418800017408,64,FLEN)
+NAN_BOXED(13799029258263199744,64,FLEN) // 0xbf80000000000000, the op1val of inst_23
+NAN_BOXED(0,64,FLEN) // this and the remaining zero rows belong to inst_24..inst_32, whose descriptions list only rs1/rd (op1val 0x0)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+RVTEST_DATA_END
+
+RVMODEL_DATA_BEGIN
+rvtest_sig_begin:
+sig_begin_canary:
+CANARY;
+// Signature regions below are pre-filled with the 4-byte pattern 0xdeadbeef and bracketed by CANARY markers.
+
+// signature_x1_0 has a repeat count of 0*(...), so it reserves no words.
+signature_x1_0:
+    .fill 0*((SIGALIGN)/4),4,0xdeadbeef
+
+// 66*(SIGALIGN/4) words; 66 = 2 * 33 cases (presumably two stored slots per case - confirm against the test macro).
+signature_x1_1:
+    .fill 66*((SIGALIGN)/4),4,0xdeadbeef
+// mtrap_sigptr area (64*XLEN/32 words) with its own canaries, assembled only when rvtest_mtrap_routine is defined.
+#ifdef rvtest_mtrap_routine
+tsig_begin_canary:
+CANARY;
+
+mtrap_sigptr:
+    .fill 64*XLEN/32,4,0xdeadbeef
+
+tsig_end_canary:
+CANARY;
+#endif
+// gpr_save area (32*XLEN/32 words), assembled only when rvtest_gpr_save is defined.
+#ifdef rvtest_gpr_save
+
+gpr_save:
+    .fill 32*XLEN/32,4,0xdeadbeef
+
+#endif
+
+
+sig_end_canary:
+CANARY;
+rvtest_sig_end:
+RVMODEL_DATA_END
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D_Zfa/src/froundnx_b1-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D_Zfa/src/froundnx_b1-01.S
new file mode 100644
index 000000000..6771814ea
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/D_Zfa/src/froundnx_b1-01.S
@@ -0,0 +1,353 @@
+
+// -----------
+// This file was generated by riscv_ctg (https://github.com/riscv-software-src/riscv-ctg)
+// version   : 0.12.1
+// timestamp : Wed Mar  6 21:52:24 2024 GMT
+// usage     : riscv_ctg \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/dataset.cgf \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/zfa/froundnx.s.cgf \
+//                  \
+//                  -- xlen 32  \
+// -----------
+//
+// -----------
+// Copyright (c) 2020. RISC-V International. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// -----------
+//
+// This assembly file tests the froundnx.s instruction of the RISC-V RV32F_Zicsr_Zfa,RV32FD_Zicsr_Zfa,RV64F_Zicsr_Zfa,RV64FD_Zicsr_Zfa extension for the froundnx_b1 covergroup.
+// 
+#include "model_test.h"
+#include "arch_test.h"
+RVTEST_ISA("RV32IF_Zicsr_Zfa,RV32IFD_Zicsr_Zfa,RV64IF_Zicsr_Zfa,RV64IFD_Zicsr_Zfa")
+
+.section .text.init
+.globl rvtest_entry_point
+rvtest_entry_point:
+RVMODEL_BOOT
+RVTEST_CODE_BEGIN
+// Entry point is exported from section .text.init; the test body below is assembled only when TEST_CASE_1 is defined.
+#ifdef TEST_CASE_1
+// The condition string below ties TEST_CASE_1 to an ISA string matching its regex; the last argument names the covergroup.
+RVTEST_CASE(0,"//check ISA:=regex(.*I.*F.*Zfa.*);def TEST_CASE_1=True;",froundnx_b1)
+// Setup: x3 is paired with test_dataset_0 (valaddr_reg of every case) and x1 with signature_x1_1 (presumably the signature pointer - confirm in arch_test.h).
+RVTEST_FP_ENABLE()
+RVTEST_VALBASEUPD(x3,test_dataset_0)
+RVTEST_SIGBASE(x1,signature_x1_1)
+
+inst_0: // macro argument order, matched against the generated fields of the cases: opcode, dest, op1, rmval, 0, 0 (not identified by the fields; one is presumably fcsr_val - confirm), valaddr_reg, val_offset, x4 (not named in the fields), x1 (register set up with signature_x1_1), testreg
+// rs1 == rd, rs1==f31, rd==f31,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f31; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:0*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f31, f31, dyn, 0, 0, x3, 0*FLEN/8, x4, x1, x2)
+
+inst_1:
+// rs1 != rd, rs1==f29, rd==f30,fs1 == 1 and fe1 == 0x00 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f29; dest:f30; op1val:0x80000000; valaddr_reg:x3;
+val_offset:2*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f30, f29, dyn, 0, 0, x3, 2*FLEN/8, x4, x1, x2)
+
+inst_2:
+// rs1==f30, rd==f29,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f30; dest:f29; op1val:0x1; valaddr_reg:x3;
+val_offset:4*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f29, f30, dyn, 0, 0, x3, 4*FLEN/8, x4, x1, x2)
+
+inst_3:
+// rs1==f27, rd==f28,fs1 == 1 and fe1 == 0x00 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f27; dest:f28; op1val:0x80000001; valaddr_reg:x3;
+val_offset:6*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f28, f27, dyn, 0, 0, x3, 6*FLEN/8, x4, x1, x2)
+
+inst_4:
+// rs1==f28, rd==f27,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000002 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f28; dest:f27; op1val:0x2; valaddr_reg:x3;
+val_offset:8*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f27, f28, dyn, 0, 0, x3, 8*FLEN/8, x4, x1, x2)
+
+inst_5:
+// rs1==f25, rd==f26,fs1 == 1 and fe1 == 0x00 and fm1 == 0x7ffffe and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f25; dest:f26; op1val:0x807ffffe; valaddr_reg:x3;
+val_offset:10*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f26, f25, dyn, 0, 0, x3, 10*FLEN/8, x4, x1, x2)
+
+inst_6:
+// rs1==f26, rd==f25,fs1 == 0 and fe1 == 0x00 and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f26; dest:f25; op1val:0x7fffff; valaddr_reg:x3;
+val_offset:12*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f25, f26, dyn, 0, 0, x3, 12*FLEN/8, x4, x1, x2)
+
+inst_7:
+// rs1==f23, rd==f24,fs1 == 1 and fe1 == 0x00 and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f23; dest:f24; op1val:0x807fffff; valaddr_reg:x3;
+val_offset:14*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f24, f23, dyn, 0, 0, x3, 14*FLEN/8, x4, x1, x2)
+
+inst_8:
+// rs1==f24, rd==f23,fs1 == 0 and fe1 == 0x01 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f24; dest:f23; op1val:0x800000; valaddr_reg:x3;
+val_offset:16*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f23, f24, dyn, 0, 0, x3, 16*FLEN/8, x4, x1, x2)
+
+inst_9:
+// rs1==f21, rd==f22,fs1 == 1 and fe1 == 0x01 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f21; dest:f22; op1val:0x80800000; valaddr_reg:x3;
+val_offset:18*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f22, f21, dyn, 0, 0, x3, 18*FLEN/8, x4, x1, x2)
+
+inst_10:
+// rs1==f22, rd==f21,fs1 == 0 and fe1 == 0x01 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f22; dest:f21; op1val:0x800001; valaddr_reg:x3;
+val_offset:20*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f21, f22, dyn, 0, 0, x3, 20*FLEN/8, x4, x1, x2)
+
+inst_11:
+// rs1==f19, rd==f20,fs1 == 1 and fe1 == 0x01 and fm1 == 0x055555 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f19; dest:f20; op1val:0x80855555; valaddr_reg:x3;
+val_offset:22*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f20, f19, dyn, 0, 0, x3, 22*FLEN/8, x4, x1, x2)
+
+inst_12:
+// rs1==f20, rd==f19,fs1 == 0 and fe1 == 0xfe and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f20; dest:f19; op1val:0x7f7fffff; valaddr_reg:x3;
+val_offset:24*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f19, f20, dyn, 0, 0, x3, 24*FLEN/8, x4, x1, x2)
+
+inst_13:
+// rs1==f17, rd==f18,fs1 == 1 and fe1 == 0xfe and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f17; dest:f18; op1val:0xff7fffff; valaddr_reg:x3;
+val_offset:26*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f18, f17, dyn, 0, 0, x3, 26*FLEN/8, x4, x1, x2)
+
+inst_14:
+// rs1==f18, rd==f17,fs1 == 0 and fe1 == 0xff and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f18; dest:f17; op1val:0x7f800000; valaddr_reg:x3;
+val_offset:28*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f17, f18, dyn, 0, 0, x3, 28*FLEN/8, x4, x1, x2)
+
+inst_15:
+// rs1==f15, rd==f16,fs1 == 1 and fe1 == 0xff and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f15; dest:f16; op1val:0xff800000; valaddr_reg:x3;
+val_offset:30*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f16, f15, dyn, 0, 0, x3, 30*FLEN/8, x4, x1, x2)
+
+inst_16:
+// rs1==f16, rd==f15,fs1 == 0 and fe1 == 0xff and fm1 == 0x400000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f16; dest:f15; op1val:0x7fc00000; valaddr_reg:x3;
+val_offset:32*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f15, f16, dyn, 0, 0, x3, 32*FLEN/8, x4, x1, x2)
+
+inst_17:
+// rs1==f13, rd==f14,fs1 == 1 and fe1 == 0xff and fm1 == 0x400000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f13; dest:f14; op1val:0xffc00000; valaddr_reg:x3;
+val_offset:34*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f14, f13, dyn, 0, 0, x3, 34*FLEN/8, x4, x1, x2)
+
+inst_18:
+// rs1==f14, rd==f13,fs1 == 0 and fe1 == 0xff and fm1 == 0x400001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f14; dest:f13; op1val:0x7fc00001; valaddr_reg:x3;
+val_offset:36*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f13, f14, dyn, 0, 0, x3, 36*FLEN/8, x4, x1, x2)
+
+inst_19:
+// rs1==f11, rd==f12,fs1 == 1 and fe1 == 0xff and fm1 == 0x455555 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f11; dest:f12; op1val:0xffc55555; valaddr_reg:x3;
+val_offset:38*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f12, f11, dyn, 0, 0, x3, 38*FLEN/8, x4, x1, x2)
+
+inst_20:
+// rs1==f12, rd==f11,fs1 == 0 and fe1 == 0xff and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f12; dest:f11; op1val:0x7f800001; valaddr_reg:x3;
+val_offset:40*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f11, f12, dyn, 0, 0, x3, 40*FLEN/8, x4, x1, x2)
+
+inst_21:
+// rs1==f9, rd==f10,fs1 == 1 and fe1 == 0xff and fm1 == 0x2aaaaa and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f9; dest:f10; op1val:0xffaaaaaa; valaddr_reg:x3;
+val_offset:42*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f10, f9, dyn, 0, 0, x3, 42*FLEN/8, x4, x1, x2)
+
+inst_22:
+// rs1==f10, rd==f9,fs1 == 0 and fe1 == 0x7f and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f10; dest:f9; op1val:0x3f800000; valaddr_reg:x3;
+val_offset:44*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f9, f10, dyn, 0, 0, x3, 44*FLEN/8, x4, x1, x2)
+
+inst_23:
+// rs1==f7, rd==f8,fs1 == 1 and fe1 == 0x7f and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff  
+/* opcode: froundnx.s ; op1:f7; dest:f8; op1val:0xbf800000; valaddr_reg:x3;
+val_offset:46*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f8, f7, dyn, 0, 0, x3, 46*FLEN/8, x4, x1, x2)
+
+inst_24:
+// rs1==f8, rd==f7,
+/* opcode: froundnx.s ; op1:f8; dest:f7; op1val:0x0; valaddr_reg:x3;
+val_offset:48*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f7, f8, dyn, 0, 0, x3, 48*FLEN/8, x4, x1, x2)
+
+inst_25:
+// rs1==f5, rd==f6,
+/* opcode: froundnx.s ; op1:f5; dest:f6; op1val:0x0; valaddr_reg:x3;
+val_offset:50*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f6, f5, dyn, 0, 0, x3, 50*FLEN/8, x4, x1, x2)
+
+inst_26:
+// rs1==f6, rd==f5,
+/* opcode: froundnx.s ; op1:f6; dest:f5; op1val:0x0; valaddr_reg:x3;
+val_offset:52*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f5, f6, dyn, 0, 0, x3, 52*FLEN/8, x4, x1, x2)
+
+inst_27:
+// rs1==f3, rd==f4,
+/* opcode: froundnx.s ; op1:f3; dest:f4; op1val:0x0; valaddr_reg:x3;
+val_offset:54*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f4, f3, dyn, 0, 0, x3, 54*FLEN/8, x4, x1, x2)
+
+inst_28:
+// rs1==f4, rd==f3,
+/* opcode: froundnx.s ; op1:f4; dest:f3; op1val:0x0; valaddr_reg:x3;
+val_offset:56*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f3, f4, dyn, 0, 0, x3, 56*FLEN/8, x4, x1, x2)
+
+inst_29:
+// rs1==f1, rd==f2,
+/* opcode: froundnx.s ; op1:f1; dest:f2; op1val:0x0; valaddr_reg:x3;
+val_offset:58*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f2, f1, dyn, 0, 0, x3, 58*FLEN/8, x4, x1, x2)
+
+inst_30:
+// rs1==f2, rd==f1,
+/* opcode: froundnx.s ; op1:f2; dest:f1; op1val:0x0; valaddr_reg:x3;
+val_offset:60*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f1, f2, dyn, 0, 0, x3, 60*FLEN/8, x4, x1, x2)
+
+inst_31:
+// rs1==f0,
+/* opcode: froundnx.s ; op1:f0; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:62*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f31, f0, dyn, 0, 0, x3, 62*FLEN/8, x4, x1, x2)
+
+inst_32:
+// rd==f0,
+/* opcode: froundnx.s ; op1:f31; dest:f0; op1val:0x0; valaddr_reg:x3;
+val_offset:64*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f0, f31, dyn, 0, 0, x3, 64*FLEN/8, x4, x1, x2)
+#endif
+
+
+RVTEST_CODE_END
+RVMODEL_HALT
+
+RVTEST_DATA_BEGIN
+.align 4
+rvtest_data: // four fixed marker words precede the operand table
+.word 0xbabecafe
+.word 0xabecafeb
+.word 0xbecafeba
+.word 0xecafebab
+test_dataset_0: // 33 rows in case order (row k = op1val of inst_k). NOTE(review): inst_k uses val_offset 2*k*FLEN/8, not k*FLEN/8 - confirm each row expands to two FLEN-sized slots, otherwise cases load a different row than their description states
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(2147483648,32,FLEN)
+NAN_BOXED(1,32,FLEN)
+NAN_BOXED(2147483649,32,FLEN)
+NAN_BOXED(2,32,FLEN)
+NAN_BOXED(2155872254,32,FLEN)
+NAN_BOXED(8388607,32,FLEN)
+NAN_BOXED(2155872255,32,FLEN)
+NAN_BOXED(8388608,32,FLEN)
+NAN_BOXED(2155872256,32,FLEN)
+NAN_BOXED(8388609,32,FLEN)
+NAN_BOXED(2156221781,32,FLEN)
+NAN_BOXED(2139095039,32,FLEN)
+NAN_BOXED(4286578687,32,FLEN)
+NAN_BOXED(2139095040,32,FLEN)
+NAN_BOXED(4286578688,32,FLEN)
+NAN_BOXED(2143289344,32,FLEN)
+NAN_BOXED(4290772992,32,FLEN)
+NAN_BOXED(2143289345,32,FLEN)
+NAN_BOXED(4291122517,32,FLEN)
+NAN_BOXED(2139095041,32,FLEN)
+NAN_BOXED(4289374890,32,FLEN)
+NAN_BOXED(1065353216,32,FLEN)
+NAN_BOXED(3212836864,32,FLEN) // 0xbf800000, the op1val of inst_23
+NAN_BOXED(0,64,FLEN) // this and the remaining zero rows belong to inst_24..inst_32 (op1val 0x0); the width argument is 64 here, 32 above
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+RVTEST_DATA_END
+
+RVMODEL_DATA_BEGIN
+rvtest_sig_begin:
+sig_begin_canary:
+CANARY;
+// Signature regions below are pre-filled with the 4-byte pattern 0xdeadbeef and bracketed by CANARY markers.
+
+// signature_x1_0 has a repeat count of 0*(...), so it reserves no words.
+signature_x1_0:
+    .fill 0*((SIGALIGN)/4),4,0xdeadbeef
+
+// 66*(SIGALIGN/4) words; 66 = 2 * 33 cases (presumably two stored slots per case - confirm against the test macro).
+signature_x1_1:
+    .fill 66*((SIGALIGN)/4),4,0xdeadbeef
+// mtrap_sigptr area (64*XLEN/32 words) with its own canaries, assembled only when rvtest_mtrap_routine is defined.
+#ifdef rvtest_mtrap_routine
+tsig_begin_canary:
+CANARY;
+
+mtrap_sigptr:
+    .fill 64*XLEN/32,4,0xdeadbeef
+
+tsig_end_canary:
+CANARY;
+#endif
+// gpr_save area (32*XLEN/32 words), assembled only when rvtest_gpr_save is defined.
+#ifdef rvtest_gpr_save
+
+gpr_save:
+    .fill 32*XLEN/32,4,0xdeadbeef
+
+#endif
+
+
+sig_end_canary:
+CANARY;
+rvtest_sig_end:
+RVMODEL_DATA_END
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/F_Zfa/src/froundnx_b1-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/F_Zfa/src/froundnx_b1-01.S
new file mode 100644
index 000000000..bef26add0
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/F_Zfa/src/froundnx_b1-01.S
@@ -0,0 +1,353 @@
+
+// -----------
+// This file was generated by riscv_ctg (https://github.com/riscv-software-src/riscv-ctg)
+// version   : 0.12.1
+// timestamp : Mon Apr  1 19:36:23 2024 GMT
+// usage     : riscv_ctg \
+//                  -- cgf //                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/dataset.cgf \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/zfa/froundnx.s.cgf \
+ \
+//                  -- xlen 32  \
+// -----------
+//
+// -----------
+// Copyright (c) 2020. RISC-V International. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// -----------
+//
+// This assembly file tests the froundnx.s instruction of the RISC-V RV32F_Zicsr_Zfa,RV32FD_Zicsr_Zfa,RV64F_Zicsr_Zfa,RV64FD_Zicsr_Zfa extension for the froundnx_b1 covergroup.
+// 
+#include "model_test.h"
+#include "arch_test.h"
+RVTEST_ISA("RV32IF_Zicsr_Zfa,RV32IFD_Zicsr_Zfa,RV64IF_Zicsr_Zfa,RV64IFD_Zicsr_Zfa")
+
+.section .text.init
+.globl rvtest_entry_point
+rvtest_entry_point:
+RVMODEL_BOOT
+RVTEST_CODE_BEGIN
+
+#ifdef TEST_CASE_1
+
+RVTEST_CASE(0,"//check ISA:=regex(.*I.*F.*Zfa.*);def TEST_CASE_1=True;",froundnx_b1)
+
+RVTEST_FP_ENABLE()
+RVTEST_VALBASEUPD(x3,test_dataset_0)
+RVTEST_SIGBASE(x1,signature_x1_1)
+
+inst_0:
+// rs1 == rd, rs1==f31, rd==f31,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f31; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:0*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f31, f31, dyn, 0, 0, x3, 0*FLEN/8, x4, x1, x2)
+
+inst_1:
+// rs1 != rd, rs1==f29, rd==f30,fs1 == 1 and fe1 == 0x00 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f29; dest:f30; op1val:0x80000000; valaddr_reg:x3;
+val_offset:2*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f30, f29, dyn, 0, 0, x3, 2*FLEN/8, x4, x1, x2)
+
+inst_2:
+// rs1==f30, rd==f29,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f30; dest:f29; op1val:0x1; valaddr_reg:x3;
+val_offset:4*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f29, f30, dyn, 0, 0, x3, 4*FLEN/8, x4, x1, x2)
+
+inst_3:
+// rs1==f27, rd==f28,fs1 == 1 and fe1 == 0x00 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f27; dest:f28; op1val:0x80000001; valaddr_reg:x3;
+val_offset:6*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f28, f27, dyn, 0, 0, x3, 6*FLEN/8, x4, x1, x2)
+
+inst_4:
+// rs1==f28, rd==f27,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f28; dest:f27; op1val:0x2; valaddr_reg:x3;
+val_offset:8*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f27, f28, dyn, 0, 0, x3, 8*FLEN/8, x4, x1, x2)
+
+inst_5:
+// rs1==f25, rd==f26,fs1 == 1 and fe1 == 0x00 and fm1 == 0x7ffffe and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f25; dest:f26; op1val:0x807ffffe; valaddr_reg:x3;
+val_offset:10*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f26, f25, dyn, 0, 0, x3, 10*FLEN/8, x4, x1, x2)
+
+inst_6:
+// rs1==f26, rd==f25,fs1 == 0 and fe1 == 0x00 and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f26; dest:f25; op1val:0x7fffff; valaddr_reg:x3;
+val_offset:12*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f25, f26, dyn, 0, 0, x3, 12*FLEN/8, x4, x1, x2)
+
+inst_7:
+// rs1==f23, rd==f24,fs1 == 1 and fe1 == 0x00 and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f23; dest:f24; op1val:0x807fffff; valaddr_reg:x3;
+val_offset:14*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f24, f23, dyn, 0, 0, x3, 14*FLEN/8, x4, x1, x2)
+
+inst_8:
+// rs1==f24, rd==f23,fs1 == 0 and fe1 == 0x01 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f24; dest:f23; op1val:0x800000; valaddr_reg:x3;
+val_offset:16*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f23, f24, dyn, 0, 0, x3, 16*FLEN/8, x4, x1, x2)
+
+inst_9:
+// rs1==f21, rd==f22,fs1 == 1 and fe1 == 0x01 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f21; dest:f22; op1val:0x80800000; valaddr_reg:x3;
+val_offset:18*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f22, f21, dyn, 0, 0, x3, 18*FLEN/8, x4, x1, x2)
+
+inst_10:
+// rs1==f22, rd==f21,fs1 == 0 and fe1 == 0x01 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f22; dest:f21; op1val:0x800001; valaddr_reg:x3;
+val_offset:20*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f21, f22, dyn, 0, 0, x3, 20*FLEN/8, x4, x1, x2)
+
+inst_11:
+// rs1==f19, rd==f20,fs1 == 1 and fe1 == 0x01 and fm1 == 0x055555 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f19; dest:f20; op1val:0x80855555; valaddr_reg:x3;
+val_offset:22*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f20, f19, dyn, 0, 0, x3, 22*FLEN/8, x4, x1, x2)
+
+inst_12:
+// rs1==f20, rd==f19,fs1 == 0 and fe1 == 0xfe and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f20; dest:f19; op1val:0x7f7fffff; valaddr_reg:x3;
+val_offset:24*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f19, f20, dyn, 0, 0, x3, 24*FLEN/8, x4, x1, x2)
+
+inst_13:
+// rs1==f17, rd==f18,fs1 == 1 and fe1 == 0xfe and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f17; dest:f18; op1val:0xff7fffff; valaddr_reg:x3;
+val_offset:26*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f18, f17, dyn, 0, 0, x3, 26*FLEN/8, x4, x1, x2)
+
+inst_14:
+// rs1==f18, rd==f17,fs1 == 0 and fe1 == 0xff and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f18; dest:f17; op1val:0x7f800000; valaddr_reg:x3;
+val_offset:28*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f17, f18, dyn, 0, 0, x3, 28*FLEN/8, x4, x1, x2)
+
+inst_15:
+// rs1==f15, rd==f16,fs1 == 1 and fe1 == 0xff and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f15; dest:f16; op1val:0xff800000; valaddr_reg:x3;
+val_offset:30*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f16, f15, dyn, 0, 0, x3, 30*FLEN/8, x4, x1, x2)
+
+inst_16:
+// rs1==f16, rd==f15,fs1 == 0 and fe1 == 0xff and fm1 == 0x400000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f16; dest:f15; op1val:0x7fc00000; valaddr_reg:x3;
+val_offset:32*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f15, f16, dyn, 0, 0, x3, 32*FLEN/8, x4, x1, x2)
+
+inst_17:
+// rs1==f13, rd==f14,fs1 == 1 and fe1 == 0xff and fm1 == 0x400000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f13; dest:f14; op1val:0xffc00000; valaddr_reg:x3;
+val_offset:34*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f14, f13, dyn, 0, 0, x3, 34*FLEN/8, x4, x1, x2)
+
+inst_18:
+// rs1==f14, rd==f13,fs1 == 0 and fe1 == 0xff and fm1 == 0x400001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f14; dest:f13; op1val:0x7fc00001; valaddr_reg:x3;
+val_offset:36*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f13, f14, dyn, 0, 0, x3, 36*FLEN/8, x4, x1, x2)
+
+inst_19:
+// rs1==f11, rd==f12,fs1 == 1 and fe1 == 0xff and fm1 == 0x455555 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f11; dest:f12; op1val:0xffc55555; valaddr_reg:x3;
+val_offset:38*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f12, f11, dyn, 0, 0, x3, 38*FLEN/8, x4, x1, x2)
+
+inst_20:
+// rs1==f12, rd==f11,fs1 == 0 and fe1 == 0xff and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f12; dest:f11; op1val:0x7f800001; valaddr_reg:x3;
+val_offset:40*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f11, f12, dyn, 0, 0, x3, 40*FLEN/8, x4, x1, x2)
+
+inst_21:
+// rs1==f9, rd==f10,fs1 == 1 and fe1 == 0xff and fm1 == 0x2aaaaa and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f9; dest:f10; op1val:0xffaaaaaa; valaddr_reg:x3;
+val_offset:42*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f10, f9, dyn, 0, 0, x3, 42*FLEN/8, x4, x1, x2)
+
+inst_22:
+// rs1==f10, rd==f9,fs1 == 0 and fe1 == 0x7f and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f10; dest:f9; op1val:0x3f800000; valaddr_reg:x3;
+val_offset:44*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f9, f10, dyn, 0, 0, x3, 44*FLEN/8, x4, x1, x2)
+
+inst_23:
+// rs1==f7, rd==f8,fs1 == 1 and fe1 == 0x7f and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.s ; op1:f7; dest:f8; op1val:0xbf800000; valaddr_reg:x3;
+val_offset:46*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f8, f7, dyn, 0, 0, x3, 46*FLEN/8, x4, x1, x2)
+
+inst_24:
+// rs1==f8, rd==f7,
+/* opcode: froundnx.s ; op1:f8; dest:f7; op1val:0x0; valaddr_reg:x3;
+val_offset:48*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f7, f8, dyn, 0, 0, x3, 48*FLEN/8, x4, x1, x2)
+
+inst_25:
+// rs1==f5, rd==f6,
+/* opcode: froundnx.s ; op1:f5; dest:f6; op1val:0x0; valaddr_reg:x3;
+val_offset:50*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f6, f5, dyn, 0, 0, x3, 50*FLEN/8, x4, x1, x2)
+
+inst_26:
+// rs1==f6, rd==f5,
+/* opcode: froundnx.s ; op1:f6; dest:f5; op1val:0x0; valaddr_reg:x3;
+val_offset:52*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f5, f6, dyn, 0, 0, x3, 52*FLEN/8, x4, x1, x2)
+
+inst_27:
+// rs1==f3, rd==f4,
+/* opcode: froundnx.s ; op1:f3; dest:f4; op1val:0x0; valaddr_reg:x3;
+val_offset:54*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f4, f3, dyn, 0, 0, x3, 54*FLEN/8, x4, x1, x2)
+
+inst_28:
+// rs1==f4, rd==f3,
+/* opcode: froundnx.s ; op1:f4; dest:f3; op1val:0x0; valaddr_reg:x3;
+val_offset:56*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f3, f4, dyn, 0, 0, x3, 56*FLEN/8, x4, x1, x2)
+
+inst_29:
+// rs1==f1, rd==f2,
+/* opcode: froundnx.s ; op1:f1; dest:f2; op1val:0x0; valaddr_reg:x3;
+val_offset:58*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f2, f1, dyn, 0, 0, x3, 58*FLEN/8, x4, x1, x2)
+
+inst_30:
+// rs1==f2, rd==f1,
+/* opcode: froundnx.s ; op1:f2; dest:f1; op1val:0x0; valaddr_reg:x3;
+val_offset:60*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f1, f2, dyn, 0, 0, x3, 60*FLEN/8, x4, x1, x2)
+
+inst_31:
+// rs1==f0,
+/* opcode: froundnx.s ; op1:f0; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:62*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f31, f0, dyn, 0, 0, x3, 62*FLEN/8, x4, x1, x2)
+
+inst_32:
+// rd==f0,
+/* opcode: froundnx.s ; op1:f31; dest:f0; op1val:0x0; valaddr_reg:x3;
+val_offset:64*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f0, f31, dyn, 0, 0, x3, 64*FLEN/8, x4, x1, x2)
+#endif
+
+
+RVTEST_CODE_END
+RVMODEL_HALT
+
+RVTEST_DATA_BEGIN
+.align 4
+rvtest_data:
+.word 0xbabecafe
+.word 0xabecafeb
+.word 0xbecafeba
+.word 0xecafebab
+test_dataset_0:
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(2147483648,32,FLEN)
+NAN_BOXED(1,32,FLEN)
+NAN_BOXED(2147483649,32,FLEN)
+NAN_BOXED(2,32,FLEN)
+NAN_BOXED(2155872254,32,FLEN)
+NAN_BOXED(8388607,32,FLEN)
+NAN_BOXED(2155872255,32,FLEN)
+NAN_BOXED(8388608,32,FLEN)
+NAN_BOXED(2155872256,32,FLEN)
+NAN_BOXED(8388609,32,FLEN)
+NAN_BOXED(2156221781,32,FLEN)
+NAN_BOXED(2139095039,32,FLEN)
+NAN_BOXED(4286578687,32,FLEN)
+NAN_BOXED(2139095040,32,FLEN)
+NAN_BOXED(4286578688,32,FLEN)
+NAN_BOXED(2143289344,32,FLEN)
+NAN_BOXED(4290772992,32,FLEN)
+NAN_BOXED(2143289345,32,FLEN)
+NAN_BOXED(4291122517,32,FLEN)
+NAN_BOXED(2139095041,32,FLEN)
+NAN_BOXED(4289374890,32,FLEN)
+NAN_BOXED(1065353216,32,FLEN)
+NAN_BOXED(3212836864,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+RVTEST_DATA_END
+
+RVMODEL_DATA_BEGIN
+rvtest_sig_begin:
+sig_begin_canary:
+CANARY;
+
+
+
+signature_x1_0:
+    .fill 0*((SIGALIGN)/4),4,0xdeadbeef
+
+
+signature_x1_1:
+    .fill 66*((SIGALIGN)/4),4,0xdeadbeef
+
+#ifdef rvtest_mtrap_routine
+tsig_begin_canary:
+CANARY;
+
+mtrap_sigptr:
+    .fill 64*XLEN/32,4,0xdeadbeef
+
+tsig_end_canary:
+CANARY;
+#endif
+
+#ifdef rvtest_gpr_save
+
+gpr_save:
+    .fill 32*XLEN/32,4,0xdeadbeef
+
+#endif
+
+
+sig_end_canary:
+CANARY;
+rvtest_sig_end:
+RVMODEL_DATA_END
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/D_Zfa/src/froundnx.d_b1-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/D_Zfa/src/froundnx.d_b1-01.S
new file mode 100644
index 000000000..16874bb6a
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/D_Zfa/src/froundnx.d_b1-01.S
@@ -0,0 +1,353 @@
+
+// -----------
+// This file was generated by riscv_ctg (https://github.com/riscv-software-src/riscv-ctg)
+// version   : 0.12.1
+// timestamp : Mon Apr  1 19:41:22 2024 GMT
+// usage     : riscv_ctg \
+//                  -- cgf //                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/dataset.cgf \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/zfa/froundnx.d.cgf \
+ \
+//                  -- xlen 64  \
+// -----------
+//
+// -----------
+// Copyright (c) 2020. RISC-V International. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// -----------
+//
+// This assembly file tests the froundnx.d instruction of the RISC-V RV64FD_Zicsr_Zfa extension for the froundnx.d_b1 covergroup.
+// 
+#include "model_test.h"
+#include "arch_test.h"
+RVTEST_ISA("RV64IFD_Zicsr_Zfa")
+
+.section .text.init
+.globl rvtest_entry_point
+rvtest_entry_point:
+RVMODEL_BOOT
+RVTEST_CODE_BEGIN
+
+#ifdef TEST_CASE_1
+
+RVTEST_CASE(0,"//check ISA:=regex(.*RV64.*I.*D.*Zfa.*);def TEST_CASE_1=True;",froundnx.d_b1)
+
+RVTEST_FP_ENABLE()
+RVTEST_VALBASEUPD(x3,test_dataset_0)
+RVTEST_SIGBASE(x1,signature_x1_1)
+
+inst_0:
+// rs1 == rd, rs1==f31, rd==f31,fs1 == 0 and fe1 == 0x000 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f31; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:0*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f31, f31, dyn, 0, 0, x3, 0*FLEN/8, x4, x1, x2)
+
+inst_1:
+// rs1 != rd, rs1==f29, rd==f30,fs1 == 1 and fe1 == 0x000 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f29; dest:f30; op1val:0x8000000000000000; valaddr_reg:x3;
+val_offset:1*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f30, f29, dyn, 0, 0, x3, 1*FLEN/8, x4, x1, x2)
+
+inst_2:
+// rs1==f30, rd==f29,fs1 == 0 and fe1 == 0x000 and fm1 == 0x0000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f30; dest:f29; op1val:0x1; valaddr_reg:x3;
+val_offset:2*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f29, f30, dyn, 0, 0, x3, 2*FLEN/8, x4, x1, x2)
+
+inst_3:
+// rs1==f27, rd==f28,fs1 == 1 and fe1 == 0x000 and fm1 == 0x0000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f27; dest:f28; op1val:0x8000000000000001; valaddr_reg:x3;
+val_offset:3*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f28, f27, dyn, 0, 0, x3, 3*FLEN/8, x4, x1, x2)
+
+inst_4:
+// rs1==f28, rd==f27,fs1 == 0 and fe1 == 0x000 and fm1 == 0x0000000000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f28; dest:f27; op1val:0x2; valaddr_reg:x3;
+val_offset:4*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f27, f28, dyn, 0, 0, x3, 4*FLEN/8, x4, x1, x2)
+
+inst_5:
+// rs1==f25, rd==f26,fs1 == 1 and fe1 == 0x000 and fm1 == 0x0000000000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f25; dest:f26; op1val:0x8000000000000002; valaddr_reg:x3;
+val_offset:5*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f26, f25, dyn, 0, 0, x3, 5*FLEN/8, x4, x1, x2)
+
+inst_6:
+// rs1==f26, rd==f25,fs1 == 0 and fe1 == 0x000 and fm1 == 0xfffffffffffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f26; dest:f25; op1val:0xfffffffffffff; valaddr_reg:x3;
+val_offset:6*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f25, f26, dyn, 0, 0, x3, 6*FLEN/8, x4, x1, x2)
+
+inst_7:
+// rs1==f23, rd==f24,fs1 == 1 and fe1 == 0x000 and fm1 == 0xfffffffffffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f23; dest:f24; op1val:0x800fffffffffffff; valaddr_reg:x3;
+val_offset:7*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f24, f23, dyn, 0, 0, x3, 7*FLEN/8, x4, x1, x2)
+
+inst_8:
+// rs1==f24, rd==f23,fs1 == 0 and fe1 == 0x001 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f24; dest:f23; op1val:0x10000000000000; valaddr_reg:x3;
+val_offset:8*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f23, f24, dyn, 0, 0, x3, 8*FLEN/8, x4, x1, x2)
+
+inst_9:
+// rs1==f21, rd==f22,fs1 == 1 and fe1 == 0x001 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f21; dest:f22; op1val:0x8010000000000000; valaddr_reg:x3;
+val_offset:9*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f22, f21, dyn, 0, 0, x3, 9*FLEN/8, x4, x1, x2)
+
+inst_10:
+// rs1==f22, rd==f21,fs1 == 0 and fe1 == 0x001 and fm1 == 0x0000000000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f22; dest:f21; op1val:0x10000000000002; valaddr_reg:x3;
+val_offset:10*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f21, f22, dyn, 0, 0, x3, 10*FLEN/8, x4, x1, x2)
+
+inst_11:
+// rs1==f19, rd==f20,fs1 == 1 and fe1 == 0x001 and fm1 == 0x0000000000002 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f19; dest:f20; op1val:0x8010000000000002; valaddr_reg:x3;
+val_offset:11*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f20, f19, dyn, 0, 0, x3, 11*FLEN/8, x4, x1, x2)
+
+inst_12:
+// rs1==f20, rd==f19,fs1 == 0 and fe1 == 0x7fe and fm1 == 0xfffffffffffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f20; dest:f19; op1val:0x7fefffffffffffff; valaddr_reg:x3;
+val_offset:12*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f19, f20, dyn, 0, 0, x3, 12*FLEN/8, x4, x1, x2)
+
+inst_13:
+// rs1==f17, rd==f18,fs1 == 1 and fe1 == 0x7fe and fm1 == 0xfffffffffffff and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f17; dest:f18; op1val:0xffefffffffffffff; valaddr_reg:x3;
+val_offset:13*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f18, f17, dyn, 0, 0, x3, 13*FLEN/8, x4, x1, x2)
+
+inst_14:
+// rs1==f18, rd==f17,fs1 == 0 and fe1 == 0x7ff and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f18; dest:f17; op1val:0x7ff0000000000000; valaddr_reg:x3;
+val_offset:14*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f17, f18, dyn, 0, 0, x3, 14*FLEN/8, x4, x1, x2)
+
+inst_15:
+// rs1==f15, rd==f16,fs1 == 1 and fe1 == 0x7ff and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f15; dest:f16; op1val:0xfff0000000000000; valaddr_reg:x3;
+val_offset:15*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f16, f15, dyn, 0, 0, x3, 15*FLEN/8, x4, x1, x2)
+
+inst_16:
+// rs1==f16, rd==f15,fs1 == 0 and fe1 == 0x7ff and fm1 == 0x8000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f16; dest:f15; op1val:0x7ff8000000000000; valaddr_reg:x3;
+val_offset:16*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f15, f16, dyn, 0, 0, x3, 16*FLEN/8, x4, x1, x2)
+
+inst_17:
+// rs1==f13, rd==f14,fs1 == 1 and fe1 == 0x7ff and fm1 == 0x8000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f13; dest:f14; op1val:0xfff8000000000000; valaddr_reg:x3;
+val_offset:17*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f14, f13, dyn, 0, 0, x3, 17*FLEN/8, x4, x1, x2)
+
+inst_18:
+// rs1==f14, rd==f13,fs1 == 0 and fe1 == 0x7ff and fm1 == 0x8000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f14; dest:f13; op1val:0x7ff8000000000001; valaddr_reg:x3;
+val_offset:18*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f13, f14, dyn, 0, 0, x3, 18*FLEN/8, x4, x1, x2)
+
+inst_19:
+// rs1==f11, rd==f12,fs1 == 1 and fe1 == 0x7ff and fm1 == 0x8000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f11; dest:f12; op1val:0xfff8000000000001; valaddr_reg:x3;
+val_offset:19*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f12, f11, dyn, 0, 0, x3, 19*FLEN/8, x4, x1, x2)
+
+inst_20:
+// rs1==f12, rd==f11,fs1 == 0 and fe1 == 0x7ff and fm1 == 0x0000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f12; dest:f11; op1val:0x7ff0000000000001; valaddr_reg:x3;
+val_offset:20*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f11, f12, dyn, 0, 0, x3, 20*FLEN/8, x4, x1, x2)
+
+inst_21:
+// rs1==f9, rd==f10,fs1 == 1 and fe1 == 0x7ff and fm1 == 0x0000000000001 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f9; dest:f10; op1val:0xfff0000000000001; valaddr_reg:x3;
+val_offset:21*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f10, f9, dyn, 0, 0, x3, 21*FLEN/8, x4, x1, x2)
+
+inst_22:
+// rs1==f10, rd==f9,fs1 == 0 and fe1 == 0x3ff and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f10; dest:f9; op1val:0x3ff0000000000000; valaddr_reg:x3;
+val_offset:22*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f9, f10, dyn, 0, 0, x3, 22*FLEN/8, x4, x1, x2)
+
+inst_23:
+// rs1==f7, rd==f8,fs1 == 1 and fe1 == 0x3f8 and fm1 == 0x0000000000000 and  fcsr == 0x0 and rm_val == 7   
+/* opcode: froundnx.d ; op1:f7; dest:f8; op1val:0xbf80000000000000; valaddr_reg:x3;
+val_offset:23*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f8, f7, dyn, 0, 0, x3, 23*FLEN/8, x4, x1, x2)
+
+inst_24:
+// rs1==f8, rd==f7,
+/* opcode: froundnx.d ; op1:f8; dest:f7; op1val:0x0; valaddr_reg:x3;
+val_offset:24*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f7, f8, dyn, 0, 0, x3, 24*FLEN/8, x4, x1, x2)
+
+inst_25:
+// rs1==f5, rd==f6,
+/* opcode: froundnx.d ; op1:f5; dest:f6; op1val:0x0; valaddr_reg:x3;
+val_offset:25*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f6, f5, dyn, 0, 0, x3, 25*FLEN/8, x4, x1, x2)
+
+inst_26:
+// rs1==f6, rd==f5,
+/* opcode: froundnx.d ; op1:f6; dest:f5; op1val:0x0; valaddr_reg:x3;
+val_offset:26*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f5, f6, dyn, 0, 0, x3, 26*FLEN/8, x4, x1, x2)
+
+inst_27:
+// rs1==f3, rd==f4,
+/* opcode: froundnx.d ; op1:f3; dest:f4; op1val:0x0; valaddr_reg:x3;
+val_offset:27*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f4, f3, dyn, 0, 0, x3, 27*FLEN/8, x4, x1, x2)
+
+inst_28:
+// rs1==f4, rd==f3,
+/* opcode: froundnx.d ; op1:f4; dest:f3; op1val:0x0; valaddr_reg:x3;
+val_offset:28*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f3, f4, dyn, 0, 0, x3, 28*FLEN/8, x4, x1, x2)
+
+inst_29:
+// rs1==f1, rd==f2,
+/* opcode: froundnx.d ; op1:f1; dest:f2; op1val:0x0; valaddr_reg:x3;
+val_offset:29*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f2, f1, dyn, 0, 0, x3, 29*FLEN/8, x4, x1, x2)
+
+inst_30:
+// rs1==f2, rd==f1,
+/* opcode: froundnx.d ; op1:f2; dest:f1; op1val:0x0; valaddr_reg:x3;
+val_offset:30*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f1, f2, dyn, 0, 0, x3, 30*FLEN/8, x4, x1, x2)
+
+inst_31:
+// rs1==f0,
+/* opcode: froundnx.d ; op1:f0; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:31*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f31, f0, dyn, 0, 0, x3, 31*FLEN/8, x4, x1, x2)
+
+inst_32:
+// rd==f0,
+/* opcode: froundnx.d ; op1:f31; dest:f0; op1val:0x0; valaddr_reg:x3;
+val_offset:32*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.d, f0, f31, dyn, 0, 0, x3, 32*FLEN/8, x4, x1, x2)
+#endif
+
+
+RVTEST_CODE_END
+RVMODEL_HALT
+
+RVTEST_DATA_BEGIN
+.align 4
+rvtest_data:
+.word 0xbabecafe
+.word 0xabecafeb
+.word 0xbecafeba
+.word 0xecafebab
+test_dataset_0:
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(9223372036854775808,64,FLEN)
+NAN_BOXED(1,64,FLEN)
+NAN_BOXED(9223372036854775809,64,FLEN)
+NAN_BOXED(2,64,FLEN)
+NAN_BOXED(9223372036854775810,64,FLEN)
+NAN_BOXED(4503599627370495,64,FLEN)
+NAN_BOXED(9227875636482146303,64,FLEN)
+NAN_BOXED(4503599627370496,64,FLEN)
+NAN_BOXED(9227875636482146304,64,FLEN)
+NAN_BOXED(4503599627370498,64,FLEN)
+NAN_BOXED(9227875636482146306,64,FLEN)
+NAN_BOXED(9218868437227405311,64,FLEN)
+NAN_BOXED(18442240474082181119,64,FLEN)
+NAN_BOXED(9218868437227405312,64,FLEN)
+NAN_BOXED(18442240474082181120,64,FLEN)
+NAN_BOXED(9221120237041090560,64,FLEN)
+NAN_BOXED(18444492273895866368,64,FLEN)
+NAN_BOXED(9221120237041090561,64,FLEN)
+NAN_BOXED(18444492273895866369,64,FLEN)
+NAN_BOXED(9218868437227405313,64,FLEN)
+NAN_BOXED(18442240474082181121,64,FLEN)
+NAN_BOXED(4607182418800017408,64,FLEN)
+NAN_BOXED(13799029258263199744,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+RVTEST_DATA_END
+
+RVMODEL_DATA_BEGIN
+rvtest_sig_begin:
+sig_begin_canary:
+CANARY;
+
+
+
+signature_x1_0:
+    .fill 0*((SIGALIGN)/4),4,0xdeadbeef
+
+
+signature_x1_1:
+    .fill 66*((SIGALIGN)/4),4,0xdeadbeef
+
+#ifdef rvtest_mtrap_routine
+tsig_begin_canary:
+CANARY;
+
+mtrap_sigptr:
+    .fill 64*XLEN/32,4,0xdeadbeef
+
+tsig_end_canary:
+CANARY;
+#endif
+
+#ifdef rvtest_gpr_save
+
+gpr_save:
+    .fill 32*XLEN/32,4,0xdeadbeef
+
+#endif
+
+
+sig_end_canary:
+CANARY;
+rvtest_sig_end:
+RVMODEL_DATA_END
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/D_Zfa/src/froundnx_b1-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/D_Zfa/src/froundnx_b1-01.S
new file mode 100644
index 000000000..a275776bd
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/D_Zfa/src/froundnx_b1-01.S
@@ -0,0 +1,353 @@
+
+// -----------
+// This file was generated by riscv_ctg (https://github.com/riscv-software-src/riscv-ctg)
+// version   : 0.12.1
+// timestamp : Wed Mar  6 21:52:28 2024 GMT
+// usage     : riscv_ctg \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/dataset.cgf \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/zfa/froundnx.s.cgf \
+ \
+//                  -- xlen 64  \
+// -----------
+//
+// -----------
+// Copyright (c) 2020. RISC-V International. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// -----------
+//
+// This assembly file tests the froundnx.s instruction of the RISC-V RV64F_Zicsr_Zfa,RV64FD_Zicsr_Zfa extension for the froundnx_b1 covergroup.
+// 
+#include "model_test.h"
+#include "arch_test.h"
+RVTEST_ISA("RV64IF_Zicsr_Zfa,RV64IFD_Zicsr_Zfa")
+
+.section .text.init
+.globl rvtest_entry_point
+rvtest_entry_point:
+RVMODEL_BOOT
+RVTEST_CODE_BEGIN
+
+#ifdef TEST_CASE_1
+// Covergroup froundnx_b1: 33 froundnx.s tests (inst_0..inst_32), each run with rm=dyn and fcsr preset to 0.
+RVTEST_CASE(0,"//check ISA:=regex(.*RV64.*I.*F.*Zfa.*);def TEST_CASE_1=True;",froundnx_b1)
+// inst_N reads entry N of test_dataset_0 (val_offset = N*FLEN/8, assuming each NAN_BOXED entry occupies FLEN/8 bytes); the table holds exactly 33 entries.
+RVTEST_FP_ENABLE()
+RVTEST_VALBASEUPD(x3,test_dataset_0)
+RVTEST_SIGBASE(x1,signature_x1_1)
+
+inst_0:
+// rs1 == rd, rs1==f31, rd==f31,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f31; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:0*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f31, f31, dyn, 0, 0, x3, 0*FLEN/8, x4, x1, x2)
+
+inst_1:
+// rs1 != rd, rs1==f29, rd==f30,fs1 == 1 and fe1 == 0x00 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f29; dest:f30; op1val:0x80000000; valaddr_reg:x3;
+val_offset:1*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f30, f29, dyn, 0, 0, x3, 1*FLEN/8, x4, x1, x2)
+
+inst_2:
+// rs1==f30, rd==f29,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f30; dest:f29; op1val:0x1; valaddr_reg:x3;
+val_offset:2*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f29, f30, dyn, 0, 0, x3, 2*FLEN/8, x4, x1, x2)
+
+inst_3:
+// rs1==f27, rd==f28,fs1 == 1 and fe1 == 0x00 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f27; dest:f28; op1val:0x80000001; valaddr_reg:x3;
+val_offset:3*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f28, f27, dyn, 0, 0, x3, 3*FLEN/8, x4, x1, x2)
+
+inst_4:
+// rs1==f28, rd==f27,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000002 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f28; dest:f27; op1val:0x2; valaddr_reg:x3;
+val_offset:4*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f27, f28, dyn, 0, 0, x3, 4*FLEN/8, x4, x1, x2)
+
+inst_5:
+// rs1==f25, rd==f26,fs1 == 1 and fe1 == 0x00 and fm1 == 0x7ffffe and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f25; dest:f26; op1val:0x807ffffe; valaddr_reg:x3;
+val_offset:5*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f26, f25, dyn, 0, 0, x3, 5*FLEN/8, x4, x1, x2)
+
+inst_6:
+// rs1==f26, rd==f25,fs1 == 0 and fe1 == 0x00 and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f26; dest:f25; op1val:0x7fffff; valaddr_reg:x3;
+val_offset:6*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f25, f26, dyn, 0, 0, x3, 6*FLEN/8, x4, x1, x2)
+
+inst_7:
+// rs1==f23, rd==f24,fs1 == 1 and fe1 == 0x00 and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f23; dest:f24; op1val:0x807fffff; valaddr_reg:x3;
+val_offset:7*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f24, f23, dyn, 0, 0, x3, 7*FLEN/8, x4, x1, x2)
+
+inst_8:
+// rs1==f24, rd==f23,fs1 == 0 and fe1 == 0x01 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f24; dest:f23; op1val:0x800000; valaddr_reg:x3;
+val_offset:8*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f23, f24, dyn, 0, 0, x3, 8*FLEN/8, x4, x1, x2)
+
+inst_9:
+// rs1==f21, rd==f22,fs1 == 1 and fe1 == 0x01 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f21; dest:f22; op1val:0x80800000; valaddr_reg:x3;
+val_offset:9*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f22, f21, dyn, 0, 0, x3, 9*FLEN/8, x4, x1, x2)
+
+inst_10:
+// rs1==f22, rd==f21,fs1 == 0 and fe1 == 0x01 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f22; dest:f21; op1val:0x800001; valaddr_reg:x3;
+val_offset:10*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f21, f22, dyn, 0, 0, x3, 10*FLEN/8, x4, x1, x2)
+
+inst_11:
+// rs1==f19, rd==f20,fs1 == 1 and fe1 == 0x01 and fm1 == 0x055555 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f19; dest:f20; op1val:0x80855555; valaddr_reg:x3;
+val_offset:11*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f20, f19, dyn, 0, 0, x3, 11*FLEN/8, x4, x1, x2)
+
+inst_12:
+// rs1==f20, rd==f19,fs1 == 0 and fe1 == 0xfe and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f20; dest:f19; op1val:0x7f7fffff; valaddr_reg:x3;
+val_offset:12*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f19, f20, dyn, 0, 0, x3, 12*FLEN/8, x4, x1, x2)
+
+inst_13:
+// rs1==f17, rd==f18,fs1 == 1 and fe1 == 0xfe and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f17; dest:f18; op1val:0xff7fffff; valaddr_reg:x3;
+val_offset:13*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f18, f17, dyn, 0, 0, x3, 13*FLEN/8, x4, x1, x2)
+
+inst_14:
+// rs1==f18, rd==f17,fs1 == 0 and fe1 == 0xff and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f18; dest:f17; op1val:0x7f800000; valaddr_reg:x3;
+val_offset:14*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f17, f18, dyn, 0, 0, x3, 14*FLEN/8, x4, x1, x2)
+
+inst_15:
+// rs1==f15, rd==f16,fs1 == 1 and fe1 == 0xff and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f15; dest:f16; op1val:0xff800000; valaddr_reg:x3;
+val_offset:15*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f16, f15, dyn, 0, 0, x3, 15*FLEN/8, x4, x1, x2)
+
+inst_16:
+// rs1==f16, rd==f15,fs1 == 0 and fe1 == 0xff and fm1 == 0x400000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f16; dest:f15; op1val:0x7fc00000; valaddr_reg:x3;
+val_offset:16*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f15, f16, dyn, 0, 0, x3, 16*FLEN/8, x4, x1, x2)
+
+inst_17:
+// rs1==f13, rd==f14,fs1 == 1 and fe1 == 0xff and fm1 == 0x400000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f13; dest:f14; op1val:0xffc00000; valaddr_reg:x3;
+val_offset:17*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f14, f13, dyn, 0, 0, x3, 17*FLEN/8, x4, x1, x2)
+
+inst_18:
+// rs1==f14, rd==f13,fs1 == 0 and fe1 == 0xff and fm1 == 0x400001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f14; dest:f13; op1val:0x7fc00001; valaddr_reg:x3;
+val_offset:18*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f13, f14, dyn, 0, 0, x3, 18*FLEN/8, x4, x1, x2)
+
+inst_19:
+// rs1==f11, rd==f12,fs1 == 1 and fe1 == 0xff and fm1 == 0x455555 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f11; dest:f12; op1val:0xffc55555; valaddr_reg:x3;
+val_offset:19*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f12, f11, dyn, 0, 0, x3, 19*FLEN/8, x4, x1, x2)
+
+inst_20:
+// rs1==f12, rd==f11,fs1 == 0 and fe1 == 0xff and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f12; dest:f11; op1val:0x7f800001; valaddr_reg:x3;
+val_offset:20*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f11, f12, dyn, 0, 0, x3, 20*FLEN/8, x4, x1, x2)
+
+inst_21:
+// rs1==f9, rd==f10,fs1 == 1 and fe1 == 0xff and fm1 == 0x2aaaaa and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f9; dest:f10; op1val:0xffaaaaaa; valaddr_reg:x3;
+val_offset:21*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f10, f9, dyn, 0, 0, x3, 21*FLEN/8, x4, x1, x2)
+
+inst_22:
+// rs1==f10, rd==f9,fs1 == 0 and fe1 == 0x7f and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f10; dest:f9; op1val:0x3f800000; valaddr_reg:x3;
+val_offset:22*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f9, f10, dyn, 0, 0, x3, 22*FLEN/8, x4, x1, x2)
+
+inst_23:
+// rs1==f7, rd==f8,fs1 == 1 and fe1 == 0x7f and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7  and rs1_nan_prefix == 0xffffffff
+/* opcode: froundnx.s ; op1:f7; dest:f8; op1val:0xbf800000; valaddr_reg:x3;
+val_offset:23*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f8, f7, dyn, 0, 0, x3, 23*FLEN/8, x4, x1, x2)
+
+inst_24:
+// rs1==f8, rd==f7,
+/* opcode: froundnx.s ; op1:f8; dest:f7; op1val:0x0; valaddr_reg:x3;
+val_offset:24*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f7, f8, dyn, 0, 0, x3, 24*FLEN/8, x4, x1, x2)
+
+inst_25:
+// rs1==f5, rd==f6,
+/* opcode: froundnx.s ; op1:f5; dest:f6; op1val:0x0; valaddr_reg:x3;
+val_offset:25*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f6, f5, dyn, 0, 0, x3, 25*FLEN/8, x4, x1, x2)
+
+inst_26:
+// rs1==f6, rd==f5,
+/* opcode: froundnx.s ; op1:f6; dest:f5; op1val:0x0; valaddr_reg:x3;
+val_offset:26*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f5, f6, dyn, 0, 0, x3, 26*FLEN/8, x4, x1, x2)
+
+inst_27:
+// rs1==f3, rd==f4,
+/* opcode: froundnx.s ; op1:f3; dest:f4; op1val:0x0; valaddr_reg:x3;
+val_offset:27*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f4, f3, dyn, 0, 0, x3, 27*FLEN/8, x4, x1, x2)
+
+inst_28:
+// rs1==f4, rd==f3,
+/* opcode: froundnx.s ; op1:f4; dest:f3; op1val:0x0; valaddr_reg:x3;
+val_offset:28*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f3, f4, dyn, 0, 0, x3, 28*FLEN/8, x4, x1, x2)
+
+inst_29:
+// rs1==f1, rd==f2,
+/* opcode: froundnx.s ; op1:f1; dest:f2; op1val:0x0; valaddr_reg:x3;
+val_offset:29*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f2, f1, dyn, 0, 0, x3, 29*FLEN/8, x4, x1, x2)
+
+inst_30:
+// rs1==f2, rd==f1,
+/* opcode: froundnx.s ; op1:f2; dest:f1; op1val:0x0; valaddr_reg:x3;
+val_offset:30*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f1, f2, dyn, 0, 0, x3, 30*FLEN/8, x4, x1, x2)
+
+inst_31:
+// rs1==f0,
+/* opcode: froundnx.s ; op1:f0; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:31*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f31, f0, dyn, 0, 0, x3, 31*FLEN/8, x4, x1, x2)
+
+inst_32:
+// rd==f0,
+/* opcode: froundnx.s ; op1:f31; dest:f0; op1val:0x0; valaddr_reg:x3;
+val_offset:32*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f0, f31, dyn, 0, 0, x3, 32*FLEN/8, x4, x1, x2)
+#endif
+
+
+RVTEST_CODE_END
+RVMODEL_HALT
+
+RVTEST_DATA_BEGIN
+.align 4
+rvtest_data:
+.word 0xbabecafe
+.word 0xabecafeb
+.word 0xbecafeba
+.word 0xecafebab
+test_dataset_0:  // operand table; x3 is pointed here by RVTEST_VALBASEUPD. Entries 0..23 follow the op1val annotations of inst_0..inst_23 in order.
+NAN_BOXED(0,32,FLEN)  // 0x00000000: +0.0
+NAN_BOXED(2147483648,32,FLEN)  // 0x80000000: -0.0
+NAN_BOXED(1,32,FLEN)  // 0x00000001: smallest positive subnormal
+NAN_BOXED(2147483649,32,FLEN)  // 0x80000001: smallest-magnitude negative subnormal
+NAN_BOXED(2,32,FLEN)  // 0x00000002: positive subnormal
+NAN_BOXED(2155872254,32,FLEN)  // 0x807ffffe: negative subnormal
+NAN_BOXED(8388607,32,FLEN)  // 0x007fffff: largest positive subnormal
+NAN_BOXED(2155872255,32,FLEN)  // 0x807fffff: largest-magnitude negative subnormal
+NAN_BOXED(8388608,32,FLEN)  // 0x00800000: smallest positive normal
+NAN_BOXED(2155872256,32,FLEN)  // 0x80800000: smallest-magnitude negative normal
+NAN_BOXED(8388609,32,FLEN)  // 0x00800001: positive normal, exponent 0x01
+NAN_BOXED(2156221781,32,FLEN)  // 0x80855555: negative normal, exponent 0x01
+NAN_BOXED(2139095039,32,FLEN)  // 0x7f7fffff: largest finite positive
+NAN_BOXED(4286578687,32,FLEN)  // 0xff7fffff: largest-magnitude finite negative
+NAN_BOXED(2139095040,32,FLEN)  // 0x7f800000: +infinity
+NAN_BOXED(4286578688,32,FLEN)  // 0xff800000: -infinity
+NAN_BOXED(2143289344,32,FLEN)  // 0x7fc00000: quiet NaN
+NAN_BOXED(4290772992,32,FLEN)  // 0xffc00000: quiet NaN, sign bit set
+NAN_BOXED(2143289345,32,FLEN)  // 0x7fc00001: quiet NaN with payload
+NAN_BOXED(4291122517,32,FLEN)  // 0xffc55555: quiet NaN with payload, sign bit set
+NAN_BOXED(2139095041,32,FLEN)  // 0x7f800001: signaling NaN
+NAN_BOXED(4289374890,32,FLEN)  // 0xffaaaaaa: signaling NaN, sign bit set
+NAN_BOXED(1065353216,32,FLEN)  // 0x3f800000: +1.0
+NAN_BOXED(3212836864,32,FLEN)  // 0xbf800000: -1.0
+NAN_BOXED(0,64,FLEN)  // nine zero entries at width 64 follow; inst_24..inst_32 annotate op1val 0x0
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)
+NAN_BOXED(0,64,FLEN)  // entry 32, last of the 33 entries
+RVTEST_DATA_END
+
+RVMODEL_DATA_BEGIN
+rvtest_sig_begin:
+sig_begin_canary:
+CANARY;
+
+
+
+signature_x1_0:
+    .fill 0*((SIGALIGN)/4),4,0xdeadbeef  // repeat count is 0: the label exists but reserves no space
+
+
+signature_x1_1:
+    .fill 66*((SIGALIGN)/4),4,0xdeadbeef  // 66*SIGALIGN bytes of 0xdeadbeef filler; x1 is pointed here by RVTEST_SIGBASE, i.e. 2 SIGALIGN-sized slots for each of the 33 tests
+
+#ifdef rvtest_mtrap_routine
+tsig_begin_canary:
+CANARY;
+
+mtrap_sigptr:
+    .fill 64*XLEN/32,4,0xdeadbeef  // 64 XLEN-wide slots of filler, only present when rvtest_mtrap_routine is defined
+
+tsig_end_canary:
+CANARY;
+#endif
+
+#ifdef rvtest_gpr_save
+
+gpr_save:
+    .fill 32*XLEN/32,4,0xdeadbeef  // 32 XLEN-wide slots of filler, only present when rvtest_gpr_save is defined
+
+#endif
+
+
+sig_end_canary:
+CANARY;
+rvtest_sig_end:
+RVMODEL_DATA_END
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/F_Zfa/src/froundnx_b1-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/F_Zfa/src/froundnx_b1-01.S
new file mode 100644
index 000000000..046749f85
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/F_Zfa/src/froundnx_b1-01.S
@@ -0,0 +1,353 @@
+
+// -----------
+// This file was generated by riscv_ctg (https://github.com/riscv-software-src/riscv-ctg)
+// version   : 0.12.1
+// timestamp : Mon Apr  1 19:36:25 2024 GMT
+// usage     : riscv_ctg \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/dataset.cgf \
+//                  --cgf /home/cm/src/riscv-ctg/zfa/sample_cgfs/zfa/froundnx.s.cgf \
+ \
+//                  -- xlen 64  \
+// -----------
+//
+// -----------
+// Copyright (c) 2020. RISC-V International. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+// -----------
+//
+// This assembly file tests the froundnx.s instruction of the RISC-V RV64F_Zicsr_Zfa,RV64FD_Zicsr_Zfa extension for the froundnx_b1 covergroup.
+// 
+#include "model_test.h"
+#include "arch_test.h"
+RVTEST_ISA("RV64IF_Zicsr_Zfa,RV64IFD_Zicsr_Zfa")
+
+.section .text.init
+.globl rvtest_entry_point
+rvtest_entry_point:
+RVMODEL_BOOT
+RVTEST_CODE_BEGIN
+
+#ifdef TEST_CASE_1
+// Covergroup froundnx_b1: 33 froundnx.s tests (inst_0..inst_32), each run with rm=dyn and fcsr preset to 0.
+RVTEST_CASE(0,"//check ISA:=regex(.*RV64.*I.*F.*Zfa.*);def TEST_CASE_1=True;",froundnx_b1)
+// inst_N reads entry N of test_dataset_0 (val_offset = N*FLEN/8, assuming each NAN_BOXED entry occupies FLEN/8 bytes); the table holds exactly 33 entries.
+RVTEST_FP_ENABLE()
+RVTEST_VALBASEUPD(x3,test_dataset_0)
+RVTEST_SIGBASE(x1,signature_x1_1)
+
+inst_0:
+// rs1 != rd, rs1==f30, rd==f31,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f30; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:0*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f31, f30, dyn, 0, 0, x3, 0*FLEN/8, x4, x1, x2)
+
+inst_1:
+// rs1 == rd, rs1==f29, rd==f29,fs1 == 1 and fe1 == 0x00 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f29; dest:f29; op1val:0x80000000; valaddr_reg:x3;
+val_offset:1*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f29, f29, dyn, 0, 0, x3, 1*FLEN/8, x4, x1, x2)
+
+inst_2:
+// rs1==f31, rd==f30,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f31; dest:f30; op1val:0x1; valaddr_reg:x3;
+val_offset:2*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f30, f31, dyn, 0, 0, x3, 2*FLEN/8, x4, x1, x2)
+
+inst_3:
+// rs1==f27, rd==f28,fs1 == 1 and fe1 == 0x00 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f27; dest:f28; op1val:0x80000001; valaddr_reg:x3;
+val_offset:3*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f28, f27, dyn, 0, 0, x3, 3*FLEN/8, x4, x1, x2)
+
+inst_4:
+// rs1==f28, rd==f27,fs1 == 0 and fe1 == 0x00 and fm1 == 0x000002 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f28; dest:f27; op1val:0x2; valaddr_reg:x3;
+val_offset:4*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f27, f28, dyn, 0, 0, x3, 4*FLEN/8, x4, x1, x2)
+
+inst_5:
+// rs1==f25, rd==f26,fs1 == 1 and fe1 == 0x00 and fm1 == 0x7ffffe and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f25; dest:f26; op1val:0x807ffffe; valaddr_reg:x3;
+val_offset:5*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f26, f25, dyn, 0, 0, x3, 5*FLEN/8, x4, x1, x2)
+
+inst_6:
+// rs1==f26, rd==f25,fs1 == 0 and fe1 == 0x00 and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f26; dest:f25; op1val:0x7fffff; valaddr_reg:x3;
+val_offset:6*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f25, f26, dyn, 0, 0, x3, 6*FLEN/8, x4, x1, x2)
+
+inst_7:
+// rs1==f23, rd==f24,fs1 == 1 and fe1 == 0x00 and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f23; dest:f24; op1val:0x807fffff; valaddr_reg:x3;
+val_offset:7*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f24, f23, dyn, 0, 0, x3, 7*FLEN/8, x4, x1, x2)
+
+inst_8:
+// rs1==f24, rd==f23,fs1 == 0 and fe1 == 0x01 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f24; dest:f23; op1val:0x800000; valaddr_reg:x3;
+val_offset:8*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f23, f24, dyn, 0, 0, x3, 8*FLEN/8, x4, x1, x2)
+
+inst_9:
+// rs1==f21, rd==f22,fs1 == 1 and fe1 == 0x01 and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f21; dest:f22; op1val:0x80800000; valaddr_reg:x3;
+val_offset:9*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f22, f21, dyn, 0, 0, x3, 9*FLEN/8, x4, x1, x2)
+
+inst_10:
+// rs1==f22, rd==f21,fs1 == 0 and fe1 == 0x01 and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f22; dest:f21; op1val:0x800001; valaddr_reg:x3;
+val_offset:10*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f21, f22, dyn, 0, 0, x3, 10*FLEN/8, x4, x1, x2)
+
+inst_11:
+// rs1==f19, rd==f20,fs1 == 1 and fe1 == 0x01 and fm1 == 0x055555 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f19; dest:f20; op1val:0x80855555; valaddr_reg:x3;
+val_offset:11*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f20, f19, dyn, 0, 0, x3, 11*FLEN/8, x4, x1, x2)
+
+inst_12:
+// rs1==f20, rd==f19,fs1 == 0 and fe1 == 0xfe and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f20; dest:f19; op1val:0x7f7fffff; valaddr_reg:x3;
+val_offset:12*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f19, f20, dyn, 0, 0, x3, 12*FLEN/8, x4, x1, x2)
+
+inst_13:
+// rs1==f17, rd==f18,fs1 == 1 and fe1 == 0xfe and fm1 == 0x7fffff and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f17; dest:f18; op1val:0xff7fffff; valaddr_reg:x3;
+val_offset:13*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f18, f17, dyn, 0, 0, x3, 13*FLEN/8, x4, x1, x2)
+
+inst_14:
+// rs1==f18, rd==f17,fs1 == 0 and fe1 == 0xff and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f18; dest:f17; op1val:0x7f800000; valaddr_reg:x3;
+val_offset:14*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f17, f18, dyn, 0, 0, x3, 14*FLEN/8, x4, x1, x2)
+
+inst_15:
+// rs1==f15, rd==f16,fs1 == 1 and fe1 == 0xff and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f15; dest:f16; op1val:0xff800000; valaddr_reg:x3;
+val_offset:15*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f16, f15, dyn, 0, 0, x3, 15*FLEN/8, x4, x1, x2)
+
+inst_16:
+// rs1==f16, rd==f15,fs1 == 0 and fe1 == 0xff and fm1 == 0x400000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f16; dest:f15; op1val:0x7fc00000; valaddr_reg:x3;
+val_offset:16*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f15, f16, dyn, 0, 0, x3, 16*FLEN/8, x4, x1, x2)
+
+inst_17:
+// rs1==f13, rd==f14,fs1 == 1 and fe1 == 0xff and fm1 == 0x400000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f13; dest:f14; op1val:0xffc00000; valaddr_reg:x3;
+val_offset:17*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f14, f13, dyn, 0, 0, x3, 17*FLEN/8, x4, x1, x2)
+
+inst_18:
+// rs1==f14, rd==f13,fs1 == 0 and fe1 == 0xff and fm1 == 0x400001 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f14; dest:f13; op1val:0x7fc00001; valaddr_reg:x3;
+val_offset:18*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f13, f14, dyn, 0, 0, x3, 18*FLEN/8, x4, x1, x2)
+
+inst_19:
+// rs1==f11, rd==f12,fs1 == 1 and fe1 == 0xff and fm1 == 0x455555 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f11; dest:f12; op1val:0xffc55555; valaddr_reg:x3;
+val_offset:19*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f12, f11, dyn, 0, 0, x3, 19*FLEN/8, x4, x1, x2)
+
+inst_20:
+// rs1==f12, rd==f11,fs1 == 0 and fe1 == 0xff and fm1 == 0x000001 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f12; dest:f11; op1val:0x7f800001; valaddr_reg:x3;
+val_offset:20*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f11, f12, dyn, 0, 0, x3, 20*FLEN/8, x4, x1, x2)
+
+inst_21:
+// rs1==f9, rd==f10,fs1 == 1 and fe1 == 0xff and fm1 == 0x2aaaaa and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f9; dest:f10; op1val:0xffaaaaaa; valaddr_reg:x3;
+val_offset:21*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f10, f9, dyn, 0, 0, x3, 21*FLEN/8, x4, x1, x2)
+
+inst_22:
+// rs1==f10, rd==f9,fs1 == 0 and fe1 == 0x7f and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f10; dest:f9; op1val:0x3f800000; valaddr_reg:x3;
+val_offset:22*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f9, f10, dyn, 0, 0, x3, 22*FLEN/8, x4, x1, x2)
+
+inst_23:
+// rs1==f7, rd==f8,fs1 == 1 and fe1 == 0x7f and fm1 == 0x000000 and  fcsr == 0x0 and rm_val == 7
+/* opcode: froundnx.s ; op1:f7; dest:f8; op1val:0xbf800000; valaddr_reg:x3;
+val_offset:23*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f8, f7, dyn, 0, 0, x3, 23*FLEN/8, x4, x1, x2)
+
+inst_24:
+// rs1==f8, rd==f7,
+/* opcode: froundnx.s ; op1:f8; dest:f7; op1val:0x0; valaddr_reg:x3;
+val_offset:24*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f7, f8, dyn, 0, 0, x3, 24*FLEN/8, x4, x1, x2)
+
+inst_25:
+// rs1==f5, rd==f6,
+/* opcode: froundnx.s ; op1:f5; dest:f6; op1val:0x0; valaddr_reg:x3;
+val_offset:25*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f6, f5, dyn, 0, 0, x3, 25*FLEN/8, x4, x1, x2)
+
+inst_26:
+// rs1==f6, rd==f5,
+/* opcode: froundnx.s ; op1:f6; dest:f5; op1val:0x0; valaddr_reg:x3;
+val_offset:26*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f5, f6, dyn, 0, 0, x3, 26*FLEN/8, x4, x1, x2)
+
+inst_27:
+// rs1==f3, rd==f4,
+/* opcode: froundnx.s ; op1:f3; dest:f4; op1val:0x0; valaddr_reg:x3;
+val_offset:27*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f4, f3, dyn, 0, 0, x3, 27*FLEN/8, x4, x1, x2)
+
+inst_28:
+// rs1==f4, rd==f3,
+/* opcode: froundnx.s ; op1:f4; dest:f3; op1val:0x0; valaddr_reg:x3;
+val_offset:28*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f3, f4, dyn, 0, 0, x3, 28*FLEN/8, x4, x1, x2)
+
+inst_29:
+// rs1==f1, rd==f2,
+/* opcode: froundnx.s ; op1:f1; dest:f2; op1val:0x0; valaddr_reg:x3;
+val_offset:29*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f2, f1, dyn, 0, 0, x3, 29*FLEN/8, x4, x1, x2)
+
+inst_30:
+// rs1==f2, rd==f1,
+/* opcode: froundnx.s ; op1:f2; dest:f1; op1val:0x0; valaddr_reg:x3;
+val_offset:30*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f1, f2, dyn, 0, 0, x3, 30*FLEN/8, x4, x1, x2)
+
+inst_31:
+// rs1==f0,
+/* opcode: froundnx.s ; op1:f0; dest:f31; op1val:0x0; valaddr_reg:x3;
+val_offset:31*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f31, f0, dyn, 0, 0, x3, 31*FLEN/8, x4, x1, x2)
+
+inst_32:
+// rd==f0,
+/* opcode: froundnx.s ; op1:f31; dest:f0; op1val:0x0; valaddr_reg:x3;
+val_offset:32*FLEN/8; rmval:dyn; correctval:??; testreg:x2;
+fcsr_val: 0 */
+TEST_FPSR_OP(froundnx.s, f0, f31, dyn, 0, 0, x3, 32*FLEN/8, x4, x1, x2)
+#endif
+
+
+RVTEST_CODE_END
+RVMODEL_HALT
+
+RVTEST_DATA_BEGIN
+.align 4
+rvtest_data:
+.word 0xbabecafe
+.word 0xabecafeb
+.word 0xbecafeba
+.word 0xecafebab
+test_dataset_0:  // operand table; x3 is pointed here by RVTEST_VALBASEUPD. Entries 0..23 follow the op1val annotations of inst_0..inst_23 in order.
+NAN_BOXED(0,32,FLEN)  // 0x00000000: +0.0
+NAN_BOXED(2147483648,32,FLEN)  // 0x80000000: -0.0
+NAN_BOXED(1,32,FLEN)  // 0x00000001: smallest positive subnormal
+NAN_BOXED(2147483649,32,FLEN)  // 0x80000001: smallest-magnitude negative subnormal
+NAN_BOXED(2,32,FLEN)  // 0x00000002: positive subnormal
+NAN_BOXED(2155872254,32,FLEN)  // 0x807ffffe: negative subnormal
+NAN_BOXED(8388607,32,FLEN)  // 0x007fffff: largest positive subnormal
+NAN_BOXED(2155872255,32,FLEN)  // 0x807fffff: largest-magnitude negative subnormal
+NAN_BOXED(8388608,32,FLEN)  // 0x00800000: smallest positive normal
+NAN_BOXED(2155872256,32,FLEN)  // 0x80800000: smallest-magnitude negative normal
+NAN_BOXED(8388609,32,FLEN)  // 0x00800001: positive normal, exponent 0x01
+NAN_BOXED(2156221781,32,FLEN)  // 0x80855555: negative normal, exponent 0x01
+NAN_BOXED(2139095039,32,FLEN)  // 0x7f7fffff: largest finite positive
+NAN_BOXED(4286578687,32,FLEN)  // 0xff7fffff: largest-magnitude finite negative
+NAN_BOXED(2139095040,32,FLEN)  // 0x7f800000: +infinity
+NAN_BOXED(4286578688,32,FLEN)  // 0xff800000: -infinity
+NAN_BOXED(2143289344,32,FLEN)  // 0x7fc00000: quiet NaN
+NAN_BOXED(4290772992,32,FLEN)  // 0xffc00000: quiet NaN, sign bit set
+NAN_BOXED(2143289345,32,FLEN)  // 0x7fc00001: quiet NaN with payload
+NAN_BOXED(4291122517,32,FLEN)  // 0xffc55555: quiet NaN with payload, sign bit set
+NAN_BOXED(2139095041,32,FLEN)  // 0x7f800001: signaling NaN
+NAN_BOXED(4289374890,32,FLEN)  // 0xffaaaaaa: signaling NaN, sign bit set
+NAN_BOXED(1065353216,32,FLEN)  // 0x3f800000: +1.0
+NAN_BOXED(3212836864,32,FLEN)  // 0xbf800000: -1.0
+NAN_BOXED(0,32,FLEN)  // nine zero entries follow; inst_24..inst_32 annotate op1val 0x0
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)
+NAN_BOXED(0,32,FLEN)  // entry 32, last of the 33 entries
+RVTEST_DATA_END
+
+RVMODEL_DATA_BEGIN
+rvtest_sig_begin:
+sig_begin_canary:
+CANARY;
+
+
+
+signature_x1_0:
+    .fill 0*((SIGALIGN)/4),4,0xdeadbeef  // repeat count is 0: the label exists but reserves no space
+
+
+signature_x1_1:
+    .fill 66*((SIGALIGN)/4),4,0xdeadbeef  // 66*SIGALIGN bytes of 0xdeadbeef filler; x1 is pointed here by RVTEST_SIGBASE, i.e. 2 SIGALIGN-sized slots for each of the 33 tests
+
+#ifdef rvtest_mtrap_routine
+tsig_begin_canary:
+CANARY;
+
+mtrap_sigptr:
+    .fill 64*XLEN/32,4,0xdeadbeef  // 64 XLEN-wide slots of filler, only present when rvtest_mtrap_routine is defined
+
+tsig_end_canary:
+CANARY;
+#endif
+
+#ifdef rvtest_gpr_save
+
+gpr_save:
+    .fill 32*XLEN/32,4,0xdeadbeef  // 32 XLEN-wide slots of filler, only present when rvtest_gpr_save is defined
+
+#endif
+
+
+sig_end_canary:
+CANARY;
+rvtest_sig_end:
+RVMODEL_DATA_END

From cfe83f5b498667c669c29d1218010fb4813a3e27 Mon Sep 17 00:00:00 2001
From: David Harris <David_Harris@hmc.edu>
Date: Fri, 24 May 2024 15:18:36 -0700
Subject: [PATCH 23/30] Added derived configs to test Zb* and Zk* individually

---
 config/derivlist.txt | 240 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 240 insertions(+)

diff --git a/config/derivlist.txt b/config/derivlist.txt
index 1ed046a6a..174ca5191 100644
--- a/config/derivlist.txt
+++ b/config/derivlist.txt
@@ -530,6 +530,246 @@ ZALRSC_SUPPORTED    0
 deriv zalrsc_rv64gc rv64gc
 ZAAMO_SUPPORTED     0
 
+deriv zba_rv32gc rv32gc
+ZBA_SUPPORTED     1     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbb_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     1     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbc_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     1     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbs_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     1     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbkb_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     1     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbkc_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     1     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbkx_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     1     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zknd_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     1     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zkne_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     1     
+ZKNH_SUPPORTED     0     
+
+deriv zknh_rv32gc rv32gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     1     
+
+deriv zba_rv64gc rv64gc
+ZBA_SUPPORTED     1     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbb_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     1     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbc_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     1     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbs_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     1     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbkb_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     1     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbkc_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     1     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zbkx_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     1     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zknd_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     1     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     0     
+
+deriv zkne_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     1     
+ZKNH_SUPPORTED     0     
+
+deriv zknh_rv64gc rv64gc
+ZBA_SUPPORTED     0     
+ZBB_SUPPORTED     0     
+ZBS_SUPPORTED     0     
+ZBC_SUPPORTED     0     
+ZBKB_SUPPORTED     0     
+ZBKC_SUPPORTED     0     
+ZBKX_SUPPORTED     0     
+ZKND_SUPPORTED     0     
+ZKNE_SUPPORTED     0     
+ZKNH_SUPPORTED     1     
+
 # Floating-point modes supported
 
 deriv f_rv32gc rv32gc

From ae29a9b8616e1ef124a3103b859cd41cafc648da Mon Sep 17 00:00:00 2001
From: Jordan Carlin <jordanmcarlin@gmail.com>
Date: Fri, 24 May 2024 15:17:36 -0700
Subject: [PATCH 24/30] Update control bits for froundnx

---
 src/fpu/fctrl.sv | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv
index 2d456aeee..bbe2955c3 100755
--- a/src/fpu/fctrl.sv
+++ b/src/fpu/fctrl.sv
@@ -165,19 +165,19 @@ module fctrl import cvw::*;  #(parameter cvw_t P) (
                                   else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
                                                 ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.s  (Zfa) 
                                   else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
-                                                ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.s  (Zfa) 
+                                                ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_1; // froundnx.s  (Zfa) 
                       7'b0100001: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b01)
                                                 ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0_0; // fcvt.d.(s/h/q)
                                   else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
                                                 ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.d  (Zfa)
                                   else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
-                                                ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.d  (Zfa)
+                                                ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_1; // froundnx.d  (Zfa)
                       7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b10)
                                                 ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0_0; // fcvt.h.(s/d/q)
                                   else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
                                                 ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.h  (Zfa)
                                   else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
-                                                ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.h  (Zfa)
+                                                ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_1; // froundnx.h  (Zfa)
                       // coverage off
                       // Not covered in testing because rv64gc does not support quad precision
                       7'b0100011: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b11)
@@ -185,7 +185,7 @@ module fctrl import cvw::*;  #(parameter cvw_t P) (
                                   else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED)
                                                 ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.q  (Zfa)
                                   else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED)
-                                                ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // froundnx.q  (Zfa)
+                                                ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_1; // froundnx.q  (Zfa)
                       // coverage on
                       7'b1101000: case(Rs2D)
                                     5'b00000:    ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.s.w   w->s

From fb77440a6434a2ab06f80a5c26b00b39f51d5bff Mon Sep 17 00:00:00 2001
From: Jordan Carlin <jordanmcarlin@gmail.com>
Date: Fri, 24 May 2024 15:33:45 -0700
Subject: [PATCH 25/30] Update fpctrl fmt to work for fround instructions

---
 src/fpu/fctrl.sv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv
index bbe2955c3..d8c1fe1d7 100755
--- a/src/fpu/fctrl.sv
+++ b/src/fpu/fctrl.sv
@@ -273,10 +273,10 @@ module fctrl import cvw::*;  #(parameter cvw_t P) (
       assign FmtD = 1'b0;
     else if (P.FPSIZES == 2) begin
       logic [1:0] FmtTmp;
-      assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
+      assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]&~Rs2D[2]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0];
       assign FmtD = (P.FMT == FmtTmp);
     end else if (P.FPSIZES == 3|P.FPSIZES == 4)
-      assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
+      assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]&~Rs2D[2]) ? Rs2D[1:0] : Funct7D[1:0];
 
   // Enables indicate that a source register is used and may need stalls. Also indicate special cases for infinity or NaN.
   // When disabled infinity and NaN on source registers are ignored by the unpacker and thus special case logic.

From b830d20f2de7f591f2f16613a5bfe3510fef4d5e Mon Sep 17 00:00:00 2001
From: Jordan Carlin <jordanmcarlin@gmail.com>
Date: Sat, 25 May 2024 12:56:02 -0700
Subject: [PATCH 26/30] Modify Fround Tmask to work for X=1

---
 src/fpu/fround.sv | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fpu/fround.sv b/src/fpu/fround.sv
index 085e25771..64700834a 100644
--- a/src/fpu/fround.sv
+++ b/src/fpu/fround.sv
@@ -79,7 +79,7 @@ module fround import cvw::*;  #(parameter cvw_t P) (
 
   // Logic for nonnegative mask and rounding bits
   assign IMask = {1'b1, {P.NF{1'b0}}} >>> E;
-  assign Tmasknonneg = ~(IMask >>> 1'b1);
+  assign Tmasknonneg = ~IMask >>> 1'b1;
   assign HotE = IMask & ~(IMask << 1'b1);
   assign HotEP1 = HotE >> 1'b1;
   assign Lnonneg = |(Xm & HotE);
@@ -139,7 +139,7 @@ module fround import cvw::*;  #(parameter cvw_t P) (
     else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1
       if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1
       else         W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0
-    else begin // |X| > 1 rounds to an integer
+    else begin // |X| >= 1 rounds to an integer
       if (RoundUp & Two) W = {Xs, Xep1, {(P.NF){1'b0}}}; // Round up to 2.0
       else if (RoundUp)  W = {Xs, Xe, Rnd[P.NF-1:0]};      // Round up to Rnd
       else               W = {Xs, Xe, Trunc[P.NF-1:0]};    // Round down to Trunc

From 8edc4057ed9cd5b6e740dd944650f59040c8d1c6 Mon Sep 17 00:00:00 2001
From: Quswar Abid <quswarabid@gmail.com>
Date: Sat, 25 May 2024 23:10:09 -0700
Subject: [PATCH 27/30] compilable tests generating for loaditypes[lb, lh, lw,
 ld, lbu, lhu, lwu]

---
 Makefile                  |  1 +
 tests/testgen/covergen.py | 38 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 24d531917..b78bb689d 100644
--- a/Makefile
+++ b/Makefile
@@ -89,6 +89,7 @@ riscvdv_functcov:
 
 combine_functcov:
 	mkdir -p ${SIM}/questa/functcov
+	mkdir -p ${SIM}/questa/functcov_logs
 	cd ${SIM}/questa/functcov && rm -rf *
 	run-elf-cov.bash --seed ${SIM}/questa/seed0.txt --verbose --coverdb ${SIM}/questa/functcov/add.ucdb --elf ${WALLY}/tests/functcov/rv64/I/WALLY-COV-add.elf								>> ${SIM}/questa/functcov_logs/add.log 2>&1
 	run-elf-cov.bash --seed ${SIM}/questa/seed0.txt --verbose --coverdb ${SIM}/questa/functcov/and.ucdb --elf ${WALLY}/tests/functcov/rv64/I/WALLY-COV-and.elf								>> ${SIM}/questa/functcov_logs/add.log 2>&1
diff --git a/tests/testgen/covergen.py b/tests/testgen/covergen.py
index 44a0eea4c..03edc2f90 100755
--- a/tests/testgen/covergen.py
+++ b/tests/testgen/covergen.py
@@ -31,6 +31,23 @@ def signedImm12(imm):
     imm = imm - 0x1000
   return str(imm)
 
+def signedImm20(imm):
+  imm = imm % pow(2, 20)
+  if (imm & 0x80000):
+    imm = imm - 0x100000
+  return str(imm)
+
+'''
+rtype = ["add", "sub", "sll", "slt", "sltu", "xor", "srl", "sra", "or", "and",
+          "addw", "subw", "sllw", "srlw", "sraw"
+          "mul", "mulh", "mulhsu", "mulhu", "div", "divu", "rem", "remu",
+          "mulw", "divw", "divuw", "remw", "remuw"]
+loaditype = ["lb", "lh", "lw", "ld", "lbu", "lhu", "lwu"]
+shiftitype = ["slli", "srli", "srai"]
+itype = ["addi", "slti", "sltiu", "xori", "ori", "andi"]
+stypes = ["sb", "sh", "sw", "sd"]
+btypes = ["beq", "bne", "blt", "bge", "bltu", "bgeu"]
+'''
 def writeCovVector(desc, rs1, rs2, rd, rs1val, rs2val, immval, rdval, test, storecmd, xlen):
   lines = "\n# Testcase " + str(desc) + "\n"
   if (rs1val < 0):
@@ -48,6 +65,20 @@ def writeCovVector(desc, rs1, rs2, rd, rs1val, rs2val, immval, rdval, test, stor
   elif (test in itype):
     lines = lines + "li x" + str(rs1) + ", " + formatstr.format(rs1val) + " # initialize rs1 to a random value \n"
     lines = lines + test + " x" + str(rd) + ", x" + str(rs1) + ", " + signedImm12(immval) + " # perform operation\n"
+  elif (test in loaditype):
+    '''
+    auipc	s9,0x2
+    addi	s9,s9,-448 # 80002800 <mtrap_sigptr+0x7f0>
+    lw	a4,-2048(s9)
+    '''
+    lines = lines + "auipc x" + str(rs1) + ", 0x20" + " # add upper immediate value to pc \n"
+    lines = lines + "addi x" + str(rs1) + ", x" + str(rs1) + ", " + signedImm12(immval) + " # add immediate to lower part of rs1 \n"
+    lines = lines + test + " x" + str(rd) + ", " + signedImm12(immval) + "(x" + str(rs1) + ") # perform operation \n"
+    #print("Error: %s type not implemented yet" % test)
+  elif (test in stypes):
+    print("Error: %s type not implemented yet" % test)
+  elif (test in btypes):
+    print("Error: %s type not implemented yet" % test)
   else:
     pass
     #print("Error: %s type not implemented yet" % test)
@@ -130,12 +161,12 @@ def make_rd_maxvals(test, storecmd, xlen):
 def make_rd_rs1_eqval(test, storecmd, xlen):
   [rs1, rs2, rd, rs1val, rs2val, immval, rdval] = randomize()
   desc = "cmp_rdm_rs1_eqval (Test rs1 = rd = " + hex(rs1val) + ")"
-  writeCovVector(desc, rs1, 0, rd, rs1val, rs2val, immval, rdval, test, storecmd, xlen)
+  writeCovVector(desc, rs1, 0, rd, rdval, rs2val, immval, rdval, test, storecmd, xlen)
 
 def make_rd_rs2_eqval(test, storecmd, xlen):
   [rs1, rs2, rd, rs1val, rs2val, immval, rdval] = randomize()
   desc = "cmp_rd_rs2_eqval (Test rs2 = rd = " + hex(rs2val) + ")"
-  writeCovVector(desc, 0, rs2, rd, rs1val, rs2val, immval, rdval, test, storecmd, xlen)
+  writeCovVector(desc, 0, rs2, rd, rs1val, rdval, immval, rdval, test, storecmd, xlen)
 
 def make_rs1_rs2_eqval(test, storecmd, xlen):
   [rs1, rs2, rd, rs1val, rs2val, immval, rdval] = randomize()
@@ -238,6 +269,7 @@ def getcovergroups(coverdefdir, coverfiles):
       if (m):
         coverpoints[curinstr].append(m.group(1))
     f.close()
+    print(coverpoints)
     return coverpoints
 
 ##################################
@@ -258,6 +290,8 @@ shiftitype = ["slli", "srli", "srai"]
 itype = ["addi", "slti", "sltiu", "xori", "ori", "andi"]
 stypes = ["sb", "sh", "sw", "sd"]
 btypes = ["beq", "bne", "blt", "bge", "bltu", "bgeu"]
+# TODO: auipc is missing; check what else is missing from the instruction-type lists above
+
 coverpoints = getcovergroups(coverdefdir, coverfiles)
 
 author = "David_Harris@hmc.edu"

From 29d7cd56634caab652490b2d85c8a95cccf23280 Mon Sep 17 00:00:00 2001
From: Quswar Abid <quswarabid@gmail.com>
Date: Sat, 25 May 2024 23:16:07 -0700
Subject: [PATCH 28/30] unwanted comments

---
 tests/testgen/covergen.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/tests/testgen/covergen.py b/tests/testgen/covergen.py
index 03edc2f90..53313d9ab 100755
--- a/tests/testgen/covergen.py
+++ b/tests/testgen/covergen.py
@@ -37,17 +37,6 @@ def signedImm20(imm):
     imm = imm - 0x100000
   return str(imm)
 
-'''
-rtype = ["add", "sub", "sll", "slt", "sltu", "xor", "srl", "sra", "or", "and",
-          "addw", "subw", "sllw", "srlw", "sraw"
-          "mul", "mulh", "mulhsu", "mulhu", "div", "divu", "rem", "remu",
-          "mulw", "divw", "divuw", "remw", "remuw"]
-loaditype = ["lb", "lh", "lw", "ld", "lbu", "lhu", "lwu"]
-shiftitype = ["slli", "srli", "srai"]
-itype = ["addi", "slti", "sltiu", "xori", "ori", "andi"]
-stypes = ["sb", "sh", "sw", "sd"]
-btypes = ["beq", "bne", "blt", "bge", "bltu", "bgeu"]
-'''
 def writeCovVector(desc, rs1, rs2, rd, rs1val, rs2val, immval, rdval, test, storecmd, xlen):
   lines = "\n# Testcase " + str(desc) + "\n"
   if (rs1val < 0):
@@ -66,15 +55,9 @@ def writeCovVector(desc, rs1, rs2, rd, rs1val, rs2val, immval, rdval, test, stor
     lines = lines + "li x" + str(rs1) + ", " + formatstr.format(rs1val) + " # initialize rs1 to a random value \n"
     lines = lines + test + " x" + str(rd) + ", x" + str(rs1) + ", " + signedImm12(immval) + " # perform operation\n"
   elif (test in loaditype):
-    '''
-    auipc	s9,0x2
-    addi	s9,s9,-448 # 80002800 <mtrap_sigptr+0x7f0>
-    lw	a4,-2048(s9)
-    '''
     lines = lines + "auipc x" + str(rs1) + ", 0x20" + " # add upper immediate value to pc \n"
     lines = lines + "addi x" + str(rs1) + ", x" + str(rs1) + ", " + signedImm12(immval) + " # add immediate to lower part of rs1 \n"
     lines = lines + test + " x" + str(rd) + ", " + signedImm12(immval) + "(x" + str(rs1) + ") # perform operation \n"
-    #print("Error: %s type not implemented yet" % test)
   elif (test in stypes):
     print("Error: %s type not implemented yet" % test)
   elif (test in btypes):

From 1bf9b1395325e33cdaee850fd5ee2fb0d187eef3 Mon Sep 17 00:00:00 2001
From: Quswar Abid <quswarabid@gmail.com>
Date: Sun, 26 May 2024 03:47:08 -0700
Subject: [PATCH 29/30] added some sb types

---
 tests/testgen/covergen.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/tests/testgen/covergen.py b/tests/testgen/covergen.py
index 53313d9ab..233fa3495 100755
--- a/tests/testgen/covergen.py
+++ b/tests/testgen/covergen.py
@@ -54,14 +54,25 @@ def writeCovVector(desc, rs1, rs2, rd, rs1val, rs2val, immval, rdval, test, stor
   elif (test in itype):
     lines = lines + "li x" + str(rs1) + ", " + formatstr.format(rs1val) + " # initialize rs1 to a random value \n"
     lines = lines + test + " x" + str(rd) + ", x" + str(rs1) + ", " + signedImm12(immval) + " # perform operation\n"
-  elif (test in loaditype):
+  elif (test in loaditype):#["lb", "lh", "lw", "ld", "lbu", "lhu", "lwu"]
     lines = lines + "auipc x" + str(rs1) + ", 0x20" + " # add upper immediate value to pc \n"
     lines = lines + "addi x" + str(rs1) + ", x" + str(rs1) + ", " + signedImm12(immval) + " # add immediate to lower part of rs1 \n"
     lines = lines + test + " x" + str(rd) + ", " + signedImm12(immval) + "(x" + str(rs1) + ") # perform operation \n"
-  elif (test in stypes):
-    print("Error: %s type not implemented yet" % test)
-  elif (test in btypes):
-    print("Error: %s type not implemented yet" % test)
+  elif (test in stypes):#["sb", "sh", "sw", "sd"]
+    #lines = lines + test + " x" + str(rs2) + ", " + signedImm12(immval) + "(x" + str(rs1) + ") # perform operation \n"
+    lines = lines + test + " x" + str(rs2) + ", " "0(x" + str(rs1) + ") # perform operation \n"
+    #print("Error: %s type not implemented yet" % test)
+  elif (test in btypes):#["beq", "bne", "blt", "bge", "bltu", "bgeu"]
+    if (randint(1,100) > 50):
+      rs1val = rs2val
+      lines = lines + "# same values in both registers\n"
+    lines = lines + "nop \n"
+    lines = lines + "li x" + str(rs1) + ", " + formatstr.format(rs1val) + " # initialize rs1 to a random value that should get changed\n"
+    lines = lines + "li x" + str(rs2) + ", " + formatstr.format(rs2val) + " # initialize rs2 to a random value that should get changed\n"
+    lines = lines + test + " x" + str(rs1) + ", x" + str(rs2) + ", some_label_for_sb_types_" + str(immval) + "+4" + " # perform operation \n"
+    lines = lines + "some_label_for_sb_types_" + str(immval) + ":\n"
+    lines = lines + "nop \nnop \nnop \nnop \nnop \n"
+    #print("Error: %s type not implemented yet" % test)
   else:
     pass
     #print("Error: %s type not implemented yet" % test)

From 997b5901cc47f2a6237c7be6bef8d7c16ddc8e2a Mon Sep 17 00:00:00 2001
From: Quswar Abid <quswarabid@gmail.com>
Date: Mon, 27 May 2024 04:27:50 -0700
Subject: [PATCH 30/30] sb types are all passing, loaditypes are not!

---
 tests/testgen/covergen.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/tests/testgen/covergen.py b/tests/testgen/covergen.py
index 233fa3495..5a01b7c62 100755
--- a/tests/testgen/covergen.py
+++ b/tests/testgen/covergen.py
@@ -66,13 +66,14 @@ def writeCovVector(desc, rs1, rs2, rd, rs1val, rs2val, immval, rdval, test, stor
     if (randint(1,100) > 50):
       rs1val = rs2val
       lines = lines + "# same values in both registers\n"
-    lines = lines + "nop \n"
+    lines = lines + "nop\n"
     lines = lines + "li x" + str(rs1) + ", " + formatstr.format(rs1val) + " # initialize rs1 to a random value that should get changed\n"
     lines = lines + "li x" + str(rs2) + ", " + formatstr.format(rs2val) + " # initialize rs2 to a random value that should get changed\n"
     lines = lines + test + " x" + str(rs1) + ", x" + str(rs2) + ", some_label_for_sb_types_" + str(immval) + "+4" + " # perform operation \n"
+    lines = lines + "addi x0, x1, 1\n"
     lines = lines + "some_label_for_sb_types_" + str(immval) + ":\n"
-    lines = lines + "nop \nnop \nnop \nnop \nnop \n"
-    #print("Error: %s type not implemented yet" % test)
+    lines = lines + "addi x0, x2, 2\n"
+    lines = lines + "nop\nnop\nnop\nnop\nnop\n"
   else:
     pass
     #print("Error: %s type not implemented yet" % test)
@@ -232,17 +233,37 @@ def write_tests(coverpoints, test, storecmd, xlen):
     elif (coverpoint == "cp_rs2_sign"):
       make_rs2_sign(test, storecmd, xlen)
     elif (coverpoint == "cp_rd_sign"):
-      pass # hope already covered by rd_maxvals
+      pass #TODO hope already covered by rd_maxvals
     elif (coverpoint == "cr_rs1_rs2"):
       make_cr_rs1_rs2_sign(test, storecmd, xlen)
     elif (coverpoint == "cp_rs1_toggle"):
-      pass # toggle not needed and seems to be covered by other things
+      pass #TODO toggle not needed and seems to be covered by other things
     elif (coverpoint == "cp_rs2_toggle"):
-      pass # toggle not needed and seems to be covered by other things
+      pass #TODO toggle not needed and seems to be covered by other things
     elif (coverpoint == "cp_rd_toggle"):
-      pass # toggle not needed and seems to be covered by other things
+      pass #TODO toggle not needed and seems to be covered by other things
     elif (coverpoint == "cp_gpr_hazard"):
-      pass # not yet implemented
+      pass #TODO not yet implemented
+    elif (coverpoint == "cp_imm_sign"):
+      pass #TODO
+    elif (coverpoint == "cr_rs1_imm"):
+      pass #TODO (skip if crosses are not needed)
+    elif (coverpoint == "cp_imm_ones_zeros"):
+      pass #TODO
+    elif (coverpoint == "cp_mem_hazard"):
+      pass #TODO
+    elif (coverpoint == "cp_imm_zero"):
+      pass #TODO
+    elif (coverpoint == "cp_mem_unaligned"):
+      pass #TODO
+    elif (coverpoint == "cp_offset"):
+      pass #TODO
+    elif (coverpoint == "cr_nord_rs1_rs2"):
+      pass #TODO (skip if crosses are not needed)
+    elif (coverpoint == "cp_imm_shift"):
+      pass #TODO
+    elif (coverpoint == "cp_rd_boolean"):
+      pass #TODO
     else:
       print("Warning: " + coverpoint + " not implemented yet for " + test)