diff --git a/testsBP/crt0/Makefile b/testsBP/crt0/Makefile
index ab47384f..2af43a40 100644
--- a/testsBP/crt0/Makefile
+++ b/testsBP/crt0/Makefile
@@ -4,12 +4,12 @@ ROOT		:= ..
 LIBRARY_DIRS	:= 
 LIBRARY_FILES	:=
 
-MARCH           :=-march=rv64ic
-MABI            :=-mabi=lp64
+MARCH           :=-march=rv64imfdc
+MABI            :=-mabi=lp64d
 LINK_FLAGS      :=$(MARCH) $(MABI) -nostartfiles
 
-AFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64 -W
-CFLAGS =$(MARCH) $(MABI) -march=rv64ic -mabi=lp64  -mcmodel=medany 
+AFLAGS =$(MARCH) $(MABI) -W
+CFLAGS =$(MARCH) $(MABI) -mcmodel=medany  -O2
 AS=riscv64-unknown-elf-as
 CC=riscv64-unknown-elf-gcc
 AR=riscv64-unknown-elf-ar
@@ -19,7 +19,7 @@ all: libcrt0.a
 %.o: %.s
 	${AS} ${AFLAGS} -c $< -o $@
 
-libcrt0.a: start.o
+libcrt0.a: start.o pcnt_driver.o pre_main.o
 	${AR} -r $@ $^
 
 clean:
diff --git a/testsBP/crt0/start.s b/testsBP/crt0/start.s
index 19a240d8..731a61e3 100644
--- a/testsBP/crt0/start.s
+++ b/testsBP/crt0/start.s
@@ -43,11 +43,10 @@ _start:
 
 
 
-	# set the stack pointer to the top of memory
-	# 0x8000_0000 + 64K - 8 bytes
-	li sp, 0x007FFFF8
+	# set the stack pointer to the top of memory - 8 bytes (pointer size)
+	li sp, 0x07FFFFF8
 
-	jal ra, main
+	jal ra, pre_main
 	jal ra, _halt
 
 .section .text
diff --git a/testsBP/mibench_qsort/Makefile b/testsBP/mibench_qsort/Makefile
index f4d36839..b1cf7b67 100644
--- a/testsBP/mibench_qsort/Makefile
+++ b/testsBP/mibench_qsort/Makefile
@@ -8,7 +8,7 @@ MARCH           :=-march=rv64ic
 MABI            :=-mabi=lp64
 LINK_FLAGS      :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
 
-CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany  -mstrict-align
+CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany  -mstrict-align -O2
 
 CC=riscv64-unknown-elf-gcc
 DA=riscv64-unknown-elf-objdump -d
diff --git a/testsBP/sieve/Makefile b/testsBP/sieve/Makefile
index 1d38d123..9c884f48 100644
--- a/testsBP/sieve/Makefile
+++ b/testsBP/sieve/Makefile
@@ -8,7 +8,7 @@ MARCH           :=-march=rv64ic
 MABI            :=-mabi=lp64
 LINK_FLAGS      :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
 
-CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany  -mstrict-align
+CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany  -mstrict-align -O2
 
 CC=riscv64-unknown-elf-gcc
 DA=riscv64-unknown-elf-objdump -d
diff --git a/testsBP/sieve/sieve.c b/testsBP/sieve/sieve.c
index e8207404..f7d36d95 100644
--- a/testsBP/sieve/sieve.c
+++ b/testsBP/sieve/sieve.c
@@ -66,21 +66,21 @@ int main () {
     
   ans = sieve ();
   //gettimeofday(&after , NULL);
-  if (ans != 1899)
-    printf ("Sieve result wrong, ans = %d, expected 1899", ans);
+  /* /\* /\\* if (ans != 1899) *\\/ *\/ */
+  /* /\* /\\*   printf ("Sieve result wrong, ans = %d, expected 1899", ans); *\\/ *\/ */
 
-  //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) );
+  /* /\* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); *\/ */
 
 
-  printf("Round 2\n");
-  //gettimeofday(&before , NULL);
+  /* /\* printf("Round 2\n"); *\/ */
+  /* //gettimeofday(&before , NULL); */
     
-  ans = sieve ();
-  //gettimeofday(&after , NULL);
-  if (ans != 1899)
-    printf ("Sieve result wrong, ans = %d, expected 1899", ans);
+  /* ans = sieve (); */
+  /* //gettimeofday(&after , NULL); */
+  /* if (ans != 1899) */
+  /*   printf ("Sieve result wrong, ans = %d, expected 1899", ans); */
 
-  //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) ); 
+  /* //printf("Total time elapsed : %.0lf us\n" , time_diff(before , after) );  */
   
   return 0;
 
diff --git a/testsBP/simple/Makefile b/testsBP/simple/Makefile
index 450aacaa..4447f284 100644
--- a/testsBP/simple/Makefile
+++ b/testsBP/simple/Makefile
@@ -8,7 +8,7 @@ MARCH           :=-march=rv64ic
 MABI            :=-mabi=lp64
 LINK_FLAGS      :=$(MARCH) $(MABI) -nostartfiles -Wl,-Map=$(TARGET).map
 
-CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany  -mstrict-align
+CFLAGS =$(MARCH) $(MABI) -Wa,-alhs -Wa,-L -mcmodel=medany  -mstrict-align -O2
 CC=riscv64-unknown-elf-gcc
 DA=riscv64-unknown-elf-objdump -d
 
diff --git a/testsBP/simple/header.h b/testsBP/simple/header.h
index bfe014a4..aab8973f 100644
--- a/testsBP/simple/header.h
+++ b/testsBP/simple/header.h
@@ -5,4 +5,8 @@ int fail();
 int simple_csrbr_test();
 int lbu_test();
 int icache_spill_test();
+void global_hist_0_space_test();
+void global_hist_1_space_test();
+void global_hist_2_space_test();
+void global_hist_3_space_test();
 #endif
diff --git a/testsBP/simple/main.c b/testsBP/simple/main.c
index 0d14fcfb..564b474e 100644
--- a/testsBP/simple/main.c
+++ b/testsBP/simple/main.c
@@ -2,6 +2,10 @@
 
 int main(){
   //int res = icache_spill_test();
+  global_hist_3_space_test();  
+  global_hist_2_space_test();
+  global_hist_1_space_test();
+  global_hist_0_space_test();    
   int res = 1;
   if (res < 0) {
     fail();
diff --git a/wally-pipelined/config/buildroot/wally-constants.vh b/wally-pipelined/config/buildroot/wally-constants.vh
index 43d95863..cc6c27fc 100644
--- a/wally-pipelined/config/buildroot/wally-constants.vh
+++ b/wally-pipelined/config/buildroot/wally-constants.vh
@@ -2,11 +2,14 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: Kmacsaigoren@hmc.edu 31 May 2021
+//              Added constants for checking sv mode and changed existing constants to accommodate
+//              both sv48 and sv39
 //
-// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
-//          These macros should not be changed, except in the event of an
-//          update to the architecture or particularly special circumstances.
+// Purpose: Specify constants necessary for different memory virtualization modes.
+//              These are specific to sv48, defined in section 4.5 of the privileged spec.
+//              However, despite different constants for different modes, the hardware helps distinguish between
+//              each mode.
 //
 // A component of the Wally configurable RISC-V project.
 //
@@ -25,9 +28,16 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
-// Virtual Memory Constants (sv39)
+// Virtual Memory Constants (sv48)
 `define VPN_SEGMENT_BITS 9
-`define VPN_BITS 27
+`define VPN_BITS 36
+`define PPN_HIGH_SEGMENT_BITS 17
 `define PPN_BITS 44
-`define PPN_HIGH_SEGMENT_BITS 26
-`define PA_BITS  56
+`define PA_BITS 56
+`define SVMODE_BITS 4
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8
+`define SV48 9
diff --git a/wally-pipelined/config/busybear/wally-constants.vh b/wally-pipelined/config/busybear/wally-constants.vh
index 43d95863..cc6c27fc 100644
--- a/wally-pipelined/config/busybear/wally-constants.vh
+++ b/wally-pipelined/config/busybear/wally-constants.vh
@@ -2,11 +2,14 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: Kmacsaigoren@hmc.edu 31 May 2021
+//              Added constants for checking sv mode and changed existing constants to accommodate
+//              both sv48 and sv39
 //
-// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
-//          These macros should not be changed, except in the event of an
-//          update to the architecture or particularly special circumstances.
+// Purpose: Specify constants necessary for different memory virtualization modes.
+//              These are specific to sv48, defined in section 4.5 of the privileged spec.
+//              However, despite different constants for different modes, the hardware helps distinguish between
+//              each mode.
 //
 // A component of the Wally configurable RISC-V project.
 //
@@ -25,9 +28,16 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
-// Virtual Memory Constants (sv39)
+// Virtual Memory Constants (sv48)
 `define VPN_SEGMENT_BITS 9
-`define VPN_BITS 27
+`define VPN_BITS 36
+`define PPN_HIGH_SEGMENT_BITS 17
 `define PPN_BITS 44
-`define PPN_HIGH_SEGMENT_BITS 26
-`define PA_BITS  56
+`define PA_BITS 56
+`define SVMODE_BITS 4
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8
+`define SV48 9
diff --git a/wally-pipelined/config/coremark/wally-constants.vh b/wally-pipelined/config/coremark/wally-constants.vh
index 43d95863..cc6c27fc 100644
--- a/wally-pipelined/config/coremark/wally-constants.vh
+++ b/wally-pipelined/config/coremark/wally-constants.vh
@@ -2,11 +2,14 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: Kmacsaigoren@hmc.edu 31 May 2021
+//              Added constants for checking sv mode and changed existing constants to accommodate
+//              both sv48 and sv39
 //
-// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
-//          These macros should not be changed, except in the event of an
-//          update to the architecture or particularly special circumstances.
+// Purpose: Specify constants necessary for different memory virtualization modes.
+//              These are specific to sv48, defined in section 4.5 of the privileged spec.
+//              However, despite different constants for different modes, the hardware helps distinguish between
+//              each mode.
 //
 // A component of the Wally configurable RISC-V project.
 //
@@ -25,9 +28,16 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
-// Virtual Memory Constants (sv39)
+// Virtual Memory Constants (sv48)
 `define VPN_SEGMENT_BITS 9
-`define VPN_BITS 27
+`define VPN_BITS 36
+`define PPN_HIGH_SEGMENT_BITS 17
 `define PPN_BITS 44
-`define PPN_HIGH_SEGMENT_BITS 26
-`define PA_BITS  56
+`define PA_BITS 56
+`define SVMODE_BITS 4
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8
+`define SV48 9
diff --git a/wally-pipelined/config/coremark_bare/wally-constants.vh b/wally-pipelined/config/coremark_bare/wally-constants.vh
index 43d95863..cc6c27fc 100644
--- a/wally-pipelined/config/coremark_bare/wally-constants.vh
+++ b/wally-pipelined/config/coremark_bare/wally-constants.vh
@@ -2,11 +2,14 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: Kmacsaigoren@hmc.edu 31 May 2021
+//              Added constants for checking sv mode and changed existing constants to accommodate
+//              both sv48 and sv39
 //
-// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
-//          These macros should not be changed, except in the event of an
-//          update to the architecture or particularly special circumstances.
+// Purpose: Specify constants necessary for different memory virtualization modes.
+//              These are specific to sv48, defined in section 4.5 of the privileged spec.
+//              However, despite different constants for different modes, the hardware helps distinguish between
+//              each mode.
 //
 // A component of the Wally configurable RISC-V project.
 //
@@ -25,9 +28,16 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
-// Virtual Memory Constants (sv39)
+// Virtual Memory Constants (sv48)
 `define VPN_SEGMENT_BITS 9
-`define VPN_BITS 27
+`define VPN_BITS 36
+`define PPN_HIGH_SEGMENT_BITS 17
 `define PPN_BITS 44
-`define PPN_HIGH_SEGMENT_BITS 26
-`define PA_BITS  56
+`define PA_BITS 56
+`define SVMODE_BITS 4
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8
+`define SV48 9
diff --git a/wally-pipelined/config/rv32ic/wally-constants.vh b/wally-pipelined/config/rv32ic/wally-constants.vh
index ec4a48b4..f4c5ce9a 100644
--- a/wally-pipelined/config/rv32ic/wally-constants.vh
+++ b/wally-pipelined/config/rv32ic/wally-constants.vh
@@ -2,7 +2,10 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: kmacsaigoren@hmc.edu 31 May 2021
+//              added svmode constants. These aren't strictly necessary since we're just checking one bit,
+//              but they're here to stay consistent and to make sure we don't wind up in
+//              a "NO_TRANSLATE undefined" situation.
 //
 // Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
 //          These macros should not be changed, except in the event of an
@@ -31,3 +34,10 @@
 `define PPN_BITS 22
 `define PPN_HIGH_SEGMENT_BITS 12
 `define PA_BITS  34
+`define SVMODE_BITS 1
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8 // These two are only here to stop
+`define SV48 9 // the verilator from yelling at me
diff --git a/wally-pipelined/config/rv64BP/wally-config.vh b/wally-pipelined/config/rv64BP/wally-config.vh
index 17a8c284..f85e0c22 100644
--- a/wally-pipelined/config/rv64BP/wally-config.vh
+++ b/wally-pipelined/config/rv64BP/wally-config.vh
@@ -32,7 +32,7 @@
 `define XLEN 64
 
 //`define MISA (32'h00000105)
-`define MISA (32'h00000104 | 1<<5 | 1<<18 | 1 << 20 | 1 << 12 | 1 << 0)
+`define MISA (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
 `define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
 `define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
 `define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
@@ -107,8 +107,9 @@
 /* verilator lint_off ASSIGNDLY */
 /* verilator lint_off PINCONNECTEMPTY */
 
-`define TWO_BIT_PRELOAD "../config/rv64icfd/twoBitPredictor.txt"
-`define BTB_PRELOAD "../config/rv64icfd/BTBPredictor.txt"
+`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt"
+`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt"
 `define BPRED_ENABLED 1
-`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
+//`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
+`define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL"  or BPLOCALPAg or BPGSHARE
 `define TESTSBP 1
diff --git a/wally-pipelined/config/rv64BP/wally-constants.vh b/wally-pipelined/config/rv64BP/wally-constants.vh
index 43d95863..cc6c27fc 100644
--- a/wally-pipelined/config/rv64BP/wally-constants.vh
+++ b/wally-pipelined/config/rv64BP/wally-constants.vh
@@ -2,11 +2,14 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: Kmacsaigoren@hmc.edu 31 May 2021
+//              Added constants for checking sv mode and changed existing constants to accommodate
+//              both sv48 and sv39
 //
-// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
-//          These macros should not be changed, except in the event of an
-//          update to the architecture or particularly special circumstances.
+// Purpose: Specify constants necessary for different memory virtualization modes.
+//              These are specific to sv48, defined in section 4.5 of the privileged spec.
+//              However, despite different constants for different modes, the hardware helps distinguish between
+//              each mode.
 //
 // A component of the Wally configurable RISC-V project.
 //
@@ -25,9 +28,16 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
-// Virtual Memory Constants (sv39)
+// Virtual Memory Constants (sv48)
 `define VPN_SEGMENT_BITS 9
-`define VPN_BITS 27
+`define VPN_BITS 36
+`define PPN_HIGH_SEGMENT_BITS 17
 `define PPN_BITS 44
-`define PPN_HIGH_SEGMENT_BITS 26
-`define PA_BITS  56
+`define PA_BITS 56
+`define SVMODE_BITS 4
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8
+`define SV48 9
diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh
index 259e41ae..12d254ba 100644
--- a/wally-pipelined/config/rv64ic/wally-config.vh
+++ b/wally-pipelined/config/rv64ic/wally-config.vh
@@ -31,7 +31,7 @@
 `define XLEN 64
 
 // MISA RISC-V configuration per specification
-`define MISA (32'h00000104 | 0 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
+`define MISA (32'h00000104 | 0 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0)
 `define A_SUPPORTED ((`MISA >> 0) % 2 == 1)
 `define C_SUPPORTED ((`MISA >> 2) % 2 == 1)
 `define D_SUPPORTED ((`MISA >> 3) % 2 == 1)
diff --git a/wally-pipelined/config/rv64ic/wally-constants.vh b/wally-pipelined/config/rv64ic/wally-constants.vh
index 43d95863..cc6c27fc 100644
--- a/wally-pipelined/config/rv64ic/wally-constants.vh
+++ b/wally-pipelined/config/rv64ic/wally-constants.vh
@@ -2,11 +2,14 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: Kmacsaigoren@hmc.edu 31 May 2021
+//              Added constants for checking sv mode and changed existing constants to accommodate
+//              both sv48 and sv39
 //
-// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
-//          These macros should not be changed, except in the event of an
-//          update to the architecture or particularly special circumstances.
+// Purpose: Specify constants necessary for different memory virtualization modes.
+//              These are specific to sv48, defined in section 4.5 of the privileged spec.
+//              However, despite different constants for different modes, the hardware helps distinguish between
+//              each mode.
 //
 // A component of the Wally configurable RISC-V project.
 //
@@ -25,9 +28,16 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
-// Virtual Memory Constants (sv39)
+// Virtual Memory Constants (sv48)
 `define VPN_SEGMENT_BITS 9
-`define VPN_BITS 27
+`define VPN_BITS 36
+`define PPN_HIGH_SEGMENT_BITS 17
 `define PPN_BITS 44
-`define PPN_HIGH_SEGMENT_BITS 26
-`define PA_BITS  56
+`define PA_BITS 56
+`define SVMODE_BITS 4
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8
+`define SV48 9
diff --git a/wally-pipelined/config/rv64icfd/wally-constants.vh b/wally-pipelined/config/rv64icfd/wally-constants.vh
index 43d95863..cc6c27fc 100644
--- a/wally-pipelined/config/rv64icfd/wally-constants.vh
+++ b/wally-pipelined/config/rv64icfd/wally-constants.vh
@@ -2,11 +2,14 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: Kmacsaigoren@hmc.edu 31 May 2021
+//              Added constants for checking sv mode and changed existing constants to accommodate
+//              both sv48 and sv39
 //
-// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
-//          These macros should not be changed, except in the event of an
-//          update to the architecture or particularly special circumstances.
+// Purpose: Specify constants necessary for different memory virtualization modes.
+//              These are specific to sv48, defined in section 4.5 of the privileged spec.
+//              However, despite different constants for different modes, the hardware helps distinguish between
+//              each mode.
 //
 // A component of the Wally configurable RISC-V project.
 //
@@ -25,9 +28,16 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
-// Virtual Memory Constants (sv39)
+// Virtual Memory Constants (sv48)
 `define VPN_SEGMENT_BITS 9
-`define VPN_BITS 27
+`define VPN_BITS 36
+`define PPN_HIGH_SEGMENT_BITS 17
 `define PPN_BITS 44
-`define PPN_HIGH_SEGMENT_BITS 26
-`define PA_BITS  56
+`define PA_BITS 56
+`define SVMODE_BITS 4
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8
+`define SV48 9
diff --git a/wally-pipelined/config/rv64imc/wally-constants.vh b/wally-pipelined/config/rv64imc/wally-constants.vh
index 43d95863..cc6c27fc 100644
--- a/wally-pipelined/config/rv64imc/wally-constants.vh
+++ b/wally-pipelined/config/rv64imc/wally-constants.vh
@@ -2,11 +2,14 @@
 // wally-constants.vh
 //
 // Written: tfleming@hmc.edu 4 March 2021
-// Modified:
+// Modified: Kmacsaigoren@hmc.edu 31 May 2021
+//              Added constants for checking sv mode and changed existing constants to accommodate
+//              both sv48 and sv39
 //
-// Purpose: Specify certain constants defined in the RISC-V 64-bit architecture.
-//          These macros should not be changed, except in the event of an
-//          update to the architecture or particularly special circumstances.
+// Purpose: Specify constants necessary for different memory virtualization modes.
+//              These are specific to sv48, defined in section 4.5 of the privileged spec.
+//              However, despite different constants for different modes, the hardware helps distinguish between
+//              each mode.
 //
 // A component of the Wally configurable RISC-V project.
 //
@@ -25,9 +28,16 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
-// Virtual Memory Constants (sv39)
+// Virtual Memory Constants (sv48)
 `define VPN_SEGMENT_BITS 9
-`define VPN_BITS 27
+`define VPN_BITS 36
+`define PPN_HIGH_SEGMENT_BITS 17
 `define PPN_BITS 44
-`define PPN_HIGH_SEGMENT_BITS 26
-`define PA_BITS  56
+`define PA_BITS 56
+`define SVMODE_BITS 4
+// constants to check SATP_MODE against
+// defined in Table 4.3 of the privileged spec
+`define NO_TRANSLATE 0
+`define SV32 1
+`define SV39 8
+`define SV48 9
diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv
index c876b313..e303f205 100755
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@@ -25,528 +25,455 @@
 `include "wally-config.vh"
 
 module fpu (
-  input  logic [2:0]       FRM_REGW,    // Rounding mode from CSR
-  input  logic             reset,
+  input logic [2:0] 	   FRM_REGW, // Rounding mode from CSR
+  input logic 		   reset,
   //input  logic             clear,     // *** not being used anywhere
-  input  logic             clk,
-  input  logic [31:0]      InstrD,
-  input  logic [`XLEN-1:0] SrcAE,       // Integer input being processed
-  input  logic [`XLEN-1:0] SrcAM,       // Integer input being written into fpreg
-  input  logic 		         StallE, StallM, StallW,
-  input  logic             FlushE, FlushM, FlushW,
-  input  logic [`AHBW-1:0] HRDATA,
-  input  logic             RegWriteD,
-  output logic [4:0]       SetFflagsM,
-  output logic [31:0]      FSROutW,
-  output logic [1:0]       FMemRWM,
-	output logic             FStallD,
-  output logic             FWriteIntE, FWriteIntM, FWriteIntW,
+  input logic 		   clk,
+  input logic [31:0] 	   InstrD,
+  input logic [`XLEN-1:0]  SrcAE, // Integer input being processed
+  input logic [`XLEN-1:0]  SrcAM, // Integer input being written into fpreg
+  input logic 		   StallE, StallM, StallW,
+  input logic 		   FlushE, FlushM, FlushW,
+  input logic [`AHBW-1:0]  HRDATA,
+  input logic 		   RegWriteD,
+  output logic [4:0] 	   SetFflagsM,
+  output logic [31:0] 	   FSROutW,
+  output logic [1:0] 	   FMemRWM,
+  output logic 		   FStallD,
+  output logic 		   FWriteIntE, FWriteIntM, FWriteIntW,
   output logic [`XLEN-1:0] FWriteDataM,
-  output logic             FDivSqrtDoneM,
-  output logic             IllegalFPUInstrD,
+  output logic 		   FDivSqrtDoneM,
+  output logic 		   IllegalFPUInstrD,
   output logic [`XLEN-1:0] FPUResultW);
 
-
-
-
-
-  //control logic signal instantiation
-  logic             FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW;             // FP register write enable
-  logic [2:0]       FrmD, FrmE, FrmM, FrmW;                                 // FP rounding mode
-  logic             FmtD, FmtE, FmtM, FmtW;                                 // FP precision 0-single 1-double
-  logic             FDivStartD, FDivStartE;                                 // Start division
-  logic             FWriteIntD;                                 // Write to integer register
-  logic             FOutputInput2D, FOutputInput2E;                         // Put Input2 in Input1 if a store instruction
-  logic [1:0]       FMemRWD, FMemRWE;                                       // Read and write enable for memory
-  logic [1:0]       FForwardInput1D, FForwardInput1E;                       // Input1 forwarding mux control signal
-  logic [1:0]       FForwardInput2D, FForwardInput2E;                       // Input2 forwarding mux control signal
-  logic             FForwardInput3D, FForwardInput3E;                       // Input3 forwarding mux control signal
-  logic             FInput2UsedD;                                           // Is input 2 used
-  logic             FInput3UsedD;                                           // Is input 3 used
-  logic [2:0]       FResultSelD, FResultSelE, FResultSelM, FResultSelW;     // Select FP result
-  logic [3:0]       FOpCtrlD, FOpCtrlE, FOpCtrlM;                           // Select which opperation to do in each component
-  
-  // regfile signals
-  logic [4:0]       RdE, RdM, RdW; // ***Can take from ieu
-  logic [`XLEN-1:0] FWDM;                                                   // Write data for FP register
-  logic [`XLEN-1:0] FRD1D, FRD2D, FRD3D;                                    // Read Data from FP register
-  logic [`XLEN-1:0] FRD1E, FRD2E, FRD3E;
-  logic [`XLEN-1:0] FInput1E, FInput1M, FInput1tmpE;
-  logic [`XLEN-1:0] FInput2E, FInput2M;
-  logic [`XLEN-1:0] FInput3E, FInput3M;
-  logic [`XLEN-1:0] FLoadStoreResultM, FLoadStoreResultW;                   // Result for load, store, and move to int-reg instructions
-
-  // div/sqrt signals
-  logic             DivDenormM, DivDenormW;
-  logic             DivOvEn, DivUnEn;
-  logic             DivBusyM;
-  logic [63:0]      FDivResultM, FDivResultW;
-  logic [4:0]       FDivFlagsM, FDivFlagsW;
-
-  // FMA signals
-  logic [12:0]		  aligncntE, aligncntM; 
-  logic [105:0]		  rE, rM; 
-  logic [105:0]		  sE, sM; 
-  logic [163:0]		  tE, tM;	
-  logic [8:0]		    normcntE, normcntM; 
-  logic [12:0]		  aeE, aeM; 
-  logic 		        bsE, bsM;
-  logic 		        killprodE, killprodM; 
-  logic 		        prodofE, prodofM; 
-  logic			        xzeroE, xzeroM;
-  logic			        yzeroE, yzeroM;
-  logic			        zzeroE, zzeroM;
-  logic			        xdenormE, xdenormM;
-  logic			        ydenormE, ydenormM;
-  logic			        zdenormE, zdenormM;
-  logic			        xinfE, xinfM;
-  logic			        yinfE, yinfM;
-  logic			        zinfE, zinfM;
-  logic			        xnanE, xnanM;
-  logic			        ynanE, ynanM;
-  logic			        znanE, znanM;
-  logic			        nanE, nanM;
-  logic	[8:0]		    sumshiftE, sumshiftM;
-  logic			        sumshiftzeroE, sumshiftzeroM;
-  logic             prodinfE, prodinfM;
-  logic [63:0]      FmaResultM, FmaResultW;
-  logic [4:0]       FmaFlagsM, FmaFlagsW;
-  
-  // add/cvt signals
-  logic [63:0]      AddSumE, AddSumTcE;
-  logic [3:0]       AddSelInvE;
-  logic [10:0]      AddExpPostSumE;
-  logic             AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
-  logic             AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
-  logic             AddConvertE;
-  logic [63:0]      AddFloat1E, AddFloat2E;
-  logic [11:0]      AddExp1DenormE, AddExp2DenormE;
-  logic [10:0]      AddExponentE;
-  logic [2:0]       AddRmE;
-  logic [3:0]       AddOpTypeE;
-  logic             AddPE, AddOvEnE, AddUnEnE;    
-  logic             AddDenormM;
-  logic [63:0]      AddSumM, AddSumTcM;
-  logic [3:0]       AddSelInvM;
-  logic [10:0]      AddExpPostSumM;
-  logic             AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
-  logic             AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
-  logic             AddConvertM, AddSignM;
-  logic [63:0]      AddFloat1M, AddFloat2M;
-  logic [11:0]      AddExp1DenormM, AddExp2DenormM;
-  logic [10:0]      AddExponentM;
-  logic [63:0]      AddOp1M, AddOp2M;
-  logic [2:0]       AddRmM;
-  logic [3:0]       AddOpTypeM;
-  logic             AddPM, AddOvEnM, AddUnEnM;  
-  logic [63:0]      FAddResultM, FAddResultW;
-  logic [4:0]       FAddFlagsM, FAddFlagsW;
-
-  //cmp signals 
-  logic [7:0]       WE, WM;
-  logic [7:0]       XE, XM;
-  logic             ANaNE, ANaNM;
-  logic             BNaNE, BNaNM;
-  logic             AzeroE, AzeroM;
-  logic             BzeroE, BzeroM;
-  logic             CmpInvalidM, CmpInvalidW;
-  logic [1:0]       CmpFCCM, CmpFCCW; 
-  logic [63:0]      FCmpResultM, FCmpResultW;
-
-  // fsgn signals
-  logic [63:0]      SgnResultE, SgnResultM, SgnResultW;
-  logic [4:0]       SgnFlagsE, SgnFlagsM, SgnFlagsW;
-
-  //instantiation of W stage regfile signals
-  logic [`XLEN-1:0] SrcAW;
-
-  // classify signals
-  logic [63:0]      ClassResultE, ClassResultM, ClassResultW;
-
-  // other
-  logic [63:0]      FPUResult64W, FPUResult64E;                                           // 64-bit FPU result
-  logic [4:0]       FPUFlagsW;
-
-  // pipeline control logic
-  logic	                   PipeEnableDE;
-  logic	                   PipeEnableEM;
-  logic	                   PipeEnableMW;
-  logic                    PipeClearDE;
-  logic                    PipeClearEM;
-  logic                    PipeClearMW;
-
-  //temporarily assign pipe clear and enable signals
-  //to never flush & always be running
-  localparam PipeClear = 1'b0;
-  localparam PipeEnable = 1'b1;
-  always_comb begin
-
-	  PipeEnableDE = ~StallE;
-	  PipeEnableEM = ~StallM;
-	  PipeEnableMW = ~StallW;
-	  PipeClearDE = FlushE;
-	  PipeClearEM = FlushM;
-	  PipeClearMW = FlushW;
-
-  end
-
- 
-
-
-
-
-
-
-
-
-
-
-
-  //DECODE STAGE
-
-  //Hazard unit for FPU
-  fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
-
-  //top-level controller for FPU
-  fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
-
-
-  //regfile instantiation
+   // control logic signal instantiation
+   logic 		   FWriteEnD, FWriteEnE, FWriteEnM, FWriteEnW;             // FP register write enable
+   logic [2:0] 		   FrmD, FrmE, FrmM, FrmW;                                 // FP rounding mode
+   logic 		   FmtD, FmtE, FmtM, FmtW;                                 // FP precision 0-single 1-double
+   logic 		   FDivStartD, FDivStartE;                                 // Start division
+   logic 		   FWriteIntD;                                 // Write to integer register
+   logic 		   FOutputInput2D, FOutputInput2E;                         // Put Input2 in Input1 if a store instruction
+   logic [1:0] 		   FMemRWD, FMemRWE;                                       // Read and write enable for memory
+   logic [1:0] 		   FForwardInput1D, FForwardInput1E;                       // Input1 forwarding mux control signal
+   logic [1:0] 		   FForwardInput2D, FForwardInput2E;                       // Input2 forwarding mux control signal
+   logic 		   FForwardInput3D, FForwardInput3E;                       // Input3 forwarding mux control signal
+   logic 		   FInput2UsedD;                                           // Is input 2 used
+   logic 		   FInput3UsedD;                                           // Is input 3 used
+   logic [2:0] 		   FResultSelD, FResultSelE, FResultSelM, FResultSelW;     // Select FP result
+   logic [3:0] 		   FOpCtrlD, FOpCtrlE, FOpCtrlM;                           // Select which opperation to do in each component
+   
+   // regfile signals
+   logic [4:0] 		   RdE, RdM, RdW; // ***Can take from ieu
+   logic [`XLEN-1:0] 	   FWDM;                                                   // Write data for FP register
+   logic [`XLEN-1:0] 	   FRD1D, FRD2D, FRD3D;                                    // Read Data from FP register
+   logic [`XLEN-1:0] 	   FRD1E, FRD2E, FRD3E;
+   logic [`XLEN-1:0] 	   FInput1E, FInput1M, FInput1tmpE;
+   logic [`XLEN-1:0] 	   FInput2E, FInput2M;
+   logic [`XLEN-1:0] 	   FInput3E, FInput3M;
+   logic [`XLEN-1:0] 	   FLoadStoreResultM, FLoadStoreResultW;                   // Result for load, store, and move to int-reg instructions
+   
+   // div/sqrt signals
+   logic 		   DivDenormM, DivDenormW;
+   logic 		   DivOvEn, DivUnEn;
+   logic 		   DivBusyM;
+   logic [63:0] 	   FDivResultM, FDivResultW;
+   logic [4:0] 		   FDivFlagsM, FDivFlagsW;
+   
+   // FMA signals
+   logic [12:0] 	   aligncntE, aligncntM; 
+   logic [105:0] 	   rE, rM; 
+   logic [105:0] 	   sE, sM; 
+   logic [163:0] 	   tE, tM;	
+   logic [8:0] 		   normcntE, normcntM; 
+   logic [12:0] 	   aeE, aeM; 
+   logic 		   bsE, bsM;
+   logic 		   killprodE, killprodM; 
+   logic 		   prodofE, prodofM; 
+   logic 		   xzeroE, xzeroM;
+   logic 		   yzeroE, yzeroM;
+   logic 		   zzeroE, zzeroM;
+   logic 		   xdenormE, xdenormM;
+   logic 		   ydenormE, ydenormM;
+   logic 		   zdenormE, zdenormM;
+   logic 		   xinfE, xinfM;
+   logic 		   yinfE, yinfM;
+   logic 		   zinfE, zinfM;
+   logic 		   xnanE, xnanM;
+   logic 		   ynanE, ynanM;
+   logic 		   znanE, znanM;
+   logic 		   nanE, nanM;
+   logic [8:0] 		   sumshiftE, sumshiftM;
+   logic 		   sumshiftzeroE, sumshiftzeroM;
+   logic 		   prodinfE, prodinfM;
+   logic [63:0] 	   FmaResultM, FmaResultW;
+   logic [4:0] 		   FmaFlagsM, FmaFlagsW;
+   
+   // add/cvt signals
+   logic [63:0] 	   AddSumE, AddSumTcE;
+   logic [3:0] 		   AddSelInvE;
+   logic [10:0] 	   AddExpPostSumE;
+   logic 		   AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE;
+   logic 		   AddDenormInE, AddSwapE, AddNormOvflowE, AddSignAE;
+   logic 		   AddConvertE;
+   logic [63:0] 	   AddFloat1E, AddFloat2E;
+   logic [11:0] 	   AddExp1DenormE, AddExp2DenormE;
+   logic [10:0] 	   AddExponentE;
+   logic [2:0] 		   AddRmE;
+   logic [3:0] 		   AddOpTypeE;
+   logic 		   AddPE, AddOvEnE, AddUnEnE;    
+   logic 		   AddDenormM;
+   logic [63:0] 	   AddSumM, AddSumTcM;
+   logic [3:0] 		   AddSelInvM;
+   logic [10:0] 	   AddExpPostSumM;
+   logic 		   AddCorrSignM, AddOp1NormM, AddOp2NormM, AddOpANormM, AddOpBNormM, AddInvalidM;
+   logic 		   AddDenormInM, AddSwapM, AddNormOvflowM, AddSignAM;
+   logic 		   AddConvertM, AddSignM;
+   logic [63:0] 	   AddFloat1M, AddFloat2M;
+   logic [11:0] 	   AddExp1DenormM, AddExp2DenormM;
+   logic [10:0] 	   AddExponentM;
+   logic [63:0] 	   AddOp1M, AddOp2M;
+   logic [2:0] 		   AddRmM;
+   logic [3:0] 		   AddOpTypeM;
+   logic 		   AddPM, AddOvEnM, AddUnEnM;  
+   logic [63:0] 	   FAddResultM, FAddResultW;
+   logic [4:0] 		   FAddFlagsM, FAddFlagsW;
+   
+   // cmp signals 
+   logic [7:0] 		   WE, WM;
+   logic [7:0] 		   XE, XM;
+   logic 		   ANaNE, ANaNM;
+   logic 		   BNaNE, BNaNM;
+   logic 		   AzeroE, AzeroM;
+   logic 		   BzeroE, BzeroM;
+   logic 		   CmpInvalidM, CmpInvalidW;
+   logic [1:0] 		   CmpFCCM, CmpFCCW; 
+   logic [63:0] 	   FCmpResultM, FCmpResultW;
+   
+   // fsgn signals
+   logic [63:0] 	   SgnResultE, SgnResultM, SgnResultW;
+   logic [4:0] 		   SgnFlagsE, SgnFlagsM, SgnFlagsW;
+   
+   // instantiation of W stage regfile signals
+   logic [`XLEN-1:0] 	   SrcAW;
+   
+   // classify signals
+   logic [63:0] 	   ClassResultE, ClassResultM, ClassResultW;
+   
+   // 64-bit FPU result   
+   logic [63:0] 	   FPUResult64W, FPUResult64E;                                           
+   logic [4:0] 		   FPUFlagsW;
+   
+   // pipeline control logic
+   logic 		   PipeEnableDE;
+   logic 		   PipeEnableEM;
+   logic 		   PipeEnableMW;
+   logic 		   PipeClearDE;
+   logic 		   PipeClearEM;
+   logic 		   PipeClearMW;
+   
+   // temporarily assign pipe clear and enable signals
+   // to never flush & always be running
+   localparam PipeClear = 1'b0;
+   localparam PipeEnable = 1'b1;
+   always_comb begin      
+      PipeEnableDE = ~StallE;
+      PipeEnableEM = ~StallM;
+      PipeEnableMW = ~StallW;
+      PipeClearDE = FlushE;
+      PipeClearEM = FlushM;
+      PipeClearMW = FlushW;      
+   end
+   
+   //DECODE STAGE
+   
+   // Hazard unit for FPU
+   fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
+   
+   // top-level controller for FPU
+   fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
+   
+   // regfile instantiation
    FPregfile fpregfile (clk, reset, FWriteEnW,
 			InstrD[19:15], InstrD[24:20], InstrD[31:27], RdW,
 			FPUResult64W,
 			FRD1D, FRD2D, FRD3D);	
-
-
-
-
-
-
-
-
-
-  //*****************
-  //fpregfile D/E pipe registers
-  //*****************
-  flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
-  flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
-  flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
-
-  //*****************
-  //other  D/E pipe registers
-  //*****************
-  flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
-  flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
-  flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
-  flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
-  flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
-  flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
-  flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
-  flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
-  flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
-  flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
-  flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
-  flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
-  flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
-  flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
-
-
-
-
-
-
-
-
-
-
-
-
-
-  //EXECUTION STAGE
-
-
-
-  // input muxs for forwarding
-  mux4  #(64)  FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
-  mux3  #(64)  FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
-  mux2  #(64)  FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
-  mux2  #(64)  FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
-
-  fma1 fma1 (.*);
-
-  //first and only instance of floating-point divider
-  fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .*);
-
-  //first of two-stage instance of floating-point add/cvt unit
-  fpuaddcvt1 fpadd1 (.*);
-
-  //first of two-stage instance of floating-point comparator
-  fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
-
-  //first and only instance of floating-point sign converter
-  fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
-
-  //first and only instance of floating-point classify unit
-  fpuclassify fpuclass (.*);
-
-  
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  //*****************
-  //fpregfile D/E pipe registers
-  //*****************
-  flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
-  flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
-  flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
-
-  //*****************
-  //fma E/M pipe registers
-  //*****************  
-  flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM); 
-  flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM); 
-  flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM); 
-  flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM); 
-  flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM); 
-  flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM);  
-  flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM); 
-  flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); 
-  flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); 
-  flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); 
-  flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM); 
-  flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM); 
-  flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); 
-  flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); 
-  flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); 
-  flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); 
-  flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM); 
-  flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM); 
-  flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); 
-  flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM); 
-  flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM); 
-  flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); 
-  flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); 
-  flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); 
-  flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM); 
-
-  //*****************
-  //fpadd E/M pipe registers
-  //*****************
-  flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); 
-  flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); 
-  flopenrc #(4)  EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); 
-  flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); 
-  flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); 
-  flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM); 
-  flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); 
-  flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); 
-  flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); 
-  flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); 
-  flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); 
-  flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); 
-  flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM); 
-  flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); 
-  flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM); 
-  flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); 
-  flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); 
-  flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); 
-  flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); 
-  flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); 
-  flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); 
-  flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); 
-  flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); 
-  flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); 
-  flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM); 
-
-  //*****************
-  //fpcmp E/M pipe registers
-  //*****************
-  flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); 
-  flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); 
-  flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM); 
-  flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); 
-  flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); 
-  flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); 
-
-  //put this in for the event we want to delay fsgn - will otherwise bypass
-  //*****************
-  //fpsgn E/M pipe registers
-  //***************** 
-  flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
-  flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
-
-  //*****************
-  //other E/M pipe registers
-  //*****************
-  flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
-  flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
-  flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
-  flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
-  flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
-  flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
-  flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
-  flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
-
-  //*****************
-  //fpuclassify E/M pipe registers
-  //***************** 
-  flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM);
-
-
-
-
-
-
-
-
-  //BEGIN MEMORY STAGE
-
-  assign FWriteDataM = FInput1M;
-
-  mux2  #(64)  FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
-
-  fma2 fma2(.*);
-
-  //second instance of two-stage floating-point add/cvt unit
-  fpuaddcvt2 fpadd2 (.*);
-
-  //second instance of two-stage floating-point comparator
-  fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
-
-
-
-
-
-
-
-
-
-
-  
-  //*****************
-  //fma M/W pipe registers
-  //*****************
-  flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); 
-  flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); 
-
-  //*****************
-  //fpdiv M/W pipe registers
-  //*****************
-  flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); 
-  flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
-  flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); 
-
-  //*****************
-  //fpadd M/W pipe registers
-  //*****************
-  flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); 
-  flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); 
-
-  //*****************
-  //fpcmp M/W pipe registers
-  //*****************
-  flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); 
-  flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); 
-  flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW); 
-
-  //*****************
-  //fpsgn M/W pipe registers
-  //***************** 
-  flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW);
-  flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW);
-
-  //*****************
-  //other M/W pipe registers
-  //*****************
-  flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
-  flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
-  flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
-  flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
-  flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
-  flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
-  flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
-
-
-  //*****************
-  //fpuclassify M/W pipe registers
-  //***************** 
-  flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
-
-
-
-
-
-
-
+   
+   //*****************
+   // fpregfile D/E pipe registers
+   //*****************
+   flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
+   flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
+   flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
+   
+   //*****************
+   // other  D/E pipe registers
+   //*****************
+   flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
+   flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
+   flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
+   flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
+   flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
+   flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
+   flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
+   flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
+   flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
+   flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
+   flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
+   flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
+   flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
+   flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
+   
+   //EXECUTION STAGE
+   
+   // input muxs for forwarding
+   mux4  #(64)  FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, SrcAM, FForwardInput1E, FInput1tmpE);
+   mux3  #(64)  FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
+   mux2  #(64)  FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
+   mux2  #(64)  FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
+   
+   fma1 fma1 (.*);
+   
+   // first and only instance of floating-point divider; it runs on fpdivClk, which
+   // the clock gater passes only while FDivStartE or DivBusyM has been latched high
+   logic fpdivClk;
+   
+   clockgater fpdivclkg(.E(FDivStartE), .SE(DivBusyM), .CLK(clk), .ECLK(fpdivClk));
+   
+   // .clk is explicitly overridden with the gated clock; all remaining ports must
+   // still be bound by the implicit .* or the divider is left unconnected
+   fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .*);
+   
+   // first of two-stage instance of floating-point add/cvt unit
+   fpuaddcvt1 fpadd1 (.*);
+   
+   // first of two-stage instance of floating-point comparator
+   fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
+   
+   // first and only instance of floating-point sign converter
+   fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
+   
+   // first and only instance of floating-point classify unit
+   fpuclassify fpuclass (.*);
+   
+   //*****************
+   // fpregfile E/M pipe registers
+   //*****************
+   flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
+   flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
+   flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
+   
+   //*****************
+   // fma E/M pipe registers
+   //*****************  
+   flopenrc #(13) EMRegFma1(clk, reset, PipeClearEM, PipeEnableEM, aligncntE, aligncntM); 
+   flopenrc #(106) EMRegFma2(clk, reset, PipeClearEM, PipeEnableEM, rE, rM); 
+   flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, sE, sM); 
+   flopenrc #(164) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, tE, tM); 
+   flopenrc #(9) EMRegFma5(clk, reset, PipeClearEM, PipeEnableEM, normcntE, normcntM); 
+   flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, aeE, aeM);  
+   flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, bsE, bsM); 
+   flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, killprodE, killprodM); 
+   flopenrc #(1) EMRegFma9(clk, reset, PipeClearEM, PipeEnableEM, prodofE, prodofM); 
+   flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, xzeroE, xzeroM); 
+   flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, yzeroE, yzeroM); 
+   flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, zzeroE, zzeroM); 
+   flopenrc #(1) EMRegFma13(clk, reset, PipeClearEM, PipeEnableEM, xdenormE, xdenormM); 
+   flopenrc #(1) EMRegFma14(clk, reset, PipeClearEM, PipeEnableEM, ydenormE, ydenormM); 
+   flopenrc #(1) EMRegFma15(clk, reset, PipeClearEM, PipeEnableEM, zdenormE, zdenormM); 
+   flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, xinfE, xinfM); 
+   flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, yinfE, yinfM); 
+   flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, zinfE, zinfM); 
+   flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, xnanE, xnanM); 
+   flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, ynanE, ynanM); 
+   flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, znanE, znanM); 
+   flopenrc #(1) EMRegFma22(clk, reset, PipeClearEM, PipeEnableEM, nanE, nanM); 
+   flopenrc #(9) EMRegFma23(clk, reset, PipeClearEM, PipeEnableEM, sumshiftE, sumshiftM); 
+   flopenrc #(1) EMRegFma24(clk, reset, PipeClearEM, PipeEnableEM, sumshiftzeroE, sumshiftzeroM); 
+   flopenrc #(1) EMRegFma25(clk, reset, PipeClearEM, PipeEnableEM, prodinfE, prodinfM); 
+   
+   //*****************
+   // fpadd E/M pipe registers
+   //*****************
+   flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); 
+   flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); 
+   flopenrc #(4)  EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); 
+   flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); 
+   flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); 
+   flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM); 
+   flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); 
+   flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); 
+   flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); 
+   flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); 
+   flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); 
+   flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); 
+   flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM); 
+   flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); 
+   flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM); 
+   flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); 
+   flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); 
+   flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); 
+   flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); 
+   flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); 
+   flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); 
+   flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); 
+   flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); 
+   flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); 
+   flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM); 
+   
+   //*****************
+   // fpcmp E/M pipe registers
+   //*****************
+   flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); 
+   flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); 
+   flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM); 
+   flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); 
+   flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); 
+   flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); 
+   
+   // put this in for the event we want to delay fsgn - will otherwise bypass
+   //*****************
+   // fpsgn E/M pipe registers
+   //***************** 
+   flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
+   flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
+   
+   //*****************
+   // other E/M pipe registers
+   //*****************
+   flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
+   flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
+   flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
+   flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
+   flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
+   flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
+   flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
+   flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
+   
+   //*****************
+   // fpuclassify E/M pipe registers
+   //***************** 
+   flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM);
+   
+   //BEGIN MEMORY STAGE
+   
+   assign FWriteDataM = FInput1M;
+   
+   mux2  #(64)  FLoadStoreResultMux(HRDATA, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
+   
+   fma2 fma2(.*);
+   
+   // second instance of two-stage floating-point add/cvt unit
+   fpuaddcvt2 fpadd2 (.*);
+   
+   // second instance of two-stage floating-point comparator
+   fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), 
+		   .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
+   
+   //*****************
+   // fma M/W pipe registers
+   //*****************
+   flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); 
+   flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); 
+   
+   //*****************
+   // fpdiv M/W pipe registers
+   //*****************
+   flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); 
+   flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
+   flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); 
+   
+   //*****************
+   // fpadd M/W pipe registers
+   //*****************
+   flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); 
+   flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); 
+   
+   //*****************
+   // fpcmp M/W pipe registers
+   //*****************
+   flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); 
+   flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); 
+   flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW); 
+   
+   //*****************
+   // fpsgn M/W pipe registers
+   //***************** 
+   flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW);
+   flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW);
+   
+   //*****************
+   // other M/W pipe registers
+   //*****************
+   flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
+   flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
+   flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
+   flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
+   flopenrc #(`XLEN) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, SrcAM, SrcAW);
+   flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
+   flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
+   
+   //*****************
+   // fpuclassify M/W pipe registers
+   //***************** 
+   flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
 
   //#########################################
-  //BEGIN WRITEBACK STAGE
+  // BEGIN WRITEBACK STAGE
   //#########################################
-
-  always_comb begin
-	case (FResultSelW)
-		// div/sqrt
-		3'b000 : FPUFlagsW = FDivFlagsW;
-		// cmp		
-		3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
-		//fma/mult
-		3'b010 : FPUFlagsW = FmaFlagsW;
-		// sgn inj
-		3'b011 : FPUFlagsW = SgnFlagsW;
-		// add/sub/cnvt
-		3'b100 : FPUFlagsW = FAddFlagsW;
-		// classify
-		3'b101 : FPUFlagsW = 5'b0;
-		// output SrcAW
-		3'b110 : FPUFlagsW = 5'b0;
-		// output FRD1
-		3'b111 : FPUFlagsW = 5'b0;
-		default : FPUFlagsW = 5'bxxxxx;
-	endcase
-  end
-
-
-  always_comb begin
-	case (FResultSelW)
-		// div/sqrt
-		3'b000 : FPUResult64W = FDivResultW;
-		// cmp		
-		3'b001 : FPUResult64W = FCmpResultW;
-		//fma/mult
-		3'b010 : FPUResult64W = FmaResultW;
-		// sgn inj
-		3'b011 : FPUResult64W = SgnResultW;
-		// add/sub/cnvt
-		3'b100 : FPUResult64W = FAddResultW;
-		// classify
-		3'b101 : FPUResult64W = ClassResultW;
-		// output SrcAW
-		3'b110 : FPUResult64W = SrcAW;
-		// Load/Store/Move to FP-register
-		3'b111 : FPUResult64W = FLoadStoreResultW;
-		default : FPUResult64W = {64{1'bx}};
-	endcase
-  end
-  //interface between XLEN size datapath and double-precision sized
-  //floating-point results
-  //
-  //define offsets for LSB zero extension or truncation
-  always_comb begin
-           
-  //zero extension 
+   
+   always_comb begin
+      case (FResultSelW)
+	// div/sqrt
+	3'b000 : FPUFlagsW = FDivFlagsW;
+	// cmp		
+	3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
+	//fma/mult
+	3'b010 : FPUFlagsW = FmaFlagsW;
+	// sgn inj
+	3'b011 : FPUFlagsW = SgnFlagsW;
+	// add/sub/cnvt
+	3'b100 : FPUFlagsW = FAddFlagsW;
+	// classify
+	3'b101 : FPUFlagsW = 5'b0;
+	// output SrcAW
+	3'b110 : FPUFlagsW = 5'b0;
+	// output FRD1
+	3'b111 : FPUFlagsW = 5'b0;
+	default : FPUFlagsW = 5'bxxxxx;
+      endcase
+   end
+   
+   always_comb begin
+      case (FResultSelW)
+	// div/sqrt
+	3'b000 : FPUResult64W = FDivResultW;
+	// cmp		
+	3'b001 : FPUResult64W = FCmpResultW;
+	//fma/mult
+	3'b010 : FPUResult64W = FmaResultW;
+	// sgn inj
+	3'b011 : FPUResult64W = SgnResultW;
+	// add/sub/cnvt
+	3'b100 : FPUResult64W = FAddResultW;
+	// classify
+	3'b101 : FPUResult64W = ClassResultW;
+	// output SrcAW
+	3'b110 : FPUResult64W = SrcAW;
+	// Load/Store/Move to FP-register
+	3'b111 : FPUResult64W = FLoadStoreResultW;
+	default : FPUResult64W = {64{1'bx}};
+      endcase
+   end // always_comb
+   
+   // interface between XLEN size datapath and double-precision sized
+   // floating-point results
+   //
+   // define offsets for LSB zero extension or truncation
+   always_comb begin      
+      // zero extension 
       FPUResultW = FPUResult64W[63:64-`XLEN];
-      SetFflagsM = FPUFlagsW;
+      SetFflagsM = FPUFlagsW;      
+   end
+  
+endmodule // fpu
 
-  end  
-endmodule
diff --git a/wally-pipelined/src/generic/clockgater.sv b/wally-pipelined/src/generic/clockgater.sv
new file mode 100644
index 00000000..c06a1cbd
--- /dev/null
+++ b/wally-pipelined/src/generic/clockgater.sv
@@ -0,0 +1,46 @@
+///////////////////////////////////////////
+// clockgater.sv
+//
+// Written: Ross Thompson 9 January 2021
+// Modified: 
+//
+// Purpose: Clock gater model. Must use standard cell for synthesis.
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module clockgater
+  (input logic 	E,     // functional clock enable
+   input logic 	SE,    // second enable, ORed with E (presumably scan enable -- confirm)
+   input logic 	CLK,   // free-running input clock
+   output logic ECLK); // gated clock: CLK ANDed with the latched enable
+
+  // VERY IMPORTANT.
+  // This part functionally models a clock gater, but does not necessarily meet the timing constraints a real standard cell would.
+  // Do not use this in synthesis!
+
+  logic 	enable_q;
+
+  // The enable latch is transparent only while CLK is low and holds its value while CLK is high,
+  // so enable changes can never create or truncate an ECLK pulse in the middle of a phase.
+  always_latch
+    if (~CLK) enable_q <= E | SE;
+  assign ECLK = enable_q & CLK;
+
+endmodule
diff --git a/wally-pipelined/src/generic/lzd.sv b/wally-pipelined/src/generic/lzd.sv
new file mode 100755
index 00000000..98642c15
--- /dev/null
+++ b/wally-pipelined/src/generic/lzd.sv
@@ -0,0 +1,195 @@
+///////////////////////////////////////////
+// lzd.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified: 
+//
+// Purpose: Hierarchical leading zero detector
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+/* verilator lint_off DECLFILENAME */
+
+// Original idea came from  V. G. Oklobdzija, "An algorithmic and novel
+// design of a leading zero detector circuit: comparison with logic
+// synthesis," in IEEE Transactions on Very Large Scale Integration
+// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
+// 10.1109/92.273153.
+
+// Modified to be more hierarchical
+
+// Two-bit leading-zero detector: the leaf cell of the lzd hierarchy.
+module lzd2
+  (output logic       P,  // number of leading zeros in B; meaningful only when V is 1
+   output logic       V,  // 1 when B contains at least one set bit
+   input  logic [1:0] B); // B[1] is treated as the most significant bit
+
+   // exactly one leading zero when the MSB is clear and the LSB is set
+   assign P = ~B[1] & B[0];
+   assign V = |B;
+
+endmodule // lzd2
+
+// Width-generic wrapper: elaborates the fixed-width detector that matches WIDTH.
+module lzd_hier #(parameter WIDTH=8) 
+   (input logic [WIDTH-1:0]          B,
+    output logic [$clog2(WIDTH)-1:0] ZP,
+    output logic 		     ZV);
+   // Only WIDTH = 4, 8, 16, 32, 64 or 128 is handled; any other value instantiates nothing.
+   if (WIDTH == 128)
+     lzd128 lz127 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 64)
+     lzd64 lz64 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 32)
+     lzd32 lz32 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 16)
+     lzd16 lz16 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 8)
+     lzd8 lz8 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 4)
+     lzd4 lz4 (.B(B), .ZP(ZP), .ZV(ZV));
+endmodule // lzd_hier
+
+// 4-bit leading-zero detector built from two lzd2 cells.
+// ZV is 1 when any bit of B is set; ZP counts the leading zeros and is meaningful only when ZV is 1.
+module lzd4 (ZP, ZV, B);
+   input logic [3:0]  B;
+
+   logic  	       ZPa;   // count reported by the lower half, B[1:0]
+   logic  	       ZPb;   // count reported by the upper half, B[3:2]
+   logic 	       ZVa;   // lower half contains a 1
+   logic 	       ZVb;   // upper half contains a 1
+
+   output logic [1:0]  ZP;
+   output logic        ZV;
+
+   lzd2 l1(ZPa, ZVa, B[1:0]);
+   lzd2 l2(ZPb, ZVb, B[3:2]);
+   assign ZP[0:0] = ZVb ? ZPb : ZPa;   // take the upper half's count whenever it holds a 1
+   assign ZP[1]   = ~ZVb;              // an all-zero upper half contributes 2 leading zeros
+   assign ZV = ZVa | ZVb;
+
+endmodule // lzd4
+
+// 8-bit leading-zero detector built from two lzd4 halves.
+// ZV is 1 when any bit of B is set; ZP counts the leading zeros and is meaningful only when ZV is 1.
+module lzd8 (ZP, ZV, B);
+   input logic [7:0]  B;
+
+   logic [1:0] 	       ZPa;   // count reported by the lower half, B[3:0]
+   logic [1:0] 	       ZPb;   // count reported by the upper half, B[7:4]
+   logic 	       ZVa;   // lower half contains a 1
+   logic 	       ZVb;   // upper half contains a 1
+
+   output logic [2:0]  ZP;
+   output logic        ZV;
+
+   lzd4 l1(ZPa, ZVa, B[3:0]);
+   lzd4 l2(ZPb, ZVb, B[7:4]);
+   assign ZP[1:0] = ZVb ? ZPb : ZPa;   // take the upper half's count whenever it holds a 1
+   assign ZP[2]   = ~ZVb;              // an all-zero upper half contributes 4 leading zeros
+   assign ZV = ZVa | ZVb;
+
+endmodule // lzd8
+
+// 16-bit leading-zero detector built from two lzd8 halves.
+// ZV is 1 when any bit of B is set; ZP counts the leading zeros and is meaningful only when ZV is 1.
+module lzd16 (ZP, ZV, B);
+   input logic [15:0]  B;
+
+   logic [2:0] 	       ZPa;   // count reported by the lower half, B[7:0]
+   logic [2:0] 	       ZPb;   // count reported by the upper half, B[15:8]
+   logic 	       ZVa;   // lower half contains a 1
+   logic 	       ZVb;   // upper half contains a 1
+
+   output logic [3:0]  ZP;
+   output logic        ZV;
+
+   lzd8 l1(ZPa, ZVa, B[7:0]);
+   lzd8 l2(ZPb, ZVb, B[15:8]);
+   assign ZP[2:0] = ZVb ? ZPb : ZPa;   // take the upper half's count whenever it holds a 1
+   assign ZP[3]   = ~ZVb;              // an all-zero upper half contributes 8 leading zeros
+   assign ZV = ZVa | ZVb;
+
+endmodule // lzd16
+
+// 32-bit leading-zero detector built from two lzd16 halves.
+// ZV is 1 when any bit of B is set; ZP counts the leading zeros and is meaningful only when ZV is 1.
+module lzd32 (ZP, ZV, B);
+   input logic [31:0] B;
+
+   logic [3:0] 	      ZPa;   // count reported by the lower half, B[15:0]
+   logic [3:0] 	      ZPb;   // count reported by the upper half, B[31:16]
+   logic 	      ZVa;   // lower half contains a 1
+   logic 	      ZVb;   // upper half contains a 1
+
+   output logic [4:0] ZP;
+   output logic       ZV;
+
+   lzd16 l1(ZPa, ZVa, B[15:0]);
+   lzd16 l2(ZPb, ZVb, B[31:16]);
+   assign ZP[3:0] = ZVb ? ZPb : ZPa;   // take the upper half's count whenever it holds a 1
+   assign ZP[4]   = ~ZVb;              // an all-zero upper half contributes 16 leading zeros
+   assign ZV = ZVa | ZVb;
+
+endmodule // lzd32
+
+// 64-bit leading-zero detector built from two lzd32 halves.
+// ZV is 1 when any bit of B is set; ZP counts the leading zeros and is meaningful only when ZV is 1.
+module lzd64 (ZP, ZV, B);
+   input logic [63:0]  B;
+
+   logic [4:0] 	       ZPa;   // count reported by the lower half, B[31:0]
+   logic [4:0] 	       ZPb;   // count reported by the upper half, B[63:32]
+   logic 	       ZVa;   // lower half contains a 1
+   logic 	       ZVb;   // upper half contains a 1
+
+   output logic [5:0]  ZP;
+   output logic        ZV;
+
+   lzd32 l1(ZPa, ZVa, B[31:0]);
+   lzd32 l2(ZPb, ZVb, B[63:32]);
+   assign ZP[4:0] = ZVb ? ZPb : ZPa;   // take the upper half's count whenever it holds a 1
+   assign ZP[5]   = ~ZVb;              // an all-zero upper half contributes 32 leading zeros
+   assign ZV = ZVa | ZVb;
+
+endmodule // lzd64
+
+// 128-bit leading-zero detector built from two lzd64 halves.
+// ZV is 1 when any bit of B is set; ZP counts the leading zeros and is meaningful only when ZV is 1.
+module lzd128 (ZP, ZV, B);
+   input logic [127:0]  B;
+
+   logic [5:0] 	       ZPa;   // count reported by the lower half, B[63:0]
+   logic [5:0] 	       ZPb;   // count reported by the upper half, B[127:64]
+   logic 	       ZVa;   // lower half contains a 1
+   logic 	       ZVb;   // upper half contains a 1
+
+   output logic [6:0]  ZP;
+   output logic        ZV;
+
+   // each half must be exactly 64 bits wide and the two halves must not overlap
+   lzd64 l1(ZPa, ZVa, B[63:0]);
+   lzd64 l2(ZPb, ZVb, B[127:64]);
+   assign ZP[5:0] = ZVb ? ZPb : ZPa;   // take the upper half's count whenever it holds a 1
+   assign ZP[6]   = ~ZVb;              // an all-zero upper half contributes 64 leading zeros
+   assign ZV = ZVa | ZVb;
+endmodule // lzd128
+
+/* verilator lint_on DECLFILENAME */
diff --git a/wally-pipelined/src/generic/lzd.sv~ b/wally-pipelined/src/generic/lzd.sv~
new file mode 100755
index 00000000..bfffe5e5
--- /dev/null
+++ b/wally-pipelined/src/generic/lzd.sv~
@@ -0,0 +1,195 @@
+///////////////////////////////////////////
+// lzd.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified: 
+//
+// Purpose: Hierarchical leading zero detector
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+/* verilator lint_off DECLFILENAME */
+
+// Original idea came from  V. G. Oklobdzija, "An algorithmic and novel
+// design of a leading zero detector circuit: comparison with logic
+// synthesis," in IEEE Transactions on Very Large Scale Integration
+// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
+// 10.1109/92.273153.
+
+// Modified to be more hierarchical
+
+// Two-bit leading-zero detector: the leaf cell of the lz hierarchy.
+module lz2
+  (output logic       P,  // number of leading zeros in B; meaningful only when V is 1
+   output logic       V,  // 1 when B contains at least one set bit
+   input  logic [1:0] B); // B[1] is treated as the most significant bit
+
+   // exactly one leading zero when the MSB is clear and the LSB is set
+   assign P = ~B[1] & B[0];
+   assign V = |B;
+
+endmodule // lz2
+
+// Width-generic wrapper: elaborates the fixed-width detector that matches WIDTH.
+module lzd_hier #(parameter WIDTH=8) 
+   (input logic [WIDTH-1:0]          B,
+    output logic [$clog2(WIDTH)-1:0] ZP,
+    output logic 		     ZV);
+   // Only WIDTH = 4, 8, 16, 32, 64 or 128 is handled; any other value instantiates nothing.
+   if (WIDTH == 128)
+     lz128 lzd127 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 64)
+     lz64 lzd64 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 32)
+     lz32 lzd32 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 16)
+     lz16 lzd16 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 8)
+     lz8 lzd8 (.B(B), .ZP(ZP), .ZV(ZV));
+   else if (WIDTH == 4)
+     lz4 lzd4 (.B(B), .ZP(ZP), .ZV(ZV));
+endmodule // lzd_hier
+
+// 4-bit leading-zero detector composed of two lz2 cells.
+module lz4
+  (output logic [1:0] ZP,   // leading-zero count; meaningful only when ZV is 1
+   output logic       ZV,   // 1 when B has at least one set bit
+   input  logic [3:0] B);
+
+   logic               lo_cnt, hi_cnt;   // counts reported by the lower / upper half
+   logic               lo_any, hi_any;   // per-half "contains a 1" flags
+
+   lz2 l1(.P(lo_cnt), .V(lo_any), .B(B[1:0]));
+   lz2 l2(.P(hi_cnt), .V(hi_any), .B(B[3:2]));
+
+   // a 1 in either half makes the result valid
+   assign ZV = hi_any | lo_any;
+   // the top count bit is set when the whole upper half is zero
+   assign ZP[1] = ~hi_any;
+   // remaining bit: the upper half wins whenever it contains a 1
+   assign ZP[0] = hi_any ? hi_cnt : lo_cnt;
+
+endmodule 
+
+// 8-bit leading-zero detector composed of two lz4 halves.
+module lz8
+  (output logic [2:0] ZP,   // leading-zero count; meaningful only when ZV is 1
+   output logic       ZV,   // 1 when B has at least one set bit
+   input  logic [7:0] B);
+
+   logic [1:0]         lo_cnt, hi_cnt;   // counts reported by the lower / upper half
+   logic               lo_any, hi_any;   // per-half "contains a 1" flags
+
+   lz4 l1(.ZP(lo_cnt), .ZV(lo_any), .B(B[3:0]));
+   lz4 l2(.ZP(hi_cnt), .ZV(hi_any), .B(B[7:4]));
+
+   // a 1 in either half makes the result valid
+   assign ZV = hi_any | lo_any;
+   // the top count bit is set when the whole upper half is zero
+   assign ZP[2] = ~hi_any;
+   // remaining bits: the upper half wins whenever it contains a 1
+   assign ZP[1:0] = hi_any ? hi_cnt : lo_cnt;
+
+endmodule 
+
+// 16-bit leading-zero detector composed of two lz8 halves.
+module lz16
+  (output logic [3:0]  ZP,   // leading-zero count; meaningful only when ZV is 1
+   output logic        ZV,   // 1 when B has at least one set bit
+   input  logic [15:0] B);
+
+   logic [2:0]         lo_cnt, hi_cnt;   // counts reported by the lower / upper half
+   logic               lo_any, hi_any;   // per-half "contains a 1" flags
+
+   lz8 l1(.ZP(lo_cnt), .ZV(lo_any), .B(B[7:0]));
+   lz8 l2(.ZP(hi_cnt), .ZV(hi_any), .B(B[15:8]));
+
+   // a 1 in either half makes the result valid
+   assign ZV = hi_any | lo_any;
+   // the top count bit is set when the whole upper half is zero
+   assign ZP[3] = ~hi_any;
+   // remaining bits: the upper half wins whenever it contains a 1
+   assign ZP[2:0] = hi_any ? hi_cnt : lo_cnt;
+
+endmodule // lz16
+
+// 32-bit leading-zero detector composed of two lz16 halves.
+module lz32
+  (output logic [4:0]  ZP,   // leading-zero count; meaningful only when ZV is 1
+   output logic        ZV,   // 1 when B has at least one set bit
+   input  logic [31:0] B);
+
+   logic [3:0]         lo_cnt, hi_cnt;   // counts reported by the lower / upper half
+   logic               lo_any, hi_any;   // per-half "contains a 1" flags
+
+   lz16 l1(.ZP(lo_cnt), .ZV(lo_any), .B(B[15:0]));
+   lz16 l2(.ZP(hi_cnt), .ZV(hi_any), .B(B[31:16]));
+
+   // a 1 in either half makes the result valid
+   assign ZV = hi_any | lo_any;
+   // the top count bit is set when the whole upper half is zero
+   assign ZP[4] = ~hi_any;
+   // remaining bits: the upper half wins whenever it contains a 1
+   assign ZP[3:0] = hi_any ? hi_cnt : lo_cnt;
+
+endmodule // lz32
+
+// 64-bit leading-zero detector composed of two lz32 halves.
+module lz64
+  (output logic [5:0]  ZP,   // leading-zero count; meaningful only when ZV is 1
+   output logic        ZV,   // 1 when B has at least one set bit
+   input  logic [63:0] B);
+
+   logic [4:0]         lo_cnt, hi_cnt;   // counts reported by the lower / upper half
+   logic               lo_any, hi_any;   // per-half "contains a 1" flags
+
+   lz32 l1(.ZP(lo_cnt), .ZV(lo_any), .B(B[31:0]));
+   lz32 l2(.ZP(hi_cnt), .ZV(hi_any), .B(B[63:32]));
+
+   // a 1 in either half makes the result valid
+   assign ZV = hi_any | lo_any;
+   // the top count bit is set when the whole upper half is zero
+   assign ZP[5] = ~hi_any;
+   // remaining bits: the upper half wins whenever it contains a 1
+   assign ZP[4:0] = hi_any ? hi_cnt : lo_cnt;
+
+endmodule // lz64
+
+// 128-bit leading-zero detector built from two lz64 halves.
+// ZV is 1 when any bit of B is set; ZP counts the leading zeros and is meaningful only when ZV is 1.
+module lz128 (ZP, ZV, B);
+   input logic [127:0]  B;
+
+   logic [5:0] 	       ZPa;   // count reported by the lower half, B[63:0]
+   logic [5:0] 	       ZPb;   // count reported by the upper half, B[127:64]
+   logic 	       ZVa;   // lower half contains a 1
+   logic 	       ZVb;   // upper half contains a 1
+
+   output logic [6:0]  ZP;
+   output logic        ZV;
+
+   // each half must be exactly 64 bits wide and the two halves must not overlap
+   lz64 l1(ZPa, ZVa, B[63:0]);
+   lz64 l2(ZPb, ZVb, B[127:64]);
+   assign ZP[5:0] = ZVb ? ZPb : ZPa;   // take the upper half's count whenever it holds a 1
+   assign ZP[6]   = ~ZVb;              // an all-zero upper half contributes 64 leading zeros
+   assign ZV = ZVa | ZVb;
+endmodule // lz128
+
+/* verilator lint_on DECLFILENAME */
diff --git a/wally-pipelined/src/generic/shift.sv b/wally-pipelined/src/generic/shift.sv
new file mode 100755
index 00000000..88152588
--- /dev/null
+++ b/wally-pipelined/src/generic/shift.sv
@@ -0,0 +1,76 @@
+///////////////////////////////////////////
+// shift.sv
+//
+// Written: James.Stine@okstate.edu 1 February 2021
+// Modified: 
+//
+// Purpose: Parameterized logical left and right shifters built from mux stages
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+/* verilator lint_off DECLFILENAME */
+/* verilator lint_off UNOPTFLAT */
+
+// Logical right shifter built as a cascade of $clog2(WIDTH) two-input mux stages.
+// Stage i moves by WIDTH/2^(i+1) bits, so the per-stage amounts only add up correctly when WIDTH is a power of two.
+module shift_right #(parameter WIDTH=8) 
+   (input logic [WIDTH-1:0]         A,
+    input logic [$clog2(WIDTH)-1:0] Shift,
+    output logic [WIDTH-1:0] 	    Z);
+   // stage[0] is A, stage[i+1] is the output of mux stage i, and the last entry drives Z
+   logic [WIDTH-1:0] 		    stage [$clog2(WIDTH):0];
+   genvar 			    i;
+   assign stage[0] = A;   
+   generate
+      for (i=0;i<$clog2(WIDTH);i=i+1)
+	begin : genbit
+	   // 2nd input: stage[i] moved right, zero filled; Shift MSB steers stage 0 (assumes mux2 ports are d0, d1, s, y -- confirm)
+	   mux2 #(WIDTH) mux_inst (stage[i], 
+				   {{(WIDTH/(2**(i+1))){1'b0}}, stage[i][WIDTH-1:WIDTH/(2**(i+1))]}, 
+				   Shift[$clog2(WIDTH)-i-1], 
+				   stage[i+1]);
+	end
+   endgenerate
+   assign Z = stage[$clog2(WIDTH)];   
+endmodule // shift_right
+
+// Logical left shifter: a cascade of $clog2(WIDTH) mux stages, coarsest shift first.
+module shift_left #(parameter WIDTH=8) 
+   (input logic [WIDTH-1:0]         A,
+    input logic [$clog2(WIDTH)-1:0] Shift,
+    output logic [WIDTH-1:0] 	    Z);
+
+   localparam int 		    NSTAGES = $clog2(WIDTH);
+   logic [WIDTH-1:0] 		    stage [NSTAGES:0];
+   genvar 			    i;
+
+   assign stage[0] = A;
+   assign Z        = stage[NSTAGES];
+   // Stage i offers stage[i] moved left by WIDTH/2^(i+1) with zero fill as its second mux input.
+   for (i = 0; i < NSTAGES; i = i + 1)
+     begin : genbit
+	localparam int AMT = WIDTH/(2**(i+1));
+	mux2 #(WIDTH) mux_inst (stage[i], {stage[i][WIDTH-1-AMT:0], {AMT{1'b0}}},
+				Shift[NSTAGES-i-1], stage[i+1]);
+     end
+
+endmodule // shift_left
+
+/* verilator lint_on DECLFILENAME */
+/* verilator lint_on UNOPTFLAT */
diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv
index de0f8143..92471c57 100644
--- a/wally-pipelined/src/ifu/bpred.sv
+++ b/wally-pipelined/src/ifu/bpred.sv
@@ -30,7 +30,8 @@
 
 module bpred 
   (input logic clk, reset,
-   input logic 		    StallF, StallD, StallE, FlushF, FlushD, FlushE,
+   input logic 		    StallF, StallD, StallE, StallM, StallW, 
+   input logic 		    FlushF, FlushD, FlushE, FlushM, FlushW,
    // Fetch stage
    // the prediction
    input logic [`XLEN-1:0]  PCNextF, // *** forgot to include this one on the I/O list
@@ -88,25 +89,29 @@ module bpred
       globalHistoryPredictor DirPredictor(.clk(clk),
 					  .reset(reset),
 					  .*, // Stalls and flushes
-					  .LookUpPC(PCNextF),
-					  .Prediction(BPPredF),
+					  .PCNextF(PCNextF),
+					  .BPPredF(BPPredF),
 					  // update
-					  .UpdatePC(PCE),
-					  .UpdateEN(InstrClassE[0] & ~StallE),
+					  .InstrClassE(InstrClassE),
+					  .BPInstrClassE(BPInstrClassE),
+					  .BPPredDirWrongE(BPPredDirWrongE),
+					  .PCE(PCE),
 					  .PCSrcE(PCSrcE),
-					  .UpdatePrediction(UpdateBPPredE));
+					  .UpdateBPPredE(UpdateBPPredE));
     end else if (`BPTYPE == "BPGSHARE") begin:Predictor
 
       gsharePredictor DirPredictor(.clk(clk),
-				   .reset(reset),
-				   .*, // Stalls and flushes
-				   .LookUpPC(PCNextF),
-				   .Prediction(BPPredF),
-				   // update
-				   .UpdatePC(PCE),
-				   .UpdateEN(InstrClassE[0] & ~StallE),
-				   .PCSrcE(PCSrcE),
-				   .UpdatePrediction(UpdateBPPredE));
+					  .reset(reset),
+					  .*, // Stalls and flushes
+					  .PCNextF(PCNextF),
+					  .BPPredF(BPPredF),
+					  // update
+					  .InstrClassE(InstrClassE),
+					  .BPInstrClassE(BPInstrClassE),
+					  .BPPredDirWrongE(BPPredDirWrongE),
+					  .PCE(PCE),
+					  .PCSrcE(PCSrcE),
+					  .UpdateBPPredE(UpdateBPPredE));
     end 
     else if (`BPTYPE == "BPLOCALPAg") begin:Predictor
 
@@ -190,14 +195,14 @@ module bpred
   flopenrc #(2) BPPredRegD(.clk(clk),
 			   .reset(reset),
 			   .en(~StallD),
-			   .clear(FlushD),
+			   .clear(1'b0),
 			   .d(BPPredF),
 			   .q(BPPredD));
 
   flopenrc #(2) BPPredRegE(.clk(clk),
 			   .reset(reset),
 			   .en(~StallE),
-			   .clear(FlushE),
+			   .clear(1'b0),
 			   .d(BPPredD),
 			   .q(BPPredE));
 
diff --git a/wally-pipelined/src/ifu/globalHistoryPredictor.sv b/wally-pipelined/src/ifu/globalHistoryPredictor.sv
index 087458df..516de633 100644
--- a/wally-pipelined/src/ifu/globalHistoryPredictor.sv
+++ b/wally-pipelined/src/ifu/globalHistoryPredictor.sv
@@ -32,76 +32,89 @@ module globalHistoryPredictor
     )
   (input logic clk,
    input logic 		   reset,
-   input logic 		    StallF, StallD, StallE, FlushF, FlushD, FlushE,
-   input logic [`XLEN-1:0] LookUpPC,
-   output logic [1:0] 	   Prediction,
+   input logic 		   StallF, StallD, StallE, FlushF, FlushD, FlushE,
+   input logic [`XLEN-1:0] PCNextF,
+   output logic [1:0] 	   BPPredF,
    // update
-   input logic [`XLEN-1:0] UpdatePC,
-   input logic 		   UpdateEN, PCSrcE, 
-   input logic [1:0] 	   UpdatePrediction
-   
+   input logic [4:0] 	   InstrClassE,
+   input logic [4:0] 	   BPInstrClassE,
+   input logic [4:0] 	   BPInstrClassD,
+   input logic [4:0] 	   BPInstrClassF, 
+   input logic 		   BPPredDirWrongE,
+
+   input logic [`XLEN-1:0] PCE,
+   input logic 		   PCSrcE,
+   input logic [1:0] 	   UpdateBPPredE
+  
    );
-   logic [k-1:0] GHRF, GHRFNext;
-   assign GHRFNext = {PCSrcE, GHRF[k-1:1]}; 
+  logic [k+1:0] 	   GHR, GHRNext;
+  logic [k-1:0] 	   PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1;
+  logic 		   PHTUpdateEN;
+  logic 		   BPClassWrongNonCFI;
+  logic 		   BPClassWrongCFI;
+  logic 		   BPClassRightNonCFI;
 
-    flopenr #(k) GlobalHistoryRegister(.clk(clk),
-            .reset(reset),
-            .en(UpdateEN),
-            .d(GHRFNext),
-            .q(GHRF));
+  logic [6:0] 		   GHRMuxSel;
+  logic 		   GHRUpdateEN;
+  logic [k-1:0] 	   GHRLookup;
 
+  assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0];
+  assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0];
+  assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0];
+  assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE;
+  assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE;
+  
+  
+  // GHR update selection, 1 hot encoded.
+  assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight);
+  assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0];
+  assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0];
+  assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]);
+  assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0];
+  assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0];
+  assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight));
+  assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF;
 
+  // hoping this created a AND-OR mux.
+  always_comb begin
+    case (GHRMuxSel) 
+      7'b000_0001: GHRNext = GHR[k-1+2:0];  // no change
+      7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update
+      7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1
+      7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction
+      7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2
+      7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1
+      7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update
+      default: GHRNext = GHR[k-1+2:0];
+    endcase
+  end
 
-  logic [1:0] 		   PredictionMemory;
-  logic 		   DoForwarding, DoForwardingF;
-  logic [1:0] 		   UpdatePredictionF;
- 
+  flopenr #(k+2) GlobalHistoryRegister(.clk(clk),
+				       .reset(reset),
+				       .en((GHRUpdateEN)),
+				       .d(GHRNext),
+				       .q(GHR));
 
+  // if actively updating the GHR at the time of prediction we want to use
+  // GHRNext as the lookup rather than GHR.
+
+  assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0];
+  assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1];  
+  assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0;
+  assign PHTUpdateEN = InstrClassE[0] & ~StallE;
+
+  assign GHRLookup = |GHRMuxSel[6:1] ? GHRNext[k-1:0] : GHR[k-1:0];
+  
   // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT 
-  // GHR referes to the address that the past k branches points to in the prediction stage 
-  // GHRE refers to the address that the past k branches points to in the exectution stage
-    SRAM2P1R1W #(k, 2) PHT(.clk(clk),
-				.reset(reset),
-				.RA1(GHRF),
-				.RD1(PredictionMemory),
-				.REN1(~StallF),
-				.WA1(GHRFNext),
-				.WD1(UpdatePrediction),
-				.WEN1(UpdateEN),
-				.BitWEN1(2'b11));
+  SRAM2P1R1W #(k, 2) PHT(.clk(clk),
+			 .reset(reset),
+			 //.RA1(GHR[k-1:0]),
+			 .RA1(GHRLookup),
+			 .RD1(BPPredF),
+			 .REN1(~StallF),
+			 .WA1(PHTUpdateAdr),
+			 .WD1(UpdateBPPredE),
+			 .WEN1(PHTUpdateEN),
+			 .BitWEN1(2'b11));
 
-
-  // need to forward when updating to the same address as reading.
-  // first we compare to see if the update and lookup addreses are the same
-  assign DoForwarding = GHRF == GHRFNext;
-
-  // register the update value and the forwarding signal into the Fetch stage
-  // TODO: add stall logic ***
-  flopr #(1) DoForwardingReg(.clk(clk),
-			     .reset(reset),
-			     .d(DoForwarding),
-			     .q(DoForwardingF));
-  
-  flopr #(2) UpdatePredictionReg(.clk(clk),
-				 .reset(reset),
-				 .d(UpdatePrediction),
-				 .q(UpdatePredictionF));
-
-  assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory;
-  
-  //pipeline for GHR
-  /*flopenrc #(k) GHRDReg(.clk(clk),
-      .reset(reset),
-      .en(~StallD),
-      .clear(FlushD),
-      .d(GHRF),
-      .q(GHRD));
-
-  flopenrc #(k) GHREReg(.clk(clk),
-        .reset(reset),
-        .en(~StallE),
-        .clear(FlushE),
-        .d(GHRD),
-        .q(GHRE));
-*/
 endmodule
diff --git a/wally-pipelined/src/ifu/gshare.sv b/wally-pipelined/src/ifu/gshare.sv
deleted file mode 100644
index 4d31e519..00000000
--- a/wally-pipelined/src/ifu/gshare.sv
+++ /dev/null
@@ -1,128 +0,0 @@
-///////////////////////////////////////////
-// gshare.sv
-//
-// Written: Shreya Sanghai
-// Email: ssanghai@hmc.edu
-// Created: March 16, 2021
-// Modified: 
-//
-// Purpose: Gshare predictor with parameterized global history register
-// 
-// A component of the Wally configurable RISC-V project.
-// 
-// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
-// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
-// is furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
-// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
-// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-///////////////////////////////////////////
-
-`include "wally-config.vh"
-
-module gsharePredictor
-  #(parameter int k = 10
-    )
-  (input logic clk,
-   input logic 		   reset,
-   input logic 		   StallF, StallD, StallE, FlushF, FlushD, FlushE,
-   input logic [`XLEN-1:0] LookUpPC,
-   output logic [1:0] 	   Prediction,
-   // update
-   input logic [`XLEN-1:0] UpdatePC,
-   input logic 		   UpdateEN, PCSrcE,
-   input logic [1:0] 	   UpdatePrediction
-  
-   );
-
-  logic [k-1:0] 	   GHRF, GHRFNext;
-  //logic [k-1:0] 	   LookUpPCIndexD, LookUpPCIndexE;
-  logic [k-1:0] 	   LookUpPCIndex, UpdatePCIndex;
-  logic [1:0] 		   PredictionMemory;
-  logic 		   DoForwarding, DoForwardingF;
-  logic [1:0] 		   UpdatePredictionF;
-
-  assign GHRFNext = {PCSrcE, GHRF[k-1:1]};
-  
-  flopenr #(k) GlobalHistoryRegister(.clk(clk),
-				     .reset(reset),
-				     .en(UpdateEN),
-				     .d(GHRFNext),
-				     .q(GHRF));
-
-
-  // for gshare xor the PC with the GHR 
-  assign UpdatePCIndex = GHRFNext ^ UpdatePC[k:1];
-  assign LookUpPCIndex = GHRF ^ LookUpPC[k:1];  
-  // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT 
-  // GHR referes to the address that the past k branches points to in the prediction stage 
-  // GHRE refers to the address that the past k branches points to in the exectution stage
-  SRAM2P1R1W #(k, 2) PHT(.clk(clk),
-			 .reset(reset),
-			 .RA1(LookUpPCIndex),
-			 .RD1(PredictionMemory),
-			 .REN1(~StallF),
-			 .WA1(UpdatePCIndex),
-			 .WD1(UpdatePrediction),
-			 .WEN1(UpdateEN),
-			 .BitWEN1(2'b11));
-
-
-  // need to forward when updating to the same address as reading.
-  // first we compare to see if the update and lookup addreses are the same
-  assign DoForwarding = LookUpPCIndex == UpdatePCIndex;
-
-  // register the update value and the forwarding signal into the Fetch stage
-  // TODO: add stall logic ***
-  flopr #(1) DoForwardingReg(.clk(clk),
-			     .reset(reset),
-			     .d(DoForwarding),
-			     .q(DoForwardingF));
-  
-  flopr #(2) UpdatePredictionReg(.clk(clk),
-				 .reset(reset),
-				 .d(UpdatePrediction),
-				 .q(UpdatePredictionF));
-
-  assign Prediction = DoForwardingF ? UpdatePredictionF : PredictionMemory;
-  
-  //pipeline for GHR
-/* -----\/----- EXCLUDED -----\/-----
-  flopenrc #(k) LookUpDReg(.clk(clk),
-			   .reset(reset),
-			   .en(~StallD),
-			   .clear(FlushD),
-			   .d(LookUpPCIndex),
-			   .q(LookUpPCIndexD));
-
-  flopenrc #(k) LookUpEReg(.clk(clk),
-			   .reset(reset),
-			   .en(~StallE),
-			   .clear(FlushE),
-			   .d(LookUpPCIndexD),
-			   .q(LookUpPCIndexE));
- -----/\----- EXCLUDED -----/\----- */
-
-/*  flopenrc #(k) GHRRegD(.clk(clk),
-			.reset(reset),
-			.en(~StallD),
-			.clear(FlushD),
-			.d(GHRF),
-			.q(GHRD));
-
-  flopenrc #(k) GHRRegE(.clk(clk),
-			.reset(reset),
-			.en(~StallE),
-			.clear(FlushE),
-			.d(GHRD),
-			.q(GHRE));
-  
-*/
-endmodule
diff --git a/wally-pipelined/src/ifu/gsharePredictor.sv b/wally-pipelined/src/ifu/gsharePredictor.sv
new file mode 100644
index 00000000..b4a60827
--- /dev/null
+++ b/wally-pipelined/src/ifu/gsharePredictor.sv
@@ -0,0 +1,120 @@
+///////////////////////////////////////////
+// gsharePredictor.sv
+//
+// Written: Shreya Sanghai
+// Email: ssanghai@hmc.edu
+// Created: March 16, 2021
+// Modified: 
+//
+// Purpose: Gshare predictor with parameterized global history register
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+`include "wally-config.vh"
+
+// Gshare direction predictor: a k-bit window of the global history register XORed with PC bits [k:1] indexes the PHT.
+module gsharePredictor
+  #(parameter int k = 10)
+  (input logic clk,
+   input logic 		   reset,
+   input logic 		   StallF, StallD, StallE, FlushF, FlushD, FlushE,
+   input logic [`XLEN-1:0] PCNextF,
+   output logic [1:0] 	   BPPredF,
+   // update
+   input logic [4:0] 	   InstrClassE,
+   input logic [4:0] 	   BPInstrClassE,
+   input logic [4:0] 	   BPInstrClassD,
+   input logic [4:0] 	   BPInstrClassF, 
+   input logic 		   BPPredDirWrongE,
+
+   input logic [`XLEN-1:0] PCE,
+   input logic 		   PCSrcE,
+   input logic [1:0] 	   UpdateBPPredE
+  
+   );
+  logic [k+1:0] 	   GHR, GHRNext;
+  logic [k-1:0] 	   PHTUpdateAdr, PHTUpdateAdr0, PHTUpdateAdr1;
+  logic 		   PHTUpdateEN;
+  logic 		   BPClassWrongNonCFI, BPClassWrongCFI;
+  logic 		   BPClassRightNonCFI;
+  logic 		   BPClassRightBPWrong, BPClassRightBPRight;
+
+  logic [6:0] 		   GHRMuxSel;
+  logic 		   GHRUpdateEN;
+  logic [k-1:0] 	   GHRLookup;
+  // Classify the execute-stage outcome from bit 0 of BPInstrClassE and InstrClassE (presumably predicted vs. actual class).
+  assign BPClassRightNonCFI = ~BPInstrClassE[0] & ~InstrClassE[0];
+  assign BPClassWrongCFI = ~BPInstrClassE[0] & InstrClassE[0];
+  assign BPClassWrongNonCFI = BPInstrClassE[0] & ~InstrClassE[0];
+  assign BPClassRightBPWrong = BPInstrClassE[0] & InstrClassE[0] & BPPredDirWrongE;
+  assign BPClassRightBPRight = BPInstrClassE[0] & InstrClassE[0] & ~BPPredDirWrongE;
+  
+  // Only bit 0 of each 5-bit class vector is used in this module.
+  // GHR update selection, 1 hot encoded.
+  assign GHRMuxSel[0] = ~BPInstrClassF[0] & (BPClassRightNonCFI | BPClassRightBPRight);
+  assign GHRMuxSel[1] = BPClassWrongCFI & ~BPInstrClassD[0];
+  assign GHRMuxSel[2] = BPClassWrongNonCFI & ~BPInstrClassD[0];
+  assign GHRMuxSel[3] = (BPClassRightBPWrong & ~BPInstrClassD[0]) | (BPClassWrongCFI & BPInstrClassD[0]);
+  assign GHRMuxSel[4] = BPClassWrongNonCFI & BPInstrClassD[0];
+  assign GHRMuxSel[5] = InstrClassE[0] & BPClassRightBPWrong & BPInstrClassD[0];
+  assign GHRMuxSel[6] = BPInstrClassF[0] & (BPClassRightNonCFI | (InstrClassE[0] & BPClassRightBPRight));
+  assign GHRUpdateEN = (| GHRMuxSel[5:1] & ~StallE) | GHRMuxSel[6] & ~StallF;
+
+  // hoping this created an AND-OR mux; any select pattern that is not one-hot leaves GHR unchanged.
+  always_comb begin
+    case (GHRMuxSel) 
+      7'b000_0001: GHRNext = GHR[k-1+2:0];  // no change
+      7'b000_0010: GHRNext = {GHR[k-2+2:0], PCSrcE}; // branch update
+      7'b000_0100: GHRNext = {1'b0, GHR[k+1:1]}; // repair 1
+      7'b000_1000: GHRNext = {GHR[k-1+2:1], PCSrcE}; // branch update with mis prediction correction
+      7'b001_0000: GHRNext = {2'b00, GHR[k+1:2]}; // repair 2
+      7'b010_0000: GHRNext = {1'b0, GHR[k+1:2], PCSrcE}; // branch update + repair 1
+      7'b100_0000: GHRNext = {GHR[k-2+2:0], BPPredF[1]}; // speculative update
+      default: GHRNext = GHR[k-1+2:0];
+    endcase
+  end
+
+  flopenr #(k+2) GlobalHistoryRegister(.clk(clk),
+				       .reset(reset),
+				       .en((GHRUpdateEN)),
+				       .d(GHRNext),
+				       .q(GHR));
+
+  // The PHT write index is a k-bit window of GHR; the window moves up one bit when
+  // InstrClassE[0] is set and one more bit when BPInstrClassD[0] is set.
+
+  assign PHTUpdateAdr0 = InstrClassE[0] ? GHR[k:1] : GHR[k-1:0];
+  assign PHTUpdateAdr1 = InstrClassE[0] ? GHR[k+1:2] : GHR[k:1];  
+  assign PHTUpdateAdr = BPInstrClassD[0] ? PHTUpdateAdr1 : PHTUpdateAdr0;
+  assign PHTUpdateEN = InstrClassE[0] & ~StallE;
+  // if actively updating the GHR at the time of prediction we want to use GHRNext as the lookup rather than GHR.
+  assign GHRLookup = |GHRMuxSel[6:1] ? GHRNext[k-1:0] : GHR[k-1:0];
+  
+  // Make Prediction by reading the correct address in the PHT and also update the new address in the PHT 
+  SRAM2P1R1W #(k, 2) PHT(.clk(clk),
+			 .reset(reset),
+			 //.RA1(GHR[k-1:0]),
+			 .RA1(GHRLookup ^ PCNextF[k:1]),
+			 .RD1(BPPredF),
+			 .REN1(~StallF),
+			 .WA1(PHTUpdateAdr ^ PCE[k:1]),
+			 .WD1(UpdateBPPredE),
+			 .WEN1(PHTUpdateEN),
+			 .BitWEN1(2'b11));
+
+endmodule // gsharePredictor
diff --git a/wally-pipelined/src/ifu/icache.sv b/wally-pipelined/src/ifu/icache.sv
index 9e30a083..4f51edd7 100644
--- a/wally-pipelined/src/ifu/icache.sv
+++ b/wally-pipelined/src/ifu/icache.sv
@@ -154,15 +154,16 @@ module icachecontroller #(parameter LINESIZE = 256) (
   localparam STATE_MISS_SPILL_FETCH_DONE = 10; // write data into SRAM/LUT
   localparam STATE_MISS_SPILL_READ1 = 11; // read block 0 from SRAM/LUT
   localparam STATE_MISS_SPILL_2 = 12; // return to ready if hit or do second block update.
-  localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 13; // miss on block 1, issue read to AHB and wait
-  localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 14; // write data to SRAM/LUT
-  localparam STATE_MISS_SPILL_MERGE = 15; // read block 0 of CPU access,
+  localparam STATE_MISS_SPILL_2_START = 13; // return to ready if hit or do second block update.  
+  localparam STATE_MISS_SPILL_MISS_FETCH_WDV = 14; // miss on block 1, issue read to AHB and wait
+  localparam STATE_MISS_SPILL_MISS_FETCH_DONE = 15; // write data to SRAM/LUT
+  localparam STATE_MISS_SPILL_MERGE = 16; // read block 0 of CPU access,
 
-  localparam STATE_MISS_SPILL_FINAL = 16; // this state replicates STATE_READY's replay of the
+  localparam STATE_MISS_SPILL_FINAL = 17; // this state replicates STATE_READY's replay of the
   // spill access but does nto consider spill.  It also does not do another operation.
   
 
-  localparam STATE_INVALIDATE = 17; // *** not sure if invalidate or evict? invalidate by cache block or address?
+  localparam STATE_INVALIDATE = 18; // *** not sure if invalidate or evict? invalidate by cache block or address?
   
   localparam AHBByteLength = `XLEN / 8;
   localparam AHBOFFETWIDTH = $clog2(AHBByteLength);
@@ -380,11 +381,20 @@ module icachecontroller #(parameter LINESIZE = 256) (
 	PCMux = 2'b10;
 	UnalignedSelect = 1'b1;
 	spillSave = 1'b1; /// *** Could pipeline these to make it clearer in the fsm.
+	ICacheReadEn = 1'b1;
+	NextState = STATE_MISS_SPILL_2_START;
+      end
+      STATE_MISS_SPILL_2_START: begin
 	if (~hit) begin
 	  CntReset = 1'b1;
 	  NextState = STATE_MISS_SPILL_MISS_FETCH_WDV;
 	end else begin
-	  NextState = STATE_MISS_SPILL_FINAL;
+	  NextState = STATE_READY;
+	  ICacheReadEn = 1'b1;
+	  PCMux = 2'b00;
+	  UnalignedSelect = 1'b1;
+	  SavePC = 1'b1;
+	  ICacheStallF = 1'b0;	
 	end
       end
       STATE_MISS_SPILL_MISS_FETCH_WDV: begin
diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv
index 28f7597e..e0507b63 100644
--- a/wally-pipelined/src/ifu/ifu.sv
+++ b/wally-pipelined/src/ifu/ifu.sv
@@ -154,14 +154,7 @@ module ifu (
   generate 
     if (`BPRED_ENABLED == 1) begin : bpred
       // I am making the port connection explicit for now as I want to see them and they will be changing.
-      bpred bpred(.clk(clk),
-		  .reset(reset),
-		  .StallF(StallF),
-		  .StallD(StallD),
-		  .StallE(StallE),
-		  .FlushF(FlushF),
-		  .FlushD(FlushD),
-		  .FlushE(FlushE),
+      bpred bpred(.*,
 		  .PCNextF(PCNextF),
 		  .BPPredPCF(BPPredPCF),
 		  .SelBPPredF(SelBPPredF),
diff --git a/wally-pipelined/src/mmu/cam_line.sv b/wally-pipelined/src/mmu/cam_line.sv
index b7577573..6bab0b60 100644
--- a/wally-pipelined/src/mmu/cam_line.sv
+++ b/wally-pipelined/src/mmu/cam_line.sv
@@ -2,7 +2,9 @@
 // cam_line.sv
 //
 // Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021
-// Modified:
+// Modified: kmacsaigoren@hmc.edu 1 June 2021
+//            Implemented SV48 on top of SV39. This included adding SvMode input signal and the wally constants
+//            Mostly this was done to make the PageNumberMixer work.
 //
 // Purpose: CAM line for the translation lookaside buffer (TLB)
 //          Determines whether a virtual address matches the stored key.
@@ -24,12 +26,17 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
+`include "wally-constants.vh"
+
 module cam_line #(parameter KEY_BITS = 20,
                   parameter HIGH_SEGMENT_BITS = 10) (
   input                 clk, reset,
 
+  // input to check which SvMode is running
+  input [`SVMODE_BITS-1:0] SvMode,
+  
   // The requested page number to compare against the key
-  input  [KEY_BITS-1:0] VirtualPageNumber,
+  input [KEY_BITS-1:0]  VirtualPageNumber,
 
   // Signals to write a new entry to this line
   input                 CAMLineWrite,
@@ -38,10 +45,11 @@ module cam_line #(parameter KEY_BITS = 20,
   // Flush this line (set valid to 0)
   input                 TLBFlush,
 
-  // This entry is a key for a giga, mega, or kilopage.
+  // This entry is a key for a tera, giga, mega, or kilopage.
   // PageType == 2'b00 --> kilopage
   // PageType == 2'b01 --> megapage
-  // PageType == 2'b11 --> gigapage
+  // PageType == 2'b10 --> gigapage
+  // PageType == 2'b11 --> terapage
   output [1:0]          PageType,  // *** should this be the stored version or the always updated one?
   output                Match
 );
@@ -67,9 +75,9 @@ module cam_line #(parameter KEY_BITS = 20,
   flopenr #(KEY_BITS) keyflop(clk, reset, CAMLineWrite, VirtualPageNumber, Key);
 
   // Calculate the actual query key based on the input key and the page type.
-  // For example, a megapage in sv39 only cares about VPN2 and VPN1, so VPN0
+  // For example, a megapage in SV39 only cares about VPN2 and VPN1, so VPN0
   // should automatically match.
-  page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, VirtualPageNumberQuery);
+  page_number_mixer #(KEY_BITS, HIGH_SEGMENT_BITS) mixer(VirtualPageNumber, Key, PageType, SvMode, VirtualPageNumberQuery);
 
   assign Match = ({1'b1, VirtualPageNumberQuery} == {Valid, Key});
 
diff --git a/wally-pipelined/src/mmu/page_number_mixer.sv b/wally-pipelined/src/mmu/page_number_mixer.sv
index 57b8e4b7..03851018 100644
--- a/wally-pipelined/src/mmu/page_number_mixer.sv
+++ b/wally-pipelined/src/mmu/page_number_mixer.sv
@@ -2,7 +2,11 @@
 // page_number_mixer.sv
 //
 // Written: tfleming@hmc.edu & jtorrey@hmc.edu 6 April 2021
-// Modified:
+// Modified: kmacsaigoren@hmc.edu 1 June 2021
+//              Implemented SV48 on top of SV39. This included adding a 3rd Segment to each of the pagenumbers,
+//              Ensuring that the BITS and HIGH_SEGMENT_BITS inputs were correct everywhere this module gets instantiated,
+//              Adding several muxes to decide the bit selection to turn pagenumbers into segments based on SV mode,
+//              Adding support for terapage/newgigapage encoding.
 //
 // Purpose: Takes two page numbers and replaces segments of the first page
 //          number with segments from the second, based on the page type.
@@ -25,22 +29,29 @@
 ///////////////////////////////////////////
 
 `include "wally-config.vh"
+`include "wally-constants.vh"
 
 module page_number_mixer #(parameter BITS = 20,
                            parameter HIGH_SEGMENT_BITS = 10) (
-    input  [BITS-1:0] PageNumber,
-    input  [BITS-1:0] MixPageNumber,
-    input  [1:0]      PageType,
-    output [BITS-1:0] PageNumberCombined
+    input  [BITS-1:0]         PageNumber,
+    input  [BITS-1:0]         MixPageNumber,
+    input  [1:0]              PageType,
+    input  [`SVMODE_BITS-1:0] SvMode,
+
+    output [BITS-1:0]         PageNumberCombined
 );
 
+  // The upper segment might have a different width than the lower segments.
+  // For example, an SV39 PTE has 26 bits for PPN2 and 9 bits for the other
+  // segments. This is outside the 'if XLEN' b/c the constant is already configured
+  // to the correct value for the XLEN in the relevant wally-constants.vh file.
+  localparam LOW_SEGMENT_BITS = `VPN_SEGMENT_BITS;
+  // *** each time this module is implemented, low segment bits is either
+  // `VPN_SEGMENT_BITS or `PPN_LOW_SEGMENT_BITS (if it existed)
+  // in every mode so far, these are the same, so it's left as it is above. 
+
   generate
-    // *** Just checking XLEN is not enough to support sv39 AND sv48.
     if (`XLEN == 32) begin
-      // The upper segment might have a different width than the lower segments.
-      // For example, an sv39 PTE has 26 bits for PPN2 and 9 bits for the other
-      // segments.
-      localparam LOW_SEGMENT_BITS = (BITS - HIGH_SEGMENT_BITS);
 
       logic [HIGH_SEGMENT_BITS-1:0] Segment1, MixSegment1, Segment1Combined;
       logic [LOW_SEGMENT_BITS-1:0]  Segment0, MixSegment0, Segment0Combined;
@@ -58,28 +69,60 @@ module page_number_mixer #(parameter BITS = 20,
       // Reswizzle segments of the combined page number
       assign PageNumberCombined = {Segment1Combined, Segment0Combined};
     end else begin
-      // The upper segment might have a different width than the lower segments.
-      // For example, an sv39 PTE has 26 bits for PPN2 and 9 bits for the other
-      // segments.
-      localparam LOW_SEGMENT_BITS = (BITS - HIGH_SEGMENT_BITS) / 2;
 
-      logic [HIGH_SEGMENT_BITS-1:0] Segment2, MixSegment2, Segment2Combined;
+      // After segment 0 and 1 of the page number, the width of each segment is dependent on the SvMode.
+      // For this reason, each segment bus is the width of its widest value across each mode.
+      // When a smaller value needs to be loaded into a wider bus, it's loaded in the least significant bits
+      // and left padded with zeros. MAKE SURE that if a value is being padded with zeros here,
+      // that it's padded with zeros everywhere else in the MMU and beyond to avoid false misses in the TLB.
+      logic [HIGH_SEGMENT_BITS-1:0] Segment3, MixSegment3, Segment3Combined;
+      logic [HIGH_SEGMENT_BITS + LOW_SEGMENT_BITS-1:0]  Segment2, MixSegment2, Segment2Combined;
       logic [LOW_SEGMENT_BITS-1:0]  Segment1, MixSegment1, Segment1Combined;
       logic [LOW_SEGMENT_BITS-1:0]  Segment0, MixSegment0, Segment0Combined;
+      
 
       // Unswizzle segments of the input page number
-      assign {Segment2, Segment1, Segment0} = PageNumber;
-      assign {MixSegment2, MixSegment1, MixSegment0} = MixPageNumber;
+      // *** these muxes assume that only Sv48 and SV39 are implemented in rv64. for future SV57 and up,
+      //      there will have to be more muxes to select which value each segment gets.
+      //      as a cool reminder: BITS is the width of the page number, virt or phys, coming into this module
+      //      while high segment bits is the width of the highest segment of that page number.
+      //      Note for future work: this module has to work with both VPNs and PPNs and due to their differing 
+      //         widths and the fact that the ppn has one longer segment at the top makes the muxes below very confusing.
+      //      Potentially very annoying thing for future workers: the number of bits in a ppn is always 44 (for SV39 and SV48)
+      //         but in SV57 and above, this might be a new longer length. In that case these selectors will most likely
+      //         become even more complicated and confusing.
+      assign Segment3 = (SvMode == `SV48) ? 
+                        PageNumber[BITS-1:3*LOW_SEGMENT_BITS] : // take the top segment or not
+                        {HIGH_SEGMENT_BITS{1'b0}}; // for virtual page numbers in SV39, both options should be zeros.
+      assign Segment2 = (SvMode == `SV48) ? 
+                        {{HIGH_SEGMENT_BITS{1'b0}}, PageNumber[3*LOW_SEGMENT_BITS-1:2*LOW_SEGMENT_BITS]} : // just take another low segment left padded with zeros.
+                        PageNumber[BITS-1:2*LOW_SEGMENT_BITS]; // otherwise take the rest of the PageNumber
+      assign Segment1 = PageNumber[2*LOW_SEGMENT_BITS-1:LOW_SEGMENT_BITS];
+      assign Segment0 = PageNumber[LOW_SEGMENT_BITS-1:0];
+
+
+      assign MixSegment3 = (SvMode == `SV48) ? 
+                        MixPageNumber[BITS-1:3*LOW_SEGMENT_BITS] : // take the top segment or not
+                        {HIGH_SEGMENT_BITS{1'b0}}; // for virtual page numbers in SV39, both options should be zeros.
+      assign MixSegment2 = (SvMode == `SV48) ? 
+                        {{HIGH_SEGMENT_BITS{1'b0}}, MixPageNumber[3*LOW_SEGMENT_BITS-1:2*LOW_SEGMENT_BITS]} : // just take another low segment left padded with zeros.
+                        MixPageNumber[BITS-1:2*LOW_SEGMENT_BITS]; // otherwise take the rest of the PageNumber
+      assign MixSegment1 = MixPageNumber[2*LOW_SEGMENT_BITS-1:LOW_SEGMENT_BITS];
+      assign MixSegment0 = MixPageNumber[LOW_SEGMENT_BITS-1:0];
+
 
       // Pass through the high segment
-      assign Segment2Combined = Segment2;
+      assign Segment3Combined = Segment3;
 
-      // Either pass through or zero out segments 1 and 0 based on the page type
-      mux2 #(LOW_SEGMENT_BITS) segment1mux(Segment1, MixSegment1, PageType[1], Segment1Combined);
-      mux2 #(LOW_SEGMENT_BITS) segment0mux(Segment0, MixSegment0, PageType[0], Segment0Combined);
+      // Either pass through each lower segment or replace it with the mix segment, based on the page type
+      assign Segment2Combined = (PageType[1] && PageType[0]) ? MixSegment2 : Segment2; // terapage (page == 11)
+      assign Segment1Combined = (PageType[1]) ? MixSegment1 : Segment1; // gigapage and higher (page == 10 or 11)
+      assign Segment0Combined = (PageType[1] || PageType[0]) ? MixSegment0 : Segment0; // megapage and higher (page == 01 or 10 or 11)
 
       // Reswizzle segments of the combined page number
-      assign PageNumberCombined = {Segment2Combined, Segment1Combined, Segment0Combined};
+      assign PageNumberCombined = (SvMode == `SV48) ? 
+                                  {Segment3Combined, Segment2Combined[LOW_SEGMENT_BITS-1:0], Segment1Combined, Segment0Combined} :
+                                  {Segment2Combined, Segment1Combined, Segment0Combined};
     end
   endgenerate
 endmodule
diff --git a/wally-pipelined/src/mmu/pagetablewalker.sv b/wally-pipelined/src/mmu/pagetablewalker.sv
index f2aada44..b0e4fe8e 100644
--- a/wally-pipelined/src/mmu/pagetablewalker.sv
+++ b/wally-pipelined/src/mmu/pagetablewalker.sv
@@ -2,7 +2,10 @@
 // pagetablewalker.sv
 //
 // Written: tfleming@hmc.edu 2 March 2021
-// Modified: 
+// Modified: kmacsaigoren@hmc.edu 1 June 2021
+//            implemented SV48 on top of SV39. This included adding a level of the FSM for the extra page number segment,
+//            adding support for terapage encoding, and for setting the TranslationPAdr using the new level,
+//            adding the internal SvMode signal
 //
 // Purpose: Page Table Walker
 //          Part of the Memory Management Unit (MMU)
@@ -70,6 +73,7 @@ module pagetablewalker (
   logic [`XLEN-1:0]     SavedPTE, CurrentPTE;
   logic [`PA_BITS-1:0]  TranslationPAdr;
   logic [`PPN_BITS-1:0] CurrentPPN;
+  logic [`SVMODE_BITS-1:0]  SvMode;
   logic                 MemStore;
 
   // PTE Control Bits
@@ -82,6 +86,8 @@ module pagetablewalker (
   logic [`XLEN-1:0] PageTableEntry;
   logic [1:0] PageType;
 
+  assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
+
   assign BasePageTablePPN = SATP_REGW[`PPN_BITS-1:0];
 
   assign MemStore = MemRWM[0];
@@ -105,11 +111,12 @@ module pagetablewalker (
   assign PageTypeF = PageType;
   assign PageTypeM = PageType;
 
-  localparam IDLE = 3'h0;
+  localparam LEVEL0 = 3'h0;
   localparam LEVEL1 = 3'h1;
-  localparam LEVEL0 = 3'h2;
-  localparam LEAF = 3'h3;
-  localparam FAULT = 3'h4;
+  // space left for more levels
+  localparam LEAF = 3'h5;
+  localparam IDLE = 3'h6;
+  localparam FAULT = 3'h7;
 
   logic [2:0] WalkerState, NextWalkerState;
 
@@ -208,18 +215,32 @@ module pagetablewalker (
       assign MMUPAdr = TranslationPAdr[31:0];
 
     end else begin
-      localparam LEVEL2 = 3'h5;
+      localparam LEVEL2 = 3'h2;
+      localparam LEVEL3 = 3'h3;
 
-      logic [8:0] VPN2, VPN1, VPN0;
+      logic [8:0] VPN3, VPN2, VPN1, VPN0;
 
-      logic GigapageMisaligned, BadGigapage;
+      logic TerapageMisaligned, GigapageMisaligned, BadTerapage, BadGigapage;
 
       flopenl #(3) mmureg(HCLK, ~HRESETn, 1'b1, NextWalkerState, IDLE, WalkerState);
 
       always_comb begin
         case (WalkerState)
-          IDLE:   if      (MMUTranslate)           NextWalkerState = LEVEL2;
+          IDLE:   if      (MMUTranslate)           NextWalkerState = LEVEL3;
                   else                             NextWalkerState = IDLE;
+          LEVEL3: if      (SvMode != `SV48)         NextWalkerState = LEVEL2;
+                  // 3rd level used if SV48 is enabled.
+                  else begin
+                    if      (~MMUReady)              NextWalkerState = LEVEL3;
+                    // *** <FUTURE WORK> According to the architecture, we should
+                    // fault upon finding a superpage that is misaligned or has 0
+                    // access bit. The following commented line of code is
+                    // supposed to perform that check. However, it is untested.
+                    else if (ValidPTE && LeafPTE && ~BadTerapage) NextWalkerState = LEAF;
+                    // else if (ValidPTE && LeafPTE)    NextWalkerState = LEAF;  // *** Once the above line is properly tested, delete this line.
+                    else if (ValidPTE && ~LeafPTE)   NextWalkerState = LEVEL2;
+                    else                             NextWalkerState = FAULT;
+                  end
           LEVEL2: if      (~MMUReady)              NextWalkerState = LEVEL2;
                   // *** <FUTURE WORK> According to the architecture, we should
                   // fault upon finding a superpage that is misaligned or has 0
@@ -242,24 +263,29 @@ module pagetablewalker (
                   else if (ValidPTE && LeafPTE && ~AccessAlert)
                                                    NextWalkerState = LEAF;
                   else                             NextWalkerState = FAULT;
-          LEAF:   if      (MMUTranslate)           NextWalkerState = LEVEL2;
+          LEAF:   if      (MMUTranslate)           NextWalkerState = LEVEL3;
                   else                             NextWalkerState = IDLE;
-          FAULT:  if      (MMUTranslate)           NextWalkerState = LEVEL2;
+          FAULT:  if      (MMUTranslate)           NextWalkerState = LEVEL3;
                   else                             NextWalkerState = IDLE;
           // Default case should never happen, but is included for linter.
           default:                                 NextWalkerState = IDLE;
         endcase
       end
 
+      // A terapage is a level 3 leaf page. This page must have zero PPN[2],
+      // zero PPN[1], and zero PPN[0]
+      assign TerapageMisaligned = |(CurrentPPN[26:0]);
       // A gigapage is a Level 2 leaf page. This page must have zero PPN[1] and
       // zero PPN[0]
       assign GigapageMisaligned = |(CurrentPPN[17:0]);
       // A megapage is a Level 1 leaf page. This page must have zero PPN[0].
       assign MegapageMisaligned = |(CurrentPPN[8:0]);
 
+      assign BadTerapage = TerapageMisaligned || AccessAlert;  // *** Implement better access/dirty scheme
       assign BadGigapage = GigapageMisaligned || AccessAlert;  // *** Implement better access/dirty scheme
       assign BadMegapage = MegapageMisaligned || AccessAlert;  // *** Implement better access/dirty scheme
 
+      assign VPN3 = TranslationVAdr[47:39];
       assign VPN2 = TranslationVAdr[38:30];
       assign VPN1 = TranslationVAdr[29:21];
       assign VPN0 = TranslationVAdr[20:12];
@@ -282,8 +308,13 @@ module pagetablewalker (
           IDLE: begin
             MMUStall = '0;
           end
+          LEVEL3: begin
+            TranslationPAdr = {BasePageTablePPN, VPN3, 3'b000};
+            // *** this is a huge breaking point. if we're going through level3 every time, even when sv48 is off,
+            // what should translationPAdr be when level3 is just off?
+          end
           LEVEL2: begin
-            TranslationPAdr = {BasePageTablePPN, VPN2, 3'b000};
+            TranslationPAdr = {(SvMode == `SV48) ? CurrentPPN : BasePageTablePPN, VPN2, 3'b000};
           end
           LEVEL1: begin
             TranslationPAdr = {CurrentPPN, VPN1, 3'b000};
@@ -295,8 +326,9 @@ module pagetablewalker (
             // Keep physical address alive to prevent HADDR dropping to 0
             TranslationPAdr = {CurrentPPN, VPN0, 3'b000};
             PageTableEntry = CurrentPTE;
-            PageType = (WalkerState == LEVEL2) ? 2'b11 : 
-                                ((WalkerState == LEVEL1) ? 2'b01 : 2'b00);
+            PageType = (WalkerState == LEVEL3) ? 2'b11 :
+                                ((WalkerState == LEVEL2) ? 2'b10 : 
+                                ((WalkerState == LEVEL1) ? 2'b01 : 2'b00));
             DTLBWriteM = DTLBMissM;
             ITLBWriteF = ~DTLBMissM;  // Prefer data over instructions
           end
diff --git a/wally-pipelined/src/mmu/priority_encoder.sv b/wally-pipelined/src/mmu/priority_encoder.sv
index e4a62ce1..dade2e83 100644
--- a/wally-pipelined/src/mmu/priority_encoder.sv
+++ b/wally-pipelined/src/mmu/priority_encoder.sv
@@ -4,7 +4,11 @@
 // Written: tfleming@hmc.edu & jtorrey@hmc.edu 7 April 2021
 // Based on implementation from https://www.allaboutcircuits.com/ip-cores/communication-controller/priority-encoder/
 // *** Give proper LGPL attribution for above source
-// Modified:
+// Modified: Teo Ene 15 Apr 2021:
+//              Temporarily removed parameterized priority encoder for non-parameterized one
+//              To get synthesis working quickly
+//           Kmacsaigoren@hmc.edu 28 May 2021:
+//              Added working version of parameterized priority encoder. 
 //
 // Purpose: One-hot encoding to binary encoder
 //
@@ -27,51 +31,33 @@
 
 `include "wally-config.vh"
 
-// Teo Ene 04/15:
-// Temporarily removed paramterized priority encoder for non-parameterized one
-// To get synthesis working quickly
 module priority_encoder #(parameter BINARY_BITS = 3) (
-  input  logic  [7:0] one_hot,
-  output logic  [2:0] binary
+  input  logic  [2**BINARY_BITS - 1:0] one_hot,
+  output logic  [BINARY_BITS - 1:0] binary
 );
 
-  // localparam ONE_HOT_BITS = 2**BINARY_BITS;
-
-  /*
-  genvar i, j;
-  generate
-    for (i = 0; i < ONE_HOT_BITS; i++) begin
-      for (j = 0; j < BINARY_BITS; j++) begin
-        if (i[j]) begin
-          assign binary[j] = one_hot[i];
-        end
-      end
-    end
-  endgenerate
-  */
-
-  /*
-  logic [BINARY_BITS-1:0] binary_comb;
-
+  integer i;
   always_comb begin
-    binary_comb = 0;
-    for (int i = 0; i < ONE_HOT_BITS; i++)
-      if (one_hot[i]) binary_comb = i;
+    binary = 0;
+    for (i = 0; i < 2**BINARY_BITS; i++) begin
+      if (one_hot[i]) binary = i; // prioritizes the most significant bit
+    end
   end
+  // *** triple check synthesizability here
 
-  assign binary = binary_comb;
+  // Ideally this mimics the following:
+  /*
+  always_comb begin
+    casex (one_hot)
+      1xx ... x: binary = 2**BINARY_BITS - 1;
+      01x ... x: binary = 2**BINARY_BITS - 2;
+      001 ... x: binary = 2**BINARY_BITS - 3;
+      
+      {...}
+
+      00 ... 1xx: binary = 2;
+      00 ... 01x: binary = 1;
+      00 ... 001: binary = 0;
+  end
   */
-  always_comb
-    case (one_hot)
-      8'h1:     binary=3'h0;
-      8'h2:     binary=3'h1;
-      8'h4:     binary=3'h2;
-      8'h8:     binary=3'h3;
-      8'h10:    binary=3'h4;
-      8'h20:    binary=3'h5;
-      8'h40:    binary=3'h6;
-      8'h80:    binary=3'h7;
-      default:  binary=3'h0; //should never happen
-    endcase
-
 endmodule
diff --git a/wally-pipelined/src/mmu/tlb.sv b/wally-pipelined/src/mmu/tlb.sv
index 7ed594e4..1828c98e 100644
--- a/wally-pipelined/src/mmu/tlb.sv
+++ b/wally-pipelined/src/mmu/tlb.sv
@@ -2,7 +2,9 @@
 // tlb.sv
 //
 // Written: jtorrey@hmc.edu 16 February 2021
-// Modified:
+// Modified: kmacsaigoren@hmc.edu 1 June 2021
+//            Implemented SV48 on top of SV39. This included adding the SvMode signal,
+//            and using it to decide the translate signal and get the virtual page number
 //
 // Purpose: Translation lookaside buffer
 //          Cache of virtural-to-physical address translations
@@ -25,7 +27,7 @@
 ///////////////////////////////////////////
 
 /**
- * sv32 specs
+ * SV32 specs
  * ----------
  * Virtual address [31:0] (32 bits)
  *    [________________________________]
@@ -85,14 +87,11 @@ module tlb #(parameter ENTRY_BITS = 3,
   output             TLBPageFault
 );
 
-  logic SvMode;
   logic Translate;
   logic TLBAccess, ReadAccess, WriteAccess;
 
-  // *** If we want to support multiple virtual memory modes (ie sv39 AND sv48),
-  // we could have some muxes that control which parameters are current.
-  // Although then some of the signals are not big enough. But that's a problem
-  // for much later.
+  // Store current virtual memory mode (SV32, SV39, SV48, etc...)
+  logic [`SVMODE_BITS-1:0] SvMode;
 
   // Index (currently random) to write the next TLB entry
   logic [ENTRY_BITS-1:0] WriteIndex;
@@ -116,17 +115,24 @@ module tlb #(parameter ENTRY_BITS = 3,
   // Whether the virtual address has a match in the CAM
   logic                  CAMHit;
 
-  // Grab the sv bit from SATP
+  // Grab the sv mode from SATP
+  assign SvMode = SATP_REGW[`XLEN-1:`XLEN-`SVMODE_BITS];
+
+  // The bus width is always the largest it could be for that XLEN. For example, the VPN bus is 36 bits wide
+  // in rv64, even though the VPN itself could be 27 bits (SV39) or 36 bits (SV48) wide. When the VPN is
+  // narrower than the bus, the extra bits on the left of the full value are padded with zeros.
   generate
     if (`XLEN == 32) begin
-      assign SvMode = SATP_REGW[31];  // *** change to an enum somehow?
+      assign VirtualPageNumber = VirtualAddress[`VPN_BITS+11:12];
     end else begin
-      assign SvMode = SATP_REGW[63]; // currently just a boolean whether translation enabled
+      assign VirtualPageNumber = (SvMode == `SV48) ?
+                                 VirtualAddress[`VPN_BITS+11:12] :
+                                 {{`VPN_SEGMENT_BITS{1'b0}}, VirtualAddress[3*`VPN_SEGMENT_BITS+11:12]};
     end
   endgenerate
 
   // Whether translation should occur
-  assign Translate = SvMode & (PrivilegeModeW != `M_MODE);
+  assign Translate = (SvMode != `NO_TRANSLATE) & (PrivilegeModeW != `M_MODE);
 
   // Determine how the TLB is currently being used
   // Note that we use ReadAccess for both loads and instruction fetches
@@ -134,7 +140,7 @@ module tlb #(parameter ENTRY_BITS = 3,
   assign WriteAccess = TLBAccessType[0];
   assign TLBAccess = ReadAccess || WriteAccess;
 
-  assign VirtualPageNumber = VirtualAddress[`VPN_BITS+11:12];
+  
   assign PageOffset        = VirtualAddress[11:0];
 
   // TLB entries are evicted according to the LRU algorithm
@@ -188,9 +194,10 @@ module tlb #(parameter ENTRY_BITS = 3,
   // page number. For 4 KB pages, the entire virtual page number is replaced.
   // For superpages, some segments are considered offsets into a larger page.
   page_number_mixer #(`PPN_BITS, `PPN_HIGH_SEGMENT_BITS)
-    physical_mixer(PhysicalPageNumber,
+    physical_mixer(PhysicalPageNumber, 
       {{EXTRA_PHYSICAL_BITS{1'b0}}, VirtualPageNumber},
       HitPageType,
+      SvMode,
       PhysicalPageNumberMixed);
 
   // Provide physical address only on TLBHits to cause catastrophic errors if
diff --git a/wally-pipelined/src/mmu/tlb_cam.sv b/wally-pipelined/src/mmu/tlb_cam.sv
index 330bb382..78d9ff8d 100644
--- a/wally-pipelined/src/mmu/tlb_cam.sv
+++ b/wally-pipelined/src/mmu/tlb_cam.sv
@@ -2,7 +2,9 @@
 // tlb_cam.sv
 //
 // Written: jtorrey@hmc.edu 16 February 2021
-// Modified:
+// Modified: kmacsaigoren@hmc.edu 1 June 2021
+//            Implemented SV48 on top of SV39. This included adding the SvMode signal input and wally constants
+//            Mostly this was to make the cam_lines work.
 //
 // Purpose: Stores virtual page numbers with cached translations.
 //          Determines whether a given virtual page number is in the TLB.
@@ -24,18 +26,21 @@
 // OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 ///////////////////////////////////////////
 
+`include "wally-constants.vh"
+
 module tlb_cam #(parameter ENTRY_BITS = 3,
                  parameter KEY_BITS   = 20,
                  parameter HIGH_SEGMENT_BITS = 10) (
-  input                    clk, reset,
-  input  [KEY_BITS-1:0]    VirtualPageNumber,
-  input  [1:0]             PageTypeWrite,
-  input  [ENTRY_BITS-1:0]  WriteIndex,
-  input                    TLBWrite,
-  input                    TLBFlush,
-  output [ENTRY_BITS-1:0]  VPNIndex,
-  output [1:0]             HitPageType,
-  output                   CAMHit
+  input                     clk, reset,
+  input  [KEY_BITS-1:0]     VirtualPageNumber,
+  input  [1:0]              PageTypeWrite,
+  input  [ENTRY_BITS-1:0]   WriteIndex,
+  input  [`SVMODE_BITS-1:0] SvMode,
+  input                     TLBWrite,
+  input                     TLBFlush,
+  output [ENTRY_BITS-1:0]   VPNIndex,
+  output [1:0]              HitPageType,
+  output                    CAMHit
 );
 
   localparam NENTRIES = 2**ENTRY_BITS;
diff --git a/wally-pipelined/src/muldiv/div.sv b/wally-pipelined/src/muldiv/div.sv
index db830ca3..10af5eee 100755
--- a/wally-pipelined/src/muldiv/div.sv
+++ b/wally-pipelined/src/muldiv/div.sv
@@ -1,5 +1,5 @@
 ///////////////////////////////////////////
-// mul.sv
+// divide4x64.sv
 //
 // Written: James.Stine@okstate.edu 1 February 2021
 // Modified: 
@@ -29,60 +29,55 @@
 /* verilator lint_off COMBDLY */
 /* verilator lint_off IMPLICIT */
 
-`include "wally-config.vh"
+module intdiv #(parameter WIDTH=64) 
+   (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S);
 
-module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S);
-
-   input logic [63:0]  N, D;
-   input logic 	       clk;
-   input logic 	       reset;
-   input logic 	       start;
-   input logic 	       S;   
+   input logic [WIDTH-1:0]   N, D;
+   input logic 		     clk;
+   input logic 		     reset;
+   input logic 		     start;
+   input logic 		     S;   
+   
+   output logic [WIDTH-1:0]  Qf;
+   output logic [WIDTH-1:0]  remf;
+   output logic 	     div0;
+   output logic 	     done;
+   output logic 	     divBusy;   
+   
+   logic 		     enable;
+   logic 		     state0;
+   logic 		     V;   
+   logic [$clog2(WIDTH):0]   Num;
+   logic [$clog2(WIDTH)-1:0] P, NumIter, RemShift;
+   logic [WIDTH-1:0] 	     op1, op2, op1shift, Rem5;
+   logic [WIDTH:0] 	     Qd, Rd, Qd2, Rd2;
+   logic [WIDTH-1:0] 	     Q, rem0;
+   logic [3:0] 		     quotient;
+   logic 		     otfzero; 
+   logic 		     shiftResult;
+   logic 		     enablev, state0v, donev, oftzerov, divBusyv, ulp;   
+   
+   logic [WIDTH-1:0] 	     twoD;
+   logic [WIDTH-1:0] 	     twoN;
+   logic 		     SignD;
+   logic 		     SignN;
+   logic [WIDTH-1:0] 	     QT, remT;
+   logic 		     D_NegOne;
+   logic 		     Max_N;      
    
-   output logic [63:0] Qf;
-   output logic [63:0] remf;
-   output logic        div0;
-   output logic        done;
-   output logic        divBusy;   
-
-   logic 	       divdone;   
-   logic 	       enable;
-   logic 	       state0;
-   logic 	       V;   
-   logic [7:0] 	       Num;
-   logic [5:0] 	       P, NumIter, RemShift;
-   logic [63:0]        op1, op2, op1shift, Rem5;
-   logic [64:0]        Qd, Rd, Qd2, Rd2;
-   logic [63:0]        Q, rem0;
-   logic [3:0] 	       quotient;
-   logic 	       otfzero; 
-   logic 	       shiftResult;
-   logic 	       enablev, state0v, donev, divdonev, oftzerov, divBusyv, ulp;
-
-   logic [63:0]        twoD;
-   logic [63:0]        twoN;
-   logic 	       SignD;
-   logic 	       SignN;
-   logic [63:0]        QT, remT;
-   logic 	       D_NegOne;
-   logic 	       Max_N;
 
    // Check if negative (two's complement)
    //   If so, convert to positive
-   adder #(64) cpa1 ((D ^ {64{D[63]&S}}), {63'h0, D[63]&S}, twoD);
-   adder #(64) cpa2 ((N ^ {64{N[63]&S}}), {63'h0, N[63]&S}, twoN);   
-   assign SignD = D[63];
-   assign SignN = N[63];   
+   adder #(WIDTH) cpa1 ((D ^ {WIDTH{D[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, D[WIDTH-1]&S}, twoD);
+   adder #(WIDTH) cpa2 ((N ^ {WIDTH{N[WIDTH-1]&S}}), {{WIDTH-1{1'b0}}, N[WIDTH-1]&S}, twoN);   
+   assign SignD = D[WIDTH-1];
+   assign SignN = N[WIDTH-1];   
    // Max N and D = -1 (Overflow)
-   assign Max_N = (~|N[62:0]) & N[63];
+   assign Max_N = (~|N[WIDTH-2:0]) & N[WIDTH-1];
    assign D_NegOne = &D;
-
+   
    // Divider goes the distance to 37 cycles
-   // (thanks the evil divisor for D = 0x1) 
-   // but could theoretically be stopped when
-   // divdone is asserted.  The enable signal
-   // turns off register storage thus invalidating
-   // any future cycles.
+   // (thanks to the evil divisor for D = 0x1) 
    
    // Shift D, if needed (for integer)
    // needed to allow qst to be in range for integer
@@ -92,32 +87,31 @@ module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S);
    // is 0 and thus a divide by 0 exception.  This div0
    // exception is given to FSM to tell the operation to 
    // quit gracefully.
-
-   lz64 p1 (P, V, twoD);
-   shifter_l64 p2 (op2, twoD, P);
-   assign op1 = twoN;
+   lzd_hier #(WIDTH) p1 (.ZP(P), .ZV(V), .B(twoD));
+   shift_left #(WIDTH) p2 (twoD, P, op2);
+   assign op1 = twoN;   
    assign div0 = ~V;
 
-   // #iter: N = m+v+s = m+(s+2) = m+2+s (mod k = 0)
+   // #iter: N = m+v+s = m+2+s (mod k = 0)
-   // v = 2 since \rho < 1 (add 4 to make sure its a ceil)
+   // v = 2 since \rho < 1 (add 4 to make sure it's a ceil)
-   adder #(8) cpa3 ({2'b0, P}, 
-		    {5'h0, shiftResult, ~shiftResult, 1'b0}, 
-		    Num);      
+   // k = 2 (r = 2^k)
+   adder #($clog2(WIDTH)+1) cpa3 ({1'b0, P}, 
+				  {{$clog2(WIDTH)+1-3{1'b0}}, shiftResult, ~shiftResult, 1'b0}, 
+				  Num);      
    
    // Determine whether need to add just Q/Rem
    assign shiftResult = P[0];   
    // div by 2 (ceil)
-   assign NumIter = Num[6:1];   
+   assign NumIter = Num[$clog2(WIDTH):1];   
    assign RemShift = P;
 
    // FSM to control integer divider
-   //   assume inputs are postive edge and
+   //   assume inputs are positive edge and
    //   datapath (divider) is negative edge
-   fsm64 fsm1 (enablev, state0v, donev, divdonev, otfzerov, divBusyv,
-	       start, div0, NumIter, ~clk, reset);
+   fsm64 #($clog2(WIDTH)) fsm1 (enablev, state0v, donev, otfzerov, divBusyv,
+				start, div0, NumIter, ~clk, reset);
 
    flopr #(1) rega (~clk, reset, donev, done);
-   flopr #(1) regb (~clk, reset, divdonev, divdone);
    flopr #(1) regc (~clk, reset, otfzerov, otfzero);
    flopr #(1) regd (~clk, reset, enablev, enable);
    flopr #(1) rege (~clk, reset, state0v, state0);
@@ -129,65 +123,66 @@ module div (Qf, remf, done, divBusy, div0, N, D, clk, reset, start, S);
    // integer bit and m fractional bits), this is achieved by
    // shifting N right by v+s so that (m+v+s) mod k = 0.  And,
    // the quotient has to be aligned to the integer position.
-
-   divide4x64 p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, 
-		  enable, otfzero, shiftResult);
+   divide4 #(WIDTH) p3 (Qd, Rd, quotient, op1, op2, clk, reset, state0, 
+			enable, otfzero, shiftResult);
 
    // Storage registers to hold contents stable
-   flopenr #(65) reg3 (clk, reset, enable, Rd, Rd2);
-   flopenr #(65) reg4 (clk, reset, enable, Qd, Qd2);         
+   flopenr #(WIDTH+1) reg3 (clk, reset, enable, Rd, Rd2);
+   flopenr #(WIDTH+1) reg4 (clk, reset, enable, Qd, Qd2);         
 
    // Probably not needed - just assigns results
-   assign Q = Qd2[63:0];
-   assign Rem5 = Rd2[64:1];  
+   assign Q = Qd2[WIDTH-1:0];
+   assign Rem5 = Rd2[WIDTH:1];  
    
-   // Adjust remainder by m (no need to adjust by
-   // n ln(r)
+   // Adjust remainder by m (no need to adjust by n ln(r))
-   shifter_r64 p4 (rem0, Rem5, RemShift);
+   shift_right #(WIDTH) p4 (Rem5, RemShift, rem0);
 
    // Adjust Q/Rem for Signed
    assign tcQ = (SignN ^ SignD) & S;
    assign tcR = SignN & S;
-   // Signed Divide
+
+   // When Dividend (N) and/or Divisor (D) are negative (first bit is '1'):
    // - When N and D are negative: Remainder is negative (undergoes a two's complement).
    // - When N is negative: Quotient and Remainder are both negative (undergo a two's complement).
    // - When D is negative: Quotient is negative (undergoes a two's complement).
-   adder #(64) cpa4 ((rem0 ^ {64{tcR}}), {63'h0, tcR}, remT);
-   adder #(64) cpa5 ((Q ^ {64{tcQ}}), {63'h0, tcQ}, QT);         
+   adder #(WIDTH) cpa4 ((rem0 ^ {WIDTH{tcR}}), {{WIDTH-1{1'b0}}, tcR}, remT);
+   adder #(WIDTH) cpa5 ((Q ^ {WIDTH{tcQ}}), {{WIDTH-1{1'b0}}, tcQ}, QT);         
 
    // RISC-V has exceptions for divide by 0 and overflow (see Table 6.1 of spec)
-   exception_int exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf);
-
+   exception_int #(WIDTH) exc (QT, remT, N, S, div0, Max_N, D_NegOne, Qf, remf);
+   
 endmodule // int32div
 
-module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0, 
-		   enable, otfzero, shiftResult); 
+// Division by Recurrence (r=4)
+module divide4 #(parameter WIDTH=64) 
+   (Q, rem0, quotient, op1, op2, clk, reset, state0, 
+    enable, otfzero, shiftResult); 
 
-   input logic [63:0]   op1, op2;
-   input logic 		clk, state0;
-   input logic 		reset;
-   input logic 		enable;
-   input logic 		otfzero;
-   input logic 		shiftResult;   
+   input logic [WIDTH-1:0]   op1, op2;
+   input logic 		     clk, state0;
+   input logic 		     reset;
+   input logic 		     enable;
+   input logic 		     otfzero;
+   input logic 		     shiftResult;   
    
-   output logic [64:0] 	rem0;
-   output logic [64:0] 	Q;
-   output logic [3:0] 	quotient;   
+   output logic [WIDTH:0]    rem0;
+   output logic [WIDTH:0]    Q;
+   output logic [3:0] 	     quotient;   
 
-   logic [67:0] 	Sum, Carry;   
-   logic [64:0] 	Qstar;   
-   logic [64:0] 	QMstar;   
-   logic [7:0] 		qtotal;   
-   logic [67:0] 	SumN, CarryN, SumN2, CarryN2;
-   logic [67:0] 	divi1, divi2, divi1c, divi2c, dive1;
-   logic [67:0] 	mdivi_temp, mdivi;   
-   logic 		zero;
-   logic [1:0] 		qsel;
-   logic [1:0] 		Qin, QMin;
-   logic 		CshiftQ, CshiftQM;
-   logic [67:0] 	rem1, rem2, rem3;
-   logic [67:0] 	SumR, CarryR;
-   logic [64:0] 	Qt;   
+   logic [WIDTH+3:0] 	     Sum, Carry;   
+   logic [WIDTH:0] 	     Qstar;   
+   logic [WIDTH:0] 	     QMstar;   
+   logic [7:0] 		     qtotal;   
+   logic [WIDTH+3:0] 	     SumN, CarryN, SumN2, CarryN2;
+   logic [WIDTH+3:0] 	     divi1, divi2, divi1c, divi2c, dive1;
+   logic [WIDTH+3:0] 	     mdivi_temp, mdivi;   
+   logic 		     zero;
+   logic [1:0] 		     qsel;
+   logic [1:0] 		     Qin, QMin;
+   logic 		     CshiftQ, CshiftQM;
+   logic [WIDTH+3:0] 	     rem1, rem2, rem3;
+   logic [WIDTH+3:0] 	     SumR, CarryR;
+   logic [WIDTH:0] 	     Qt;   
 
    // Create one's complement values of Divisor (for q*D)
    assign divi1 = {3'h0, op2, 1'b0};
@@ -195,46 +190,47 @@ module divide4x64 (Q, rem0, quotient, op1, op2, clk, reset, state0,
    assign divi1c = ~divi1;
    assign divi2c = ~divi2;
    // Shift x1 if not mod k
-   mux2 #(68) mx1 ({3'b000, op1, 1'b0},  {4'h0, op1}, shiftResult, dive1);   
+   mux2 #(WIDTH+4) mx1 ({3'b000, op1, 1'b0},  {4'h0, op1}, shiftResult, dive1);   
 
    // I I I . F F F F F ... (Robertson Criteria - \rho * qmax * D)
-   mux2 #(68) mx2 ({CarryN2[65:0], 2'h0}, 68'h0, state0, CarryN);
-   mux2 #(68) mx3 ({SumN2[65:0], 2'h0}, dive1, state0, SumN);
+   mux2 #(WIDTH+4) mx2 ({CarryN2[WIDTH+1:0], 2'h0}, {WIDTH+4{1'b0}}, state0, CarryN);
+   mux2 #(WIDTH+4) mx3 ({SumN2[WIDTH+1:0], 2'h0}, dive1, state0, SumN);
    // Simplify QST
-   adder #(8) cpa1 (SumN[67:60], CarryN[67:60], qtotal);   
+   adder #(8) cpa1 (SumN[WIDTH+3:WIDTH-4], CarryN[WIDTH+3:WIDTH-4], qtotal);   
    // q = {+2, +1, -1, -2} else q = 0
-   qst4 pd1 (qtotal[7:1], divi1[63:61], quotient);
+   qst4 pd1 (qtotal[7:1], divi1[WIDTH-1:WIDTH-3], quotient);
    assign ulp = quotient[2]|quotient[3];
    assign zero = ~(quotient[3]|quotient[2]|quotient[1]|quotient[0]);
    // Map to binary encoding
    assign qsel[1] = quotient[3]|quotient[2];
    assign qsel[0] = quotient[3]|quotient[1];   
-   mux4 #(68) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp);
-   mux2 #(68) mx5 (mdivi_temp, 68'h0, zero, mdivi);
-   csa #(68) csa1 (mdivi, SumN, {CarryN[67:1], ulp}, Sum, Carry);
+   mux4 #(WIDTH+4) mx4 (divi2, divi1, divi1c, divi2c, qsel, mdivi_temp);
+   mux2 #(WIDTH+4) mx5 (mdivi_temp, {WIDTH+4{1'b0}}, zero, mdivi);
+   csa #(WIDTH+4) csa1 (mdivi, SumN, {CarryN[WIDTH+3:1], ulp}, Sum, Carry);
    // regs : save CSA
-   flopenr #(68) reg1 (clk, reset, enable, Sum, SumN2);
-   flopenr #(68) reg2 (clk, reset, enable, Carry, CarryN2);
+   flopenr #(WIDTH+4) reg1 (clk, reset, enable, Sum, SumN2);
+   flopenr #(WIDTH+4) reg2 (clk, reset, enable, Carry, CarryN2);
    // OTF
    ls_control otf1 (quotient, Qin, QMin, CshiftQ, CshiftQM);   
-   otf #(65) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, 
-		   otfzero, enable, Qstar, QMstar);
+   otf #(WIDTH+1) otf2 (Qin, QMin, CshiftQ, CshiftQM, clk, 
+			otfzero, enable, Qstar, QMstar);
 
    // Correction and generation of Remainder
-   adder #(68) cpa2 (SumN2[67:0], CarryN2[67:0], rem1);
+   adder #(WIDTH+4) cpa2 (SumN2[WIDTH+3:0], CarryN2[WIDTH+3:0], rem1);
    // Add back +D as correction
-   csa #(68) csa2 (CarryN2[67:0], SumN2[67:0], divi1, SumR, CarryR);
-   adder #(68) cpa3 (SumR, CarryR, rem2);   
+   csa #(WIDTH+4) csa2 (CarryN2[WIDTH+3:0], SumN2[WIDTH+3:0], divi1, SumR, CarryR);
+   adder #(WIDTH+4) cpa3 (SumR, CarryR, rem2);   
    // Choose remainder (Rem or Rem+D)
-   mux2 #(68) mx6 (rem1, rem2, rem1[67], rem3);
+   mux2 #(WIDTH+4) mx6 (rem1, rem2, rem1[WIDTH+3], rem3);
    // Choose correct Q or QM
-   mux2 #(65) mx7 (Qstar, QMstar, rem1[67], Qt);
+   mux2 #(WIDTH+1) mx7 (Qstar, QMstar, rem1[WIDTH+3], Qt);
    // Final results
-   assign rem0 = rem3[64:0];
+   assign rem0 = rem3[WIDTH:0];
    assign Q = Qt;   
    
-endmodule // divide4x64
+endmodule // divide4
 
+// Load/Control for OTFC
 module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM);
 
    input logic [3:0] quot;
@@ -255,8 +251,7 @@ module ls_control (quot, Qin, QMin, CshiftQ, CshiftQM);
 
 endmodule 
 
-// On-the-fly Conversion per Ercegovac/Lang
-
+// On-the-fly Conversion (OTFC)
 module otf #(parameter WIDTH=8) 
    (Qin, QMin, CshiftQ, CshiftQM, clk, reset, enable, R2Q, R1Q);
    
@@ -309,10 +304,9 @@ module csa #(parameter WIDTH=8) (input logic [WIDTH-1:0] a, b, c,
 	   fa fa_inst (a[i], b[i], c[i], sum[i], carry_temp[i+1]);
 	end
    endgenerate
-   //assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};     // trmimmed excess bit dh 5/3/21
-   assign carry = {carry_temp[WIDTH-1:1], 1'b0};     
+   assign carry = {1'b0, carry_temp[WIDTH-1:1], 1'b0};     
 
-endmodule // adder
+endmodule // csa
 
 module eqcmp #(parameter WIDTH = 8)
    (input  logic [WIDTH-1:0] a, b,
@@ -322,6 +316,7 @@ module eqcmp #(parameter WIDTH = 8)
    
 endmodule // eqcmp
 
+// QST for r=4
 module qst4 (input logic [6:0] s, input logic [2:0] d,
 	     output logic [3:0] q);
    
@@ -368,8 +363,6 @@ module qst4 (input logic [6:0] s, input logic [2:0] d,
    
 endmodule // qst4
 
-// LZD
-
 module lz2 (P, V, B0, B1);
 
    input logic  B0;
@@ -497,27 +490,24 @@ module lz64 (ZP, ZV, B);
 endmodule // lz64
 
 // FSM Control for Integer Divider
+module fsm64 #(parameter WIDTH=6)
+  (en, state0, done, otfzero, divBusy, start, error, NumIter, clk, reset);
 
-module fsm64 (en, state0, done, divdone, otfzero, divBusy,
-	      start, error, NumIter, clk, reset);
-
-   input logic [5:0]  NumIter;   
-   input logic 	      clk;
-   input logic 	      reset;
-   input logic 	      start;
-   input logic 	      error;   
+   input logic [WIDTH-1:0]  NumIter;   
+   input logic 		    clk;
+   input logic 		    reset;
+   input logic 		    start;
+   input logic 		    error;   
    
-   output logic       done;      
-   output logic       en;
-   output logic       state0;
-   output logic       divdone;
-   output logic       otfzero;
-   output logic       divBusy;   
+   output logic 	    done;      
+   output logic 	    en;
+   output logic 	    state0;
+   output logic 	    otfzero;
+   output logic 	    divBusy;   
    
-   logic 	      LT, EQ;
-   logic 	      Divide0;   
-   logic [5:0] 	      CURRENT_STATE;
-   logic [5:0] 	      NEXT_STATE;   
+   logic 		    LT, EQ;
+   logic [5:0] 		    CURRENT_STATE;
+   logic [5:0] 		    NEXT_STATE;   
    
    parameter [5:0] 
      S0=6'd0, S1=6'd1, S2=6'd2,
@@ -542,12 +532,8 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 	  CURRENT_STATE<=NEXT_STATE;
      end
 
-   // Going to cheat and hard code number of states 
-   // needed into FSM instead of using a counter
-   // FIXME: could counter be better
-
    // Cheated and made 8 - let synthesis do its magic
-   magcompare8 comp1 (LT, EQ, {2'h0, CURRENT_STATE}, {2'h0, NumIter});
+   magcompare8 comp1 (LT, EQ, {2'h0, CURRENT_STATE}, {{8-WIDTH{1'b0}}, NumIter});
 
    always @(CURRENT_STATE or start)
      begin
@@ -560,7 +546,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    divBusy = 1'b0;		    
 		    state0 = 1'b0;
-		    divdone = 1'b0;		    
 		    done = 1'b0;
 		    NEXT_STATE <= S0;
 		 end 
@@ -568,30 +553,21 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		 begin
 		    otfzero = 1'b0;	       		    
 		    en = 1'b1;
-		    divBusy = 1'b1;		    		    
+		    divBusy = 1'b1;		    
 		    state0 = 1'b1;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		    
 		    done = 1'b0;
-		    divdone = 1'b0;		 		 
 		    NEXT_STATE <= S1;
 		 end 
 	    end	    
 	  S1:
 	    begin
-	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       
+	       otfzero = 1'b0;	   
+	       divBusy = 1'b1;
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S2;
 		 end
 	       else
@@ -599,8 +575,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S2;
+		    NEXT_STATE <= S36;
 		 end		    
 	    end // case: S1	  
 	  S2:
@@ -612,10 +587,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S3;
 		 end // if (LT|EQ)
 	       else
@@ -623,8 +594,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S3;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S2
 	  S3:
@@ -636,10 +606,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S4;
 		 end 
 	       else
@@ -647,8 +613,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S4;
+		    NEXT_STATE <= S36;
 		 end		    	       
 	    end // case: S3
 	  S4:
@@ -660,10 +625,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S5;
 		 end 	       	    
 	       else
@@ -671,8 +632,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S5;
+		    NEXT_STATE <= S36;
 		 end		       	       
 	    end // case: S4
 	  S5:
@@ -684,10 +644,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S6;
 		 end // if (LT|EQ)
 	       else
@@ -695,8 +651,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S6;
+		    NEXT_STATE <= S36;
 		 end		    	       	       	       
 	    end // case: S5
 	  S6:
@@ -708,10 +663,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S7;
 		 end // if (LT|EQ)
 	       else
@@ -719,8 +670,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S7;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S6
 	  S7:
@@ -732,10 +682,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S8;
 		 end // if (LT|EQ)
 	       else
@@ -743,8 +689,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S8;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S7
 	  S8:
@@ -756,10 +701,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S9;
 		 end // if (LT|EQ)
 	       else
@@ -767,8 +708,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S9;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S8
 	  S9:
@@ -780,10 +720,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S10;
 		 end // if (LT|EQ)
 	       else
@@ -791,8 +727,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S10;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S9
 	  S10:
@@ -804,10 +739,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S11;
 		 end // if (LT|EQ)
 	       else
@@ -815,8 +746,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S11;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S10
 	  S11:
@@ -828,10 +758,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S12;
 		 end // if (LT|EQ)
 	       else
@@ -839,8 +765,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S12;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S11
 	  S12:
@@ -852,10 +777,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S13;
 		 end // if (LT|EQ)
 	       else
@@ -863,8 +784,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S13;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S12
 	  S13:
@@ -876,10 +796,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S14;
 		 end // if (LT|EQ)
 	       else
@@ -887,23 +803,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S14;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S13
 	  S14:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S15;
 		 end // if (LT|EQ)
 	       else
@@ -911,23 +822,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S15;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S14
 	  S15:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S16;
 		 end // if (LT|EQ)
 	       else
@@ -935,23 +841,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S16;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S15
 	  S16:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S17;
 		 end // if (LT|EQ)
 	       else
@@ -959,23 +860,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S17;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S16
 	  S17:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S18;
 		 end // if (LT|EQ)
 	       else
@@ -983,23 +879,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S18;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S17
 	  S18:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S19;
 		 end // if (LT|EQ)
 	       else
@@ -1007,23 +898,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S19;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S18
 	  S19:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S20;
 		 end // if (LT|EQ)
 	       else
@@ -1031,23 +917,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S20;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S19
 	  S20:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S21;
 		 end // if (LT|EQ)
 	       else
@@ -1055,23 +936,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S21;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S20
 	  S21:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S22;
 		 end // if (LT|EQ)
 	       else
@@ -1079,23 +955,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S22;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S21
 	  S22:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;
 		    NEXT_STATE <= S23;
 		 end // if (LT|EQ)
 	       else
@@ -1103,23 +974,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S23;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S22
 	  S23:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S24;		    
 		 end // if (LT|EQ)
 	       else
@@ -1127,23 +993,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S24;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S23 
 	  S24:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S25;
 		 end // if (LT|EQ)
 	       else
@@ -1151,23 +1012,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S25;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S24
 	  S25:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S26;
 		 end // if (LT|EQ)
 	       else
@@ -1175,23 +1031,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S26;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S25
 	  S26:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S27;
 		 end // if (LT|EQ)
 	       else
@@ -1199,23 +1050,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S27;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S26
 	  S27:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S28;
 		 end // if (LT|EQ)
 	       else
@@ -1223,23 +1069,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S28;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S27
 	  S28:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S29;
 		 end // if (LT|EQ)
 	       else
@@ -1247,23 +1088,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S29;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S28
 	  S29:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S30;
 		 end // if (LT|EQ)
 	       else
@@ -1271,23 +1107,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S30;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S29
 	  S30:
 	    begin
 	       otfzero = 1'b0;
-     	       divBusy = 1'b1;	       
+	       divBusy = 1'b1;	       
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S31;
 		 end // if (LT|EQ)
 	       else
@@ -1295,8 +1126,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S31;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S30
 	  S31:
@@ -1308,10 +1138,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S32;
 		 end // if (LT|EQ)
 	       else
@@ -1319,8 +1145,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S32;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S31  
 	  S32:
@@ -1332,10 +1157,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S33;
 		 end // if (LT|EQ)
 	       else
@@ -1343,8 +1164,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S33;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S32
 	  S33:
@@ -1356,10 +1176,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S34;
 		 end // if (LT|EQ)
 	       else
@@ -1367,23 +1183,18 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S34;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S33
 	  S34:
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       
+	       divBusy = 1'b1;
 	       if (LT|EQ)
 		 begin
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S35;
 		 end // if (LT|EQ)
 	       else
@@ -1391,8 +1202,7 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
-		    NEXT_STATE <= S35;
+		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S34  	  
 	  S35:
@@ -1404,10 +1214,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b1;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    if (EQ)
-		      divdone = 1'b1;		    
-		    else
-		      divdone = 1'b0;		 		 
 		    NEXT_STATE <= S36;
 		 end // if (LT|EQ)
 	       else
@@ -1415,7 +1221,6 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 		    en = 1'b0;
 		    state0 = 1'b0;
 		    done = 1'b0;
-		    divdone = 1'b0;
 		    NEXT_STATE <= S36;
 		 end		    	       	       
 	    end // case: S35	  
@@ -1427,12 +1232,10 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 	       done = 1'b1;
 	       if (EQ)
 		 begin
-		    divdone = 1'b1;
 		    en = 1'b1;
 		 end
 	       else
 		 begin
-		    divdone = 1'b0;
 		    en = 1'b0;
 		 end
 	       NEXT_STATE <= S0;
@@ -1440,11 +1243,10 @@ module fsm64 (en, state0, done, divdone, otfzero, divBusy,
 	  default: 
 	    begin
 	       otfzero = 1'b0;
-	       divBusy = 1'b1;	       
+	       divBusy = 1'b0;	       
 	       en = 1'b0;
 	       state0 = 1'b0;
 	       done = 1'b0;
-	       divdone = 1'b0;
 	       NEXT_STATE <= S0;
 	    end
 	endcase // case(CURRENT_STATE)	
@@ -1505,166 +1307,39 @@ module magcompare8 (LT, EQ, A, B);
 
 endmodule // magcompare8
 
-module shifter_l64 (Z, A, Shift);
+// RISC-V Exception Logic for Divide by 0 and Overflow (Signed Integer Divide)
+module exception_int #(parameter WIDTH=8) 
+   (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf);
 
-   input logic [63:0]  A;
-   input logic [5:0]   Shift;
+   input logic [WIDTH-1:0] Q;
+   input logic [WIDTH-1:0] rem;
+   input logic [WIDTH-1:0] op1;      
+   input logic 		   S;
+   input logic 		   div0;
+   input logic 		   Max_N;
+   input logic 		   D_NegOne;
    
-   logic [63:0]        stage1;
-   logic [63:0]        stage2;
-   logic [63:0]        stage3;
-   logic [63:0]        stage4;
-   logic [63:0]        stage5;   
-   
-   output logic [63:0] Z;      
-   
-   mux2 #(64) mx01(A,      {A[31:0], 32'h0}, Shift[5], stage1);   
-   mux2 #(64) mx02(stage1, {stage1[47:0], 16'h0}, Shift[4], stage2);
-   mux2 #(64) mx03(stage2, {stage2[55:0], 8'h0}, Shift[3], stage3);
-   mux2 #(64) mx04(stage3, {stage3[59:0], 4'h0}, Shift[2], stage4);
-   mux2 #(64) mx05(stage4, {stage4[61:0], 2'h0}, Shift[1], stage5);
-   mux2 #(64) mx06(stage5, {stage5[62:0], 1'h0}, Shift[0], Z);
+   output logic [WIDTH-1:0] Qf;
+   output logic [WIDTH-1:0] remf;
 
-endmodule // shifter_l64
-
-module shifter_r64 (Z, A, Shift);
-
-   input logic [63:0]  A;
-   input logic [5:0]   Shift;
-   
-   logic [63:0]        stage1;
-   logic [63:0]        stage2;
-   logic [63:0]        stage3;
-   logic [63:0]        stage4;
-   logic [63:0]        stage5;   		  
-   
-   output logic [63:0] Z;
-   
-   mux2 #(64) mx01(A, {32'h0, A[63:32]}, Shift[5], stage1);		  
-   mux2 #(64) mx02(stage1, {16'h0, stage1[63:16]}, Shift[4], stage2);
-   mux2 #(64) mx03(stage2, {8'h0, stage2[63:8]}, Shift[3], stage3);
-   mux2 #(64) mx04(stage3, {4'h0, stage3[63:4]}, Shift[2], stage4);
-   mux2 #(64) mx05(stage4, {2'h0, stage4[63:2]}, Shift[1], stage5);
-   mux2 #(64) mx06(stage5, {1'h0, stage5[63:1]},  Shift[0], Z);
-   
-endmodule // shifter_r64
-
-module shifter_l32 (Z, A, Shift);
-
-   input logic [31:0]  A;
-   input logic [4:0]   Shift;
-   
-   logic [31:0]        stage1;
-   logic [31:0]        stage2;
-   logic [31:0]        stage3;
-   logic [31:0]        stage4;
-   
-   output logic [31:0] Z;      
-
-   mux2 #(32) mx01(A,      {A[15:0], 16'h0},    Shift[4], stage1);
-   mux2 #(32) mx02(stage1, {stage1[23:0], 8'h0}, Shift[3], stage2);
-   mux2 #(32) mx03(stage2, {stage2[27:0], 4'h0},  Shift[2], stage3);
-   mux2 #(32) mx04(stage3, {stage3[29:0], 2'h0},   Shift[1], stage4);
-   mux2 #(32) mx05(stage4, {stage4[30:0], 1'h0},    Shift[0], Z);
-
-endmodule // shifter_l32
-
-module shifter_r32 (Z, A, Shift);
-
-   input logic [31:0]  A;
-   input logic [4:0]   Shift;
-   
-   logic [31:0]        stage1;
-   logic [31:0]        stage2;
-   logic [31:0]        stage3;
-   logic [31:0]        stage4;
-   
-   output logic [31:0] Z;
-   
-   mux2 #(32) mx01(A,      {16'h0, A[31:16]},   Shift[4], stage1);
-   mux2 #(32) mx02(stage1, {8'h0, stage1[31:8]}, Shift[3], stage2);
-   mux2 #(32) mx03(stage2, {4'h0, stage2[31:4]},  Shift[2], stage3);
-   mux2 #(32) mx04(stage3, {2'h0, stage3[31:2]},   Shift[1], stage4);
-   mux2 #(32) mx05(stage4, {1'h0, stage4[31:1]},    Shift[0], Z);
-   
-endmodule // shifter_r32
-
-module shift_right #(parameter WIDTH=8) 
-   (input logic [`XLEN-1:0]         A,
-    input logic [$clog2(`XLEN)-1:0] Shift,
-    output logic [`XLEN-1:0] 	    Z);
-   
-   logic [`XLEN-1:0] 							 stage [$clog2(`XLEN):0];
-   genvar 								 i;
-   
-   assign stage[0] = A;   
-   generate
-      for (i=0;i<$clog2(`XLEN);i=i+1)
-	begin : genbit
-	   mux2 #(`XLEN) mux_inst (stage[i], 
-				   {{(`XLEN/(2**(i+1))){1'b0}}, stage[i][`XLEN-1:`XLEN/(2**(i+1))]}, 
-				   Shift[$clog2(`XLEN)-i-1], 
-				   stage[i+1]);
-	end
-   endgenerate
-   assign Z = stage[$clog2(`XLEN)];   
-
-endmodule // shift_right
-
-module shift_left #(parameter WIDTH=8) 
-   (input logic [`XLEN-1:0]         A,
-    input logic [$clog2(`XLEN)-1:0] Shift,
-    output logic [`XLEN-1:0] 	    Z);
-   
-   logic [`XLEN-1:0] 							stage [$clog2(`XLEN):0];
-   genvar 								i;
-   
-   assign stage[0] = A;   
-   generate
-      for (i=0;i<$clog2(`XLEN);i=i+1)
-	begin : genbit
-	   mux2 #(`XLEN) mux_inst (stage[i], 
-				   {stage[i][`XLEN-1-`XLEN/(2**(i+1)):0], {(`XLEN/(2**(i+1))){1'b0}}}, 
-				   Shift[$clog2(`XLEN)-i-1], 
-				   stage[i+1]);
-	end
-   endgenerate
-   assign Z = stage[$clog2(`XLEN)];   
-
-endmodule // shift_right
-
-module exception_int (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf);
-
-   input logic [63:0] Q;
-   input logic [63:0] rem;
-   input logic [63:0] op1;      
-   input logic 	      S;
-   input logic 	      div0;
-   input logic 	      Max_N;
-   input logic 	      D_NegOne;
-   
-   output logic [63:0] Qf;
-   output logic [63:0] remf;
-
-   // Needs to be optimized
    always_comb
      case ({div0, S, Max_N, D_NegOne})
        4'b0000 : Qf = Q;
        4'b0001 : Qf = Q;
-       4'b0010 : Qf = Q;              
-       4'b0011 : Qf = Q;              
+       4'b0010 : Qf = Q;       
+       4'b0011 : Qf = Q;
        4'b0100 : Qf = Q;
-       4'b0101 : Qf = Q;
+       4'b0101 : Qf = Q;       
        4'b0110 : Qf = Q;       
-       4'b0111 : Qf = {1'b1, 31'h0};
-       4'b1000 : Qf = {64{1'b1}};
-       4'b1001 : Qf = {64{1'b1}};
-       4'b1010 : Qf = {64{1'b1}};
-       4'b1011 : Qf = {64{1'b1}};              
-       4'b1100 : Qf = {64{1'b1}};
-       4'b1101 : Qf = {64{1'b1}};       
-       4'b1110 : Qf = {64{1'b1}};       
-       4'b1111 : Qf = {64{1'b1}};              
+       4'b0111 : Qf = {1'b1, {WIDTH-1{1'h0}}};       
+       4'b1000 : Qf = {WIDTH{1'b1}};
+       4'b1001 : Qf = {WIDTH{1'b1}};
+       4'b1010 : Qf = {WIDTH{1'b1}};
+       4'b1011 : Qf = {WIDTH{1'b1}};       
+       4'b1100 : Qf = {WIDTH{1'b1}};
+       4'b1101 : Qf = {WIDTH{1'b1}};
+       4'b1110 : Qf = {WIDTH{1'b1}};
+       4'b1111 : Qf = {WIDTH{1'b1}};       
        default: Qf = Q;       
      endcase 
 
@@ -1672,18 +1347,18 @@ module exception_int (Q, rem, op1, S, div0, Max_N, D_NegOne, Qf, remf);
      case ({div0, S, Max_N, D_NegOne})
        4'b0000 : remf = rem;
        4'b0001 : remf = rem;
-       4'b0010 : remf = rem;
+       4'b0010 : remf = rem;       
        4'b0011 : remf = rem;
        4'b0100 : remf = rem;
        4'b0101 : remf = rem;
        4'b0110 : remf = rem;
-       4'b0111 : remf = 64'h0;     
+       4'b0111 : remf = {WIDTH{1'h0}};
        4'b1000 : remf = op1;
        4'b1001 : remf = op1;
        4'b1010 : remf = op1;
        4'b1011 : remf = op1;       
        4'b1100 : remf = op1;
-       4'b1101 : remf = op1;
+       4'b1101 : remf = op1;       
        4'b1110 : remf = op1;       
        4'b1111 : remf = op1;              
        default: remf = rem;
@@ -1693,4 +1368,3 @@ endmodule // exception_int
 
 /* verilator lint_on COMBDLY */
 /* verilator lint_on IMPLICIT */
-
diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv
index 17c4aac5..e10b0c55 100644
--- a/wally-pipelined/src/muldiv/muldiv.sv
+++ b/wally-pipelined/src/muldiv/muldiv.sv
@@ -47,13 +47,13 @@ module muldiv (
 	 logic [`XLEN-1:0] MulDivResultE, MulDivResultM;
 	 logic [`XLEN-1:0] PrelimResultE;
 	 logic [`XLEN-1:0] QuotE, RemE;
-	 //logic [`XLEN-1:0] Q, R;	 
 	 logic [`XLEN*2-1:0] ProdE; 
 
 	 logic 		     enable_q;	 
 	 logic [2:0] 	     Funct3E_Q;
 	 logic 		     div0error;
 	 logic [`XLEN-1:0]   N, D;
+	 logic [`XLEN-1:0]   Num0, Den0;	 
 
 	 logic 		     gclk;
 	 logic 		     DivStartE;
@@ -70,15 +70,25 @@ module muldiv (
 	 end
 	 assign gclk = enable_q & clk;
 
+	 // Extend the low 32 bits of each operand for W-type instructions: sign-extend when signedDivide is set, zero-extend otherwise
+	 if (`XLEN == 64) begin // RV64 has W-type instructions
+            assign Num0 = W64E ? {{32{SrcAE[31]&signedDivide}}, SrcAE[31:0]} : SrcAE;
+            assign Den0 = W64E ? {{32{SrcBE[31]&signedDivide}}, SrcBE[31:0]} : SrcBE;
+	 end else begin // RV32 has no W-type instructions
+            assign Num0 = SrcAE;
+            assign Den0 = SrcBE;	    
+	 end	    
+
 	 // capture the Numerator/Denominator	 
-	 flopenrc #(`XLEN) reg_num (.d(SrcAE), .q(N),
+	 flopenrc #(`XLEN) reg_num (.d(Num0), .q(N),
 				    .en(startDivideE), .clear(DivDoneE),
 				    .reset(reset),  .clk(~gclk));
-	 flopenrc #(`XLEN) reg_den (.d(SrcBE), .q(D),
+	 flopenrc #(`XLEN) reg_den (.d(Den0), .q(D),
 				    .en(startDivideE), .clear(DivDoneE),
-				    .reset(reset),  .clk(~gclk));	 
+				    .reset(reset),  .clk(~gclk));
+	 
 	 assign signedDivide = (Funct3E[2]&~Funct3E[1]&~Funct3E[0]) | (Funct3E[2]&Funct3E[1]&~Funct3E[0]);	 
-	 div div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide);
+	 intdiv #(`XLEN) div (QuotE, RemE, DivDoneE, DivBusyE, div0error, N, D, gclk, reset, startDivideE, signedDivide);
 
 	 // Added for debugging of start signal for divide
 	 assign startDivideE = MulDivE&DivStartE&~DivBusyE;
@@ -93,7 +103,6 @@ module muldiv (
 	 
 	 // Select result
 	 always_comb
-	   //           case (DivDoneE ? Funct3E_Q : Funct3E)
            case (Funct3E)	   
              3'b000: PrelimResultE = ProdE[`XLEN-1:0];
              3'b001: PrelimResultE = ProdE[`XLEN*2-1:`XLEN];
diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv
index ea693900..dabc6d12 100644
--- a/wally-pipelined/testbench/testbench-imperas.sv
+++ b/wally-pipelined/testbench/testbench-imperas.sv
@@ -166,12 +166,12 @@ string tests32f[] = '{
     "rv64m/I-MULW-01", "3000",
     "rv64m/I-DIV-01", "3000",
     "rv64m/I-DIVU-01", "3000",
-    //"rv64m/I-DIVUW-01", "3000",
-    //"rv64m/I-DIVW-01", "3000",
+    "rv64m/I-DIVUW-01", "3000",
+    "rv64m/I-DIVW-01", "3000",
     "rv64m/I-REM-01", "3000",
-    "rv64m/I-REMU-01", "3000"
-    //"rv64m/I-REMUW-01", "3000",
-    //"rv64m/I-REMW-01", "3000"
+    "rv64m/I-REMU-01", "3000",
+    "rv64m/I-REMUW-01", "3000",
+    "rv64m/I-REMW-01", "3000"
   };
 
   string tests64ic[] = '{
@@ -320,11 +320,11 @@ string tests32f[] = '{
     "rv32m/I-MUL-01", "2000",
     "rv32m/I-MULH-01", "2000",
     "rv32m/I-MULHSU-01", "2000",
-    "rv32m/I-MULHU-01", "2000"
-    //"rv32m/I-DIV-01", "2000",
-    //"rv32m/I-DIVU-01", "2000",
-    //"rv32m/I-REM-01", "2000",
-    //"rv32m/I-REMU-01", "2000"
+    "rv32m/I-MULHU-01", "2000",
+    "rv32m/I-DIV-01", "2000",
+    "rv32m/I-DIVU-01", "2000",
+    "rv32m/I-REM-01", "2000",
+    "rv32m/I-REMU-01", "2000"
   };
 
   string tests32ic[] = '{
@@ -439,8 +439,11 @@ string tests32f[] = '{
 
   string testsBP64[] = '{
     "rv64BP/simple", "10000",
+    "rv64BP/mmm", "1000000",
+    "rv64BP/linpack_bench", "1000000",
+    "rv64BP/sieve", "1000000",
     "rv64BP/qsort", "1000000",
-    "rv64BP/sieve", "1000000"
+    "rv64BP/dhrystone", "1000000"
   };
 
   string tests64p[] = '{