Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

2025-02-11 06:05:49 +00:00 · 2021-09-08 12:47:03 -05:00 · 2021-09-08 12:47:03 -05:00 · 6606eea27e
commit 6606eea27e
parent 5bc90ef32f 5e9a39e755
15 changed files with 661 additions and 1173 deletions
--- a/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/wally-pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@ -142,7 +142,7 @@ assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[
                .BiasE, .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                .ProdExpE, .AddendStickyE, .KillProdE); 
-fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .ZSgnM(ZSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
+fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
              //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
               .FOpCtrlM(FOpCtrlE[2:0]), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
               .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);
--- a/wally-pipelined/linux-testgen/testvector-generation/CreateTrace.sh
+++ b/wally-pipelined/linux-testgen/testvector-generation/CreateTrace.sh
@ -15,4 +15,11 @@ outDir="../linux-testvectors"

 # - Logs info needed by buildroot testbench

-($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1236 -S 2>&1 >/dev/null | ./parse_qemu.py | ./parseNew.py | ./remove_dup.awk > all.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog
+read -p "Warning: running this script will overwrite the contents of $outDir/all.txt.
+Would you like to proceed? (y/n) " -n 1 -r
+echo
+if [[ $REPLY =~ ^[Yy]$ ]]
+then
+    ($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -rtc clock=vm -icount shift=1 -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1236 -S 2>&1 >/dev/null | ./parse_qemu.py | ./parseNew.py | ./remove_dup.awk > $outDir/all.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog
+fi
+
--- a/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh
+++ b/wally-pipelined/linux-testgen/testvector-generation/debugBuildroot.sh
@ -14,16 +14,16 @@ outDir="../linux-testvectors"
 # Uncomment this version for QEMU debugging of kernel
 #  - good for poking around VM if it boots up
 #  - good for running QEMU commands (press "Ctrl-A" then "c" to open QEMU command prompt)
-#$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio 
+#$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -rtc clock=vm -icount shift=1
 # Uncomment this version for GDB debugging of kernel
 #  - attempts to load in symbols from "vmlinux"
 #  - good for looking at backtraces when Linux gets stuck for some reason 
-#$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -gdb tcp::1237 -S & riscv64-unknown-elf-gdb -x gdbinit_debug
+$customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -rtc clock=vm -icount shift=1 -gdb tcp::1237 -S & riscv64-unknown-elf-gdb -x gdbinit_debug

 # Uncomment this version to generate qemu_output.txt
 # - Uses GDB script
 # - Logs raw QEMU output to qemu_output.txt
-($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1237 -S 2> $intermedDir/qemu_output.txt) & riscv64-unknown-elf-gdb -x gdbinit_debug
+#($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -rtc clock=vm -icount shift=1 -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1237 -S 2> $intermedDir/qemu_output.txt) & riscv64-unknown-elf-gdb -x gdbinit_debug

 # Uncomment this version for parse_qemu.py debugging
 # - Uses qemu_output.txt
--- a/wally-pipelined/linux-testgen/testvector-generation/gdbinit_qemulog
+++ b/wally-pipelined/linux-testgen/testvector-generation/gdbinit_qemulog
@ -3,9 +3,9 @@ target extended-remote :1236
 file ../buildroot-image-output/vmlinux
 stepi 1000
 b do_idle
+ignore 1 2
 c
-c
-c
+# using 3 continues didn't work because the first breakpoint hit causes a pipe break error
 set confirm off
 kill
 q
--- a/wally-pipelined/linux-testgen/testvector-generation/logBuildrootMem.sh
+++ b/wally-pipelined/linux-testgen/testvector-generation/logBuildrootMem.sh
@ -8,6 +8,11 @@ customQemu="/courses/e190ax/qemu_sim/rv64_initrd/qemu_experimental/qemu/build/qe
 imageDir="../buildroot-image-output"
 testVecDir="../linux-testvectors"

+read -p "Warning: running this script will overwrite the contents of memory dumps needed for simulation.
+Would you like to proceed? (y/n) " -n 1 -r
+echo
+if [[ $REPLY =~ ^[Yy]$ ]]
+then
    ($customQemu -M virt -nographic -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -gdb tcp::1235 -S 2>/dev/null >/dev/null) &
    riscv64-unknown-elf-gdb -x gdbinit_mem
    echo "Translating Mem from GDB to Questa format"
@ -19,3 +24,4 @@ echo "Creating debugging objdump of linux image"
    riscv64-unknown-elf-objdump -D $imageDir/vmlinux > $testVecDir/vmlinux.objdump
    extractFunctionRadix.sh $testVecDir/vmlinux.objdump
    echo "Done"
+fi
--- a/wally-pipelined/regression/wally-buildroot.do
+++ b/wally-pipelined/regression/wally-buildroot.do
@ -34,19 +34,10 @@ vopt +acc work.testbench -o workopt

 vsim workopt -suppress 8852,12070

-run 150 ms
-add log -r /*
-do linux-wave.do
-
-run 150 ms
-#run 180 us
-
 #-- Run the Simulation 
-#run -all
-#do ./wave-dos/linux-waves.do
-#run 60 ms
-
-#run -all
+run -all
+do ./wave-dos/linux-waves.do
+run -all

 exec ./slack-notifier/slack-notifier.py
 ##quit
--- a/wally-pipelined/regression/wave-dos/linux-waves.do
+++ b/wally-pipelined/regression/wave-dos/linux-waves.do
@ -3,7 +3,8 @@ quietly WaveActivateNextPane {} 0
 add wave -noupdate -divider <NULL>
 add wave -noupdate /testbench/clk
 add wave -noupdate /testbench/reset
-add wave -noupdate -radix decimal /testbench/instrs
+add wave -noupdate -radix decimal /testbench/errorCount
+add wave -noupdate -radix decimal /testbench/InstrCountW
 add wave -noupdate -divider Stalls_and_Flushes
 add wave -noupdate /testbench/dut/hart/StallF
 add wave -noupdate /testbench/dut/hart/StallD
@ -14,33 +15,15 @@ add wave -noupdate /testbench/dut/hart/FlushD
 add wave -noupdate /testbench/dut/hart/FlushE
 add wave -noupdate /testbench/dut/hart/FlushM
 add wave -noupdate /testbench/dut/hart/FlushW
-add wave -noupdate -divider InstrTranslator
-add wave -noupdate -group InstrTranslator /testbench/SvMode
-add wave -noupdate -group InstrTranslator /testbench/PTE_R
-add wave -noupdate -group InstrTranslator /testbench/PTE_X
-add wave -noupdate -group InstrTranslator /testbench/SATP
-add wave -noupdate -group InstrTranslator /testbench/PTE
-add wave -noupdate -group InstrTranslator /testbench/BaseAdr
-add wave -noupdate -group InstrTranslator /testbench/PAdr
-add wave -noupdate -group InstrTranslator /testbench/VPN
-add wave -noupdate -group InstrTranslator /testbench/Offset
-add wave -noupdate -group InstrTranslator /testbench/readAdrExpected
-add wave -noupdate -group InstrTranslator /testbench/readAdrTranslated
-add wave -noupdate -group InstrTranslator /testbench/writeAdrExpected
-add wave -noupdate -group InstrTranslator /testbench/writeAdrTranslated
 add wave -noupdate -divider F
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCF
 add wave -noupdate -divider D
-add wave -noupdate -radix hexadecimal /testbench/PCDexpected
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCD
-add wave -noupdate -radix hexadecimal /testbench/PCtextD
 add wave -noupdate /testbench/InstrDName
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/InstrD
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/InstrValidD
-add wave -noupdate -radix hexadecimal /testbench/PCDwrong
 add wave -noupdate -divider E
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCE
-add wave -noupdate -radix hexadecimal /testbench/PCtextE
 add wave -noupdate /testbench/InstrEName
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/InstrE
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/InstrValidE
@ -49,8 +32,8 @@ add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/dp/SrcBE
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/dp/ALUResultE
 add wave -noupdate -divider M
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/PCM
-add wave -noupdate -radix hexadecimal /testbench/PCtextM
 add wave -noupdate /testbench/InstrMName
+add wave -noupdate /testbench/textM
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ifu/InstrM
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/InstrValidM
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/lsu/dcache/MemPAdrM
@ -80,168 +63,8 @@ add wave -noupdate -group Walker /testbench/dut/hart/lsu/hptw/genblk1/Translatio
 add wave -noupdate -group Walker /testbench/dut/hart/lsu/hptw/genblk1/WalkerState
 add wave -noupdate -group Walker /testbench/dut/hart/lsu/hptw/genblk1/NextWalkerState
 add wave -noupdate -group Walker /testbench/dut/hart/lsu/hptw/genblk1/InitialWalkerState
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/clk
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/reset
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/StallM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/FlushM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/StallW
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/FlushW
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/LSUStall
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/MemRWM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/Funct3M
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/Funct7M
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/AtomicM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/ExceptionM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/PendingInterruptM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/CommittedM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/SquashSCW
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DataMisalignedM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/MemAdrM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/MemAdrE
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/WriteDataM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/ReadDataW
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/PrivilegeModeW
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DTLBFlushM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DTLBLoadPageFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DTLBStorePageFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/LoadMisalignedFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/LoadAccessFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/StoreMisalignedFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/StoreAccessFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/CommitM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DCtoAHBPAdrM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DCtoAHBReadM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DCtoAHBWriteM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DCfromAHBAck
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DCfromAHBReadData
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DCtoAHBWriteData
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DCtoAHBSizeM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/SATP_REGW
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/STATUS_MXR
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/STATUS_SUM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/STATUS_MPRV
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/STATUS_MPP
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/PCF
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/ITLBMissF
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/PageType
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/ITLBWriteF
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/WalkerInstrPageFaultF
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/WalkerLoadPageFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/WalkerStorePageFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DTLBHitM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/SquashSCM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DTLBPageFaultM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/MemAccessM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/CurrState
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/NextState
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/MemPAdrM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DTLBMissM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DTLBWriteM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/HPTWReadPTE
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/HPTWStall
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/HPTWPAdrE
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/HPTWRead
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/MemRWMtoDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/Funct3MtoDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/AtomicMtoDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/MemAdrEtoDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/ReadDataWfromDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/StallWtoDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DataMisalignedMfromDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/HPTWReady
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DisableTranslation
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/DCacheStall
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/CacheableM
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/CacheableMtoDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/SelPTW
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/CommittedMfromDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/PendingInterruptMtoDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/FlushWtoDCache
-add wave -noupdate -group LSU /testbench/dut/hart/lsu/WalkerPageFaultM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/clk
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/reset
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/StallM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/StallW
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/FlushM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/FlushW
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/MemRWM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/Funct3M
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/Funct7M
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/AtomicM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/MemAdrE
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/MemPAdrM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/WriteDataM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ReadDataW
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ReadDataM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/DCacheStall
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/CommittedM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ExceptionM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/PendingInterruptM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/DTLBMissM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/CacheableM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/DTLBWriteM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ITLBWriteF
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SelPTW
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/WalkerPageFaultM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/AHBPAdr
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/AHBRead
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/AHBWrite
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/AHBAck
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/HRDATA
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/HWDATA
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SelAdrM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SRAMAdr
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SRAMWriteData
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/DCacheMemWriteData
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SetValidM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ClearValidM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SetDirtyM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ClearDirtyM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/Valid
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/Dirty
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/WayHit
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/CacheHit
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/NewReplacement
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ReadDataBlockM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ReadDataWordM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/ReadDataWordMuxM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/FinalWriteDataM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/FinalAMOWriteDataM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/FinalWriteDataWordsM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/FetchCount
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/NextFetchCount
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SRAMWordEnable
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SelMemWriteDataM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/Funct3W
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SRAMWordWriteEnableM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SRAMWordWriteEnableW
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SRAMBlockWriteEnableM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SRAMWriteEnable
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SRAMWayWriteEnable
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SaveSRAMRead
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/AtomicW
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/VictimWay
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/VictimDirtyWay
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/VictimReadDataBlockM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/VictimDirty
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SelAMOWrite
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SelUncached
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/Funct7W
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/MemPAdrDecodedW
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/BasePAdrM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/BasePAdrOffsetM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/BasePAdrMaskedM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/VictimTag
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/AnyCPUReqM
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/FetchCountFlag
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/PreCntEn
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/CntEn
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/CntReset
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/CPUBusy
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/PreviousCPUBusy
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/SelEvict
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/CurrState
-add wave -noupdate -group DCache /testbench/dut/hart/lsu/dcache/NextState
+add wave -noupdate -group LSU -r /testbench/dut/hart/lsu/*
+add wave -noupdate -group DCache -r /testbench/dut/hart/lsu/dcache/*
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/clk
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/reset
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/StallW
@ -275,7 +98,6 @@ add wave -noupdate -group EBU /testbench/dut/hart/ebu/HMASTLOCK
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/HADDRD
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/HSIZED
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/HWRITED
-add wave -noupdate -group EBU /testbench/dut/hart/ebu/CommitM
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/GrantData
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/AccessAddress
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/ISize
@ -292,81 +114,77 @@ add wave -noupdate -group EBU /testbench/dut/hart/ebu/BusState
 add wave -noupdate -group EBU /testbench/dut/hart/ebu/NextBusState
 add wave -noupdate -divider W
 add wave -noupdate -radix hexadecimal /testbench/PCW
-add wave -noupdate -radix hexadecimal /testbench/PCtextW
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/c/InstrValidW
+add wave -noupdate /testbench/textM
 add wave -noupdate /testbench/dut/hart/ieu/dp/ReadDataW
 add wave -noupdate -radix hexadecimal /testbench/dut/hart/ieu/dp/ResultW
-add wave -noupdate -divider RegFile
-add wave -noupdate /testbench/dut/hart/ieu/dp/RegWriteW
-add wave -noupdate -radix unsigned /testbench/regNumExpected
-add wave -noupdate -radix unsigned /testbench/dut/hart/ieu/dp/RdW
-add wave -noupdate -radix hexadecimal /testbench/regExpected
-add wave -noupdate /testbench/dut/hart/ieu/dp/regf/wd3
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[2]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[3]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[4]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[5]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[6]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[7]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[8]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[9]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[10]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[11]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[12]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[13]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[14]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[15]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[16]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[17]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[18]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[19]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[20]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[21]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[22]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[23]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[24]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[25]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[26]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[27]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[28]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[29]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[30]}
-add wave -noupdate -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[31]}
-add wave -noupdate -divider CSRs
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/MSTATUS_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/MCOUNTINHIBIT_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/MCOUNTEREN_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/MIDELEG_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/MIP_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/MIE_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MEPC_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MTVEC_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MCOUNTEREN_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MCOUNTINHIBIT_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MEDELEG_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MIDELEG_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MSCRATCH_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MCAUSE_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MTVAL_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/SSTATUS_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/SCOUNTEREN_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/SIP_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/SIE_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SEPC_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/STVEC_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SCOUNTEREN_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SEDELEG_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SIDELEG_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SATP_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/USTATUS_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrn/UEPC_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrn/UTVEC_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrn/UIP_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrn/UIE_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG_ARRAY_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW
-add wave -noupdate -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW
+add wave -noupdate -group RF /testbench/dut/hart/ieu/dp/RegWriteW
+add wave -noupdate -group RF -radix unsigned /testbench/dut/hart/ieu/dp/RdW
+add wave -noupdate -group RF /testbench/dut/hart/ieu/dp/regf/wd3
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[2]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[3]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[4]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[5]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[6]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[7]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[8]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[9]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[10]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[11]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[12]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[13]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[14]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[15]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[16]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[17]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[18]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[19]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[20]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[21]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[22]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[23]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[24]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[25]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[26]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[27]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[28]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[29]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[30]}
+add wave -noupdate -group RF -radix hexadecimal {/testbench/dut/hart/ieu/dp/regf/rf[31]}
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/MSTATUS_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/MCOUNTINHIBIT_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/MCOUNTEREN_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/MIDELEG_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/MIP_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/MIE_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MEPC_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MTVEC_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MCOUNTEREN_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MCOUNTINHIBIT_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MEDELEG_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MIDELEG_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MSCRATCH_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MCAUSE_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MTVAL_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/SSTATUS_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/SCOUNTEREN_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/SIP_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csri/SIE_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SEPC_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/STVEC_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SCOUNTEREN_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SEDELEG_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SIDELEG_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrs/SATP_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/USTATUS_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrn/UEPC_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrn/UTVEC_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrn/UIP_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrn/UIE_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/PMPCFG_ARRAY_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/PMPADDR_ARRAY_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csrm/MISA_REGW
+add wave -noupdate -group CSR -radix hexadecimal /testbench/dut/hart/priv/csr/genblk1/csru/FRM_REGW
 add wave -noupdate -divider <NULL>
 add wave -hex -r /testbench/*
 TreeUpdate [SetDefaultTree]
--- a/wally-pipelined/regression/wave-dos/peripheral-waves.do
+++ b/wally-pipelined/regression/wave-dos/peripheral-waves.do
@ -112,16 +112,3 @@ add wave -hex /testbench/dut/uncore/gpio/gpio/*

 # everything else
 add wave -hex -r /testbench/*
-
-# appearance
-TreeUpdate [SetDefaultTree]
-WaveRestoreZoom {0 ps} {100 ps}
-configure wave -namecolwidth 250
-configure wave -valuecolwidth 150
-configure wave -justifyvalue left
-configure wave -signalnamewidth 0
-configure wave -snapdistance 10
-configure wave -datasetprefix 0
-configure wave -rowmargin 4
-configure wave -childrowmargin 2
-set DefaultRadix hexadecimal
--- a/wally-pipelined/src/fpu/fma.sv
+++ b/wally-pipelined/src/fpu/fma.sv
@ -31,7 +31,7 @@ module fma(
    input logic                 FlushM,     // flush the memory stage
    input logic                 StallM,     // stall memory stage
    input logic                 FmtE, FmtM, // precision 1 = double 0 = single
-    input logic  [2:0]          FOpCtrlM, FOpCtrlE, // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
+    input logic  [2:0]          FOpCtrlE, // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic  [2:0]          FrmM,               // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic                 XSgnE, YSgnE, ZSgnE,    // input signs - execute stage
    input logic [`NE-1:0]       XExpE, YExpE, ZExpE,    // input exponents - execute stage
@ -45,21 +45,20 @@ module fma(
    input logic                 XSNaNM, YSNaNM, ZSNaNM,     // is signaling NaN
    input logic                 XZeroM, YZeroM, ZZeroM,     // is zero - memory stage
    input logic                 XInfM, YInfM, ZInfM,        // is infinity
-    input logic [10:0]          BiasE,      // bias - depends on precison (max exponent/2)
+    input logic [10:0]          BiasE,      // bias (max exponent/2) ***parameterize in unpacking unit
 	output logic [`FLEN-1:0]    FMAResM,    // FMA result
 	output logic [4:0]		    FMAFlgM);   // FMA flags
 	
-  //fma/mult	
-      //  fmadd  = ?000
-      //  fmsub  = ?001
-      //  fnmsub = ?010	-(a*b)+c
-      //  fnmadd = ?011 -(a*b)-c
-      //  fmul   = ?100
-      //	{?, is mul, negate product, negate addend}
+  //fma/mult/add	
+      //  fmadd  = 000
+      //  fmsub  = 001
+      //  fnmsub = 010	-(a*b)+c
+      //  fnmadd = 011  -(a*b)-c
+      //  fmul   = 100
+      //  fadd   = 110
+      //  fsub   = 111

    // signals transfered between pipeline stages
-    // logic [2*`NF+1:0]	ProdManE, ProdManM; 
-    // logic [3*`NF+5:0]	AlignedAddendE, AlignedAddendM;
    logic [3*`NF+5:0]	SumE, SumM;                       
    logic [`NE+1:0]	    ProdExpE, ProdExpM;
    logic 			    AddendStickyE, AddendStickyM;
@ -76,7 +75,6 @@ module fma(
                .ProdExpE, .AddendStickyE, .KillProdE); 
                
    // E/M pipeline registers
-    // flopenrc #(106) EMRegFma1(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); 
    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
    flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
    flopenrc #(15) EMRegFma4(clk, reset, FlushM, ~StallM, 
@ -84,7 +82,7 @@ module fma(
                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM});

    fma2 fma2(.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
-            .FOpCtrlM, .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
+            .FrmM, .FmtM,  .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
            .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM,
            .FMAResM, .FMAFlgM);

@ -93,29 +91,27 @@ endmodule


 module fma1(
-    input logic        XSgnE, YSgnE, ZSgnE,
+    input logic                 XSgnE, YSgnE, ZSgnE,    // input's signs
    input logic  [`NE-1:0]      XExpE, YExpE, ZExpE,    // biased exponents in B(NE.0) format
-    input logic [`NF:0] XManE, YManE, ZManE,   // fractions in U(0.NF) format]
+    input logic  [`NF:0]        XManE, YManE, ZManE,    // fractions in U(0.NF) format
    input logic                 XDenormE, YDenormE, ZDenormE, // is the input denormal
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
-    input logic [`NE-1:0] BiasE,
+    input logic  [`NE-1:0]      BiasE,      // bias (max exponent/2)
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic                 FmtE,       // precision 1 = double 0 = single
-    // output logic    [2*`NF+1:0]     ProdManE,   // 1.X frac * 1.Y frac in U(2.2Nf) format
-    // output logic    [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
    output logic [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
-    output logic [3*`NF+5:0]   SumE,
-    output logic NegSumE,
-    output logic InvZE,
-    output logic ZSgnEffE,
-    output logic PSgnE,
-    output logic [8:0] NormCntE
+    output logic [3*`NF+5:0]    SumE,           // the positive sum
+    output logic                NegSumE,        // was the sum negitive
+    output logic                InvZE,          // intert Z
+    output logic                ZSgnEffE,       // the modified Z sign
+    output logic                PSgnE,          // the product's sign
+    output logic [8:0]          NormCntE        // normalization shift cnt
    );
-    logic [`NE-1:0] Denorm;
-    logic [`NE-1:0] DenormXExp, DenormYExp;             // Denormalized input value

+    logic [`NE-1:0]     Denorm;             // value of a denormaized number based on precision
+    logic [`NE-1:0]     XExpVal, YExpVal;   // Exponent value after taking into account denormals
    logic [2*`NF+1:0]   ProdManE;           // 1.X frac * 1.Y frac in U(2.2Nf) format
    logic [3*`NF+5:0]   AlignedAddendE;     // Z aligned for addition in U(NF+5.2NF+1)

@ -123,82 +119,24 @@ module fma1(
    // Calculate the product
    //      - When multipliying two fp numbers, add the exponents
    //      - Subtract the bias (XExp + YExp has two biases, one from each exponent)
-    //      - Denormal numbers have an an exponent value of 1, however they are
-    //        represented with an exponent of 0. add one if there is a denormal number
+    //      - If the product is zero then kill the exponent - this is a problem 
    ///////////////////////////////////////////////////////////////////////////////
   
    // denormalized numbers have diffrent values depending on which precison it is.
+    //      double - 1
+    //      single - 1024-128+1 = 897
    assign Denorm = FmtE ? 1 : 897;
-    assign DenormXExp = XDenormE ? Denorm : XExpE;
-    assign DenormYExp = YDenormE ? Denorm : YExpE;
-    assign ProdExpE = (DenormXExp + DenormYExp - BiasE)&{`NE+2{~(XZeroE|YZeroE)}};
+    assign XExpVal = XDenormE ? Denorm : XExpE;
+    assign YExpVal = YDenormE ? Denorm : YExpE;
+    // take into account if the product is zero, the product's exponent does not compute properly if X or Y is zero
+    assign ProdExpE = (XExpVal + YExpVal - BiasE)&{`NE+2{~(XZeroE|YZeroE)}};

-    // Calculate the product's mantissa
-    //      - Mantissa includes the assumed one. If the number is denormalized or zero, it does not have an assumed one.
-            // assign ProdManE =  XManE * YManE;
+    // multiplication of the mantissa's
    mult mult(.XManE, .YManE, .ProdManE);
   
-            // ///////////////////////////////////////////////////////////////////////////////
-            // // Alignment shifter
-            // ///////////////////////////////////////////////////////////////////////////////
-
-            // // determine the shift count for alignment
-            // //      - negitive means Z is larger, so shift Z left
-            // //      - positive means the product is larger, so shift Z right
-            // //      - Denormal numbers have an an exponent value of 1, however they are
-            // //        represented with an exponent of 0. add one to the exponent if it is a denormal number
-            // assign AlignCnt = ProdExpE - (ZExpE + ({`NE-1{ZDenormE}}&Denorm));
-
-            // // Defualt Addition without shifting
-            // //          |   54'b0    |  106'b(product)  | 2'b0 |
-            // //                       |1'b0| addnend |
-
-            // // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
-            // assign ZManPreShifted = {(`NF+3)'(0), ZManE, /*106*/(2*`NF+2)'(0)};
-            // always_comb
-            //     begin
-                
-            //     // If the product is too small to effect the sum, kill the product
-
-            //     //          |   54'b0    |  106'b(product)  | 2'b0 |
-            //     //  | addnend |
-            //     if ($signed(AlignCnt) <= $signed(-(`NF+4))) begin
-            //         KillProdE = 1;
-            //         ZManShifted = ZManPreShifted;//{107'b0, XManE, 54'b0};
-            //         AddendStickyE = ~(XZeroE|YZeroE);
-
-            //     // If the Addend is shifted left (negitive AlignCnt)
-
-            //     //          |   54'b0    |  106'b(product)  | 2'b0 |
-            //     //                  | addnend |
-            //     end else if($signed(AlignCnt) <= $signed(0))  begin
-            //         KillProdE = 0;
-            //         ZManShifted = ZManPreShifted << -AlignCnt;
-            //         AddendStickyE = |(ZManShifted[`NF-1:0]);
-
-            //     // If the Addend is shifted right (positive AlignCnt)
-
-            //     //          |   54'b0    |  106'b(product)  | 2'b0 |
-            //     //                                  | addnend |
-            //     end else if ($signed(AlignCnt)<=$signed(2*`NF+1))  begin
-            //         KillProdE = 0;
-            //         ZManShifted = ZManPreShifted >> AlignCnt;
-            //         AddendStickyE = |(ZManShifted[`NF-1:0]);
-
-            //     // If the addend is too small to effect the addition        
-            //     //      - The addend has to shift two past the end of the addend to be considered too small
-            //     //      - The 2 extra bits are needed for rounding
-
-            //     //          |   54'b0    |  106'b(product)  | 2'b0 |
-            //     //                                                      | addnend |
-            //     end else begin
-            //         KillProdE = 0;
-            //         ZManShifted = 0;
-            //         AddendStickyE = ~ZZeroE;
-
-            //     end
-            // end
-            // assign AlignedAddendE = ZManShifted[4*`NF+5:`NF];
+    ///////////////////////////////////////////////////////////////////////////////
+    // Alignment shifter
+    ///////////////////////////////////////////////////////////////////////////////

    alignshift alignshift(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm,
                        .AlignedAddendE, .AddendStickyE, .KillProdE);
@ -209,22 +147,39 @@ module fma1(

    assign PSgnE = XSgnE ^ YSgnE ^ (FOpCtrlE[1]&~FOpCtrlE[2]);
    assign ZSgnEffE = ZSgnE^FOpCtrlE[0]; // Swap sign of Z for subtract
+
+
+    // ///////////////////////////////////////////////////////////////////////////////
+    // // Addition/LZA
+    // ///////////////////////////////////////////////////////////////////////////////
+        
    fmaadd fmaadd(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .SumE, .NegSumE, .InvZE, .NormCntE, .XZeroE, .YZeroE);
    
 endmodule



+
+
+
+
+
+
+
+
+
+
+
+
+
+
 module fma2(
    
-    input logic        XSgnM, YSgnM,
-    input logic [`NE-1:0] XExpM, YExpM, ZExpM,
-    input logic [`NF:0] XManM, YManM, ZManM,
+    input logic                 XSgnM, YSgnM,        // input signs
+    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic     [2:0]       FOpCtrlM,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic                 FmtM,       // precision 1 = double 0 = single
-    // input logic     [2*`NF+1:0]     ProdManM,   // 1.X frac * 1.Y frac
-    // input logic     [3*`NF+5:0]     AlignedAddendM, // Z aligned for addition
    input logic     [`NE+1:0]   ProdExpM,       // X exponent + Y exponent - bias
    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
@ -232,12 +187,12 @@ module fma2(
    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic [3*`NF+5:0]   SumM,
-    input logic NegSumM,
-    input logic InvZM,
-    input logic ZSgnEffM,
-    input logic PSgnM,
-    input logic [8:0] NormCntM,
+    input logic     [3*`NF+5:0] SumM,       // the positive sum
+    input logic                 NegSumM,    // was the sum negitive
+    input logic                 InvZM,      // do you invert Z
+    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                 PSgnM,      // the product's sign
+    input logic     [8:0]       NormCntM,   // the normalization shift count
    output logic    [`FLEN-1:0] FMAResM,    // FMA final result
    output logic    [4:0]       FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
   
@ -246,199 +201,45 @@ module fma2(
    logic [`NF-1:0]     ResultFrac; // Result fraction
    logic [`NE-1:0]     ResultExp;  // Result exponent
    logic               ResultSgn;  // Result sign
-    // logic               PSgn;       // product sign
-    // logic [2*`NF+1:0]   ProdMan2;   // product being added
-    // logic [3*`NF+6:0]   AlignedAddend2; // possibly inverted aligned Z
-    // logic [3*`NF+5:0]   Sum;        // positive sum
-    // logic [3*`NF+6:0]   PreSum;     // possibly negitive sum
    logic [`NE+1:0]     SumExp;     // exponent of the normalized sum
-    // logic [`NE+1:0]     SumExpTmp;  // exponent of the normalized sum not taking into account denormal or zero results
-    // logic [`NE+1:0]     SumExpTmpMinus1;    // SumExpTmp-1
    logic [`NE+1:0]     FullResultExp;  // ResultExp with bits to determine sign and overflow
    logic [`NF+2:0]     NormSum;        // normalized sum
-    // logic [3*`NF+5:0]   SumShifted; // sum shifted for normalization
-    // logic [8:0]         NormCnt;    // output of the leading zero detector //***change this later
    logic               NormSumSticky;  // sticky bit calulated from the normalized sum
    logic               SumZero;        // is the sum zero
-    // logic               NegSum;     // is the sum negitive
-    // logic               InvZ;       // invert Z if there is a subtraction (-product + Z or product - Z)
    logic               ResultDenorm;   // is the result denormalized
    logic               Sticky, UfSticky;           // Sticky bit
-    logic               Plus1, Minus1, CalcPlus1, CalcMinus1;   // do you add or subtract one for rounding
-    logic               UfPlus1, UfCalcPlus1;  // do you add one (for determining underflow flag)
-    logic               Invalid,Underflow,Overflow,Inexact; // flags
-    // logic [8:0]         DenormShift;    // right shift if the result is denormalized //***change this later
-    // logic               SubBySmallNum;  // was there supposed to be a subtraction by a small number
-    logic [`FLEN-1:0]    Addend;     // value to add (Z or zero)
+    logic               Plus1, Minus1, CalcPlus1;   // do you add or subtract one for rounding
+    logic               UfPlus1;                    // do you add one (for determining underflow flag)
+    logic               Invalid,Underflow,Overflow; // flags
    logic               ZeroSgn;        // the result's sign if the sum is zero
    logic               ResultSgnTmp;   // the result's sign assuming the result is not zero
-    logic           Guard, Round, LSBNormSum;   // bits needed to determine rounding
-    logic           UfGuard, UfRound, UfLSBNormSum;   // bits needed to determine rounding for underflow flag
-    // logic [`NE+1:0]    MaxExp;     // maximum value of the exponent
-    // logic [`NE+1:0]    FracLen;    // length of the fraction
-    logic           SigNaN;     // is an input a signaling NaN
-    logic           UnderflowFlag;  // Underflow singal used in FMAFlgM (used to avoid a circular depencency)
+    logic               Guard, Round;   // bits needed to determine rounding
+    logic               UfRound, UfLSBNormSum;   // bits needed to determine rounding for underflow flag
    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
-    //logic           ZSgnEffM;
   
    


-            // ///////////////////////////////////////////////////////////////////////////////
-            // // Addition
-            // ///////////////////////////////////////////////////////////////////////////////

-            // // Negate Z  when doing one of the following opperations:
-            // //      -prod +  Z
-            // //       prod -  Z
-            // assign ZSgnEffM = ZSgnM^FOpCtrlM[0]; // Swap sign of Z for subtract
-            // assign InvZ = ZSgnEffM ^ PSgn;
+    ///////////////////////////////////////////////////////////////////////////////
+    // Normalization
+    ///////////////////////////////////////////////////////////////////////////////

-            // // Choose an inverted or non-inverted addend - the one is added later
-            // assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
-            // // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
-            // assign ProdMan2 = KillProdM ? 0 : ProdManM;
-
-            // // Do the addition
-            // //      - add one to negate if the added was inverted
-            // //      - the 2 extra bits at the begining and end are needed for rounding
-            // assign PreSum = AlignedAddend2 + {ProdMan2, 2'b0} + InvZ;
-            
-            // // Is the sum negitive
-            // assign NegSum = PreSum[3*`NF+6];
-            // // If the sum is negitive, negate the sum.
-            // assign Sum = NegSum ? -PreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
-
-
-
-
-            // ///////////////////////////////////////////////////////////////////////////////
-            // // Normalization
-            // ///////////////////////////////////////////////////////////////////////////////
-
-            // // Determine if the sum is zero
-            // assign SumZero = ~(|Sum);
-
-            // // determine the length of the fraction based on precision
-            // assign FracLen = FmtM ? `NF : 13'd23;
-            // //assign FracLen = `NF;
-
-            // // Determine if the result is denormal
-            // logic [`NE+1:0] SumExpTmpTmp;
-            // assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} - (`NF+4));
-            // assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
-
-            // assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
-
-            // // Determine the shift needed for denormal results
-            // assign SumExpTmpMinus1 = SumExpTmp-1;
-            // assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 0; //*** change this when changing the size of DenormShift also change to an and opperation
-
-            // // Normalize the sum
-            // assign SumShifted = SumZero ? 0 : Sum << NormCnt+DenormShift; //*** fix mux's with constants in them
-            // assign NormSum = SumShifted[3*`NF+5:2*`NF+3];
-            // // Calculate the sticky bit
-            // assign NormSumSticky = FmtM ? (|SumShifted[2*`NF+3:0]) : (|SumShifted[136:0]);
-            // assign Sticky = AddendStickyM | NormSumSticky;
-
-            // // Determine sum's exponent
-            // assign SumExp = SumZero ? 0 : //***again fix mux
-            //                 ResultDenorm ? 0 :
-            //                 SumExpTmp;
    normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
            .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);




-                // ///////////////////////////////////////////////////////////////////////////////
-                // // Rounding
-                // ///////////////////////////////////////////////////////////////////////////////
+    ///////////////////////////////////////////////////////////////////////////////
+    // Rounding
+    ///////////////////////////////////////////////////////////////////////////////

-                // // round to nearest even
-                // //      {Guard, Round, Sticky}
-                // //      0xx - do nothing
-                // //      100 - tie - Plus1 if result is odd  (LSBNormSum = 1)
-                // //          - don't add 1 if a small number was supposed to be subtracted
-                // //      101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
-                // //      110/111 - Plus1
-
-                // //  round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
-
-                // //  round to -infinity
-                // //          - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
-                // //          - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
-
-                // //  round to infinity
-                // //          - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
-                // //          - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
-
-                // //  round to nearest max magnitude
-                // //      {Guard, Round, Sticky}
-                // //      0xx - do nothing
-                // //      100 - tie - Plus1
-                // //          - don't add 1 if a small number was supposed to be subtracted
-                // //      101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
-                // //      110/111 - Plus1
-
-                // // determine guard, round, and least significant bit of the result
-                // assign Guard = FmtM ? NormSum[2] : NormSum[31];
-                // assign Round = FmtM ? NormSum[1] : NormSum[30];
-                // assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32];
-
-                // // used to determine underflow flag
-                // assign UfGuard = FmtM ? NormSum[1] : NormSum[30];
-                // assign UfRound = FmtM ? NormSum[0] : NormSum[29];
-                // assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31];
-
-                // // Deterimine if a small number was supposed to be subtrated
-                // assign SubBySmallNum = AddendStickyM & InvZ & ~(NormSumSticky) & ~ZZeroM;
-
-                // always_comb begin
-                //     // Determine if you add 1
-                //     case (FrmM)
-                //         3'b000: CalcPlus1 = Guard & (Round | ((Sticky|UfRound)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfRound)&LSBNormSum&~SubBySmallNum));//round to nearest even
-                //         3'b001: CalcPlus1 = 0;//round to zero
-                //         3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down
-                //         3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up
-                //         3'b100: CalcPlus1 = (Guard & (Round | ((Sticky|UfRound)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfRound)&~SubBySmallNum)));//round to nearest max magnitude
-                //         default: CalcPlus1 = 1'bx;
-                //     endcase
-                //     // Determine if you add 1 (for underflow flag)
-                //     case (FrmM)
-                //         3'b000: UfCalcPlus1 = UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&UfLSBNormSum&~SubBySmallNum));//round to nearest even
-                //         3'b001: UfCalcPlus1 = 0;//round to zero
-                //         3'b010: UfCalcPlus1 = ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round down
-                //         3'b011: UfCalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round up
-                //         3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&~SubBySmallNum)));//round to nearest max magnitude
-                //         default: UfCalcPlus1 = 1'bx;
-                //     endcase
-                //     // Determine if you subtract 1
-                //     case (FrmM)
-                //         3'b000: CalcMinus1 = 0;//round to nearest even
-                //         3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
-                //         3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down
-                //         3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up
-                //         3'b100: CalcMinus1 = 0;//round to nearest max magnitude
-                //         default: CalcMinus1 = 1'bx;
-                //     endcase
-            
-                // end
-
-                // // If an answer is exact don't round
-                // assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
-                // assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard | UfRound);
-                // assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round);
-
-                // // Compute rounded result
-                // logic [`FLEN:0] RoundAdd; //*** move this up 
-                // logic [`NF-1:0] NormSumTruncated;
-                // assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
-                //                          Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
-                // assign NormSumTruncated = FmtM ? NormSum[`NF+2:3] : {NormSum[54:32], 29'b0};
-
-                // assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
-                // assign ResultExp = FullResultExp[`NE-1:0];
+    // round to nearest even
+    // round to zero
+    // round to -infinity
+    // round to infinity
+    // round to nearest max magnitude

    fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgn, .SumExp,
        .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfRound, .UfLSBNormSum);
@ -467,38 +268,9 @@ module fma2(



-        // ///////////////////////////////////////////////////////////////////////////////
-        // // Flags
-        // ///////////////////////////////////////////////////////////////////////////////
-
-
-
-        // // Set Invalid flag for following cases:
-        // //   1) any input is a signaling NaN
-        // //   2) Inf - Inf (unless x or y is NaN)
-        // //   3) 0 * Inf
-
-        // assign MaxExp = FmtM ? {`NE{1'b1}} : {8{1'b1}};
-        // assign SigNaN = XSNaNM | YSNaNM | ZSNaNM;
-        // assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
-
-        // // Set Overflow flag if the number is too big to be represented
-        // //      - Don't set the overflow flag if an overflowed result isn't outputed
-        // assign Overflow = FullResultExp >= {MaxExp} & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-
-        // // Set Underflow flag if the number is too small to be represented in normal numbers
-        // //      - Don't set the underflow flag if the result is exact
-
-        // assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky|UfRound)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-        // assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-        // // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
-        // //      - Don't set the underflow flag if an underflowed result isn't outputed
-        // assign Inexact = (Sticky|UfRound|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
-
-        // // Combine flags
-        // //      - FMA can't set the Divide by zero flag
-        // //      - Don't set the underflow flag if the result was rounded up to a normal number
-        // assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
+    ///////////////////////////////////////////////////////////////////////////////
+    // Flags
+    ///////////////////////////////////////////////////////////////////////////////

    fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
        .XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfRound, .UfLSBNormSum, .Sticky, .UfPlus1,
@ -523,7 +295,7 @@ module fma2(
    assign FMAResM = XNaNM ? XNaNResult :
                        YNaNM ? YNaNResult :
                        ZNaNM ? ZNaNResult :
-                        Invalid ? InvalidResult : // has to be before inf
+                        Invalid ? InvalidResult :
                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
@ -537,6 +309,11 @@ module fma2(

 endmodule

+
+
+
+
+
 module mult(
    input logic [`NF:0] XManE, YManE,
    output logic [2*`NF+1:0] ProdManE
@ -544,22 +321,26 @@ module mult(
    assign ProdManE = XManE * YManE;
 endmodule

+
+
+
+
 module alignshift(
    input logic  [`NE-1:0]      ZExpE,      // biased exponents in B(NE.0) format
    input logic  [`NF:0]        ZManE,      // fractions in U(0.NF) format]
    input logic                 ZDenormE,   // is the input denormal
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
-    input logic [`NE+1:0] ProdExpE,
-    input logic [`NE-1:0] Denorm,
-    output logic [3*`NF+5:0] AlignedAddendE,
-    output logic AddendStickyE,
-    output logic KillProdE
+    input logic  [`NE+1:0]      ProdExpE,       // the product's exponent
+    input logic  [`NE-1:0]      Denorm,         // the biased value of a denormalized number
+    output logic [3*`NF+5:0]    AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
+    output logic                AddendStickyE,  // Sticky bit calculated from the aliged addend
+    output logic                KillProdE       // should the product be set to zero
 );

    logic [`NE+1:0]     AlignCnt;           // how far to shift the addend to align with the product in Q(NE+2.0) format
    logic [4*`NF+5:0]   ZManShifted;        // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
    logic [4*`NF+5:0]   ZManPreShifted;     // input to the alignment shifter U(NF+5.3NF+1)
-    logic [`NE-1:0]   DenormZExp;
+    logic [`NE-1:0]     ZExpVal;            // Exponent value after taking into account denormals

    ///////////////////////////////////////////////////////////////////////////////
    // Alignment shifter
@ -568,14 +349,13 @@ module alignshift(
    // determine the shift count for alignment
    //      - negitive means Z is larger, so shift Z left
    //      - positive means the product is larger, so shift Z right
-    //      - Denormal numbers have an an exponent value of 1, however they are
-    //        represented with an exponent of 0. add one to the exponent if it is a denormal number
-    assign DenormZExp = ZDenormE ? Denorm : ZExpE;
-    assign AlignCnt = ProdExpE - DenormZExp + (`NF+3);
+    //      - Denormal numbers have a diffrent exponent value depending on the precision
+    assign ZExpVal = ZDenormE ? Denorm : ZExpE;
+    assign AlignCnt = ProdExpE - ZExpVal + (`NF+3);

    // Defualt Addition without shifting
    //          |   54'b0    |  106'b(product)  | 2'b0 |
-    //                       |1'b0| addnend |
+    //          | addnend |

    // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
    assign ZManPreShifted = {ZManE,(3*`NF+5)'(0)};
@ -588,20 +368,10 @@ module alignshift(
        //  | addnend |
        if ($signed(AlignCnt) < $signed(0)) begin
            KillProdE = 1;
-            ZManShifted = ZManPreShifted;//{107'b0, XManE, 54'b0};
+            ZManShifted = ZManPreShifted;
            AddendStickyE = ~(XZeroE|YZeroE);

-        // // If the Addend is shifted left (negitive AlignCnt)
-
-        // //          |   54'b0    |  106'b(product)  | 2'b0 |
-        // //                  | addnend |
-        // end else if($signed(AlignCnt) <= $signed(0))  begin
-        //     KillProdE = 0;
-        //     ZManShifted = ZManPreShifted << -AlignCnt;
-        //     AddendStickyE = |(ZManShifted[`NF-1:0]);
-
-        // If the Addend is shifted right (positive AlignCnt)
-
+        // If the Addend is shifted right
        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //                                  | addnend |
        end else if ($signed(AlignCnt)<=$signed(3*`NF+4))  begin
@ -622,25 +392,27 @@ module alignshift(

        end
    end
+
    assign AlignedAddendE = ZManShifted[4*`NF+5:`NF];
+
 endmodule

 module fmaadd(
-    input logic     [3*`NF+5:0]     AlignedAddendE, // Z aligned for addition
-    input logic [2*`NF+1:0] ProdManE,
-    input logic PSgnE, ZSgnEffE,
-    input logic KillProdE,
-    input logic XZeroE, YZeroE,
-    output logic [3*`NF+5:0]   SumE,
-    output logic NegSumE,
-    output logic InvZE,
-    output logic [8:0] NormCntE
+    input logic  [3*`NF+5:0]    AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1)
+    input logic  [2*`NF+1:0]    ProdManE,       // the product's mantissa
+    input logic                 PSgnE, ZSgnEffE,// the product and modified Z signs
+    input logic                 KillProdE,      // should the product be set to 0
+    input logic                 XZeroE, YZeroE, // is the input zero
+    output logic [3*`NF+5:0]    SumE,           // the positive sum
+    output logic                NegSumE,        // was the sum negitive
+    output logic                InvZE,          // do you invert Z
+    output logic [8:0]          NormCntE        // normalization shift count
 );
    logic [3*`NF+6:0]   PreSum, NegPreSum;  // possibly negitive sum
    logic [2*`NF+1:0]   ProdMan2;           // product being added
    logic [3*`NF+6:0]   AlignedAddend2;     // possibly inverted aligned Z
-    logic [3*`NF+6:0]   NegProdMan2;
-    logic [8:0] PNormCnt, NNormCnt;
+    logic [3*`NF+6:0]   NegProdMan2;        // a negated ProdMan2
+    logic [8:0]         PNormCnt, NNormCnt; // results from the LZA

    ///////////////////////////////////////////////////////////////////////////////
    // Addition
@ -651,83 +423,46 @@ module fmaadd(
    //       prod -  Z
    assign InvZE = ZSgnEffE ^ PSgnE;

-    // Choose an inverted or non-inverted addend - the one is added later
+    // Choose an inverted or non-inverted addend - the one has to be added now for the LZA
    assign AlignedAddend2 = InvZE ? -{1'b0, AlignedAddendE} : {1'b0, AlignedAddendE};
    // Kill the product if the product is too small to effect the addition (determined in fma1.sv)
    assign ProdMan2 = ProdManE&{2*`NF+2{~KillProdE}};
+    // Negate ProdMan for LZA and the negitive sum calculation
    assign NegProdMan2 = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, -ProdMan2, 2'b0};
+
+    // LZAs one for the positive result and one for the negitive
+    //      - the +1 from inverting causes problems for normalization
    poslza poslza(AlignedAddend2, ProdMan2, PNormCnt);
    neglza neglza({1'b0,AlignedAddendE}, NegProdMan2, NNormCnt);
+
+
    // Do the addition
-    //      - add one to negate if the added was inverted
-    //      - the 2 extra bits at the begining and end are needed for rounding
+    //      - calculate a positive and negitive sum in parallel
    assign PreSum = AlignedAddend2 + {ProdMan2, 2'b0};
    assign NegPreSum = AlignedAddendE + NegProdMan2;
     
    // Is the sum negitive
    assign NegSumE = PreSum[3*`NF+6];
-    // If the sum is negitive, negate the sum.
+    // Choose the positive sum and accompanying LZA result.
    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
    assign NormCntE = NegSumE ? NNormCnt : PNormCnt;
-// set to PNormCnt if the product is zero (there may be an additional bit of error from the negation)

 endmodule

-// module fmalzc(
-//     input logic [3*`NF+5:0]   Sum,
-//     output logic [8:0] NormCntCheck
-// );

-//     ///////////////////////////////////////////////////////////////////////////////
-//     // Leading one detector
-//     ///////////////////////////////////////////////////////////////////////////////

-//     //*** replace with non-behavoral code
-//     logic [8:0] i;
-//     always_comb begin
-//             i = 0;
-//             while (~Sum[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1;  // search for leading one
-//             NormCntCheck = i;
-//     end

-// endmodule
-////////////////////////////////////////////////////////////////////////////////////
-//	Filename: 	lza.v
-//	Author:		Katherine Parry
-//  Date:		2021/02/07
-//
-// Description:  Leading Zero Anticipator
-// This a the Kershaw Leading Zero Anticipator(LZA) using the algorithm described in
-// "Leading Zero Anticipation and Dectection - A Comparison of Methods" (2001)
-//    Schmookler and Nowka.
-// After swapping, alignment and inversion of A & B, the following functions are 
-// applied to all 'i' bits. 
-//   -- T[i] =   A[i] XOR B[i];  // Propagation that will occur
-//   -- G[i] =   A[i] AND B[i];  // The value Generated
-//   -- Z[i] = ~(A[i] OR  B[i]): // Fill functions
-// The leading Zero is determined by the first occurance of the pattern T*GGZ*,
-// whereas Leading ones are found by the pattern T*ZG*
-// To evaluate the pattern we map it to the function that evaluates the three bits 
-//     (current, before, & after): 
-//  f[i] = T[i-1](G[i]~Z[i+1] & ~G[i+1]Z[i]) | ~T[i-1](Z[i]~Z[i+1] & G[i]~G[i+1])
-// 
-////////////////////////////////////////////////////////////////////////////////////

 module poslza(
-    //   parameter SIGNIFICANT_SZ=52;
-    //leading digit anticipator
-    //   localparam sz=SIGNIFICANT_SZ+1;
-    input logic [3*`NF+6:0] A, 
-    input logic [2*`NF+1:0] P,
-    output logic [8:0] PCnt
+    input logic  [3*`NF+6:0] A,     // addend
+    input logic  [2*`NF+1:0] P,     // product
+    output logic [8:0]       PCnt   // normalization shift count for the positive result
    ); 
    
-    // Compute Generate, Propageate and Kill for each bit

+    // calculate the propagate (T) and kill (Z) bits
    logic [3*`NF+6:0] T;
    logic [3*`NF+5:0] Z;
-    // assign T = A^{{`NF+3{1'b0}}, P, 2'b0};
-    // assign Z = ~(A|{{`NF+3{1'b0}}, P, 2'b0});
    assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4];
    assign Z[3*`NF+5:2*`NF+4] = A[3*`NF+5:2*`NF+4];
    assign T[2*`NF+3:2] = A[2*`NF+3:2]^P;
@ -739,7 +474,6 @@ module poslza(
    // Apply function to determine Leading pattern
    logic [3*`NF+6:0] pf;
    assign pf = T^{Z[3*`NF+5:0], 1'b0};
-    // assign pf = T^{~Z[3*`NF+5:0], 1'b0};

    logic [8:0] i;
    always_comb begin
@ -751,16 +485,12 @@ module poslza(
 endmodule

 module neglza(
-    //   parameter SIGNIFICANT_SZ=52;
-    //leading digit anticipator
-    //   localparam sz=SIGNIFICANT_SZ+1;
-    input logic [3*`NF+6:0] A, 
-    input logic [3*`NF+6:0] P,
-    output logic [8:0] NCnt
+    input logic  [3*`NF+6:0]    A,      // addend
+    input logic  [3*`NF+6:0]    P,      // product
+    output logic [8:0]          NCnt    // normalization shift count for the negitive result
    ); 
    
-    // Compute Generate, Propageate and Kill for each bit
-    
+    // calculate the propagate (T) and kill (Z) bits
    logic [3*`NF+6:0] T;
    logic [3*`NF+5:0] Z;
    assign T = A^P;
@ -783,28 +513,27 @@ endmodule


 module normalize(
-    input logic [3*`NF+5:0]   SumM,
-    input logic [`NE-1:0] ZExpM,
+    input logic  [3*`NF+5:0]    SumM,       // the positive sum
+    input logic  [`NE-1:0]      ZExpM,      // exponent of Z
    input logic  [`NE+1:0]      ProdExpM,   // X exponent + Y exponent - bias
-    input logic [8:0] NormCntM,
+    input logic  [8:0]          NormCntM,   // normalization shift count
    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic KillProdM,
-    input logic AddendStickyM,
+    input logic                 KillProdM,  // is the product set to zero
+    input logic                 AddendStickyM,  // the sticky bit caclulated from the aligned addend
    output logic [`NF+2:0]      NormSum,        // normalized sum
-    output logic SumZero,
-    output logic NormSumSticky, UfSticky,
+    output logic                SumZero,        // is the sum zero
+    output logic                NormSumSticky, UfSticky,    // sticky bits
    output logic [`NE+1:0]      SumExp,         // exponent of the normalized sum
-    output logic ResultDenorm
+    output logic                ResultDenorm    // is the result denormalized
 );
    logic [`NE+1:0]     FracLen;            // length of the fraction
    logic [`NE+1:0]     SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
-    logic [`NE+1:0]     SumExpTmpMinus1;    // SumExpTmp-1
    logic [8:0]         DenormShift;        // right shift if the result is denormalized //***change this later
-    logic [3*`NF+5:0]   SumShifted; // sum shifted for normalization
-    logic [3*`NF+7:0]   SumShiftedTmp; // sum shifted for normalization
-    logic [`NE+1:0] SumExpTmpTmp;
-    logic PreResultDenorm;
-    logic LZAPlus1;
+    logic [3*`NF+5:0]   CorrSumShifted;     // the shifted sum after LZA correction
+    logic [3*`NF+7:0]   SumShifted;         // the shifted sum before LZA correction
+    logic [`NE+1:0]     SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
+    logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
+    logic               LZAPlus1;           // add one to the sum's exponent due to LZA correction

    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
@ -815,87 +544,58 @@ module normalize(

    // determine the length of the fraction based on precision
    assign FracLen = FmtM ? `NF+1 : 13'd24;
-    //assign FracLen = `NF;

-    // Determine if the result is denormal
+    // calculate the sum's exponent
    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};

+    // Determine if the result is denormal
    assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;

    // Determine the shift needed for denormal results
    //  - if not denorm add 1 to shift out the leading 1
    assign DenormShift = PreResultDenorm ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
    // Normalize the sum
-    assign SumShiftedTmp = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
+    assign SumShifted = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified
    // LZA correction
-    assign LZAPlus1 = SumShiftedTmp[3*`NF+7];
-    assign SumShifted =  LZAPlus1 ? SumShiftedTmp[3*`NF+6:1] : SumShiftedTmp[3*`NF+5:0];
-    assign NormSum = SumShifted[3*`NF+5:2*`NF+3];
+    assign LZAPlus1 = SumShifted[3*`NF+7];
+    assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
+    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
    // Calculate the sticky bit
-    assign NormSumSticky = (|SumShifted[2*`NF+2:0]) | (|SumShifted[136:2*`NF+3]&~FmtM);
+    assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM);
    assign UfSticky = AddendStickyM | NormSumSticky;

    // Determine sum's exponent
-    assign SumExp = (SumExpTmp+LZAPlus1+(~|SumExpTmp&SumShiftedTmp[3*`NF+6])) & {`NE+2{~(SumZero|ResultDenorm)}};
+    assign SumExp = (SumExpTmp+LZAPlus1+(~|SumExpTmp&SumShifted[3*`NF+6])) & {`NE+2{~(SumZero|ResultDenorm)}};
    // recalculate if the result is denormalized
-assign ResultDenorm = PreResultDenorm&~SumShiftedTmp[3*`NF+6]&~SumShiftedTmp[3*`NF+7];
-                 
-    // // Determine if the sum is zero
-    // assign SumZero = ~(|Sum);
-
-    // // determine the length of the fraction based on precision
-    // assign FracLen = FmtM ? `NF : 13'd23;
-    // //assign FracLen = `NF;
-
-    // // Determine if the result is denormal
-    // assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCnt} + 1 - (`NF+4));
-    // assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
-
-    // assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
-
-    // // Determine the shift needed for denormal results
-    // //  - if not denorm add 1 to shift out the leading 1
-    // assign DenormShift = ResultDenorm ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation
-
-    // // Normalize the sum
-    // assign SumShifted = SumZero ? 0 : Sum << NormCnt+DenormShift; //*** fix mux's with constants in them
-    // assign NormSum = SumShifted[3*`NF+5:2*`NF+3];
-    // // Calculate the sticky bit
-    // assign NormSumSticky = FmtM ? (|SumShifted[2*`NF+2:0]) : (|SumShifted[136:0]);
-    // assign UfSticky = AddendStickyM | NormSumSticky;
-
-    // // Determine sum's exponent
-    // assign SumExp = SumZero ? 0 : //***again fix mux
-    //              ResultDenorm ? 0 :
-    //              SumExpTmp;
+    assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];

 endmodule

 module fmaround(
    input logic             FmtM,       // precision 1 = double 0 = single
-    input logic [2:0] FrmM,
-    input logic UfSticky,
-    output logic Sticky,
+    input logic  [2:0]      FrmM,       // rounding mode
+    input logic             UfSticky,   // sticky bit for underlow calculation
    input logic  [`NF+2:0]  NormSum,    // normalized sum
-    input logic AddendStickyM,
-    input logic NormSumSticky,
-    input logic ZZeroM,
-    input logic InvZM,
+    input logic             AddendStickyM,  // addend's sticky bit
+    input logic             NormSumSticky,  // normalized sum's sticky bit
+    input logic             ZZeroM,         // is Z zero
+    input logic             InvZM,          // invert Z
    input logic  [`NE+1:0]  SumExp,         // exponent of the normalized sum
-    input logic ResultSgn,
-    output logic CalcPlus1, Plus1, UfPlus1, Minus1,
+    input logic             ResultSgn,      // the result's sign
+    output logic            CalcPlus1, Plus1, UfPlus1, Minus1,  // do you add or subtract on from the result
    output logic [`NE+1:0]  FullResultExp,      // ResultExp with bits to determine sign and overflow
    output logic [`NF-1:0]  ResultFrac,         // Result fraction
    output logic [`NE-1:0]  ResultExp,          // Result exponent
-    output logic Round, Guard, UfRound, UfLSBNormSum
+    output logic            Sticky,             // sticky bit
+    output logic            Round, Guard, UfRound, UfLSBNormSum // bits needed to calculate rounding
 );
-    logic LSBNormSum;
+    logic           LSBNormSum;         // bit used for rounding - least significant bit of the normalized sum
    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
-    logic UfGuard;
-    logic UfCalcPlus1, CalcMinus1;
-    logic [`FLEN:0] RoundAdd; //*** move this up 
-    logic [`NF-1:0] NormSumTruncated;
+    logic           UfGuard;            // gaurd bit used to caluculate underflow
+    logic           UfCalcPlus1, CalcMinus1;    // do you add or subtract on from the result
+    logic [`FLEN:0] RoundAdd;           // how much to add to the result
+    logic [`NF-1:0] NormSumTruncated;   // the normalized sum trimed to fit the mantissa

    ///////////////////////////////////////////////////////////////////////////////
    // Rounding
@ -997,15 +697,15 @@ module fmaflags(
    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
    input logic  [`NE+1:0]      FullResultExp,          // ResultExp with bits to determine sign and overflow
    input logic  [`NE+1:0]      SumExp,                 // exponent of the normalized sum
-    input logic ZSgnEffM, PSgnM,
-    input logic Round, Guard, UfRound, UfLSBNormSum, Sticky, UfPlus1,
+    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
+    input logic                 Round, Guard, UfRound, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
    input logic                 FmtM,                   // precision 1 = double 0 = single
-    output logic Invalid, Overflow, Underflow,
-    output logic [4:0] FMAFlgM
+    output logic                Invalid, Overflow, Underflow, // flags used to select the result
+    output logic [4:0]          FMAFlgM // FMA flags
 );
    logic [`NE+1:0]     MaxExp;     // maximum value of the exponent
-    logic SigNaN;
-    logic UnderflowFlag, Inexact;
+    logic               SigNaN;     // is an input a signaling NaN
+    logic               UnderflowFlag, Inexact; // flags

    ///////////////////////////////////////////////////////////////////////////////
    // Flags
--- a/wally-pipelined/src/fpu/fpu.sv
+++ b/wally-pipelined/src/fpu/fpu.sv
@ -225,7 +225,7 @@ module fpu (
 		 .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, 
     .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, 
     .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
-		 .FOpCtrlE, .FOpCtrlM, 
+		 .FOpCtrlE,
 		 .FmtE, .FmtM, .FrmM, 
     // outputs:
     .FMAFlgM, .FMAResM);
@ -257,19 +257,7 @@ module fpu (
                   // outputs:
 			             .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
 	
-	
-	// add/FP <-> FP convert
-  //    - computation is done in two stages
-  //    - contains some E/M pipleine registers
-  //*** remove uneeded logic
-  //*** change to use the unpacking unit if possible
-// 	faddcvt faddcvt (.clk, .reset, .FlushM, .StallM, .FrmM, .FOpCtrlM, .FmtE, .FmtM, .FSrcXE, .FSrcYE, .FOpCtrlE, 
-//    .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM,
-//    .XSgnE, .YSgnE, .XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .XNormM, .YNormM,  .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE,
-//                   // outputs:
-//                   .CvtFpResM, .CvtFpFlgM);
-
-
+	// convert from signle to double and vice versa
 	cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE);
 	
 	// compare unit
--- a/wally-pipelined/src/mmu/hptw.sv
+++ b/wally-pipelined/src/mmu/hptw.sv
@ -146,6 +146,7 @@ module hptw
 	  if (`XLEN == 32) begin
 		assign InitialWalkerState = L1_ADR;
 		assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
+	     // *** Possible bug - should be L1_ADR?
 		assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
 	  end else begin
 		logic  GigapageMisaligned, TerapageMisaligned;
--- a/wally-pipelined/src/privileged/csri.sv
+++ b/wally-pipelined/src/privileged/csri.sv
@ -78,7 +78,7 @@ module csri #(parameter
      assign MIP_WRITE_MASK = 12'h000;
      assign SIP_WRITE_MASK = 12'h000;
    end
-    always @(posedge clk, posedge reset) begin
+    always @(posedge clk, posedge reset) begin // *** I strongly feel that IntInM should go directly to IP_REGW -- Ben 9/7/21
      if (reset)          IP_REGW_writeable <= 10'b0;
      else if (WriteMIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & MIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable
      else if (WriteSIPM) IP_REGW_writeable <= (CSRWriteValM[9:0] & SIP_WRITE_MASK[9:0]) | IntInM[9:0]; // MTIP unclearable
--- a/wally-pipelined/src/privileged/trap.sv
+++ b/wally-pipelined/src/privileged/trap.sv
@ -63,7 +63,7 @@ module trap (
  // & with a M stage valid bit to avoid interrupts from interrupt a nonexistent flushed instruction (in the M stage)
  // & with ~CommittedM to make sure MEPC isn't chosen so as to rerun the same instr twice
  assign MIntGlobalEnM = (PrivilegeModeW != `M_MODE) || STATUS_MIE; // if M ints enabled or lower priv 3.1.9
-  assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) || STATUS_SIE; // if S ints enabled or lower priv 3.1.9
+  assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) || ((PrivilegeModeW == `S_MODE) && STATUS_SIE); // if in lower priv mode, or if S ints enabled and not in higher priv mode 3.1.9
  assign PendingIntsM = ((MIP_REGW & MIE_REGW) & ({12{MIntGlobalEnM}} & 12'h888)) | ((SIP_REGW & SIE_REGW) & ({12{SIntGlobalEnM}} & 12'h222));
  assign PendingInterruptM = (|PendingIntsM) & InstrValidM;  
  assign InterruptM = PendingInterruptM & ~CommittedM;
--- a/wally-pipelined/testbench/testbench-linux.sv
+++ b/wally-pipelined/testbench/testbench-linux.sv
@ -30,8 +30,7 @@

 module testbench();
  
-  parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*3080000; // # of instructions at which to turn on waves in graphical sim
-  parameter stopICount   = `BUSYBEAR*143898 + `BUILDROOT*0000000; // # instructions at which to halt sim completely (set to 0 to let it run as far as it can)  
+  parameter waveOnICount = `BUSYBEAR*140000 + `BUILDROOT*6779000; // # of instructions at which to turn on waves in graphical sim

  string ProgramAddrMapFile, ProgramLabelMapFile;

@ -72,9 +71,9 @@ module testbench();
  // Signal Declarations
  // -------------------
  // Testbench Core
-  integer instrs;
  integer warningCount = 0;
-  // PC, Instr Checking
+  integer errorCount = 0;
+  // P, Instr Checking
  logic [`XLEN-1:0] PCW;
  integer data_file_all;

@ -127,6 +126,7 @@ module testbench();
  string            ExpectedCSRArrayW[10:0];
  logic [`XLEN-1:0] ExpectedCSRArrayValueW[10:0];
  logic [`XLEN-1:0] ExpectedIntType;
+  logic             forcedInterrupt;
  integer           NumCSRMIndex;
  integer           NumCSRWIndex;
  integer           NumCSRPostWIndex;    
@ -137,12 +137,16 @@ module testbench();
  // Error Macro
  // -----------
  `define ERROR \
-    $display("processed %0d instructions with %0d warnings", instrs, warningCount); \
+    errorCount +=1; \
+    $display("processed %0d instructions with %0d warnings", InstrCountW, warningCount); \
    $stop;

  initial begin
    data_file_all = $fopen({`LINUX_TEST_VECTORS,"all.txt"}, "r");
    InstrCountW = '0;
+    force dut.hart.priv.SwIntM = 0;
+    force dut.hart.priv.TimerIntM = 0;
+    force dut.hart.priv.ExtIntM = 0;    
  end

 /* -----\/----- EXCLUDED -----\/-----
@ -160,7 +164,7 @@ module testbench();
  flopenrc #(`XLEN) PCWReg(clk, reset, dut.hart.FlushW, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW);
  flopenr #(1) TrapWReg(clk, reset, ~dut.hart.StallW, dut.hart.hzu.TrapM, TrapW);

-  // because qemu does not match exactly to wally it is necessary to read the the
+  // Because qemu does not match exactly to wally it is necessary to read the the
  // trace in the memory stage and detect if anything in wally must be overwritten.
  // This includes mtimer, interrupts, and various bits in mstatus and xtval.

@ -242,32 +246,30 @@ module testbench();
          if(ExpectedCSRArrayM[NumCSRM].substr(1, 5) == "cause" && (ExpectedCSRArrayValueM[NumCSRM][`XLEN-1] == 1'b1)) begin
            //what type?
            ExpectedIntType = ExpectedCSRArrayValueM[NumCSRM] & 64'h0000_000C;
-	    $display("%t: CSR = %s. Forcing interrupt of cause = %x", $time, ExpectedCSRArrayM[NumCSRM], ExpectedCSRArrayValueM[NumCSRM]);
-	    
+            $display("%tns, %d instrs: CSR = %s. Forcing interrupt of cause = %x", $time, InstrCountW, ExpectedCSRArrayM[NumCSRM], ExpectedCSRArrayValueM[NumCSRM]);
+            forcedInterrupt = 1;
            if(ExpectedIntType == 0) begin
              force dut.hart.priv.SwIntM = 1'b1;
-	      $display("Force SwIntM");
-	    end
-	    else if(ExpectedIntType == 4) begin
+              $display("Activate spoofed SwIntM");
+            end else if(ExpectedIntType == 4) begin
              force dut.hart.priv.TimerIntM = 1'b1;
-	      $display("Force TimeIntM");
-	    end
-	    else if(ExpectedIntType == 8) begin
+              $display("Activate spoofed TimeIntM");
+            end else if(ExpectedIntType == 8) begin
              force dut.hart.priv.ExtIntM = 1'b1;
-	      $display("Force ExtIntM");
-	    end
+              $display("Activate spoofed ExtIntM");
+            end else forcedInterrupt = 0;
          end      
          NumCSRM++;      
        end
      end
      // override on special conditions
      if (ExpectedMemAdrM == 'h10000005) begin
-	//$display("%t: Overwriting read data from CLINT.", $time);
+        //$display("%tns, %d instrs: Overwriting read data from CLINT.", $time, InstrCountW);
        force dut.hart.ieu.dp.ReadDataM = ExpectedMemReadDataM;
      end
      if(textM.substr(0,5) == "rdtime") begin
-	$display("%t: Overwrite read value of CSR on read of MTIME in memory stage.", $time);
-        force dut.hart.priv.csr.CSRReadValM = ExpectedRegValueM;
+        $display("%tns, %d instrs: Overwrite MTIME_CLINT on read of MTIME in memory stage.", $time, InstrCountW);
+        force dut.uncore.clint.clint.MTIME = ExpectedRegValueM;
        //dut.hart.ieu.dp.regf.wd3
      end

@ -288,8 +290,7 @@ module testbench();
      ExpectedMemWriteDataW <= '0;
      ExpectedMemReadDataW <= '0;
      NumCSRW <= '0;
-    end
-    else if(~dut.hart.StallW) begin
+    end else if(~dut.hart.StallW) begin
      if(dut.hart.FlushW) begin
        ExpectedPCW <= '0;
        ExpectedInstrW <= '0;
@ -324,26 +325,31 @@ module testbench();


      if(~dut.hart.StallW) begin
-	if(textM.substr(0,5) == "rdtime") begin
-	  $display("%t:Releasing force of CSRReadValM.", $time);
-          release dut.hart.priv.csr.CSRReadValM;
+        if(textW.substr(0,5) == "rdtime") begin
+          $display("%tns, %d instrs: Releasing force of MTIME_CLINT.", $time, InstrCountW);
+          release dut.uncore.clint.clint.MTIME;
          //release dut.hart.ieu.dp.regf.wd3;
        end
        
        if (ExpectedMemAdrM == 'h10000005) begin
-	  //$display("%t: releasing force of ReadDataM.", $time);
+          //$display("%tns, %d instrs: releasing force of ReadDataM.", $time, InstrCountW);
              release dut.hart.ieu.dp.ReadDataM;
        end

-	// remove forces on interrupts
-	for(NumCSRMIndex = 0; NumCSRMIndex < NumCSRM; NumCSRMIndex++) begin
-	  if(ExpectedCSRArrayM[NumCSRMIndex].substr(1, 5) == "cause" && (ExpectedCSRArrayValueM[NumCSRMIndex][`XLEN-1] == 1'b1)) begin
-	    //what type?
-	    $display("%t: Releasing all forces on interrupts", $time);
-	    
-	    release dut.hart.priv.SwIntM;
-	    release dut.hart.priv.TimerIntM;
-	    release dut.hart.priv.ExtIntM;	    
+        // force interrupts to 0
+        if (forcedInterrupt) begin
+          forcedInterrupt = 0;
+          if(ExpectedIntType == 0) begin
+            force dut.hart.priv.SwIntM = 1'b0;
+            $display("Deactivate spoofed SwIntM");
+          end
+          else if(ExpectedIntType == 4) begin
+            force dut.hart.priv.TimerIntM = 1'b0;
+            $display("Deactivate spoofed TimeIntM");
+          end
+          else if(ExpectedIntType == 8) begin
+            force dut.hart.priv.ExtIntM = 1'b0;
+            $display("Deactivate spoofed ExtIntM");
          end
        end
      end
@ -355,6 +361,8 @@ module testbench();
    // always check PC, instruction bits
    if (checkInstrW) begin
      InstrCountW += 1;
+      // turn on waves at certain point
+      if (InstrCountW == waveOnICount) $stop;
      // check PCW
      fault = 0;
      if(PCW != ExpectedPCW) begin
@ -504,7 +512,6 @@ module testbench();
              if(!`DontHaltOnCSRMisMatch) fault = 1;
            end
          end
-
          "mtval": begin
            if(`DEBUG_TRACE > 0) begin
              $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]);
@ -514,7 +521,6 @@ module testbench();
              if(!`DontHaltOnCSRMisMatch) fault = 1;
            end
          end
-
          "sepc": begin
            if(`DEBUG_TRACE > 0) begin
              $display("CSR: %s = %016x, expected = %016x", ExpectedCSRArrayW[NumCSRPostWIndex], dut.hart.priv.csr.genblk1.csrs.SEPC_REGW, ExpectedCSRArrayValueW[NumCSRPostWIndex]);
@ -552,23 +558,8 @@ module testbench();
            end
          end
        endcase // case (ExpectedCSRArrayW[NumCSRPostWIndex])
-
-/* -----\/----- EXCLUDED -----\/-----
-	if(CurrentInterruptForce) begin
-	  CurrentInterruptForce = 1'b0;
-	  // remove forces on interrupts
-	  $display("%t: Releasing all forces on interrupts", $time);
-	  
-	  release dut.hart.priv.SwIntM;
-	  release dut.hart.priv.TimerIntM;
-	  release dut.hart.priv.ExtIntM;	    
-	end
- -----/\----- EXCLUDED -----/\----- */
-	  
      end // for (NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++)
-      if (fault == 1) begin
-	`ERROR
-      end
+      if (fault == 1) begin `ERROR end
    end // if (checkInstrW)
  end // always @ (negedge clk)

@ -589,7 +580,6 @@ module testbench();
  // --------------
  initial
    begin
-      instrs = 0;
      reset <= 1; # 22; reset <= 0;
    end
  // initial loading of memories