Merge branch 'main' into bigbadbranch

This commit is contained in:
Ross Thompson 2021-07-02 11:52:26 -05:00
commit dbd33465e1
35 changed files with 1838 additions and 1818 deletions

2
.gitignore vendored
View File

@ -24,6 +24,8 @@ testsBP/*/*/*.elf*
testsBP/*/OBJ/* testsBP/*/OBJ/*
testsBP/*/*.a testsBP/*/*.a
wally-pipelined/linux-testgen/linux-testvectors/* wally-pipelined/linux-testgen/linux-testvectors/*
wally-pipelined/linux-testgen/nohup*
wally-pipelined/linux-testgen/x*
!wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py !wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py
!wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh !wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh
wally-pipelined/regression/slack-notifier/slack-webhook-url.txt wally-pipelined/regression/slack-notifier/slack-webhook-url.txt

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "sky130/sky130_osu_sc_t12"]
path = sky130/sky130_osu_sc_t12
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/

View File

@ -211,26 +211,53 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]); core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]);
} }
} }
/*int foreverLoop = 1;
secs_ret timing = 0;
int timingInt;
ee_printf("\nENTERING FOREVER WHILE LOOP\n");
while(foreverLoop == 1)
{
start_time();
//filler
stop_time();
timing += time_in_secs(get_time());
timingInt = (int)timing;
ee_printf("Timing is %d\n", timingInt);
}*/
/* automatically determine number of iterations if not set */ /* automatically determine number of iterations if not set */
if (results[0].iterations==0) { if (results[0].iterations==0) {
secs_ret secs_passed=0; secs_ret secs_passed=0;
ee_u32 divisor; ee_u32 divisor;
results[0].iterations=1; results[0].iterations=1;
int iterationInc = 0;
ee_printf("\n\nENTERING ITERATION WHILE LOOP\n");
while (secs_passed < (secs_ret)1) { while (secs_passed < (secs_ret)1) {
results[0].iterations*=10; if(iterationInc != 0)
{
results[0].iterations++;
}
ee_printf("iterations is %d\n", results[0].iterations);
start_time(); start_time();
iterate(&results[0]); iterate(&results[0]);
stop_time(); stop_time();
secs_passed=time_in_secs(get_time()); secs_passed = time_in_secs(get_time());
int secs_passed_int = (int)secs_passed;
ee_printf("secs passed is %d\n", secs_passed_int);
iterationInc++;
} }
ee_printf("LEAVING ITERATION WHILE LOOP!\n\n");
/* now we know it executes for at least 1 sec, set actual run time at about 10 secs */ /* now we know it executes for at least 1 sec, set actual run time at about 10 secs */
divisor=(ee_u32)secs_passed; divisor=(ee_u32)secs_passed;
ee_printf("divisor is %lu\n", divisor);
if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */ if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */
divisor=1; divisor=1;
results[0].iterations*=1+10/divisor; results[0].iterations*=1+10/divisor;
ee_printf("iterations is %d\n", results[0].iterations);
} }
/* perform actual benchmark */ /* perform actual benchmark */
ee_printf("Starting benchmark\n");
start_time(); start_time();
#if (MULTITHREAD>1) #if (MULTITHREAD>1)
if (default_num_contexts>MULTITHREAD) { if (default_num_contexts>MULTITHREAD) {
@ -249,7 +276,8 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
#endif #endif
stop_time(); stop_time();
total_time=get_time(); total_time=get_time();
ee_printf("ending benchmark"); ee_printf("total time is %u\n", total_time);
ee_printf("ending benchmark\n");
/* get a function of the input to report */ /* get a function of the input to report */
seedcrc=crc16(results[0].seed1,seedcrc); seedcrc=crc16(results[0].seed1,seedcrc);
seedcrc=crc16(results[0].seed2,seedcrc); seedcrc=crc16(results[0].seed2,seedcrc);
@ -340,12 +368,17 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
for (i=0 ; i<default_num_contexts; i++) for (i=0 ; i<default_num_contexts; i++)
ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate); ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate);
for (i=0 ; i<default_num_contexts; i++) for (i=0 ; i<default_num_contexts; i++)
ee_printf("[%d]crcfinal : 0x%04x\"n",i,results[i].crc); ee_printf("[%d]crcfinal : 0x%04x\n",i,results[i].crc);
if (total_errors==0) { if (total_errors==0) {
ee_printf("Correct operation validated. See README.md for run and reporting rules.\n"); ee_printf("Correct operation validated. See README.md for run and reporting rules.\n");
#if HAS_FLOAT #if HAS_FLOAT
if (known_id==3) { if (known_id==3) {
ee_printf("CoreMark 1.0 : %f / %s %s",default_num_contexts*results[0].iterations/time_in_secs(total_time),COMPILER_VERSION,COMPILER_FLAGS); unsigned long long tmp = (unsigned long long) 1000.0*default_num_contexts*results[0].iterations/time_in_secs(total_time);
secs_ret totalmsecs = time_in_secs(total_time);
int totalmint = (int) totalmsecs;
ee_printf("ELAPSED S: %d\n", totalmint);
ee_printf("CoreMark 1.0 : %d / %s %s\n",tmp,COMPILER_VERSION,COMPILER_FLAGS);
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC) #if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
ee_printf(" / %s",MEM_LOCATION); ee_printf(" / %s",MEM_LOCATION);
#else #else

View File

@ -114,9 +114,10 @@ void portable_free(void *p) {
#define read_csr(reg) ({ unsigned long __tmp; \ #define read_csr(reg) ({ unsigned long __tmp; \
asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
__tmp; }) __tmp; })
#define GETMYTIME(_t) (*_t=read_csr(cycle)) #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8)
#define MYTIMEDIFF(fin,ini) ((fin)-(ini)) #define MYTIMEDIFF(fin,ini) ((fin)-(ini))
#define TIMER_RES_DIVIDER 1 // Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
#define TIMER_RES_DIVIDER 10000
#define SAMPLE_TIME_IMPLEMENTATION 1 #define SAMPLE_TIME_IMPLEMENTATION 1
#endif #endif
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER) #define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
@ -132,7 +133,9 @@ static CORETIMETYPE start_time_val, stop_time_val;
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0. or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
*/ */
void start_time(void) { void start_time(void) {
GETMYTIME(&start_time_val ); GETMYTIME(start_time_val);
ee_printf("Timer started\n");
ee_printf(" MTIME: %u\n", start_time_val);
#if CALLGRIND_RUN #if CALLGRIND_RUN
CALLGRIND_START_INSTRUMENTATION CALLGRIND_START_INSTRUMENTATION
#endif #endif
@ -153,7 +156,9 @@ void stop_time(void) {
#if MICA #if MICA
asm volatile("int3");/*1 */ asm volatile("int3");/*1 */
#endif #endif
GETMYTIME(&stop_time_val ); GETMYTIME(stop_time_val);
ee_printf("Timer stopped\n");
ee_printf(" MTIME: %u\n", stop_time_val);
} }
/* Function: get_time /* Function: get_time
Return an abstract "ticks" number that signifies time on the system. Return an abstract "ticks" number that signifies time on the system.
@ -166,6 +171,7 @@ void stop_time(void) {
*/ */
CORE_TICKS get_time(void) { CORE_TICKS get_time(void) {
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
ee_printf(" Elapsed MTIME: %u\n", elapsed);
return elapsed; return elapsed;
} }
/* Function: time_in_secs /* Function: time_in_secs
@ -176,13 +182,15 @@ CORE_TICKS get_time(void) {
*/ */
secs_ret time_in_secs(CORE_TICKS ticks) { secs_ret time_in_secs(CORE_TICKS ticks) {
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC; secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
int retvalint = (int)retval;
ee_printf(" RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint);
return retval; return retval;
} }
#else #else
#error "Please implement timing functionality in core_portme.c" #error "Please implement timing functionality in core_portme.c"
#endif /* SAMPLE_TIME_IMPLEMENTATION */ #endif /* SAMPLE_TIME_IMPLEMENTATION */
ee_u32 default_num_contexts=MULTITHREAD; ee_u32 default_num_contexts = MULTITHREAD;
/* Function: portable_init /* Function: portable_init
Target specific initialization code Target specific initialization code

@ -1 +0,0 @@
Subproject commit f60f2d0395053c4df362a97d7e2099721b6face6

View File

@ -62,25 +62,25 @@
// Peripheral memory space extends from BASE to BASE+RANGE // Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIM_SUPPORTED 1'b1 `define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder `define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 32'h00003FFF `define BOOTTIM_RANGE 56'h00003FFF
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder //`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 32'h00000FFF //`define BOOTTIM_RANGE 56'h00000FFF
`define TIM_SUPPORTED 1'b1 `define TIM_SUPPORTED 1'b1
`define TIM_BASE 32'h80000000 `define TIM_BASE 56'h80000000
`define TIM_RANGE 32'h07FFFFFF `define TIM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1 `define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 32'h02000000 `define CLINT_BASE 56'h02000000
`define CLINT_RANGE 32'h0000FFFF `define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1 `define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 32'h10012000 `define GPIO_BASE 56'h10012000
`define GPIO_RANGE 32'h000000FF `define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1 `define UART_SUPPORTED 1'b1
`define UART_BASE 32'h10000000 `define UART_BASE 56'h10000000
`define UART_RANGE 32'h00000007 `define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1 `define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 32'h0C000000 `define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 32'h03FFFFFF `define PLIC_RANGE 56'h03FFFFFF
// Bus Interface width // Bus Interface width
`define AHBW 64 `define AHBW 64

View File

@ -31,6 +31,7 @@
`define BUSYBEAR 1 `define BUSYBEAR 1
`define LINUX_FIX_READ {'h10000005} `define LINUX_FIX_READ {'h10000005}
`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/" `define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/"
//`define LINUX_TEST_VECTORS "../../../busybear_boot/"
// RV32 or RV64: XLEN = 32 or 64 // RV32 or RV64: XLEN = 32 or 64
`define XLEN 64 `define XLEN 64
@ -63,25 +64,25 @@
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIM_SUPPORTED 1'b1 `define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder `define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 32'h00003FFF `define BOOTTIM_RANGE 56'h00003FFF
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder //`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 32'h00000FFF //`define BOOTTIM_RANGE 56'h00000FFF
`define TIM_SUPPORTED 1'b1 `define TIM_SUPPORTED 1'b1
`define TIM_BASE 32'h80000000 `define TIM_BASE 56'h80000000
`define TIM_RANGE 32'h07FFFFFF `define TIM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1 `define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 32'h02000000 `define CLINT_BASE 56'h02000000
`define CLINT_RANGE 32'h0000FFFF `define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1 `define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 32'h10012000 `define GPIO_BASE 56'h10012000
`define GPIO_RANGE 32'h000000FF `define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1 `define UART_SUPPORTED 1'b1
`define UART_BASE 32'h10000000 `define UART_BASE 56'h10000000
`define UART_RANGE 32'h00000007 `define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1 `define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 32'h0C000000 `define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 32'h03FFFFFF `define PLIC_RANGE 56'h03FFFFFF
// Bus Interface width // Bus Interface width
`define AHBW 64 `define AHBW 64

View File

@ -1,3 +1,9 @@
# Oftentimes this script runs so long you'll go to sleep.
# But you don't want the script to die when your computer goes to sleep.
# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh")
# You can run "tail -f nohup.out" to see what would have been
# printed to the terminal if you had not used nohup
# =========== Debug the Process ========== # =========== Debug the Process ==========
# Uncomment this version for GDB/QEMU debugging # Uncomment this version for GDB/QEMU debugging
# - Opens up GDB interactively # - Opens up GDB interactively
@ -15,6 +21,12 @@
# - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt # - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt
#cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt #cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt
#cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/" #cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/"
# Uncomment this version in case you just want to have qemu_in_gdb_format.txt around
# It is often helpful for general debugging
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog
# Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection
#split -d -l 5600000 qemu_in_gdb_format.txt --verbose
# Uncomment this version for parse_gdb_output.py debugging # Uncomment this version for parse_gdb_output.py debugging
# - Uses qemu_in_gdb_format.txt # - Uses qemu_in_gdb_format.txt
@ -24,4 +36,4 @@
# =========== Just Do the Thing ========== # =========== Just Do the Thing ==========
# Uncomment this version for the whole thing # Uncomment this version for the whole thing
# - Logs info needed by buildroot testbench # - Logs info needed by buildroot testbench
(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | pv -l | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog (qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog

View File

@ -44,7 +44,7 @@ try:
instrs += 1 instrs += 1
storeAMO = '' storeAMO = ''
if instrs % 10000 == 0: if instrs % 10000 == 0:
print(instrs) print(instrs,flush=True)
# Instr in human assembly # Instr in human assembly
wPC.write('{} ***\n'.format(' '.join(l.split(':')[1].split()[0:2]))) wPC.write('{} ***\n'.format(' '.join(l.split(':')[1].split()[0:2])))
if '\tld' in l or '\tlw' in l or '\tlh' in l or '\tlb' in l: if '\tld' in l or '\tlw' in l or '\tlh' in l or '\tlb' in l:
@ -63,6 +63,15 @@ try:
storeLoc = readLoc storeLoc = readLoc
storeReg = l.split()[-1].split(',')[1] storeReg = l.split()[-1].split(',')[1]
storeAMO = l.split()[-2] storeAMO = l.split()[-2]
if '\tlr' in l:
currentRead = l.split()[-1].split(',')[0]
readOffset = "0"
readLoc = l.split()[-1].split('(')[1][:-1]
readType = "0" # *** I don't see that readType or lastReadType are ever used; we can probably get rid of them
if '\tsc' in l:
storeOffset = "0"
storeLoc = l.split()[-1].split('(')[1][:-1]
storeReg = l.split()[-1].split(',')[1]
if '\tsd' in l or '\tsw' in l or '\tsh' in l or '\tsb' in l: if '\tsd' in l or '\tsw' in l or '\tsh' in l or '\tsb' in l:
s = l.split('#')[0].split()[-1] s = l.split('#')[0].split()[-1]
storeReg = s.split(',')[0] storeReg = s.split(',')[0]

View File

@ -40,13 +40,12 @@ def parseCSRs(l):
val = int(l.split()[1],16) val = int(l.split()[1],16)
if inPageFault: if inPageFault:
# Not sure if these CSRs should be updated or not during page fault. # Not sure if these CSRs should be updated or not during page fault.
#if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"): if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
# # We do update some CSRs # We do update some CSRs
# CSRs[csr] = val CSRs[csr] = val
#else: else:
# # Others we preserve until changed later # Others we preserve until changed later
# pageFaultCSRs[csr] = val pageFaultCSRs[csr] = val
pageFaultCSRs[csr] = val
elif pageFaultCSRs and (csr in pageFaultCSRs): elif pageFaultCSRs and (csr in pageFaultCSRs):
if (val != pageFaultCSRs[csr]): if (val != pageFaultCSRs[csr]):
del pageFaultCSRs[csr] del pageFaultCSRs[csr]

View File

@ -26,12 +26,12 @@ configs = [
TestCase( TestCase(
name="busybear", name="busybear",
cmd="vsim -do wally-busybear-batch.do -c > {}", cmd="vsim -do wally-busybear-batch.do -c > {}",
grepstr="# loaded 100000 instructions" grepstr="loaded 100000 instructions"
), ),
TestCase( TestCase(
name="buildroot", name="buildroot",
cmd="vsim -do wally-buildroot-batch.do -c > {}", cmd="vsim -do wally-buildroot-batch.do -c > {}",
grepstr="# loaded 2000000 instructions" grepstr="loaded 2500000 instructions"
), ),
TestCase( TestCase(
name="rv32ic", name="rv32ic",

View File

@ -36,5 +36,4 @@ vopt work_busybear.testbench -o workopt_busybear
vsim workopt_busybear -suppress 8852,12070 vsim workopt_busybear -suppress 8852,12070
run -all run -all
exec ./slack-notifier/slack-notifier.py
quit quit

View File

@ -40,5 +40,4 @@ do ./wave-dos/linux-waves.do
#-- Run the Simulation #-- Run the Simulation
run -all run -all
exec ./slack-notifier/slack-notifier.py
##quit ##quit

View File

@ -4,6 +4,7 @@ view wave
add wave -divider add wave -divider
add wave /testbench/clk add wave /testbench/clk
add wave /testbench/reset add wave /testbench/reset
add wave -dec /testbench/instrs
add wave -divider Stalls_and_Flushes add wave -divider Stalls_and_Flushes
add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallF
@ -19,12 +20,13 @@ add wave /testbench/dut/hart/FlushW
add wave -divider F add wave -divider F
add wave -hex /testbench/dut/hart/ifu/PCF add wave -hex /testbench/dut/hart/ifu/PCF
add wave -divider D add wave -divider D
add wave -hex /testbench/pcExpected add wave -hex /testbench/PCDexpected
add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/PCtextD add wave -hex /testbench/PCtextD
add wave /testbench/InstrDName add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/InstrD add wave -hex /testbench/dut/hart/ifu/InstrD
add wave -hex /testbench/dut/hart/ieu/c/InstrValidD add wave -hex /testbench/dut/hart/ieu/c/InstrValidD
add wave -hex /testbench/PCDwrong
add wave -divider E add wave -divider E
add wave -hex /testbench/dut/hart/ifu/PCE add wave -hex /testbench/dut/hart/ifu/PCE
add wave -hex /testbench/PCtextE add wave -hex /testbench/PCtextE

View File

@ -45,8 +45,8 @@ assign FOpCtrlE = 3'b0;
// down - 010 // down - 010
// up - 011 // up - 011
// nearest max mag - 100 // nearest max mag - 100
assign FrmE = 3'b010; assign FrmE = 3'b011;
assign FmtE = 1'b1; assign FmtE = 1'b0;
assign wnan = FmtE ? &FmaResultM[62:52] && |FmaResultM[51:0] : &FmaResultM[62:55] && |FmaResultM[54:32]; assign wnan = FmtE ? &FmaResultM[62:52] && |FmaResultM[51:0] : &FmaResultM[62:55] && |FmaResultM[54:32];
@ -110,7 +110,6 @@ always @(posedge clk)
if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN "); if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN ");
if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN "); if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN ");
errors = errors + 1; errors = errors + 1;
if (errors == 20)
$stop; $stop;
end end
if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]})) || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]})) || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin

View File

@ -1,3 +1,3 @@
testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rmax -seed 113355 -level 1 > testFloat
tr -d ' ' < testFloat > testFloatNoSpace tr -d ' ' < testFloat > testFloatNoSpace

View File

@ -10,172 +10,124 @@ module fctrl (
output logic FDivStartD, output logic FDivStartD,
output logic [2:0] FResultSelD, output logic [2:0] FResultSelD,
output logic [3:0] FOpCtrlD, output logic [3:0] FOpCtrlD,
output logic [1:0] FResSelD,
output logic [1:0] FIntResSelD,
output logic FmtD, output logic FmtD,
output logic [2:0] FrmD, output logic [2:0] FrmD,
output logic [1:0] FMemRWD,
output logic FOutputInput2D,
output logic FInput2UsedD, FInput3UsedD,
output logic FWriteIntD); output logic FWriteIntD);
`define FCTRLW 15
logic IllegalFPUInstr1D, IllegalFPUInstr2D; logic [`FCTRLW-1:0] ControlsD;
// *** fix rounding for dynamic rounding // FPU Instruction Decoder
always_comb
case(OpD)
// FWriteEn_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd
7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt
7'b00100??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b00101??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_100_0111_10_00_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b10100??: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b11100??: if (Funct3D == 3'b001)
ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
7'b1100000: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w
1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101000: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b1_1_010_0100_00_00_0_0; // fcvt.w.s
1'b1: ControlsD = `FCTRLW'b1_1_010_0101_00_00_0_0; // fcvt.wu.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d
7'b1100001: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w
1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101001: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d
1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
// unswizzle control bits
assign {FWriteEnD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
// if dynamic rounding, choose FRM_REGW
assign FrmD = &Funct3D ? FRM_REGW : Funct3D; assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
//all subsequent logic is based on the table present // Precision
//in Section 5 of Wally Architecture Specification // 0-single
// 1-double
//write is enabled for all fp instruciton op codes assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : Funct7D[0];
//sans fp load // div/sqrt
logic isFP, isFPLD;
always_comb begin
//case statement is easier to modify
//in case of errors
case(OpD)
//fp instructions sans load
7'b1010011 : isFP = 1'b1;
7'b1000011 : isFP = 1'b1;
7'b1000111 : isFP = 1'b1;
7'b1001011 : isFP = 1'b1;
7'b1001111 : isFP = 1'b1;
7'b0100111 : isFP = 1'b1;
7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111
default : isFP = 1'b0;
endcase
end
//useful intermediary signals
//
//(mult only not supported in current datapath)
//set third FMA operand to zero in this case
//(or equivalent)
always_comb begin
//checks all but FMA/store/load
IllegalFPUInstr2D = 0;
FDivStartD = 1'b0;
if(OpD == 7'b1010011) begin
casez(Funct7D)
//compare
7'b10100?? : FResultSelD = 3'b001;
//div/sqrt
7'b0?011?? : begin FResultSelD = 3'b000; FDivStartD = 1'b1; end
//add/sub
7'b0000??? : FResultSelD = 3'b100;
//mult
7'b00010?? : FResultSelD = 3'b010;
//convert (not precision)
7'b110?0?? : FResultSelD = 3'b100;
//convert (precision)
7'b010000? : FResultSelD = 3'b100;
//Min/Max
7'b00101?? : FResultSelD = 3'b001;
//sign injection
7'b00100?? : FResultSelD = 3'b011;
//classify //only if funct3 = 001
7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101;
//output ReadData1
else if (Funct7D[1] == 0) FResultSelD = 3'b111;
//output SrcW
7'b111100? : FResultSelD = 3'b110;
default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
endcase
end
//FMA/store/load
else begin
case(OpD)
//4 FMA instructions
7'b1000011 : FResultSelD = 3'b010;
7'b1000111 : FResultSelD = 3'b010;
7'b1001011 : FResultSelD = 3'b010;
7'b1001111 : FResultSelD = 3'b010;
//store
7'b0100111 : FResultSelD = 3'b111;
//load
7'b0000111 : FResultSelD = 3'b111;
default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
endcase
end
end
assign FOutputInput2D = OpD == 7'b0100111;
assign FMemRWD[0] = FOutputInput2D;
assign FMemRWD[1] = OpD == 7'b0000111;
//register is chosen based on operation performed
//----
//write selection is chosen in the same way as
//register selection
//
// reg/write sel logic and assignment
//
// 3'b000 = div/sqrt
// 3'b001 = cmp
// 3'b010 = fma/mult
// 3'b011 = sgn inj
// 3'b100 = add/sub/cnvt
// 3'b101 = classify
// 3'b110 = output SrcAW
// 3'b111 = output ReadData1
//
//reg select
//this value is used enough to be shorthand
//operation control for each fp operation
//has to be expanded over standard to account for
//integrated fpadd/cvt
//
//will integrate FMA opcodes into design later
//
//conversion instructions will
//also need to be added later as I find the opcode
//version I used for this repo
//let's do separate SOP for each type of operation
// assign FOpCtrlD[3] = 1'b0;
//
//
always_comb begin
IllegalFPUInstr1D = 0;
FInput3UsedD = 0;
case (FResultSelD)
// div/sqrt
// fdiv = ???0 // fdiv = ???0
// fsqrt = ???1 // fsqrt = ???1
3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end
// cmp // cmp
// fmin = ?111 // fmin = ?111
// fmax = ?101 // fmax = ?101
// feq = ?010 // feq = ?010
// flt = ?001 // flt = ?001
// fle = ?011 // fle = ?011
// {?, is min or max, is eq or le, is lt or le} // {?, is min or max, is eq or le, is lt or le}
3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end
//fma/mult //fma/mult
// fmadd = ?000 // fmadd = ?000
// fmsub = ?001 // fmsub = ?001
// fnmsub = ?010 -(a*b)+c // fnmsub = ?010 -(a*b)+c
// fnmadd = ?011 -(a*b)-c // fnmadd = ?011 -(a*b)-c
// fmul = ?100 // fmul = ?100
// {?, is mul, is negative, is sub} // {?, is mul, is negative, is sub}
3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end
// sgn inj // sgn inj
// fsgnj = ??00 // fsgnj = ??00
// fsgnjn = ??01 // fsgnjn = ??01
// fsgnjx = ??10 // fsgnjx = ??10
3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end
// add/sub/cnvt // add/sub/cnvt
// fadd = 0000 // fadd = 0000
// fsub = 0001 // fsub = 0001
// fcvt.w.s = 0100 // fcvt.w.s = 0100
@ -188,35 +140,18 @@ module fctrl (
// fcvt.d.w = 1110 // fcvt.d.w = 1110
// fcvt.d.wu = 1111 // fcvt.d.wu = 1111
// fcvt.d.s = 1000 // fcvt.d.s = 1000
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub}
3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), (Rs2D[0]&Funct7D[5])|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end
// classify {?, ?, ?, ?}
3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end
// output SrcAW
// fmv.w.x = ???0 // fmv.w.x = ???0
// fmv.w.d = ???1 // fmv.w.d = ???1
3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end
// output Input1
// flw = ?000 // flw = ?000
// fld = ?001 // fld = ?001
// fsw = ?010 // output Input2 // fsw = ?010
// fsd = ?011 // output Input2 // fsd = ?011
// fmv.x.w = ?100 // fmv.x.w = ?100
// fmv.x.d = ?101 // fmv.x.d = ?101
// {?, is mv, is store, is double or fmv} // {?, is mv, is store, is double or fmv}
3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end
default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; FInput2UsedD = 1'b0; end
endcase
end
//precision
assign FmtD = (~&FResultSelD & Funct7D[0]) | (&FResultSelD & FOpCtrlD[0]);
assign IllegalFPUInstrD = IllegalFPUInstr1D | IllegalFPUInstr2D;
//write to integer source if conv to int occurs
//AND of Funct7 for int results
// is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv
assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]);
// if not writing to int reg and not a store function and not move
assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
endmodule endmodule

View File

@ -1,111 +1,111 @@
module fma1( module fma1(
input logic [63:0] X, // X input logic [63:0] X, // X
input logic [63:0] Y, // Y input logic [63:0] Y, // Y
input logic [63:0] Z, // Z input logic [63:0] Z, // Z
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single input logic FmtE, // precision 1 = double 0 = single
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
output logic [161:0] AlignedAddendE, // Z aligned for addition output logic [161:0] AlignedAddendE, // Z aligned for addition
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
output logic AddendStickyE, // sticky bit that is calculated during alignment output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE, // set the product to zero before addition if the product is too small to matter output logic KillProdE, // set the product to zero before addition if the product is too small to matter
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
output logic XInfE, YInfE, ZInfE, // inputs are infinity output logic XInfE, YInfE, ZInfE, // inputs are infinity
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
logic [51:0] XFrac,YFrac,ZFrac; // input fraction logic [51:0] XFrac,YFrac,ZFrac; // input fraction
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one) logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
logic [12:0] XExp,YExp,ZExp; // input exponents logic [12:0] XExp,YExp,ZExp; // input exponents
logic XSgn,YSgn,ZSgn; // input signs logic XSgn,YSgn,ZSgn; // input signs
logic [12:0] AlignCnt; // how far to shift the addend to align with the product logic [12:0] AlignCnt; // how far to shift the addend to align with the product
logic [211:0] ZManShifted; // output of the alignment shifter including sticky bit logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [211:0] ZManPreShifted; // input to the alignment shifter logic [213:0] ZManPreShifted; // input to the alignment shifter
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
logic [63:0] Addend; // value to add (Z or zero) logic [63:0] Addend; // value to add (Z or zero)
logic [12:0] Bias; // 1023 for double, 127 for single logic [12:0] Bias; // 1023 for double, 127 for single
logic XExpZero, YExpZero, ZExpZero; // input exponent zero logic XExpZero, YExpZero, ZExpZero; // input exponent zero
logic XFracZero, YFracZero, ZFracZero; // input fraction zero logic XFracZero, YFracZero, ZFracZero; // input fraction zero
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// split inputs into the sign bit, fraction, and exponent to handle single or double precision // split inputs into the sign bit, fraction, and exponent to handle single or double precision
// - single precision is in the top half of the inputs // - single precision is in the top half of the inputs
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Set addend to zero if FMUL instruction // Set addend to zero if FMUL instruction
assign Addend = FOpCtrlE[2] ? 64'b0 : Z; assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
assign XSgn = X[63]; assign XSgn = X[63];
assign YSgn = Y[63]; assign YSgn = Y[63];
assign ZSgn = Addend[63]; assign ZSgn = Addend[63];
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0}; assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
assign XMan = {~XExpZero, XFrac}; assign XMan = {~XExpZero, XFrac};
assign YMan = {~YExpZero, YFrac}; assign YMan = {~YExpZero, YFrac};
assign ZMan = {~ZExpZero, ZFrac}; assign ZMan = {~ZExpZero, ZFrac};
assign Bias = FmtE ? 13'h3ff : 13'h7f; assign Bias = FmtE ? 13'h3ff : 13'h7f;
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// determine if an input is a special value // determine if an input is a special value
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
assign XExpZero = ~|XExp; assign XExpZero = ~|XExp;
assign YExpZero = ~|YExp; assign YExpZero = ~|YExp;
assign ZExpZero = ~|ZExp; assign ZExpZero = ~|ZExp;
assign XFracZero = ~|XFrac; assign XFracZero = ~|XFrac;
assign YFracZero = ~|YFrac; assign YFracZero = ~|YFrac;
assign ZFracZero = ~|ZFrac; assign ZFracZero = ~|ZFrac;
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0]; assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0]; assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0]; assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
assign XNaNE = XExpMax & ~XFracZero; assign XNaNE = XExpMax & ~XFracZero;
assign YNaNE = YExpMax & ~YFracZero; assign YNaNE = YExpMax & ~YFracZero;
assign ZNaNE = ZExpMax & ~ZFracZero; assign ZNaNE = ZExpMax & ~ZFracZero;
assign XDenorm = XExpZero & ~XFracZero; assign XDenorm = XExpZero & ~XFracZero;
assign YDenorm = YExpZero & ~YFracZero; assign YDenorm = YExpZero & ~YFracZero;
assign ZDenorm = ZExpZero & ~ZFracZero; assign ZDenorm = ZExpZero & ~ZFracZero;
assign XInfE = XExpMax & XFracZero; assign XInfE = XExpMax & XFracZero;
assign YInfE = YExpMax & YFracZero; assign YInfE = YExpMax & YFracZero;
assign ZInfE = ZExpMax & ZFracZero; assign ZInfE = ZExpMax & ZFracZero;
assign XZeroE = XExpZero & XFracZero; assign XZeroE = XExpZero & XFracZero;
assign YZeroE = YExpZero & YFracZero; assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero; assign ZZeroE = ZExpZero & ZFracZero;
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Calculate the product // Calculate the product
// - When multiplying two fp numbers, add the exponents // - When multiplying two fp numbers, add the exponents
// - Subtract the bias (XExp + YExp has two biases, one from each exponent) // - Subtract the bias (XExp + YExp has two biases, one from each exponent)
// - Denormal numbers have an exponent value of 1, however they are // - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one if there is a denormal number // represented with an exponent of 0. add one if there is a denormal number
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// verilator lint_off WIDTH // verilator lint_off WIDTH
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
XExp + YExp - Bias + XDenorm + YDenorm; XExp + YExp - Bias + XDenorm + YDenorm;
// Calculate the product's mantissa // Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = XMan * YMan; assign ProdManE = XMan * YMan;
@ -114,72 +114,71 @@ module fma1(
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Alignment shifter // Alignment shifter
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment // determine the shift count for alignment
// - negative means Z is larger, so shift Z left // - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right // - positive means the product is larger, so shift Z right
// - Denormal numbers have an exponent value of 1, however they are // - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one to the exponent if it is a denormal number // represented with an exponent of 0. add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - ZExp - ZDenorm; assign AlignCnt = ProdExpE - ZExp - ZDenorm;
// verilator lint_on WIDTH // verilator lint_on WIDTH
// Default Addition without shifting // Default Addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 | // | 55'b0 | 106'b(product) | 2'b0 |
// |1'b0| addend | // |1'b0| addend |
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) // the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZManPreShifted = {55'b0, ZMan, 104'b0}; assign ZManPreShifted = {55'b0, ZMan, 106'b0};
always_comb always_comb
begin begin
// If the product is too small to affect the sum, kill the product // If the product is too small to affect the sum, kill the product
// | 55'b0 | 106'b(product) | 2'b0 | // | 54'b0 | 106'b(product) | 2'b0 |
// | addend | // | addend |
if ($signed(AlignCnt) <= $signed(-13'd56)) begin if ($signed(AlignCnt) <= $signed(-13'd56)) begin
KillProdE = 1; KillProdE = 1;
ZManShifted = {107'b0, ZMan, 52'b0}; ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0};
AddendStickyE = ~(XZeroE|YZeroE); AddendStickyE = ~(XZeroE|YZeroE);
// If the Addend is shifted left (negative AlignCnt) // If the Addend is shifted left (negative AlignCnt)
// | 55'b0 | 106'b(product) | 2'b0 | // | 54'b0 | 106'b(product) | 2'b0 |
// | addend | // | addend |
end else if($signed(AlignCnt) <= $signed(13'd0)) begin end else if($signed(AlignCnt) <= $signed(13'd0)) begin
KillProdE = 0; KillProdE = 0;
ZManShifted = ZManPreShifted << -AlignCnt; ZManShifted = ZManPreShifted << -AlignCnt;
AddendStickyE = |(ZManShifted[49:0]); AddendStickyE = |(ZManShifted[51:0]);
// If the Addend is shifted right (positive AlignCnt) // If the Addend is shifted right (positive AlignCnt)
// | 55'b0 | 106'b(product) | 2'b0 | // | 54'b0 | 106'b(product) | 2'b0 |
// | addend | // | addend |
end else if ($signed(AlignCnt)<=$signed(13'd104)) begin end else if ($signed(AlignCnt)<=$signed(13'd106)) begin
KillProdE = 0; KillProdE = 0;
ZManShifted = ZManPreShifted >> AlignCnt; ZManShifted = ZManPreShifted >> AlignCnt;
AddendStickyE = |(ZManShifted[49:0]); AddendStickyE = |(ZManShifted[51:0]);
// If the addend is too small to affect the addition // If the addend is too small to affect the addition
// - The addend has to shift two past the end of the addend to be considered too small // - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding // - The 2 extra bits are needed for rounding
// | 55'b0 | 106'b(product) | 2'b0 | // | 54'b0 | 106'b(product) | 2'b0 |
// | addend | // | addend |
end else begin end else begin
KillProdE = 0; KillProdE = 0;
ZManShifted = 0; ZManShifted = 0;
AddendStickyE = ~ZZeroE; AddendStickyE = ~ZZeroE;
end end
end end
assign AlignedAddendE = ZManShifted[211:50]; assign AlignedAddendE = ZManShifted[213:52];
endmodule
endmodule

View File

@ -1,127 +1,131 @@
module fma2( module fma2(
input logic [63:0] X, // X input logic [63:0] X, // X
input logic [63:0] Y, // Y input logic [63:0] Y, // Y
input logic [63:0] Z, // Z input logic [63:0] Z, // Z
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtM, // precision 1 = double 0 = single input logic FmtM, // precision 1 = double 0 = single
input logic [105:0] ProdManM, // 1.X frac * 1.Y frac input logic [105:0] ProdManM, // 1.X frac * 1.Y frac
input logic [161:0] AlignedAddendM, // Z aligned for addition input logic [161:0] AlignedAddendM, // Z aligned for addition
input logic [12:0] ProdExpM, // X exponent + Y exponent - bias input logic [12:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
output logic [63:0] FmaResultM, // FMA final result output logic [63:0] FmaResultM, // FMA final result
output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
logic [51:0] ResultFrac; // Result fraction logic [51:0] ResultFrac; // Result fraction
logic [10:0] ResultExp; // Result exponent logic [10:0] ResultExp; // Result exponent
logic ResultSgn; // Result sign logic ResultSgn; // Result sign
logic [10:0] ZExp; // input exponent logic [10:0] ZExp; // input exponent
logic XSgn, YSgn, ZSgn; // input sign logic XSgn, YSgn, ZSgn; // input sign
logic PSgn; // product sign logic PSgn; // product sign
logic [105:0] ProdMan2; // product being added logic [105:0] ProdMan2; // product being added
logic [162:0] AlignedAddend2; // possibly inverted aligned Z logic [162:0] AlignedAddend2; // possibly inverted aligned Z
logic [161:0] Sum; // positive sum logic [161:0] Sum; // positive sum
logic [162:0] PreSum; // possibly negitive sum logic [162:0] PreSum; // possibly negitive sum
logic [12:0] SumExp; // exponent of the normalized sum logic [12:0] SumExp; // exponent of the normalized sum
logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 logic [12:0] SumExpTmpMinus1; // SumExpTmp-1
logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [53:0] NormSum; // normalized sum logic [54:0] NormSum; // normalized sum
logic [161:0] SumShifted; // sum shifted for normalization logic [161:0] SumShifted; // sum shifted for normalization
logic [8:0] NormCnt; // output of the leading zero detector logic [8:0] NormCnt; // output of the leading zero detector
logic NormSumSticky; // sticky bit calulated from the normalized sum logic NormSumSticky; // sticky bit calulated from the normalized sum
logic SumZero; // is the sum zero logic SumZero; // is the sum zero
logic NegSum; // is the sum negitive logic NegSum; // is the sum negitive
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z) logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
logic ResultDenorm; // is the result denormalized logic ResultDenorm; // is the result denormalized
logic Sticky; // Sticky bit logic Sticky; // Sticky bit
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
logic Invalid,Underflow,Overflow,Inexact; // flags logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag)
logic [8:0] DenormShift; // right shift if the result is denormalized logic Invalid,Underflow,Overflow,Inexact; // flags
logic SubBySmallNum; // was there supposed to be a subtraction by a small number logic [8:0] DenormShift; // right shift if the result is denormalized
logic [63:0] Addend; // value to add (Z or zero) logic SubBySmallNum; // was there supposed to be a subtraction by a small number
logic ZeroSgn; // the result's sign if the sum is zero logic [63:0] Addend; // value to add (Z or zero)
logic ResultSgnTmp; // the result's sign assuming the result is not zero logic ZeroSgn; // the result's sign if the sum is zero
logic Guard, Round, LSBNormSum; // bits needed to determine rounding logic ResultSgnTmp; // the result's sign assuming the result is not zero
logic [12:0] MaxExp; // maximum value of the exponent logic Guard, Round, LSBNormSum; // bits needed to determine rounding
logic [12:0] FracLen; // length of the fraction logic UfGuard, UfRound, UfLSBNormSum; // bits needed to determine rounding for underflow flag
logic SigNaN; // is an input a signaling NaN logic [12:0] MaxExp; // maximum value of the exponent
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency) logic [12:0] FracLen; // length of the fraction
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results logic SigNaN; // is an input a signaling NaN
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency)
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Select input fields // Select input fields
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers // The following logic duplicates fma1 because it's cheaper to recompute than provide registers
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Set addend to zero if FMUL instruction // Set addend to zero if FMUL instruction
assign Addend = FOpCtrlM[2] ? 64'b0 : Z; assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
// split inputs into the sign bit, and exponent to handle single or double precision // split inputs into the sign bit, and exponent to handle single or double precision
// - single precision is in the top half of the inputs // - single precision is in the top half of the inputs
assign XSgn = X[63]; assign XSgn = X[63];
assign YSgn = Y[63]; assign YSgn = Y[63];
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]}; assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
// Calculate the product's sign // Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB // Negate product's sign if FNMADD or FNMSUB
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1]; assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Addition // Addition
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Negate Z when doing one of the following operations: // Negate Z when doing one of the following operations:
// -prod + Z // -prod + Z
// prod - Z // prod - Z
assign InvZ = ZSgn ^ PSgn; assign InvZ = ZSgn ^ PSgn;
// Choose an inverted or non-inverted addend - the one is added later // Choose an inverted or non-inverted addend - the one is added later
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
// Kill the product if the product is too small to affect the addition (determined in fma1.sv) // Kill the product if the product is too small to affect the addition (determined in fma1.sv)
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM; assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
// Do the addition // Do the addition
// - add one to negate if the addend was inverted // - add one to negate if the addend was inverted
// - the 2 extra bits at the beginning and end are needed for rounding // - the 2 extra bits at the beginning and end are needed for rounding
assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ}; assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ};
// Is the sum negative // Is the sum negative
assign NegSum = PreSum[162]; assign NegSum = PreSum[162];
// If the sum is negative, negate the sum. // If the sum is negative, negate the sum.
assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0]; assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0];
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Leading one detector // Leading one detector
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
//*** replace with non-behavioral code //*** replace with non-behavioral code
logic [8:0] i; logic [8:0] i;
always_comb begin always_comb begin
i = 0; i = 0;
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
NormCnt = i+1; // compute shift count NormCnt = i+1; // compute shift count
end end
@ -133,112 +137,127 @@ module fma2(
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Normalization // Normalization
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Determine if the sum is zero // Determine if the sum is zero
assign SumZero = ~(|Sum); assign SumZero = ~(|Sum);
// determine the length of the fraction based on precision // determine the length of the fraction based on precision
assign FracLen = FmtM ? 13'd52 : 13'd23; assign FracLen = FmtM ? 13'd52 : 13'd23;
// Determine if the result is denormal // Determine if the result is denormal
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56); assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results // Determine the shift needed for denormal results
assign SumExpTmpMinus1 = SumExpTmp-1; assign SumExpTmpMinus1 = SumExpTmp-1;
assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0; assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;
// Normalize the sum // Normalize the sum
assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift; assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
assign NormSum = SumShifted[161:108]; assign NormSum = SumShifted[161:107];
// Calculate the sticky bit // Calculate the sticky bit
assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]); assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]);
assign Sticky = AddendStickyM | NormSumSticky; assign Sticky = AddendStickyM | NormSumSticky;
// Determine sum's exponent // Determine sum's exponent
assign SumExp = SumZero ? 13'b0 : assign SumExp = SumZero ? 13'b0 :
ResultDenorm ? 13'b0 : ResultDenorm ? 13'b0 :
SumExpTmp; SumExpTmp;
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Rounding // Rounding
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// round to nearest even // round to nearest even
// {Guard, Round, Sticky} // {Guard, Round, Sticky}
// 0xx - do nothing // 0xx - do nothing
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1) // 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted // - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number) // 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1 // 110/111 - Plus1
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity // round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity // round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude // round to nearest max magnitude
// {Guard, Round, Sticky} // {Guard, Round, Sticky}
// 0xx - do nothing // 0xx - do nothing
// 100 - tie - Plus1 // 100 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted // - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number) // 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1 // 110/111 - Plus1
// determine guard, round, and least significant bit of the result // determine guard, round, and least significant bit of the result
assign Guard = FmtM ? NormSum[1] : NormSum[30]; assign Guard = FmtM ? NormSum[2] : NormSum[31];
assign Round = FmtM ? NormSum[0] : NormSum[29]; assign Round = FmtM ? NormSum[1] : NormSum[30];
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[31]; assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32];
// Determine if a small number was supposed to be subtracted // used to determine underflow flag
assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM; assign UfGuard = FmtM ? NormSum[1] : NormSum[30];
assign UfRound = FmtM ? NormSum[0] : NormSum[29];
assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31];
always_comb begin // Deterimine if a small number was supposed to be subtrated
// Determine if you add 1 assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM;
case (FrmM)
3'b000: CalcPlus1 = Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&LSBNormSum&~SubBySmallNum));//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up
3'b100: CalcPlus1 = (Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&~SubBySmallNum)));//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// If an answer is exact don't round always_comb begin
assign Plus1 = CalcPlus1 & (Sticky | Guard | Round); // Determine if you add 1
assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); case (FrmM)
3'b000: CalcPlus1 = Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&LSBNormSum&~SubBySmallNum));//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up
3'b100: CalcPlus1 = (Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&~SubBySmallNum)));//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (FrmM)
3'b000: UfCalcPlus1 = UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&UfLSBNormSum&~SubBySmallNum));//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round up
3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&~SubBySmallNum)));//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// Compute rounded result // If an answer is exact don't round
logic [64:0] RoundAdd; assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
logic [51:0] NormSumTruncated; assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard | UfRound);
assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} : assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round);
Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
assign NormSumTruncated = FmtM ? NormSum[53:2] : {NormSum[53:31], 29'b0};
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; // Compute rounded result
logic [64:0] RoundAdd;
logic [51:0] NormSumTruncated;
assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} :
Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
assign NormSumTruncated = FmtM ? NormSum[54:3] : {NormSum[54:32], 29'b0};
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
assign ResultExp = FullResultExp[10:0]; assign ResultExp = FullResultExp[10:0];
@ -247,58 +266,57 @@ module fma2(
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Sign calculation // Sign calculation
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Determine the sign if the sum is zero // Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity // if cancelation then 0 unless round to -infinity
// otherwise psign // otherwise psign
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn; assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
// is the result negative // is the result negative
// if p - z is the Sum negative // if p - z is the Sum negative
// if -p + z is the Sum positive // if -p + z is the Sum positive
// if -p - z then the Sum is negative // if -p - z then the Sum is negative
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn); assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Flags // Flags
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Set Invalid flag for following cases: // Set Invalid flag for following cases:
// 1) Inf - Inf (unless x or y is NaN) // 1) any input is a signaling NaN
// 2) 0 * Inf // 2) Inf - Inf (unless x or y is NaN)
// 3) any input is a signaling NaN // 3) 0 * Inf
assign MaxExp = FmtM ? 13'd2047 : 13'd255; assign MaxExp = FmtM ? 13'd2047 : 13'd255;
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented // Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't output // - Don't set the overflow flag if an overflowed result isn't output
assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Underflow flag if the number is too small to be represented in normal numbers // Set Underflow flag if the number is too small to be represented in normal numbers
// - Don't set the underflow flag if the result is exact // - Don't set the underflow flag if the result is exact
assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
//assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); assign UnderflowFlag = (FullResultExp[12] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision
// Set Inexact flag if the result is different from what would be output given infinite precision // - Don't set the underflow flag if an underflowed result isn't output
// - Don't set the underflow flag if an underflowed result isn't outputed assign Inexact = (Sticky|UfGuard|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Combine flags // Combine flags
// - FMA can't set the Divide by zero flag // - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result was rounded up to a normal number // - Don't set the underflow flag if the result was rounded up to a normal number
assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
@ -306,31 +324,31 @@ module fma2(
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Select the result // Select the result
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} : assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
{ResultSgn, 11'h7ff, 52'b0} : {ResultSgn, 11'h7ff, 52'b0} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
{ResultSgn, 8'hff, 55'b0}; {ResultSgn, 8'hff, 55'b0};
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
assign FmaResultM = XNaNM ? XNaNResult : assign FmaResultM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult : YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult : ZNaNM ? ZNaNResult :
Invalid ? InvalidResult : // has to be before inf Invalid ? InvalidResult : // has to be before inf
XInfM ? {PSgn, X[62:0]} : XInfM ? {PSgn, X[62:0]} :
YInfM ? {PSgn, Y[62:0]} : YInfM ? {PSgn, Y[62:0]} :
ZInfM ? {ZSgn, Addend[62:0]} : ZInfM ? {ZSgn, Addend[62:0]} :
Overflow ? OverflowResult : Overflow ? OverflowResult :
KillProdM ? KillProdResult : // has to be after Underflow KillProdM ? KillProdResult : // has to be after Underflow
Underflow & ~ResultDenorm ? UnderflowResult : Underflow & ~ResultDenorm ? UnderflowResult :
FmtM ? {ResultSgn, ResultExp, ResultFrac} : FmtM ? {ResultSgn, ResultExp, ResultFrac} :
{ResultSgn, ResultExp[7:0], ResultFrac, 3'b0}; {ResultSgn, ResultExp[7:0], ResultFrac, 3'b0};

View File

@ -1,7 +1,7 @@
/////////////////////////////////////////// ///////////////////////////////////////////
// //
// Written: // Written: Katherine Parry, Bret Mathis
// Modified: // Modified: 6/23/2021
// //
// Purpose: FPU // Purpose: FPU
// //
@ -25,23 +25,22 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpu ( module fpu (
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic reset,
input logic clk, input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, input logic [31:0] InstrD,
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW, input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW, input logic FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
input logic RegWriteD, // register write enable from ieu
output logic [4:0] SetFflagsM, // FPU flags
output logic [1:0] FMemRWM, // Read/write enable for memory {read, write}
output logic FStallD, // Stall the decode stage if Div/Sqrt instruction output logic FStallD, // Stall the decode stage if Div/Sqrt instruction
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
output logic [`XLEN-1:0] FWriteDataM, // Data to be written to memory output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM,
output logic FDivBusyE, // Is the divison/sqrt unit busy output logic FDivBusyE, // Is the divison/sqrt unit busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM, // FPU flags
output logic [`XLEN-1:0] FPUResultW); // FPU result output logic [`XLEN-1:0] FPUResultW); // FPU result
// control logic signal instantiation // control logic signal instantiation
@ -51,24 +50,27 @@ module fpu (
logic FDivStartD, FDivStartE; // Start division logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register logic FWriteIntD; // Write to integer register
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory logic [1:0] FMemRWD; // Read and write enable for memory
logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal
logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal
logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal
logic FInput2UsedD; // Is input 2 used logic SrcYUsedD; // Is input 2 used
logic FInput3UsedD; // Is input 3 used logic SrcZUsedD; // Is input 3 used
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component
logic SelLoadInputE, SelLoadInputM; // Select which adress to load when single precision logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [4:0] Adr1E, Adr2E, Adr3E;
// regfile signals //*** KEP lint warning - changed `XLEN-1 to 63 // regfile signals
logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining
logic [63:0] FWDM; // Write data for FP register logic [63:0] FWDM; // Write data for FP register
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FInput1E, FInput1M, FInput1W, FInput1tmpE; // Input 1 to the various units (after forwarding) logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding)
logic [63:0] FInput2E, FInput2M; // Input 2 to the various units (after forwarding) logic [`XLEN-1:0] SrcXMAligned;
logic [63:0] FInput3E, FInput3M; // Input 3 to the various units (after forwarding) logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding)
logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding)
logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
// div/sqrt signals // div/sqrt signals
@ -123,19 +125,14 @@ module fpu (
logic [4:0] FAddFlagsM, FAddFlagsW; logic [4:0] FAddFlagsM, FAddFlagsW;
// cmp signals // cmp signals
logic [7:0] WE, WM; logic CmpInvalidE, CmpInvalidM, CmpInvalidW;
logic [7:0] XE, XM; logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW;
logic ANaNE, ANaNM;
logic BNaNE, BNaNM;
logic AzeroE, AzeroM;
logic BzeroE, BzeroM;
logic CmpInvalidM, CmpInvalidW;
logic [1:0] CmpFCCM, CmpFCCW;
logic [63:0] FCmpResultM, FCmpResultW;
// fsgn signals // fsgn signals
logic [63:0] SgnResultE, SgnResultM, SgnResultW; logic [63:0] SgnResultE, SgnResultM, SgnResultW;
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
logic [63:0] FResM, FResW;
logic FFlgM, FFlgW;
// instantiation of W stage regfile signals // instantiation of W stage regfile signals
logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW;
@ -147,31 +144,9 @@ module fpu (
logic [63:0] FPUResult64W, FPUResult64E; logic [63:0] FPUResult64W, FPUResult64E;
logic [4:0] FPUFlagsW; logic [4:0] FPUFlagsW;
// pipeline control logic
logic PipeEnableDE;
logic PipeEnableEM;
logic PipeEnableMW;
logic PipeClearDE;
logic PipeClearEM;
logic PipeClearMW;
// temporarily assign pipe clear and enable signals
// to never flush & always be running
localparam PipeClear = 1'b0;
localparam PipeEnable = 1'b1;
always_comb begin
PipeEnableDE = ~StallE;
PipeEnableEM = ~StallM;
PipeEnableMW = ~StallW;
PipeClearDE = FlushE;
PipeClearEM = FlushM;
PipeClearMW = FlushW;
end
//DECODE STAGE //DECODE STAGE
// Hazard unit for FPU
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
// top-level controller for FPU // top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
@ -185,40 +160,33 @@ module fpu (
//***************** //*****************
// fpregfile D/E pipe registers // fpregfile D/E pipe registers
//***************** //*****************
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E); flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E); flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E); flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
//***************** //*****************
// other D/E pipe registers // other D/E pipe registers
//***************** //*****************
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE); flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); {Adr1E, Adr2E, Adr3E});
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE); flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE,
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD},
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE); {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE});
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
flopenrc #(1) DEReg18(clk, reset, PipeClearDE, PipeEnableDE, InstrD[15], SelLoadInputE);
//EXECUTION STAGE //EXECUTION STAGE
// input muxs for forwarding // Hazard unit for FPU
mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); fpuhazard hazard(.*);
mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, FForwardInput1E, FInput1tmpE);
mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E); // forwarding muxs
mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E); mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E); mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE);
mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE);
// first of two-stage instance of floating-point fused multiply-add unit // first of two-stage instance of floating-point fused multiply-add unit
fma1 fma1 (.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FOpCtrlE(FOpCtrlE[2:0]),.*); fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*);
// first and only instance of floating-point divider // first and only instance of floating-point divider
logic fpdivClk; logic fpdivClk;
@ -229,193 +197,181 @@ module fpu (
.ECLK(fpdivClk)); .ECLK(fpdivClk));
// capture the inputs for div/sqrt // capture the inputs for div/sqrt
flopenrc #(64) reg_input1 (.d(FInput1E), .q(DivInput1E), flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E),
.en(~HoldInputs), .clear(FDivSqrtDoneE), .en(~HoldInputs), .clear(FDivSqrtDoneE),
.reset(reset), .clk(clk)); .reset(reset), .clk(clk));
flopenrc #(64) reg_input2 (.d(FInput2E), .q(DivInput2E), flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E),
.en(~HoldInputs), .clear(FDivSqrtDoneE), .en(~HoldInputs), .clear(FDivSqrtDoneE),
.reset(reset), .clk(clk)); .reset(reset), .clk(clk));
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*); fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*);
// first of two-stage instance of floating-point add/cvt unit // first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*); fpuaddcvt1 fpadd1 (.*);
// first of two-stage instance of floating-point comparator // first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]); fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE);
// first and only instance of floating-point sign converter // first and only instance of floating-point sign converter
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
// first and only instance of floating-point classify unit // first and only instance of floating-point classify unit
fpuclassify fpuclass (.*); fpuclassify fpuclass (.*);
// output for store instructions
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
//***************** //*****************
//fpregfile E/M pipe registers //fpregfile E/M pipe registers
//***************** //*****************
flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M); flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM);
flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M); flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M); flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
//***************** //*****************
// fma E/M pipe registers // fma E/M pipe registers
//***************** //*****************
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, ProdManE, ProdManM); flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, AlignedAddendE, AlignedAddendM); flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, ProdExpE, ProdExpM); flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, AddendStickyE, AddendStickyM); flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM);
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, KillProdE, KillProdM); flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, XZeroE, XZeroM); flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, YZeroE, YZeroM); flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, ZZeroE, ZZeroM); flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, XInfE, XInfM); flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, YInfE, YInfM); flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, ZInfE, ZInfM); flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, XNaNE, XNaNM); flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, YNaNE, YNaNM); flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM);
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, ZNaNE, ZNaNM); flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM);
//***************** //*****************
// fpadd E/M pipe registers // fpadd E/M pipe registers
//***************** //*****************
flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM);
flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM);
flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM); flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM);
flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM);
flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM);
flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM);
flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM); flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM); flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM);
flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM);
flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM);
flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); flopenrc #(1) EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM);
flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM); flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM);
//***************** //*****************
// fpcmp E/M pipe registers // fpcmp E/M pipe registers
//***************** //*****************
flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM);
flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM);
flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM);
flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM);
flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM);
// put this in for the event we want to delay fsgn - will otherwise bypass
//***************** //*****************
// fpsgn E/M pipe registers // fpsgn E/M pipe registers
//***************** //*****************
flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM); flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM); flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM);
//***************** //*****************
// other E/M pipe registers // other E/M pipe registers
//***************** //*****************
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM); flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
flopenrc #(1) EMReg9(clk, reset, PipeClearEM, PipeEnableEM, SelLoadInputE, SelLoadInputM);
//***************** //*****************
// fpuclassify E/M pipe registers // fpuclassify E/M pipe registers
//***************** //*****************
flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM); flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM);
//BEGIN MEMORY STAGE //BEGIN MEMORY STAGE
assign FWriteDataM = FmtM ? FInput1M[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FInput1M[63:32]}; mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM);
//adjecent adress values are sent to the FPU, select the correct one assign FFlgM = CmpInvalidM & FResSelM[1];
// -imm is 80000 most of the time vs the error one which is 00000
// mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
// mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM);
fma2 fma2(.X(FInput1M), .Y(FInput2M), .Z(FInput3M), .FOpCtrlM(FOpCtrlM[2:0]), .*); // second instance of two-stage FMA unit
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*);
// second instance of two-stage floating-point add/cvt unit // second instance of two-stage floating-point add/cvt unit
fpuaddcvt2 fpadd2 (.*); fpuaddcvt2 fpadd2 (.*);
// second instance of two-stage floating-point comparator
fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM),
.Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
// Align SrcA to MSB when single precision // Align SrcA to MSB when single precision
mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
//***************** //*****************
//fpregfile M/W pipe registers //fpregfile M/W pipe registers
//***************** //*****************
flopenrc #(64) MWFpReg1(clk, reset, PipeClearMW, PipeEnableMW, FInput1M, FInput1W); flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW);
flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW);
//***************** //*****************
// fma M/W pipe registers // fma M/W pipe registers
//***************** //*****************
flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW);
flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW);
//***************** //*****************
// fpdiv M/W pipe registers // fpdiv M/W pipe registers
//***************** //*****************
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW); flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW);
//***************** //*****************
// fpadd M/W pipe registers // fpadd M/W pipe registers
//***************** //*****************
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW);
//***************** //*****************
// fpcmp M/W pipe registers // fpcmp M/W pipe registers
//***************** //*****************
flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW);
flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); // flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW);
flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW); flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW);
//***************** //*****************
// fpsgn M/W pipe registers // fpsgn M/W pipe registers
//***************** //*****************
flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW); flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW);
flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW); flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW);
//***************** //*****************
// other M/W pipe registers // other M/W pipe registers
//***************** //*****************
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW); flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW); {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM},
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW});
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
flopenrc #(64) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, AlignedSrcAM, SrcAW);
// flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
flopenrc #(4) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FOpCtrlM, FOpCtrlW);
//***************** //*****************
// fpuclassify M/W pipe registers // fpuclassify M/W pipe registers
//***************** //*****************
flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW); flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
@ -424,14 +380,6 @@ module fpu (
//######################################### //#########################################
// BEGIN WRITEBACK STAGE // BEGIN WRITEBACK STAGE
//######################################### //#########################################
// mux3 #(64) FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM);
// mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
//***RV32D needs to give two bus transactions
mux2 #(64) FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW);
mux2 #(64) FLoadStoreResultMux(FLoadResultW, FInput1W, |FOpCtrlW[2:1], FLoadStoreResultW);
@ -440,47 +388,26 @@ module fpu (
always_comb begin always_comb begin
case (FResultSelW) case (FResultSelW)
// div/sqrt 3'b000 : FPUFlagsW = 5'b0;
3'b000 : FPUFlagsW = FDivFlagsW; 3'b001 : FPUFlagsW = FmaFlagsW;
// cmp 3'b010 : FPUFlagsW = FAddFlagsW;
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; 3'b011 : FPUFlagsW = FDivFlagsW;
//fma/mult 3'b100 : FPUFlagsW = {4'b0,FFlgW};
3'b010 : FPUFlagsW = FmaFlagsW;
// sgn inj
3'b011 : FPUFlagsW = SgnFlagsW;
// add/sub/cnvt
3'b100 : FPUFlagsW = FAddFlagsW;
// classify
3'b101 : FPUFlagsW = 5'b0;
// output SrcAW
3'b110 : FPUFlagsW = 5'b0;
// output FRD1
3'b111 : FPUFlagsW = 5'b0;
default : FPUFlagsW = 5'bxxxxx; default : FPUFlagsW = 5'bxxxxx;
endcase endcase
end end
always_comb begin always_comb begin
case (FResultSelW) case (FResultSelW)
// div/sqrt 3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
3'b000 : FPUResult64W = FDivResultW; 3'b001 : FPUResult64W = FmaResultW;
// cmp 3'b010 : FPUResult64W = FAddResultW;
3'b001 : FPUResult64W = FCmpResultW; 3'b011 : FPUResult64W = FDivResultW;
//fma/mult 3'b100 : FPUResult64W = FResW;
3'b010 : FPUResult64W = FmaResultW; default : FPUResult64W = 64'bxxxxx;
// sgn inj
3'b011 : FPUResult64W = SgnResultW;
// add/sub/cnvt
3'b100 : FPUResult64W = FAddResultW;
// classify
3'b101 : FPUResult64W = ClassResultW;
// output SrcAW
3'b110 : FPUResult64W = SrcAW;
// Load/Store/Move to FP-register
3'b111 : FPUResult64W = FLoadStoreResultW;
default : FPUResult64W = {64{1'bx}};
endcase endcase
end // always_comb end
// interface between XLEN size datapath and double-precision sized // interface between XLEN size datapath and double-precision sized
// floating-point results // floating-point results

View File

@ -27,10 +27,10 @@
// //
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE); module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE);
input logic [63:0] FInput1E; // 1st input operand (A) input logic [63:0] SrcXE; // 1st input operand (A)
input logic [63:0] FInput2E; // 2nd input operand (B) input logic [63:0] SrcYE; // 2nd input operand (B)
input logic [3:0] FOpCtrlE; // Function opcode input logic [3:0] FOpCtrlE; // Function opcode
input logic FmtE; // Result Precision (1 for double, 0 for single) input logic FmtE; // Result Precision (1 for double, 0 for single)
@ -81,12 +81,12 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
// and the sign of the first operand is set appropriately based on // and the sign of the first operand is set appropriately based on
// if the operation is absolute value or negation. // if the operation is absolute value or negation.
convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P); convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);
// Test for exceptions and return the "Invalid Operation" and // Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "AddSelInvE" is used in // "Denormalized" Input Flags. The "AddSelInvE" is used in
// the third pipeline stage to select the result. Also, AddOp1NormE // the third pipeline stage to select the result. Also, AddOp1NormE
// and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized. // and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized.
// sub is one if the effective operation is subtraction. // sub is one if the effective operation is subtraction.
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
@ -159,8 +159,8 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
// Place either the sign-extended 32-bit value or the original 64-bit value // Place either the sign-extended 32-bit value or the original 64-bit value
// into IntValue (to be used for integer to floating point conversion) // into IntValue (to be used for integer to floating point conversion)
assign IntValue [31:0] = FInput1E[31:0]; assign IntValue [31:0] = SrcXE[31:0];
assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32]; assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32];
// If doing an integer to floating point conversion, mantissaA3 is set to // If doing an integer to floating point conversion, mantissaA3 is set to
// IntVal and the prenormalized exponent is set to 1084. Otherwise, // IntVal and the prenormalized exponent is set to 1084. Otherwise,

View File

@ -1,7 +1,8 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpuclassify ( module fpuclassify (
input logic [63:0] FInput1E, input logic [63:0] SrcXE,
input logic FmtE, // 0-single 1-double input logic FmtE, // 0-single 1-double
output logic [63:0] ClassResultE output logic [63:0] ClassResultE
); );
@ -13,9 +14,9 @@ module fpuclassify (
logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan; logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan;
// single and double precision layouts // single and double precision layouts
assign single = FInput1E[63:32]; assign single = SrcXE[63:32];
assign double = FInput1E; assign double = SrcXE;
assign sign = FInput1E[63]; assign sign = SrcXE[63];
// basic calculations for readability // basic calculations for readability
assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23]; assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23];
@ -43,10 +44,7 @@ module fpuclassify (
// bit 7 - +infinity // bit 7 - +infinity
// bit 8 - signaling NaN // bit 8 - signaling NaN
// bit 9 - quiet NaN // bit 9 - quiet NaN
assign ClassResultE = FmtE ? {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity} : ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity};
{{22{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity, {32{1'b0}}};
endmodule endmodule

View File

@ -1,3 +1,4 @@
// //
// File name : fpcomp.v // File name : fpcomp.v
// Title : Floating-Point Comparator // Title : Floating-Point Comparator
@ -17,9 +18,9 @@
// and correct for sign bits // and correct for sign bits
// //
// This module takes 64-bits inputs op1 and op2, VSS, and VDD // This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal Sel that indicates the type of // signals, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below. // operands being compared as indicated below.
// Sel Description // FOpCtrlE Description
// 00 double precision numbers // 00 double precision numbers
// 01 single precision numbers // 01 single precision numbers
// 10 half precision numbers // 10 half precision numbers
@ -37,24 +38,41 @@
// It also produces an invalid operation flag, which is one // It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754 // if either of the input operands is a signaling NaN per 754
module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec `include "wally-config.vh"
module fpucmp1 (
input logic [63:0] op1; input logic [63:0] op1,
input logic [63:0] op2; input logic [63:0] op2,
input logic [1:0] Sel; input logic [2:0] FOpCtrlE,
input logic FmtE,
output logic [7:0] w, x;
output logic ANaN, BNaN; output logic Invalid, // Invalid Operation
output logic Azero, Bzero; // output logic [1:0] FCC, // Condition Codes
output logic [63:0] FCmpResultE);
// Perform magnitude comparison between the 63 least significant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
logic [1:0] FCC; // Condition Codes
logic [7:0] w, x;
logic ANaN, BNaN;
logic Azero, Bzero;
logic LT; // magnitude op1 < magnitude op2
logic EQ; // magnitude op1 = magnitude op2
magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE);
// Perform magnitude comparison between the 63 least significant bits // Perform magnitude comparison between the 63 least significant bits
// of the input operands. Only LT and EQ are returned, since GT can // of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values. // be determined from these values.
magcompare64b_1 magcomp2 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); magcompare64b_2 magcomp2 (LT, EQ, w, x);
// Determine final values based on output of magnitude comparison, // Determine final values based on output of magnitude comparison,
// sign bits, and special case testing. // sign bits, and special case testing.
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, Sel); exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*);
endmodule // fpcomp endmodule // fpcomp
@ -178,9 +196,9 @@ module magcompare64b_1 (w, x, A, B);
endmodule // magcompare64b endmodule // magcompare64b
// This module takes 64-bits inputs A and B, two magnitude comparison // This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of // flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below. // operands being compared as indicated below.
// Sel Description // FOpCtrlE Description
// 00 double precision numbers // 00 double precision numbers
// 01 single precision numbers // 01 single precision numbers
// 10 half precision numbers // 10 half precision numbers
@ -196,11 +214,11 @@ endmodule // magcompare64b
// It also produces a invalid operation flag, which is one // It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN. // if either of the input operands is a signaling NaN.
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);
input logic [63:0] A; input logic [63:0] A;
input logic [63:0] B; input logic [63:0] B;
input logic [1:0] Sel; input logic [2:0] FOpCtrlE;
logic dp, sp, hp; logic dp, sp, hp;
@ -209,9 +227,9 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
output logic Azero; output logic Azero;
output logic Bzero; output logic Bzero;
assign dp = !Sel[1]&!Sel[0]; assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
assign sp = !Sel[1]&Sel[0]; assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
assign hp = Sel[1]&!Sel[0]; assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
// Test if A or B is NaN. // Test if A or B is NaN.
assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) & assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) &
@ -232,3 +250,216 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
assign Bzero = (B[62:0] == 63'h0); assign Bzero = (B[62:0] == 63'h0);
endmodule // exception_cmp endmodule // exception_cmp
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
/*module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule*/ // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
// module magcompare2c (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// assign LT = B[1] | (!A[1]&B[0]);
// assign GT = A[1] | (!B[1]&A[0]);
// endmodule // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
// This structure was modified so
// that it only does a strict magnitude comparison, and only
// returns flags for less than (LT) and equal to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gate.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
// magcompare64b_2: back half of the split 64-bit magnitude comparator.
// Reduces the two 8-bit partial vectors w and x through three levels of
// magcompare2c cells (4 cells -> 2 cells -> 1 cell) down to a single
// LT/GT pair, then derives EQ from them.
// NOTE(review): w and x are presumably the partial comparison flags
// produced by magcompare64b_1 -- confirm against that module.
module magcompare64b_2 (LT, EQ, w, x);
input logic [7:0] w;
input logic [7:0] x;
// Level-1 outputs (one bit per 2-bit slice of w/x).
logic [3:0] y;
logic [3:0] z;
// Level-2 outputs.
logic [1:0] a;
logic [1:0] b;
// Final "greater than" flag; internal only, used to form EQ.
logic GT;
output logic LT;
output logic EQ;
// Level 1: compare w and x two bits at a time.
// NOTE(review): the data operands are passed in swapped order at each
// level ((x,w), then (z,y), then (b,a)); this presumably matches the
// don't-care-optimized encoding of magcompare2c -- confirm against the
// magcompare2c definition before reordering anything here.
magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
// Level 2: combine the four level-1 results into two.
magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
// Level 3: final LT/GT decision.
magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
// Equal exactly when neither less-than nor greater-than is asserted.
assign EQ = ~(LT | GT);
endmodule // magcompare64b_2
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
// exception_cmp_2: final decision stage of the floating-point comparator.
// Combines the magnitude-compare flags (LT_mag, EQ_mag) with the NaN and
// zero flags of both operands to produce:
//   invalid     - set when either operand is a signaling NaN
//   fcc         - 2-bit condition code (00 equal, 01 less, 10 greater,
//                 11 unordered)
//   FCmpResultE - 64-bit result selected by FOpCtrlE (min/max/eq/lt/le)
module exception_cmp_2 (
input logic [63:0] A, // first operand
input logic [63:0] B, // second operand
input logic FmtE, // NOTE(review): not referenced inside this module
input logic LT_mag, // magnitude comparison: less than
input logic EQ_mag, // magnitude comparison: equal
input logic [2:0] FOpCtrlE, // selects the result in the case statement below
output logic invalid,
output logic [1:0] fcc,
output logic [63:0] FCmpResultE,
input logic Azero, // zero flag for A (computed outside this module)
input logic Bzero, // zero flag for B (computed outside this module)
input logic ANaN, // NaN flag for A (computed outside this module)
input logic BNaN); // NaN flag for B (computed outside this module)
// Precision selects used only by the signaling-NaN test.
logic dp;
logic sp;
logic hp;
// Signaling-NaN flags for each operand.
logic ASNaN;
logic BSNaN;
// Final ordered/unordered comparison outcome.
logic UO;
logic GT;
logic LT;
logic EQ;
// NOTE(review): sixtythreezeros is never referenced in this module.
logic [62:0] sixtythreezeros = 63'h0;
// NOTE(review): dp/sp/hp are decoded from FOpCtrlE[1:0], the same bits the
// case statement at the bottom uses to pick the operation, while FmtE is
// unused -- confirm this is the intended precision select.
assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
// Values are unordered if A is NaN or B is NaN.
assign UO = (ANaN | BNaN);
// Test if A or B is a signaling NaN: the operand is NaN and the bit
// tested for the active precision (bit 53 for sp, 50 for dp, 56 for hp)
// is clear.
assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
// If either A or B is a signaling NaN the "Invalid Operation"
// exception flag is set to one; otherwise it is zero.
assign invalid = (ASNaN | BSNaN);
// A and B are equal if the magnitude compare reports equality or both
// operands are zero (so +0 equals -0). A and B are never equal when
// they are unordered.
assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// A is less than B if LT_mag is set and the operands are not both
// negative, or if both operands are negative and LT_mag is clear.
// Also, A is not less than B if A and B are equal or unordered.
assign LT = ((~LT_mag & A[63] & B[63]) |
(LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
// A is greater than B when LT, EQ, and UO are all false.
assign GT = ~(LT | EQ | UO);
// Note: it may be possible to optimize the setting of fcc
// a little more, but it is probably not worth the effort.
// Set the bits of fcc based on LT, GT, EQ, and UO:
// 00 equal, 01 less than, 10 greater than, 11 unordered.
assign fcc[0] = LT | UO;
assign fcc[1] = GT | UO;
// Select the 64-bit result for the requested operation. min/max return
// one of the operands; the compare operations return a 0/1 flag in bit 0.
// Any other FOpCtrlE value yields zero.
always_comb begin
case (FOpCtrlE[2:0])
3'b111: FCmpResultE = LT ? A : B;// min
3'b101: FCmpResultE = GT ? A : B;// max
3'b010: FCmpResultE = {63'b0, EQ};// equal
3'b001: FCmpResultE = {63'b0, LT};// less than
3'b011: FCmpResultE = {63'b0, LT|EQ};// less than or equal
default: FCmpResultE = 64'b0;
endcase
end
endmodule // exception_cmp_2

View File

@ -1,243 +1,243 @@
// // //
// File name : fpcomp.v // // File name : fpcomp.v
// Title : Floating-Point Comparator // // Title : Floating-Point Comparator
// project : FPU // // project : FPU
// Library : fpcomp // // Library : fpcomp
// Author(s) : James E. Stine // // Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator // // Purpose : definition of main unit to floating-point comparator
// notes : // // notes :
// // //
// Copyright Oklahoma State University // // Copyright Oklahoma State University
// // //
// Floating Point Comparator (Algorithm) // // Floating Point Comparator (Algorithm)
// // //
// 1.) Performs sign-extension if the inputs are 32-bit integers. // // 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs // // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values) // // 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits // // and correct for sign bits
// // //
// This module takes 64-bits inputs op1 and op2, VSS, and VDD // // This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal Sel that indicates the type of // // signals, and a 2-bit signal Sel that indicates the type of
// operands being compared as indicated below. // // operands being compared as indicated below.
// Sel Description // // Sel Description
// 00 double precision numbers // // 00 double precision numbers
// 01 single precision numbers // // 01 single precision numbers
// 10 half precision numbers // // 10 half precision numbers
// 11 (unused) // // 11 (unused)
// // //
// The comparator produces a 2-bit signal FCC, which // // The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison: // // indicates the result of the comparison:
// // //
// fcc decscription // // fcc decscription
// 00 A = B // // 00 A = B
// 01 A < B // // 01 A < B
// 10 A > B // // 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN) // // 11 A and B are unordered (i.e., A or B is NaN)
// // //
// It also produces an invalid operation flag, which is one // // It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754 // // if either of the input operands is a signaling NaN per 754
// fpucmp2: floating-point compare top level.
//   - magcomp2 compares w and x and reports the LT / EQ magnitude flags.
//   - exc2 combines those flags with op1/op2, the NaN/zero indicators and Sel
//     to drive Invalid, FCC and FCmpResultM.
// NOTE(review): the trailing "// ..." text on most lines below repeats the line
// in commented-out form; it appears to be residue of a side-by-side diff rather
// than documentation of the code it follows.
module fpucmp2 ( // module fpucmp2 (
input logic [63:0] op1, // input logic [63:0] op1,
input logic [63:0] op2, // input logic [63:0] op2,
input logic [1:0] Sel, // input logic [1:0] Sel,
input logic [7:0] w, x, // input logic [7:0] w, x,
input logic ANaN, BNaN, // input logic ANaN, BNaN,
input logic Azero, Bzero, // input logic Azero, Bzero,
input logic [3:0] FOpCtrlM, // input logic [3:0] FOpCtrlM,
input logic FmtM, // input logic FmtM,
output logic Invalid, // Invalid Operation // output logic Invalid, // Invalid Operation
output logic [1:0] FCC, // Condition Codes // output logic [1:0] FCC, // Condition Codes
output logic [63:0] FCmpResultM); // output logic [63:0] FCmpResultM);
logic LT; // magnitude op1 < magnitude op2 // logic LT; // magnitude op1 < magnitude op2
logic EQ; // magnitude op1 = magnitude op2 // logic EQ; // magnitude op1 = magnitude op2
// NOTE(review): w and x are declared [7:0] in this module, so the "63 least
// significant bits" wording in the next comment looks stale -- confirm.
// Perform magnitude comparison between the 63 least signficant bits // // Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can // // of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values. // // be determined from these values.
magcompare64b_2 magcomp2 (LT, EQ, w, x); // magcompare64b_2 magcomp2 (LT, EQ, w, x);
// Determine final values based on output of magnitude comparison, // // Determine final values based on output of magnitude comparison,
// sign bits, and special case testing. // // sign bits, and special case testing.
// FmtM, FOpCtrlM and FCmpResultM are not listed explicitly below; they are
// connected by the trailing .* because exception_cmp_2 has ports of the same names.
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); // exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*);
endmodule // fpcomp // endmodule // fpcomp
/*module magcompare2b (LT, GT, A, B); // /*module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule*/ // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1' if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
// module magcompare2c (LT, GT, A, B);
// input logic [1:0] A; // input logic [1:0] A;
// input logic [1:0] B; // input logic [1:0] B;
// output logic LT; // output logic LT;
// output logic GT; // output logic GT;
// assign LT = B[1] | (!A[1]&B[0]); // // Determine if A < B using a minimized sum-of-products expression
// assign GT = A[1] | (!B[1]&A[0]); // assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// // Determine if A > B using a minimized sum-of-products expression
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
// endmodule // magcompare2b // endmodule*/ // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B // // 2-bit magnitude comparator
// and EQ is '1'if A = B. LT and GT are both '0' if A > B. // // This module compares two 2-bit values A and B. LT is '1' if A < B
// This structure was modified so // // and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// that it only does a strict magnitdude comparison, and only // // this version actually incorporates don't cares into the equation to
// returns flags for less than (LT) and eqaual to (EQ). It uses a tree // // simplify the optimization
// of 63 2-bit magnitude comparators, followed by one OR gate.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
// magcompare64b_2: as coded here, w and x are 8-bit inputs compared by a
// three-level tree of seven magcompare2c cells:
//   level 1 (mag39..mag3C): one cell per 2-bit slice of x and w -> y[i], z[i]
//   level 2 (mag3D, mag3E): combine pairs of z and y            -> a[i], b[i]
//   level 3 (mag3F):        combine b and a                     -> LT, GT
// EQ is asserted when neither LT nor GT is set.
// NOTE(review): magcompare2c appears only in commented-out form in this chunk;
// its port order is presumably (LT, GT, A, B) -- confirm against its definition.
// NOTE(review): the trailing "// ..." text on the lines below is unrelated
// commented-out code (apparently side-by-side diff residue), not a description
// of the line it follows.
module magcompare64b_2 (LT, EQ, w, x); // // module magcompare2c (LT, GT, A, B);
input logic [7:0] w; // // input logic [1:0] A;
input logic [7:0] x; // // input logic [1:0] B;
logic [3:0] y;
logic [3:0] z;
logic [1:0] a;
logic [1:0] b;
logic GT;
output logic LT; // // output logic LT;
output logic EQ; // // output logic GT;
// // assign LT = B[1] | (!A[1]&B[0]);
// // assign GT = A[1] | (!B[1]&A[0]);
// // endmodule // magcompare2b
// // This module compares two 64-bit values A and B. LT is '1' if A < B
// // and EQ is '1'if A = B. LT and GT are both '0' if A > B.
// // This structure was modified so
// // that it only does a strict magnitdude comparison, and only
// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree
// // of 63 2-bit magnitude comparators, followed by one OR gates.
// //
// // J. E. Stine and M. J. Schulte, "A combined two's complement and
// // floating-point comparator," 2005 IEEE International Symposium on
// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// // doi: 10.1109/ISCAS.2005.1464531
// module magcompare64b_2 (LT, EQ, w, x);
// input logic [7:0] w;
// input logic [7:0] x;
// logic [3:0] y;
// logic [3:0] z;
// logic [1:0] a;
// logic [1:0] b;
// logic GT;
// Level 1: compare each 2-bit slice of x against the matching slice of w.
magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); // output logic LT;
magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); // output logic EQ;
magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
// Level 2: merge the level-1 results two slices at a time.
magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); // magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); // magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
// Level 3: merge the two level-2 results into the final LT / GT.
magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); // magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
// Equal when the tree reports neither less-than nor greater-than.
assign EQ = ~(LT | GT);
endmodule // magcompare64b
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// operands being compared as indicated below.
// Sel Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
// exception_cmp_2: turns the magnitude flags (LT_mag, EQ_mag) plus the sign bits
// A[63]/B[63] and the NaN/zero indicators into the final compare outputs:
//   invalid      - set when either operand is flagged as a signaling NaN
//   fcc[0]       - LT | UO,  fcc[1] - GT | UO  (so 2'b11 means unordered)
//   FCmpResultM  - selected by FOpCtrlM[2:0]: min, max, equal, less-than,
//                  less-than-or-equal; 64'b0 for any other code
// NOTE(review): the trailing "// ..." text on many lines below is unrelated
// commented-out code (apparently side-by-side diff residue), not a description
// of the line it follows.
module exception_cmp_2 (
input logic [63:0] A,
input logic [63:0] B,
input logic FmtM,
input logic LT_mag,
input logic EQ_mag,
input logic [1:0] Sel,
input logic [3:0] FOpCtrlM,
output logic invalid, // magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
output logic [1:0] fcc,
output logic [63:0] FCmpResultM,
input logic Azero, // assign EQ = ~(LT | GT);
input logic Bzero,
input logic ANaN, // endmodule // magcompare64b
input logic BNaN);
// // This module takes 64-bits inputs A and B, two magnitude comparison
// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// // operands being compared as indicated below.
// // Sel Description
// // 00 double precision numbers
// // 01 single precision numbers
// // 10 half precision numbers
// // 11 bfloat precision numbers
// //
// // The comparator produces a 2-bit signal fcc, which
// // indicates the result of the comparison as follows:
// // fcc decscription
// // 00 A = B
// // 01 A < B
// // 10 A > B
// // 11 A and B are unordered (i.e., A or B is NaN)
// // It also produces a invalid operation flag, which is one
// // if either of the input operands is a signaling NaN.
// module exception_cmp_2 (
// input logic [63:0] A,
// input logic [63:0] B,
// input logic FmtM,
// input logic LT_mag,
// input logic EQ_mag,
// input logic [1:0] Sel,
// input logic [3:0] FOpCtrlM,
logic dp; // output logic invalid,
logic sp; // output logic [1:0] fcc,
logic hp; // output logic [63:0] FCmpResultM,
logic ASNaN;
logic BSNaN;
logic UO;
logic GT;
logic LT;
logic EQ;
// NOTE: sixtythreezeros is declared here but not referenced anywhere else in this module.
logic [62:0] sixtythreezeros = 63'h0;
// One-hot decode of Sel: 00 -> dp, 01 -> sp, 10 -> hp (Sel == 11 sets none of them).
assign dp = !Sel[1]&!Sel[0]; // input logic Azero,
assign sp = !Sel[1]&Sel[0]; // input logic Bzero,
assign hp = Sel[1]&!Sel[0]; // input logic ANaN,
// input logic BNaN);
// Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
// point comparison is being performed).
// NOTE: as coded, UO depends only on ANaN | BNaN.
assign UO = (ANaN | BNaN);
// Test if A or B is a signaling NaN.
// NOTE(review): the bits tested are 53 (sp), 50 (dp) and 56 (hp). In a standard
// IEEE 754 binary64 layout the quiet bit is bit 51 -- confirm these positions
// against how operands are packed into the 64-bit A/B buses.
assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
// If either A or B is a signaling NaN the "Invalid Operation"
// exception flag is set to one; otherwise it is zero.
assign invalid = (ASNaN | BSNaN);
// A and B are equal if (their magnitudes are equal) AND ((their signs are
// equal) or (their magnitudes are zero AND they are floating point
// numbers)). Also, A and B are not equal if they are unordered.
// NOTE: as coded, EQ uses EQ_mag, Azero & Bzero and UO only; the sign bits are not examined here.
assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// A is less than B if (A is negative and B is posiive) OR // logic dp;
// (A and B are positive and the magnitude of A is less than // logic sp;
// the magnitude of B) or (A and B are negative integers and // logic hp;
// the magnitude of A is less than the magnitude of B) or // logic ASNaN;
// (A and B are negative floating point numbers and // logic BSNaN;
// the magnitude of A is greater than the magnitude of B). // logic UO;
// Also, A is not less than B if A and B are equal or unordered. // logic GT;
assign LT = ((~LT_mag & A[63] & B[63]) | // logic LT;
(LT_mag & ~(A[63] & B[63])))&~EQ&~UO; // logic EQ;
// logic [62:0] sixtythreezeros = 63'h0;
// assign dp = !Sel[1]&!Sel[0];
// assign sp = !Sel[1]&Sel[0];
// assign hp = Sel[1]&!Sel[0];
// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
// // point comparison is being performed.
// assign UO = (ANaN | BNaN);
// // Test if A or B is a signaling NaN.
// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
// // If either A or B is a signaling NaN the "Invalid Operation"
// // exception flag is set to one; otherwise it is zero.
// assign invalid = (ASNaN | BSNaN);
// // A and B are equal if (their magnitudes are equal) AND ((their signs are
// // equal) or (their magnitudes are zero AND they are floating point
// // numbers)). Also, A and B are not equal if they are unordered.
// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// A is greater than B when LT, EQ, and UO are are false. // // A is less than B if (A is negative and B is posiive) OR
assign GT = ~(LT | EQ | UO); // // (A and B are positive and the magnitude of A is less than
// // the magnitude of B) or (A and B are negative integers and
// // the magnitude of A is less than the magnitude of B) or
// // (A and B are negative floating point numbers and
// // the magnitude of A is greater than the magnitude of B).
// // Also, A is not less than B if A and B are equal or unordered.
// assign LT = ((~LT_mag & A[63] & B[63]) |
// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
// // A is greater than B when LT, EQ, and UO are are false.
// assign GT = ~(LT | EQ | UO);
// Note: it may be possible to optimize the setting of fcc // // Note: it may be possible to optimize the setting of fcc
// a little more, but it is probably not worth the effort. // // a little more, but it is probably not worth the effort.
// Set the bits of fcc based on LT, GT, EQ, and UO // // Set the bits of fcc based on LT, GT, EQ, and UO
assign fcc[0] = LT | UO; // assign fcc[0] = LT | UO;
assign fcc[1] = GT | UO; // assign fcc[1] = GT | UO;
// Result mux. For the three compare codes the 1-bit result is placed in bit 0
// when FmtM is 1 and in bit 32 when FmtM is 0; all other result bits are zero.
always_comb begin // always_comb begin
case (FOpCtrlM[2:0]) // case (FOpCtrlM[2:0])
3'b111: FCmpResultM = LT ? A : B;//min // 3'b111: FCmpResultM = LT ? A : B;//min
3'b101: FCmpResultM = GT ? A : B;//max // 3'b101: FCmpResultM = GT ? A : B;//max
3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal // 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal
3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than // 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than
3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal // 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal
default: FCmpResultM = 64'b0; // default: FCmpResultM = 64'b0;
endcase // endcase
end // end
endmodule // exception_cmp // endmodule // exception_cmp

View File

@ -26,47 +26,41 @@
`include "wally-config.vh" `include "wally-config.vh"
module fpuhazard( module fpuhazard(
input logic [4:0] Adr1, Adr2, Adr3, input logic [4:0] Adr1E, Adr2E, Adr3E,
input logic FWriteEnE, FWriteEnM, FWriteEnW, input logic FWriteEnM, FWriteEnW,
input logic [4:0] RdE, RdM, RdW, input logic [4:0] RdM, RdW,
input logic FDivBusyE, input logic [2:0] FResultSelM,
input logic RegWriteD, output logic FStallD,
input logic [2:0] FResultSelD, FResultSelE, output logic [1:0] ForwardXE, ForwardYE, ForwardZE
input logic IllegalFPUInstrD,
input logic FInput2UsedD, FInput3UsedD,
// Stall outputs
output logic FStallD,
output logic [1:0] FForwardInput1D, FForwardInput2D,
output logic FForwardInput3D
); );
always_comb begin always_comb begin
// set ReadData as default // set ReadData as default
FForwardInput1D = 2'b00; ForwardXE = 2'b00; // choose FRD1E
FForwardInput2D = 2'b00; ForwardYE = 2'b00; // choose FRD2E
FForwardInput3D = 1'b0; ForwardZE = 2'b00; // choose FRD3E
FStallD = FDivBusyE; FStallD = 0;
if (~IllegalFPUInstrD) begin
// if taking a value from int register if ((Adr1E == RdM) & FWriteEnM)
if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD))) // if the result will be FResM
if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM
else FStallD = 1'b1; // otherwise stall else FStallD = 1; // if the result won't be ready stall
else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W
else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE
if(FInput2UsedD) if ((Adr2E == RdM) & FWriteEnM)
if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1; // if the result will be FResM
else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM
else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE else FStallD = 1; // if the result won't be ready stall
else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W
if(FInput3UsedD) if ((Adr3E == RdM) & FWriteEnM)
if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1; // if the result will be FResM
else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1; if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM
else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE else FStallD = 1; // if the result won't be ready stall
end else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W
end end

View File

@ -1,8 +1,8 @@
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E); module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
input [63:0] FInput1E, FInput2E; input [63:0] SrcXE, SrcYE;
input [1:0] SgnOpCodeE; input [1:0] SgnOpCodeE;
output [63:0] SgnResultE; output [63:0] SgnResultE;
output [4:0] SgnFlagsE; output [4:0] SgnFlagsE;
@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E);
//op code designation: //op code designation:
// //
//00 - fsgnj - directly copy over sign value of FInput2E //00 - fsgnj - directly copy over sign value of SrcYE
//01 - fsgnjn - negate sign value of FInput2E //01 - fsgnjn - negate sign value of SrcYE
//10 - fsgnjx - XOR sign values of FInput1E & FInput2E //10 - fsgnjx - XOR sign values of SrcXE & SrcYE
// //
assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]); assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
assign SgnResultE[62:0] = FInput1E[62:0]; assign SgnResultE[62:0] = SrcXE[62:0];
//If the exponent is all ones, then the value is either Inf or NaN, //If the exponent is all ones, then the value is either Inf or NaN,
//both of which will produce a QNaN/SNaN value of some sort. This will //both of which will produce a QNaN/SNaN value of some sort. This will
//set the invalid flag high. //set the invalid flag high.
assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52]; assign AonesExp = SrcXE[62]&SrcXE[61]&SrcXE[60]&SrcXE[59]&SrcXE[58]&SrcXE[57]&SrcXE[56]&SrcXE[55]&SrcXE[54]&SrcXE[53]&SrcXE[52];
//the only flag that can occur during this operation is invalid //the only flag that can occur during this operation is invalid
//due to changing sign on already existing NaN //due to changing sign on already existing NaN

View File

@ -1,195 +0,0 @@
///////////////////////////////////////////
// lzd.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Leading zero detector (for the integer divide instructions)
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
// design of a leading zero detector circuit: comparison with logic
// synthesis," in IEEE Transactions on Very Large Scale Integration
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
// 10.1109/92.273153.
// Modified to be more hierarchical
// lz2: 2-bit leaf cell of the leading-zero detector.
//   V - 1 when B is non-zero (at least one bit set).
//   P - 1 only when B == 2'b01, i.e. the upper bit is 0 and the lower bit is 1
//       (one leading zero); 0 otherwise.
module lz2 (P, V, B);

   input  logic [1:0] B;
   output logic       P;
   output logic       V;

   // Reduction-OR: any set bit makes the result valid.
   assign V = |B;
   // Exactly one leading zero: upper bit clear, lower bit set.
   assign P = ~B[1] & B[0];

endmodule // lz2
// lzd_hier: width-parameterized wrapper around the fixed-width leading-zero
// detectors in this file.
//   WIDTH - input width; must be one of 2, 4, 8, 16, 32, 64 or 128.
//   B     - value to scan.
//   ZP    - $clog2(WIDTH)-bit position output of the selected detector.
//   ZV    - valid output of the selected detector (1 when B is non-zero).
// Exactly one detector is instantiated at elaboration time. Previously an
// unsupported WIDTH matched no branch and silently left ZP/ZV undriven; it is
// now reported with an elaboration-time $error. WIDTH == 2 maps onto lz2,
// whose (P, V, B) ports line up with (ZP, ZV, B).
module lzd_hier #(parameter WIDTH=8)
   (input  logic [WIDTH-1:0]         B,
    output logic [$clog2(WIDTH)-1:0] ZP,
    output logic                     ZV);

   if (WIDTH == 128)
     lz128 lzd127 (ZP, ZV, B);   // instance name kept as-is so hierarchical paths do not change
   else if (WIDTH == 64)
     lz64 lzd64 (ZP, ZV, B);
   else if (WIDTH == 32)
     lz32 lzd32 (ZP, ZV, B);
   else if (WIDTH == 16)
     lz16 lzd16 (ZP, ZV, B);
   else if (WIDTH == 8)
     lz8 lzd8 (ZP, ZV, B);
   else if (WIDTH == 4)
     lz4 lzd4 (ZP, ZV, B);
   else if (WIDTH == 2)
     lz2 lzd2 (ZP, ZV, B);
   else begin : unsupported_width
      // Fail at elaboration instead of producing floating outputs.
      $error("lzd_hier: unsupported WIDTH=%0d (expected 2, 4, 8, 16, 32, 64 or 128)", WIDTH);
   end

endmodule // lzd_hier
// lz4: 4-bit leading-zero detector built from two lz2 cells.
//   ZV    - 1 when any bit of B is set.
//   ZP[1] - 1 when the upper pair B[3:2] is all zero.
//   ZP[0] - P of the upper-pair cell when that pair is non-zero, otherwise
//           P of the lower-pair cell.
module lz4 (ZP, ZV, B);

   input  logic [3:0] B;
   output logic [1:0] ZP;
   output logic       ZV;

   logic posLo, posHi;   // P outputs of the lower / upper lz2 cells
   logic anyLo, anyHi;   // V outputs of the lower / upper lz2 cells

   lz2 l1 (posLo, anyLo, B[1:0]);
   lz2 l2 (posHi, anyHi, B[3:2]);

   // MSB says "upper half empty"; LSB comes from whichever half holds the leading one.
   assign ZP = {~anyHi, (anyHi ? posHi : posLo)};
   assign ZV = anyHi | anyLo;

endmodule
// lz8: 8-bit leading-zero detector built from two lz4 blocks.
//   ZV      - 1 when any bit of B is set.
//   ZP[2]   - 1 when the upper nibble B[7:4] is all zero.
//   ZP[1:0] - ZP of the upper lz4 when that nibble is non-zero, otherwise
//             ZP of the lower lz4.
module lz8 (ZP, ZV, B);

   input  logic [7:0] B;
   output logic [2:0] ZP;
   output logic       ZV;

   logic [1:0] posLo, posHi;   // ZP outputs of the lower / upper lz4
   logic       anyLo, anyHi;   // ZV outputs of the lower / upper lz4

   lz4 l1 (posLo, anyLo, B[3:0]);
   lz4 l2 (posHi, anyHi, B[7:4]);

   // MSB says "upper half empty"; low bits come from the half holding the leading one.
   assign ZP = {~anyHi, (anyHi ? posHi : posLo)};
   assign ZV = anyHi | anyLo;

endmodule
// lz16: 16-bit leading-zero detector built from two lz8 blocks.
//   ZV      - 1 when any bit of B is set.
//   ZP[3]   - 1 when the upper byte B[15:8] is all zero.
//   ZP[2:0] - ZP of the upper lz8 when that byte is non-zero, otherwise
//             ZP of the lower lz8.
module lz16 (ZP, ZV, B);

   input  logic [15:0] B;
   output logic [3:0]  ZP;
   output logic        ZV;

   logic [2:0] posLo, posHi;   // ZP outputs of the lower / upper lz8
   logic       anyLo, anyHi;   // ZV outputs of the lower / upper lz8

   lz8 l1 (posLo, anyLo, B[7:0]);
   lz8 l2 (posHi, anyHi, B[15:8]);

   // MSB says "upper half empty"; low bits come from the half holding the leading one.
   assign ZP = {~anyHi, (anyHi ? posHi : posLo)};
   assign ZV = anyHi | anyLo;

endmodule // lz16
// lz32: 32-bit leading-zero detector built from two lz16 blocks.
//   ZV      - 1 when any bit of B is set.
//   ZP[4]   - 1 when the upper half B[31:16] is all zero.
//   ZP[3:0] - ZP of the upper lz16 when that half is non-zero, otherwise
//             ZP of the lower lz16.
module lz32 (ZP, ZV, B);

   input  logic [31:0] B;
   output logic [4:0]  ZP;
   output logic        ZV;

   logic [3:0] posLo, posHi;   // ZP outputs of the lower / upper lz16
   logic       anyLo, anyHi;   // ZV outputs of the lower / upper lz16

   lz16 l1 (posLo, anyLo, B[15:0]);
   lz16 l2 (posHi, anyHi, B[31:16]);

   // MSB says "upper half empty"; low bits come from the half holding the leading one.
   assign ZP = {~anyHi, (anyHi ? posHi : posLo)};
   assign ZV = anyHi | anyLo;

endmodule // lz32
// lz64: 64-bit leading-zero detector built from two lz32 blocks.
//   ZV      - 1 when any bit of B is set.
//   ZP[5]   - 1 when the upper half B[63:32] is all zero.
//   ZP[4:0] - ZP of the upper lz32 when that half is non-zero, otherwise
//             ZP of the lower lz32.
module lz64 (ZP, ZV, B);

   input  logic [63:0] B;
   output logic [5:0]  ZP;
   output logic        ZV;

   logic [4:0] posLo, posHi;   // ZP outputs of the lower / upper lz32
   logic       anyLo, anyHi;   // ZV outputs of the lower / upper lz32

   lz32 l1 (posLo, anyLo, B[31:0]);
   lz32 l2 (posHi, anyHi, B[63:32]);

   // MSB says "upper half empty"; low bits come from the half holding the leading one.
   assign ZP = {~anyHi, (anyHi ? posHi : posLo)};
   assign ZV = anyHi | anyLo;

endmodule // lz64
// lz128: 128-bit leading-zero detector built from two lz64 blocks.
//   ZV      - 1 when any bit of B is set.
//   ZP[6]   - 1 when the upper half B[127:64] is all zero.
//   ZP[5:0] - ZP of the upper lz64 when that half is non-zero, otherwise
//             ZP of the lower lz64.
// Fix: the halves were previously sliced as B[64:0] and B[127:63]. Those are
// 65-bit, overlapping slices connected to 64-bit ports, so the upper detector
// was fed a misaligned window. The halves are now the disjoint 64-bit slices
// B[63:0] and B[127:64], matching the pattern used by lz4 through lz64.
module lz128 (ZP, ZV, B);

   input logic [127:0] B;

   logic [5:0] 	       ZPa;   // ZP of the lower 64 bits
   logic [5:0] 	       ZPb;   // ZP of the upper 64 bits
   logic 	       ZVa;   // lower 64 bits non-zero
   logic 	       ZVb;   // upper 64 bits non-zero

   output logic [6:0]  ZP;
   output logic        ZV;

   lz64 l1(ZPa, ZVa, B[63:0]);
   lz64 l2(ZPb, ZVb, B[127:64]);

   // Low bits come from the half that holds the leading one.
   assign ZP[5:0] = ZVb ? ZPb : ZPa;
   // MSB is set when the upper half is empty.
   assign ZP[6]   = ~ZVb;
   assign ZV = ZVa | ZVb;

endmodule // lz128
/* verilator lint_on DECLFILENAME */

View File

@ -32,7 +32,7 @@ module hazard(
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
input logic LoadStallD, MulDivStallD, CSRRdStallD, input logic LoadStallD, MulDivStallD, CSRRdStallD,
input logic DCacheStall, ICacheStallF, input logic DCacheStall, ICacheStallF,
input logic FPUStallD, input logic FPUStallD, FStallD,
input logic DivBusyE,FDivBusyE, input logic DivBusyE,FDivBusyE,
// Stall & flush outputs // Stall & flush outputs
output logic StallF, StallD, StallE, StallM, StallW, output logic StallF, StallD, StallE, StallM, StallW,
@ -56,7 +56,7 @@ module hazard(
// If any stages are stalled, the first stage that isn't stalled must flush. // If any stages are stalled, the first stage that isn't stalled must flush.
assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE); assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE);
assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallECause = DivBusyE | FDivBusyE; assign StallECause = DivBusyE | FDivBusyE;
assign StallMCause = 0; assign StallMCause = 0;
assign StallWCause = DCacheStall | ICacheStallF; assign StallWCause = DCacheStall | ICacheStallF;

View File

@ -45,11 +45,13 @@ module controller(
output logic MemReadE, CSRReadE, // for Hazard Unit output logic MemReadE, CSRReadE, // for Hazard Unit
output logic [2:0] Funct3E, output logic [2:0] Funct3E,
output logic MulDivE, W64E, output logic MulDivE, W64E,
output logic JumpE, output logic JumpE,
output logic [1:0] MemRWE,
// Memory stage control signals // Memory stage control signals
input logic StallM, FlushM, input logic StallM, FlushM,
output logic [1:0] MemRWM, output logic [1:0] MemRWM,
output logic CSRReadM, CSRWriteM, PrivilegedM, output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic SCE,
output logic [1:0] AtomicM, output logic [1:0] AtomicM,
output logic [2:0] Funct3M, output logic [2:0] Funct3M,
output logic RegWriteM, // for Hazard Unit output logic RegWriteM, // for Hazard Unit
@ -73,7 +75,7 @@ module controller(
// pipelined control signals // pipelined control signals
logic RegWriteE; logic RegWriteE;
logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM;
logic [1:0] MemRWD, MemRWE; logic [1:0] MemRWD;
logic JumpD; logic JumpD;
logic BranchD, BranchE; logic BranchD, BranchE;
logic [1:0] ALUOpD; logic [1:0] ALUOpD;
@ -140,6 +142,7 @@ module controller(
ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide
else else
ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction
//7'b1010011: ControlsD = `CTRLW'b0_000_00_00_101_0_00_0_0_0_0_0_0_00_1; // FP
7'b1100011: ControlsD = `CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq 7'b1100011: ControlsD = `CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq
7'b1100111: ControlsD = `CTRLW'b1_000_00_00_000_0_00_1_1_0_0_0_0_00_0; // jalr 7'b1100111: ControlsD = `CTRLW'b1_000_00_00_000_0_00_1_1_0_0_0_0_00_0; // jalr
7'b1101111: ControlsD = `CTRLW'b1_011_00_00_000_0_00_1_0_0_0_0_0_00_0; // jal 7'b1101111: ControlsD = `CTRLW'b1_011_00_00_000_0_00_1_0_0_0_0_0_00_0; // jal
@ -202,7 +205,8 @@ module controller(
assign PCSrcE = JumpE | BranchE & BranchTakenE; assign PCSrcE = JumpE | BranchE & BranchTakenE;
assign MemReadE = MemRWE[1]; assign MemReadE = MemRWE[1];
assign SCE = (ResultSrcE == 3'b100);
// Memory stage pipeline control register // Memory stage pipeline control register
flopenrc #(15) controlregM(clk, reset, FlushM, ~StallM, flopenrc #(15) controlregM(clk, reset, FlushM, ~StallM,

View File

@ -37,6 +37,9 @@ module datapath (
input logic ALUSrcAE, ALUSrcBE, input logic ALUSrcAE, ALUSrcBE,
input logic TargetSrcE, input logic TargetSrcE,
input logic JumpE, input logic JumpE,
input logic IllegalFPUInstrE,
input logic [1:0] MemRWE,
input logic [`XLEN-1:0] FWriteDataE,
input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE, input logic [`XLEN-1:0] PCLinkE,
output logic [2:0] FlagsE, output logic [2:0] FlagsE,
@ -44,13 +47,13 @@ module datapath (
output logic [`XLEN-1:0] SrcAE, SrcBE, output logic [`XLEN-1:0] SrcAE, SrcBE,
// Memory stage signals // Memory stage signals
input logic StallM, FlushM, input logic StallM, FlushM,
input logic [`XLEN-1:0] FWriteDataM, input logic FWriteIntM,
input logic [`XLEN-1:0] FIntResM,
output logic [`XLEN-1:0] SrcAM, output logic [`XLEN-1:0] SrcAM,
output logic [`XLEN-1:0] WriteDataM, MemAdrM, output logic [`XLEN-1:0] WriteDataM, MemAdrM,
// Writeback stage signals // Writeback stage signals
input logic StallW, FlushW, input logic StallW, FlushW,
input logic FWriteIntW, input logic FWriteIntW,
input logic [`XLEN-1:0] FPUResultW,
input logic RegWriteW, input logic RegWriteW,
input logic SquashSCW, input logic SquashSCW,
input logic [2:0] ResultSrcW, input logic [2:0] ResultSrcW,
@ -70,13 +73,14 @@ module datapath (
logic [`XLEN-1:0] RD1E, RD2E; logic [`XLEN-1:0] RD1E, RD2E;
logic [`XLEN-1:0] ExtImmE; logic [`XLEN-1:0] ExtImmE;
logic [`XLEN-1:0] PreSrcAE, SrcAE2, SrcBE2; logic [`XLEN-1:0] PreSrcAE, PreSrcBE, SrcAE2, SrcBE2;
logic [`XLEN-1:0] ALUResultE; logic [`XLEN-1:0] ALUResultE;
logic [`XLEN-1:0] WriteDataE; logic [`XLEN-1:0] WriteDataE;
logic [`XLEN-1:0] TargetBaseE; logic [`XLEN-1:0] TargetBaseE;
// Memory stage signals // Memory stage signals
logic [`XLEN-1:0] ALUResultM; logic [`XLEN-1:0] ALUResultM;
logic [`XLEN-1:0] ResultM;
// Writeback stage signals // Writeback stage signals
logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] SCResultW;
logic [`XLEN-1:0] ALUResultW; logic [`XLEN-1:0] ALUResultW;
@ -88,8 +92,7 @@ module datapath (
assign Rs2D = InstrD[24:20]; assign Rs2D = InstrD[24:20];
assign RdD = InstrD[11:7]; assign RdD = InstrD[11:7];
//Mux for writting floating point //Mux for writting floating point
mux2 #(`XLEN) writedatamux(ResultW, FPUResultW, FWriteIntW, WriteDataW);
regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D);
extend ext(.InstrD(InstrD[31:7]), .*); extend ext(.InstrD(InstrD[31:7]), .*);
@ -102,11 +105,12 @@ module datapath (
flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
mux4 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, FWriteDataM, ForwardAE, PreSrcAE); mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, PreSrcAE);
mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, FWriteDataM, ForwardBE, WriteDataE); mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, PreSrcBE);
mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE);
mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2);
mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE); mux2 #(`XLEN) srcbmux(PreSrcBE, ExtImmE, ALUSrcBE, SrcBE);
mux2 #(`XLEN) srcbmux2(SrcBE, {`XLEN{1'b0}}, JumpE, SrcBE2); // *** May be able to remove this mux. mux2 #(`XLEN) srcbmux2(SrcBE, {`XLEN{1'b0}}, JumpE, SrcBE2); // *** May be able to remove this mux.
alu #(`XLEN) alu(SrcAE2, SrcBE2, ALUControlE, ALUResultE, FlagsE); alu #(`XLEN) alu(SrcAE2, SrcBE2, ALUControlE, ALUResultE, FlagsE);
mux2 #(`XLEN) targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE); mux2 #(`XLEN) targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE);
@ -117,10 +121,11 @@ module datapath (
flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM); flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM);
assign MemAdrM = ALUResultM; assign MemAdrM = ALUResultM;
flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM);
flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM);
mux2 #(`XLEN) resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM);
// Writeback stage pipeline register and logic // Writeback stage pipeline register and logic
flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW);
flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW);
// handle Store Conditional result if atomic extension supported // handle Store Conditional result if atomic extension supported
@ -131,11 +136,11 @@ module datapath (
assign SCResultW = 0; assign SCResultW = 0;
endgenerate endgenerate
mux5 #(`XLEN) resultmux(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); mux5 #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW);
/* -----\/----- EXCLUDED -----\/----- /* -----\/----- EXCLUDED -----\/-----
// This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage. // This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage.
// *** need to look at how the decoder is coded to fix. // *** need to look at how the decoder is coded to fix.
mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW); mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, WriteDataW);
>>>>>>> bp >>>>>>> bp
-----/\----- EXCLUDED -----/\----- */ -----/\----- EXCLUDED -----/\----- */

View File

@ -28,32 +28,31 @@
module forward( module forward(
// Detect hazards // Detect hazards
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
input logic MemReadE, MulDivE, CSRReadE, input logic MemReadE, MulDivE, CSRReadE,
input logic RegWriteM, RegWriteW, input logic RegWriteM, RegWriteW,
input logic DivDoneE, DivBusyE, input logic DivDoneE, DivBusyE,
input logic FWriteIntE, FWriteIntM, FWriteIntW, input logic FWriteIntE, FWriteIntM, FWriteIntW,
input logic SCE,
// Forwarding controls // Forwarding controls
output logic [1:0] ForwardAE, ForwardBE, output logic [1:0] ForwardAE, ForwardBE,
output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD
); );
always_comb begin always_comb begin
ForwardAE = 2'b00; ForwardAE = 2'b00;
ForwardBE = 2'b00; ForwardBE = 2'b00;
if (Rs1E != 5'b0) if (Rs1E != 5'b0)
if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; if ((Rs1E == RdM) & (RegWriteM|FWriteIntM)) ForwardAE = 2'b10;
else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01; else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01;
else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11;
if (Rs2E != 5'b0) if (Rs2E != 5'b0)
if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; if ((Rs2E == RdM) & (RegWriteM|FWriteIntM)) ForwardBE = 2'b10;
else if ((Rs2E == RdW) & (RegWriteW|FWriteIntW)) ForwardBE = 2'b01; else if ((Rs2E == RdW) & (RegWriteW|FWriteIntW)) ForwardBE = 2'b01;
else if ((Rs2E == RdM) & FWriteIntM) ForwardBE = 2'b11;
end end
// Stall on dependent operations that finish in Mem Stage and can't bypass in time // Stall on dependent operations that finish in Mem Stage and can't bypass in time
assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE));
assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE)); assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE));
assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide
assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE));

View File

@ -31,33 +31,34 @@ module ieu (
input logic [31:0] InstrD, input logic [31:0] InstrD,
input logic IllegalIEUInstrFaultD, input logic IllegalIEUInstrFaultD,
output logic IllegalBaseInstrFaultD, output logic IllegalBaseInstrFaultD,
output logic RegWriteD, output logic RegWriteD,
// Execute Stage interface // Execute Stage interface
input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE, input logic [`XLEN-1:0] PCLinkE,
input logic FWriteIntE, input logic FWriteIntE,
input logic IllegalFPUInstrE,
input logic [`XLEN-1:0] FWriteDataE,
output logic [`XLEN-1:0] PCTargetE, output logic [`XLEN-1:0] PCTargetE,
output logic MulDivE, W64E, output logic MulDivE, W64E,
output logic [2:0] Funct3E, output logic [2:0] Funct3E,
output logic [`XLEN-1:0] SrcAE, SrcBE, output logic [`XLEN-1:0] SrcAE, SrcBE,
input logic FWriteIntM,
// Memory stage interface // Memory stage interface
input logic DataMisalignedM, // from LSU input logic DataMisalignedM, // from LSU
input logic SquashSCW, // from LSU input logic SquashSCW, // from LSU
output logic [1:0] MemRWM, // read/write control goes to LSU output logic [1:0] MemRWM, // read/write control goes to LSU
output logic [1:0] AtomicM, // atomic control goes to LSU output logic [1:0] AtomicM, // atomic control goes to LSU
output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU
output logic [2:0] Funct3M, // size and signedness to LSU output logic [2:0] Funct3M, // size and signedness to LSU
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
input logic DataAccessFaultM,
input logic FWriteIntM, // from FPU input logic [`XLEN-1:0] FIntResM,
input logic [`XLEN-1:0] FWriteDataM, // from FPU
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
// Writeback stage // Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
input logic FWriteIntW, input logic FWriteIntW,
input logic [`XLEN-1:0] FPUResultW,
// input logic [`XLEN-1:0] PCLinkW, // input logic [`XLEN-1:0] PCLinkW,
output logic InstrValidM, InstrValidW, output logic InstrValidM, InstrValidW,
// hazards // hazards
@ -76,7 +77,8 @@ module ieu (
logic [4:0] ALUControlE; logic [4:0] ALUControlE;
logic ALUSrcAE, ALUSrcBE; logic ALUSrcAE, ALUSrcBE;
logic [2:0] ResultSrcW; logic [2:0] ResultSrcW;
logic TargetSrcE; logic TargetSrcE;
logic SCE;
// forwarding signals // forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW; logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW;
@ -84,6 +86,7 @@ module ieu (
logic RegWriteM, RegWriteW; logic RegWriteM, RegWriteW;
logic MemReadE, CSRReadE; logic MemReadE, CSRReadE;
logic JumpE; logic JumpE;
logic [1:0] MemRWE;
controller c(.*); controller c(.*);
datapath dp(.*); datapath dp(.*);

View File

@ -87,21 +87,23 @@ module wallypipelinedhart
logic PCSrcE; logic PCSrcE;
logic CSRWritePendingDEM; logic CSRWritePendingDEM;
logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD;
logic DivDoneE; logic DivDoneE;
logic DivBusyE; logic DivBusyE;
logic DivDoneW;
logic [4:0] SetFflagsM;
logic [2:0] FRM_REGW;
logic FloatRegWriteW;
logic [1:0] FMemRWM;
logic RegWriteD; logic RegWriteD;
logic [`XLEN-1:0] FWriteDataM; logic LoadStallD, MulDivStallD, CSRRdStallD;
logic SquashSCW; logic SquashSCM, SquashSCW;
// floating point unit signals
logic [2:0] FRM_REGW;
logic [1:0] FMemRWM, FMemRWE;
logic FStallD; logic FStallD;
logic FWriteIntE, FWriteIntW, FWriteIntM; logic FWriteIntE, FWriteIntM, FWriteIntW;
logic [`XLEN-1:0] FWriteDataE;
logic [`XLEN-1:0] FIntResM;
logic FDivBusyE; logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE; logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FloatRegWriteW;
logic FPUStallD;
logic [4:0] SetFflagsM;
logic [`XLEN-1:0] FPUResultW; logic [`XLEN-1:0] FPUResultW;
// memory management unit signals // memory management unit signals
@ -185,20 +187,10 @@ module wallypipelinedhart
ieu ieu(.*); // integer execution unit: integer register file, datapath and controller ieu ieu(.*); // integer execution unit: integer register file, datapath and controller
mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM);
pagetablewalker pagetablewalker(.HPTWRead(HPTWRead), pagetablewalker pagetablewalker(.HPTWRead(HPTWRead),
.*); // can send addresses to ahblite, send out pagetablestall .*); // can send addresses to ahblite, send out pagetablestall
// *** can connect to hazard unit
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
// Would need to insertinstruction as InstrD, not InstrF
/*ahblite ebu(
.InstrReadF(1'b0),
.InstrRData(), // hook up InstrF later
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
.*); */
// arbiter between IEU and pagetablewalker // arbiter between IEU and pagetablewalker
lsuArb arbiter(// HPTW connection lsuArb arbiter(// HPTW connection
.HPTWTranslate(MMUTranslate), .HPTWTranslate(MMUTranslate),
@ -208,12 +200,12 @@ module wallypipelinedhart
.HPTWReady(MMUReady), .HPTWReady(MMUReady),
.HPTWStall(HPTWStall), .HPTWStall(HPTWStall),
// CPU connection // CPU connection
.MemRWM(MemRWM|FMemRWM), .MemRWM(MemRWM),
.Funct3M(Funct3M), .Funct3M(Funct3M),
.AtomicM(AtomicM), .AtomicM(AtomicM),
.MemAdrM(MemAdrM), .MemAdrM(MemAdrM),
.StallW(StallW), .StallW(StallW),
.WriteDataM(WriteDatatmpM), .WriteDataM(WriteDataM),
.ReadDataW(ReadDataW), .ReadDataW(ReadDataW),
.CommittedM(CommittedM), .CommittedM(CommittedM),
.SquashSCW(SquashSCW), .SquashSCW(SquashSCW),
@ -259,7 +251,8 @@ module wallypipelinedhart
ahblite ebu( ahblite ebu(
//.InstrReadF(1'b0), //.InstrReadF(1'b0),
//.InstrRData(InstrF), // hook up InstrF later //.InstrRData(InstrF), // hook up InstrF later
.WriteDataM(WriteDatatmpM), .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking
.WriteDataM(WriteDataM),
.MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]), .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]),
.Funct7M(InstrM[31:25]), .Funct7M(InstrM[31:25]),
.HRDATAW(HRDATAW), .HRDATAW(HRDATAW),

View File

@ -539,8 +539,8 @@ string tests32f[] = '{
if (`M_SUPPORTED) tests = {tests, tests64m}; if (`M_SUPPORTED) tests = {tests, tests64m};
if (`A_SUPPORTED) tests = {tests, tests64a}; if (`A_SUPPORTED) tests = {tests, tests64a};
if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; if (`MEM_VIRTMEM) tests = {tests, tests64mmu};
if (`D_SUPPORTED) tests = {tests64d, tests};
if (`F_SUPPORTED) tests = {tests64f, tests}; if (`F_SUPPORTED) tests = {tests64f, tests};
if (`D_SUPPORTED) tests = {tests64d, tests};
end end
//tests = {tests64a, tests}; //tests = {tests64a, tests};
end else begin // RV32 end else begin // RV32
@ -554,7 +554,7 @@ string tests32f[] = '{
if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
else tests = {tests, tests32iNOc}; else tests = {tests, tests32iNOc};
if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m}; if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
// if (`F_SUPPORTED) tests = {tests32f, tests}; if (`F_SUPPORTED) tests = {tests32f, tests};
if (`A_SUPPORTED) tests = {tests, tests32a}; if (`A_SUPPORTED) tests = {tests, tests32a};
if (`MEM_VIRTMEM) tests = {tests, tests32mmu}; if (`MEM_VIRTMEM) tests = {tests, tests32mmu};
end end

File diff suppressed because it is too large Load Diff