forked from Github_Repos/cvw
Merge branch 'main' into bigbadbranch
This commit is contained in:
commit
dbd33465e1
2
.gitignore
vendored
2
.gitignore
vendored
@ -24,6 +24,8 @@ testsBP/*/*/*.elf*
|
||||
testsBP/*/OBJ/*
|
||||
testsBP/*/*.a
|
||||
wally-pipelined/linux-testgen/linux-testvectors/*
|
||||
wally-pipelined/linux-testgen/nohup*
|
||||
wally-pipelined/linux-testgen/x*
|
||||
!wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py
|
||||
!wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh
|
||||
wally-pipelined/regression/slack-notifier/slack-webhook-url.txt
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,3 +0,0 @@
|
||||
[submodule "sky130/sky130_osu_sc_t12"]
|
||||
path = sky130/sky130_osu_sc_t12
|
||||
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/
|
@ -211,26 +211,53 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
|
||||
core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*int foreverLoop = 1;
|
||||
secs_ret timing = 0;
|
||||
int timingInt;
|
||||
ee_printf("\nENTERING FOREVER WHILE LOOP\n");
|
||||
while(foreverLoop == 1)
|
||||
{
|
||||
start_time();
|
||||
//filler
|
||||
stop_time();
|
||||
timing += time_in_secs(get_time());
|
||||
timingInt = (int)timing;
|
||||
ee_printf("Timing is %d\n", timingInt);
|
||||
}*/
|
||||
|
||||
/* automatically determine number of iterations if not set */
|
||||
if (results[0].iterations==0) {
|
||||
secs_ret secs_passed=0;
|
||||
ee_u32 divisor;
|
||||
results[0].iterations=1;
|
||||
int iterationInc = 0;
|
||||
ee_printf("\n\nENTERING ITERATION WHILE LOOP\n");
|
||||
while (secs_passed < (secs_ret)1) {
|
||||
results[0].iterations*=10;
|
||||
if(iterationInc != 0)
|
||||
{
|
||||
results[0].iterations++;
|
||||
}
|
||||
ee_printf("iterations is %d\n", results[0].iterations);
|
||||
start_time();
|
||||
iterate(&results[0]);
|
||||
stop_time();
|
||||
secs_passed=time_in_secs(get_time());
|
||||
secs_passed = time_in_secs(get_time());
|
||||
int secs_passed_int = (int)secs_passed;
|
||||
ee_printf("secs passed is %d\n", secs_passed_int);
|
||||
iterationInc++;
|
||||
}
|
||||
ee_printf("LEAVING ITERATION WHILE LOOP!\n\n");
|
||||
/* now we know it executes for at least 1 sec, set actual run time at about 10 secs */
|
||||
divisor=(ee_u32)secs_passed;
|
||||
ee_printf("divisor is %lu\n", divisor);
|
||||
if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */
|
||||
divisor=1;
|
||||
results[0].iterations*=1+10/divisor;
|
||||
ee_printf("iterations is %d\n", results[0].iterations);
|
||||
}
|
||||
/* perform actual benchmark */
|
||||
ee_printf("Starting benchmark\n");
|
||||
start_time();
|
||||
#if (MULTITHREAD>1)
|
||||
if (default_num_contexts>MULTITHREAD) {
|
||||
@ -249,7 +276,8 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
|
||||
#endif
|
||||
stop_time();
|
||||
total_time=get_time();
|
||||
ee_printf("ending benchmark");
|
||||
ee_printf("total time is %u\n", total_time);
|
||||
ee_printf("ending benchmark\n");
|
||||
/* get a function of the input to report */
|
||||
seedcrc=crc16(results[0].seed1,seedcrc);
|
||||
seedcrc=crc16(results[0].seed2,seedcrc);
|
||||
@ -340,12 +368,17 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate);
|
||||
for (i=0 ; i<default_num_contexts; i++)
|
||||
ee_printf("[%d]crcfinal : 0x%04x\"n",i,results[i].crc);
|
||||
ee_printf("[%d]crcfinal : 0x%04x\n",i,results[i].crc);
|
||||
if (total_errors==0) {
|
||||
ee_printf("Correct operation validated. See README.md for run and reporting rules.\n");
|
||||
#if HAS_FLOAT
|
||||
if (known_id==3) {
|
||||
ee_printf("CoreMark 1.0 : %f / %s %s",default_num_contexts*results[0].iterations/time_in_secs(total_time),COMPILER_VERSION,COMPILER_FLAGS);
|
||||
unsigned long long tmp = (unsigned long long) 1000.0*default_num_contexts*results[0].iterations/time_in_secs(total_time);
|
||||
secs_ret totalmsecs = time_in_secs(total_time);
|
||||
int totalmint = (int) totalmsecs;
|
||||
ee_printf("ELAPSED S: %d\n", totalmint);
|
||||
|
||||
ee_printf("CoreMark 1.0 : %d / %s %s\n",tmp,COMPILER_VERSION,COMPILER_FLAGS);
|
||||
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
|
||||
ee_printf(" / %s",MEM_LOCATION);
|
||||
#else
|
||||
|
@ -114,9 +114,10 @@ void portable_free(void *p) {
|
||||
#define read_csr(reg) ({ unsigned long __tmp; \
|
||||
asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
|
||||
__tmp; })
|
||||
#define GETMYTIME(_t) (*_t=read_csr(cycle))
|
||||
#define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8)
|
||||
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
|
||||
#define TIMER_RES_DIVIDER 1
|
||||
// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
|
||||
#define TIMER_RES_DIVIDER 10000
|
||||
#define SAMPLE_TIME_IMPLEMENTATION 1
|
||||
#endif
|
||||
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
|
||||
@ -132,7 +133,9 @@ static CORETIMETYPE start_time_val, stop_time_val;
|
||||
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
|
||||
*/
|
||||
void start_time(void) {
|
||||
GETMYTIME(&start_time_val );
|
||||
GETMYTIME(start_time_val);
|
||||
ee_printf("Timer started\n");
|
||||
ee_printf(" MTIME: %u\n", start_time_val);
|
||||
#if CALLGRIND_RUN
|
||||
CALLGRIND_START_INSTRUMENTATION
|
||||
#endif
|
||||
@ -153,7 +156,9 @@ void stop_time(void) {
|
||||
#if MICA
|
||||
asm volatile("int3");/*1 */
|
||||
#endif
|
||||
GETMYTIME(&stop_time_val );
|
||||
GETMYTIME(stop_time_val);
|
||||
ee_printf("Timer stopped\n");
|
||||
ee_printf(" MTIME: %u\n", stop_time_val);
|
||||
}
|
||||
/* Function: get_time
|
||||
Return an abstract "ticks" number that signifies time on the system.
|
||||
@ -166,6 +171,7 @@ void stop_time(void) {
|
||||
*/
|
||||
CORE_TICKS get_time(void) {
|
||||
CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
|
||||
ee_printf(" Elapsed MTIME: %u\n", elapsed);
|
||||
return elapsed;
|
||||
}
|
||||
/* Function: time_in_secs
|
||||
@ -176,13 +182,15 @@ CORE_TICKS get_time(void) {
|
||||
*/
|
||||
secs_ret time_in_secs(CORE_TICKS ticks) {
|
||||
secs_ret retval=((secs_ret)ticks) / (secs_ret)EE_TICKS_PER_SEC;
|
||||
int retvalint = (int)retval;
|
||||
ee_printf(" RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", retvalint);
|
||||
return retval;
|
||||
}
|
||||
#else
|
||||
#error "Please implement timing functionality in core_portme.c"
|
||||
#endif /* SAMPLE_TIME_IMPLEMENTATION */
|
||||
|
||||
ee_u32 default_num_contexts=MULTITHREAD;
|
||||
ee_u32 default_num_contexts = MULTITHREAD;
|
||||
|
||||
/* Function: portable_init
|
||||
Target specific initialization code
|
||||
|
@ -1 +0,0 @@
|
||||
Subproject commit f60f2d0395053c4df362a97d7e2099721b6face6
|
@ -62,25 +62,25 @@
|
||||
// Peripheral memory space extends from BASE to BASE+RANGE
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 32'h00003FFF
|
||||
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 32'h00000FFF
|
||||
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 56'h00003FFF
|
||||
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 56'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 32'h80000000
|
||||
`define TIM_RANGE 32'h07FFFFFF
|
||||
`define TIM_BASE 56'h80000000
|
||||
`define TIM_RANGE 56'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 32'h02000000
|
||||
`define CLINT_RANGE 32'h0000FFFF
|
||||
`define CLINT_BASE 56'h02000000
|
||||
`define CLINT_RANGE 56'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 32'h10012000
|
||||
`define GPIO_RANGE 32'h000000FF
|
||||
`define GPIO_BASE 56'h10012000
|
||||
`define GPIO_RANGE 56'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 32'h10000000
|
||||
`define UART_RANGE 32'h00000007
|
||||
`define UART_BASE 56'h10000000
|
||||
`define UART_RANGE 56'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 32'h0C000000
|
||||
`define PLIC_RANGE 32'h03FFFFFF
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
|
@ -31,6 +31,7 @@
|
||||
`define BUSYBEAR 1
|
||||
`define LINUX_FIX_READ {'h10000005}
|
||||
`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/"
|
||||
//`define LINUX_TEST_VECTORS "../../../busybear_boot/"
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
`define XLEN 64
|
||||
|
||||
@ -63,25 +64,25 @@
|
||||
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
|
||||
|
||||
`define BOOTTIM_SUPPORTED 1'b1
|
||||
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 32'h00003FFF
|
||||
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 32'h00000FFF
|
||||
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
`define BOOTTIM_RANGE 56'h00003FFF
|
||||
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
|
||||
//`define BOOTTIM_RANGE 56'h00000FFF
|
||||
`define TIM_SUPPORTED 1'b1
|
||||
`define TIM_BASE 32'h80000000
|
||||
`define TIM_RANGE 32'h07FFFFFF
|
||||
`define TIM_BASE 56'h80000000
|
||||
`define TIM_RANGE 56'h07FFFFFF
|
||||
`define CLINT_SUPPORTED 1'b1
|
||||
`define CLINT_BASE 32'h02000000
|
||||
`define CLINT_RANGE 32'h0000FFFF
|
||||
`define CLINT_BASE 56'h02000000
|
||||
`define CLINT_RANGE 56'h0000FFFF
|
||||
`define GPIO_SUPPORTED 1'b1
|
||||
`define GPIO_BASE 32'h10012000
|
||||
`define GPIO_RANGE 32'h000000FF
|
||||
`define GPIO_BASE 56'h10012000
|
||||
`define GPIO_RANGE 56'h000000FF
|
||||
`define UART_SUPPORTED 1'b1
|
||||
`define UART_BASE 32'h10000000
|
||||
`define UART_RANGE 32'h00000007
|
||||
`define UART_BASE 56'h10000000
|
||||
`define UART_RANGE 56'h00000007
|
||||
`define PLIC_SUPPORTED 1'b1
|
||||
`define PLIC_BASE 32'h0C000000
|
||||
`define PLIC_RANGE 32'h03FFFFFF
|
||||
`define PLIC_BASE 56'h0C000000
|
||||
`define PLIC_RANGE 56'h03FFFFFF
|
||||
|
||||
// Bus Interface width
|
||||
`define AHBW 64
|
||||
|
@ -1,3 +1,9 @@
|
||||
# Oftentimes this script runs so long you'll go to sleep.
|
||||
# But you don't want the script to die when your computer goes to sleep.
|
||||
# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh")
|
||||
# You can run "tail -f nohup.out" to see what would've
|
||||
# outputted to the terminal if you didn't use nohup
|
||||
|
||||
# =========== Debug the Process ==========
|
||||
# Uncomment this version for GDB/QEMU debugging
|
||||
# - Opens up GDB interactively
|
||||
@ -15,6 +21,12 @@
|
||||
# - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt
|
||||
#cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt
|
||||
#cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/"
|
||||
# Uncomment this version in case you just want to have qemu_in_gdb_format.txt around
|
||||
# It is often helpful for general debugging
|
||||
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog
|
||||
|
||||
# Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection
|
||||
#split -d -l 5600000 qemu_in_gdb_format.txt --verbose
|
||||
|
||||
# Uncomment this version for parse_gdb_output.py debugging
|
||||
# - Uses qemu_in_gdb_format.txt
|
||||
@ -24,4 +36,4 @@
|
||||
# =========== Just Do the Thing ==========
|
||||
# Uncomment this version for the whole thing
|
||||
# - Logs info needed by buildroot testbench
|
||||
(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | pv -l | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog
|
||||
(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog
|
||||
|
@ -44,7 +44,7 @@ try:
|
||||
instrs += 1
|
||||
storeAMO = ''
|
||||
if instrs % 10000 == 0:
|
||||
print(instrs)
|
||||
print(instrs,flush=True)
|
||||
# Instr in human assembly
|
||||
wPC.write('{} ***\n'.format(' '.join(l.split(':')[1].split()[0:2])))
|
||||
if '\tld' in l or '\tlw' in l or '\tlh' in l or '\tlb' in l:
|
||||
@ -63,6 +63,15 @@ try:
|
||||
storeLoc = readLoc
|
||||
storeReg = l.split()[-1].split(',')[1]
|
||||
storeAMO = l.split()[-2]
|
||||
if '\tlr' in l:
|
||||
currentRead = l.split()[-1].split(',')[0]
|
||||
readOffset = "0"
|
||||
readLoc = l.split()[-1].split('(')[1][:-1]
|
||||
readType = "0" # *** I don't see that readType or lastReadType are ever used; we can probably get rid of them
|
||||
if '\tsc' in l:
|
||||
storeOffset = "0"
|
||||
storeLoc = l.split()[-1].split('(')[1][:-1]
|
||||
storeReg = l.split()[-1].split(',')[1]
|
||||
if '\tsd' in l or '\tsw' in l or '\tsh' in l or '\tsb' in l:
|
||||
s = l.split('#')[0].split()[-1]
|
||||
storeReg = s.split(',')[0]
|
||||
|
@ -40,13 +40,12 @@ def parseCSRs(l):
|
||||
val = int(l.split()[1],16)
|
||||
if inPageFault:
|
||||
# Not sure if these CSRs should be updated or not during page fault.
|
||||
#if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
|
||||
# # We do update some CSRs
|
||||
# CSRs[csr] = val
|
||||
#else:
|
||||
# # Others we preserve until changed later
|
||||
# pageFaultCSRs[csr] = val
|
||||
pageFaultCSRs[csr] = val
|
||||
if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
|
||||
# We do update some CSRs
|
||||
CSRs[csr] = val
|
||||
else:
|
||||
# Others we preserve until changed later
|
||||
pageFaultCSRs[csr] = val
|
||||
elif pageFaultCSRs and (csr in pageFaultCSRs):
|
||||
if (val != pageFaultCSRs[csr]):
|
||||
del pageFaultCSRs[csr]
|
||||
|
@ -26,12 +26,12 @@ configs = [
|
||||
TestCase(
|
||||
name="busybear",
|
||||
cmd="vsim -do wally-busybear-batch.do -c > {}",
|
||||
grepstr="# loaded 100000 instructions"
|
||||
grepstr="loaded 100000 instructions"
|
||||
),
|
||||
TestCase(
|
||||
name="buildroot",
|
||||
cmd="vsim -do wally-buildroot-batch.do -c > {}",
|
||||
grepstr="# loaded 2000000 instructions"
|
||||
grepstr="loaded 2500000 instructions"
|
||||
),
|
||||
TestCase(
|
||||
name="rv32ic",
|
||||
|
@ -36,5 +36,4 @@ vopt work_busybear.testbench -o workopt_busybear
|
||||
vsim workopt_busybear -suppress 8852,12070
|
||||
|
||||
run -all
|
||||
exec ./slack-notifier/slack-notifier.py
|
||||
quit
|
||||
|
@ -40,5 +40,4 @@ do ./wave-dos/linux-waves.do
|
||||
|
||||
#-- Run the Simulation
|
||||
run -all
|
||||
exec ./slack-notifier/slack-notifier.py
|
||||
##quit
|
||||
|
@ -4,6 +4,7 @@ view wave
|
||||
add wave -divider
|
||||
add wave /testbench/clk
|
||||
add wave /testbench/reset
|
||||
add wave -dec /testbench/instrs
|
||||
|
||||
add wave -divider Stalls_and_Flushes
|
||||
add wave /testbench/dut/hart/StallF
|
||||
@ -19,12 +20,13 @@ add wave /testbench/dut/hart/FlushW
|
||||
add wave -divider F
|
||||
add wave -hex /testbench/dut/hart/ifu/PCF
|
||||
add wave -divider D
|
||||
add wave -hex /testbench/pcExpected
|
||||
add wave -hex /testbench/PCDexpected
|
||||
add wave -hex /testbench/dut/hart/ifu/PCD
|
||||
add wave -hex /testbench/PCtextD
|
||||
add wave /testbench/InstrDName
|
||||
add wave -hex /testbench/dut/hart/ifu/InstrD
|
||||
add wave -hex /testbench/dut/hart/ieu/c/InstrValidD
|
||||
add wave -hex /testbench/PCDwrong
|
||||
add wave -divider E
|
||||
add wave -hex /testbench/dut/hart/ifu/PCE
|
||||
add wave -hex /testbench/PCtextE
|
||||
|
@ -45,8 +45,8 @@ assign FOpCtrlE = 3'b0;
|
||||
// down - 010
|
||||
// up - 011
|
||||
// nearest max mag - 100
|
||||
assign FrmE = 3'b010;
|
||||
assign FmtE = 1'b1;
|
||||
assign FrmE = 3'b011;
|
||||
assign FmtE = 1'b0;
|
||||
|
||||
|
||||
assign wnan = FmtE ? &FmaResultM[62:52] && |FmaResultM[51:0] : &FmaResultM[62:55] && |FmaResultM[54:32];
|
||||
@ -110,7 +110,6 @@ always @(posedge clk)
|
||||
if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN ");
|
||||
if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN ");
|
||||
errors = errors + 1;
|
||||
if (errors == 20)
|
||||
$stop;
|
||||
end
|
||||
if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]})) || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin
|
||||
|
@ -1,3 +1,3 @@
|
||||
testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat
|
||||
testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rmax -seed 113355 -level 1 > testFloat
|
||||
tr -d ' ' < testFloat > testFloatNoSpace
|
||||
|
||||
|
@ -10,172 +10,124 @@ module fctrl (
|
||||
output logic FDivStartD,
|
||||
output logic [2:0] FResultSelD,
|
||||
output logic [3:0] FOpCtrlD,
|
||||
output logic [1:0] FResSelD,
|
||||
output logic [1:0] FIntResSelD,
|
||||
output logic FmtD,
|
||||
output logic [2:0] FrmD,
|
||||
output logic [1:0] FMemRWD,
|
||||
output logic FOutputInput2D,
|
||||
output logic FInput2UsedD, FInput3UsedD,
|
||||
output logic FWriteIntD);
|
||||
|
||||
|
||||
logic IllegalFPUInstr1D, IllegalFPUInstr2D;
|
||||
// *** fix rounding for dynamic rounding
|
||||
`define FCTRLW 15
|
||||
logic [`FCTRLW-1:0] ControlsD;
|
||||
// FPU Instruction Decoder
|
||||
always_comb
|
||||
case(OpD)
|
||||
// FWriteEn_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
|
||||
7'b0000111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw
|
||||
3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b0100111: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw
|
||||
3'b011: ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1000011: ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd
|
||||
7'b1000111: ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub
|
||||
7'b1001011: ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub
|
||||
7'b1001111: ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd
|
||||
7'b1010011: casez(Funct7D)
|
||||
7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd
|
||||
7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub
|
||||
7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul
|
||||
7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv
|
||||
7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt
|
||||
7'b00100??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn
|
||||
3'b010: ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b00101??: case(Funct3D)
|
||||
3'b000: ControlsD = `FCTRLW'b1_0_100_0111_10_00_0_0; // fmin
|
||||
3'b001: ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b10100??: case(Funct3D)
|
||||
3'b010: ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq
|
||||
3'b001: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // flt
|
||||
3'b000: ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b11100??: if (Funct3D == 3'b001)
|
||||
ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass
|
||||
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
|
||||
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
|
||||
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
7'b1100000: case(Rs2D[0])
|
||||
1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w
|
||||
1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1101000: case(Rs2D[0])
|
||||
1'b0: ControlsD = `FCTRLW'b1_1_010_0100_00_00_0_0; // fcvt.w.s
|
||||
1'b1: ControlsD = `FCTRLW'b1_1_010_0101_00_00_0_0; // fcvt.wu.s
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
|
||||
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d
|
||||
7'b1100001: case(Rs2D[0])
|
||||
1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w
|
||||
1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1101001: case(Rs2D[0])
|
||||
1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d
|
||||
1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
|
||||
7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
|
||||
endcase
|
||||
// unswizzle control bits
|
||||
assign {FWriteEnD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
|
||||
|
||||
// if dynamic rounding, choose FRM_REGW
|
||||
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
|
||||
|
||||
//all subsequent logic is based on the table present
|
||||
//in Section 5 of Wally Architecture Specification
|
||||
|
||||
//write is enabled for all fp instruction op codes
|
||||
//sans fp load
|
||||
logic isFP, isFPLD;
|
||||
always_comb begin
|
||||
//case statement is easier to modify
|
||||
//in case of errors
|
||||
case(OpD)
|
||||
//fp instructions sans load
|
||||
7'b1010011 : isFP = 1'b1;
|
||||
7'b1000011 : isFP = 1'b1;
|
||||
7'b1000111 : isFP = 1'b1;
|
||||
7'b1001011 : isFP = 1'b1;
|
||||
7'b1001111 : isFP = 1'b1;
|
||||
7'b0100111 : isFP = 1'b1;
|
||||
7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111
|
||||
default : isFP = 1'b0;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
|
||||
//useful intermediary signals
|
||||
//
|
||||
//(mult only not supported in current datapath)
|
||||
//set third FMA operand to zero in this case
|
||||
//(or equivalent)
|
||||
|
||||
always_comb begin
|
||||
//checks all but FMA/store/load
|
||||
IllegalFPUInstr2D = 0;
|
||||
FDivStartD = 1'b0;
|
||||
if(OpD == 7'b1010011) begin
|
||||
casez(Funct7D)
|
||||
//compare
|
||||
7'b10100?? : FResultSelD = 3'b001;
|
||||
//div/sqrt
|
||||
7'b0?011?? : begin FResultSelD = 3'b000; FDivStartD = 1'b1; end
|
||||
//add/sub
|
||||
7'b0000??? : FResultSelD = 3'b100;
|
||||
//mult
|
||||
7'b00010?? : FResultSelD = 3'b010;
|
||||
//convert (not precision)
|
||||
7'b110?0?? : FResultSelD = 3'b100;
|
||||
//convert (precision)
|
||||
7'b010000? : FResultSelD = 3'b100;
|
||||
//Min/Max
|
||||
7'b00101?? : FResultSelD = 3'b001;
|
||||
//sign injection
|
||||
7'b00100?? : FResultSelD = 3'b011;
|
||||
//classify //only if funct3 = 001
|
||||
7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101;
|
||||
//output ReadData1
|
||||
else if (Funct7D[1] == 0) FResultSelD = 3'b111;
|
||||
//output SrcW
|
||||
7'b111100? : FResultSelD = 3'b110;
|
||||
default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
|
||||
endcase
|
||||
end
|
||||
//FMA/store/load
|
||||
else begin
|
||||
case(OpD)
|
||||
//4 FMA instructions
|
||||
7'b1000011 : FResultSelD = 3'b010;
|
||||
7'b1000111 : FResultSelD = 3'b010;
|
||||
7'b1001011 : FResultSelD = 3'b010;
|
||||
7'b1001111 : FResultSelD = 3'b010;
|
||||
//store
|
||||
7'b0100111 : FResultSelD = 3'b111;
|
||||
//load
|
||||
7'b0000111 : FResultSelD = 3'b111;
|
||||
default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign FOutputInput2D = OpD == 7'b0100111;
|
||||
|
||||
assign FMemRWD[0] = FOutputInput2D;
|
||||
assign FMemRWD[1] = OpD == 7'b0000111;
|
||||
|
||||
|
||||
|
||||
//register is chosen based on operation performed
|
||||
//----
|
||||
//write selection is chosen in the same way as
|
||||
//register selection
|
||||
//
|
||||
|
||||
// reg/write sel logic and assignment
|
||||
//
|
||||
// 3'b000 = div/sqrt
|
||||
// 3'b001 = cmp
|
||||
// 3'b010 = fma/mult
|
||||
// 3'b011 = sgn inj
|
||||
// 3'b100 = add/sub/cnvt
|
||||
// 3'b101 = classify
|
||||
// 3'b110 = output SrcAW
|
||||
// 3'b111 = output ReadData1
|
||||
//
|
||||
//reg select
|
||||
|
||||
//this value is used enough to be shorthand
|
||||
|
||||
|
||||
//operation control for each fp operation
|
||||
//has to be expanded over standard to account for
|
||||
//integrated fpadd/cvt
|
||||
//
|
||||
//will integrate FMA opcodes into design later
|
||||
//
|
||||
//conversion instructions will
|
||||
//also need to be added later as I find the opcode
|
||||
//version I used for this repo
|
||||
|
||||
//let's do separate SOP for each type of operation
|
||||
// assign FOpCtrlD[3] = 1'b0;
|
||||
//
|
||||
//
|
||||
|
||||
|
||||
|
||||
always_comb begin
|
||||
IllegalFPUInstr1D = 0;
|
||||
FInput3UsedD = 0;
|
||||
case (FResultSelD)
|
||||
// div/sqrt
|
||||
// Precision
|
||||
// 0-single
|
||||
// 1-double
|
||||
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : Funct7D[0];
|
||||
// div/sqrt
|
||||
// fdiv = ???0
|
||||
// fsqrt = ???1
|
||||
3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end
|
||||
// cmp
|
||||
|
||||
// cmp
|
||||
// fmin = ?111
|
||||
// fmax = ?101
|
||||
// feq = ?010
|
||||
// flt = ?001
|
||||
// fle = ?011
|
||||
// {?, is min or max, is eq or le, is lt or le}
|
||||
3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end
|
||||
//fma/mult
|
||||
|
||||
//fma/mult
|
||||
// fmadd = ?000
|
||||
// fmsub = ?001
|
||||
// fnmsub = ?010 -(a*b)+c
|
||||
// fnmadd = ?011 -(a*b)-c
|
||||
// fmul = ?100
|
||||
// {?, is mul, is negative, is sub}
|
||||
3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end
|
||||
// sgn inj
|
||||
|
||||
// sgn inj
|
||||
// fsgnj = ??00
|
||||
// fsgnjn = ??01
|
||||
// fsgnjx = ??10
|
||||
3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end
|
||||
// add/sub/cnvt
|
||||
|
||||
// add/sub/cnvt
|
||||
// fadd = 0000
|
||||
// fsub = 0001
|
||||
// fcvt.w.s = 0100
|
||||
@ -188,35 +140,18 @@ module fctrl (
|
||||
// fcvt.d.w = 1110
|
||||
// fcvt.d.wu = 1111
|
||||
// fcvt.d.s = 1000
|
||||
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub
|
||||
3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), (Rs2D[0]&Funct7D[5])|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end
|
||||
// classify {?, ?, ?, ?}
|
||||
3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end
|
||||
// output SrcAW
|
||||
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub}
|
||||
|
||||
// fmv.w.x = ???0
|
||||
// fmv.d.x = ???1
|
||||
3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end
|
||||
// output Input1
|
||||
|
||||
// flw = ?000
|
||||
// fld = ?001
|
||||
// fsw = ?010 // output Input2
|
||||
// fsd = ?011 // output Input2
|
||||
// fsw = ?010
|
||||
// fsd = ?011
|
||||
// fmv.x.w = ?100
|
||||
// fmv.x.d = ?101
|
||||
// {?, is mv, is store, is double or fmv}
|
||||
3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end
|
||||
default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; FInput2UsedD = 1'b0; end
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
//precision
|
||||
assign FmtD = (~&FResultSelD & Funct7D[0]) | (&FResultSelD & FOpCtrlD[0]);
|
||||
|
||||
assign IllegalFPUInstrD = IllegalFPUInstr1D | IllegalFPUInstr2D;
|
||||
//write to integer source if conv to int occurs
|
||||
//AND of Funct7 for int results
|
||||
// is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv
|
||||
assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]);
|
||||
// if not writing to int reg and not a store function and not move
|
||||
assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
|
||||
endmodule
|
||||
|
@ -1,111 +1,111 @@
|
||||
module fma1(
|
||||
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
|
||||
output logic [161:0] AlignedAddendE, // Z aligned for addition
|
||||
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
|
||||
output logic AddendStickyE, // sticky bit that is calculated during alignment
|
||||
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
|
||||
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
|
||||
output logic XInfE, YInfE, ZInfE, // inputs are infinity
|
||||
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtE, // precision 1 = double 0 = single
|
||||
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
|
||||
output logic [161:0] AlignedAddendE, // Z aligned for addition
|
||||
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
|
||||
output logic AddendStickyE, // sticky bit that is calculated during alignment
|
||||
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
|
||||
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
|
||||
output logic XInfE, YInfE, ZInfE, // inputs are infinity
|
||||
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
|
||||
|
||||
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
|
||||
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
|
||||
logic [12:0] XExp,YExp,ZExp; // input exponents
|
||||
logic XSgn,YSgn,ZSgn; // input signs
|
||||
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
|
||||
logic [211:0] ZManShifted; // output of the alignment shifter including sticky bit
|
||||
logic [211:0] ZManPreShifted; // input to the alignment shifter
|
||||
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
|
||||
logic [63:0] Addend; // value to add (Z or zero)
|
||||
logic [12:0] Bias; // 1023 for double, 127 for single
|
||||
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
|
||||
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
|
||||
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
|
||||
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
|
||||
logic [12:0] XExp,YExp,ZExp; // input exponents
|
||||
logic XSgn,YSgn,ZSgn; // input signs
|
||||
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
|
||||
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
|
||||
logic [213:0] ZManPreShifted; // input to the alignment shifter
|
||||
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
|
||||
logic [63:0] Addend; // value to add (Z or zero)
|
||||
logic [12:0] Bias; // 1023 for double, 127 for single
|
||||
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
|
||||
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
|
||||
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
|
||||
|
||||
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63];
|
||||
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63];
|
||||
|
||||
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
|
||||
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
|
||||
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
|
||||
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
|
||||
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
|
||||
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
|
||||
|
||||
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
|
||||
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
|
||||
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
|
||||
|
||||
assign XMan = {~XExpZero, XFrac};
|
||||
assign YMan = {~YExpZero, YFrac};
|
||||
assign ZMan = {~ZExpZero, ZFrac};
|
||||
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
|
||||
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
|
||||
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
|
||||
|
||||
assign XMan = {~XExpZero, XFrac};
|
||||
assign YMan = {~YExpZero, YFrac};
|
||||
assign ZMan = {~ZExpZero, ZFrac};
|
||||
|
||||
assign Bias = FmtE ? 13'h3ff : 13'h7f;
|
||||
assign Bias = FmtE ? 13'h3ff : 13'h7f;
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// determine if an input is a special value
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// determine if an input is a special value
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign XExpZero = ~|XExp;
|
||||
assign YExpZero = ~|YExp;
|
||||
assign ZExpZero = ~|ZExp;
|
||||
|
||||
assign XFracZero = ~|XFrac;
|
||||
assign YFracZero = ~|YFrac;
|
||||
assign ZFracZero = ~|ZFrac;
|
||||
assign XExpZero = ~|XExp;
|
||||
assign YExpZero = ~|YExp;
|
||||
assign ZExpZero = ~|ZExp;
|
||||
|
||||
assign XFracZero = ~|XFrac;
|
||||
assign YFracZero = ~|YFrac;
|
||||
assign ZFracZero = ~|ZFrac;
|
||||
|
||||
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
|
||||
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
|
||||
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
|
||||
|
||||
assign XNaNE = XExpMax & ~XFracZero;
|
||||
assign YNaNE = YExpMax & ~YFracZero;
|
||||
assign ZNaNE = ZExpMax & ~ZFracZero;
|
||||
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
|
||||
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
|
||||
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
|
||||
|
||||
assign XNaNE = XExpMax & ~XFracZero;
|
||||
assign YNaNE = YExpMax & ~YFracZero;
|
||||
assign ZNaNE = ZExpMax & ~ZFracZero;
|
||||
|
||||
assign XDenorm = XExpZero & ~XFracZero;
|
||||
assign YDenorm = YExpZero & ~YFracZero;
|
||||
assign ZDenorm = ZExpZero & ~ZFracZero;
|
||||
assign XDenorm = XExpZero & ~XFracZero;
|
||||
assign YDenorm = YExpZero & ~YFracZero;
|
||||
assign ZDenorm = ZExpZero & ~ZFracZero;
|
||||
|
||||
assign XInfE = XExpMax & XFracZero;
|
||||
assign YInfE = YExpMax & YFracZero;
|
||||
assign ZInfE = ZExpMax & ZFracZero;
|
||||
assign XInfE = XExpMax & XFracZero;
|
||||
assign YInfE = YExpMax & YFracZero;
|
||||
assign ZInfE = ZExpMax & ZFracZero;
|
||||
|
||||
assign XZeroE = XExpZero & XFracZero;
|
||||
assign YZeroE = YExpZero & YFracZero;
|
||||
assign ZZeroE = ZExpZero & ZFracZero;
|
||||
assign XZeroE = XExpZero & XFracZero;
|
||||
assign YZeroE = YExpZero & YFracZero;
|
||||
assign ZZeroE = ZExpZero & ZFracZero;
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multiplying two fp numbers, add the exponents
|
||||
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one if there is a denormal number
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// verilator lint_off WIDTH
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
|
||||
XExp + YExp - Bias + XDenorm + YDenorm;
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Calculate the product
|
||||
// - When multiplying two fp numbers, add the exponents
|
||||
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one if there is a denormal number
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// verilator lint_off WIDTH
|
||||
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
|
||||
XExp + YExp - Bias + XDenorm + YDenorm;
|
||||
|
||||
// Calculate the product's mantissa
|
||||
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
|
||||
assign ProdManE = XMan * YMan;
|
||||
// Calculate the product's mantissa
|
||||
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
|
||||
assign ProdManE = XMan * YMan;
|
||||
|
||||
|
||||
|
||||
@ -114,72 +114,71 @@ module fma1(
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Alignment shifter
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// determine the shift count for alignment
|
||||
// - negative means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one to the exponent if it is a denormal number
|
||||
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
|
||||
// verilator lint_on WIDTH
|
||||
// determine the shift count for alignment
|
||||
// - negative means Z is larger, so shift Z left
|
||||
// - positive means the product is larger, so shift Z right
|
||||
// - Denormal numbers have an exponent value of 1, however they are
|
||||
// represented with an exponent of 0. add one to the exponent if it is a denormal number
|
||||
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
|
||||
// verilator lint_on WIDTH
|
||||
|
||||
|
||||
// Default Addition without shifting
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// |1'b0| addnend |
|
||||
// Default Addition without shifting
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// |1'b0| addnend |
|
||||
|
||||
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
|
||||
assign ZManPreShifted = {55'b0, ZMan, 104'b0};
|
||||
always_comb
|
||||
begin
|
||||
|
||||
// If the product is too small to affect the sum, kill the product
|
||||
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
|
||||
assign ZManPreShifted = {55'b0, ZMan, 106'b0};
|
||||
always_comb
|
||||
begin
|
||||
|
||||
// If the product is too small to affect the sum, kill the product
|
||||
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
|
||||
KillProdE = 1;
|
||||
ZManShifted = {107'b0, ZMan, 52'b0};
|
||||
AddendStickyE = ~(XZeroE|YZeroE);
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
|
||||
KillProdE = 1;
|
||||
ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0};
|
||||
AddendStickyE = ~(XZeroE|YZeroE);
|
||||
|
||||
// If the Addend is shifted left (negative AlignCnt)
|
||||
// If the Addend is shifted left (negative AlignCnt)
|
||||
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted << -AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[49:0]);
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted << -AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[51:0]);
|
||||
|
||||
// If the Addend is shifted right (positive AlignCnt)
|
||||
// If the Addend is shifted right (positive AlignCnt)
|
||||
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else if ($signed(AlignCnt)<=$signed(13'd104)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted >> AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[49:0]);
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else if ($signed(AlignCnt)<=$signed(13'd106)) begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = ZManPreShifted >> AlignCnt;
|
||||
AddendStickyE = |(ZManShifted[51:0]);
|
||||
|
||||
// If the addend is too small to affect the addition
|
||||
// - The addend has to shift two past the end of the addend to be considered too small
|
||||
// - The 2 extra bits are needed for rounding
|
||||
// If the addend is too small to affect the addition
|
||||
// - The addend has to shift two past the end of the addend to be considered too small
|
||||
// - The 2 extra bits are needed for rounding
|
||||
|
||||
// | 55'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = 0;
|
||||
AddendStickyE = ~ZZeroE;
|
||||
// | 54'b0 | 106'b(product) | 2'b0 |
|
||||
// | addnend |
|
||||
end else begin
|
||||
KillProdE = 0;
|
||||
ZManShifted = 0;
|
||||
AddendStickyE = ~ZZeroE;
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign AlignedAddendE = ZManShifted[211:50];
|
||||
|
||||
endmodule
|
||||
|
||||
assign AlignedAddendE = ZManShifted[213:52];
|
||||
|
||||
endmodule
|
@ -1,127 +1,131 @@
|
||||
|
||||
|
||||
module fma2(
|
||||
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtM, // precision 1 = double 0 = single
|
||||
input logic [105:0] ProdManM, // 1.X frac * 1.Y frac
|
||||
input logic [161:0] AlignedAddendM, // Z aligned for addition
|
||||
input logic [12:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
output logic [63:0] FmaResultM, // FMA final result
|
||||
output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
input logic [63:0] X, // X
|
||||
input logic [63:0] Y, // Y
|
||||
input logic [63:0] Z, // Z
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
|
||||
input logic FmtM, // precision 1 = double 0 = single
|
||||
input logic [105:0] ProdManM, // 1.X frac * 1.Y frac
|
||||
input logic [161:0] AlignedAddendM, // Z aligned for addition
|
||||
input logic [12:0] ProdExpM, // X exponent + Y exponent - bias
|
||||
input logic AddendStickyM, // sticky bit that is calculated during alignment
|
||||
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
|
||||
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
|
||||
input logic XInfM, YInfM, ZInfM, // inputs are infinity
|
||||
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
|
||||
output logic [63:0] FmaResultM, // FMA final result
|
||||
output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
|
||||
|
||||
|
||||
|
||||
logic [51:0] ResultFrac; // Result fraction
|
||||
logic [10:0] ResultExp; // Result exponent
|
||||
logic ResultSgn; // Result sign
|
||||
logic [10:0] ZExp; // input exponent
|
||||
logic XSgn, YSgn, ZSgn; // input sign
|
||||
logic PSgn; // product sign
|
||||
logic [105:0] ProdMan2; // product being added
|
||||
logic [162:0] AlignedAddend2; // possibly inverted aligned Z
|
||||
logic [161:0] Sum; // positive sum
|
||||
logic [162:0] PreSum; // possibly negitive sum
|
||||
logic [12:0] SumExp; // exponent of the normalized sum
|
||||
logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic [12:0] SumExpTmpMinus1; // SumExpTmp-1
|
||||
logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow
|
||||
logic [53:0] NormSum; // normalized sum
|
||||
logic [161:0] SumShifted; // sum shifted for normalization
|
||||
logic [8:0] NormCnt; // output of the leading zero detector
|
||||
logic NormSumSticky; // sticky bit calulated from the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic NegSum; // is the sum negitive
|
||||
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
|
||||
logic ResultDenorm; // is the result denormalized
|
||||
logic Sticky; // Sticky bit
|
||||
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
|
||||
logic Invalid,Underflow,Overflow,Inexact; // flags
|
||||
logic [8:0] DenormShift; // right shift if the result is denormalized
|
||||
logic SubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic [63:0] Addend; // value to add (Z or zero)
|
||||
logic ZeroSgn; // the result's sign if the sum is zero
|
||||
logic ResultSgnTmp; // the result's sign assuming the result is not zero
|
||||
logic Guard, Round, LSBNormSum; // bits needed to determine rounding
|
||||
logic [12:0] MaxExp; // maximum value of the exponent
|
||||
logic [12:0] FracLen; // length of the fraction
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency)
|
||||
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
|
||||
logic [51:0] ResultFrac; // Result fraction
|
||||
logic [10:0] ResultExp; // Result exponent
|
||||
logic ResultSgn; // Result sign
|
||||
logic [10:0] ZExp; // input exponent
|
||||
logic XSgn, YSgn, ZSgn; // input sign
|
||||
logic PSgn; // product sign
|
||||
logic [105:0] ProdMan2; // product being added
|
||||
logic [162:0] AlignedAddend2; // possibly inverted aligned Z
|
||||
logic [161:0] Sum; // positive sum
|
||||
logic [162:0] PreSum; // possibly negitive sum
|
||||
logic [12:0] SumExp; // exponent of the normalized sum
|
||||
logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic [12:0] SumExpTmpMinus1; // SumExpTmp-1
|
||||
logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow
|
||||
logic [54:0] NormSum; // normalized sum
|
||||
logic [161:0] SumShifted; // sum shifted for normalization
|
||||
logic [8:0] NormCnt; // output of the leading zero detector
|
||||
logic NormSumSticky; // sticky bit calulated from the normalized sum
|
||||
logic SumZero; // is the sum zero
|
||||
logic NegSum; // is the sum negitive
|
||||
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
|
||||
logic ResultDenorm; // is the result denormalized
|
||||
logic Sticky; // Sticky bit
|
||||
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
|
||||
logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag)
|
||||
logic Invalid,Underflow,Overflow,Inexact; // flags
|
||||
logic [8:0] DenormShift; // right shift if the result is denormalized
|
||||
logic SubBySmallNum; // was there supposed to be a subtraction by a small number
|
||||
logic [63:0] Addend; // value to add (Z or zero)
|
||||
logic ZeroSgn; // the result's sign if the sum is zero
|
||||
logic ResultSgnTmp; // the result's sign assuming the result is not zero
|
||||
logic Guard, Round, LSBNormSum; // bits needed to determine rounding
|
||||
logic UfGuard, UfRound, UfLSBNormSum; // bits needed to determine rounding for underflow flag
|
||||
logic [12:0] MaxExp; // maximum value of the exponent
|
||||
logic [12:0] FracLen; // length of the fraction
|
||||
logic SigNaN; // is an input a signaling NaN
|
||||
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency)
|
||||
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select input fields
|
||||
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select input fields
|
||||
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
|
||||
// Set addend to zero if FMUL instruction
|
||||
assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
|
||||
|
||||
// split inputs into the sign bit, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
|
||||
// split inputs into the sign bit, and exponent to handle single or double precision
|
||||
// - single precision is in the top half of the inputs
|
||||
assign XSgn = X[63];
|
||||
assign YSgn = Y[63];
|
||||
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
|
||||
|
||||
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
|
||||
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
|
||||
|
||||
|
||||
|
||||
|
||||
// Calculate the product's sign
|
||||
// Negate product's sign if FNMADD or FNMSUB
|
||||
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
|
||||
// Calculate the product's sign
|
||||
// Negate product's sign if FNMADD or FNMSUB
|
||||
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Addition
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Negate Z when doing one of the following operations:
|
||||
// -prod + Z
|
||||
// prod - Z
|
||||
assign InvZ = ZSgn ^ PSgn;
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Addition
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Negate Z when doing one of the following operations:
|
||||
// -prod + Z
|
||||
// prod - Z
|
||||
assign InvZ = ZSgn ^ PSgn;
|
||||
|
||||
// Choose an inverted or non-inverted addend - the one is added later
|
||||
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
|
||||
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
|
||||
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
|
||||
// Choose an inverted or non-inverted addend - the one is added later
|
||||
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
|
||||
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
|
||||
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
|
||||
|
||||
// Do the addition
|
||||
// - add one to negate if the addend was inverted
|
||||
// - the 2 extra bits at the beginning and end are needed for rounding
|
||||
assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ};
|
||||
|
||||
// Is the sum negative
|
||||
assign NegSum = PreSum[162];
|
||||
// If the sum is negative, negate the sum.
|
||||
assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0];
|
||||
// Do the addition
|
||||
// - add one to negate if the addend was inverted
|
||||
// - the 2 extra bits at the beginning and end are needed for rounding
|
||||
assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ};
|
||||
|
||||
// Is the sum negative
|
||||
assign NegSum = PreSum[162];
|
||||
// If the sum is negative, negate the sum.
|
||||
assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0];
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Leading one detector
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Leading one detector
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//*** replace with non-behavioral code
|
||||
logic [8:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
|
||||
NormCnt = i+1; // compute shift count
|
||||
end
|
||||
//*** replace with non-behavioral code
|
||||
logic [8:0] i;
|
||||
always_comb begin
|
||||
i = 0;
|
||||
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
|
||||
NormCnt = i+1; // compute shift count
|
||||
end
|
||||
|
||||
|
||||
|
||||
@ -133,112 +137,127 @@ module fma2(
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Normalization
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|Sum);
|
||||
// Determine if the sum is zero
|
||||
assign SumZero = ~(|Sum);
|
||||
|
||||
// determine the length of the fraction based on precision
|
||||
assign FracLen = FmtM ? 13'd52 : 13'd23;
|
||||
// determine the length of the fraction based on precision
|
||||
assign FracLen = FmtM ? 13'd52 : 13'd23;
|
||||
|
||||
// Determine if the result is denormal
|
||||
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
|
||||
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
|
||||
// Determine if the result is denormal
|
||||
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
|
||||
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
|
||||
|
||||
// Determine the shift needed for denormal results
|
||||
assign SumExpTmpMinus1 = SumExpTmp-1;
|
||||
assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;
|
||||
// Determine the shift needed for denormal results
|
||||
assign SumExpTmpMinus1 = SumExpTmp-1;
|
||||
assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;
|
||||
|
||||
// Normalize the sum
|
||||
assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
|
||||
assign NormSum = SumShifted[161:108];
|
||||
// Calculate the sticky bit
|
||||
assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]);
|
||||
assign Sticky = AddendStickyM | NormSumSticky;
|
||||
// Normalize the sum
|
||||
assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
|
||||
assign NormSum = SumShifted[161:107];
|
||||
// Calculate the sticky bit
|
||||
assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]);
|
||||
assign Sticky = AddendStickyM | NormSumSticky;
|
||||
|
||||
// Determine sum's exponent
|
||||
assign SumExp = SumZero ? 13'b0 :
|
||||
ResultDenorm ? 13'b0 :
|
||||
SumExpTmp;
|
||||
// Determine sum's exponent
|
||||
assign SumExp = SumZero ? 13'b0 :
|
||||
ResultDenorm ? 13'b0 :
|
||||
SumExpTmp;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// round to nearest even
|
||||
// {Guard, Round, Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// 110/111 - Plus1
|
||||
// round to nearest even
|
||||
// {Guard, Round, Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
|
||||
// 110/111 - Plus1
|
||||
|
||||
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to -infinity
|
||||
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
// round to -infinity
|
||||
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
|
||||
|
||||
// round to infinity
|
||||
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
|
||||
// round to infinity
|
||||
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
|
||||
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
|
||||
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
|
||||
// 110/111 - Plus1
|
||||
// round to nearest max magnitude
|
||||
// {Guard, Round, Sticky}
|
||||
// 0xx - do nothing
|
||||
// 100 - tie - Plus1
|
||||
// - don't add 1 if a small number was supposed to be subtracted
|
||||
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
|
||||
// 110/111 - Plus1
|
||||
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Guard = FmtM ? NormSum[1] : NormSum[30];
|
||||
assign Round = FmtM ? NormSum[0] : NormSum[29];
|
||||
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[31];
|
||||
// determine guard, round, and least significant bit of the result
|
||||
assign Guard = FmtM ? NormSum[2] : NormSum[31];
|
||||
assign Round = FmtM ? NormSum[1] : NormSum[30];
|
||||
assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32];
|
||||
|
||||
// Determine if a small number was supposed to be subtracted
|
||||
assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM;
|
||||
// used to determine underflow flag
|
||||
assign UfGuard = FmtM ? NormSum[1] : NormSum[30];
|
||||
assign UfRound = FmtM ? NormSum[0] : NormSum[29];
|
||||
assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31];
|
||||
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: CalcPlus1 = Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&LSBNormSum&~SubBySmallNum));//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up
|
||||
3'b100: CalcPlus1 = (Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&~SubBySmallNum)));//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
// Determine if a small number was supposed to be subtracted
|
||||
assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM;
|
||||
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | Guard | Round);
|
||||
assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);
|
||||
always_comb begin
|
||||
// Determine if you add 1
|
||||
case (FrmM)
|
||||
3'b000: CalcPlus1 = Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&LSBNormSum&~SubBySmallNum));//round to nearest even
|
||||
3'b001: CalcPlus1 = 0;//round to zero
|
||||
3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down
|
||||
3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up
|
||||
3'b100: CalcPlus1 = (Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&~SubBySmallNum)));//round to nearest max magnitude
|
||||
default: CalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you add 1 (for underflow flag)
|
||||
case (FrmM)
|
||||
3'b000: UfCalcPlus1 = UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&UfLSBNormSum&~SubBySmallNum));//round to nearest even
|
||||
3'b001: UfCalcPlus1 = 0;//round to zero
|
||||
3'b010: UfCalcPlus1 = ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round down
|
||||
3'b011: UfCalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round up
|
||||
3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&~SubBySmallNum)));//round to nearest max magnitude
|
||||
default: UfCalcPlus1 = 1'bx;
|
||||
endcase
|
||||
// Determine if you subtract 1
|
||||
case (FrmM)
|
||||
3'b000: CalcMinus1 = 0;//round to nearest even
|
||||
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
|
||||
3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down
|
||||
3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up
|
||||
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
|
||||
default: CalcMinus1 = 1'bx;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
// Compute rounded result
|
||||
logic [64:0] RoundAdd;
|
||||
logic [51:0] NormSumTruncated;
|
||||
assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} :
|
||||
Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
|
||||
assign NormSumTruncated = FmtM ? NormSum[53:2] : {NormSum[53:31], 29'b0};
|
||||
// If an answer is exact don't round
|
||||
assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
|
||||
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard | UfRound);
|
||||
assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round);
|
||||
|
||||
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
|
||||
// Compute rounded result
|
||||
logic [64:0] RoundAdd;
|
||||
logic [51:0] NormSumTruncated;
|
||||
assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} :
|
||||
Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
|
||||
assign NormSumTruncated = FmtM ? NormSum[54:3] : {NormSum[54:32], 29'b0};
|
||||
|
||||
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
|
||||
assign ResultExp = FullResultExp[10:0];
|
||||
|
||||
|
||||
@ -247,58 +266,57 @@ module fma2(
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Determine the sign if the sum is zero
|
||||
// if cancelation then 0 unless round to -infinity
|
||||
// otherwise psign
|
||||
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
|
||||
// Determine the sign if the sum is zero
|
||||
// if cancelation then 0 unless round to -infinity
|
||||
// otherwise psign
|
||||
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
|
||||
|
||||
// is the result negative
|
||||
// if p - z is the Sum negative
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negative
|
||||
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
|
||||
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
|
||||
// is the result negative
|
||||
// if p - z is the Sum negative
|
||||
// if -p + z is the Sum positive
|
||||
// if -p - z then the Sum is negative
|
||||
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
|
||||
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) Inf - Inf (unless x or y is NaN)
|
||||
// 2) 0 * Inf
|
||||
// 3) any input is a signaling NaN
|
||||
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
|
||||
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
|
||||
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
|
||||
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
// - Don't set the overflow flag if an overflowed result isn't output
|
||||
assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// Set Invalid flag for following cases:
|
||||
// 1) any input is a signaling NaN
|
||||
// 2) Inf - Inf (unless x or y is NaN)
|
||||
// 3) 0 * Inf
|
||||
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
|
||||
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
|
||||
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
|
||||
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
|
||||
|
||||
// Set Overflow flag if the number is too big to be represented
|
||||
// - Don't set the overflow flag if an overflowed result isn't output
|
||||
assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Set Underflow flag if the number is too small to be represented in normal numbers
|
||||
// - Don't set the underflow flag if the result is exact
|
||||
assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
//assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1);
|
||||
assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1);
|
||||
// Set Inexact flag if the result is different from what would be output given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed result isn't output
|
||||
assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// Set Underflow flag if the number is too small to be represented in normal numbers
|
||||
// - Don't set the underflow flag if the result is exact
|
||||
assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
assign UnderflowFlag = (FullResultExp[12] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
// Set Inexact flag if the result is different from what would be output given infinite precision
|
||||
// - Don't set the underflow flag if an underflowed result isn't output
|
||||
assign Inexact = (Sticky|UfGuard|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
|
||||
|
||||
// Combine flags
|
||||
// - FMA can't set the Divide by zero flag
|
||||
// - Don't set the underflow flag if the result was rounded up to a normal number
|
||||
assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
|
||||
// Combine flags
|
||||
// - FMA can't set the Divide by zero flag
|
||||
// - Don't set the underflow flag if the result was rounded up to a normal number
|
||||
assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
|
||||
|
||||
|
||||
|
||||
@ -306,31 +324,31 @@ module fma2(
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
|
||||
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
|
||||
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
|
||||
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
|
||||
{ResultSgn, 11'h7ff, 52'b0} :
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
|
||||
{ResultSgn, 8'hff, 55'b0};
|
||||
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
|
||||
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
|
||||
assign FmaResultM = XNaNM ? XNaNResult :
|
||||
YNaNM ? YNaNResult :
|
||||
ZNaNM ? ZNaNResult :
|
||||
Invalid ? InvalidResult : // has to be before inf
|
||||
XInfM ? {PSgn, X[62:0]} :
|
||||
YInfM ? {PSgn, Y[62:0]} :
|
||||
ZInfM ? {ZSgn, Addend[62:0]} :
|
||||
Overflow ? OverflowResult :
|
||||
KillProdM ? KillProdResult : // has to be after Underflow
|
||||
Underflow & ~ResultDenorm ? UnderflowResult :
|
||||
FmtM ? {ResultSgn, ResultExp, ResultFrac} :
|
||||
{ResultSgn, ResultExp[7:0], ResultFrac, 3'b0};
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Select the result
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
|
||||
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
|
||||
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
|
||||
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
|
||||
{ResultSgn, 11'h7ff, 52'b0} :
|
||||
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
|
||||
{ResultSgn, 8'hff, 55'b0};
|
||||
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
|
||||
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
|
||||
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
|
||||
assign FmaResultM = XNaNM ? XNaNResult :
|
||||
YNaNM ? YNaNResult :
|
||||
ZNaNM ? ZNaNResult :
|
||||
Invalid ? InvalidResult : // has to be before inf
|
||||
XInfM ? {PSgn, X[62:0]} :
|
||||
YInfM ? {PSgn, Y[62:0]} :
|
||||
ZInfM ? {ZSgn, Addend[62:0]} :
|
||||
Overflow ? OverflowResult :
|
||||
KillProdM ? KillProdResult : // has to be after Underflow
|
||||
Underflow & ~ResultDenorm ? UnderflowResult :
|
||||
FmtM ? {ResultSgn, ResultExp, ResultFrac} :
|
||||
{ResultSgn, ResultExp[7:0], ResultFrac, 3'b0};
|
||||
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
///////////////////////////////////////////
|
||||
//
|
||||
// Written:
|
||||
// Modified:
|
||||
// Written: Katherine Parry, Bret Mathis
|
||||
// Modified: 6/23/2021
|
||||
//
|
||||
// Purpose: FPU
|
||||
//
|
||||
@ -25,23 +25,22 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fpu (
|
||||
input logic [2:0] FRM_REGW, // Rounding mode from CSR
|
||||
input logic reset,
|
||||
input logic clk,
|
||||
input logic reset,
|
||||
input logic [2:0] FRM_REGW, // Rounding mode from CSR
|
||||
input logic [31:0] InstrD,
|
||||
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
|
||||
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
|
||||
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
|
||||
input logic StallE, StallM, StallW,
|
||||
input logic FlushE, FlushM, FlushW,
|
||||
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
|
||||
input logic RegWriteD, // register write enable from ieu
|
||||
output logic [4:0] SetFflagsM, // FPU flags
|
||||
output logic [1:0] FMemRWM, // Read/write enable for memory {read, write}
|
||||
output logic FStallD, // Stall the decode stage if Div/Sqrt instruction
|
||||
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
|
||||
output logic [`XLEN-1:0] FWriteDataM, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
|
||||
output logic [`XLEN-1:0] FIntResM,
|
||||
output logic FDivBusyE, // Is the divison/sqrt unit busy
|
||||
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
|
||||
output logic [4:0] SetFflagsM, // FPU flags
|
||||
output logic [`XLEN-1:0] FPUResultW); // FPU result
|
||||
|
||||
// control logic signal instantiation
|
||||
@ -51,24 +50,27 @@ module fpu (
|
||||
logic FDivStartD, FDivStartE; // Start division
|
||||
logic FWriteIntD; // Write to integer register
|
||||
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
|
||||
logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory
|
||||
logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal
|
||||
logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal
|
||||
logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal
|
||||
logic FInput2UsedD; // Is input 2 used
|
||||
logic FInput3UsedD; // Is input 3 used
|
||||
logic [1:0] FMemRWD; // Read and write enable for memory
|
||||
logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal
|
||||
logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal
|
||||
logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal
|
||||
logic SrcYUsedD; // Is input 2 used
|
||||
logic SrcZUsedD; // Is input 3 used
|
||||
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
|
||||
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component
|
||||
logic SelLoadInputE, SelLoadInputM; // Select which adress to load when single precision
|
||||
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component
|
||||
logic [1:0] FResSelD, FResSelE, FResSelM;
|
||||
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
|
||||
logic [4:0] Adr1E, Adr2E, Adr3E;
|
||||
|
||||
// regfile signals //*** KEP lint warning - changed `XLEN-1 to 63
|
||||
// regfile signals
|
||||
logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining
|
||||
logic [63:0] FWDM; // Write data for FP register
|
||||
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
|
||||
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
|
||||
logic [63:0] FInput1E, FInput1M, FInput1W, FInput1tmpE; // Input 1 to the various units (after forwarding)
|
||||
logic [63:0] FInput2E, FInput2M; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] FInput3E, FInput3M; // Input 3 to the various units (after forwarding)
|
||||
logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding)
|
||||
logic [`XLEN-1:0] SrcXMAligned;
|
||||
logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding)
|
||||
logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding)
|
||||
logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
|
||||
|
||||
// div/sqrt signals
|
||||
@ -123,19 +125,14 @@ module fpu (
|
||||
logic [4:0] FAddFlagsM, FAddFlagsW;
|
||||
|
||||
// cmp signals
|
||||
logic [7:0] WE, WM;
|
||||
logic [7:0] XE, XM;
|
||||
logic ANaNE, ANaNM;
|
||||
logic BNaNE, BNaNM;
|
||||
logic AzeroE, AzeroM;
|
||||
logic BzeroE, BzeroM;
|
||||
logic CmpInvalidM, CmpInvalidW;
|
||||
logic [1:0] CmpFCCM, CmpFCCW;
|
||||
logic [63:0] FCmpResultM, FCmpResultW;
|
||||
logic CmpInvalidE, CmpInvalidM, CmpInvalidW;
|
||||
logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW;
|
||||
|
||||
// fsgn signals
|
||||
logic [63:0] SgnResultE, SgnResultM, SgnResultW;
|
||||
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
|
||||
logic [63:0] FResM, FResW;
|
||||
logic FFlgM, FFlgW;
|
||||
|
||||
// instantiation of W stage regfile signals
|
||||
logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW;
|
||||
@ -147,31 +144,9 @@ module fpu (
|
||||
logic [63:0] FPUResult64W, FPUResult64E;
|
||||
logic [4:0] FPUFlagsW;
|
||||
|
||||
// pipeline control logic
|
||||
logic PipeEnableDE;
|
||||
logic PipeEnableEM;
|
||||
logic PipeEnableMW;
|
||||
logic PipeClearDE;
|
||||
logic PipeClearEM;
|
||||
logic PipeClearMW;
|
||||
|
||||
// temporarily assign pipe clear and enable signals
|
||||
// to never flush & always be running
|
||||
localparam PipeClear = 1'b0;
|
||||
localparam PipeEnable = 1'b1;
|
||||
always_comb begin
|
||||
PipeEnableDE = ~StallE;
|
||||
PipeEnableEM = ~StallM;
|
||||
PipeEnableMW = ~StallW;
|
||||
PipeClearDE = FlushE;
|
||||
PipeClearEM = FlushM;
|
||||
PipeClearMW = FlushW;
|
||||
end
|
||||
|
||||
//DECODE STAGE
|
||||
|
||||
// Hazard unit for FPU
|
||||
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
|
||||
|
||||
// top-level controller for FPU
|
||||
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
|
||||
@ -185,40 +160,33 @@ module fpu (
|
||||
//*****************
|
||||
// fpregfile D/E pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
|
||||
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
|
||||
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
|
||||
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
|
||||
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
|
||||
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
|
||||
|
||||
//*****************
|
||||
// other D/E pipe registers
|
||||
//*****************
|
||||
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
|
||||
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
|
||||
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
|
||||
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
|
||||
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
|
||||
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
|
||||
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
|
||||
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
|
||||
flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
|
||||
flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
|
||||
flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
|
||||
flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
|
||||
flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
|
||||
flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
|
||||
flopenrc #(1) DEReg18(clk, reset, PipeClearDE, PipeEnableDE, InstrD[15], SelLoadInputE);
|
||||
|
||||
flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
|
||||
flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
|
||||
{Adr1E, Adr2E, Adr3E});
|
||||
flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE,
|
||||
{FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD},
|
||||
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE});
|
||||
|
||||
//EXECUTION STAGE
|
||||
|
||||
// input muxes for forwarding
|
||||
mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM);
|
||||
mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, FForwardInput1E, FInput1tmpE);
|
||||
mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
|
||||
mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
|
||||
mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
|
||||
// Hazard unit for FPU
|
||||
fpuhazard hazard(.*);
|
||||
|
||||
// forwarding muxes
|
||||
mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
|
||||
mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE);
|
||||
mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE);
|
||||
|
||||
|
||||
// first of two-stage instance of floating-point fused multiply-add unit
|
||||
fma1 fma1 (.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FOpCtrlE(FOpCtrlE[2:0]),.*);
|
||||
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*);
|
||||
|
||||
// first and only instance of floating-point divider
|
||||
logic fpdivClk;
|
||||
@ -229,193 +197,181 @@ module fpu (
|
||||
.ECLK(fpdivClk));
|
||||
|
||||
// capture the inputs for div/sqrt
|
||||
flopenrc #(64) reg_input1 (.d(FInput1E), .q(DivInput1E),
|
||||
flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E),
|
||||
.en(~HoldInputs), .clear(FDivSqrtDoneE),
|
||||
.reset(reset), .clk(clk));
|
||||
flopenrc #(64) reg_input2 (.d(FInput2E), .q(DivInput2E),
|
||||
flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E),
|
||||
.en(~HoldInputs), .clear(FDivSqrtDoneE),
|
||||
.reset(reset), .clk(clk));
|
||||
|
||||
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*);
|
||||
|
||||
|
||||
|
||||
// first of two-stage instance of floating-point add/cvt unit
|
||||
fpuaddcvt1 fpadd1 (.*);
|
||||
|
||||
// first of two-stage instance of floating-point comparator
|
||||
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
|
||||
fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE);
|
||||
|
||||
// first and only instance of floating-point sign converter
|
||||
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
|
||||
|
||||
// first and only instance of floating-point classify unit
|
||||
fpuclassify fpuclass (.*);
|
||||
|
||||
// output for store instructions
|
||||
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
|
||||
|
||||
//*****************
|
||||
// fpregfile E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
|
||||
flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
|
||||
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
|
||||
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM);
|
||||
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
|
||||
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
|
||||
|
||||
//*****************
|
||||
// fma E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, ProdManE, ProdManM);
|
||||
flopenrc #(162) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, AlignedAddendE, AlignedAddendM);
|
||||
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, ProdExpE, ProdExpM);
|
||||
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, AddendStickyE, AddendStickyM);
|
||||
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, KillProdE, KillProdM);
|
||||
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, XZeroE, XZeroM);
|
||||
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, YZeroE, YZeroM);
|
||||
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, ZZeroE, ZZeroM);
|
||||
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, XInfE, XInfM);
|
||||
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, YInfE, YInfM);
|
||||
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, ZInfE, ZInfM);
|
||||
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, XNaNE, XNaNM);
|
||||
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, YNaNE, YNaNM);
|
||||
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, ZNaNE, ZNaNM);
|
||||
flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
|
||||
flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
|
||||
flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
|
||||
flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM);
|
||||
flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM);
|
||||
flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM);
|
||||
flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM);
|
||||
flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM);
|
||||
flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM);
|
||||
flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM);
|
||||
flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM);
|
||||
flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM);
|
||||
flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM);
|
||||
flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM);
|
||||
|
||||
//*****************
|
||||
// fpadd E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM);
|
||||
flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM);
|
||||
flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM);
|
||||
flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM);
|
||||
flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM);
|
||||
flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM);
|
||||
flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM);
|
||||
flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM);
|
||||
flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM);
|
||||
flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM);
|
||||
flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM);
|
||||
flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM);
|
||||
flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM);
|
||||
flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM);
|
||||
flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM);
|
||||
flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M);
|
||||
flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M);
|
||||
flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
|
||||
flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
|
||||
flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM);
|
||||
flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM);
|
||||
flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM);
|
||||
flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM);
|
||||
flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM);
|
||||
flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM);
|
||||
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
|
||||
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
|
||||
flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM);
|
||||
flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
|
||||
flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM);
|
||||
flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM);
|
||||
flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM);
|
||||
flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM);
|
||||
flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM);
|
||||
flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM);
|
||||
flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM);
|
||||
flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM);
|
||||
flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM);
|
||||
flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM);
|
||||
flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM);
|
||||
flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
|
||||
flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
|
||||
flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
|
||||
flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
|
||||
flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
|
||||
flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM);
|
||||
flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM);
|
||||
flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM);
|
||||
flopenrc #(1) EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM);
|
||||
flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM);
|
||||
|
||||
//*****************
|
||||
// fpcmp E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM);
|
||||
flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM);
|
||||
flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM);
|
||||
flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM);
|
||||
flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
|
||||
flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM);
|
||||
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM);
|
||||
flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM);
|
||||
|
||||
// put this in for the event we want to delay fsgn - will otherwise bypass
|
||||
//*****************
|
||||
// fpsgn E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
|
||||
flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
|
||||
flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM);
|
||||
flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM);
|
||||
|
||||
//*****************
|
||||
// other E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
|
||||
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
|
||||
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
|
||||
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
|
||||
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
|
||||
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
|
||||
flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
|
||||
flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
|
||||
flopenrc #(1) EMReg9(clk, reset, PipeClearEM, PipeEnableEM, SelLoadInputE, SelLoadInputM);
|
||||
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
|
||||
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
|
||||
{FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
|
||||
|
||||
//*****************
|
||||
// fpuclassify E/M pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM);
|
||||
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM);
|
||||
|
||||
//BEGIN MEMORY STAGE
|
||||
|
||||
assign FWriteDataM = FmtM ? FInput1M[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FInput1M[63:32]};
|
||||
// adjacent address values are sent to the FPU; select the correct one
|
||||
// -imm is 80000 most of the time vs the error one which is 00000
|
||||
// mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM);
|
||||
// mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
|
||||
|
||||
fma2 fma2(.X(FInput1M), .Y(FInput2M), .Z(FInput3M), .FOpCtrlM(FOpCtrlM[2:0]), .*);
|
||||
mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM);
|
||||
assign FFlgM = CmpInvalidM & FResSelM[1];
|
||||
|
||||
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
|
||||
mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM);
|
||||
|
||||
// second instance of two-stage FMA unit
|
||||
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*);
|
||||
|
||||
// second instance of two-stage floating-point add/cvt unit
|
||||
fpuaddcvt2 fpadd2 (.*);
|
||||
|
||||
// second instance of two-stage floating-point comparator
|
||||
fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM),
|
||||
.Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
|
||||
|
||||
// Align SrcA to MSB when single precision
|
||||
mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//*****************
|
||||
//fpregfile M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWFpReg1(clk, reset, PipeClearMW, PipeEnableMW, FInput1M, FInput1W);
|
||||
flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW);
|
||||
flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW);
|
||||
|
||||
//*****************
|
||||
// fma M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW);
|
||||
flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW);
|
||||
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW);
|
||||
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW);
|
||||
|
||||
//*****************
|
||||
// fpdiv M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW);
|
||||
flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
|
||||
flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW);
|
||||
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
|
||||
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW);
|
||||
flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW);
|
||||
|
||||
//*****************
|
||||
// fpadd M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW);
|
||||
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW);
|
||||
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW);
|
||||
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW);
|
||||
|
||||
//*****************
|
||||
// fpcmp M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW);
|
||||
flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW);
|
||||
flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW);
|
||||
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW);
|
||||
// flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW);
|
||||
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW);
|
||||
|
||||
//*****************
|
||||
// fpsgn M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW);
|
||||
flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW);
|
||||
flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW);
|
||||
flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW);
|
||||
|
||||
//*****************
|
||||
// other M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
|
||||
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
|
||||
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
|
||||
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
|
||||
flopenrc #(64) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, AlignedSrcAM, SrcAW);
|
||||
// flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
|
||||
flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
|
||||
flopenrc #(4) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FOpCtrlM, FOpCtrlW);
|
||||
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
|
||||
{FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM},
|
||||
{FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW});
|
||||
|
||||
//*****************
|
||||
// fpuclassify M/W pipe registers
|
||||
//*****************
|
||||
flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
|
||||
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW);
|
||||
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
|
||||
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
|
||||
|
||||
|
||||
|
||||
@ -424,14 +380,6 @@ module fpu (
|
||||
//#########################################
|
||||
// BEGIN WRITEBACK STAGE
|
||||
//#########################################
|
||||
|
||||
|
||||
// mux3 #(64) FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM);
|
||||
// mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
|
||||
//***RV32D needs to give two bus transactions
|
||||
mux2 #(64) FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW);
|
||||
mux2 #(64) FLoadStoreResultMux(FLoadResultW, FInput1W, |FOpCtrlW[2:1], FLoadStoreResultW);
|
||||
|
||||
|
||||
|
||||
|
||||
@ -440,47 +388,26 @@ module fpu (
|
||||
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
// div/sqrt
|
||||
3'b000 : FPUFlagsW = FDivFlagsW;
|
||||
// cmp
|
||||
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
|
||||
//fma/mult
|
||||
3'b010 : FPUFlagsW = FmaFlagsW;
|
||||
// sgn inj
|
||||
3'b011 : FPUFlagsW = SgnFlagsW;
|
||||
// add/sub/cnvt
|
||||
3'b100 : FPUFlagsW = FAddFlagsW;
|
||||
// classify
|
||||
3'b101 : FPUFlagsW = 5'b0;
|
||||
// output SrcAW
|
||||
3'b110 : FPUFlagsW = 5'b0;
|
||||
// output FRD1
|
||||
3'b111 : FPUFlagsW = 5'b0;
|
||||
3'b000 : FPUFlagsW = 5'b0;
|
||||
3'b001 : FPUFlagsW = FmaFlagsW;
|
||||
3'b010 : FPUFlagsW = FAddFlagsW;
|
||||
3'b011 : FPUFlagsW = FDivFlagsW;
|
||||
3'b100 : FPUFlagsW = {4'b0,FFlgW};
|
||||
default : FPUFlagsW = 5'bxxxxx;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
always_comb begin
|
||||
case (FResultSelW)
|
||||
// div/sqrt
|
||||
3'b000 : FPUResult64W = FDivResultW;
|
||||
// cmp
|
||||
3'b001 : FPUResult64W = FCmpResultW;
|
||||
//fma/mult
|
||||
3'b010 : FPUResult64W = FmaResultW;
|
||||
// sgn inj
|
||||
3'b011 : FPUResult64W = SgnResultW;
|
||||
// add/sub/cnvt
|
||||
3'b100 : FPUResult64W = FAddResultW;
|
||||
// classify
|
||||
3'b101 : FPUResult64W = ClassResultW;
|
||||
// output SrcAW
|
||||
3'b110 : FPUResult64W = SrcAW;
|
||||
// Load/Store/Move to FP-register
|
||||
3'b111 : FPUResult64W = FLoadStoreResultW;
|
||||
default : FPUResult64W = {64{1'bx}};
|
||||
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
|
||||
3'b001 : FPUResult64W = FmaResultW;
|
||||
3'b010 : FPUResult64W = FAddResultW;
|
||||
3'b011 : FPUResult64W = FDivResultW;
|
||||
3'b100 : FPUResult64W = FResW;
|
||||
default : FPUResult64W = 64'bxxxxx;
|
||||
endcase
|
||||
end // always_comb
|
||||
end
|
||||
|
||||
|
||||
// interface between XLEN size datapath and double-precision sized
|
||||
// floating-point results
|
||||
|
@ -27,10 +27,10 @@
|
||||
//
|
||||
|
||||
|
||||
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE);
|
||||
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE);
|
||||
|
||||
input logic [63:0] FInput1E; // 1st input operand (A)
|
||||
input logic [63:0] FInput2E; // 2nd input operand (B)
|
||||
input logic [63:0] SrcXE; // 1st input operand (A)
|
||||
input logic [63:0] SrcYE; // 2nd input operand (B)
|
||||
input logic [3:0] FOpCtrlE; // Function opcode
|
||||
input logic FmtE; // Result Precision (1 for double, 0 for single)
|
||||
|
||||
@ -81,12 +81,12 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
|
||||
// and the sign of the first operand is set appropriately based on
|
||||
// if the operation is absolute value or negation.
|
||||
|
||||
convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P);
|
||||
convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);
|
||||
|
||||
// Test for exceptions and return the "Invalid Operation" and
|
||||
// "Denormalized" Input Flags. The "AddSelInvE" is used in
|
||||
// the third pipeline stage to select the result. Also, AddOp1NormE
|
||||
// and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized.
|
||||
// and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized.
|
||||
// sub is one if the effective operation is subtraction.
|
||||
|
||||
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
|
||||
@ -159,8 +159,8 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
|
||||
|
||||
// Place either the sign-extended 32-bit value or the original 64-bit value
|
||||
// into IntValue (to be used for integer to floating point conversion)
|
||||
assign IntValue [31:0] = FInput1E[31:0];
|
||||
assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32];
|
||||
assign IntValue [31:0] = SrcXE[31:0];
|
||||
assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32];
|
||||
|
||||
// If doing an integer to floating point conversion, mantissaA3 is set to
|
||||
// IntVal and the prenormalized exponent is set to 1084. Otherwise,
|
||||
|
@ -1,7 +1,8 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fpuclassify (
|
||||
input logic [63:0] FInput1E,
|
||||
input logic [63:0] SrcXE,
|
||||
input logic FmtE, // 0-single 1-double
|
||||
output logic [63:0] ClassResultE
|
||||
);
|
||||
@ -13,9 +14,9 @@ module fpuclassify (
|
||||
logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan;
|
||||
|
||||
// single and double precision layouts
|
||||
assign single = FInput1E[63:32];
|
||||
assign double = FInput1E;
|
||||
assign sign = FInput1E[63];
|
||||
assign single = SrcXE[63:32];
|
||||
assign double = SrcXE;
|
||||
assign sign = SrcXE[63];
|
||||
|
||||
// basic calculations for readability
|
||||
assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23];
|
||||
@ -43,10 +44,7 @@ module fpuclassify (
|
||||
// bit 7 - +infinity
|
||||
// bit 8 - signaling NaN
|
||||
// bit 9 - quiet NaN
|
||||
assign ClassResultE = FmtE ? {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
|
||||
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity} :
|
||||
{{22{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
|
||||
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity, {32{1'b0}}};
|
||||
|
||||
assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
|
||||
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity};
|
||||
|
||||
endmodule
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
//
|
||||
// File name : fpcomp.v
|
||||
// Title : Floating-Point Comparator
|
||||
@ -17,9 +18,9 @@
|
||||
// and correct for sign bits
|
||||
//
|
||||
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// signals, and a 2-bit signal Sel that indicates the type of
|
||||
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// Sel Description
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
@ -37,24 +38,41 @@
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
|
||||
module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec
|
||||
|
||||
input logic [63:0] op1;
|
||||
input logic [63:0] op2;
|
||||
input logic [1:0] Sel;
|
||||
`include "wally-config.vh"
|
||||
module fpucmp1 (
|
||||
input logic [63:0] op1,
|
||||
input logic [63:0] op2,
|
||||
input logic [2:0] FOpCtrlE,
|
||||
input logic FmtE,
|
||||
|
||||
output logic [7:0] w, x;
|
||||
output logic ANaN, BNaN;
|
||||
output logic Azero, Bzero;
|
||||
|
||||
output logic Invalid, // Invalid Operation
|
||||
// output logic [1:0] FCC, // Condition Codes
|
||||
output logic [63:0] FCmpResultE);
|
||||
// Perform magnitude comparison between the 63 least significant bits
|
||||
// of the input operands. Only LT and EQ are returned, since GT can
|
||||
// be determined from these values.
|
||||
logic [1:0] FCC; // Condition Codes
|
||||
logic [7:0] w, x;
|
||||
logic ANaN, BNaN;
|
||||
logic Azero, Bzero;
|
||||
logic LT; // magnitude op1 < magnitude op2
|
||||
logic EQ; // magnitude op1 = magnitude op2
|
||||
|
||||
magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
|
||||
|
||||
// Determine final values based on output of magnitude comparison,
|
||||
// sign bits, and special case testing.
|
||||
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE);
|
||||
|
||||
// Perform magnitude comparison between the 63 least significant bits
|
||||
// of the input operands. Only LT and EQ are returned, since GT can
|
||||
// be determined from these values.
|
||||
magcompare64b_1 magcomp2 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
|
||||
magcompare64b_2 magcomp2 (LT, EQ, w, x);
|
||||
|
||||
// Determine final values based on output of magnitude comparison,
|
||||
// sign bits, and special case testing.
|
||||
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, Sel);
|
||||
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*);
|
||||
|
||||
endmodule // fpcomp
|
||||
|
||||
@ -178,9 +196,9 @@ module magcompare64b_1 (w, x, A, B);
|
||||
endmodule // magcompare64b
|
||||
|
||||
// This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
|
||||
// flags LT_mag and EQ_mag, and a 3-bit signal FOpCtrlE whose low two bits indicate the type of
|
||||
// operands being compared as indicated below.
|
||||
// Sel Description
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
@ -196,11 +214,11 @@ endmodule // magcompare64b
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN.
|
||||
|
||||
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
|
||||
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);
|
||||
|
||||
input logic [63:0] A;
|
||||
input logic [63:0] B;
|
||||
input logic [1:0] Sel;
|
||||
input logic [2:0] FOpCtrlE;
|
||||
|
||||
logic dp, sp, hp;
|
||||
|
||||
@ -209,9 +227,9 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
|
||||
output logic Azero;
|
||||
output logic Bzero;
|
||||
|
||||
assign dp = !Sel[1]&!Sel[0];
|
||||
assign sp = !Sel[1]&Sel[0];
|
||||
assign hp = Sel[1]&!Sel[0];
|
||||
assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
|
||||
assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
|
||||
assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
|
||||
|
||||
// Test if A or B is NaN.
|
||||
assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) &
|
||||
@ -232,3 +250,216 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
|
||||
assign Bzero = (B[62:0] == 63'h0);
|
||||
|
||||
endmodule // exception_cmp
|
||||
//
|
||||
// File name : fpcomp.v
|
||||
// Title : Floating-Point Comparator
|
||||
// project : FPU
|
||||
// Library : fpcomp
|
||||
// Author(s) : James E. Stine
|
||||
// Purpose : definition of main unit to floating-point comparator
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
//
|
||||
// Floating Point Comparator (Algorithm)
|
||||
//
|
||||
// 1.) Performs sign-extension if the inputs are 32-bit integers.
|
||||
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
|
||||
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
|
||||
// and correct for sign bits
|
||||
//
|
||||
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 (unused)
|
||||
//
|
||||
// The comparator produces a 2-bit signal FCC, which
|
||||
// indicates the result of the comparison:
|
||||
//
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
//
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
|
||||
|
||||
/*module magcompare2b (LT, GT, A, B);
|
||||
|
||||
input logic [1:0] A;
|
||||
input logic [1:0] B;
|
||||
|
||||
output logic LT;
|
||||
output logic GT;
|
||||
|
||||
// Determine if A < B using a minimized sum-of-products expression
|
||||
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// Determine if A > B using a minimized sum-of-products expression
|
||||
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
endmodule*/ // magcompare2b
|
||||
|
||||
// 2-bit magnitude comparator
|
||||
// This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
// and GT is '1' if A > B. LT and GT are both '0' if A = B. However,
|
||||
// this version actually incorporates don't cares into the equation to
|
||||
// simplify the optimization
|
||||
|
||||
// module magcompare2c (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// assign LT = B[1] | (!A[1]&B[0]);
|
||||
// assign GT = A[1] | (!B[1]&A[0]);
|
||||
|
||||
// endmodule // magcompare2b
|
||||
|
||||
// This module compares two 64-bit values A and B. LT is '1' if A < B
|
||||
// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
|
||||
// This structure was modified so
|
||||
// that it only does a strict magnitude comparison, and only
|
||||
// returns flags for less than (LT) and equal to (EQ). It uses a tree
|
||||
// of 63 2-bit magnitude comparators, followed by one OR gate.
|
||||
//
|
||||
// J. E. Stine and M. J. Schulte, "A combined two's complement and
|
||||
// floating-point comparator," 2005 IEEE International Symposium on
|
||||
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
|
||||
// doi: 10.1109/ISCAS.2005.1464531
|
||||
|
||||
// Second stage of the split 64-bit magnitude comparator: reduces the two 8-bit
// vectors w and x to the LT and EQ flags through a three-level tree of
// magcompare2c cells (4 cells, then 2 cells, then 1 cell).
// NOTE(review): the meaning of w/x is fixed by the first stage (magcompare64b_1),
// whose body is not visible here - confirm against that module.
// NOTE(review): the only magcompare2c definition visible in this file is commented
// out; it lists its ports as (LT, GT, A, B) - confirm against the live definition.
module magcompare64b_2 (LT, EQ, w, x);
|
||||
|
||||
input logic [7:0] w; // first 8-bit vector from stage 1
|
||||
input logic [7:0] x; // second 8-bit vector from stage 1
|
||||
logic [3:0] y; // level-1 results: first output port of mag39..mag3C
|
||||
logic [3:0] z; // level-1 results: second output port of mag39..mag3C
|
||||
logic [1:0] a; // level-2 results: first output port of mag3D/mag3E
|
||||
logic [1:0] b; // level-2 results: second output port of mag3D/mag3E
|
||||
logic GT; // internal only; used to derive EQ below
|
||||
|
||||
output logic LT; // first output port of the final cell mag3F
|
||||
output logic EQ; // set when neither LT nor GT is set
|
||||
|
||||
// Level 1: compare x against w two bits at a time
magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
|
||||
magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
|
||||
magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
|
||||
magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
|
||||
|
||||
// Level 2: combine the level-1 results pairwise (note z is passed before y)
magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
|
||||
magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
|
||||
|
||||
// Level 3: final cell produces LT and the internal GT (note b is passed before a)
magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
|
||||
|
||||
// Equal exactly when the final cell reports neither less-than nor greater-than
assign EQ = ~(LT | GT);
|
||||
|
||||
endmodule // magcompare64b
|
||||
|
||||
// This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// flags LT_mag and EQ_mag, and a 3-bit signal FOpCtrlE whose low two bits indicate the type of
|
||||
// operands being compared as indicated below.
|
||||
// FOpCtrlE Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 bfloat precision numbers
|
||||
//
|
||||
// The comparator produces a 2-bit signal fcc, which
|
||||
// indicates the result of the comparison as follows:
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN.
|
||||
|
||||
// Final stage of the floating-point comparator. From the magnitude flags
// (LT_mag, EQ_mag), the operand sign bits (A[63], B[63]) and the NaN/zero flags
// it derives EQ/LT/GT/UO, the "invalid" flag, the 2-bit condition code fcc, and
// the 64-bit result FCmpResultE selected by FOpCtrlE (min, max, eq, lt, le).
module exception_cmp_2 (
|
||||
input logic [63:0] A, // first operand (sign bit and NaN quiet-bit are read)
|
||||
input logic [63:0] B, // second operand
|
||||
input logic FmtE, // NOTE(review): not referenced anywhere inside this module
|
||||
input logic LT_mag, // magnitude comparison: less-than flag
|
||||
input logic EQ_mag, // magnitude comparison: equal flag
|
||||
input logic [2:0] FOpCtrlE, // [1:0] is decoded into dp/sp/hp; [2:0] selects the result in the case below
|
||||
|
||||
output logic invalid, // set when either operand is detected as a signaling NaN
|
||||
output logic [1:0] fcc, // 00: A = B, 01: A < B, 10: A > B, 11: unordered
|
||||
output logic [63:0] FCmpResultE, // selected comparison/min/max result
|
||||
|
||||
input logic Azero, // A is zero (computed outside this module)
|
||||
input logic Bzero, // B is zero (computed outside this module)
|
||||
input logic ANaN, // A is NaN (computed outside this module)
|
||||
input logic BNaN); // B is NaN (computed outside this module)
|
||||
|
||||
logic dp; // FOpCtrlE[1:0] == 2'b00
|
||||
logic sp; // FOpCtrlE[1:0] == 2'b01
|
||||
logic hp; // FOpCtrlE[1:0] == 2'b10
|
||||
logic ASNaN; // A is a signaling NaN
|
||||
logic BSNaN; // B is a signaling NaN
|
||||
logic UO; // unordered: at least one operand is NaN
|
||||
logic GT; // A > B
|
||||
logic LT; // A < B
|
||||
logic EQ; // A = B
|
||||
logic [62:0] sixtythreezeros = 63'h0; // NOTE(review): never referenced in this module
|
||||
|
||||
// Precision decode taken from the two low bits of FOpCtrlE.
// NOTE(review): the same bits also select the operation in the case statement
// at the bottom (e.g. 3'b010 "equal" decodes as hp, 3'b011 "less than or equal"
// decodes as none of dp/sp/hp, so ASNaN/BSNaN can never be set for it) -
// confirm whether the precision should come from FmtE instead.
assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
|
||||
assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
|
||||
assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
|
||||
|
||||
// Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
|
||||
// point comparison is being performed).
|
||||
assign UO = (ANaN | BNaN);
|
||||
|
||||
// Test if A or B is a signaling NaN.
// NOTE(review): the tested bits are 50 (dp), 53 (sp) and 56 (hp); confirm these against the operand layout.
|
||||
assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
|
||||
assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
|
||||
|
||||
// If either A or B is a signaling NaN the "Invalid Operation"
|
||||
// exception flag is set to one; otherwise it is zero.
|
||||
assign invalid = (ASNaN | BSNaN);
|
||||
|
||||
// A and B are equal if (their magnitudes are equal) AND ((their signs are
|
||||
// equal) or (their magnitudes are zero AND they are floating point
|
||||
// numbers)). Also, A and B are not equal if they are unordered.
|
||||
assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
|
||||
|
||||
// A is less than B if (A is negative and B is positive) OR
|
||||
// (A and B are positive and the magnitude of A is less than
|
||||
// the magnitude of B) or (A and B are negative integers and
|
||||
// the magnitude of A is less than the magnitude of B) or
|
||||
// (A and B are negative floating point numbers and
|
||||
// the magnitude of A is greater than the magnitude of B).
|
||||
// Also, A is not less than B if A and B are equal or unordered.
// NOTE(review): the expression below has no separate integer path; it only inverts LT_mag when both sign bits are set.
|
||||
assign LT = ((~LT_mag & A[63] & B[63]) |
|
||||
(LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
|
||||
|
||||
// A is greater than B when LT, EQ, and UO are false.
|
||||
assign GT = ~(LT | EQ | UO);
|
||||
|
||||
// Note: it may be possible to optimize the setting of fcc
|
||||
// a little more, but it is probably not worth the effort.
|
||||
|
||||
// Set the bits of fcc based on LT, GT, EQ, and UO
|
||||
assign fcc[0] = LT | UO;
|
||||
assign fcc[1] = GT | UO;
|
||||
|
||||
// Select the 64-bit result by operation code.
// NOTE(review): when either operand is NaN, LT and GT are both 0, so min and max
// both return B regardless of which operand is the NaN - confirm intended.
always_comb begin
|
||||
case (FOpCtrlE[2:0])
|
||||
3'b111: FCmpResultE = LT ? A : B;//min
|
||||
3'b101: FCmpResultE = GT ? A : B;//max
|
||||
3'b010: FCmpResultE = {63'b0, EQ};//equal
|
||||
3'b001: FCmpResultE = {63'b0, LT};//less than
|
||||
3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal
|
||||
default: FCmpResultE = 64'b0; // any other opcode yields zero
|
||||
endcase
|
||||
end
|
||||
|
||||
endmodule // exception_cmp
|
||||
|
@ -1,243 +1,243 @@
|
||||
//
|
||||
// File name : fpcomp.v
|
||||
// Title : Floating-Point Comparator
|
||||
// project : FPU
|
||||
// Library : fpcomp
|
||||
// Author(s) : James E. Stine
|
||||
// Purpose : definition of main unit to floating-point comparator
|
||||
// notes :
|
||||
//
|
||||
// Copyright Oklahoma State University
|
||||
//
|
||||
// Floating Point Comparator (Algorithm)
|
||||
//
|
||||
// 1.) Performs sign-extension if the inputs are 32-bit integers.
|
||||
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
|
||||
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
|
||||
// and correct for sign bits
|
||||
//
|
||||
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// signals, and a 2-bit signal Sel that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// Sel Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 (unused)
|
||||
//
|
||||
// The comparator produces a 2-bit signal FCC, which
|
||||
// indicates the result of the comparison:
|
||||
//
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
//
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN per 754
|
||||
// //
|
||||
// // File name : fpcomp.v
|
||||
// // Title : Floating-Point Comparator
|
||||
// // project : FPU
|
||||
// // Library : fpcomp
|
||||
// // Author(s) : James E. Stine
|
||||
// // Purpose : definition of main unit to floating-point comparator
|
||||
// // notes :
|
||||
// //
|
||||
// // Copyright Oklahoma State University
|
||||
// //
|
||||
// // Floating Point Comparator (Algorithm)
|
||||
// //
|
||||
// // 1.) Performs sign-extension if the inputs are 32-bit integers.
|
||||
// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
|
||||
// // 3.) Check for special cases (+0=-0, unordered, and infinite values)
|
||||
// // and correct for sign bits
|
||||
// //
|
||||
// // This module takes 64-bits inputs op1 and op2, VSS, and VDD
|
||||
// // signals, and a 2-bit signal Sel that indicates the type of
|
||||
// // operands being compared as indicated below.
|
||||
// // Sel Description
|
||||
// // 00 double precision numbers
|
||||
// // 01 single precision numbers
|
||||
// // 10 half precision numbers
|
||||
// // 11 (unused)
|
||||
// //
|
||||
// // The comparator produces a 2-bit signal FCC, which
|
||||
// // indicates the result of the comparison:
|
||||
// //
|
||||
// // fcc description
|
||||
// // 00 A = B
|
||||
// // 01 A < B
|
||||
// // 10 A > B
|
||||
// // 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// //
|
||||
// // It also produces an invalid operation flag, which is one
|
||||
// // if either of the input operands is a signaling NaN per 754
|
||||
|
||||
module fpucmp2 (
|
||||
input logic [63:0] op1,
|
||||
input logic [63:0] op2,
|
||||
input logic [1:0] Sel,
|
||||
input logic [7:0] w, x,
|
||||
input logic ANaN, BNaN,
|
||||
input logic Azero, Bzero,
|
||||
input logic [3:0] FOpCtrlM,
|
||||
input logic FmtM,
|
||||
// module fpucmp2 (
|
||||
// input logic [63:0] op1,
|
||||
// input logic [63:0] op2,
|
||||
// input logic [1:0] Sel,
|
||||
// input logic [7:0] w, x,
|
||||
// input logic ANaN, BNaN,
|
||||
// input logic Azero, Bzero,
|
||||
// input logic [3:0] FOpCtrlM,
|
||||
// input logic FmtM,
|
||||
|
||||
output logic Invalid, // Invalid Operation
|
||||
output logic [1:0] FCC, // Condition Codes
|
||||
output logic [63:0] FCmpResultM);
|
||||
// output logic Invalid, // Invalid Operation
|
||||
// output logic [1:0] FCC, // Condition Codes
|
||||
// output logic [63:0] FCmpResultM);
|
||||
|
||||
logic LT; // magnitude op1 < magnitude op2
|
||||
logic EQ; // magnitude op1 = magnitude op2
|
||||
// logic LT; // magnitude op1 < magnitude op2
|
||||
// logic EQ; // magnitude op1 = magnitude op2
|
||||
|
||||
// Perform magnitude comparison between the 63 least significant bits
|
||||
// of the input operands. Only LT and EQ are returned, since GT can
|
||||
// be determined from these values.
|
||||
magcompare64b_2 magcomp2 (LT, EQ, w, x);
|
||||
// // Perform magnitude comparison between the 63 least significant bits
|
||||
// // of the input operands. Only LT and EQ are returned, since GT can
|
||||
// // be determined from these values.
|
||||
// magcompare64b_2 magcomp2 (LT, EQ, w, x);
|
||||
|
||||
// Determine final values based on output of magnitude comparison,
|
||||
// sign bits, and special case testing.
|
||||
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*);
|
||||
// // Determine final values based on output of magnitude comparison,
|
||||
// // sign bits, and special case testing.
|
||||
// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*);
|
||||
|
||||
|
||||
endmodule // fpcomp
|
||||
// endmodule // fpcomp
|
||||
|
||||
/*module magcompare2b (LT, GT, A, B);
|
||||
|
||||
input logic [1:0] A;
|
||||
input logic [1:0] B;
|
||||
|
||||
output logic LT;
|
||||
output logic GT;
|
||||
|
||||
// Determine if A < B using a minimized sum-of-products expression
|
||||
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// Determine if A > B using a minimized sum-of-products expression
|
||||
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
endmodule*/ // magcompare2b
|
||||
|
||||
// 2-bit magnitude comparator
|
||||
// This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
|
||||
// this version actually incorporates don't cares into the equation to
|
||||
// simplify the optimization
|
||||
|
||||
// module magcompare2c (LT, GT, A, B);
|
||||
// /*module magcompare2b (LT, GT, A, B);
|
||||
|
||||
// input logic [1:0] A;
|
||||
// input logic [1:0] B;
|
||||
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
// output logic LT;
|
||||
// output logic GT;
|
||||
|
||||
// assign LT = B[1] | (!A[1]&B[0]);
|
||||
// assign GT = A[1] | (!B[1]&A[0]);
|
||||
// // Determine if A < B using a minimized sum-of-products expression
|
||||
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
|
||||
// // Determine if A > B using a minimized sum-of-products expression
|
||||
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
|
||||
|
||||
// endmodule // magcompare2b
|
||||
// endmodule*/ // magcompare2b
|
||||
|
||||
// This module compares two 64-bit values A and B. LT is '1' if A < B
|
||||
// and EQ is '1'if A = B. LT and GT are both '0' if A > B.
|
||||
// This structure was modified so
|
||||
// that it only does a strict magnitude comparison, and only
|
||||
// returns flags for less than (LT) and equal to (EQ). It uses a tree
|
||||
// of 63 2-bit magnitude comparators, followed by one OR gate.
|
||||
//
|
||||
// J. E. Stine and M. J. Schulte, "A combined two's complement and
|
||||
// floating-point comparator," 2005 IEEE International Symposium on
|
||||
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
|
||||
// doi: 10.1109/ISCAS.2005.1464531
|
||||
// // 2-bit magnitude comparator
|
||||
// // This module compares two 2-bit values A and B. LT is '1' if A < B
|
||||
// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
|
||||
// // this version actually incorporates don't cares into the equation to
|
||||
// // simplify the optimization
|
||||
|
||||
module magcompare64b_2 (LT, EQ, w, x);
|
||||
// // module magcompare2c (LT, GT, A, B);
|
||||
|
||||
input logic [7:0] w;
|
||||
input logic [7:0] x;
|
||||
logic [3:0] y;
|
||||
logic [3:0] z;
|
||||
logic [1:0] a;
|
||||
logic [1:0] b;
|
||||
logic GT;
|
||||
// // input logic [1:0] A;
|
||||
// // input logic [1:0] B;
|
||||
|
||||
output logic LT;
|
||||
output logic EQ;
|
||||
// // output logic LT;
|
||||
// // output logic GT;
|
||||
|
||||
// // assign LT = B[1] | (!A[1]&B[0]);
|
||||
// // assign GT = A[1] | (!B[1]&A[0]);
|
||||
|
||||
// // endmodule // magcompare2b
|
||||
|
||||
// // This module compares two 64-bit values A and B. LT is '1' if A < B
|
||||
// // and EQ is '1'if A = B. LT and GT are both '0' if A > B.
|
||||
// // This structure was modified so
|
||||
// // that it only does a strict magnitude comparison, and only
|
||||
// // returns flags for less than (LT) and equal to (EQ). It uses a tree
|
||||
// // of 63 2-bit magnitude comparators, followed by one OR gate.
|
||||
// //
|
||||
// // J. E. Stine and M. J. Schulte, "A combined two's complement and
|
||||
// // floating-point comparator," 2005 IEEE International Symposium on
|
||||
// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
|
||||
// // doi: 10.1109/ISCAS.2005.1464531
|
||||
|
||||
// module magcompare64b_2 (LT, EQ, w, x);
|
||||
|
||||
// input logic [7:0] w;
|
||||
// input logic [7:0] x;
|
||||
// logic [3:0] y;
|
||||
// logic [3:0] z;
|
||||
// logic [1:0] a;
|
||||
// logic [1:0] b;
|
||||
// logic GT;
|
||||
|
||||
magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
|
||||
magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
|
||||
magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
|
||||
magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
|
||||
// output logic LT;
|
||||
// output logic EQ;
|
||||
|
||||
magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
|
||||
magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
|
||||
// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
|
||||
// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
|
||||
// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
|
||||
// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
|
||||
|
||||
magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
|
||||
|
||||
assign EQ = ~(LT | GT);
|
||||
|
||||
endmodule // magcompare64b
|
||||
|
||||
// This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
|
||||
// operands being compared as indicated below.
|
||||
// Sel Description
|
||||
// 00 double precision numbers
|
||||
// 01 single precision numbers
|
||||
// 10 half precision numbers
|
||||
// 11 bfloat precision numbers
|
||||
//
|
||||
// The comparator produces a 2-bit signal fcc, which
|
||||
// indicates the result of the comparison as follows:
|
||||
// fcc description
|
||||
// 00 A = B
|
||||
// 01 A < B
|
||||
// 10 A > B
|
||||
// 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// It also produces an invalid operation flag, which is one
|
||||
// if either of the input operands is a signaling NaN.
|
||||
|
||||
module exception_cmp_2 (
|
||||
input logic [63:0] A,
|
||||
input logic [63:0] B,
|
||||
input logic FmtM,
|
||||
input logic LT_mag,
|
||||
input logic EQ_mag,
|
||||
input logic [1:0] Sel,
|
||||
input logic [3:0] FOpCtrlM,
|
||||
// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
|
||||
// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
|
||||
|
||||
output logic invalid,
|
||||
output logic [1:0] fcc,
|
||||
output logic [63:0] FCmpResultM,
|
||||
// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
|
||||
|
||||
input logic Azero,
|
||||
input logic Bzero,
|
||||
input logic ANaN,
|
||||
input logic BNaN);
|
||||
// assign EQ = ~(LT | GT);
|
||||
|
||||
// endmodule // magcompare64b
|
||||
|
||||
// // This module takes 64-bits inputs A and B, two magnitude comparison
|
||||
// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
|
||||
// // operands being compared as indicated below.
|
||||
// // Sel Description
|
||||
// // 00 double precision numbers
|
||||
// // 01 single precision numbers
|
||||
// // 10 half precision numbers
|
||||
// // 11 bfloat precision numbers
|
||||
// //
|
||||
// // The comparator produces a 2-bit signal fcc, which
|
||||
// // indicates the result of the comparison as follows:
|
||||
// // fcc description
|
||||
// // 00 A = B
|
||||
// // 01 A < B
|
||||
// // 10 A > B
|
||||
// // 11 A and B are unordered (i.e., A or B is NaN)
|
||||
// // It also produces an invalid operation flag, which is one
|
||||
// // if either of the input operands is a signaling NaN.
|
||||
|
||||
// module exception_cmp_2 (
|
||||
// input logic [63:0] A,
|
||||
// input logic [63:0] B,
|
||||
// input logic FmtM,
|
||||
// input logic LT_mag,
|
||||
// input logic EQ_mag,
|
||||
// input logic [1:0] Sel,
|
||||
// input logic [3:0] FOpCtrlM,
|
||||
|
||||
logic dp;
|
||||
logic sp;
|
||||
logic hp;
|
||||
logic ASNaN;
|
||||
logic BSNaN;
|
||||
logic UO;
|
||||
logic GT;
|
||||
logic LT;
|
||||
logic EQ;
|
||||
logic [62:0] sixtythreezeros = 63'h0;
|
||||
// output logic invalid,
|
||||
// output logic [1:0] fcc,
|
||||
// output logic [63:0] FCmpResultM,
|
||||
|
||||
assign dp = !Sel[1]&!Sel[0];
|
||||
assign sp = !Sel[1]&Sel[0];
|
||||
assign hp = Sel[1]&!Sel[0];
|
||||
|
||||
// Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
|
||||
// point comparison is being performed).
|
||||
assign UO = (ANaN | BNaN);
|
||||
|
||||
// Test if A or B is a signaling NaN.
|
||||
assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
|
||||
assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
|
||||
|
||||
// If either A or B is a signaling NaN the "Invalid Operation"
|
||||
// exception flag is set to one; otherwise it is zero.
|
||||
assign invalid = (ASNaN | BSNaN);
|
||||
|
||||
// A and B are equal if (their magnitudes are equal) AND ((their signs are
|
||||
// equal) or (their magnitudes are zero AND they are floating point
|
||||
// numbers)). Also, A and B are not equal if they are unordered.
|
||||
assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
|
||||
// input logic Azero,
|
||||
// input logic Bzero,
|
||||
// input logic ANaN,
|
||||
// input logic BNaN);
|
||||
|
||||
// A is less than B if (A is negative and B is positive) OR
|
||||
// (A and B are positive and the magnitude of A is less than
|
||||
// the magnitude of B) or (A and B are negative integers and
|
||||
// the magnitude of A is less than the magnitude of B) or
|
||||
// (A and B are negative floating point numbers and
|
||||
// the magnitude of A is greater than the magnitude of B).
|
||||
// Also, A is not less than B if A and B are equal or unordered.
|
||||
assign LT = ((~LT_mag & A[63] & B[63]) |
|
||||
(LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
|
||||
// logic dp;
|
||||
// logic sp;
|
||||
// logic hp;
|
||||
// logic ASNaN;
|
||||
// logic BSNaN;
|
||||
// logic UO;
|
||||
// logic GT;
|
||||
// logic LT;
|
||||
// logic EQ;
|
||||
// logic [62:0] sixtythreezeros = 63'h0;
|
||||
|
||||
// assign dp = !Sel[1]&!Sel[0];
|
||||
// assign sp = !Sel[1]&Sel[0];
|
||||
// assign hp = Sel[1]&!Sel[0];
|
||||
|
||||
// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
|
||||
// // point comparison is being performed).
|
||||
// assign UO = (ANaN | BNaN);
|
||||
|
||||
// // Test if A or B is a signaling NaN.
|
||||
// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
|
||||
// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
|
||||
|
||||
// // If either A or B is a signaling NaN the "Invalid Operation"
|
||||
// // exception flag is set to one; otherwise it is zero.
|
||||
// assign invalid = (ASNaN | BSNaN);
|
||||
|
||||
// // A and B are equal if (their magnitudes are equal) AND ((their signs are
|
||||
// // equal) or (their magnitudes are zero AND they are floating point
|
||||
// // numbers)). Also, A and B are not equal if they are unordered.
|
||||
// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
|
||||
|
||||
// A is greater than B when LT, EQ, and UO are false.
|
||||
assign GT = ~(LT | EQ | UO);
|
||||
// // A is less than B if (A is negative and B is positive) OR
|
||||
// // (A and B are positive and the magnitude of A is less than
|
||||
// // the magnitude of B) or (A and B are negative integers and
|
||||
// // the magnitude of A is less than the magnitude of B) or
|
||||
// // (A and B are negative floating point numbers and
|
||||
// // the magnitude of A is greater than the magnitude of B).
|
||||
// // Also, A is not less than B if A and B are equal or unordered.
|
||||
// assign LT = ((~LT_mag & A[63] & B[63]) |
|
||||
// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
|
||||
|
||||
// // A is greater than B when LT, EQ, and UO are false.
|
||||
// assign GT = ~(LT | EQ | UO);
|
||||
|
||||
// Note: it may be possible to optimize the setting of fcc
|
||||
// a little more, but it is probably not worth the effort.
|
||||
// // Note: it may be possible to optimize the setting of fcc
|
||||
// // a little more, but it is probably not worth the effort.
|
||||
|
||||
// Set the bits of fcc based on LT, GT, EQ, and UO
|
||||
assign fcc[0] = LT | UO;
|
||||
assign fcc[1] = GT | UO;
|
||||
// // Set the bits of fcc based on LT, GT, EQ, and UO
|
||||
// assign fcc[0] = LT | UO;
|
||||
// assign fcc[1] = GT | UO;
|
||||
|
||||
always_comb begin
|
||||
case (FOpCtrlM[2:0])
|
||||
3'b111: FCmpResultM = LT ? A : B;//min
|
||||
3'b101: FCmpResultM = GT ? A : B;//max
|
||||
3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal
|
||||
3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than
|
||||
3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal
|
||||
default: FCmpResultM = 64'b0;
|
||||
endcase
|
||||
end
|
||||
// always_comb begin
|
||||
// case (FOpCtrlM[2:0])
|
||||
// 3'b111: FCmpResultM = LT ? A : B;//min
|
||||
// 3'b101: FCmpResultM = GT ? A : B;//max
|
||||
// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal
|
||||
// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than
|
||||
// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal
|
||||
// default: FCmpResultM = 64'b0;
|
||||
// endcase
|
||||
// end
|
||||
|
||||
|
||||
endmodule // exception_cmp
|
||||
// endmodule // exception_cmp
|
||||
|
@ -26,47 +26,41 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module fpuhazard(
|
||||
input logic [4:0] Adr1, Adr2, Adr3,
|
||||
input logic FWriteEnE, FWriteEnM, FWriteEnW,
|
||||
input logic [4:0] RdE, RdM, RdW,
|
||||
input logic FDivBusyE,
|
||||
input logic RegWriteD,
|
||||
input logic [2:0] FResultSelD, FResultSelE,
|
||||
input logic IllegalFPUInstrD,
|
||||
input logic FInput2UsedD, FInput3UsedD,
|
||||
// Stall outputs
|
||||
output logic FStallD,
|
||||
output logic [1:0] FForwardInput1D, FForwardInput2D,
|
||||
output logic FForwardInput3D
|
||||
input logic [4:0] Adr1E, Adr2E, Adr3E,
|
||||
input logic FWriteEnM, FWriteEnW,
|
||||
input logic [4:0] RdM, RdW,
|
||||
input logic [2:0] FResultSelM,
|
||||
output logic FStallD,
|
||||
output logic [1:0] ForwardXE, ForwardYE, ForwardZE
|
||||
);
|
||||
|
||||
|
||||
always_comb begin
|
||||
// set ReadData as default
|
||||
FForwardInput1D = 2'b00;
|
||||
FForwardInput2D = 2'b00;
|
||||
FForwardInput3D = 1'b0;
|
||||
FStallD = FDivBusyE;
|
||||
if (~IllegalFPUInstrD) begin
|
||||
// if taking a value from int register
|
||||
if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD)))
|
||||
if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM
|
||||
else FStallD = 1'b1; // otherwise stall
|
||||
else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW
|
||||
else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE
|
||||
ForwardXE = 2'b00; // choose FRD1E
|
||||
ForwardYE = 2'b00; // choose FRD2E
|
||||
ForwardZE = 2'b00; // choose FRD3E
|
||||
FStallD = 0;
|
||||
|
||||
if ((Adr1E == RdM) & FWriteEnM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
if(FInput2UsedD)
|
||||
if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1;
|
||||
else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW
|
||||
else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE
|
||||
if ((Adr2E == RdM) & FWriteEnM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W
|
||||
|
||||
|
||||
if(FInput3UsedD)
|
||||
if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1;
|
||||
else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1;
|
||||
else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE
|
||||
end
|
||||
|
||||
if ((Adr3E == RdM) & FWriteEnM)
|
||||
// if the result will be FResM
|
||||
if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM
|
||||
else FStallD = 1; // if the result won't be ready stall
|
||||
else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W
|
||||
|
||||
end
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
|
||||
|
||||
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E);
|
||||
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
|
||||
|
||||
input [63:0] FInput1E, FInput2E;
|
||||
input [63:0] SrcXE, SrcYE;
|
||||
input [1:0] SgnOpCodeE;
|
||||
output [63:0] SgnResultE;
|
||||
output [4:0] SgnFlagsE;
|
||||
@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E);
|
||||
|
||||
//op code designation:
|
||||
//
|
||||
//00 - fsgnj - directly copy over sign value of FInput2E
|
||||
//01 - fsgnjn - negate sign value of FInput2E
|
||||
//10 - fsgnjx - XOR sign values of FInput1E & FInput2E
|
||||
//00 - fsgnj - directly copy over sign value of SrcYE
|
||||
//01 - fsgnjn - negate sign value of SrcYE
|
||||
//10 - fsgnjx - XOR sign values of SrcXE & SrcYE
|
||||
//
|
||||
|
||||
assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]);
|
||||
assign SgnResultE[62:0] = FInput1E[62:0];
|
||||
assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
|
||||
assign SgnResultE[62:0] = SrcXE[62:0];
|
||||
|
||||
//If the exponent is all ones, then the value is either Inf or NaN,
|
||||
//both of which will produce a QNaN/SNaN value of some sort. This will
|
||||
//set the invalid flag high.
|
||||
assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52];
|
||||
assign AonesExp = SrcXE[62]&SrcXE[61]&SrcXE[60]&SrcXE[59]&SrcXE[58]&SrcXE[57]&SrcXE[56]&SrcXE[55]&SrcXE[54]&SrcXE[53]&SrcXE[52];
|
||||
|
||||
//the only flag that can occur during this operation is invalid
|
||||
//due to changing sign on already existing NaN
|
||||
|
@ -1,195 +0,0 @@
|
||||
///////////////////////////////////////////
|
||||
// lzd.sv
|
||||
//
|
||||
// Written: James.Stine@okstate.edu 1 February 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Hierarchical leading-zero detector
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
|
||||
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
|
||||
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
///////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
/* verilator lint_off DECLFILENAME */
|
||||
|
||||
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
|
||||
// design of a leading zero detector circuit: comparison with logic
|
||||
// synthesis," in IEEE Transactions on Very Large Scale Integration
|
||||
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
|
||||
// 10.1109/92.273153.
|
||||
|
||||
// Modified to be more hierarchical
|
||||
|
||||
// lz2: 2-bit leaf cell of the leading-zero detector.
//   B - 2-bit input, B[1] being the upper bit
//   V - set when at least one bit of B is 1
//   P - set only for B == 2'b01 (upper bit 0, lower bit 1)
module lz2 (P, V, B);
|
||||
|
||||
input logic [1:0] B;
|
||||
|
||||
output logic P;
|
||||
output logic V;
|
||||
|
||||
assign V = B[0] | B[1]; // any 1 present in the pair
|
||||
assign P = B[0] & ~B[1]; // exactly one zero sits above the first 1
|
||||
|
||||
endmodule // lz2
|
||||
|
||||
// lzd_hier: width-selecting wrapper around the fixed-width detectors below.
// Instantiates lz128/lz64/lz32/lz16/lz8/lz4 according to WIDTH and connects
// ZP, ZV and B straight through.
// NOTE(review): there is no branch for any other WIDTH value, so ZP and ZV
// would be left undriven in that case - confirm callers only use 4..128
// powers of two.
module lzd_hier #(parameter WIDTH=8)
|
||||
(input logic [WIDTH-1:0] B, // vector to scan
|
||||
output logic [$clog2(WIDTH)-1:0] ZP, // position output of the selected detector
|
||||
output logic ZV); // valid output of the selected detector
|
||||
|
||||
if (WIDTH == 128)
|
||||
lz128 lzd127 (ZP, ZV, B); // instance is named lzd127 although it is the 128-bit detector
|
||||
else if (WIDTH == 64)
|
||||
lz64 lzd64 (ZP, ZV, B);
|
||||
else if (WIDTH == 32)
|
||||
lz32 lzd32 (ZP, ZV, B);
|
||||
else if (WIDTH == 16)
|
||||
lz16 lzd16 (ZP, ZV, B);
|
||||
else if (WIDTH == 8)
|
||||
lz8 lzd8 (ZP, ZV, B);
|
||||
else if (WIDTH == 4)
|
||||
lz4 lzd4 (ZP, ZV, B);
|
||||
|
||||
endmodule // lzd_hier
|
||||
|
||||
// lz4: 4-bit stage built from two lz2 cells (l1 on B[1:0], l2 on B[3:2]).
//   ZV    - set when either half contains a 1
//   ZP[1] - set when the upper half contains no 1
//   ZP[0] - P of the upper half if it contains a 1, otherwise P of the lower half
// When ZV is set, ZP is the count of zeros above the first 1 in B.
module lz4 (ZP, ZV, B);
|
||||
|
||||
input logic [3:0] B;
|
||||
|
||||
logic ZPa; // P from the lower half
|
||||
logic ZPb; // P from the upper half
|
||||
logic ZVa; // V from the lower half
|
||||
logic ZVb; // V from the upper half
|
||||
|
||||
output logic [1:0] ZP;
|
||||
output logic ZV;
|
||||
|
||||
lz2 l1(ZPa, ZVa, B[1:0]);
|
||||
lz2 l2(ZPb, ZVb, B[3:2]);
|
||||
|
||||
assign ZP[0:0] = ZVb ? ZPb : ZPa; // upper half wins when it holds a 1
|
||||
assign ZP[1] = ~ZVb; // upper half is all zero
|
||||
assign ZV = ZVa | ZVb;
|
||||
|
||||
endmodule // lz4
|
||||
|
||||
// lz8: 8-bit stage built from two lz4 stages (l1 on B[3:0], l2 on B[7:4]).
//   ZV      - set when either half contains a 1
//   ZP[2]   - set when the upper half contains no 1
//   ZP[1:0] - ZP of the upper half if it contains a 1, otherwise ZP of the lower half
module lz8 (ZP, ZV, B);
|
||||
|
||||
input logic [7:0] B;
|
||||
|
||||
logic [1:0] ZPa; // ZP from the lower half
|
||||
logic [1:0] ZPb; // ZP from the upper half
|
||||
logic ZVa; // ZV from the lower half
|
||||
logic ZVb; // ZV from the upper half
|
||||
|
||||
output logic [2:0] ZP;
|
||||
output logic ZV;
|
||||
|
||||
lz4 l1(ZPa, ZVa, B[3:0]);
|
||||
lz4 l2(ZPb, ZVb, B[7:4]);
|
||||
|
||||
assign ZP[1:0] = ZVb ? ZPb : ZPa; // upper half wins when it holds a 1
|
||||
assign ZP[2] = ~ZVb; // upper half is all zero
|
||||
assign ZV = ZVa | ZVb;
|
||||
|
||||
endmodule // lz8
|
||||
|
||||
// lz16: 16-bit stage built from two lz8 stages (l1 on B[7:0], l2 on B[15:8]).
//   ZV      - set when either half contains a 1
//   ZP[3]   - set when the upper half contains no 1
//   ZP[2:0] - ZP of the upper half if it contains a 1, otherwise ZP of the lower half
module lz16 (ZP, ZV, B);
|
||||
|
||||
input logic [15:0] B;
|
||||
|
||||
logic [2:0] ZPa; // ZP from the lower half
|
||||
logic [2:0] ZPb; // ZP from the upper half
|
||||
logic ZVa; // ZV from the lower half
|
||||
logic ZVb; // ZV from the upper half
|
||||
|
||||
output logic [3:0] ZP;
|
||||
output logic ZV;
|
||||
|
||||
lz8 l1(ZPa, ZVa, B[7:0]);
|
||||
lz8 l2(ZPb, ZVb, B[15:8]);
|
||||
|
||||
assign ZP[2:0] = ZVb ? ZPb : ZPa; // upper half wins when it holds a 1
|
||||
assign ZP[3] = ~ZVb; // upper half is all zero
|
||||
assign ZV = ZVa | ZVb;
|
||||
|
||||
endmodule // lz16
|
||||
|
||||
// lz32: 32-bit stage built from two lz16 stages (l1 on B[15:0], l2 on B[31:16]).
//   ZV      - set when either half contains a 1
//   ZP[4]   - set when the upper half contains no 1
//   ZP[3:0] - ZP of the upper half if it contains a 1, otherwise ZP of the lower half
module lz32 (ZP, ZV, B);
|
||||
|
||||
input logic [31:0] B;
|
||||
|
||||
logic [3:0] ZPa; // ZP from the lower half
|
||||
logic [3:0] ZPb; // ZP from the upper half
|
||||
logic ZVa; // ZV from the lower half
|
||||
logic ZVb; // ZV from the upper half
|
||||
|
||||
output logic [4:0] ZP;
|
||||
output logic ZV;
|
||||
|
||||
lz16 l1(ZPa, ZVa, B[15:0]);
|
||||
lz16 l2(ZPb, ZVb, B[31:16]);
|
||||
|
||||
assign ZP[3:0] = ZVb ? ZPb : ZPa; // upper half wins when it holds a 1
|
||||
assign ZP[4] = ~ZVb; // upper half is all zero
|
||||
assign ZV = ZVa | ZVb;
|
||||
|
||||
endmodule // lz32
|
||||
|
||||
// lz64: 64-bit stage built from two lz32 stages (l1 on B[31:0], l2 on B[63:32]).
//   ZV      - set when either half contains a 1
//   ZP[5]   - set when the upper half contains no 1
//   ZP[4:0] - ZP of the upper half if it contains a 1, otherwise ZP of the lower half
module lz64 (ZP, ZV, B);
|
||||
|
||||
input logic [63:0] B;
|
||||
|
||||
logic [4:0] ZPa; // ZP from the lower half
|
||||
logic [4:0] ZPb; // ZP from the upper half
|
||||
logic ZVa; // ZV from the lower half
|
||||
logic ZVb; // ZV from the upper half
|
||||
|
||||
output logic [5:0] ZP;
|
||||
output logic ZV;
|
||||
|
||||
lz32 l1(ZPa, ZVa, B[31:0]);
|
||||
lz32 l2(ZPb, ZVb, B[63:32]);
|
||||
|
||||
assign ZP[4:0] = ZVb ? ZPb : ZPa; // upper half wins when it holds a 1
|
||||
assign ZP[5] = ~ZVb; // upper half is all zero
|
||||
assign ZV = ZVa | ZVb;
|
||||
|
||||
endmodule // lz64
|
||||
|
||||
// lz128: 128-bit stage built from two lz64 stages.
//   B       - vector to scan
//   ZV      - set when either half contains a 1
//   ZP[6]   - set when the upper half contains no 1
//   ZP[5:0] - ZP of the upper half if it contains a 1, otherwise ZP of the lower half
// The halves are B[63:0] and B[127:64]: each is exactly 64 bits wide, matching
// the lz64 input port, and the two do not overlap.
module lz128 (ZP, ZV, B);
|
||||
|
||||
input logic [127:0] B;
|
||||
|
||||
logic [5:0] ZPa; // ZP from the lower half
|
||||
logic [5:0] ZPb; // ZP from the upper half
|
||||
logic ZVa; // ZV from the lower half
|
||||
logic ZVb; // ZV from the upper half
|
||||
|
||||
output logic [6:0] ZP;
|
||||
output logic ZV;
|
||||
|
||||
lz64 l1(ZPa, ZVa, B[63:0]);
|
||||
lz64 l2(ZPb, ZVb, B[127:64]);
|
||||
|
||||
assign ZP[5:0] = ZVb ? ZPb : ZPa; // upper half wins when it holds a 1
|
||||
assign ZP[6] = ~ZVb; // upper half is all zero
|
||||
assign ZV = ZVa | ZVb;
|
||||
|
||||
endmodule // lz128
|
||||
|
||||
/* verilator lint_on DECLFILENAME */
|
@ -32,7 +32,7 @@ module hazard(
|
||||
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
|
||||
input logic LoadStallD, MulDivStallD, CSRRdStallD,
|
||||
input logic DCacheStall, ICacheStallF,
|
||||
input logic FPUStallD,
|
||||
input logic FPUStallD, FStallD,
|
||||
input logic DivBusyE,FDivBusyE,
|
||||
// Stall & flush outputs
|
||||
output logic StallF, StallD, StallE, StallM, StallW,
|
||||
@ -56,7 +56,7 @@ module hazard(
|
||||
// If any stages are stalled, the first stage that isn't stalled must flush.
|
||||
|
||||
assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE);
|
||||
assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
|
||||
assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
|
||||
assign StallECause = DivBusyE | FDivBusyE;
|
||||
assign StallMCause = 0;
|
||||
assign StallWCause = DCacheStall | ICacheStallF;
|
||||
|
@ -45,11 +45,13 @@ module controller(
|
||||
output logic MemReadE, CSRReadE, // for Hazard Unit
|
||||
output logic [2:0] Funct3E,
|
||||
output logic MulDivE, W64E,
|
||||
output logic JumpE,
|
||||
output logic JumpE,
|
||||
output logic [1:0] MemRWE,
|
||||
// Memory stage control signals
|
||||
input logic StallM, FlushM,
|
||||
output logic [1:0] MemRWM,
|
||||
output logic CSRReadM, CSRWriteM, PrivilegedM,
|
||||
output logic CSRReadM, CSRWriteM, PrivilegedM,
|
||||
output logic SCE,
|
||||
output logic [1:0] AtomicM,
|
||||
output logic [2:0] Funct3M,
|
||||
output logic RegWriteM, // for Hazard Unit
|
||||
@ -73,7 +75,7 @@ module controller(
|
||||
// pipelined control signals
|
||||
logic RegWriteE;
|
||||
logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM;
|
||||
logic [1:0] MemRWD, MemRWE;
|
||||
logic [1:0] MemRWD;
|
||||
logic JumpD;
|
||||
logic BranchD, BranchE;
|
||||
logic [1:0] ALUOpD;
|
||||
@ -140,6 +142,7 @@ module controller(
|
||||
ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide
|
||||
else
|
||||
ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction
|
||||
//7'b1010011: ControlsD = `CTRLW'b0_000_00_00_101_0_00_0_0_0_0_0_0_00_1; // FP
|
||||
7'b1100011: ControlsD = `CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq
|
||||
7'b1100111: ControlsD = `CTRLW'b1_000_00_00_000_0_00_1_1_0_0_0_0_00_0; // jalr
|
||||
7'b1101111: ControlsD = `CTRLW'b1_011_00_00_000_0_00_1_0_0_0_0_0_00_0; // jal
|
||||
@ -202,7 +205,8 @@ module controller(
|
||||
|
||||
assign PCSrcE = JumpE | BranchE & BranchTakenE;
|
||||
|
||||
assign MemReadE = MemRWE[1];
|
||||
assign MemReadE = MemRWE[1];
|
||||
assign SCE = (ResultSrcE == 3'b100);
|
||||
|
||||
// Memory stage pipeline control register
|
||||
flopenrc #(15) controlregM(clk, reset, FlushM, ~StallM,
|
||||
|
@ -37,6 +37,9 @@ module datapath (
|
||||
input logic ALUSrcAE, ALUSrcBE,
|
||||
input logic TargetSrcE,
|
||||
input logic JumpE,
|
||||
input logic IllegalFPUInstrE,
|
||||
input logic [1:0] MemRWE,
|
||||
input logic [`XLEN-1:0] FWriteDataE,
|
||||
input logic [`XLEN-1:0] PCE,
|
||||
input logic [`XLEN-1:0] PCLinkE,
|
||||
output logic [2:0] FlagsE,
|
||||
@ -44,13 +47,13 @@ module datapath (
|
||||
output logic [`XLEN-1:0] SrcAE, SrcBE,
|
||||
// Memory stage signals
|
||||
input logic StallM, FlushM,
|
||||
input logic [`XLEN-1:0] FWriteDataM,
|
||||
input logic FWriteIntM,
|
||||
input logic [`XLEN-1:0] FIntResM,
|
||||
output logic [`XLEN-1:0] SrcAM,
|
||||
output logic [`XLEN-1:0] WriteDataM, MemAdrM,
|
||||
// Writeback stage signals
|
||||
input logic StallW, FlushW,
|
||||
input logic FWriteIntW,
|
||||
input logic [`XLEN-1:0] FPUResultW,
|
||||
input logic RegWriteW,
|
||||
input logic SquashSCW,
|
||||
input logic [2:0] ResultSrcW,
|
||||
@ -70,13 +73,14 @@ module datapath (
|
||||
logic [`XLEN-1:0] RD1E, RD2E;
|
||||
logic [`XLEN-1:0] ExtImmE;
|
||||
|
||||
logic [`XLEN-1:0] PreSrcAE, SrcAE2, SrcBE2;
|
||||
logic [`XLEN-1:0] PreSrcAE, PreSrcBE, SrcAE2, SrcBE2;
|
||||
|
||||
logic [`XLEN-1:0] ALUResultE;
|
||||
logic [`XLEN-1:0] WriteDataE;
|
||||
logic [`XLEN-1:0] TargetBaseE;
|
||||
// Memory stage signals
|
||||
logic [`XLEN-1:0] ALUResultM;
|
||||
logic [`XLEN-1:0] ResultM;
|
||||
// Writeback stage signals
|
||||
logic [`XLEN-1:0] SCResultW;
|
||||
logic [`XLEN-1:0] ALUResultW;
|
||||
@ -88,8 +92,7 @@ module datapath (
|
||||
assign Rs2D = InstrD[24:20];
|
||||
assign RdD = InstrD[11:7];
|
||||
|
||||
//Mux for writting floating point
|
||||
mux2 #(`XLEN) writedatamux(ResultW, FPUResultW, FWriteIntW, WriteDataW);
|
||||
//Mux for writting floating point
|
||||
|
||||
regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D);
|
||||
extend ext(.InstrD(InstrD[31:7]), .*);
|
||||
@ -102,11 +105,12 @@ module datapath (
|
||||
flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
|
||||
flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
|
||||
|
||||
mux4 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, FWriteDataM, ForwardAE, PreSrcAE);
|
||||
mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, FWriteDataM, ForwardBE, WriteDataE);
|
||||
mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, PreSrcAE);
|
||||
mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, PreSrcBE);
|
||||
mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
|
||||
mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE);
|
||||
mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2);
|
||||
mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE);
|
||||
mux2 #(`XLEN) srcbmux(PreSrcBE, ExtImmE, ALUSrcBE, SrcBE);
|
||||
mux2 #(`XLEN) srcbmux2(SrcBE, {`XLEN{1'b0}}, JumpE, SrcBE2); // *** May be able to remove this mux.
|
||||
alu #(`XLEN) alu(SrcAE2, SrcBE2, ALUControlE, ALUResultE, FlagsE);
|
||||
mux2 #(`XLEN) targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE);
|
||||
@ -117,10 +121,11 @@ module datapath (
|
||||
flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM);
|
||||
assign MemAdrM = ALUResultM;
|
||||
flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM);
|
||||
flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM);
|
||||
flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM);
|
||||
mux2 #(`XLEN) resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM);
|
||||
|
||||
// Writeback stage pipeline register and logic
|
||||
flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW);
|
||||
flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW);
|
||||
flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW);
|
||||
|
||||
// handle Store Conditional result if atomic extension supported
|
||||
@ -131,11 +136,11 @@ module datapath (
|
||||
assign SCResultW = 0;
|
||||
endgenerate
|
||||
|
||||
mux5 #(`XLEN) resultmux(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
|
||||
mux5 #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW);
|
||||
/* -----\/----- EXCLUDED -----\/-----
|
||||
// This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage.
|
||||
// *** need to look at how the decoder is coded to fix.
|
||||
mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW);
|
||||
mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, WriteDataW);
|
||||
>>>>>>> bp
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
|
||||
|
@ -28,32 +28,31 @@
|
||||
module forward(
|
||||
// Detect hazards
|
||||
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
|
||||
input logic MemReadE, MulDivE, CSRReadE,
|
||||
input logic RegWriteM, RegWriteW,
|
||||
input logic DivDoneE, DivBusyE,
|
||||
input logic FWriteIntE, FWriteIntM, FWriteIntW,
|
||||
input logic MemReadE, MulDivE, CSRReadE,
|
||||
input logic RegWriteM, RegWriteW,
|
||||
input logic DivDoneE, DivBusyE,
|
||||
input logic FWriteIntE, FWriteIntM, FWriteIntW,
|
||||
input logic SCE,
|
||||
// Forwarding controls
|
||||
output logic [1:0] ForwardAE, ForwardBE,
|
||||
output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD
|
||||
output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD
|
||||
);
|
||||
|
||||
always_comb begin
|
||||
ForwardAE = 2'b00;
|
||||
ForwardBE = 2'b00;
|
||||
if (Rs1E != 5'b0)
|
||||
if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10;
|
||||
if ((Rs1E == RdM) & (RegWriteM|FWriteIntM)) ForwardAE = 2'b10;
|
||||
else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01;
|
||||
else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11;
|
||||
|
||||
if (Rs2E != 5'b0)
|
||||
if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10;
|
||||
if ((Rs2E == RdM) & (RegWriteM|FWriteIntM)) ForwardBE = 2'b10;
|
||||
else if ((Rs2E == RdW) & (RegWriteW|FWriteIntW)) ForwardBE = 2'b01;
|
||||
else if ((Rs2E == RdM) & FWriteIntM) ForwardBE = 2'b11;
|
||||
end
|
||||
|
||||
// Stall on dependent operations that finish in Mem Stage and can't bypass in time
|
||||
assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE));
|
||||
assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE));
|
||||
assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE));
|
||||
assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide
|
||||
assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE));
|
||||
|
||||
|
@ -31,33 +31,34 @@ module ieu (
|
||||
input logic [31:0] InstrD,
|
||||
input logic IllegalIEUInstrFaultD,
|
||||
output logic IllegalBaseInstrFaultD,
|
||||
output logic RegWriteD,
|
||||
output logic RegWriteD,
|
||||
// Execute Stage interface
|
||||
input logic [`XLEN-1:0] PCE,
|
||||
input logic [`XLEN-1:0] PCLinkE,
|
||||
input logic FWriteIntE,
|
||||
input logic IllegalFPUInstrE,
|
||||
input logic [`XLEN-1:0] FWriteDataE,
|
||||
output logic [`XLEN-1:0] PCTargetE,
|
||||
output logic MulDivE, W64E,
|
||||
output logic [2:0] Funct3E,
|
||||
output logic [`XLEN-1:0] SrcAE, SrcBE,
|
||||
input logic FWriteIntM,
|
||||
|
||||
// Memory stage interface
|
||||
input logic DataMisalignedM, // from LSU
|
||||
input logic SquashSCW, // from LSU
|
||||
output logic [1:0] MemRWM, // read/write control goes to LSU
|
||||
output logic [1:0] AtomicM, // atomic control goes to LSU
|
||||
output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU
|
||||
input logic DataMisalignedM, // from LSU
|
||||
input logic SquashSCW, // from LSU
|
||||
output logic [1:0] MemRWM, // read/write control goes to LSU
|
||||
output logic [1:0] AtomicM, // atomic control goes to LSU
|
||||
output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU
|
||||
|
||||
output logic [2:0] Funct3M, // size and signedness to LSU
|
||||
|
||||
|
||||
input logic FWriteIntM, // from FPU
|
||||
input logic [`XLEN-1:0] FWriteDataM, // from FPU
|
||||
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
|
||||
output logic [2:0] Funct3M, // size and signedness to LSU
|
||||
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
|
||||
input logic DataAccessFaultM,
|
||||
input logic [`XLEN-1:0] FIntResM,
|
||||
|
||||
// Writeback stage
|
||||
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
|
||||
input logic FWriteIntW,
|
||||
input logic [`XLEN-1:0] FPUResultW,
|
||||
input logic FWriteIntW,
|
||||
// input logic [`XLEN-1:0] PCLinkW,
|
||||
output logic InstrValidM, InstrValidW,
|
||||
// hazards
|
||||
@ -76,7 +77,8 @@ module ieu (
|
||||
logic [4:0] ALUControlE;
|
||||
logic ALUSrcAE, ALUSrcBE;
|
||||
logic [2:0] ResultSrcW;
|
||||
logic TargetSrcE;
|
||||
logic TargetSrcE;
|
||||
logic SCE;
|
||||
|
||||
// forwarding signals
|
||||
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW;
|
||||
@ -84,6 +86,7 @@ module ieu (
|
||||
logic RegWriteM, RegWriteW;
|
||||
logic MemReadE, CSRReadE;
|
||||
logic JumpE;
|
||||
logic [1:0] MemRWE;
|
||||
|
||||
controller c(.*);
|
||||
datapath dp(.*);
|
||||
|
@ -87,21 +87,23 @@ module wallypipelinedhart
|
||||
|
||||
logic PCSrcE;
|
||||
logic CSRWritePendingDEM;
|
||||
logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD;
|
||||
logic DivDoneE;
|
||||
logic DivBusyE;
|
||||
logic DivDoneW;
|
||||
logic [4:0] SetFflagsM;
|
||||
logic [2:0] FRM_REGW;
|
||||
logic FloatRegWriteW;
|
||||
logic [1:0] FMemRWM;
|
||||
logic RegWriteD;
|
||||
logic [`XLEN-1:0] FWriteDataM;
|
||||
logic SquashSCW;
|
||||
logic LoadStallD, MulDivStallD, CSRRdStallD;
|
||||
logic SquashSCM, SquashSCW;
|
||||
// floating point unit signals
|
||||
logic [2:0] FRM_REGW;
|
||||
logic [1:0] FMemRWM, FMemRWE;
|
||||
logic FStallD;
|
||||
logic FWriteIntE, FWriteIntW, FWriteIntM;
|
||||
logic FWriteIntE, FWriteIntM, FWriteIntW;
|
||||
logic [`XLEN-1:0] FWriteDataE;
|
||||
logic [`XLEN-1:0] FIntResM;
|
||||
logic FDivBusyE;
|
||||
logic IllegalFPUInstrD, IllegalFPUInstrE;
|
||||
logic FloatRegWriteW;
|
||||
logic FPUStallD;
|
||||
logic [4:0] SetFflagsM;
|
||||
logic [`XLEN-1:0] FPUResultW;
|
||||
|
||||
// memory management unit signals
|
||||
@ -185,20 +187,10 @@ module wallypipelinedhart
|
||||
ieu ieu(.*); // integer execution unit: integer register file, datapath and controller
|
||||
|
||||
|
||||
mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM);
|
||||
// mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM);
|
||||
|
||||
pagetablewalker pagetablewalker(.HPTWRead(HPTWRead),
|
||||
.*); // can send addresses to ahblite, send out pagetablestall
|
||||
// *** can connect to hazard unit
|
||||
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
|
||||
// Would need to insertinstruction as InstrD, not InstrF
|
||||
/*ahblite ebu(
|
||||
.InstrReadF(1'b0),
|
||||
.InstrRData(), // hook up InstrF later
|
||||
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
|
||||
.*); */
|
||||
|
||||
|
||||
// arbiter between IEU and pagetablewalker
|
||||
lsuArb arbiter(// HPTW connection
|
||||
.HPTWTranslate(MMUTranslate),
|
||||
@ -208,12 +200,12 @@ module wallypipelinedhart
|
||||
.HPTWReady(MMUReady),
|
||||
.HPTWStall(HPTWStall),
|
||||
// CPU connection
|
||||
.MemRWM(MemRWM|FMemRWM),
|
||||
.MemRWM(MemRWM),
|
||||
.Funct3M(Funct3M),
|
||||
.AtomicM(AtomicM),
|
||||
.MemAdrM(MemAdrM),
|
||||
.StallW(StallW),
|
||||
.WriteDataM(WriteDatatmpM),
|
||||
.WriteDataM(WriteDataM),
|
||||
.ReadDataW(ReadDataW),
|
||||
.CommittedM(CommittedM),
|
||||
.SquashSCW(SquashSCW),
|
||||
@ -259,7 +251,8 @@ module wallypipelinedhart
|
||||
ahblite ebu(
|
||||
//.InstrReadF(1'b0),
|
||||
//.InstrRData(InstrF), // hook up InstrF later
|
||||
.WriteDataM(WriteDatatmpM),
|
||||
.ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking
|
||||
.WriteDataM(WriteDataM),
|
||||
.MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]),
|
||||
.Funct7M(InstrM[31:25]),
|
||||
.HRDATAW(HRDATAW),
|
||||
|
@ -539,8 +539,8 @@ string tests32f[] = '{
|
||||
if (`M_SUPPORTED) tests = {tests, tests64m};
|
||||
if (`A_SUPPORTED) tests = {tests, tests64a};
|
||||
if (`MEM_VIRTMEM) tests = {tests, tests64mmu};
|
||||
if (`D_SUPPORTED) tests = {tests64d, tests};
|
||||
if (`F_SUPPORTED) tests = {tests64f, tests};
|
||||
if (`D_SUPPORTED) tests = {tests64d, tests};
|
||||
end
|
||||
//tests = {tests64a, tests};
|
||||
end else begin // RV32
|
||||
@ -554,7 +554,7 @@ string tests32f[] = '{
|
||||
if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
|
||||
else tests = {tests, tests32iNOc};
|
||||
if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
|
||||
// if (`F_SUPPORTED) tests = {tests32f, tests};
|
||||
if (`F_SUPPORTED) tests = {tests32f, tests};
|
||||
if (`A_SUPPORTED) tests = {tests, tests32a};
|
||||
if (`MEM_VIRTMEM) tests = {tests, tests32mmu};
|
||||
end
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user