Merge branch 'main' into bigbadbranch

This commit is contained in:
Ross Thompson 2021-07-02 11:52:26 -05:00
commit dbd33465e1
35 changed files with 1838 additions and 1818 deletions

2
.gitignore vendored
View File

@ -24,6 +24,8 @@ testsBP/*/*/*.elf*
testsBP/*/OBJ/*
testsBP/*/*.a
wally-pipelined/linux-testgen/linux-testvectors/*
wally-pipelined/linux-testgen/nohup*
wally-pipelined/linux-testgen/x*
!wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py
!wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh
wally-pipelined/regression/slack-notifier/slack-webhook-url.txt

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "sky130/sky130_osu_sc_t12"]
path = sky130/sky130_osu_sc_t12
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/

View File

@ -211,26 +211,53 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]);
}
}
/*int foreverLoop = 1;
secs_ret timing = 0;
int timingInt;
ee_printf("\nENTERING FOREVER WHILE LOOP\n");
while(foreverLoop == 1)
{
start_time();
//filler
stop_time();
timing += time_in_secs(get_time());
timingInt = (int)timing;
ee_printf("Timing is %d\n", timingInt);
}/*
/* automatically determine number of iterations if not set */
if (results[0].iterations==0) {
secs_ret secs_passed=0;
ee_u32 divisor;
results[0].iterations=1;
int iterationInc = 0;
ee_printf("\n\nENTERING ITERATION WHILE LOOP\n");
while (secs_passed < (secs_ret)1) {
results[0].iterations*=10;
if(iterationInc != 0)
{
results[0].iterations++;
}
ee_printf("iterations is %d\n", results[0].iterations);
start_time();
iterate(&results[0]);
stop_time();
secs_passed=time_in_secs(get_time());
secs_passed = time_in_secs(get_time());
int secs_passed_int = (int)secs_passed;
ee_printf("secs passed is %d\n", secs_passed_int);
iterationInc++;
}
ee_printf("LEAVING ITERATION WHILE LOOP!\n\n");
/* now we know it executes for at least 1 sec, set actual run time at about 10 secs */
divisor=(ee_u32)secs_passed;
ee_printf("divisor is %lu\n", divisor);
if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */
divisor=1;
results[0].iterations*=1+10/divisor;
ee_printf("iterations is %d\n", results[0].iterations);
}
/* perform actual benchmark */
ee_printf("Starting benchmark\n");
start_time();
#if (MULTITHREAD>1)
if (default_num_contexts>MULTITHREAD) {
@ -249,7 +276,8 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
#endif
stop_time();
total_time=get_time();
ee_printf("ending benchmark");
ee_printf("total time is %u\n", total_time);
ee_printf("ending benchmark\n");
/* get a function of the input to report */
seedcrc=crc16(results[0].seed1,seedcrc);
seedcrc=crc16(results[0].seed2,seedcrc);
@ -340,12 +368,17 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) {
for (i=0 ; i<default_num_contexts; i++)
ee_printf("[%d]crcstate : 0x%04x\n",i,results[i].crcstate);
for (i=0 ; i<default_num_contexts; i++)
ee_printf("[%d]crcfinal : 0x%04x\"n",i,results[i].crc);
ee_printf("[%d]crcfinal : 0x%04x\n",i,results[i].crc);
if (total_errors==0) {
ee_printf("Correct operation validated. See README.md for run and reporting rules.\n");
#if HAS_FLOAT
if (known_id==3) {
ee_printf("CoreMark 1.0 : %f / %s %s",default_num_contexts*results[0].iterations/time_in_secs(total_time),COMPILER_VERSION,COMPILER_FLAGS);
unsigned long long tmp = (unsigned long long) 1000.0*default_num_contexts*results[0].iterations/time_in_secs(total_time);
secs_ret totalmsecs = time_in_secs(total_time);
int totalmint = (int) totalmsecs;
ee_printf("ELAPSED S: %d\n", totalmint);
ee_printf("CoreMark 1.0 : %d / %s %s\n",tmp,COMPILER_VERSION,COMPILER_FLAGS);
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
ee_printf(" / %s",MEM_LOCATION);
#else

View File

@ -114,9 +114,10 @@ void portable_free(void *p) {
#define read_csr(reg) ({ unsigned long __tmp; \
asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
__tmp; })
#define GETMYTIME(_t) (*_t=read_csr(cycle))
#define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8)
#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
#define TIMER_RES_DIVIDER 1
// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
#define TIMER_RES_DIVIDER 10000
#define SAMPLE_TIME_IMPLEMENTATION 1
#endif
#define EE_TICKS_PER_SEC (NSECS_PER_SEC / TIMER_RES_DIVIDER)
@ -132,7 +133,9 @@ static CORETIMETYPE start_time_val, stop_time_val;
or zeroing some system parameters - e.g. setting the cpu clocks cycles to 0.
*/
void start_time(void) {
GETMYTIME(&start_time_val );
GETMYTIME(start_time_val);
ee_printf("Timer started\n");
ee_printf(" MTIME: %u\n", start_time_val);
#if CALLGRIND_RUN
CALLGRIND_START_INSTRUMENTATION
#endif
@ -153,7 +156,9 @@ void stop_time(void) {
#if MICA
asm volatile("int3");/*1 */
#endif
GETMYTIME(&stop_time_val );
GETMYTIME(stop_time_val);
ee_printf("Timer stopped\n");
ee_printf(" MTIME: %u\n", stop_time_val);
}
/* Function: get_time
Return an abstract "ticks" number that signifies time on the system.
@ -166,6 +171,7 @@ void stop_time(void) {
*/
CORE_TICKS get_time(void) {
	/* Tick delta between the values held in stop_time_val and
	   start_time_val, computed by MYTIMEDIFF and cast to CORE_TICKS. */
	CORE_TICKS delta_ticks = (CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));

	/* Debug trace of the raw delta.
	   NOTE(review): %u presumes CORE_TICKS is passed as an unsigned int - confirm. */
	ee_printf(" Elapsed MTIME: %u\n", delta_ticks);

	return delta_ticks;
}
/* Function: time_in_secs
@ -176,13 +182,15 @@ CORE_TICKS get_time(void) {
*/
secs_ret time_in_secs(CORE_TICKS ticks) {
	/* Both operands are converted to secs_ret before dividing, so the
	   division is carried out in secs_ret arithmetic. */
	const secs_ret ticks_per_sec = (secs_ret)EE_TICKS_PER_SEC;
	secs_ret seconds = ((secs_ret)ticks) / ticks_per_sec;

	/* Debug trace: the int cast affects only the printed value; the value
	   returned to the caller is left unchanged. */
	ee_printf(" RETURN VALUE FROM TIME IN SECS FUNCTION: %d\n", (int)seconds);

	return seconds;
}
#else
#error "Please implement timing functionality in core_portme.c"
#endif /* SAMPLE_TIME_IMPLEMENTATION */
ee_u32 default_num_contexts=MULTITHREAD;
ee_u32 default_num_contexts = MULTITHREAD;
/* Function: portable_init
Target specific initialization code

@ -1 +0,0 @@
Subproject commit f60f2d0395053c4df362a97d7e2099721b6face6

View File

@ -62,25 +62,25 @@
// Peripheral memory space extends from BASE to BASE+RANGE
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 32'h00003FFF
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 32'h00000FFF
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 56'h00003FFF
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 56'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 32'h80000000
`define TIM_RANGE 32'h07FFFFFF
`define TIM_BASE 56'h80000000
`define TIM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 32'h02000000
`define CLINT_RANGE 32'h0000FFFF
`define CLINT_BASE 56'h02000000
`define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 32'h10012000
`define GPIO_RANGE 32'h000000FF
`define GPIO_BASE 56'h10012000
`define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 32'h10000000
`define UART_RANGE 32'h00000007
`define UART_BASE 56'h10000000
`define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 32'h0C000000
`define PLIC_RANGE 32'h03FFFFFF
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
// Bus Interface width
`define AHBW 64

View File

@ -31,6 +31,7 @@
`define BUSYBEAR 1
`define LINUX_FIX_READ {'h10000005}
`define LINUX_TEST_VECTORS "/courses/e190ax/busybear_boot/"
//`define LINUX_TEST_VECTORS "../../../busybear_boot/"
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
@ -63,25 +64,25 @@
// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
`define BOOTTIM_SUPPORTED 1'b1
`define BOOTTIM_BASE 32'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 32'h00003FFF
//`define BOOTTIM_BASE 32'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 32'h00000FFF
`define BOOTTIM_BASE 56'h00000000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
`define BOOTTIM_RANGE 56'h00003FFF
//`define BOOTTIM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
//`define BOOTTIM_RANGE 56'h00000FFF
`define TIM_SUPPORTED 1'b1
`define TIM_BASE 32'h80000000
`define TIM_RANGE 32'h07FFFFFF
`define TIM_BASE 56'h80000000
`define TIM_RANGE 56'h07FFFFFF
`define CLINT_SUPPORTED 1'b1
`define CLINT_BASE 32'h02000000
`define CLINT_RANGE 32'h0000FFFF
`define CLINT_BASE 56'h02000000
`define CLINT_RANGE 56'h0000FFFF
`define GPIO_SUPPORTED 1'b1
`define GPIO_BASE 32'h10012000
`define GPIO_RANGE 32'h000000FF
`define GPIO_BASE 56'h10012000
`define GPIO_RANGE 56'h000000FF
`define UART_SUPPORTED 1'b1
`define UART_BASE 32'h10000000
`define UART_RANGE 32'h00000007
`define UART_BASE 56'h10000000
`define UART_RANGE 56'h00000007
`define PLIC_SUPPORTED 1'b1
`define PLIC_BASE 32'h0C000000
`define PLIC_RANGE 32'h03FFFFFF
`define PLIC_BASE 56'h0C000000
`define PLIC_RANGE 56'h03FFFFFF
// Bus Interface width
`define AHBW 64

View File

@ -1,3 +1,9 @@
# Oftentimes this script runs so long you'll go to sleep.
# But you don't want the script to die when your computer goes to sleep.
# So consider invoking this with nohup (i.e. "nohup ./logAllBuildroot.sh")
# You can run "tail -f nohup.out" to see what would have been
# printed to the terminal if you hadn't used nohup
# =========== Debug the Process ==========
# Uncomment this version for GDB/QEMU debugging
# - Opens up GDB interactively
@ -15,6 +21,12 @@
# - Logs parse_qemu.py's simulated gdb output to qemu_in_gdb_format.txt
#cat qemu_output.txt | ./parse_qemu.py >qemu_in_gdb_format.txt
#cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/"
# Uncomment this version in case you just want to have qemu_in_gdb_format.txt around
# It is often helpful for general debugging
#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog
# Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection
#split -d -l 5600000 qemu_in_gdb_format.txt --verbose
# Uncomment this version for parse_gdb_output.py debugging
# - Uses qemu_in_gdb_format.txt
@ -24,4 +36,4 @@
# =========== Just Do the Thing ==========
# Uncomment this version for the whole thing
# - Logs info needed by buildroot testbench
(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | pv -l | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog
(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog

View File

@ -44,7 +44,7 @@ try:
instrs += 1
storeAMO = ''
if instrs % 10000 == 0:
print(instrs)
print(instrs,flush=True)
# Instr in human assembly
wPC.write('{} ***\n'.format(' '.join(l.split(':')[1].split()[0:2])))
if '\tld' in l or '\tlw' in l or '\tlh' in l or '\tlb' in l:
@ -63,6 +63,15 @@ try:
storeLoc = readLoc
storeReg = l.split()[-1].split(',')[1]
storeAMO = l.split()[-2]
if '\tlr' in l:
currentRead = l.split()[-1].split(',')[0]
readOffset = "0"
readLoc = l.split()[-1].split('(')[1][:-1]
readType = "0" # *** I don't see that readType or lastReadType are ever used; we can probably get rid of them
if '\tsc' in l:
storeOffset = "0"
storeLoc = l.split()[-1].split('(')[1][:-1]
storeReg = l.split()[-1].split(',')[1]
if '\tsd' in l or '\tsw' in l or '\tsh' in l or '\tsb' in l:
s = l.split('#')[0].split()[-1]
storeReg = s.split(',')[0]

View File

@ -40,13 +40,12 @@ def parseCSRs(l):
val = int(l.split()[1],16)
if inPageFault:
# Not sure if these CSRs should be updated or not during page fault.
#if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
# # We do update some CSRs
# CSRs[csr] = val
#else:
# # Others we preserve until changed later
# pageFaultCSRs[csr] = val
pageFaultCSRs[csr] = val
if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"):
# We do update some CSRs
CSRs[csr] = val
else:
# Others we preserve until changed later
pageFaultCSRs[csr] = val
elif pageFaultCSRs and (csr in pageFaultCSRs):
if (val != pageFaultCSRs[csr]):
del pageFaultCSRs[csr]

View File

@ -26,12 +26,12 @@ configs = [
TestCase(
name="busybear",
cmd="vsim -do wally-busybear-batch.do -c > {}",
grepstr="# loaded 100000 instructions"
grepstr="loaded 100000 instructions"
),
TestCase(
name="buildroot",
cmd="vsim -do wally-buildroot-batch.do -c > {}",
grepstr="# loaded 2000000 instructions"
grepstr="loaded 2500000 instructions"
),
TestCase(
name="rv32ic",

View File

@ -36,5 +36,4 @@ vopt work_busybear.testbench -o workopt_busybear
vsim workopt_busybear -suppress 8852,12070
run -all
exec ./slack-notifier/slack-notifier.py
quit

View File

@ -40,5 +40,4 @@ do ./wave-dos/linux-waves.do
#-- Run the Simulation
run -all
exec ./slack-notifier/slack-notifier.py
##quit

View File

@ -4,6 +4,7 @@ view wave
add wave -divider
add wave /testbench/clk
add wave /testbench/reset
add wave -dec /testbench/instrs
add wave -divider Stalls_and_Flushes
add wave /testbench/dut/hart/StallF
@ -19,12 +20,13 @@ add wave /testbench/dut/hart/FlushW
add wave -divider F
add wave -hex /testbench/dut/hart/ifu/PCF
add wave -divider D
add wave -hex /testbench/pcExpected
add wave -hex /testbench/PCDexpected
add wave -hex /testbench/dut/hart/ifu/PCD
add wave -hex /testbench/PCtextD
add wave /testbench/InstrDName
add wave -hex /testbench/dut/hart/ifu/InstrD
add wave -hex /testbench/dut/hart/ieu/c/InstrValidD
add wave -hex /testbench/PCDwrong
add wave -divider E
add wave -hex /testbench/dut/hart/ifu/PCE
add wave -hex /testbench/PCtextE

View File

@ -45,8 +45,8 @@ assign FOpCtrlE = 3'b0;
// down - 010
// up - 011
// nearest max mag - 100
assign FrmE = 3'b010;
assign FmtE = 1'b1;
assign FrmE = 3'b011;
assign FmtE = 1'b0;
assign wnan = FmtE ? &FmaResultM[62:52] && |FmaResultM[51:0] : &FmaResultM[62:55] && |FmaResultM[54:32];
@ -110,7 +110,6 @@ always @(posedge clk)
if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN ");
if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN ");
errors = errors + 1;
if (errors == 20)
$stop;
end
if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]})) || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin

View File

@ -1,3 +1,3 @@
testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat
testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rmax -seed 113355 -level 1 > testFloat
tr -d ' ' < testFloat > testFloatNoSpace

View File

@ -10,172 +10,124 @@ module fctrl (
output logic FDivStartD,
output logic [2:0] FResultSelD,
output logic [3:0] FOpCtrlD,
output logic [1:0] FResSelD,
output logic [1:0] FIntResSelD,
output logic FmtD,
output logic [2:0] FrmD,
output logic [1:0] FMemRWD,
output logic FOutputInput2D,
output logic FInput2UsedD, FInput3UsedD,
output logic FWriteIntD);
logic IllegalFPUInstr1D, IllegalFPUInstr2D;
// *** fix rounding for dynamic rounding
`define FCTRLW 15
logic [`FCTRLW-1:0] ControlsD;
// FPU Instruction Decoder
always_comb
case(OpD)
// FWriteEn_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd
7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt
7'b00100??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b00101??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_100_0111_10_00_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b10100??: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b11100??: if (Funct3D == 3'b001)
ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
7'b1100000: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w
1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101000: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b1_1_010_0100_00_00_0_0; // fcvt.w.s
1'b1: ControlsD = `FCTRLW'b1_1_010_0101_00_00_0_0; // fcvt.wu.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d
7'b1100001: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w
1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1101001: case(Rs2D[0])
1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d
1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction
endcase
// unswizzle control bits
assign {FWriteEnD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
// if dynamic rounding, choose FRM_REGW
assign FrmD = &Funct3D ? FRM_REGW : Funct3D;
//all subsequent logic is based on the table present
//in Section 5 of Wally Architecture Specification
//write is enabled for all fp instruction op codes
//sans fp load
logic isFP, isFPLD;
always_comb begin
//case statement is easier to modify
//in case of errors
case(OpD)
//fp instructions sans load
7'b1010011 : isFP = 1'b1;
7'b1000011 : isFP = 1'b1;
7'b1000111 : isFP = 1'b1;
7'b1001011 : isFP = 1'b1;
7'b1001111 : isFP = 1'b1;
7'b0100111 : isFP = 1'b1;
7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111
default : isFP = 1'b0;
endcase
end
//useful intermediary signals
//
//(mult only not supported in current datapath)
//set third FMA operand to zero in this case
//(or equivalent)
always_comb begin
//checks all but FMA/store/load
IllegalFPUInstr2D = 0;
FDivStartD = 1'b0;
if(OpD == 7'b1010011) begin
casez(Funct7D)
//compare
7'b10100?? : FResultSelD = 3'b001;
//div/sqrt
7'b0?011?? : begin FResultSelD = 3'b000; FDivStartD = 1'b1; end
//add/sub
7'b0000??? : FResultSelD = 3'b100;
//mult
7'b00010?? : FResultSelD = 3'b010;
//convert (not precision)
7'b110?0?? : FResultSelD = 3'b100;
//convert (precision)
7'b010000? : FResultSelD = 3'b100;
//Min/Max
7'b00101?? : FResultSelD = 3'b001;
//sign injection
7'b00100?? : FResultSelD = 3'b011;
//classify //only if funct3 = 001
7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101;
//output ReadData1
else if (Funct7D[1] == 0) FResultSelD = 3'b111;
//output SrcW
7'b111100? : FResultSelD = 3'b110;
default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
endcase
end
//FMA/store/load
else begin
case(OpD)
//4 FMA instructions
7'b1000011 : FResultSelD = 3'b010;
7'b1000111 : FResultSelD = 3'b010;
7'b1001011 : FResultSelD = 3'b010;
7'b1001111 : FResultSelD = 3'b010;
//store
7'b0100111 : FResultSelD = 3'b111;
//load
7'b0000111 : FResultSelD = 3'b111;
default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end
endcase
end
end
assign FOutputInput2D = OpD == 7'b0100111;
assign FMemRWD[0] = FOutputInput2D;
assign FMemRWD[1] = OpD == 7'b0000111;
//register is chosen based on operation performed
//----
//write selection is chosen in the same way as
//register selection
//
// reg/write sel logic and assignment
//
// 3'b000 = div/sqrt
// 3'b001 = cmp
// 3'b010 = fma/mult
// 3'b011 = sgn inj
// 3'b100 = add/sub/cnvt
// 3'b101 = classify
// 3'b110 = output SrcAW
// 3'b111 = output ReadData1
//
//reg select
//this value is used enough to be shorthand
//operation control for each fp operation
//has to be expanded over standard to account for
//integrated fpadd/cvt
//
//will integrate FMA opcodes into design later
//
//conversion instructions will
//also need to be added later as I find the opcode
//version I used for this repo
//let's do separate SOP for each type of operation
// assign FOpCtrlD[3] = 1'b0;
//
//
always_comb begin
IllegalFPUInstr1D = 0;
FInput3UsedD = 0;
case (FResultSelD)
// div/sqrt
// Precision
// 0-single
// 1-double
assign FmtD = FResultSelD == 3'b000 ? Funct3D[0] : Funct7D[0];
// div/sqrt
// fdiv = ???0
// fsqrt = ???1
3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end
// cmp
// cmp
// fmin = ?111
// fmax = ?101
// feq = ?010
// flt = ?001
// fle = ?011
// {?, is min or max, is eq or le, is lt or le}
3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end
//fma/mult
//fma/mult
// fmadd = ?000
// fmsub = ?001
// fnmsub = ?010 -(a*b)+c
// fnmadd = ?011 -(a*b)-c
// fmul = ?100
// {?, is mul, is negative, is sub}
3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end
// sgn inj
// sgn inj
// fsgnj = ??00
// fsgnjn = ??01
// fsgnjx = ??10
3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end
// add/sub/cnvt
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
// fcvt.w.s = 0100
@ -188,35 +140,18 @@ module fctrl (
// fcvt.d.w = 1110
// fcvt.d.wu = 1111
// fcvt.d.s = 1000
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub
3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), (Rs2D[0]&Funct7D[5])|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end
// classify {?, ?, ?, ?}
3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end
// output SrcAW
// { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub}
// fmv.w.x = ???0
// fmv.w.d = ???1
3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end
// output Input1
// flw = ?000
// fld = ?001
// fsw = ?010 // output Input2
// fsd = ?011 // output Input2
// fsw = ?010
// fsd = ?011
// fmv.x.w = ?100
// fmv.x.d = ?101
// {?, is mv, is store, is double or fmv}
3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end
default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; FInput2UsedD = 1'b0; end
endcase
end
//precision
assign FmtD = (~&FResultSelD & Funct7D[0]) | (&FResultSelD & FOpCtrlD[0]);
assign IllegalFPUInstrD = IllegalFPUInstr1D | IllegalFPUInstr2D;
//write to integer source if conv to int occurs
//AND of Funct7 for int results
// is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv
assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]);
// if not writing to int reg and not a store function and not move
assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP;
endmodule

View File

@ -1,111 +1,111 @@
module fma1(
input logic [63:0] X, // X
input logic [63:0] Y, // Y
input logic [63:0] Z, // Z
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
output logic [161:0] AlignedAddendE, // Z aligned for addition
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
output logic XInfE, YInfE, ZInfE, // inputs are infinity
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
input logic [63:0] X, // X
input logic [63:0] Y, // Y
input logic [63:0] Z, // Z
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtE, // precision 1 = double 0 = single
output logic [105:0] ProdManE, // 1.X frac * 1.Y frac
output logic [161:0] AlignedAddendE, // Z aligned for addition
output logic [12:0] ProdExpE, // X exponent + Y exponent - bias
output logic AddendStickyE, // sticky bit that is calculated during alignment
output logic KillProdE, // set the product to zero before addition if the product is too small to matter
output logic XZeroE, YZeroE, ZZeroE, // inputs are zero
output logic XInfE, YInfE, ZInfE, // inputs are infinity
output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
logic [12:0] XExp,YExp,ZExp; // input exponents
logic XSgn,YSgn,ZSgn; // input signs
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
logic [211:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [211:0] ZManPreShifted; // input to the alignment shifter
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
logic [63:0] Addend; // value to add (Z or zero)
logic [12:0] Bias; // 1023 for double, 127 for single
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
logic [51:0] XFrac,YFrac,ZFrac; // input fraction
logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one)
logic [12:0] XExp,YExp,ZExp; // input exponents
logic XSgn,YSgn,ZSgn; // input signs
logic [12:0] AlignCnt; // how far to shift the addend to align with the product
logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit
logic [213:0] ZManPreShifted; // input to the alignment shifter
logic XDenorm, YDenorm, ZDenorm; // inputs are denormal
logic [63:0] Addend; // value to add (Z or zero)
logic [12:0] Bias; // 1023 for double, 127 for single
logic XExpZero, YExpZero, ZExpZero; // input exponent zero
logic XFracZero, YFracZero, ZFracZero; // input fraction zero
logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s
///////////////////////////////////////////////////////////////////////////////
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
// - single precision is in the top half of the inputs
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// split inputs into the sign bit, fraction, and exponent to handle single or double precision
// - single precision is in the top half of the inputs
///////////////////////////////////////////////////////////////////////////////
// Set addend to zero if FMUL instruction
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
// Set addend to zero if FMUL instruction
assign Addend = FOpCtrlE[2] ? 64'b0 : Z;
assign XSgn = X[63];
assign YSgn = Y[63];
assign ZSgn = Addend[63];
assign XSgn = X[63];
assign YSgn = Y[63];
assign ZSgn = Addend[63];
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]};
assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]};
assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]};
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
assign XMan = {~XExpZero, XFrac};
assign YMan = {~YExpZero, YFrac};
assign ZMan = {~ZExpZero, ZFrac};
assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0};
assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0};
assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0};
assign XMan = {~XExpZero, XFrac};
assign YMan = {~YExpZero, YFrac};
assign ZMan = {~ZExpZero, ZFrac};
assign Bias = FmtE ? 13'h3ff : 13'h7f;
assign Bias = FmtE ? 13'h3ff : 13'h7f;
///////////////////////////////////////////////////////////////////////////////
// determine if an input is a special value
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// determine if an input is a special value
///////////////////////////////////////////////////////////////////////////////
assign XExpZero = ~|XExp;
assign YExpZero = ~|YExp;
assign ZExpZero = ~|ZExp;
assign XFracZero = ~|XFrac;
assign YFracZero = ~|YFrac;
assign ZFracZero = ~|ZFrac;
assign XExpZero = ~|XExp;
assign YExpZero = ~|YExp;
assign ZExpZero = ~|ZExp;
assign XFracZero = ~|XFrac;
assign YFracZero = ~|YFrac;
assign ZFracZero = ~|ZFrac;
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
assign XNaNE = XExpMax & ~XFracZero;
assign YNaNE = YExpMax & ~YFracZero;
assign ZNaNE = ZExpMax & ~ZFracZero;
assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0];
assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0];
assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0];
assign XNaNE = XExpMax & ~XFracZero;
assign YNaNE = YExpMax & ~YFracZero;
assign ZNaNE = ZExpMax & ~ZFracZero;
assign XDenorm = XExpZero & ~XFracZero;
assign YDenorm = YExpZero & ~YFracZero;
assign ZDenorm = ZExpZero & ~ZFracZero;
assign XDenorm = XExpZero & ~XFracZero;
assign YDenorm = YExpZero & ~YFracZero;
assign ZDenorm = ZExpZero & ~ZFracZero;
assign XInfE = XExpMax & XFracZero;
assign YInfE = YExpMax & YFracZero;
assign ZInfE = ZExpMax & ZFracZero;
assign XInfE = XExpMax & XFracZero;
assign YInfE = YExpMax & YFracZero;
assign ZInfE = ZExpMax & ZFracZero;
assign XZeroE = XExpZero & XFracZero;
assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero;
assign XZeroE = XExpZero & XFracZero;
assign YZeroE = YExpZero & YFracZero;
assign ZZeroE = ZExpZero & ZFracZero;
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multiplying two fp numbers, add the exponents
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one if there is a denormal number
///////////////////////////////////////////////////////////////////////////////
// verilator lint_off WIDTH
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
XExp + YExp - Bias + XDenorm + YDenorm;
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multiplying two fp numbers, add the exponents
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one if there is a denormal number
///////////////////////////////////////////////////////////////////////////////
// verilator lint_off WIDTH
assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 :
XExp + YExp - Bias + XDenorm + YDenorm;
// Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = XMan * YMan;
// Calculate the product's mantissa
// - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one.
assign ProdManE = XMan * YMan;
@ -114,72 +114,71 @@ module fma1(
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment
// - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
// verilator lint_on WIDTH
// determine the shift count for alignment
// - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// - Denormal numbers have an exponent value of 1, however they are
// represented with an exponent of 0. add one to the exponent if it is a denormal number
assign AlignCnt = ProdExpE - ZExp - ZDenorm;
// verilator lint_on WIDTH
// Default Addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 |
// |1'b0| addend |
// Default Addition without shifting
// | 55'b0 | 106'b(product) | 2'b0 |
// |1'b0| addend |
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZManPreShifted = {55'b0, ZMan, 104'b0};
always_comb
begin
// If the product is too small to affect the sum, kill the product
// the 1'b0 before the addend is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...)
assign ZManPreShifted = {55'b0, ZMan, 106'b0};
always_comb
begin
// If the product is too small to affect the sum, kill the product
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
KillProdE = 1;
ZManShifted = {107'b0, ZMan, 52'b0};
AddendStickyE = ~(XZeroE|YZeroE);
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
if ($signed(AlignCnt) <= $signed(-13'd56)) begin
KillProdE = 1;
ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0};
AddendStickyE = ~(XZeroE|YZeroE);
// If the Addend is shifted left (negative AlignCnt)
// If the Addend is shifted left (negative AlignCnt)
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
KillProdE = 0;
ZManShifted = ZManPreShifted << -AlignCnt;
AddendStickyE = |(ZManShifted[49:0]);
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if($signed(AlignCnt) <= $signed(13'd0)) begin
KillProdE = 0;
ZManShifted = ZManPreShifted << -AlignCnt;
AddendStickyE = |(ZManShifted[51:0]);
// If the Addend is shifted right (positive AlignCnt)
// If the Addend is shifted right (positive AlignCnt)
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if ($signed(AlignCnt)<=$signed(13'd104)) begin
KillProdE = 0;
ZManShifted = ZManPreShifted >> AlignCnt;
AddendStickyE = |(ZManShifted[49:0]);
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else if ($signed(AlignCnt)<=$signed(13'd106)) begin
KillProdE = 0;
ZManShifted = ZManPreShifted >> AlignCnt;
AddendStickyE = |(ZManShifted[51:0]);
// If the addend is too small to affect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// If the addend is too small to affect the addition
// - The addend has to shift two past the end of the addend to be considered too small
// - The 2 extra bits are needed for rounding
// | 55'b0 | 106'b(product) | 2'b0 |
// | addend |
end else begin
KillProdE = 0;
ZManShifted = 0;
AddendStickyE = ~ZZeroE;
// | 54'b0 | 106'b(product) | 2'b0 |
// | addend |
end else begin
KillProdE = 0;
ZManShifted = 0;
AddendStickyE = ~ZZeroE;
end
end
end
end
assign AlignedAddendE = ZManShifted[211:50];
endmodule
assign AlignedAddendE = ZManShifted[213:52];
endmodule

View File

@ -1,127 +1,131 @@
module fma2(
input logic [63:0] X, // X
input logic [63:0] Y, // Y
input logic [63:0] Z, // Z
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtM, // precision 1 = double 0 = single
input logic [105:0] ProdManM, // 1.X frac * 1.Y frac
input logic [161:0] AlignedAddendM, // Z aligned for addition
input logic [12:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
output logic [63:0] FmaResultM, // FMA final result
output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
input logic [63:0] X, // X
input logic [63:0] Y, // Y
input logic [63:0] Z, // Z
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic FmtM, // precision 1 = double 0 = single
input logic [105:0] ProdManM, // 1.X frac * 1.Y frac
input logic [161:0] AlignedAddendM, // Z aligned for addition
input logic [12:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
output logic [63:0] FmaResultM, // FMA final result
output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
logic [51:0] ResultFrac; // Result fraction
logic [10:0] ResultExp; // Result exponent
logic ResultSgn; // Result sign
logic [10:0] ZExp; // input exponent
logic XSgn, YSgn, ZSgn; // input sign
logic PSgn; // product sign
logic [105:0] ProdMan2; // product being added
logic [162:0] AlignedAddend2; // possibly inverted aligned Z
logic [161:0] Sum; // positive sum
logic [162:0] PreSum; // possibly negitive sum
logic [12:0] SumExp; // exponent of the normalized sum
logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [12:0] SumExpTmpMinus1; // SumExpTmp-1
logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [53:0] NormSum; // normalized sum
logic [161:0] SumShifted; // sum shifted for normalization
logic [8:0] NormCnt; // output of the leading zero detector
logic NormSumSticky; // sticky bit calulated from the normalized sum
logic SumZero; // is the sum zero
logic NegSum; // is the sum negitive
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
logic ResultDenorm; // is the result denormalized
logic Sticky; // Sticky bit
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
logic Invalid,Underflow,Overflow,Inexact; // flags
logic [8:0] DenormShift; // right shift if the result is denormalized
logic SubBySmallNum; // was there supposed to be a subtraction by a small number
logic [63:0] Addend; // value to add (Z or zero)
logic ZeroSgn; // the result's sign if the sum is zero
logic ResultSgnTmp; // the result's sign assuming the result is not zero
logic Guard, Round, LSBNormSum; // bits needed to determine rounding
logic [12:0] MaxExp; // maximum value of the exponent
logic [12:0] FracLen; // length of the fraction
logic SigNaN; // is an input a signaling NaN
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency)
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
logic [51:0] ResultFrac; // Result fraction
logic [10:0] ResultExp; // Result exponent
logic ResultSgn; // Result sign
logic [10:0] ZExp; // input exponent
logic XSgn, YSgn, ZSgn; // input sign
logic PSgn; // product sign
logic [105:0] ProdMan2; // product being added
logic [162:0] AlignedAddend2; // possibly inverted aligned Z
logic [161:0] Sum; // positive sum
logic [162:0] PreSum; // possibly negitive sum
logic [12:0] SumExp; // exponent of the normalized sum
logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results
logic [12:0] SumExpTmpMinus1; // SumExpTmp-1
logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [54:0] NormSum; // normalized sum
logic [161:0] SumShifted; // sum shifted for normalization
logic [8:0] NormCnt; // output of the leading zero detector
logic NormSumSticky; // sticky bit calulated from the normalized sum
logic SumZero; // is the sum zero
logic NegSum; // is the sum negitive
logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z)
logic ResultDenorm; // is the result denormalized
logic Sticky; // Sticky bit
logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding
logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag)
logic Invalid,Underflow,Overflow,Inexact; // flags
logic [8:0] DenormShift; // right shift if the result is denormalized
logic SubBySmallNum; // was there supposed to be a subtraction by a small number
logic [63:0] Addend; // value to add (Z or zero)
logic ZeroSgn; // the result's sign if the sum is zero
logic ResultSgnTmp; // the result's sign assuming the result is not zero
logic Guard, Round, LSBNormSum; // bits needed to determine rounding
logic UfGuard, UfRound, UfLSBNormSum; // bits needed to determine rounding for underflow flag
logic [12:0] MaxExp; // maximum value of the exponent
logic [12:0] FracLen; // length of the fraction
logic SigNaN; // is an input a signaling NaN
logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency)
logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
///////////////////////////////////////////////////////////////////////////////
// Select input fields
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Select input fields
// The following logic duplicates fma1 because it's cheaper to recompute than provide registers
///////////////////////////////////////////////////////////////////////////////
// Set addend to zero if FMUL instruction
assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
// Set addend to zero if FMUL instruction
assign Addend = FOpCtrlM[2] ? 64'b0 : Z;
// split inputs into the sign bit, and exponent to handle single or double precision
// - single precision is in the top half of the inputs
assign XSgn = X[63];
assign YSgn = Y[63];
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
// split inputs into the sign bit, and exponent to handle single or double precision
// - single precision is in the top half of the inputs
assign XSgn = X[63];
assign YSgn = Y[63];
assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]};
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1];
///////////////////////////////////////////////////////////////////////////////
// Addition
///////////////////////////////////////////////////////////////////////////////
// Negate Z when doing one of the following operations:
// -prod + Z
// prod - Z
assign InvZ = ZSgn ^ PSgn;
///////////////////////////////////////////////////////////////////////////////
// Addition
///////////////////////////////////////////////////////////////////////////////
// Negate Z when doing one of the following operations:
// -prod + Z
// prod - Z
assign InvZ = ZSgn ^ PSgn;
// Choose an inverted or non-inverted addend - the one is added later
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
// Choose an inverted or non-inverted addend - the one is added later
assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM};
// Kill the product if the product is too small to effect the addition (determined in fma1.sv)
assign ProdMan2 = KillProdM ? 106'b0 : ProdManM;
// Do the addition
// - add one to negate if the addend was inverted
// - the 2 extra bits at the beginning and end are needed for rounding
assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ};
// Is the sum negative
assign NegSum = PreSum[162];
// If the sum is negative, negate the sum.
assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0];
// Do the addition
// - add one to negate if the addend was inverted
// - the 2 extra bits at the beginning and end are needed for rounding
assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ};
// Is the sum negative
assign NegSum = PreSum[162];
// If the sum is negative, negate the sum.
assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0];
///////////////////////////////////////////////////////////////////////////////
// Leading one detector
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Leading one detector
///////////////////////////////////////////////////////////////////////////////
//*** replace with non-behavioral code
logic [8:0] i;
always_comb begin
i = 0;
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
NormCnt = i+1; // compute shift count
end
//*** replace with non-behavioral code
logic [8:0] i;
always_comb begin
i = 0;
while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one
NormCnt = i+1; // compute shift count
end
@ -133,112 +137,127 @@ module fma2(
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
// Determine if the sum is zero
assign SumZero = ~(|Sum);
// Determine if the sum is zero
assign SumZero = ~(|Sum);
// determine the length of the fraction based on precision
assign FracLen = FmtM ? 13'd52 : 13'd23;
// determine the length of the fraction based on precision
assign FracLen = FmtM ? 13'd52 : 13'd23;
// Determine if the result is denormal
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
// Determine if the result is denormal
assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56);
assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;
// Determine the shift needed for denormal results
assign SumExpTmpMinus1 = SumExpTmp-1;
assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;
// Determine the shift needed for denormal results
assign SumExpTmpMinus1 = SumExpTmp-1;
assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0;
// Normalize the sum
assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
assign NormSum = SumShifted[161:108];
// Calculate the sticky bit
assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]);
assign Sticky = AddendStickyM | NormSumSticky;
// Normalize the sum
assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift;
assign NormSum = SumShifted[161:107];
// Calculate the sticky bit
assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]);
assign Sticky = AddendStickyM | NormSumSticky;
// Determine sum's exponent
assign SumExp = SumZero ? 13'b0 :
ResultDenorm ? 13'b0 :
SumExpTmp;
// Determine sum's exponent
assign SumExp = SumZero ? 13'b0 :
ResultDenorm ? 13'b0 :
SumExpTmp;
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
// round to nearest even
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
// determine guard, round, and least significant bit of the result
assign Guard = FmtM ? NormSum[1] : NormSum[30];
assign Round = FmtM ? NormSum[0] : NormSum[29];
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[31];
// determine guard, round, and least significant bit of the result
assign Guard = FmtM ? NormSum[2] : NormSum[31];
assign Round = FmtM ? NormSum[1] : NormSum[30];
assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32];
// Determine if a small number was supposed to be subtracted
assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM;
// used to determine underflow flag
assign UfGuard = FmtM ? NormSum[1] : NormSum[30];
assign UfRound = FmtM ? NormSum[0] : NormSum[29];
assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31];
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: CalcPlus1 = Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&LSBNormSum&~SubBySmallNum));//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up
3'b100: CalcPlus1 = (Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&~SubBySmallNum)));//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// Determine if a small number was supposed to be subtracted
assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM;
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky | Guard | Round);
assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: CalcPlus1 = Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&LSBNormSum&~SubBySmallNum));//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up
3'b100: CalcPlus1 = (Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&~SubBySmallNum)));//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (FrmM)
3'b000: UfCalcPlus1 = UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&UfLSBNormSum&~SubBySmallNum));//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round up
3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&~SubBySmallNum)));//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// Compute rounded result
logic [64:0] RoundAdd;
logic [51:0] NormSumTruncated;
assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} :
Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
assign NormSumTruncated = FmtM ? NormSum[53:2] : {NormSum[53:31], 29'b0};
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round);
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard | UfRound);
assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round);
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
// Compute rounded result
logic [64:0] RoundAdd;
logic [51:0] NormSumTruncated;
assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} :
Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
assign NormSumTruncated = FmtM ? NormSum[54:3] : {NormSum[54:32], 29'b0};
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
assign ResultExp = FullResultExp[10:0];
@ -247,58 +266,57 @@ module fma2(
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// otherwise psign
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
// Determine the sign if the sum is zero
// if cancelation then 0 unless round to -infinity
// otherwise psign
assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn;
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn);
assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp;
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
// Set Invalid flag for following cases:
// 1) Inf - Inf (unless x or y is NaN)
// 2) 0 * Inf
// 3) any input is a signaling NaN
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't output
assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Invalid flag for following cases:
// 1) any input is a signaling NaN
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
assign MaxExp = FmtM ? 13'd2047 : 13'd255;
assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) :
(XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]);
assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);
// Set Overflow flag if the number is too big to be represented
// - Don't set the overflow flag if an overflowed result isn't output
assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Underflow flag if the number is too small to be represented in normal numbers
// - Don't set the underflow flag if the result is exact
assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
//assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1);
assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1);
// Set Inexact flag if the result is different from what would be output given infinite precision
// - Don't set the underflow flag if an underflowed result isn't output
assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Underflow flag if the number is too small to be represented in normal numbers
// - Don't set the underflow flag if the result is exact
assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
assign UnderflowFlag = (FullResultExp[12] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Set Inexact flag if the result is different from what would be output given infinite precision
// - Don't set the underflow flag if an underflowed result isn't output
assign Inexact = (Sticky|UfGuard|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);
// Combine flags
// - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result was rounded up to a normal number
assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
// Combine flags
// - FMA can't set the Divide by zero flag
// - Don't set the underflow flag if the result was rounded up to a normal number
assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
@ -306,31 +324,31 @@ module fma2(
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
{ResultSgn, 11'h7ff, 52'b0} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
{ResultSgn, 8'hff, 55'b0};
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
assign FmaResultM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult : // has to be before inf
XInfM ? {PSgn, X[62:0]} :
YInfM ? {PSgn, Y[62:0]} :
ZInfM ? {ZSgn, Addend[62:0]} :
Overflow ? OverflowResult :
KillProdM ? KillProdResult : // has to be after Underflow
Underflow & ~ResultDenorm ? UnderflowResult :
FmtM ? {ResultSgn, ResultExp, ResultFrac} :
{ResultSgn, ResultExp[7:0], ResultFrac, 3'b0};
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]};
assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]};
assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]};
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} :
{ResultSgn, 11'h7ff, 52'b0} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} :
{ResultSgn, 8'hff, 55'b0};
assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0};
assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0};
assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0};
assign FmaResultM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult : // has to be before inf
XInfM ? {PSgn, X[62:0]} :
YInfM ? {PSgn, Y[62:0]} :
ZInfM ? {ZSgn, Addend[62:0]} :
Overflow ? OverflowResult :
KillProdM ? KillProdResult : // has to be after Underflow
Underflow & ~ResultDenorm ? UnderflowResult :
FmtM ? {ResultSgn, ResultExp, ResultFrac} :
{ResultSgn, ResultExp[7:0], ResultFrac, 3'b0};

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
//
// Written:
// Modified:
// Written: Katherine Parry, Bret Mathis
// Modified: 6/23/2021
//
// Purpose: FPU
//
@ -25,23 +25,22 @@
`include "wally-config.vh"
module fpu (
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic reset,
input logic clk,
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD,
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
input logic [`XLEN-1:0] SrcAE, // Integer input being processed
input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg
input logic StallE, StallM, StallW,
input logic FlushE, FlushM, FlushW,
input logic [`XLEN-1:0] ReadDataW, // Read data from memory
input logic RegWriteD, // register write enable from ieu
output logic [4:0] SetFflagsM, // FPU flags
output logic [1:0] FMemRWM, // Read/write enable for memory {read, write}
output logic FStallD, // Stall the decode stage if Div/Sqrt instruction
output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable
output logic [`XLEN-1:0] FWriteDataM, // Data to be written to memory
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM,
output logic FDivBusyE, // Is the divison/sqrt unit busy
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM, // FPU flags
output logic [`XLEN-1:0] FPUResultW); // FPU result
// control logic signal instantiation
@ -51,24 +50,27 @@ module fpu (
logic FDivStartD, FDivStartE; // Start division
logic FWriteIntD; // Write to integer register
logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction
logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory
logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal
logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal
logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal
logic FInput2UsedD; // Is input 2 used
logic FInput3UsedD; // Is input 3 used
logic [1:0] FMemRWD; // Read and write enable for memory
logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal
logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal
logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal
logic SrcYUsedD; // Is input 2 used
logic SrcZUsedD; // Is input 3 used
logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component
logic SelLoadInputE, SelLoadInputM; // Select which adress to load when single precision
logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM;
logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM;
logic [4:0] Adr1E, Adr2E, Adr3E;
// regfile signals //*** KEP lint warning - changed `XLEN-1 to 63
// regfile signals
logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining
logic [63:0] FWDM; // Write data for FP register
logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [63:0] FInput1E, FInput1M, FInput1W, FInput1tmpE; // Input 1 to the various units (after forwarding)
logic [63:0] FInput2E, FInput2M; // Input 2 to the various units (after forwarding)
logic [63:0] FInput3E, FInput3M; // Input 3 to the various units (after forwarding)
logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] SrcXMAligned;
logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding)
logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding)
logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions
// div/sqrt signals
@ -123,19 +125,14 @@ module fpu (
logic [4:0] FAddFlagsM, FAddFlagsW;
// cmp signals
logic [7:0] WE, WM;
logic [7:0] XE, XM;
logic ANaNE, ANaNM;
logic BNaNE, BNaNM;
logic AzeroE, AzeroM;
logic BzeroE, BzeroM;
logic CmpInvalidM, CmpInvalidW;
logic [1:0] CmpFCCM, CmpFCCW;
logic [63:0] FCmpResultM, FCmpResultW;
logic CmpInvalidE, CmpInvalidM, CmpInvalidW;
logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW;
// fsgn signals
logic [63:0] SgnResultE, SgnResultM, SgnResultW;
logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW;
logic [63:0] FResM, FResW;
logic FFlgM, FFlgW;
// instantiation of W stage regfile signals
logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW;
@ -147,31 +144,9 @@ module fpu (
logic [63:0] FPUResult64W, FPUResult64E;
logic [4:0] FPUFlagsW;
// pipeline control logic
logic PipeEnableDE;
logic PipeEnableEM;
logic PipeEnableMW;
logic PipeClearDE;
logic PipeClearEM;
logic PipeClearMW;
// temporarily assign pipe clear and enable signals
// to never flush & always be running
localparam PipeClear = 1'b0;
localparam PipeEnable = 1'b1;
always_comb begin
PipeEnableDE = ~StallE;
PipeEnableEM = ~StallM;
PipeEnableMW = ~StallW;
PipeClearDE = FlushE;
PipeClearEM = FlushM;
PipeClearMW = FlushW;
end
//DECODE STAGE
// Hazard unit for FPU
fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*);
// top-level controller for FPU
fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*);
@ -185,40 +160,33 @@ module fpu (
//*****************
// fpregfile D/E pipe registers
//*****************
flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E);
flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
//*****************
// other D/E pipe registers
//*****************
flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE);
flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE);
flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE);
flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE);
flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE);
flopenrc #(4) DEReg9(clk, reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE);
flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE);
flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E);
flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E);
flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E);
flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E);
flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE);
flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E);
flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE);
flopenrc #(1) DEReg18(clk, reset, PipeClearDE, PipeEnableDE, InstrD[15], SelLoadInputE);
flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE);
flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE,
{FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD},
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE});
//EXECUTION STAGE
// input muxs for forwarding
mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM);
mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, FForwardInput1E, FInput1tmpE);
mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E);
mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E);
mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E);
// Hazard unit for FPU
fpuhazard hazard(.*);
// forwarding muxs
mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE);
mux3 #(64) fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE);
mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE);
// first of two-stage instance of floating-point fused multiply-add unit
fma1 fma1 (.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FOpCtrlE(FOpCtrlE[2:0]),.*);
fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*);
// first and only instance of floating-point divider
logic fpdivClk;
@ -229,193 +197,181 @@ module fpu (
.ECLK(fpdivClk));
// capture the inputs for div/sqrt
flopenrc #(64) reg_input1 (.d(FInput1E), .q(DivInput1E),
flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E),
.en(~HoldInputs), .clear(FDivSqrtDoneE),
.reset(reset), .clk(clk));
flopenrc #(64) reg_input2 (.d(FInput2E), .q(DivInput2E),
flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E),
.en(~HoldInputs), .clear(FDivSqrtDoneE),
.reset(reset), .clk(clk));
fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*);
// first of two-stage instance of floating-point add/cvt unit
fpuaddcvt1 fpadd1 (.*);
// first of two-stage instance of floating-point comparator
fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]);
fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE);
// first and only instance of floating-point sign converter
fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*);
// first and only instance of floating-point classify unit
fpuclassify fpuclass (.*);
// output for store instructions
assign FWriteDataE = FmtE ? SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]};
//*****************
//fpregfile E/M pipe registers
//*****************
flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M);
flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M);
flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M);
flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM);
flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM);
flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM);
//*****************
// fma E/M pipe registers
//*****************
flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, ProdExpE, ProdExpM);
flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, AddendStickyE, AddendStickyM);
flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, KillProdE, KillProdM);
flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, XZeroE, XZeroM);
flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, YZeroE, YZeroM);
flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, ZZeroE, ZZeroM);
flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, XInfE, XInfM);
flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, YInfE, YInfM);
flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, ZInfE, ZInfM);
flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, XNaNE, XNaNM);
flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, YNaNE, YNaNM);
flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, ZNaNE, ZNaNM);
flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM);
flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, AlignedAddendE, AlignedAddendM);
flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM);
flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM);
flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM);
flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM);
flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM);
flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM);
flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM);
flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM);
flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM);
flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM);
flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM);
//*****************
// fpadd E/M pipe registers
//*****************
flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM);
flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM);
flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM);
flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM);
flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM);
flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM);
flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM);
flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM);
flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM);
flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM);
flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM);
flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM);
flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM);
flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM);
flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM);
flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM);
flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM);
flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM);
flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM);
flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM);
flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM);
flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, AddOpBNormM);
flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM);
flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM);
flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM);
flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM);
flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM);
flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM);
flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M);
flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M);
flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM);
flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM);
flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM);
flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM);
flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM);
flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM);
flopenrc #(1) EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM);
flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM);
//*****************
// fpcmp E/M pipe registers
//*****************
flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM);
flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM);
flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM);
flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM);
flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM);
flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM);
flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM);
flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, FCmpResultE, FCmpResultM);
// put this in for the event we want to delay fsgn - will otherwise bypass
//*****************
// fpsgn E/M pipe registers
//*****************
flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM);
flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM);
flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM);
//*****************
// other E/M pipe registers
//*****************
flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM);
flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM);
flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM);
flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM);
flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM);
flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM);
flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM);
flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM);
flopenrc #(1) EMReg9(clk, reset, PipeClearEM, PipeEnableEM, SelLoadInputE, SelLoadInputM);
flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM,
{FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE},
{FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM});
//*****************
// fpuclassify E/M pipe registers
//*****************
flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM);
flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM);
//BEGIN MEMORY STAGE
assign FWriteDataM = FmtM ? FInput1M[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FInput1M[63:32]};
//adjacent address values are sent to the FPU, select the correct one
// -imm is 80000 most of the time vs the error one which is 00000
// mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM);
// mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
fma2 fma2(.X(FInput1M), .Y(FInput2M), .Z(FInput3M), .FOpCtrlM(FOpCtrlM[2:0]), .*);
mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM);
assign FFlgM = CmpInvalidM & FResSelM[1];
assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]};
mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM);
// second instance of two-stage FMA unit
fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*);
// second instance of two-stage floating-point add/cvt unit
fpuaddcvt2 fpadd2 (.*);
// second instance of two-stage floating-point comparator
fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM),
.Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*);
// Align SrcA to MSB when single precision
mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM);
//*****************
//fpregfile M/W pipe registers
//*****************
flopenrc #(64) MWFpReg1(clk, reset, PipeClearMW, PipeEnableMW, FInput1M, FInput1W);
flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW);
flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW);
//*****************
// fma M/W pipe registers
//*****************
flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW);
flopenrc #(5) MWRegFma2(clk, reset, PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW);
flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW);
flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW);
//*****************
// fpdiv M/W pipe registers
//*****************
flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW);
flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW);
flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW);
flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW);
//*****************
// fpadd M/W pipe registers
//*****************
flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW);
flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW);
flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW);
//*****************
// fpcmp M/W pipe registers
//*****************
flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW);
flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW);
flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW);
flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW);
// flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW);
flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW);
//*****************
// fpsgn M/W pipe registers
//*****************
flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW);
flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, PipeEnableMW, SgnFlagsM, SgnFlagsW);
flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW);
flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW);
//*****************
// other M/W pipe registers
//*****************
flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW);
flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW);
flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW);
flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW);
flopenrc #(64) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, AlignedSrcAM, SrcAW);
// flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW);
flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW);
flopenrc #(4) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FOpCtrlM, FOpCtrlW);
flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM},
{FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW});
//*****************
// fpuclassify M/W pipe registers
//*****************
flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW);
flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW);
flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW);
@ -424,14 +380,6 @@ module fpu (
//#########################################
// BEGIN WRITEBACK STAGE
//#########################################
// mux3 #(64) FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM);
// mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM);
//***RV32D needs to give two bus transactions
mux2 #(64) FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW);
mux2 #(64) FLoadStoreResultMux(FLoadResultW, FInput1W, |FOpCtrlW[2:1], FLoadStoreResultW);
@ -440,47 +388,26 @@ module fpu (
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUFlagsW = FDivFlagsW;
// cmp
3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0};
//fma/mult
3'b010 : FPUFlagsW = FmaFlagsW;
// sgn inj
3'b011 : FPUFlagsW = SgnFlagsW;
// add/sub/cnvt
3'b100 : FPUFlagsW = FAddFlagsW;
// classify
3'b101 : FPUFlagsW = 5'b0;
// output SrcAW
3'b110 : FPUFlagsW = 5'b0;
// output FRD1
3'b111 : FPUFlagsW = 5'b0;
3'b000 : FPUFlagsW = 5'b0;
3'b001 : FPUFlagsW = FmaFlagsW;
3'b010 : FPUFlagsW = FAddFlagsW;
3'b011 : FPUFlagsW = FDivFlagsW;
3'b100 : FPUFlagsW = {4'b0,FFlgW};
default : FPUFlagsW = 5'bxxxxx;
endcase
end
always_comb begin
case (FResultSelW)
// div/sqrt
3'b000 : FPUResult64W = FDivResultW;
// cmp
3'b001 : FPUResult64W = FCmpResultW;
//fma/mult
3'b010 : FPUResult64W = FmaResultW;
// sgn inj
3'b011 : FPUResult64W = SgnResultW;
// add/sub/cnvt
3'b100 : FPUResult64W = FAddResultW;
// classify
3'b101 : FPUResult64W = ClassResultW;
// output SrcAW
3'b110 : FPUResult64W = SrcAW;
// Load/Store/Move to FP-register
3'b111 : FPUResult64W = FLoadStoreResultW;
default : FPUResult64W = {64{1'bx}};
3'b000 : FPUResult64W = FmtW ? {ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0};
3'b001 : FPUResult64W = FmaResultW;
3'b010 : FPUResult64W = FAddResultW;
3'b011 : FPUResult64W = FDivResultW;
3'b100 : FPUResult64W = FResW;
default : FPUResult64W = 64'bxxxxx;
endcase
end // always_comb
end
// interface between XLEN size datapath and double-precision sized
// floating-point results

View File

@ -27,10 +27,10 @@
//
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE);
module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE);
input logic [63:0] FInput1E; // 1st input operand (A)
input logic [63:0] FInput2E; // 2nd input operand (B)
input logic [63:0] SrcXE; // 1st input operand (A)
input logic [63:0] SrcYE; // 2nd input operand (B)
input logic [3:0] FOpCtrlE; // Function opcode
input logic FmtE; // Result Precision (1 for double, 0 for single)
@ -81,12 +81,12 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
// and the sign of the first operand is set appropriately based on
// whether the operation is absolute value or negation.
convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P);
convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P);
// Test for exceptions and return the "Invalid Operation" and
// "Denormalized" Input Flags. The "AddSelInvE" is used in
// the third pipeline stage to select the result. Also, AddOp1NormE
// and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized.
// and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized.
// sub is one if the effective operation is subtraction.
exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub,
@ -159,8 +159,8 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE,
// Place either the sign-extended 32-bit value or the original 64-bit value
// into IntValue (to be used for integer to floating point conversion)
assign IntValue [31:0] = FInput1E[31:0];
assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32];
assign IntValue [31:0] = SrcXE[31:0];
assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32];
// If doing an integer to floating point conversion, mantissaA3 is set to
// IntVal and the prenormalized exponent is set to 1084. Otherwise,

View File

@ -1,7 +1,8 @@
`include "wally-config.vh"
module fpuclassify (
input logic [63:0] FInput1E,
input logic [63:0] SrcXE,
input logic FmtE, // 0-single 1-double
output logic [63:0] ClassResultE
);
@ -13,9 +14,9 @@ module fpuclassify (
logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan;
// single and double precision layouts
assign single = FInput1E[63:32];
assign double = FInput1E;
assign sign = FInput1E[63];
assign single = SrcXE[63:32];
assign double = SrcXE;
assign sign = SrcXE[63];
// basic calculations for readability
assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23];
@ -43,10 +44,7 @@ module fpuclassify (
// bit 7 - +infinity
// bit 8 - signaling NaN
// bit 9 - quiet NaN
assign ClassResultE = FmtE ? {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity} :
{{22{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity, {32{1'b0}}};
assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal,
~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity};
endmodule

View File

@ -1,3 +1,4 @@
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
@ -17,9 +18,9 @@
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal Sel that indicates the type of
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// Sel Description
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
@ -37,24 +38,41 @@
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec
input logic [63:0] op1;
input logic [63:0] op2;
input logic [1:0] Sel;
`include "wally-config.vh"
module fpucmp1 (
input logic [63:0] op1,
input logic [63:0] op2,
input logic [2:0] FOpCtrlE,
input logic FmtE,
output logic [7:0] w, x;
output logic ANaN, BNaN;
output logic Azero, Bzero;
output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
output logic [63:0] FCmpResultE);
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
logic [1:0] FCC; // Condition Codes
logic [7:0] w, x;
logic ANaN, BNaN;
logic Azero, Bzero;
logic LT; // magnitude op1 < magnitude op2
logic EQ; // magnitude op1 = magnitude op2
magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE);
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
magcompare64b_1 magcomp2 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]});
magcompare64b_2 magcomp2 (LT, EQ, w, x);
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, Sel);
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*);
endmodule // fpcomp
@ -178,9 +196,9 @@ module magcompare64b_1 (w, x, A, B);
endmodule // magcompare64b
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// Sel Description
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
@ -196,11 +214,11 @@ endmodule // magcompare64b
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE);
input logic [63:0] A;
input logic [63:0] B;
input logic [1:0] Sel;
input logic [2:0] FOpCtrlE;
logic dp, sp, hp;
@ -209,9 +227,9 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
output logic Azero;
output logic Bzero;
assign dp = !Sel[1]&!Sel[0];
assign sp = !Sel[1]&Sel[0];
assign hp = Sel[1]&!Sel[0];
assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
assign hp = FOpCtrlE[1]&!FOpCtrlE[0];
// Test if A or B is NaN.
assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) &
@ -232,3 +250,216 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel);
assign Bzero = (B[62:0] == 63'h0);
endmodule // exception_cmp
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal FOpCtrlE that indicates the type of
// operands being compared as indicated below.
// FOpCtrlE Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
/*module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule*/ // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1' if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
// module magcompare2c (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// assign LT = B[1] | (!A[1]&B[0]);
// assign GT = A[1] | (!B[1]&A[0]);
// endmodule // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1' if A = B. LT and EQ are both '0' if A > B.
// This structure was modified so
// that it only does a strict magnitude comparison, and only
// returns flags for less than (LT) and equal to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gate.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
// magcompare64b_2: final reduction stage of the 64-bit magnitude comparator.
//
// Takes the two 8-bit partial-comparison vectors w and x and reduces them
// through three levels of 2-bit comparators (4 -> 2 -> 1 instances of
// magcompare2c) to a single LT/GT pair. EQ is then derived as "neither
// LT nor GT".
//
// Ports (order unchanged: LT, EQ, w, x):
//   LT  first output of the last comparator in the tree
//   EQ  1 when both LT and the internal GT flag are 0
//   w,x 8-bit partial results to reduce
//
// NOTE(review): magcompare2c is not defined in this module; connections are
// positional and presumably follow (LT, GT, A, B) - confirm against its
// definition. Note that each level feeds the previous level's outputs in
// swapped order, exactly as written below.
module magcompare64b_2 (
   output logic       LT,
   output logic       EQ,
   input  logic [7:0] w,
   input  logic [7:0] x);

   // Outputs of the first comparator level (one bit pair per 2-bit slice).
   logic [3:0] s1_o0;
   logic [3:0] s1_o1;
   // Outputs of the second comparator level.
   logic [1:0] s2_o0;
   logic [1:0] s2_o1;
   // Second output of the final comparator; only used to derive EQ.
   logic       GT;

   // Level 1: compare matching 2-bit slices of x and w.
   magcompare2c mag39(s1_o0[0], s1_o1[0], x[1:0], w[1:0]);
   magcompare2c mag3A(s1_o0[1], s1_o1[1], x[3:2], w[3:2]);
   magcompare2c mag3B(s1_o0[2], s1_o1[2], x[5:4], w[5:4]);
   magcompare2c mag3C(s1_o0[3], s1_o1[3], x[7:6], w[7:6]);

   // Level 2: combine pairs of level-1 results.
   magcompare2c mag3D(s2_o0[0], s2_o1[0], s1_o1[1:0], s1_o0[1:0]);
   magcompare2c mag3E(s2_o0[1], s2_o1[1], s1_o1[3:2], s1_o0[3:2]);

   // Level 3: final LT/GT decision.
   magcompare2c mag3F(LT, GT, s2_o1[1:0], s2_o0[1:0]);

   // Equal exactly when the tree reports neither less-than nor greater-than.
   assign EQ = ~(LT | GT);

endmodule // magcompare64b
// This module takes 64-bit inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and the signal FOpCtrlE, whose two low bits
// indicate the type of operands being compared as shown below.
// FOpCtrlE[1:0] Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc description
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
// exception_cmp_2: final stage of the floating-point comparator.
//
// Combines the magnitude-comparison flags with the operand sign bits and the
// NaN/zero flags to produce the ordered comparison (LT/EQ/GT/unordered), the
// invalid-operation flag, and the 64-bit result selected by FOpCtrlE.
//
// Inputs:
//   A, B          64-bit operands
//   FmtE          declared for interface compatibility; not read in this module
//   LT_mag        magnitude comparison flag "A < B"
//   EQ_mag        magnitude comparison flag "A = B"
//   FOpCtrlE      [2:0] selects the value driven on FCmpResultE (see the case
//                 statement); bits [1:0] are also decoded into dp/sp/hp
//   Azero, Bzero  operand-is-zero flags
//   ANaN, BNaN    operand-is-NaN flags
// Outputs:
//   invalid       1 when either operand is classified as a signaling NaN
//   fcc           00: A = B, 01: A < B, 10: A > B, 11: unordered
//   FCmpResultE   min/max operand, or a 0/1 comparison result in bit 0
//
// NOTE(review): dp/sp/hp are decoded from FOpCtrlE[1:0], the same bits that
// select the operation below, while FmtE is unused. As written, signaling-NaN
// detection is only active for the opcodes whose low bits are 00/01/10 and is
// never active when FOpCtrlE[1:0] == 2'b11. Confirm whether the precision
// should instead come from FmtE.
module exception_cmp_2 (
   input  logic [63:0] A,
   input  logic [63:0] B,
   input  logic        FmtE,
   input  logic        LT_mag,
   input  logic        EQ_mag,
   input  logic [2:0]  FOpCtrlE,
   output logic        invalid,
   output logic [1:0]  fcc,
   output logic [63:0] FCmpResultE,
   input  logic        Azero,
   input  logic        Bzero,
   input  logic        ANaN,
   input  logic        BNaN);

   logic dp;      // FOpCtrlE[1:0] == 2'b00
   logic sp;      // FOpCtrlE[1:0] == 2'b01
   logic hp;      // FOpCtrlE[1:0] == 2'b10
   logic ASNaN;   // A is a signaling NaN
   logic BSNaN;   // B is a signaling NaN
   logic UO;      // operands are unordered
   logic GT;      // A > B
   logic LT;      // A < B
   logic EQ;      // A = B

   assign dp = !FOpCtrlE[1]&!FOpCtrlE[0];
   assign sp = !FOpCtrlE[1]&FOpCtrlE[0];
   assign hp = FOpCtrlE[1]&!FOpCtrlE[0];

   // Values are unordered if A is NaN or B is NaN.
   assign UO = (ANaN | BNaN);

   // Test if A or B is a signaling NaN: the operand is NaN and the bit
   // checked for the decoded operand type is clear.
   assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
   assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);

   // If either A or B is a signaling NaN the "Invalid Operation"
   // exception flag is set to one; otherwise it is zero.
   assign invalid = (ASNaN | BSNaN);

   // A and B are equal if the magnitude comparison reports equality or both
   // operands are zero (so +0 = -0), and they are not unordered.
   assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);

   // A is less than B if both operands are negative and the magnitude
   // comparison does NOT report A < B, or if the magnitude comparison reports
   // A < B and the operands are not both negative. A is never less than B
   // when the operands are equal or unordered.
   assign LT = ((~LT_mag & A[63] & B[63]) |
                (LT_mag & ~(A[63] & B[63])))&~EQ&~UO;

   // A is greater than B when LT, EQ, and UO are all false.
   assign GT = ~(LT | EQ | UO);

   // Set the bits of fcc based on LT, GT, EQ, and UO.
   assign fcc[0] = LT | UO;
   assign fcc[1] = GT | UO;

   // Select the 64-bit result for the requested operation; any other
   // FOpCtrlE value yields zero.
   always_comb begin
      case (FOpCtrlE[2:0])
         3'b111:  FCmpResultE = LT ? A : B;        // min
         3'b101:  FCmpResultE = GT ? A : B;        // max
         3'b010:  FCmpResultE = {63'b0, EQ};       // equal
         3'b001:  FCmpResultE = {63'b0, LT};       // less than
         3'b011:  FCmpResultE = {63'b0, LT|EQ};    // less than or equal
         default: FCmpResultE = 64'b0;
      endcase
   end

endmodule // exception_cmp

View File

@ -1,243 +1,243 @@
//
// File name : fpcomp.v
// Title : Floating-Point Comparator
// project : FPU
// Library : fpcomp
// Author(s) : James E. Stine
// Purpose : definition of main unit to floating-point comparator
// notes :
//
// Copyright Oklahoma State University
//
// Floating Point Comparator (Algorithm)
//
// 1.) Performs sign-extension if the inputs are 32-bit integers.
// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// 3.) Check for special cases (+0=-0, unordered, and infinite values)
// and correct for sign bits
//
// This module takes 64-bits inputs op1 and op2, VSS, and VDD
// signals, and a 2-bit signal Sel that indicates the type of
// operands being compared as indicated below.
// Sel Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 (unused)
//
// The comparator produces a 2-bit signal FCC, which
// indicates the result of the comparison:
//
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
//
// It also produces an invalid operation flag, which is one
// if either of the input operands is a signaling NaN per 754
// //
// // File name : fpcomp.v
// // Title : Floating-Point Comparator
// // project : FPU
// // Library : fpcomp
// // Author(s) : James E. Stine
// // Purpose : definition of main unit to floating-point comparator
// // notes :
// //
// // Copyright Oklahoma State University
// //
// // Floating Point Comparator (Algorithm)
// //
// // 1.) Performs sign-extension if the inputs are 32-bit integers.
// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs
// // 3.) Check for special cases (+0=-0, unordered, and infinite values)
// // and correct for sign bits
// //
// // This module takes 64-bits inputs op1 and op2, VSS, and VDD
// // signals, and a 2-bit signal Sel that indicates the type of
// // operands being compared as indicated below.
// // Sel Description
// // 00 double precision numbers
// // 01 single precision numbers
// // 10 half precision numbers
// // 11 (unused)
// //
// // The comparator produces a 2-bit signal FCC, which
// // indicates the result of the comparison:
// //
// // fcc decscription
// // 00 A = B
// // 01 A < B
// // 10 A > B
// // 11 A and B are unordered (i.e., A or B is NaN)
// //
// // It also produces an invalid operation flag, which is one
// // if either of the input operands is a signaling NaN per 754
module fpucmp2 (
input logic [63:0] op1,
input logic [63:0] op2,
input logic [1:0] Sel,
input logic [7:0] w, x,
input logic ANaN, BNaN,
input logic Azero, Bzero,
input logic [3:0] FOpCtrlM,
input logic FmtM,
// module fpucmp2 (
// input logic [63:0] op1,
// input logic [63:0] op2,
// input logic [1:0] Sel,
// input logic [7:0] w, x,
// input logic ANaN, BNaN,
// input logic Azero, Bzero,
// input logic [3:0] FOpCtrlM,
// input logic FmtM,
output logic Invalid, // Invalid Operation
output logic [1:0] FCC, // Condition Codes
output logic [63:0] FCmpResultM);
// output logic Invalid, // Invalid Operation
// output logic [1:0] FCC, // Condition Codes
// output logic [63:0] FCmpResultM);
logic LT; // magnitude op1 < magnitude op2
logic EQ; // magnitude op1 = magnitude op2
// logic LT; // magnitude op1 < magnitude op2
// logic EQ; // magnitude op1 = magnitude op2
// Perform magnitude comparison between the 63 least signficant bits
// of the input operands. Only LT and EQ are returned, since GT can
// be determined from these values.
magcompare64b_2 magcomp2 (LT, EQ, w, x);
// // Perform magnitude comparison between the 63 least signficant bits
// // of the input operands. Only LT and EQ are returned, since GT can
// // be determined from these values.
// magcompare64b_2 magcomp2 (LT, EQ, w, x);
// Determine final values based on output of magnitude comparison,
// sign bits, and special case testing.
exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*);
// // Determine final values based on output of magnitude comparison,
// // sign bits, and special case testing.
// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*);
endmodule // fpcomp
// endmodule // fpcomp
/*module magcompare2b (LT, GT, A, B);
input logic [1:0] A;
input logic [1:0] B;
output logic LT;
output logic GT;
// Determine if A < B using a minimized sum-of-products expression
assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// Determine if A > B using a minimized sum-of-products expression
assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
endmodule*/ // magcompare2b
// 2-bit magnitude comparator
// This module compares two 2-bit values A and B. LT is '1' if A < B
// and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// this version actually incorporates don't cares into the equation to
// simplify the optimization
// module magcompare2c (LT, GT, A, B);
// /*module magcompare2b (LT, GT, A, B);
// input logic [1:0] A;
// input logic [1:0] B;
// output logic LT;
// output logic GT;
// output logic LT;
// output logic GT;
// assign LT = B[1] | (!A[1]&B[0]);
// assign GT = A[1] | (!B[1]&A[0]);
// // Determine if A < B using a minimized sum-of-products expression
// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0];
// // Determine if A > B using a minimized sum-of-products expression
// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0];
// endmodule // magcompare2b
// endmodule*/ // magcompare2b
// This module compares two 64-bit values A and B. LT is '1' if A < B
// and EQ is '1'if A = B. LT and GT are both '0' if A > B.
// This structure was modified so
// that it only does a strict magnitdude comparison, and only
// returns flags for less than (LT) and eqaual to (EQ). It uses a tree
// of 63 2-bit magnitude comparators, followed by one OR gates.
//
// J. E. Stine and M. J. Schulte, "A combined two's complement and
// floating-point comparator," 2005 IEEE International Symposium on
// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// doi: 10.1109/ISCAS.2005.1464531
// // 2-bit magnitude comparator
// // This module compares two 2-bit values A and B. LT is '1' if A < B
// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However,
// // this version actually incorporates don't cares into the equation to
// // simplify the optimization
module magcompare64b_2 (LT, EQ, w, x);
// // module magcompare2c (LT, GT, A, B);
input logic [7:0] w;
input logic [7:0] x;
logic [3:0] y;
logic [3:0] z;
logic [1:0] a;
logic [1:0] b;
logic GT;
// // input logic [1:0] A;
// // input logic [1:0] B;
output logic LT;
output logic EQ;
// // output logic LT;
// // output logic GT;
// // assign LT = B[1] | (!A[1]&B[0]);
// // assign GT = A[1] | (!B[1]&A[0]);
// // endmodule // magcompare2b
// // This module compares two 64-bit values A and B. LT is '1' if A < B
// // and EQ is '1'if A = B. LT and GT are both '0' if A > B.
// // This structure was modified so
// // that it only does a strict magnitdude comparison, and only
// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree
// // of 63 2-bit magnitude comparators, followed by one OR gates.
// //
// // J. E. Stine and M. J. Schulte, "A combined two's complement and
// // floating-point comparator," 2005 IEEE International Symposium on
// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1.
// // doi: 10.1109/ISCAS.2005.1464531
// module magcompare64b_2 (LT, EQ, w, x);
// input logic [7:0] w;
// input logic [7:0] x;
// logic [3:0] y;
// logic [3:0] z;
// logic [1:0] a;
// logic [1:0] b;
// logic GT;
magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
// output logic LT;
// output logic EQ;
magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]);
// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]);
// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]);
// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]);
magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
assign EQ = ~(LT | GT);
endmodule // magcompare64b
// This module takes 64-bits inputs A and B, two magnitude comparison
// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// operands being compared as indicated below.
// Sel Description
// 00 double precision numbers
// 01 single precision numbers
// 10 half precision numbers
// 11 bfloat precision numbers
//
// The comparator produces a 2-bit signal fcc, which
// indicates the result of the comparison as follows:
// fcc decscription
// 00 A = B
// 01 A < B
// 10 A > B
// 11 A and B are unordered (i.e., A or B is NaN)
// It also produces a invalid operation flag, which is one
// if either of the input operands is a signaling NaN.
module exception_cmp_2 (
input logic [63:0] A,
input logic [63:0] B,
input logic FmtM,
input logic LT_mag,
input logic EQ_mag,
input logic [1:0] Sel,
input logic [3:0] FOpCtrlM,
// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]);
// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]);
output logic invalid,
output logic [1:0] fcc,
output logic [63:0] FCmpResultM,
// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]);
input logic Azero,
input logic Bzero,
input logic ANaN,
input logic BNaN);
// assign EQ = ~(LT | GT);
// endmodule // magcompare64b
// // This module takes 64-bits inputs A and B, two magnitude comparison
// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of
// // operands being compared as indicated below.
// // Sel Description
// // 00 double precision numbers
// // 01 single precision numbers
// // 10 half precision numbers
// // 11 bfloat precision numbers
// //
// // The comparator produces a 2-bit signal fcc, which
// // indicates the result of the comparison as follows:
// // fcc decscription
// // 00 A = B
// // 01 A < B
// // 10 A > B
// // 11 A and B are unordered (i.e., A or B is NaN)
// // It also produces a invalid operation flag, which is one
// // if either of the input operands is a signaling NaN.
// module exception_cmp_2 (
// input logic [63:0] A,
// input logic [63:0] B,
// input logic FmtM,
// input logic LT_mag,
// input logic EQ_mag,
// input logic [1:0] Sel,
// input logic [3:0] FOpCtrlM,
logic dp;
logic sp;
logic hp;
logic ASNaN;
logic BSNaN;
logic UO;
logic GT;
logic LT;
logic EQ;
logic [62:0] sixtythreezeros = 63'h0;
// output logic invalid,
// output logic [1:0] fcc,
// output logic [63:0] FCmpResultM,
assign dp = !Sel[1]&!Sel[0];
assign sp = !Sel[1]&Sel[0];
assign hp = Sel[1]&!Sel[0];
// Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
// point comparison is being performed.
assign UO = (ANaN | BNaN);
// Test if A or B is a signaling NaN.
assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
// If either A or B is a signaling NaN the "Invalid Operation"
// exception flag is set to one; otherwise it is zero.
assign invalid = (ASNaN | BSNaN);
// A and B are equal if (their magnitudes are equal) AND ((their signs are
// equal) or (their magnitudes are zero AND they are floating point
// numbers)). Also, A and B are not equal if they are unordered.
assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// input logic Azero,
// input logic Bzero,
// input logic ANaN,
// input logic BNaN);
// A is less than B if (A is negative and B is posiive) OR
// (A and B are positive and the magnitude of A is less than
// the magnitude of B) or (A and B are negative integers and
// the magnitude of A is less than the magnitude of B) or
// (A and B are negative floating point numbers and
// the magnitude of A is greater than the magnitude of B).
// Also, A is not less than B if A and B are equal or unordered.
assign LT = ((~LT_mag & A[63] & B[63]) |
(LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
// logic dp;
// logic sp;
// logic hp;
// logic ASNaN;
// logic BSNaN;
// logic UO;
// logic GT;
// logic LT;
// logic EQ;
// logic [62:0] sixtythreezeros = 63'h0;
// assign dp = !Sel[1]&!Sel[0];
// assign sp = !Sel[1]&Sel[0];
// assign hp = Sel[1]&!Sel[0];
// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating
// // point comparison is being performed.
// assign UO = (ANaN | BNaN);
// // Test if A or B is a signaling NaN.
// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]);
// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]);
// // If either A or B is a signaling NaN the "Invalid Operation"
// // exception flag is set to one; otherwise it is zero.
// assign invalid = (ASNaN | BSNaN);
// // A and B are equal if (their magnitudes are equal) AND ((their signs are
// // equal) or (their magnitudes are zero AND they are floating point
// // numbers)). Also, A and B are not equal if they are unordered.
// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO);
// A is greater than B when LT, EQ, and UO are are false.
assign GT = ~(LT | EQ | UO);
// // A is less than B if (A is negative and B is posiive) OR
// // (A and B are positive and the magnitude of A is less than
// // the magnitude of B) or (A and B are negative integers and
// // the magnitude of A is less than the magnitude of B) or
// // (A and B are negative floating point numbers and
// // the magnitude of A is greater than the magnitude of B).
// // Also, A is not less than B if A and B are equal or unordered.
// assign LT = ((~LT_mag & A[63] & B[63]) |
// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO;
// // A is greater than B when LT, EQ, and UO are are false.
// assign GT = ~(LT | EQ | UO);
// Note: it may be possible to optimize the setting of fcc
// a little more, but it is probably not worth the effort.
// // Note: it may be possible to optimize the setting of fcc
// // a little more, but it is probably not worth the effort.
// Set the bits of fcc based on LT, GT, EQ, and UO
assign fcc[0] = LT | UO;
assign fcc[1] = GT | UO;
// // Set the bits of fcc based on LT, GT, EQ, and UO
// assign fcc[0] = LT | UO;
// assign fcc[1] = GT | UO;
always_comb begin
case (FOpCtrlM[2:0])
3'b111: FCmpResultM = LT ? A : B;//min
3'b101: FCmpResultM = GT ? A : B;//max
3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal
3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than
3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal
default: FCmpResultM = 64'b0;
endcase
end
// always_comb begin
// case (FOpCtrlM[2:0])
// 3'b111: FCmpResultM = LT ? A : B;//min
// 3'b101: FCmpResultM = GT ? A : B;//max
// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal
// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than
// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal
// default: FCmpResultM = 64'b0;
// endcase
// end
endmodule // exception_cmp
// endmodule // exception_cmp

View File

@ -26,47 +26,41 @@
`include "wally-config.vh"
module fpuhazard(
input logic [4:0] Adr1, Adr2, Adr3,
input logic FWriteEnE, FWriteEnM, FWriteEnW,
input logic [4:0] RdE, RdM, RdW,
input logic FDivBusyE,
input logic RegWriteD,
input logic [2:0] FResultSelD, FResultSelE,
input logic IllegalFPUInstrD,
input logic FInput2UsedD, FInput3UsedD,
// Stall outputs
output logic FStallD,
output logic [1:0] FForwardInput1D, FForwardInput2D,
output logic FForwardInput3D
input logic [4:0] Adr1E, Adr2E, Adr3E,
input logic FWriteEnM, FWriteEnW,
input logic [4:0] RdM, RdW,
input logic [2:0] FResultSelM,
output logic FStallD,
output logic [1:0] ForwardXE, ForwardYE, ForwardZE
);
always_comb begin
// set ReadData as default
FForwardInput1D = 2'b00;
FForwardInput2D = 2'b00;
FForwardInput3D = 1'b0;
FStallD = FDivBusyE;
if (~IllegalFPUInstrD) begin
// if taking a value from int register
if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD)))
if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM
else FStallD = 1'b1; // otherwise stall
else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW
else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE
ForwardXE = 2'b00; // choose FRD1E
ForwardYE = 2'b00; // choose FRD2E
ForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
if ((Adr1E == RdM) & FWriteEnM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W
if(FInput2UsedD)
if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1;
else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW
else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE
if ((Adr2E == RdM) & FWriteEnM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W
if(FInput3UsedD)
if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1;
else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1;
else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE
end
if ((Adr3E == RdM) & FWriteEnM)
// if the result will be FResM
if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM
else FStallD = 1; // if the result won't be ready stall
else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W
end

View File

@ -1,8 +1,8 @@
//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E);
module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE);
input [63:0] FInput1E, FInput2E;
input [63:0] SrcXE, SrcYE;
input [1:0] SgnOpCodeE;
output [63:0] SgnResultE;
output [4:0] SgnFlagsE;
@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E);
//op code designation:
//
//00 - fsgnj - directly copy over sign value of FInput2E
//01 - fsgnjn - negate sign value of FInput2E
//10 - fsgnjx - XOR sign values of FInput1E & FInput2E
//00 - fsgnj - directly copy over sign value of SrcYE
//01 - fsgnjn - negate sign value of SrcYE
//10 - fsgnjx - XOR sign values of SrcXE & SrcYE
//
assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]);
assign SgnResultE[62:0] = FInput1E[62:0];
assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]);
assign SgnResultE[62:0] = SrcXE[62:0];
//If the exponent is all ones, then the value is either Inf or NaN,
//both of which will produce a QNaN/SNaN value of some sort. This will
//set the invalid flag high.
assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52];
assign AonesExp = SrcXE[62]&SrcXE[61]&SrcXE[60]&SrcXE[59]&SrcXE[58]&SrcXE[57]&SrcXE[56]&SrcXE[55]&SrcXE[54]&SrcXE[53]&SrcXE[52];
//the only flag that can occur during this operation is invalid
//due to changing sign on already existing NaN

View File

@ -1,195 +0,0 @@
///////////////////////////////////////////
// lzd.sv
//
// Written: James.Stine@okstate.edu 1 February 2021
// Modified:
//
// Purpose: Hierarchical leading zero detector (LZD) modules
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
`include "wally-config.vh"
/* verilator lint_off DECLFILENAME */
// Original idea came from V. G. Oklobdzija, "An algorithmic and novel
// design of a leading zero detector circuit: comparison with logic
// synthesis," in IEEE Transactions on Very Large Scale Integration
// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi:
// 10.1109/92.273153.
// Modified to be more hierarchical
// lz2: 2-bit leaf cell of the hierarchical leading-zero detector.
//   B - 2-bit input
//   V - valid: 1 when at least one bit of B is set
//   P - position: 1 only when B[1] is 0 and B[0] is 1, i.e. exactly one
//       zero sits above the most significant 1; 0 in every other case
module lz2 (P, V, B);

  input  logic [1:0] B;
  output logic       P;
  output logic       V;

  assign V = |B;
  assign P = ~B[1] & B[0];

endmodule // lz2
// lzd_hier: width-parameterized wrapper that selects the fixed-width
// leading-zero detector matching WIDTH.
//   WIDTH - input width; only 4, 8, 16, 32, 64 and 128 are implemented
//   B     - input vector
//   ZP    - position output of the selected detector ($clog2(WIDTH) bits)
//   ZV    - valid output of the selected detector
// Any other WIDTH used to elaborate to an empty module with ZP and ZV
// undriven; it is now reported as an elaboration-time error instead.
module lzd_hier #(parameter WIDTH=8)
  (input  logic [WIDTH-1:0]         B,
   output logic [$clog2(WIDTH)-1:0] ZP,
   output logic                     ZV);

  // Instance names (including the historical "lzd127") are left untouched
  // so that hierarchical paths into this module do not change.
  if (WIDTH == 128)
    lz128 lzd127 (.ZP(ZP), .ZV(ZV), .B(B));
  else if (WIDTH == 64)
    lz64 lzd64 (.ZP(ZP), .ZV(ZV), .B(B));
  else if (WIDTH == 32)
    lz32 lzd32 (.ZP(ZP), .ZV(ZV), .B(B));
  else if (WIDTH == 16)
    lz16 lzd16 (.ZP(ZP), .ZV(ZV), .B(B));
  else if (WIDTH == 8)
    lz8 lzd8 (.ZP(ZP), .ZV(ZV), .B(B));
  else if (WIDTH == 4)
    lz4 lzd4 (.ZP(ZP), .ZV(ZV), .B(B));
  else
    $error("lzd_hier: unsupported WIDTH %0d (expected 4, 8, 16, 32, 64 or 128)", WIDTH);

endmodule // lzd_hier
// lz4: 4-bit leading-zero detector built from two lz2 cells.
//   B  - 4-bit input, examined from B[3] down to B[0]
//   ZP - number of zero bits above the most significant 1 of B;
//        only a true count while ZV is 1 (reads 2'b10 when B is all zeros)
//   ZV - valid: 1 when B contains at least one 1
module lz4 (ZP, ZV, B);

  input  logic [3:0] B;
  output logic [1:0] ZP;
  output logic       ZV;

  logic LoPos, LoValid;   // result for the lower pair B[1:0]
  logic HiPos, HiValid;   // result for the upper pair B[3:2]

  lz2 l1(LoPos, LoValid, B[1:0]);
  lz2 l2(HiPos, HiValid, B[3:2]);

  // A 1 in the upper pair wins: the top bit of ZP stays 0 and the upper
  // position is used. Otherwise the top bit is set (two zeros skipped) and
  // the lower position is used.
  assign ZP = {~HiValid, (HiValid ? HiPos : LoPos)};
  assign ZV = HiValid | LoValid;

endmodule
// lz8: 8-bit leading-zero detector built from two lz4 halves.
//   B  - 8-bit input, examined from B[7] down to B[0]
//   ZP - number of zero bits above the most significant 1 of B;
//        only a true count while ZV is 1
//   ZV - valid: 1 when B contains at least one 1
module lz8 (ZP, ZV, B);

  input  logic [7:0] B;
  output logic [2:0] ZP;
  output logic       ZV;

  logic [1:0] LoPos, HiPos;       // positions from B[3:0] and B[7:4]
  logic       LoValid, HiValid;   // valid flags from B[3:0] and B[7:4]

  lz4 l1(LoPos, LoValid, B[3:0]);
  lz4 l2(HiPos, HiValid, B[7:4]);

  // The upper half takes priority; when it is empty the top bit of ZP is
  // set and the lower half supplies the remaining bits.
  assign ZP = {~HiValid, (HiValid ? HiPos : LoPos)};
  assign ZV = HiValid | LoValid;

endmodule
// lz16: 16-bit leading-zero detector built from two lz8 halves.
//   B  - 16-bit input, examined from B[15] down to B[0]
//   ZP - number of zero bits above the most significant 1 of B;
//        only a true count while ZV is 1
//   ZV - valid: 1 when B contains at least one 1
module lz16 (ZP, ZV, B);

  input  logic [15:0] B;
  output logic [3:0]  ZP;
  output logic        ZV;

  logic [2:0] LoPos, HiPos;       // positions from B[7:0] and B[15:8]
  logic       LoValid, HiValid;   // valid flags from B[7:0] and B[15:8]

  lz8 l1(LoPos, LoValid, B[7:0]);
  lz8 l2(HiPos, HiValid, B[15:8]);

  // The upper half takes priority; when it is empty the top bit of ZP is
  // set and the lower half supplies the remaining bits.
  assign ZP = {~HiValid, (HiValid ? HiPos : LoPos)};
  assign ZV = HiValid | LoValid;

endmodule // lz16
// lz32: 32-bit leading-zero detector built from two lz16 halves.
//   B  - 32-bit input, examined from B[31] down to B[0]
//   ZP - number of zero bits above the most significant 1 of B;
//        only a true count while ZV is 1
//   ZV - valid: 1 when B contains at least one 1
module lz32 (ZP, ZV, B);

  input  logic [31:0] B;
  output logic [4:0]  ZP;
  output logic        ZV;

  logic [3:0] LoPos, HiPos;       // positions from B[15:0] and B[31:16]
  logic       LoValid, HiValid;   // valid flags from B[15:0] and B[31:16]

  lz16 l1(LoPos, LoValid, B[15:0]);
  lz16 l2(HiPos, HiValid, B[31:16]);

  // The upper half takes priority; when it is empty the top bit of ZP is
  // set and the lower half supplies the remaining bits.
  assign ZP = {~HiValid, (HiValid ? HiPos : LoPos)};
  assign ZV = HiValid | LoValid;

endmodule // lz32
// lz64: 64-bit leading-zero detector built from two lz32 halves.
//   B  - 64-bit input, examined from B[63] down to B[0]
//   ZP - number of zero bits above the most significant 1 of B;
//        only a true count while ZV is 1
//   ZV - valid: 1 when B contains at least one 1
module lz64 (ZP, ZV, B);

  input  logic [63:0] B;
  output logic [5:0]  ZP;
  output logic        ZV;

  logic [4:0] LoPos, HiPos;       // positions from B[31:0] and B[63:32]
  logic       LoValid, HiValid;   // valid flags from B[31:0] and B[63:32]

  lz32 l1(LoPos, LoValid, B[31:0]);
  lz32 l2(HiPos, HiValid, B[63:32]);

  // The upper half takes priority; when it is empty the top bit of ZP is
  // set and the lower half supplies the remaining bits.
  assign ZP = {~HiValid, (HiValid ? HiPos : LoPos)};
  assign ZV = HiValid | LoValid;

endmodule // lz64
// lz128: 128-bit leading-zero detector built from two lz64 halves.
//   B  - 128-bit input, examined from B[127] down to B[0]
//   ZP - number of zero bits above the most significant 1 of B;
//        only a true count while ZV is 1
//   ZV - valid: 1 when B contains at least one 1
module lz128 (ZP, ZV, B);

  input  logic [127:0] B;
  logic [5:0]          ZPa;   // position from the lower half B[63:0]
  logic [5:0]          ZPb;   // position from the upper half B[127:64]
  logic                ZVa;   // lower half contains a 1
  logic                ZVb;   // upper half contains a 1
  output logic [6:0]   ZP;
  output logic         ZV;

  // Each half must be exactly 64 bits wide and the halves must not overlap.
  // A 65-bit slice would be truncated at the 64-bit lz64 port, which would
  // drop B[127] from the upper half and feed B[63] to both halves.
  lz64 l1(ZPa, ZVa, B[63:0]);
  lz64 l2(ZPb, ZVb, B[127:64]);

  // The upper half takes priority; when it is empty ZP[6] is set and the
  // lower half supplies the remaining bits.
  assign ZP[5:0] = ZVb ? ZPb : ZPa;
  assign ZP[6]   = ~ZVb;
  assign ZV      = ZVa | ZVb;

endmodule // lz128
/* verilator lint_on DECLFILENAME */

View File

@ -32,7 +32,7 @@ module hazard(
input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM,
input logic LoadStallD, MulDivStallD, CSRRdStallD,
input logic DCacheStall, ICacheStallF,
input logic FPUStallD,
input logic FPUStallD, FStallD,
input logic DivBusyE,FDivBusyE,
// Stall & flush outputs
output logic StallF, StallD, StallE, StallM, StallW,
@ -56,7 +56,7 @@ module hazard(
// If any stages are stalled, the first stage that isn't stalled must flush.
assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE);
assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous
assign StallECause = DivBusyE | FDivBusyE;
assign StallMCause = 0;
assign StallWCause = DCacheStall | ICacheStallF;

View File

@ -45,11 +45,13 @@ module controller(
output logic MemReadE, CSRReadE, // for Hazard Unit
output logic [2:0] Funct3E,
output logic MulDivE, W64E,
output logic JumpE,
output logic JumpE,
output logic [1:0] MemRWE,
// Memory stage control signals
input logic StallM, FlushM,
output logic [1:0] MemRWM,
output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic CSRReadM, CSRWriteM, PrivilegedM,
output logic SCE,
output logic [1:0] AtomicM,
output logic [2:0] Funct3M,
output logic RegWriteM, // for Hazard Unit
@ -73,7 +75,7 @@ module controller(
// pipelined control signals
logic RegWriteE;
logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM;
logic [1:0] MemRWD, MemRWE;
logic [1:0] MemRWD;
logic JumpD;
logic BranchD, BranchE;
logic [1:0] ALUOpD;
@ -140,6 +142,7 @@ module controller(
ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide
else
ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction
//7'b1010011: ControlsD = `CTRLW'b0_000_00_00_101_0_00_0_0_0_0_0_0_00_1; // FP
7'b1100011: ControlsD = `CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq
7'b1100111: ControlsD = `CTRLW'b1_000_00_00_000_0_00_1_1_0_0_0_0_00_0; // jalr
7'b1101111: ControlsD = `CTRLW'b1_011_00_00_000_0_00_1_0_0_0_0_0_00_0; // jal
@ -202,7 +205,8 @@ module controller(
assign PCSrcE = JumpE | BranchE & BranchTakenE;
assign MemReadE = MemRWE[1];
assign MemReadE = MemRWE[1];
assign SCE = (ResultSrcE == 3'b100);
// Memory stage pipeline control register
flopenrc #(15) controlregM(clk, reset, FlushM, ~StallM,

View File

@ -37,6 +37,9 @@ module datapath (
input logic ALUSrcAE, ALUSrcBE,
input logic TargetSrcE,
input logic JumpE,
input logic IllegalFPUInstrE,
input logic [1:0] MemRWE,
input logic [`XLEN-1:0] FWriteDataE,
input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE,
output logic [2:0] FlagsE,
@ -44,13 +47,13 @@ module datapath (
output logic [`XLEN-1:0] SrcAE, SrcBE,
// Memory stage signals
input logic StallM, FlushM,
input logic [`XLEN-1:0] FWriteDataM,
input logic FWriteIntM,
input logic [`XLEN-1:0] FIntResM,
output logic [`XLEN-1:0] SrcAM,
output logic [`XLEN-1:0] WriteDataM, MemAdrM,
// Writeback stage signals
input logic StallW, FlushW,
input logic FWriteIntW,
input logic [`XLEN-1:0] FPUResultW,
input logic RegWriteW,
input logic SquashSCW,
input logic [2:0] ResultSrcW,
@ -70,13 +73,14 @@ module datapath (
logic [`XLEN-1:0] RD1E, RD2E;
logic [`XLEN-1:0] ExtImmE;
logic [`XLEN-1:0] PreSrcAE, SrcAE2, SrcBE2;
logic [`XLEN-1:0] PreSrcAE, PreSrcBE, SrcAE2, SrcBE2;
logic [`XLEN-1:0] ALUResultE;
logic [`XLEN-1:0] WriteDataE;
logic [`XLEN-1:0] TargetBaseE;
// Memory stage signals
logic [`XLEN-1:0] ALUResultM;
logic [`XLEN-1:0] ResultM;
// Writeback stage signals
logic [`XLEN-1:0] SCResultW;
logic [`XLEN-1:0] ALUResultW;
@ -88,8 +92,7 @@ module datapath (
assign Rs2D = InstrD[24:20];
assign RdD = InstrD[11:7];
//Mux for writing floating point
mux2 #(`XLEN) writedatamux(ResultW, FPUResultW, FWriteIntW, WriteDataW);
//Mux for writing floating point
regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D);
extend ext(.InstrD(InstrD[31:7]), .*);
@ -102,11 +105,12 @@ module datapath (
flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E);
flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE);
mux4 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, FWriteDataM, ForwardAE, PreSrcAE);
mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, FWriteDataM, ForwardBE, WriteDataE);
mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, PreSrcAE);
mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, PreSrcBE);
mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE);
mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2);
mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE);
mux2 #(`XLEN) srcbmux(PreSrcBE, ExtImmE, ALUSrcBE, SrcBE);
mux2 #(`XLEN) srcbmux2(SrcBE, {`XLEN{1'b0}}, JumpE, SrcBE2); // *** May be able to remove this mux.
alu #(`XLEN) alu(SrcAE2, SrcBE2, ALUControlE, ALUResultE, FlagsE);
mux2 #(`XLEN) targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE);
@ -117,10 +121,11 @@ module datapath (
flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM);
assign MemAdrM = ALUResultM;
flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM);
flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM);
flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM);
mux2 #(`XLEN) resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM);
// Writeback stage pipeline register and logic
flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW);
flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW);
flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW);
// handle Store Conditional result if atomic extension supported
@ -131,11 +136,11 @@ module datapath (
assign SCResultW = 0;
endgenerate
mux5 #(`XLEN) resultmux(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW);
mux5 #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW);
/* -----\/----- EXCLUDED -----\/-----
// This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage.
// *** need to look at how the decoder is coded to fix.
mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW);
mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, WriteDataW);
>>>>>>> bp
-----/\----- EXCLUDED -----/\----- */

View File

@ -28,32 +28,31 @@
module forward(
// Detect hazards
input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW,
input logic MemReadE, MulDivE, CSRReadE,
input logic RegWriteM, RegWriteW,
input logic DivDoneE, DivBusyE,
input logic FWriteIntE, FWriteIntM, FWriteIntW,
input logic MemReadE, MulDivE, CSRReadE,
input logic RegWriteM, RegWriteW,
input logic DivDoneE, DivBusyE,
input logic FWriteIntE, FWriteIntM, FWriteIntW,
input logic SCE,
// Forwarding controls
output logic [1:0] ForwardAE, ForwardBE,
output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD
output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD
);
always_comb begin
ForwardAE = 2'b00;
ForwardBE = 2'b00;
if (Rs1E != 5'b0)
if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10;
if ((Rs1E == RdM) & (RegWriteM|FWriteIntM)) ForwardAE = 2'b10;
else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01;
else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11;
if (Rs2E != 5'b0)
if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10;
if ((Rs2E == RdM) & (RegWriteM|FWriteIntM)) ForwardBE = 2'b10;
else if ((Rs2E == RdW) & (RegWriteW|FWriteIntW)) ForwardBE = 2'b01;
else if ((Rs2E == RdM) & FWriteIntM) ForwardBE = 2'b11;
end
// Stall on dependent operations that finish in Mem Stage and can't bypass in time
assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE));
assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE));
assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE));
assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide
assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE));

View File

@ -31,33 +31,34 @@ module ieu (
input logic [31:0] InstrD,
input logic IllegalIEUInstrFaultD,
output logic IllegalBaseInstrFaultD,
output logic RegWriteD,
output logic RegWriteD,
// Execute Stage interface
input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE,
input logic FWriteIntE,
input logic IllegalFPUInstrE,
input logic [`XLEN-1:0] FWriteDataE,
output logic [`XLEN-1:0] PCTargetE,
output logic MulDivE, W64E,
output logic [2:0] Funct3E,
output logic [`XLEN-1:0] SrcAE, SrcBE,
input logic FWriteIntM,
// Memory stage interface
input logic DataMisalignedM, // from LSU
input logic SquashSCW, // from LSU
output logic [1:0] MemRWM, // read/write control goes to LSU
output logic [1:0] AtomicM, // atomic control goes to LSU
output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU
input logic DataMisalignedM, // from LSU
input logic SquashSCW, // from LSU
output logic [1:0] MemRWM, // read/write control goes to LSU
output logic [1:0] AtomicM, // atomic control goes to LSU
output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU
output logic [2:0] Funct3M, // size and signedness to LSU
input logic FWriteIntM, // from FPU
input logic [`XLEN-1:0] FWriteDataM, // from FPU
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
output logic [2:0] Funct3M, // size and signedness to LSU
output logic [`XLEN-1:0] SrcAM, // to privilege and fpu
input logic DataAccessFaultM,
input logic [`XLEN-1:0] FIntResM,
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW,
input logic FWriteIntW,
input logic [`XLEN-1:0] FPUResultW,
input logic FWriteIntW,
// input logic [`XLEN-1:0] PCLinkW,
output logic InstrValidM, InstrValidW,
// hazards
@ -76,7 +77,8 @@ module ieu (
logic [4:0] ALUControlE;
logic ALUSrcAE, ALUSrcBE;
logic [2:0] ResultSrcW;
logic TargetSrcE;
logic TargetSrcE;
logic SCE;
// forwarding signals
logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW;
@ -84,6 +86,7 @@ module ieu (
logic RegWriteM, RegWriteW;
logic MemReadE, CSRReadE;
logic JumpE;
logic [1:0] MemRWE;
controller c(.*);
datapath dp(.*);

View File

@ -87,21 +87,23 @@ module wallypipelinedhart
logic PCSrcE;
logic CSRWritePendingDEM;
logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD;
logic DivDoneE;
logic DivBusyE;
logic DivDoneW;
logic [4:0] SetFflagsM;
logic [2:0] FRM_REGW;
logic FloatRegWriteW;
logic [1:0] FMemRWM;
logic RegWriteD;
logic [`XLEN-1:0] FWriteDataM;
logic SquashSCW;
logic LoadStallD, MulDivStallD, CSRRdStallD;
logic SquashSCM, SquashSCW;
// floating point unit signals
logic [2:0] FRM_REGW;
logic [1:0] FMemRWM, FMemRWE;
logic FStallD;
logic FWriteIntE, FWriteIntW, FWriteIntM;
logic FWriteIntE, FWriteIntM, FWriteIntW;
logic [`XLEN-1:0] FWriteDataE;
logic [`XLEN-1:0] FIntResM;
logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FloatRegWriteW;
logic FPUStallD;
logic [4:0] SetFflagsM;
logic [`XLEN-1:0] FPUResultW;
// memory management unit signals
@ -185,20 +187,10 @@ module wallypipelinedhart
ieu ieu(.*); // integer execution unit: integer register file, datapath and controller
mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM);
// mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM);
pagetablewalker pagetablewalker(.HPTWRead(HPTWRead),
.*); // can send addresses to ahblite, send out pagetablestall
// *** can connect to hazard unit
// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed.
// Would need to insertinstruction as InstrD, not InstrF
/*ahblite ebu(
.InstrReadF(1'b0),
.InstrRData(), // hook up InstrF later
.MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]),
.*); */
// arbiter between IEU and pagetablewalker
lsuArb arbiter(// HPTW connection
.HPTWTranslate(MMUTranslate),
@ -208,12 +200,12 @@ module wallypipelinedhart
.HPTWReady(MMUReady),
.HPTWStall(HPTWStall),
// CPU connection
.MemRWM(MemRWM|FMemRWM),
.MemRWM(MemRWM),
.Funct3M(Funct3M),
.AtomicM(AtomicM),
.MemAdrM(MemAdrM),
.StallW(StallW),
.WriteDataM(WriteDatatmpM),
.WriteDataM(WriteDataM),
.ReadDataW(ReadDataW),
.CommittedM(CommittedM),
.SquashSCW(SquashSCW),
@ -259,7 +251,8 @@ module wallypipelinedhart
ahblite ebu(
//.InstrReadF(1'b0),
//.InstrRData(InstrF), // hook up InstrF later
.WriteDataM(WriteDatatmpM),
.ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking
.WriteDataM(WriteDataM),
.MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]),
.Funct7M(InstrM[31:25]),
.HRDATAW(HRDATAW),

View File

@ -539,8 +539,8 @@ string tests32f[] = '{
if (`M_SUPPORTED) tests = {tests, tests64m};
if (`A_SUPPORTED) tests = {tests, tests64a};
if (`MEM_VIRTMEM) tests = {tests, tests64mmu};
if (`D_SUPPORTED) tests = {tests64d, tests};
if (`F_SUPPORTED) tests = {tests64f, tests};
if (`D_SUPPORTED) tests = {tests64d, tests};
end
//tests = {tests64a, tests};
end else begin // RV32
@ -554,7 +554,7 @@ string tests32f[] = '{
if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic};
else tests = {tests, tests32iNOc};
if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m};
// if (`F_SUPPORTED) tests = {tests32f, tests};
if (`F_SUPPORTED) tests = {tests32f, tests};
if (`A_SUPPORTED) tests = {tests, tests32a};
if (`MEM_VIRTMEM) tests = {tests, tests32mmu};
end

File diff suppressed because it is too large Load Diff