diff --git a/.gitignore b/.gitignore index c6cac56f..82c20503 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,8 @@ testsBP/*/*/*.elf* testsBP/*/OBJ/* testsBP/*/*.a wally-pipelined/linux-testgen/linux-testvectors/* +wally-pipelined/linux-testgen/nohup* +wally-pipelined/linux-testgen/x* !wally-pipelined/linux-testgen/linux-testvectors/tvCopier.py !wally-pipelined/linux-testgen/linux-testvectors/tvLinker.sh wally-pipelined/regression/slack-notifier/slack-webhook-url.txt diff --git a/.gitmodules b/.gitmodules index 65e1e71c..e69de29b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "sky130/sky130_osu_sc_t12"] - path = sky130/sky130_osu_sc_t12 - url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/ diff --git a/riscv-coremark/coremark/core_main.c b/riscv-coremark/coremark/core_main.c index edd1ac46..a2c3ac67 100644 --- a/riscv-coremark/coremark/core_main.c +++ b/riscv-coremark/coremark/core_main.c @@ -211,26 +211,53 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) { core_init_state(results[0].size,results[i].seed1,results[i].memblock[3]); } } - + + /*int foreverLoop = 1; + secs_ret timing = 0; + int timingInt; + ee_printf("\nENTERING FOREVER WHILE LOOP\n"); + while(foreverLoop == 1) + { + start_time(); + //filler + stop_time(); + timing += time_in_secs(get_time()); + timingInt = (int)timing; + ee_printf("Timing is %d\n", timingInt); + }*/ + /* automatically determine number of iterations if not set */ if (results[0].iterations==0) { secs_ret secs_passed=0; ee_u32 divisor; results[0].iterations=1; + int iterationInc = 0; + ee_printf("\n\nENTERING ITERATION WHILE LOOP\n"); while (secs_passed < (secs_ret)1) { - results[0].iterations*=10; + if(iterationInc != 0) + { + results[0].iterations++; + } + ee_printf("iterations is %d\n", results[0].iterations); start_time(); iterate(&results[0]); stop_time(); - secs_passed=time_in_secs(get_time()); + secs_passed = time_in_secs(get_time()); + int secs_passed_int = 
(int)secs_passed; + ee_printf("secs passed is %d\n", secs_passed_int); + iterationInc++; } + ee_printf("LEAVING ITERATION WHILE LOOP!\n\n"); /* now we know it executes for at least 1 sec, set actual run time at about 10 secs */ divisor=(ee_u32)secs_passed; + ee_printf("divisor is %lu\n", divisor); if (divisor==0) /* some machines cast float to int as 0 since this conversion is not defined by ANSI, but we know at least one second passed */ divisor=1; results[0].iterations*=1+10/divisor; + ee_printf("iterations is %d\n", results[0].iterations); } /* perform actual benchmark */ + ee_printf("Starting benchmark\n"); start_time(); #if (MULTITHREAD>1) if (default_num_contexts>MULTITHREAD) { @@ -249,7 +276,8 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) { #endif stop_time(); total_time=get_time(); - ee_printf("ending benchmark"); + ee_printf("total time is %u\n", total_time); + ee_printf("ending benchmark\n"); /* get a function of the input to report */ seedcrc=crc16(results[0].seed1,seedcrc); seedcrc=crc16(results[0].seed2,seedcrc); @@ -340,12 +368,17 @@ MAIN_RETURN_TYPE main(int argc, char *argv[]) { for (i=0 ; iqemu_in_gdb_format.txt #cat qemu_output.txt | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/" +# Uncomment this version in case you just want to have qemu_in_gdb_format.txt around +# It is often helpful for general debugging +#(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py >qemu_in_gdb_format.txt) & riscv64-unknown-elf-gdb -x gdbinit_qemulog + +# Split qemu_in_gdb_format.txt into chunks of 100,000 instructions for easier inspection +#split -d -l 5600000 
qemu_in_gdb_format.txt --verbose # Uncomment this version for parse_gdb_output.py debugging # - Uses qemu_in_gdb_format.txt @@ -24,4 +36,4 @@ # =========== Just Do the Thing ========== # Uncomment this version for the whole thing # - Logs info needed by buildroot testbench -(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | pv -l | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog +(qemu-system-riscv64 -M virt -nographic -bios /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/fw_jump.elf -kernel /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/Image -append "root=/dev/vda ro" -initrd /courses/e190ax/qemu_sim/rv64_initrd/buildroot_experimental/output/images/rootfs.cpio -d nochain,cpu,in_asm -serial /dev/null -singlestep -s -S 2>&1 >/dev/null | ./parse_qemu.py | ./parse_gdb_output.py "/courses/e190ax/buildroot_boot_new/") & riscv64-unknown-elf-gdb -x gdbinit_qemulog diff --git a/wally-pipelined/linux-testgen/parse_gdb_output.py b/wally-pipelined/linux-testgen/parse_gdb_output.py index 739a97e3..7e48fa63 100755 --- a/wally-pipelined/linux-testgen/parse_gdb_output.py +++ b/wally-pipelined/linux-testgen/parse_gdb_output.py @@ -44,7 +44,7 @@ try: instrs += 1 storeAMO = '' if instrs % 10000 == 0: - print(instrs) + print(instrs,flush=True) # Instr in human assembly wPC.write('{} ***\n'.format(' '.join(l.split(':')[1].split()[0:2]))) if '\tld' in l or '\tlw' in l or '\tlh' in l or '\tlb' in l: @@ -63,6 +63,15 @@ try: storeLoc = readLoc storeReg = l.split()[-1].split(',')[1] storeAMO = l.split()[-2] 
+ if '\tlr' in l: + currentRead = l.split()[-1].split(',')[0] + readOffset = "0" + readLoc = l.split()[-1].split('(')[1][:-1] + readType = "0" # *** I don't see that readType or lastReadType are ever used; we can probably get rid of them + if '\tsc' in l: + storeOffset = "0" + storeLoc = l.split()[-1].split('(')[1][:-1] + storeReg = l.split()[-1].split(',')[1] if '\tsd' in l or '\tsw' in l or '\tsh' in l or '\tsb' in l: s = l.split('#')[0].split()[-1] storeReg = s.split(',')[0] diff --git a/wally-pipelined/linux-testgen/parse_qemu.py b/wally-pipelined/linux-testgen/parse_qemu.py index c7f31fb2..ac5d95f0 100755 --- a/wally-pipelined/linux-testgen/parse_qemu.py +++ b/wally-pipelined/linux-testgen/parse_qemu.py @@ -40,13 +40,12 @@ def parseCSRs(l): val = int(l.split()[1],16) if inPageFault: # Not sure if these CSRs should be updated or not during page fault. - #if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"): - # # We do update some CSRs - # CSRs[csr] = val - #else: - # # Others we preserve until changed later - # pageFaultCSRs[csr] = val - pageFaultCSRs[csr] = val + if l.startswith("mstatus") or l.startswith("mepc") or l.startswith("mcause") or l.startswith("mtval") or l.startswith("sepc") or l.startswith("scause") or l.startswith("stval"): + # We do update some CSRs + CSRs[csr] = val + else: + # Others we preserve until changed later + pageFaultCSRs[csr] = val elif pageFaultCSRs and (csr in pageFaultCSRs): if (val != pageFaultCSRs[csr]): del pageFaultCSRs[csr] diff --git a/wally-pipelined/regression/regression-wally.py b/wally-pipelined/regression/regression-wally.py index eac221cd..b1ded5e7 100755 --- a/wally-pipelined/regression/regression-wally.py +++ b/wally-pipelined/regression/regression-wally.py @@ -26,12 +26,12 @@ configs = [ TestCase( name="busybear", cmd="vsim -do wally-busybear-batch.do -c > {}", - grepstr="# loaded 100000 
instructions" + grepstr="loaded 100000 instructions" ), TestCase( name="buildroot", cmd="vsim -do wally-buildroot-batch.do -c > {}", - grepstr="# loaded 2000000 instructions" + grepstr="loaded 2500000 instructions" ), TestCase( name="rv32ic", diff --git a/wally-pipelined/regression/wally-busybear-batch.do b/wally-pipelined/regression/wally-busybear-batch.do index e819d780..a4a80eb7 100644 --- a/wally-pipelined/regression/wally-busybear-batch.do +++ b/wally-pipelined/regression/wally-busybear-batch.do @@ -36,5 +36,4 @@ vopt work_busybear.testbench -o workopt_busybear vsim workopt_busybear -suppress 8852,12070 run -all -exec ./slack-notifier/slack-notifier.py quit diff --git a/wally-pipelined/regression/wally-busybear.do b/wally-pipelined/regression/wally-busybear.do index 8d6af28b..11876dde 100644 --- a/wally-pipelined/regression/wally-busybear.do +++ b/wally-pipelined/regression/wally-busybear.do @@ -40,5 +40,4 @@ do ./wave-dos/linux-waves.do #-- Run the Simulation run -all -exec ./slack-notifier/slack-notifier.py ##quit diff --git a/wally-pipelined/regression/wave-dos/linux-waves.do b/wally-pipelined/regression/wave-dos/linux-waves.do index b7dfd8c5..63623891 100644 --- a/wally-pipelined/regression/wave-dos/linux-waves.do +++ b/wally-pipelined/regression/wave-dos/linux-waves.do @@ -4,6 +4,7 @@ view wave add wave -divider add wave /testbench/clk add wave /testbench/reset +add wave -dec /testbench/instrs add wave -divider Stalls_and_Flushes add wave /testbench/dut/hart/StallF @@ -19,12 +20,13 @@ add wave /testbench/dut/hart/FlushW add wave -divider F add wave -hex /testbench/dut/hart/ifu/PCF add wave -divider D -add wave -hex /testbench/pcExpected +add wave -hex /testbench/PCDexpected add wave -hex /testbench/dut/hart/ifu/PCD add wave -hex /testbench/PCtextD add wave /testbench/InstrDName add wave -hex /testbench/dut/hart/ifu/InstrD add wave -hex /testbench/dut/hart/ieu/c/InstrValidD +add wave -hex /testbench/PCDwrong add wave -divider E add wave -hex 
/testbench/dut/hart/ifu/PCE add wave -hex /testbench/PCtextE diff --git a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv index 4c93cd57..5a8e7a86 100644 --- a/wally-pipelined/src/fpu/FMA/tbgen/tb.sv +++ b/wally-pipelined/src/fpu/FMA/tbgen/tb.sv @@ -45,8 +45,8 @@ assign FOpCtrlE = 3'b0; // down - 010 // up - 011 // nearest max mag - 100 -assign FrmE = 3'b010; -assign FmtE = 1'b1; +assign FrmE = 3'b011; +assign FmtE = 1'b0; assign wnan = FmtE ? &FmaResultM[62:52] && |FmaResultM[51:0] : &FmaResultM[62:55] && |FmaResultM[54:32]; @@ -110,7 +110,6 @@ always @(posedge clk) if(ans >= 64'h7FF8000000000000 && ans <= 64'h7FFfffffffffffff ) $display( "ans=qutNaN "); if(ans >= 64'hFFF8000000000000 && ans <= 64'hFFFfffffffffffff ) $display( "ans=qutNaN "); errors = errors + 1; - if (errors == 20) $stop; end if((FmtE==1'b0)&(FmaFlagsM != flags[4:0] || (!wnan && (FmaResultM != ans)) || (wnan && ansnan && ~(((xnan && (FmaResultM[62:0] == {FInput1E[62:55],1'b1,FInput1E[53:0]})) || (ynan && (FmaResultM[62:0] == {FInput2E[62:55],1'b1,FInput2E[53:0]})) || (znan && (FmaResultM[62:0] == {FInput3E[62:55],1'b1,FInput3E[53:0]})) || (FmaResultM[62:0] == ans[62:0]))) ))) begin diff --git a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh index dc9562b1..5f12e143 100755 --- a/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh +++ b/wally-pipelined/src/fpu/FMA/tbgen/test_gen.sh @@ -1,3 +1,3 @@ -testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rmin -seed 113355 -level 1 > testFloat +testfloat_gen f32_mulAdd -tininessafter -n 6133248 -rmax -seed 113355 -level 1 > testFloat tr -d ' ' < testFloat > testFloatNoSpace diff --git a/wally-pipelined/src/fpu/fctrl.sv b/wally-pipelined/src/fpu/fctrl.sv index 5749d0db..3be9b281 100755 --- a/wally-pipelined/src/fpu/fctrl.sv +++ b/wally-pipelined/src/fpu/fctrl.sv @@ -10,172 +10,124 @@ module fctrl ( output logic FDivStartD, output logic [2:0] FResultSelD, output logic [3:0] 
FOpCtrlD, + output logic [1:0] FResSelD, + output logic [1:0] FIntResSelD, output logic FmtD, output logic [2:0] FrmD, - output logic [1:0] FMemRWD, - output logic FOutputInput2D, - output logic FInput2UsedD, FInput3UsedD, output logic FWriteIntD); - - logic IllegalFPUInstr1D, IllegalFPUInstr2D; - // *** fix rounding for dynamic rounding + `define FCTRLW 15 + logic [`FCTRLW-1:0] ControlsD; + // FPU Instruction Decoder + always_comb + case(OpD) + // FWriteEn_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr + 7'b0000111: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b1_0_000_0000_00_00_0_0; // flw + 3'b011: ControlsD = `FCTRLW'b1_0_000_0001_00_00_0_0; // fld + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b0100111: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b0_0_000_0010_00_00_0_0; // fsw + 3'b011: ControlsD = `FCTRLW'b0_0_000_0011_00_00_0_0; // fsd + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1000011: ControlsD = `FCTRLW'b1_0_001_0000_00_00_0_0; // fmadd + 7'b1000111: ControlsD = `FCTRLW'b1_0_001_0001_00_00_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_001_0010_00_00_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_001_0011_00_00_0_0; // fnmadd + 7'b1010011: casez(Funct7D) + 7'b00000??: ControlsD = `FCTRLW'b1_0_010_0000_00_00_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_010_0001_00_00_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_001_0100_00_00_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_011_0000_00_00_1_0; // fdiv + 7'b01011??: ControlsD = `FCTRLW'b1_0_011_0001_00_00_1_0; // fsqrt + 7'b00100??: case(Funct3D) + 3'b000: ControlsD = `FCTRLW'b1_0_100_0000_01_00_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_100_0001_01_00_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_100_0010_01_00_0_0; // fsgnjx + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 
7'b00101??: case(Funct3D) + 3'b000: ControlsD = `FCTRLW'b1_0_100_0111_10_00_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_100_0101_10_00_0_0; // fmax + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b10100??: case(Funct3D) + 3'b010: ControlsD = `FCTRLW'b0_1_100_0010_00_00_0_0; // feq + 3'b001: ControlsD = `FCTRLW'b0_1_100_0001_00_00_0_0; // flt + 3'b000: ControlsD = `FCTRLW'b0_1_100_0011_00_00_0_0; // fle + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b11100??: if (Funct3D == 3'b001) + ControlsD = `FCTRLW'b0_1_100_0000_00_10_0_0; // fclass + else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_100_0100_00_01_0_0; // fmv.x.w + else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_100_0101_00_01_0_0; // fmv.x.d + else ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + 7'b1100000: case(Rs2D[0]) + 1'b0: ControlsD = `FCTRLW'b0_1_010_0110_00_00_0_0; // fcvt.s.w + 1'b1: ControlsD = `FCTRLW'b0_1_010_0101_00_00_0_0; // fcvt.s.wu + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1101000: case(Rs2D[0]) + 1'b0: ControlsD = `FCTRLW'b1_1_010_0100_00_00_0_0; // fcvt.w.s + 1'b1: ControlsD = `FCTRLW'b1_1_010_0101_00_00_0_0; // fcvt.wu.s + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1111000: ControlsD = `FCTRLW'b1_0_100_0000_00_00_0_0; // fmv.w.x + 7'b0100000: ControlsD = `FCTRLW'b1_0_010_0010_00_00_0_0; // fcvt.s.d + 7'b1100001: case(Rs2D[0]) + 1'b0: ControlsD = `FCTRLW'b0_1_010_1110_00_00_0_0; // fcvt.d.w + 1'b1: ControlsD = `FCTRLW'b0_1_010_1111_00_00_0_0; // fcvt.d.wu + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1101001: case(Rs2D[0]) + 1'b0: ControlsD = `FCTRLW'b1_0_010_1100_00_00_0_0; // fcvt.w.d + 1'b1: ControlsD = `FCTRLW'b1_0_010_1101_00_00_0_0; // fcvt.wu.d + default: 
ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + 7'b1111001: ControlsD = `FCTRLW'b1_0_100_0001_00_00_0_0; // fmv.d.x + 7'b0100001: ControlsD = `FCTRLW'b1_0_010_1000_00_00_0_0; // fcvt.d.s + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + default: ControlsD = `FCTRLW'b0_0_000_0000_00_00_0_1; // non-implemented instruction + endcase + // unswizzle control bits + assign {FWriteEnD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD; + + // if dynamic rounding, choose FRM_REGW assign FrmD = &Funct3D ? FRM_REGW : Funct3D; - //all subsequent logic is based on the table present - //in Section 5 of Wally Architecture Specification - - //write is enabled for all fp instruciton op codes - //sans fp load - logic isFP, isFPLD; - always_comb begin - //case statement is easier to modify - //in case of errors - case(OpD) - //fp instructions sans load - 7'b1010011 : isFP = 1'b1; - 7'b1000011 : isFP = 1'b1; - 7'b1000111 : isFP = 1'b1; - 7'b1001011 : isFP = 1'b1; - 7'b1001111 : isFP = 1'b1; - 7'b0100111 : isFP = 1'b1; - 7'b0000111 : isFP = 1'b1;// KEP change 7'b1010011 to 7'b0000111 - default : isFP = 1'b0; - endcase - end - - - - //useful intermediary signals - // - //(mult only not supported in current datapath) - //set third FMA operand to zero in this case - //(or equivalent) - - always_comb begin - //checks all but FMA/store/load - IllegalFPUInstr2D = 0; - FDivStartD = 1'b0; - if(OpD == 7'b1010011) begin - casez(Funct7D) - //compare - 7'b10100?? : FResultSelD = 3'b001; - //div/sqrt - 7'b0?011?? : begin FResultSelD = 3'b000; FDivStartD = 1'b1; end - //add/sub - 7'b0000??? : FResultSelD = 3'b100; - //mult - 7'b00010?? : FResultSelD = 3'b010; - //convert (not precision) - 7'b110?0?? : FResultSelD = 3'b100; - //convert (precision) - 7'b010000? : FResultSelD = 3'b100; - //Min/Max - 7'b00101?? : FResultSelD = 3'b001; - //sign injection - 7'b00100?? 
: FResultSelD = 3'b011; - //classify //only if funct3 = 001 - 7'b11100?? : if(Funct3D == 3'b001) FResultSelD = 3'b101; - //output ReadData1 - else if (Funct7D[1] == 0) FResultSelD = 3'b111; - //output SrcW - 7'b111100? : FResultSelD = 3'b110; - default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end - endcase - end - //FMA/store/load - else begin - case(OpD) - //4 FMA instructions - 7'b1000011 : FResultSelD = 3'b010; - 7'b1000111 : FResultSelD = 3'b010; - 7'b1001011 : FResultSelD = 3'b010; - 7'b1001111 : FResultSelD = 3'b010; - //store - 7'b0100111 : FResultSelD = 3'b111; - //load - 7'b0000111 : FResultSelD = 3'b111; - default : begin FResultSelD = 3'b0; IllegalFPUInstr2D = 1'b1; end - endcase - end - end - - assign FOutputInput2D = OpD == 7'b0100111; - - assign FMemRWD[0] = FOutputInput2D; - assign FMemRWD[1] = OpD == 7'b0000111; - - - - //register is chosen based on operation performed - //---- - //write selection is chosen in the same way as - //register selection - // - - // reg/write sel logic and assignment - // - // 3'b000 = div/sqrt - // 3'b001 = cmp - // 3'b010 = fma/mult - // 3'b011 = sgn inj - // 3'b100 = add/sub/cnvt - // 3'b101 = classify - // 3'b110 = output SrcAW - // 3'b111 = output ReadData1 - // - //reg select - - //this value is used enough to be shorthand - - - //operation control for each fp operation - //has to be expanded over standard to account for - //integrated fpadd/cvt - // - //will integrate FMA opcodes into design later - // - //conversion instructions will - //also need to be added later as I find the opcode - //version I used for this repo - - //let's do separate SOP for each type of operation -// assign FOpCtrlD[3] = 1'b0; -// -// - - - - always_comb begin - IllegalFPUInstr1D = 0; - FInput3UsedD = 0; - case (FResultSelD) - // div/sqrt + // Precision + // 0-single + // 1-double + assign FmtD = FResultSelD == 3'b000 ? 
Funct3D[0] : Funct7D[0]; + // div/sqrt // fdiv = ???0 // fsqrt = ???1 - 3'b000 : begin FOpCtrlD = {3'b0, Funct7D[5]}; FInput2UsedD = ~Funct7D[5]; end - // cmp + + // cmp // fmin = ?111 // fmax = ?101 // feq = ?010 // flt = ?001 // fle = ?011 // {?, is min or max, is eq or le, is lt or le} - 3'b001 : begin FOpCtrlD = {1'b0, Funct7D[2], ~Funct3D[0], ~(|Funct3D[2:1])}; FInput2UsedD = 1'b1; end - //fma/mult + + //fma/mult // fmadd = ?000 // fmsub = ?001 // fnmsub = ?010 -(a*b)+c // fnmadd = ?011 -(a*b)-c // fmul = ?100 // {?, is mul, is negitive, is sub} - 3'b010 : begin FOpCtrlD = {1'b0, OpD[4:2]}; FInput2UsedD = 1'b1; FInput3UsedD = ~OpD[4]; end - // sgn inj + + // sgn inj // fsgnj = ??00 // fsgnjn = ??01 // fsgnjx = ??10 - 3'b011 : begin FOpCtrlD = {2'b0, Funct3D[1:0]}; FInput2UsedD = 1'b1; end - // add/sub/cnvt + + // add/sub/cnvt // fadd = 0000 // fsub = 0001 // fcvt.w.s = 0100 @@ -188,35 +140,18 @@ module fctrl ( // fcvt.d.w = 1110 // fcvt.d.wu = 1111 // fcvt.d.s = 1000 - // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub - 3'b100 : begin FOpCtrlD = {Funct7D[0]&Funct7D[5], Funct7D[6], Funct7D[3] | (~Funct7D[6]&Funct7D[5]&~Funct7D[0]), (Rs2D[0]&Funct7D[5])|(Funct7D[2]&~Funct7D[5])}; FInput2UsedD = ~Funct7D[5]; end - // classify {?, ?, ?, ?} - 3'b101 : begin FOpCtrlD = 4'b0; FInput2UsedD = 1'b0; end - // output SrcAW + // { is double and not add/sub, is to/from int, is to int or float to double, is unsigned or sub} + // fmv.w.x = ???0 // fmv.w.d = ???1 - 3'b110 : begin FOpCtrlD = {3'b0, Funct7D[0]}; FInput2UsedD = 1'b0; end - // output Input1 + // flw = ?000 // fld = ?001 - // fsw = ?010 // output Input2 - // fsd = ?011 // output Input2 + // fsw = ?010 + // fsd = ?011 // fmv.x.w = ?100 // fmv.x.d = ?101 // {?, is mv, is store, is double or fmv} - 3'b111 : begin FOpCtrlD = {1'b0, OpD[6:5], Funct3D[0] | (OpD[6]&Funct7D[0])}; FInput2UsedD = OpD[5]; end - default : begin FOpCtrlD = 4'b0; IllegalFPUInstr1D = 1'b1; 
FInput2UsedD = 1'b0; end - endcase - end + - //precision - assign FmtD = (~&FResultSelD & Funct7D[0]) | (&FResultSelD & FOpCtrlD[0]); - - assign IllegalFPUInstrD = IllegalFPUInstr1D | IllegalFPUInstr2D; - //write to integer source if conv to int occurs - //AND of Funct7 for int results - // is add/cvt and is to int or is classify or is cmp and not max/min or is output ReadData1 and is mv - assign FWriteIntD = ((FResultSelD == 3'b100)&Funct7D[3]) | (FResultSelD == 3'b101) | ((FResultSelD == 3'b001)&~Funct7D[2]) | ((FResultSelD == 3'b111)&OpD[6]); - // if not writting to int reg and not a store function and not move - assign FWriteEnD = ~FWriteIntD & ~OpD[5] & ~((FResultSelD == 3'b111)&OpD[6]) & isFP; endmodule diff --git a/wally-pipelined/src/fpu/fma1.sv b/wally-pipelined/src/fpu/fma1.sv index ab9d2bb1..76f7316b 100644 --- a/wally-pipelined/src/fpu/fma1.sv +++ b/wally-pipelined/src/fpu/fma1.sv @@ -1,111 +1,111 @@ module fma1( - input logic [63:0] X, // X - input logic [63:0] Y, // Y - input logic [63:0] Z, // Z - input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - input logic FmtE, // precision 1 = double 0 = single - output logic [105:0] ProdManE, // 1.X frac * 1.Y frac - output logic [161:0] AlignedAddendE, // Z aligned for addition - output logic [12:0] ProdExpE, // X exponent + Y exponent - bias - output logic AddendStickyE, // sticky bit that is calculated during alignment - output logic KillProdE, // set the product to zero before addition if the product is too small to matter - output logic XZeroE, YZeroE, ZZeroE, // inputs are zero - output logic XInfE, YInfE, ZInfE, // inputs are infinity - output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN + input logic [63:0] X, // X + input logic [63:0] Y, // Y + input logic [63:0] Z, // Z + input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input 
logic FmtE, // precision 1 = double 0 = single + output logic [105:0] ProdManE, // 1.X frac * 1.Y frac + output logic [161:0] AlignedAddendE, // Z aligned for addition + output logic [12:0] ProdExpE, // X exponent + Y exponent - bias + output logic AddendStickyE, // sticky bit that is calculated during alignment + output logic KillProdE, // set the product to zero before addition if the product is too small to matter + output logic XZeroE, YZeroE, ZZeroE, // inputs are zero + output logic XInfE, YInfE, ZInfE, // inputs are infinity + output logic XNaNE, YNaNE, ZNaNE); // inputs are NaN - logic [51:0] XFrac,YFrac,ZFrac; // input fraction - logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one) - logic [12:0] XExp,YExp,ZExp; // input exponents - logic XSgn,YSgn,ZSgn; // input signs - logic [12:0] AlignCnt; // how far to shift the addend to align with the product - logic [211:0] ZManShifted; // output of the alignment shifter including sticky bit - logic [211:0] ZManPreShifted; // input to the alignment shifter - logic XDenorm, YDenorm, ZDenorm; // inputs are denormal - logic [63:0] Addend; // value to add (Z or zero) - logic [12:0] Bias; // 1023 for double, 127 for single - logic XExpZero, YExpZero, ZExpZero; // input exponent zero - logic XFracZero, YFracZero, ZFracZero; // input fraction zero - logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s + logic [51:0] XFrac,YFrac,ZFrac; // input fraction + logic [52:0] XMan,YMan,ZMan; // input mantissa (with leading one) + logic [12:0] XExp,YExp,ZExp; // input exponents + logic XSgn,YSgn,ZSgn; // input signs + logic [12:0] AlignCnt; // how far to shift the addend to align with the product + logic [213:0] ZManShifted; // output of the alignment shifter including sticky bit + logic [213:0] ZManPreShifted; // input to the alignment shifter + logic XDenorm, YDenorm, ZDenorm; // inputs are denormal + logic [63:0] Addend; // value to add (Z or zero) + logic [12:0] Bias; // 1023 for double, 127 for single + logic 
XExpZero, YExpZero, ZExpZero; // input exponent zero + logic XFracZero, YFracZero, ZFracZero; // input fraction zero + logic XExpMax, YExpMax, ZExpMax; // input exponent all 1s - /////////////////////////////////////////////////////////////////////////////// - // split inputs into the sign bit, fraction, and exponent to handle single or double precision - // - single precision is in the top half of the inputs - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // split inputs into the sign bit, fraction, and exponent to handle single or double precision + // - single precision is in the top half of the inputs + /////////////////////////////////////////////////////////////////////////////// - // Set addend to zero if FMUL instruction - assign Addend = FOpCtrlE[2] ? 64'b0 : Z; + // Set addend to zero if FMUL instruction + assign Addend = FOpCtrlE[2] ? 64'b0 : Z; - assign XSgn = X[63]; - assign YSgn = Y[63]; - assign ZSgn = Addend[63]; + assign XSgn = X[63]; + assign YSgn = Y[63]; + assign ZSgn = Addend[63]; - assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; - assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; - assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; + assign XExp = FmtE ? {2'b0, X[62:52]} : {5'b0, X[62:55]}; + assign YExp = FmtE ? {2'b0, Y[62:52]} : {5'b0, Y[62:55]}; + assign ZExp = FmtE ? {2'b0, Addend[62:52]} : {5'b0, Addend[62:55]}; - assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; - assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; - assign ZFrac = FmtE ? Addend[51:0] : {Addend[54:32], 29'b0}; - - assign XMan = {~XExpZero, XFrac}; - assign YMan = {~YExpZero, YFrac}; - assign ZMan = {~ZExpZero, ZFrac}; + assign XFrac = FmtE ? X[51:0] : {X[54:32], 29'b0}; + assign YFrac = FmtE ? Y[51:0] : {Y[54:32], 29'b0}; + assign ZFrac = FmtE ? 
Addend[51:0] : {Addend[54:32], 29'b0}; + + assign XMan = {~XExpZero, XFrac}; + assign YMan = {~YExpZero, YFrac}; + assign ZMan = {~ZExpZero, ZFrac}; - assign Bias = FmtE ? 13'h3ff : 13'h7f; + assign Bias = FmtE ? 13'h3ff : 13'h7f; - /////////////////////////////////////////////////////////////////////////////// - // determine if an input is a special value - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // determine if an input is a special value + /////////////////////////////////////////////////////////////////////////////// - assign XExpZero = ~|XExp; - assign YExpZero = ~|YExp; - assign ZExpZero = ~|ZExp; - - assign XFracZero = ~|XFrac; - assign YFracZero = ~|YFrac; - assign ZFracZero = ~|ZFrac; + assign XExpZero = ~|XExp; + assign YExpZero = ~|YExp; + assign ZExpZero = ~|ZExp; + + assign XFracZero = ~|XFrac; + assign YFracZero = ~|YFrac; + assign ZFracZero = ~|ZFrac; - assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0]; - assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0]; - assign ZExpMax = FmtE ? &ZExp[10:0] : &ZExp[7:0]; - - assign XNaNE = XExpMax & ~XFracZero; - assign YNaNE = YExpMax & ~YFracZero; - assign ZNaNE = ZExpMax & ~ZFracZero; + assign XExpMax = FmtE ? &XExp[10:0] : &XExp[7:0]; + assign YExpMax = FmtE ? &YExp[10:0] : &YExp[7:0]; + assign ZExpMax = FmtE ? 
&ZExp[10:0] : &ZExp[7:0]; + + assign XNaNE = XExpMax & ~XFracZero; + assign YNaNE = YExpMax & ~YFracZero; + assign ZNaNE = ZExpMax & ~ZFracZero; - assign XDenorm = XExpZero & ~XFracZero; - assign YDenorm = YExpZero & ~YFracZero; - assign ZDenorm = ZExpZero & ~ZFracZero; + assign XDenorm = XExpZero & ~XFracZero; + assign YDenorm = YExpZero & ~YFracZero; + assign ZDenorm = ZExpZero & ~ZFracZero; - assign XInfE = XExpMax & XFracZero; - assign YInfE = YExpMax & YFracZero; - assign ZInfE = ZExpMax & ZFracZero; + assign XInfE = XExpMax & XFracZero; + assign YInfE = YExpMax & YFracZero; + assign ZInfE = ZExpMax & ZFracZero; - assign XZeroE = XExpZero & XFracZero; - assign YZeroE = YExpZero & YFracZero; - assign ZZeroE = ZExpZero & ZFracZero; + assign XZeroE = XExpZero & XFracZero; + assign YZeroE = YExpZero & YFracZero; + assign ZZeroE = ZExpZero & ZFracZero; - /////////////////////////////////////////////////////////////////////////////// - // Calculate the product - // - When multipliying two fp numbers, add the exponents - // - Subtract the bias (XExp + YExp has two biases, one from each exponent) - // - Denormal numbers have an an exponent value of 1, however they are - // represented with an exponent of 0. add one if there is a denormal number - /////////////////////////////////////////////////////////////////////////////// - - // verilator lint_off WIDTH - assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : - XExp + YExp - Bias + XDenorm + YDenorm; + /////////////////////////////////////////////////////////////////////////////// + // Calculate the product + // - When multipliying two fp numbers, add the exponents + // - Subtract the bias (XExp + YExp has two biases, one from each exponent) + // - Denormal numbers have an an exponent value of 1, however they are + // represented with an exponent of 0. 
add one if there is a denormal number + /////////////////////////////////////////////////////////////////////////////// + + // verilator lint_off WIDTH + assign ProdExpE = (XZeroE|YZeroE) ? 13'b0 : + XExp + YExp - Bias + XDenorm + YDenorm; - // Calculate the product's mantissa - // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. - assign ProdManE = XMan * YMan; + // Calculate the product's mantissa + // - Add the assumed one. If the number is denormalized or zero, it does not have an assumed one. + assign ProdManE = XMan * YMan; @@ -114,72 +114,71 @@ module fma1( - - /////////////////////////////////////////////////////////////////////////////// - // Alignment shifter - /////////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////////// + // Alignment shifter + /////////////////////////////////////////////////////////////////////////////// - // determine the shift count for alignment - // - negitive means Z is larger, so shift Z left - // - positive means the product is larger, so shift Z right - // - Denormal numbers have an an exponent value of 1, however they are - // represented with an exponent of 0. add one to the exponent if it is a denormal number - assign AlignCnt = ProdExpE - ZExp - ZDenorm; - // verilator lint_on WIDTH + // determine the shift count for alignment + // - negitive means Z is larger, so shift Z left + // - positive means the product is larger, so shift Z right + // - Denormal numbers have an an exponent value of 1, however they are + // represented with an exponent of 0. 
add one to the exponent if it is a denormal number + assign AlignCnt = ProdExpE - ZExp - ZDenorm; + // verilator lint_on WIDTH - // Defualt Addition without shifting - // | 55'b0 | 106'b(product) | 2'b0 | - // |1'b0| addnend | + // Defualt Addition without shifting + // | 55'b0 | 106'b(product) | 2'b0 | + // |1'b0| addnend | - // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) - assign ZManPreShifted = {55'b0, ZMan, 104'b0}; - always_comb - begin - - // If the product is too small to effect the sum, kill the product + // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) + assign ZManPreShifted = {55'b0, ZMan, 106'b0}; + always_comb + begin + + // If the product is too small to effect the sum, kill the product - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - if ($signed(AlignCnt) <= $signed(-13'd56)) begin - KillProdE = 1; - ZManShifted = {107'b0, ZMan, 52'b0}; - AddendStickyE = ~(XZeroE|YZeroE); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + if ($signed(AlignCnt) <= $signed(-13'd56)) begin + KillProdE = 1; + ZManShifted = ZManPreShifted;//{107'b0, ZMan, 54'b0}; + AddendStickyE = ~(XZeroE|YZeroE); - // If the Addend is shifted left (negitive AlignCnt) + // If the Addend is shifted left (negitive AlignCnt) - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if($signed(AlignCnt) <= $signed(13'd0)) begin - KillProdE = 0; - ZManShifted = ZManPreShifted << -AlignCnt; - AddendStickyE = |(ZManShifted[49:0]); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else if($signed(AlignCnt) <= $signed(13'd0)) begin + KillProdE = 0; + ZManShifted = ZManPreShifted << -AlignCnt; + AddendStickyE = |(ZManShifted[51:0]); - // If the Addend is shifted right (positive AlignCnt) + // If the Addend is shifted right (positive AlignCnt) - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else if 
($signed(AlignCnt)<=$signed(13'd104)) begin - KillProdE = 0; - ZManShifted = ZManPreShifted >> AlignCnt; - AddendStickyE = |(ZManShifted[49:0]); + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else if ($signed(AlignCnt)<=$signed(13'd106)) begin + KillProdE = 0; + ZManShifted = ZManPreShifted >> AlignCnt; + AddendStickyE = |(ZManShifted[51:0]); - // If the addend is too small to effect the addition - // - The addend has to shift two past the end of the addend to be considered too small - // - The 2 extra bits are needed for rounding + // If the addend is too small to effect the addition + // - The addend has to shift two past the end of the addend to be considered too small + // - The 2 extra bits are needed for rounding - // | 55'b0 | 106'b(product) | 2'b0 | - // | addnend | - end else begin - KillProdE = 0; - ZManShifted = 0; - AddendStickyE = ~ZZeroE; + // | 54'b0 | 106'b(product) | 2'b0 | + // | addnend | + end else begin + KillProdE = 0; + ZManShifted = 0; + AddendStickyE = ~ZZeroE; - end - end + end + end - - assign AlignedAddendE = ZManShifted[211:50]; - -endmodule + + assign AlignedAddendE = ZManShifted[213:52]; +endmodule \ No newline at end of file diff --git a/wally-pipelined/src/fpu/fma2.sv b/wally-pipelined/src/fpu/fma2.sv index f9efe93e..131f9839 100644 --- a/wally-pipelined/src/fpu/fma2.sv +++ b/wally-pipelined/src/fpu/fma2.sv @@ -1,127 +1,131 @@ + + module fma2( - input logic [63:0] X, // X - input logic [63:0] Y, // Y - input logic [63:0] Z, // Z - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) - input logic FmtM, // precision 1 = double 0 = single - input logic [105:0] ProdManM, // 1.X frac * 1.Y frac - input logic [161:0] AlignedAddendM, // Z aligned for addition - 
input logic [12:0] ProdExpM, // X exponent + Y exponent - bias - input logic AddendStickyM, // sticky bit that is calculated during alignment - input logic KillProdM, // set the product to zero before addition if the product is too small to matter - input logic XZeroM, YZeroM, ZZeroM, // inputs are zero - input logic XInfM, YInfM, ZInfM, // inputs are infinity - input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - output logic [63:0] FmaResultM, // FMA final result - output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} - + input logic [63:0] X, // X + input logic [63:0] Y, // Y + input logic [63:0] Z, // Z + input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [2:0] FOpCtrlM, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input logic FmtM, // precision 1 = double 0 = single + input logic [105:0] ProdManM, // 1.X frac * 1.Y frac + input logic [161:0] AlignedAddendM, // Z aligned for addition + input logic [12:0] ProdExpM, // X exponent + Y exponent - bias + input logic AddendStickyM, // sticky bit that is calculated during alignment + input logic KillProdM, // set the product to zero before addition if the product is too small to matter + input logic XZeroM, YZeroM, ZZeroM, // inputs are zero + input logic XInfM, YInfM, ZInfM, // inputs are infinity + input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN + output logic [63:0] FmaResultM, // FMA final result + output logic [4:0] FmaFlagsM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact} + - logic [51:0] ResultFrac; // Result fraction - logic [10:0] ResultExp; // Result exponent - logic ResultSgn; // Result sign - logic [10:0] ZExp; // input exponent - logic XSgn, YSgn, ZSgn; // input sign - logic PSgn; // product sign - logic [105:0] ProdMan2; // 
product being added - logic [162:0] AlignedAddend2; // possibly inverted aligned Z - logic [161:0] Sum; // positive sum - logic [162:0] PreSum; // possibly negitive sum - logic [12:0] SumExp; // exponent of the normalized sum - logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results - logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 - logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow - logic [53:0] NormSum; // normalized sum - logic [161:0] SumShifted; // sum shifted for normalization - logic [8:0] NormCnt; // output of the leading zero detector - logic NormSumSticky; // sticky bit calulated from the normalized sum - logic SumZero; // is the sum zero - logic NegSum; // is the sum negitive - logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z) - logic ResultDenorm; // is the result denormalized - logic Sticky; // Sticky bit - logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding - logic Invalid,Underflow,Overflow,Inexact; // flags - logic [8:0] DenormShift; // right shift if the result is denormalized - logic SubBySmallNum; // was there supposed to be a subtraction by a small number - logic [63:0] Addend; // value to add (Z or zero) - logic ZeroSgn; // the result's sign if the sum is zero - logic ResultSgnTmp; // the result's sign assuming the result is not zero - logic Guard, Round, LSBNormSum; // bits needed to determine rounding - logic [12:0] MaxExp; // maximum value of the exponent - logic [12:0] FracLen; // length of the fraction - logic SigNaN; // is an input a signaling NaN - logic UnderflowFlag; // Underflow singal used in FmaFlagsM (used to avoid a circular depencency) - logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results + logic [51:0] ResultFrac; // Result fraction + logic [10:0] ResultExp; // Result exponent + logic ResultSgn; // Result 
sign + logic [10:0] ZExp; // input exponent + logic XSgn, YSgn, ZSgn; // input sign + logic PSgn; // product sign + logic [105:0] ProdMan2; // product being added + logic [162:0] AlignedAddend2; // possibly inverted aligned Z + logic [161:0] Sum; // positive sum + logic [162:0] PreSum; // possibly negitive sum + logic [12:0] SumExp; // exponent of the normalized sum + logic [12:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results + logic [12:0] SumExpTmpMinus1; // SumExpTmp-1 + logic [12:0] FullResultExp; // ResultExp with bits to determine sign and overflow + logic [54:0] NormSum; // normalized sum + logic [161:0] SumShifted; // sum shifted for normalization + logic [8:0] NormCnt; // output of the leading zero detector + logic NormSumSticky; // sticky bit calulated from the normalized sum + logic SumZero; // is the sum zero + logic NegSum; // is the sum negitive + logic InvZ; // invert Z if there is a subtraction (-product + Z or product - Z) + logic ResultDenorm; // is the result denormalized + logic Sticky; // Sticky bit + logic Plus1, Minus1, CalcPlus1, CalcMinus1; // do you add or subtract one for rounding + logic UfPlus1, UfCalcPlus1; // do you add one (for determining underflow flag) + logic Invalid,Underflow,Overflow,Inexact; // flags + logic [8:0] DenormShift; // right shift if the result is denormalized + logic SubBySmallNum; // was there supposed to be a subtraction by a small number + logic [63:0] Addend; // value to add (Z or zero) + logic ZeroSgn; // the result's sign if the sum is zero + logic ResultSgnTmp; // the result's sign assuming the result is not zero + logic Guard, Round, LSBNormSum; // bits needed to determine rounding + logic UfGuard, UfRound, UfLSBNormSum; // bits needed to determine rounding for underflow flag + logic [12:0] MaxExp; // maximum value of the exponent + logic [12:0] FracLen; // length of the fraction + logic SigNaN; // is an input a signaling NaN + logic UnderflowFlag; // 
Underflow singal used in FmaFlagsM (used to avoid a circular depencency) + logic [63:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results - - /////////////////////////////////////////////////////////////////////////////// - // Select input fields - // The following logic duplicates fma1 because it's cheaper to recompute than provide registers - /////////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////////// + // Select input fields + // The following logic duplicates fma1 because it's cheaper to recompute than provide registers + /////////////////////////////////////////////////////////////////////////////// - // Set addend to zero if FMUL instruction - assign Addend = FOpCtrlM[2] ? 64'b0 : Z; + // Set addend to zero if FMUL instruction + assign Addend = FOpCtrlM[2] ? 64'b0 : Z; - // split inputs into the sign bit, and exponent to handle single or double precision - // - single precision is in the top half of the inputs - assign XSgn = X[63]; - assign YSgn = Y[63]; - assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction + // split inputs into the sign bit, and exponent to handle single or double precision + // - single precision is in the top half of the inputs + assign XSgn = X[63]; + assign YSgn = Y[63]; + assign ZSgn = Addend[63]^FOpCtrlM[0]; //Negate Z if subtraction - assign ZExp = FmtM ? Addend[62:52] : {3'b0, Addend[62:55]}; + assign ZExp = FmtM ? 
Addend[62:52] : {3'b0, Addend[62:55]}; - // Calculate the product's sign - // Negate product's sign if FNMADD or FNMSUB - assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1]; + // Calculate the product's sign + // Negate product's sign if FNMADD or FNMSUB + assign PSgn = XSgn ^ YSgn ^ FOpCtrlM[1]; - /////////////////////////////////////////////////////////////////////////////// - // Addition - /////////////////////////////////////////////////////////////////////////////// - - // Negate Z when doing one of the following opperations: - // -prod + Z - // prod - Z - assign InvZ = ZSgn ^ PSgn; + /////////////////////////////////////////////////////////////////////////////// + // Addition + /////////////////////////////////////////////////////////////////////////////// + + // Negate Z when doing one of the following opperations: + // -prod + Z + // prod - Z + assign InvZ = ZSgn ^ PSgn; - // Choose an inverted or non-inverted addend - the one is added later - assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; - // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign ProdMan2 = KillProdM ? 106'b0 : ProdManM; + // Choose an inverted or non-inverted addend - the one is added later + assign AlignedAddend2 = InvZ ? ~{1'b0, AlignedAddendM} : {1'b0, AlignedAddendM}; + // Kill the product if the product is too small to effect the addition (determined in fma1.sv) + assign ProdMan2 = KillProdM ? 106'b0 : ProdManM; - // Do the addition - // - add one to negate if the added was inverted - // - the 2 extra bits at the begining and end are needed for rounding - assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ}; - - // Is the sum negitive - assign NegSum = PreSum[162]; - // If the sum is negitive, negate the sum. - assign Sum = NegSum ? 
-PreSum[161:0] : PreSum[161:0]; + // Do the addition + // - add one to negate if the added was inverted + // - the 2 extra bits at the begining and end are needed for rounding + assign PreSum = AlignedAddend2 + {55'b0, ProdMan2, 2'b0} + {162'b0, InvZ}; + + // Is the sum negitive + assign NegSum = PreSum[162]; + // If the sum is negitive, negate the sum. + assign Sum = NegSum ? -PreSum[161:0] : PreSum[161:0]; - /////////////////////////////////////////////////////////////////////////////// - // Leading one detector - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Leading one detector + /////////////////////////////////////////////////////////////////////////////// - //*** replace with non-behavoral code - logic [8:0] i; - always_comb begin - i = 0; - while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one - NormCnt = i+1; // compute shift count - end + //*** replace with non-behavoral code + logic [8:0] i; + always_comb begin + i = 0; + while (~Sum[161-i] && $unsigned(i) <= $unsigned(9'd161)) i = i+1; // search for leading one + NormCnt = i+1; // compute shift count + end @@ -133,112 +137,127 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Normalization - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Normalization + /////////////////////////////////////////////////////////////////////////////// - // Determine if the sum is zero - assign SumZero = ~(|Sum); + // Determine if the sum is zero + assign SumZero = ~(|Sum); - // determine the length of the fraction based on precision - assign FracLen = FmtM ? 13'd52 : 13'd23; + // determine the length of the fraction based on precision + assign FracLen = FmtM ? 
13'd52 : 13'd23; - // Determine if the result is denormal - assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56); - assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; + // Determine if the result is denormal + assign SumExpTmp = KillProdM ? {2'b0, ZExp} : ProdExpM + -({4'b0, NormCnt} - 13'd56); + assign ResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; - // Determine the shift needed for denormal results - assign SumExpTmpMinus1 = SumExpTmp-1; - assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0; + // Determine the shift needed for denormal results + assign SumExpTmpMinus1 = SumExpTmp-1; + assign DenormShift = ResultDenorm ? SumExpTmpMinus1[8:0] : 9'b0; - // Normalize the sum - assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift; - assign NormSum = SumShifted[161:108]; - // Calculate the sticky bit - assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]); - assign Sticky = AddendStickyM | NormSumSticky; + // Normalize the sum + assign SumShifted = SumZero ? 162'b0 : Sum << NormCnt+DenormShift; + assign NormSum = SumShifted[161:107]; + // Calculate the sticky bit + assign NormSumSticky = FmtM ? (|SumShifted[107:0]) : (|SumShifted[136:0]); + assign Sticky = AddendStickyM | NormSumSticky; - // Determine sum's exponent - assign SumExp = SumZero ? 13'b0 : - ResultDenorm ? 13'b0 : - SumExpTmp; + // Determine sum's exponent + assign SumExp = SumZero ? 13'b0 : + ResultDenorm ? 
13'b0 : + SumExpTmp; - /////////////////////////////////////////////////////////////////////////////// - // Rounding - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Rounding + /////////////////////////////////////////////////////////////////////////////// - // round to nearest even - // {Guard, Round, Sticky} - // 0xx - do nothing - // 100 - tie - Plus1 if result is odd (LSBNormSum = 1) - // - don't add 1 if a small number was supposed to be subtracted - // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) - // 110/111 - Plus1 + // round to nearest even + // {Guard, Round, Sticky} + // 0xx - do nothing + // 100 - tie - Plus1 if result is odd (LSBNormSum = 1) + // - don't add 1 if a small number was supposed to be subtracted + // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) + // 110/111 - Plus1 - // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 + // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 - // round to -infinity - // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 - // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 + // round to -infinity + // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 + // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 - // round to infinity - // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 - // - 
subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 + // round to infinity + // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 + // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 - // round to nearest max magnitude - // {Guard, Round, Sticky} - // 0xx - do nothing - // 100 - tie - Plus1 - // - don't add 1 if a small number was supposed to be subtracted - // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) - // 110/111 - Plus1 + // round to nearest max magnitude + // {Guard, Round, Sticky} + // 0xx - do nothing + // 100 - tie - Plus1 + // - don't add 1 if a small number was supposed to be subtracted + // 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) + // 110/111 - Plus1 - // determine guard, round, and least significant bit of the result - assign Guard = FmtM ? NormSum[1] : NormSum[30]; - assign Round = FmtM ? NormSum[0] : NormSum[29]; - assign LSBNormSum = FmtM ? NormSum[2] : NormSum[31]; + // determine guard, round, and least significant bit of the result + assign Guard = FmtM ? NormSum[2] : NormSum[31]; + assign Round = FmtM ? NormSum[1] : NormSum[30]; + assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32]; - // Deterimine if a small number was supposed to be subtrated - assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM; + // used to determine underflow flag + assign UfGuard = FmtM ? NormSum[1] : NormSum[30]; + assign UfRound = FmtM ? NormSum[0] : NormSum[29]; + assign UfLSBNormSum = FmtM ? 
NormSum[2] : NormSum[31]; - always_comb begin - // Determine if you add 1 - case (FrmM) - 3'b000: CalcPlus1 = Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&LSBNormSum&~SubBySmallNum));//round to nearest even - 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up - 3'b100: CalcPlus1 = (Guard & (Round | (Sticky&~(~Round&SubBySmallNum)) | (~Round&~Sticky&~SubBySmallNum)));//round to nearest max magnitude - default: CalcPlus1 = 1'bx; - endcase - // Determine if you subtract 1 - case (FrmM) - 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up - 3'b100: CalcMinus1 = 0;//round to nearest max magnitude - default: CalcMinus1 = 1'bx; - endcase - - end + // Deterimine if a small number was supposed to be subtrated + assign SubBySmallNum = AddendStickyM&InvZ&~(NormSumSticky)&~ZZeroM; - // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky | Guard | Round); - assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); + always_comb begin + // Determine if you add 1 + case (FrmM) + 3'b000: CalcPlus1 = Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&LSBNormSum&~SubBySmallNum));//round to nearest even + 3'b001: CalcPlus1 = 0;//round to zero + 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up + 3'b100: CalcPlus1 = (Guard & (Round | ((Sticky|UfGuard)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky|UfGuard)&~SubBySmallNum)));//round to nearest max magnitude + default: CalcPlus1 = 1'bx; + endcase + // Determine if you add 1 (for 
underflow flag) + case (FrmM) + 3'b000: UfCalcPlus1 = UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&UfLSBNormSum&~SubBySmallNum));//round to nearest even + 3'b001: UfCalcPlus1 = 0;//round to zero + 3'b010: UfCalcPlus1 = ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round down + 3'b011: UfCalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~UfGuard & ~UfRound);//round up + 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (Sticky&~(~UfRound&SubBySmallNum)) | (~UfRound&~Sticky&~SubBySmallNum)));//round to nearest max magnitude + default: UfCalcPlus1 = 1'bx; + endcase + // Determine if you subtract 1 + case (FrmM) + 3'b000: CalcMinus1 = 0;//round to nearest even + 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero + 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up + 3'b100: CalcMinus1 = 0;//round to nearest max magnitude + default: CalcMinus1 = 1'bx; + endcase + + end - // Compute rounded result - logic [64:0] RoundAdd; - logic [51:0] NormSumTruncated; - assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} : - Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; - assign NormSumTruncated = FmtM ? NormSum[53:2] : {NormSum[53:31], 29'b0}; + // If an answer is exact don't round + assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round); + assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard | UfRound); + assign Minus1 = CalcMinus1 & (Sticky | UfGuard | Guard | Round); - assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; + // Compute rounded result + logic [64:0] RoundAdd; + logic [51:0] NormSumTruncated; + assign RoundAdd = FmtM ? Minus1 ? {65{1'b1}} : {64'b0, Plus1} : + Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0}; + assign NormSumTruncated = FmtM ? 
NormSum[54:3] : {NormSum[54:32], 29'b0}; + + assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; assign ResultExp = FullResultExp[10:0]; @@ -247,58 +266,57 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Sign calculation - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Sign calculation + /////////////////////////////////////////////////////////////////////////////// - // Determine the sign if the sum is zero - // if cancelation then 0 unless round to -infinity - // otherwise psign - assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn; + // Determine the sign if the sum is zero + // if cancelation then 0 unless round to -infinity + // otherwise psign + assign ZeroSgn = (PSgn^ZSgn)&~Underflow ? FrmM == 3'b010 : PSgn; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn); - assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign ResultSgnTmp = InvZ&(ZSgn)&NegSum | InvZ&PSgn&~NegSum | ((ZSgn)&PSgn); + assign ResultSgn = SumZero ? ZeroSgn : ResultSgnTmp; - /////////////////////////////////////////////////////////////////////////////// - // Flags - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Flags + /////////////////////////////////////////////////////////////////////////////// - // Set Invalid flag for following cases: - // 1) Inf - Inf (unless x or y is NaN) - // 2) 0 * Inf - // 3) any input is a signaling NaN - assign MaxExp = FmtM ? 
13'd2047 : 13'd255; - assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : - (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); - assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); - - // Set Overflow flag if the number is too big to be represented - // - Don't set the overflow flag if an overflowed result isn't outputed - assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Invalid flag for following cases: + // 1) any input is a signaling NaN + // 2) Inf - Inf (unless x or y is NaN) + // 3) 0 * Inf + assign MaxExp = FmtM ? 13'd2047 : 13'd255; + assign SigNaN = FmtM ? (XNaNM&~X[51]) | (YNaNM&~Y[51]) | (ZNaNM&~Addend[51]) : + (XNaNM&~X[54]) | (YNaNM&~Y[54]) | (ZNaNM&~Addend[54]); + assign Invalid = SigNaN | ((XInfM || YInfM) & ZInfM & (PSgn ^ ZSgn) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM); + + // Set Overflow flag if the number is too big to be represented + // - Don't set the overflow flag if an overflowed result isn't outputed + assign Overflow = FullResultExp >= MaxExp & ~FullResultExp[12]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - // Set Underflow flag if the number is too small to be represented in normal numbers - // - Don't set the underflow flag if the result is exact - assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - //assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); - assign UnderflowFlag = (Underflow | (FullResultExp == 0)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM)&(Round|Guard|Sticky)) & ~(FullResultExp == 1); - // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision - // - Don't set the underflow flag if an underflowed result isn't outputed - assign Inexact = 
(Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Underflow flag if the number is too small to be represented in normal numbers + // - Don't set the underflow flag if the result is exact + assign Underflow = (SumExp[12] | ((SumExp == 0) & (Round|Guard|Sticky|UfGuard)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign UnderflowFlag = (FullResultExp[12] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision + // - Don't set the underflow flag if an underflowed result isn't outputed + assign Inexact = (Sticky|UfGuard|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); - // Combine flags - // - FMA can't set the Divide by zero flag - // - Don't set the underflow flag if the result was rounded up to a normal number - assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; + // Combine flags + // - FMA can't set the Divide by zero flag + // - Don't set the underflow flag if the result was rounded up to a normal number + assign FmaFlagsM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; @@ -306,31 +324,31 @@ module fma2( - /////////////////////////////////////////////////////////////////////////////// - // Select the result - /////////////////////////////////////////////////////////////////////////////// - assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; - assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; - assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; - assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{ResultSgn, 11'h7fe, {52{1'b1}}} : - {ResultSgn, 11'h7ff, 52'b0} : - ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : - {ResultSgn, 8'hff, 55'b0}; - assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; - assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; - assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; - assign FmaResultM = XNaNM ? XNaNResult : - YNaNM ? YNaNResult : - ZNaNM ? ZNaNResult : - Invalid ? InvalidResult : // has to be before inf - XInfM ? {PSgn, X[62:0]} : - YInfM ? {PSgn, Y[62:0]} : - ZInfM ? {ZSgn, Addend[62:0]} : - Overflow ? OverflowResult : - KillProdM ? KillProdResult : // has to be after Underflow - Underflow & ~ResultDenorm ? UnderflowResult : - FmtM ? {ResultSgn, ResultExp, ResultFrac} : - {ResultSgn, ResultExp[7:0], ResultFrac, 3'b0}; + /////////////////////////////////////////////////////////////////////////////// + // Select the result + /////////////////////////////////////////////////////////////////////////////// + assign XNaNResult = FmtM ? {XSgn, X[62:52], 1'b1,X[50:0]} : {XSgn, X[62:55], 1'b1,X[53:0]}; + assign YNaNResult = FmtM ? {YSgn, Y[62:52], 1'b1,Y[50:0]} : {YSgn, Y[62:55], 1'b1,Y[53:0]}; + assign ZNaNResult = FmtM ? {ZSgn, Addend[62:52], 1'b1,Addend[50:0]} : {ZSgn, Addend[62:55], 1'b1,Addend[53:0]}; + assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, 11'h7fe, {52{1'b1}}} : + {ResultSgn, 11'h7ff, 52'b0} : + ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{ResultSgn, 8'hfe, {23{1'b1}}, 32'b0} : + {ResultSgn, 8'hff, 55'b0}; + assign InvalidResult = FmtM ? {ResultSgn, 11'h7ff, 1'b1, 51'b0} : {ResultSgn, 8'hff, 1'b1, 54'b0}; + assign KillProdResult = FmtM ?{ResultSgn, Addend[62:0] - {62'b0, (Minus1&AddendStickyM)}} + {62'b0, (Plus1&AddendStickyM)} : {ResultSgn, Addend[62:32] - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}, 32'b0}; + assign UnderflowResult = FmtM ? {ResultSgn, 63'b0} + {63'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))} : {{ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}, 32'b0}; + assign FmaResultM = XNaNM ? XNaNResult : + YNaNM ? YNaNResult : + ZNaNM ? ZNaNResult : + Invalid ? InvalidResult : // has to be before inf + XInfM ? {PSgn, X[62:0]} : + YInfM ? {PSgn, Y[62:0]} : + ZInfM ? {ZSgn, Addend[62:0]} : + Overflow ? OverflowResult : + KillProdM ? KillProdResult : // has to be after Underflow + Underflow & ~ResultDenorm ? UnderflowResult : + FmtM ? {ResultSgn, ResultExp, ResultFrac} : + {ResultSgn, ResultExp[7:0], ResultFrac, 3'b0}; diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index fc38b2f6..5c15268e 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -1,7 +1,7 @@ /////////////////////////////////////////// // -// Written: -// Modified: +// Written: Katherine Parry, Bret Mathis +// Modified: 6/23/2021 // // Purpose: FPU // @@ -25,23 +25,22 @@ `include "wally-config.vh" module fpu ( - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic reset, input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode from CSR input logic [31:0] InstrD, + input logic [`XLEN-1:0] ReadDataW, // Read data from memory input logic [`XLEN-1:0] SrcAE, // Integer input being processed input logic [`XLEN-1:0] SrcAM, // Integer input being written into fpreg input logic StallE, StallM, StallW, input logic FlushE, FlushM, FlushW, - input logic [`XLEN-1:0] ReadDataW, // Read data from memory - input 
logic RegWriteD, // register write enable from ieu - output logic [4:0] SetFflagsM, // FPU flags - output logic [1:0] FMemRWM, // Read/write enable for memory {read, write} output logic FStallD, // Stall the decode stage if Div/Sqrt instruction output logic FWriteIntE, FWriteIntM, FWriteIntW, // Write integer register enable - output logic [`XLEN-1:0] FWriteDataM, // Data to be written to memory + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory + output logic [`XLEN-1:0] FIntResM, output logic FDivBusyE, // Is the divison/sqrt unit busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + output logic [4:0] SetFflagsM, // FPU flags output logic [`XLEN-1:0] FPUResultW); // FPU result // control logic signal instantiation @@ -51,24 +50,27 @@ module fpu ( logic FDivStartD, FDivStartE; // Start division logic FWriteIntD; // Write to integer register logic FOutputInput2D, FOutputInput2E; // Put Input2 in Input1 if a store instruction - logic [1:0] FMemRWD, FMemRWE; // Read and write enable for memory - logic [1:0] FForwardInput1D, FForwardInput1E; // Input1 forwarding mux control signal - logic [1:0] FForwardInput2D, FForwardInput2E; // Input2 forwarding mux control signal - logic FForwardInput3D, FForwardInput3E; // Input3 forwarding mux control signal - logic FInput2UsedD; // Is input 2 used - logic FInput3UsedD; // Is input 3 used + logic [1:0] FMemRWD; // Read and write enable for memory + logic [1:0] ForwardXD, ForwardXE; // Input1 forwarding mux control signal + logic [1:0] ForwardYD, ForwardYE; // Input2 forwarding mux control signal + logic [1:0] ForwardZD, ForwardZE; // Input3 forwarding mux control signal + logic SrcYUsedD; // Is input 2 used + logic SrcZUsedD; // Is input 3 used logic [2:0] FResultSelD, FResultSelE, FResultSelM, FResultSelW; // Select FP result - logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component - logic SelLoadInputE, SelLoadInputM; // 
Select which adress to load when single precision + logic [3:0] FOpCtrlD, FOpCtrlE, FOpCtrlM, FOpCtrlW; // Select which opperation to do in each component + logic [1:0] FResSelD, FResSelE, FResSelM; + logic [1:0] FIntResSelD, FIntResSelE, FIntResSelM; + logic [4:0] Adr1E, Adr2E, Adr3E; - // regfile signals //*** KEP lint warning - changed `XLEN-1 to 63 + // regfile signals logic [4:0] RdE, RdM, RdW; // what adress to write to // ***Can take from ieu insted of pipelining logic [63:0] FWDM; // Write data for FP register logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FInput1E, FInput1M, FInput1W, FInput1tmpE; // Input 1 to the various units (after forwarding) - logic [63:0] FInput2E, FInput2M; // Input 2 to the various units (after forwarding) - logic [63:0] FInput3E, FInput3M; // Input 3 to the various units (after forwarding) + logic [63:0] SrcXE, SrcXM, SrcXW; // Input 1 to the various units (after forwarding) + logic [`XLEN-1:0] SrcXMAligned; + logic [63:0] SrcYE, SrcYM, SrcYW; // Input 2 to the various units (after forwarding) + logic [63:0] SrcZE, SrcZM; // Input 3 to the various units (after forwarding) logic [63:0] FLoadResultW, FLoadStoreResultM, FLoadStoreResultW; // Result for load, store, and move to int-reg instructions // div/sqrt signals @@ -123,19 +125,14 @@ module fpu ( logic [4:0] FAddFlagsM, FAddFlagsW; // cmp signals - logic [7:0] WE, WM; - logic [7:0] XE, XM; - logic ANaNE, ANaNM; - logic BNaNE, BNaNM; - logic AzeroE, AzeroM; - logic BzeroE, BzeroM; - logic CmpInvalidM, CmpInvalidW; - logic [1:0] CmpFCCM, CmpFCCW; - logic [63:0] FCmpResultM, FCmpResultW; + logic CmpInvalidE, CmpInvalidM, CmpInvalidW; + logic [63:0] FCmpResultE, FCmpResultM, FCmpResultW; // fsgn signals logic [63:0] SgnResultE, SgnResultM, SgnResultW; logic [4:0] SgnFlagsE, SgnFlagsM, SgnFlagsW; + logic [63:0] FResM, FResW; + logic FFlgM, FFlgW; // 
instantiation of W stage regfile signals logic [63:0] AlignedSrcAM, ForwardSrcAM, SrcAW; @@ -147,31 +144,9 @@ module fpu ( logic [63:0] FPUResult64W, FPUResult64E; logic [4:0] FPUFlagsW; - // pipeline control logic - logic PipeEnableDE; - logic PipeEnableEM; - logic PipeEnableMW; - logic PipeClearDE; - logic PipeClearEM; - logic PipeClearMW; - - // temporarily assign pipe clear and enable signals - // to never flush & always be running - localparam PipeClear = 1'b0; - localparam PipeEnable = 1'b1; - always_comb begin - PipeEnableDE = ~StallE; - PipeEnableEM = ~StallM; - PipeEnableMW = ~StallW; - PipeClearDE = FlushE; - PipeClearEM = FlushM; - PipeClearMW = FlushW; - end //DECODE STAGE - // Hazard unit for FPU - fpuhazard hazard(.Adr1(InstrD[19:15]), .Adr2(InstrD[24:20]), .Adr3(InstrD[31:27]), .*); // top-level controller for FPU fctrl ctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .*); @@ -185,40 +160,33 @@ module fpu ( //***************** // fpregfile D/E pipe registers //***************** - flopenrc #(64) DEReg1(clk, reset, PipeClearDE, PipeEnableDE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, PipeClearDE, PipeEnableDE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, PipeClearDE, PipeEnableDE, FRD3D, FRD3E); + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); //***************** // other D/E pipe registers //***************** - flopenrc #(1) DEReg4(clk, reset, PipeClearDE, PipeEnableDE, FWriteEnD, FWriteEnE); - flopenrc #(3) DEReg5(clk, reset, PipeClearDE, PipeEnableDE, FResultSelD, FResultSelE); - flopenrc #(3) DEReg6(clk, reset, PipeClearDE, PipeEnableDE, FrmD, FrmE); - flopenrc #(1) DEReg7(clk, reset, PipeClearDE, PipeEnableDE, FmtD, FmtE); - flopenrc #(5) DEReg8(clk, reset, PipeClearDE, PipeEnableDE, InstrD[11:7], RdE); - flopenrc #(4) DEReg9(clk, 
reset, PipeClearDE, PipeEnableDE, FOpCtrlD, FOpCtrlE); - flopenrc #(1) DEReg10(clk, reset, PipeClearDE, PipeEnableDE, FDivStartD, FDivStartE); - flopenrc #(2) DEReg11(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput1D, FForwardInput1E); - flopenrc #(2) DEReg12(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput2D, FForwardInput2E); - flopenrc #(1) DEReg13(clk, reset, PipeClearDE, PipeEnableDE, FForwardInput3D, FForwardInput3E); - flopenrc #(64) DEReg14(clk, reset, PipeClearDE, PipeEnableDE, FPUResult64W, FPUResult64E); - flopenrc #(1) DEReg15(clk, reset, PipeClearDE, PipeEnableDE, FWriteIntD, FWriteIntE); - flopenrc #(1) DEReg16(clk, reset, PipeClearDE, PipeEnableDE, FOutputInput2D, FOutputInput2E); - flopenrc #(2) DEReg17(clk, reset, PipeClearDE, PipeEnableDE, FMemRWD, FMemRWE); - flopenrc #(1) DEReg18(clk, reset, PipeClearDE, PipeEnableDE, InstrD[15], SelLoadInputE); - + flopenrc #(1) CtrlRegE1(clk, reset, FlushE, ~StallE, FDivStartD, FDivStartE); + flopenrc #(15) CtrlRegE2(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(22) DECtrlReg(clk, reset, FlushE, ~StallE, + {FWriteEnD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, InstrD[11:7], FOpCtrlD, FWriteIntD}, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}); + //EXECUTION STAGE - // input muxs for forwarding - mux2 #(64) SrcAMuxForward({SrcAM[31:0], 32'b0}, {SrcAM, {64-`XLEN{1'b0}}}, FmtM, ForwardSrcAM); - mux4 #(64) FInput1Emux(FRD1E, FPUResult64W, FPUResult64E, ForwardSrcAM, FForwardInput1E, FInput1tmpE); - mux3 #(64) FInput2Emux(FRD2E, FPUResult64W, FPUResult64E, FForwardInput2E, FInput2E); - mux2 #(64) FInput3Emux(FRD3E, FPUResult64E, FForwardInput3E, FInput3E); - mux2 #(64) FOutputInput2mux(FInput1tmpE, FInput2E, FOutputInput2E, FInput1E); + // Hazard unit for FPU + fpuhazard hazard(.*); + + // forwarding muxs + mux3 #(64) fxemux(FRD1E, FPUResult64W, FResM, ForwardXE, SrcXE); + mux3 #(64) 
fyemux(FRD2E, FPUResult64W, FResM, ForwardYE, SrcYE); + mux3 #(64) fzemux(FRD3E, FPUResult64W, FResM, ForwardZE, SrcZE); + // first of two-stage instance of floating-point fused multiply-add unit - fma1 fma1 (.X(FInput1E), .Y(FInput2E), .Z(FInput3E), .FOpCtrlE(FOpCtrlE[2:0]),.*); + fma1 fma1 (.X(SrcXE), .Y(SrcYE), .Z(SrcZE), .FOpCtrlE(FOpCtrlE[2:0]),.*); // first and only instance of floating-point divider logic fpdivClk; @@ -229,193 +197,181 @@ module fpu ( .ECLK(fpdivClk)); // capture the inputs for div/sqrt - flopenrc #(64) reg_input1 (.d(FInput1E), .q(DivInput1E), + flopenrc #(64) reg_input1 (.d(SrcXE), .q(DivInput1E), .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d(FInput2E), .q(DivInput2E), + flopenrc #(64) reg_input2 (.d(SrcYE), .q(DivInput2E), .en(~HoldInputs), .clear(FDivSqrtDoneE), .reset(reset), .clk(clk)); fpdiv fpdivsqrt (.DivOpType(FOpCtrlE[0]), .clk(fpdivClk), .FmtE(~FmtE), .*); + + // first of two-stage instance of floating-point add/cvt unit fpuaddcvt1 fpadd1 (.*); // first of two-stage instance of floating-point comparator - fpucmp1 fpcmp1 (WE, XE, ANaNE, BNaNE, AzeroE, BzeroE, FInput1E, FInput2E, FOpCtrlE[1:0]); + fpucmp1 fpcmp1 (SrcXE, SrcYE, FOpCtrlE[2:0], FmtE, CmpInvalidE, FCmpResultE); // first and only instance of floating-point sign converter fpusgn fpsgn (.SgnOpCodeE(FOpCtrlE[1:0]),.*); // first and only instance of floating-point classify unit fpuclassify fpuclass (.*); + + // output for store instructions + assign FWriteDataE = FmtE ? 
SrcYE[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcYE[63:32]}; //***************** //fpregfile D/E pipe registers //***************** - flopenrc #(64) EMFpReg1(clk, reset, PipeClearEM, PipeEnableEM, FInput1E, FInput1M); - flopenrc #(64) EMFpReg2(clk, reset, PipeClearEM, PipeEnableEM, FInput2E, FInput2M); - flopenrc #(64) EMFpReg3(clk, reset, PipeClearEM, PipeEnableEM, FInput3E, FInput3M); + flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, SrcXE, SrcXM); + flopenrc #(64) EMFpReg2(clk, reset, FlushM, ~StallM, SrcYE, SrcYM); + flopenrc #(64) EMFpReg3(clk, reset, FlushM, ~StallM, SrcZE, SrcZM); //***************** // fma E/M pipe registers //***************** - flopenrc #(106) EMRegFma3(clk, reset, PipeClearEM, PipeEnableEM, ProdManE, ProdManM); - flopenrc #(162) EMRegFma4(clk, reset, PipeClearEM, PipeEnableEM, AlignedAddendE, AlignedAddendM); - flopenrc #(13) EMRegFma6(clk, reset, PipeClearEM, PipeEnableEM, ProdExpE, ProdExpM); - flopenrc #(1) EMRegFma7(clk, reset, PipeClearEM, PipeEnableEM, AddendStickyE, AddendStickyM); - flopenrc #(1) EMRegFma8(clk, reset, PipeClearEM, PipeEnableEM, KillProdE, KillProdM); - flopenrc #(1) EMRegFma10(clk, reset, PipeClearEM, PipeEnableEM, XZeroE, XZeroM); - flopenrc #(1) EMRegFma11(clk, reset, PipeClearEM, PipeEnableEM, YZeroE, YZeroM); - flopenrc #(1) EMRegFma12(clk, reset, PipeClearEM, PipeEnableEM, ZZeroE, ZZeroM); - flopenrc #(1) EMRegFma16(clk, reset, PipeClearEM, PipeEnableEM, XInfE, XInfM); - flopenrc #(1) EMRegFma17(clk, reset, PipeClearEM, PipeEnableEM, YInfE, YInfM); - flopenrc #(1) EMRegFma18(clk, reset, PipeClearEM, PipeEnableEM, ZInfE, ZInfM); - flopenrc #(1) EMRegFma19(clk, reset, PipeClearEM, PipeEnableEM, XNaNE, XNaNM); - flopenrc #(1) EMRegFma20(clk, reset, PipeClearEM, PipeEnableEM, YNaNE, YNaNM); - flopenrc #(1) EMRegFma21(clk, reset, PipeClearEM, PipeEnableEM, ZNaNE, ZNaNM); + flopenrc #(106) EMRegFma3(clk, reset, FlushM, ~StallM, ProdManE, ProdManM); + flopenrc #(162) EMRegFma4(clk, reset, FlushM, ~StallM, 
AlignedAddendE, AlignedAddendM); + flopenrc #(13) EMRegFma6(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(1) EMRegFma7(clk, reset, FlushM, ~StallM, AddendStickyE, AddendStickyM); + flopenrc #(1) EMRegFma8(clk, reset, FlushM, ~StallM, KillProdE, KillProdM); + flopenrc #(1) EMRegFma10(clk, reset, FlushM, ~StallM, XZeroE, XZeroM); + flopenrc #(1) EMRegFma11(clk, reset, FlushM, ~StallM, YZeroE, YZeroM); + flopenrc #(1) EMRegFma12(clk, reset, FlushM, ~StallM, ZZeroE, ZZeroM); + flopenrc #(1) EMRegFma16(clk, reset, FlushM, ~StallM, XInfE, XInfM); + flopenrc #(1) EMRegFma17(clk, reset, FlushM, ~StallM, YInfE, YInfM); + flopenrc #(1) EMRegFma18(clk, reset, FlushM, ~StallM, ZInfE, ZInfM); + flopenrc #(1) EMRegFma19(clk, reset, FlushM, ~StallM, XNaNE, XNaNM); + flopenrc #(1) EMRegFma20(clk, reset, FlushM, ~StallM, YNaNE, YNaNM); + flopenrc #(1) EMRegFma21(clk, reset, FlushM, ~StallM, ZNaNE, ZNaNM); //***************** // fpadd E/M pipe registers //***************** - flopenrc #(64) EMRegAdd1(clk, reset, PipeClearEM, PipeEnableEM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, PipeClearEM, PipeEnableEM, AddSumTcE, AddSumTcM); - flopenrc #(4) EMRegAdd3(clk, reset, PipeClearEM, PipeEnableEM, AddSelInvE, AddSelInvM); - flopenrc #(11) EMRegAdd4(clk, reset, PipeClearEM, PipeEnableEM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(1) EMRegAdd5(clk, reset, PipeClearEM, PipeEnableEM, AddCorrSignE, AddCorrSignM); - flopenrc #(1) EMRegAdd6(clk, reset, PipeClearEM, PipeEnableEM, AddOp1NormE, AddOp1NormM); - flopenrc #(1) EMRegAdd7(clk, reset, PipeClearEM, PipeEnableEM, AddOp2NormE, AddOp2NormM); - flopenrc #(1) EMRegAdd8(clk, reset, PipeClearEM, PipeEnableEM, AddOpANormE, AddOpANormM); - flopenrc #(1) EMRegAdd9(clk, reset, PipeClearEM, PipeEnableEM, AddOpBNormE, AddOpBNormM); - flopenrc #(1) EMRegAdd10(clk, reset, PipeClearEM, PipeEnableEM, AddInvalidE, AddInvalidM); - flopenrc #(1) EMRegAdd11(clk, reset, PipeClearEM, PipeEnableEM, AddDenormInE, AddDenormInM); 
- flopenrc #(1) EMRegAdd12(clk, reset, PipeClearEM, PipeEnableEM, AddConvertE, AddConvertM); - flopenrc #(1) EMRegAdd13(clk, reset, PipeClearEM, PipeEnableEM, AddSwapE, AddSwapM); - flopenrc #(1) EMRegAdd14(clk, reset, PipeClearEM, PipeEnableEM, AddNormOvflowE, AddNormOvflowM); - flopenrc #(1) EMRegAdd15(clk, reset, PipeClearEM, PipeEnableEM, AddSignAE, AddSignAM); - flopenrc #(64) EMRegAdd16(clk, reset, PipeClearEM, PipeEnableEM, AddFloat1E, AddFloat1M); - flopenrc #(64) EMRegAdd17(clk, reset, PipeClearEM, PipeEnableEM, AddFloat2E, AddFloat2M); - flopenrc #(12) EMRegAdd18(clk, reset, PipeClearEM, PipeEnableEM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd19(clk, reset, PipeClearEM, PipeEnableEM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd20(clk, reset, PipeClearEM, PipeEnableEM, AddExponentE, AddExponentM); - flopenrc #(3) EMRegAdd23(clk, reset, PipeClearEM, PipeEnableEM, AddRmE, AddRmM); - flopenrc #(4) EMRegAdd24(clk, reset, PipeClearEM, PipeEnableEM, AddOpTypeE, AddOpTypeM); - flopenrc #(1) EMRegAdd25(clk, reset, PipeClearEM, PipeEnableEM, AddPE, AddPM); - flopenrc #(1) EMRegAdd26(clk, reset, PipeClearEM, PipeEnableEM, AddOvEnE, AddOvEnM); - flopenrc #(1) EMRegAdd27(clk, reset, PipeClearEM, PipeEnableEM, AddUnEnE, AddUnEnM); + flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); + flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); + flopenrc #(4) EMRegAdd3(clk, reset, FlushM, ~StallM, AddSelInvE, AddSelInvM); + flopenrc #(11) EMRegAdd4(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); + flopenrc #(1) EMRegAdd5(clk, reset, FlushM, ~StallM, AddCorrSignE, AddCorrSignM); + flopenrc #(1) EMRegAdd6(clk, reset, FlushM, ~StallM, AddOp1NormE, AddOp1NormM); + flopenrc #(1) EMRegAdd7(clk, reset, FlushM, ~StallM, AddOp2NormE, AddOp2NormM); + flopenrc #(1) EMRegAdd8(clk, reset, FlushM, ~StallM, AddOpANormE, AddOpANormM); + flopenrc #(1) EMRegAdd9(clk, reset, FlushM, ~StallM, AddOpBNormE, 
AddOpBNormM); + flopenrc #(1) EMRegAdd10(clk, reset, FlushM, ~StallM, AddInvalidE, AddInvalidM); + flopenrc #(1) EMRegAdd11(clk, reset, FlushM, ~StallM, AddDenormInE, AddDenormInM); + flopenrc #(1) EMRegAdd12(clk, reset, FlushM, ~StallM, AddConvertE, AddConvertM); + flopenrc #(1) EMRegAdd13(clk, reset, FlushM, ~StallM, AddSwapE, AddSwapM); + flopenrc #(1) EMRegAdd14(clk, reset, FlushM, ~StallM, AddNormOvflowE, AddNormOvflowM); + flopenrc #(1) EMRegAdd15(clk, reset, FlushM, ~StallM, AddSignAE, AddSignAM); + flopenrc #(64) EMRegAdd16(clk, reset, FlushM, ~StallM, AddFloat1E, AddFloat1M); + flopenrc #(64) EMRegAdd17(clk, reset, FlushM, ~StallM, AddFloat2E, AddFloat2M); + flopenrc #(12) EMRegAdd18(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); + flopenrc #(12) EMRegAdd19(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); + flopenrc #(11) EMRegAdd20(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); + flopenrc #(3) EMRegAdd23(clk, reset, FlushM, ~StallM, AddRmE, AddRmM); + flopenrc #(4) EMRegAdd24(clk, reset, FlushM, ~StallM, AddOpTypeE, AddOpTypeM); + flopenrc #(1) EMRegAdd25(clk, reset, FlushM, ~StallM, AddPE, AddPM); + flopenrc #(1) EMRegAdd26(clk, reset, FlushM, ~StallM, AddOvEnE, AddOvEnM); + flopenrc #(1) EMRegAdd27(clk, reset, FlushM, ~StallM, AddUnEnE, AddUnEnM); //***************** // fpcmp E/M pipe registers //***************** - flopenrc #(8) EMRegCmp1(clk, reset, PipeClearEM, PipeEnableEM, WE, WM); - flopenrc #(8) EMRegCmp2(clk, reset, PipeClearEM, PipeEnableEM, XE, XM); - flopenrc #(1) EMRegcmp3(clk, reset, PipeClearEM, PipeEnableEM, ANaNE, ANaNM); - flopenrc #(1) EMRegCmp4(clk, reset, PipeClearEM, PipeEnableEM, BNaNE, BNaNM); - flopenrc #(1) EMRegCmp5(clk, reset, PipeClearEM, PipeEnableEM, AzeroE, AzeroM); - flopenrc #(1) EMRegCmp6(clk, reset, PipeClearEM, PipeEnableEM, BzeroE, BzeroM); + flopenrc #(1) EMRegCmp1(clk, reset, FlushM, ~StallM, CmpInvalidE, CmpInvalidM); + flopenrc #(64) EMRegCmp3(clk, reset, FlushM, ~StallM, 
FCmpResultE, FCmpResultM); - // put this in for the event we want to delay fsgn - will otherwise bypass //***************** // fpsgn E/M pipe registers //***************** - flopenrc #(64) EMRegSgn2(clk, reset, PipeClearEM, PipeEnableEM, SgnResultE, SgnResultM); - flopenrc #(5) EMRegSgn3(clk, reset, PipeClearEM, PipeEnableEM, SgnFlagsE, SgnFlagsM); + flopenrc #(64) EMRegSgn2(clk, reset, FlushM, ~StallM, SgnResultE, SgnResultM); + flopenrc #(5) EMRegSgn3(clk, reset, FlushM, ~StallM, SgnFlagsE, SgnFlagsM); //***************** // other E/M pipe registers //***************** - flopenrc #(1) EMReg1(clk, reset, PipeClearEM, PipeEnableEM, FWriteEnE, FWriteEnM); - flopenrc #(3) EMReg2(clk, reset, PipeClearEM, PipeEnableEM, FResultSelE, FResultSelM); - flopenrc #(3) EMReg3(clk, reset, PipeClearEM, PipeEnableEM, FrmE, FrmM); - flopenrc #(1) EMReg4(clk, reset, PipeClearEM, PipeEnableEM, FmtE, FmtM); - flopenrc #(5) EMReg5(clk, reset, PipeClearEM, PipeEnableEM, RdE, RdM); - flopenrc #(4) EMReg6(clk, reset, PipeClearEM, PipeEnableEM, FOpCtrlE, FOpCtrlM); - flopenrc #(1) EMReg7(clk, reset, PipeClearEM, PipeEnableEM, FWriteIntE, FWriteIntM); - flopenrc #(2) EMReg8(clk, reset, PipeClearEM, PipeEnableEM, FMemRWE, FMemRWM); - flopenrc #(1) EMReg9(clk, reset, PipeClearEM, PipeEnableEM, SelLoadInputE, SelLoadInputM); + flopenrc #(22) EMCtrlReg(clk, reset, FlushM, ~StallM, + {FWriteEnE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, RdE, FOpCtrlE, FWriteIntE}, + {FWriteEnM, FResultSelM, FResSelM, FIntResSelM, FrmM, FmtM, RdM, FOpCtrlM, FWriteIntM}); //***************** // fpuclassify E/M pipe registers //***************** - flopenrc #(64) EMRegClass(clk, reset, PipeClearEM, PipeEnableEM, ClassResultE, ClassResultM); + flopenrc #(64) EMRegClass(clk, reset, FlushM, ~StallM, ClassResultE, ClassResultM); //BEGIN MEMORY STAGE - assign FWriteDataM = FmtM ? 
FInput1M[63:64-`XLEN] : {{`XLEN-32{1'b0}}, FInput1M[63:32]}; - //adjecent adress values are sent to the FPU, select the correct one - // -imm is 80000 most of the time vs the error one which is 00000 - // mux3 #(64) FLoadResultMux({HRDATA[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); - - fma2 fma2(.X(FInput1M), .Y(FInput2M), .Z(FInput3M), .FOpCtrlM(FOpCtrlM[2:0]), .*); + mux3 #(64) FResMux(AlignedSrcAM, SgnResultM, FCmpResultM, FResSelM, FResM); + assign FFlgM = CmpInvalidM & FResSelM[1]; + + assign SrcXMAligned = FmtM ? SrcXM[63:64-`XLEN] : {{`XLEN-32{1'b0}}, SrcXM[63:32]}; + mux3 #(`XLEN) IntResMux(FCmpResultM[`XLEN-1:0], SrcXMAligned, ClassResultM[`XLEN-1:0], FIntResSelM, FIntResM); + + // second instance of two-stage FMA unit + fma2 fma2(.X(SrcXM), .Y(SrcYM), .Z(SrcZM), .FOpCtrlM(FOpCtrlM[2:0]), .*); // second instance of two-stage floating-point add/cvt unit fpuaddcvt2 fpadd2 (.*); - // second instance of two-stage floating-point comparator - fpucmp2 fpcmp2 (.Invalid(CmpInvalidM), .FCC(CmpFCCM), .ANaN(ANaNM), .BNaN(BNaNM), .Azero(AzeroM), - .Bzero(BzeroM), .w(WM), .x(XM), .Sel({1'b0, FmtM}), .op1(FInput1M), .op2(FInput2M), .*); - // Align SrcA to MSB when single precicion mux2 #(64) SrcAMux({SrcAM[31:0], 32'b0}, {{64-`XLEN{1'b0}}, SrcAM}, FmtM, AlignedSrcAM); - //***************** //fpregfile M/W pipe registers //***************** - flopenrc #(64) MWFpReg1(clk, reset, PipeClearMW, PipeEnableMW, FInput1M, FInput1W); + flopenrc #(64) MWFpReg1(clk, reset, FlushW, ~StallW, SrcXM, SrcXW); + flopenrc #(64) MWFpReg2(clk, reset, FlushW, ~StallW, SrcYM, SrcYW); //***************** // fma M/W pipe registers //***************** - flopenrc #(64) MWRegFma1(clk, reset, PipeClearMW, PipeEnableMW, FmaResultM, FmaResultW); - flopenrc #(5) MWRegFma2(clk, reset, 
PipeClearMW, PipeEnableMW, FmaFlagsM, FmaFlagsW); + flopenrc #(64) MWRegFma1(clk, reset, FlushW, ~StallW, FmaResultM, FmaResultW); + flopenrc #(5) MWRegFma2(clk, reset, FlushW, ~StallW, FmaFlagsM, FmaFlagsW); //***************** // fpdiv M/W pipe registers //***************** - flopenrc #(64) MWRegDiv1(clk, reset, PipeClearMW, PipeEnableMW, FDivResultM, FDivResultW); - flopenrc #(5) MWRegDiv2(clk, reset, PipeClearMW, PipeEnableMW, FDivFlagsM, FDivFlagsW); - flopenrc #(1) MWRegDiv3(clk, reset, PipeClearMW, PipeEnableMW, DivDenormM, DivDenormW); + flopenrc #(64) MWRegDiv1(clk, reset, FlushW, ~StallW, FDivResultM, FDivResultW); + flopenrc #(5) MWRegDiv2(clk, reset, FlushW, ~StallW, FDivFlagsM, FDivFlagsW); + flopenrc #(1) MWRegDiv3(clk, reset, FlushW, ~StallW, DivDenormM, DivDenormW); //***************** // fpadd M/W pipe registers //***************** - flopenrc #(64) MWRegAdd1(clk, reset, PipeClearMW, PipeEnableMW, FAddResultM, FAddResultW); - flopenrc #(5) MWRegAdd2(clk, reset, PipeClearMW, PipeEnableMW, FAddFlagsM, FAddFlagsW); + flopenrc #(64) MWRegAdd1(clk, reset, FlushW, ~StallW, FAddResultM, FAddResultW); + flopenrc #(5) MWRegAdd2(clk, reset, FlushW, ~StallW, FAddFlagsM, FAddFlagsW); //***************** // fpcmp M/W pipe registers //***************** - flopenrc #(1) MWRegCmp1(clk, reset, PipeClearMW, PipeEnableMW, CmpInvalidM, CmpInvalidW); - flopenrc #(2) MWRegCmp2(clk, reset, PipeClearMW, PipeEnableMW, CmpFCCM, CmpFCCW); - flopenrc #(64) MWRegCmp3(clk, reset, PipeClearMW, PipeEnableMW, FCmpResultM, FCmpResultW); + flopenrc #(1) MWRegCmp1(clk, reset, FlushW, ~StallW, CmpInvalidM, CmpInvalidW); + // flopenrc #(2) MWRegCmp2(clk, reset, FlushW, ~StallW, CmpFCCM, CmpFCCW); + flopenrc #(64) MWRegCmp3(clk, reset, FlushW, ~StallW, FCmpResultM, FCmpResultW); //***************** // fpsgn M/W pipe registers //***************** - flopenrc #(64) MWRegSgn1(clk, reset, PipeClearMW, PipeEnableMW, SgnResultM, SgnResultW); - flopenrc #(5) MWRegSgn2(clk, reset, PipeClearMW, 
PipeEnableMW, SgnFlagsM, SgnFlagsW); + flopenrc #(64) MWRegSgn1(clk, reset, FlushW, ~StallW, SgnResultM, SgnResultW); + flopenrc #(5) MWRegSgn2(clk, reset, FlushW, ~StallW, SgnFlagsM, SgnFlagsW); //***************** // other M/W pipe registers //***************** - flopenrc #(1) MWReg1(clk, reset, PipeClearMW, PipeEnableMW, FWriteEnM, FWriteEnW); - flopenrc #(3) MWReg2(clk, reset, PipeClearMW, PipeEnableMW, FResultSelM, FResultSelW); - flopenrc #(1) MWReg3(clk, reset, PipeClearMW, PipeEnableMW, FmtM, FmtW); - flopenrc #(5) MWReg4(clk, reset, PipeClearMW, PipeEnableMW, RdM, RdW); - flopenrc #(64) MWReg5(clk, reset, PipeClearMW, PipeEnableMW, AlignedSrcAM, SrcAW); - // flopenrc #(64) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FLoadStoreResultM, FLoadStoreResultW); - flopenrc #(1) MWReg7(clk, reset, PipeClearMW, PipeEnableMW, FWriteIntM, FWriteIntW); - flopenrc #(4) MWReg6(clk, reset, PipeClearMW, PipeEnableMW, FOpCtrlM, FOpCtrlW); + flopenrc #(11) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FWriteEnM, FResultSelM, RdM, FmtM, FWriteIntM}, + {FWriteEnW, FResultSelW, RdW, FmtW, FWriteIntW}); //***************** // fpuclassify M/W pipe registers //***************** - flopenrc #(64) MWRegClass(clk, reset, PipeClearMW, PipeEnableMW, ClassResultM, ClassResultW); + flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, ClassResultM, ClassResultW); + flopenrc #(64) MWRegClass2(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(1) MWRegClass1(clk, reset, FlushW, ~StallW, FFlgM, FFlgW); @@ -424,14 +380,6 @@ module fpu ( //######################################### // BEGIN WRITEBACK STAGE //######################################### - - - // mux3 #(64) FLoadResultMux({ReadD[31:0], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA[`AHBW-1:`AHBW-32], {64-`AHBW+(`XLEN-32){1'b0}}}, {HRDATA, {64-`AHBW{1'b0}}}, {FmtM, SelLoadInputM}, FLoadResultM); - // mux2 #(64) FLoadStoreResultMux(FLoadResultM, FInput1M, |FOpCtrlM[2:1], FLoadStoreResultM); - //***RV32D needs to give two bus transactions 
- mux2 #(64) FLoadResultMux({ReadDataW[31:0], {32{1'b0}}}, {ReadDataW, {64-`XLEN{1'b0}}}, FmtW, FLoadResultW); - mux2 #(64) FLoadStoreResultMux(FLoadResultW, FInput1W, |FOpCtrlW[2:1], FLoadStoreResultW); - @@ -440,47 +388,26 @@ module fpu ( always_comb begin case (FResultSelW) - // div/sqrt - 3'b000 : FPUFlagsW = FDivFlagsW; - // cmp - 3'b001 : FPUFlagsW = {CmpInvalidW, 4'b0}; - //fma/mult - 3'b010 : FPUFlagsW = FmaFlagsW; - // sgn inj - 3'b011 : FPUFlagsW = SgnFlagsW; - // add/sub/cnvt - 3'b100 : FPUFlagsW = FAddFlagsW; - // classify - 3'b101 : FPUFlagsW = 5'b0; - // output SrcAW - 3'b110 : FPUFlagsW = 5'b0; - // output FRD1 - 3'b111 : FPUFlagsW = 5'b0; + 3'b000 : FPUFlagsW = 5'b0; + 3'b001 : FPUFlagsW = FmaFlagsW; + 3'b010 : FPUFlagsW = FAddFlagsW; + 3'b011 : FPUFlagsW = FDivFlagsW; + 3'b100 : FPUFlagsW = {4'b0,FFlgW}; default : FPUFlagsW = 5'bxxxxx; endcase end - + always_comb begin case (FResultSelW) - // div/sqrt - 3'b000 : FPUResult64W = FDivResultW; - // cmp - 3'b001 : FPUResult64W = FCmpResultW; - //fma/mult - 3'b010 : FPUResult64W = FmaResultW; - // sgn inj - 3'b011 : FPUResult64W = SgnResultW; - // add/sub/cnvt - 3'b100 : FPUResult64W = FAddResultW; - // classify - 3'b101 : FPUResult64W = ClassResultW; - // output SrcAW - 3'b110 : FPUResult64W = SrcAW; - // Load/Store/Move to FP-register - 3'b111 : FPUResult64W = FLoadStoreResultW; - default : FPUResult64W = {64{1'bx}}; + 3'b000 : FPUResult64W = FmtW ? 
{ReadDataW, {64-`XLEN{1'b0}}} : {ReadDataW[31:0], 32'b0}; + 3'b001 : FPUResult64W = FmaResultW; + 3'b010 : FPUResult64W = FAddResultW; + 3'b011 : FPUResult64W = FDivResultW; + 3'b100 : FPUResult64W = FResW; + default : FPUResult64W = 64'bxxxxx; endcase - end // always_comb + end + // interface between XLEN size datapath and double-precision sized // floating-point results diff --git a/wally-pipelined/src/fpu/fpuaddcvt1.sv b/wally-pipelined/src/fpu/fpuaddcvt1.sv index febd47d1..8f045dcd 100755 --- a/wally-pipelined/src/fpu/fpuaddcvt1.sv +++ b/wally-pipelined/src/fpu/fpuaddcvt1.sv @@ -27,10 +27,10 @@ // -module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, FInput1E, FInput2E, FOpCtrlE, FmtE); +module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, AddOp1NormE, AddOp2NormE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddConvertE, AddSwapE, AddNormOvflowE, AddSignAE, AddFloat1E, AddFloat2E, AddExp1DenormE, AddExp2DenormE, AddExponentE, SrcXE, SrcYE, FOpCtrlE, FmtE); - input logic [63:0] FInput1E; // 1st input operand (A) - input logic [63:0] FInput2E; // 2nd input operand (B) + input logic [63:0] SrcXE; // 1st input operand (A) + input logic [63:0] SrcYE; // 2nd input operand (B) input logic [3:0] FOpCtrlE; // Function opcode input logic FmtE; // Result Precision (1 for double, 0 for single) @@ -81,12 +81,12 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, // and the sign of the first operand is set appropratiately based on // if the operation is absolute value or negation. 
- convert_inputs conv1 (AddFloat1E, AddFloat2E, FInput1E, FInput2E, FOpCtrlE, P); + convert_inputs conv1 (AddFloat1E, AddFloat2E, SrcXE, SrcYE, FOpCtrlE, P); // Test for exceptions and return the "Invalid Operation" and // "Denormalized" Input Flags. The "AddSelInvE" is used in // the third pipeline stage to select the result. Also, AddOp1NormE - // and AddOp2NormE are one if FInput1E and FInput2E are not zero or denormalized. + // and AddOp2NormE are one if SrcXE and SrcYE are not zero or denormalized. // sub is one if the effective operation is subtaction. exception exc1 (AddSelInvE, AddInvalidE, AddDenormInE, AddOp1NormE, AddOp2NormE, sub, @@ -159,8 +159,8 @@ module fpuaddcvt1 (AddSumE, AddSumTcE, AddSelInvE, AddExpPostSumE, AddCorrSignE, // Place either the sign-extened 32-bit value or the original 64-bit value // into IntValue (to be used for integer to floating point conversion) - assign IntValue [31:0] = FInput1E[31:0]; - assign IntValue [63:32] = FOpCtrlE[0] ? {32{FInput1E[31]}} : FInput1E[63:32]; + assign IntValue [31:0] = SrcXE[31:0]; + assign IntValue [63:32] = FOpCtrlE[0] ? {32{SrcXE[31]}} : SrcXE[63:32]; // If doing an integer to floating point conversion, mantissaA3 is set to // IntVal and the prenomalized exponent is set to 1084. 
Otherwise, diff --git a/wally-pipelined/src/fpu/fpuclassify.sv b/wally-pipelined/src/fpu/fpuclassify.sv index 1000bdf4..b320b2f0 100644 --- a/wally-pipelined/src/fpu/fpuclassify.sv +++ b/wally-pipelined/src/fpu/fpuclassify.sv @@ -1,7 +1,8 @@ + `include "wally-config.vh" module fpuclassify ( - input logic [63:0] FInput1E, + input logic [63:0] SrcXE, input logic FmtE, // 0-single 1-double output logic [63:0] ClassResultE ); @@ -13,9 +14,9 @@ module fpuclassify ( logic ExpNotZero, ExpOnes, ManNotZero, ExpZero, ManZero, FirstBitMan; // single and double precision layouts - assign single = FInput1E[63:32]; - assign double = FInput1E; - assign sign = FInput1E[63]; + assign single = SrcXE[63:32]; + assign double = SrcXE; + assign sign = SrcXE[63]; // basic calculations for readabillity assign ExpNotZero = FmtE ? |double[62:52] : |single[30:23]; @@ -43,10 +44,7 @@ module fpuclassify ( // bit 7 - +infinity // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassResultE = FmtE ? {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity} : - {{22{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, - ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity, {32{1'b0}}}; - + assign ClassResultE = {{54{1'b0}}, FirstBitMan&NaN, ~FirstBitMan&NaN, ~sign&infinity, ~sign&normal, + ~sign&subnormal, ~sign&zero, sign&zero, sign&subnormal, sign&normal, sign&infinity}; endmodule diff --git a/wally-pipelined/src/fpu/fpucmp1.sv b/wally-pipelined/src/fpu/fpucmp1.sv index 1cf267f2..3a8245e6 100755 --- a/wally-pipelined/src/fpu/fpucmp1.sv +++ b/wally-pipelined/src/fpu/fpucmp1.sv @@ -1,3 +1,4 @@ + // // File name : fpcomp.v // Title : Floating-Point Comparator @@ -17,9 +18,9 @@ // and correct for sign bits // // This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal Sel that indicates the type of +// 
signals, and a 2-bit signal FOpCtrlE that indicates the type of // operands being compared as indicated below. -// Sel Description +// FOpCtrlE Description // 00 double precision numbers // 01 single precision numbers // 10 half precision numbers @@ -37,24 +38,41 @@ // It also produces an invalid operation flag, which is one // if either of the input operands is a signaling NaN per 754 -module fpucmp1 (w, x, ANaN, BNaN, Azero, Bzero, op1, op2, Sel);///***fix Sel to match spec - - input logic [63:0] op1; - input logic [63:0] op2; - input logic [1:0] Sel; +`include "wally-config.vh" +module fpucmp1 ( + input logic [63:0] op1, + input logic [63:0] op2, + input logic [2:0] FOpCtrlE, + input logic FmtE, - output logic [7:0] w, x; - output logic ANaN, BNaN; - output logic Azero, Bzero; + + output logic Invalid, // Invalid Operation + // output logic [1:0] FCC, // Condition Codes + output logic [63:0] FCmpResultE); + // Perform magnitude comparison between the 63 least signficant bits + // of the input operands. Only LT and EQ are returned, since GT can + // be determined from these values. + logic [1:0] FCC; // Condition Codes + logic [7:0] w, x; + logic ANaN, BNaN; + logic Azero, Bzero; + logic LT; // magnitude op1 < magnitude op2 + logic EQ; // magnitude op1 = magnitude op2 + + magcompare64b_1 magcomp1 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); + + // Determine final values based on output of magnitude comparison, + // sign bits, and special case testing. + exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, FOpCtrlE); // Perform magnitude comparison between the 63 least signficant bits // of the input operands. Only LT and EQ are returned, since GT can // be determined from these values. - magcompare64b_1 magcomp2 (w, x, {~op1[63], op1[62:0]}, {~op2[63], op2[62:0]}); + magcompare64b_2 magcomp2 (LT, EQ, w, x); // Determine final values based on output of magnitude comparison, // sign bits, and special case testing. 
- exception_cmp_1 exc1 (ANaN, BNaN, Azero, Bzero, op1, op2, Sel); + exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .FOpCtrlE(FOpCtrlE), .A(op1), .B(op2), .*); endmodule // fpcomp @@ -178,9 +196,9 @@ module magcompare64b_1 (w, x, A, B); endmodule // magcompare64b // This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of +// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of // operands being compared as indicated below. -// Sel Description +// FOpCtrlE Description // 00 double precision numbers // 01 single precision numbers // 10 half precision numbers @@ -196,11 +214,11 @@ endmodule // magcompare64b // It also produces a invalid operation flag, which is one // if either of the input operands is a signaling NaN. -module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); +module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, FOpCtrlE); input logic [63:0] A; input logic [63:0] B; - input logic [1:0] Sel; + input logic [2:0] FOpCtrlE; logic dp, sp, hp; @@ -209,9 +227,9 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); output logic Azero; output logic Bzero; - assign dp = !Sel[1]&!Sel[0]; - assign sp = !Sel[1]&Sel[0]; - assign hp = Sel[1]&!Sel[0]; + assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; + assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; + assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; // Test if A or B is NaN. assign ANaN = (A[62]&A[61]&A[60]&A[59]&A[58]) & @@ -232,3 +250,216 @@ module exception_cmp_1 (ANaN, BNaN, Azero, Bzero, A, B, Sel); assign Bzero = (B[62:0] == 63'h0); endmodule // exception_cmp +// +// File name : fpcomp.v +// Title : Floating-Point Comparator +// project : FPU +// Library : fpcomp +// Author(s) : James E. 
Stine +// Purpose : definition of main unit to floating-point comparator +// notes : +// +// Copyright Oklahoma State University +// +// Floating Point Comparator (Algorithm) +// +// 1.) Performs sign-extension if the inputs are 32-bit integers. +// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// 3.) Check for special cases (+0=-0, unordered, and infinite values) +// and correct for sign bits +// +// This module takes 64-bits inputs op1 and op2, VSS, and VDD +// signals, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. +// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 (unused) +// +// The comparator produces a 2-bit signal FCC, which +// indicates the result of the comparison: +// +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// +// It also produces an invalid operation flag, which is one +// if either of the input operands is a signaling NaN per 754 + + +/*module magcompare2b (LT, GT, A, B); + + input logic [1:0] A; + input logic [1:0] B; + + output logic LT; + output logic GT; + + // Determine if A < B using a minimized sum-of-products expression + assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; + // Determine if A > B using a minimized sum-of-products expression + assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; + +endmodule*/ // magcompare2b + +// 2-bit magnitude comparator +// This module compares two 2-bit values A and B. LT is '1' if A < B +// and GT is '1'if A > B. LT and GT are both '0' if A = B. 
However, +// this version actually incorporates don't cares into the equation to +// simplify the optimization + +// module magcompare2c (LT, GT, A, B); + +// input logic [1:0] A; +// input logic [1:0] B; + +// output logic LT; +// output logic GT; + +// assign LT = B[1] | (!A[1]&B[0]); +// assign GT = A[1] | (!B[1]&A[0]); + +// endmodule // magcompare2b + +// This module compares two 64-bit values A and B. LT is '1' if A < B +// and EQ is '1'if A = B. LT and GT are both '0' if A > B. +// This structure was modified so +// that it only does a strict magnitdude comparison, and only +// returns flags for less than (LT) and eqaual to (EQ). It uses a tree +// of 63 2-bit magnitude comparators, followed by one OR gates. +// +// J. E. Stine and M. J. Schulte, "A combined two's complement and +// floating-point comparator," 2005 IEEE International Symposium on +// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. +// doi: 10.1109/ISCAS.2005.1464531 + +module magcompare64b_2 (LT, EQ, w, x); + + input logic [7:0] w; + input logic [7:0] x; + logic [3:0] y; + logic [3:0] z; + logic [1:0] a; + logic [1:0] b; + logic GT; + + output logic LT; + output logic EQ; + + magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); + magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); + magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); + magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); + + magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); + magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); + + magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); + + assign EQ = ~(LT | GT); + +endmodule // magcompare64b + +// This module takes 64-bits inputs A and B, two magnitude comparison +// flags LT_mag and EQ_mag, and a 2-bit signal FOpCtrlE that indicates the type of +// operands being compared as indicated below. 
+// FOpCtrlE Description +// 00 double precision numbers +// 01 single precision numbers +// 10 half precision numbers +// 11 bfloat precision numbers +// +// The comparator produces a 2-bit signal fcc, which +// indicates the result of the comparison as follows: +// fcc decscription +// 00 A = B +// 01 A < B +// 10 A > B +// 11 A and B are unordered (i.e., A or B is NaN) +// It also produces a invalid operation flag, which is one +// if either of the input operands is a signaling NaN. + +module exception_cmp_2 ( + input logic [63:0] A, + input logic [63:0] B, + input logic FmtE, + input logic LT_mag, + input logic EQ_mag, + input logic [2:0] FOpCtrlE, + + output logic invalid, + output logic [1:0] fcc, + output logic [63:0] FCmpResultE, + + input logic Azero, + input logic Bzero, + input logic ANaN, + input logic BNaN); + + logic dp; + logic sp; + logic hp; + logic ASNaN; + logic BSNaN; + logic UO; + logic GT; + logic LT; + logic EQ; + logic [62:0] sixtythreezeros = 63'h0; + + assign dp = !FOpCtrlE[1]&!FOpCtrlE[0]; + assign sp = !FOpCtrlE[1]&FOpCtrlE[0]; + assign hp = FOpCtrlE[1]&!FOpCtrlE[0]; + + // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating + // point comparison is being performed. + assign UO = (ANaN | BNaN); + + // Test if A or B is a signaling NaN. + assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); + assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); + + // If either A or B is a signaling NaN the "Invalid Operation" + // exception flag is set to one; otherwise it is zero. + assign invalid = (ASNaN | BSNaN); + + // A and B are equal if (their magnitudes are equal) AND ((their signs are + // equal) or (their magnitudes are zero AND they are floating point + // numbers)). Also, A and B are not equal if they are unordered. 
+ assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); + + // A is less than B if (A is negative and B is posiive) OR + // (A and B are positive and the magnitude of A is less than + // the magnitude of B) or (A and B are negative integers and + // the magnitude of A is less than the magnitude of B) or + // (A and B are negative floating point numbers and + // the magnitude of A is greater than the magnitude of B). + // Also, A is not less than B if A and B are equal or unordered. + assign LT = ((~LT_mag & A[63] & B[63]) | + (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; + + // A is greater than B when LT, EQ, and UO are are false. + assign GT = ~(LT | EQ | UO); + + // Note: it may be possible to optimize the setting of fcc + // a little more, but it is probably not worth the effort. + + // Set the bits of fcc based on LT, GT, EQ, and UO + assign fcc[0] = LT | UO; + assign fcc[1] = GT | UO; + + always_comb begin + case (FOpCtrlE[2:0]) + 3'b111: FCmpResultE = LT ? A : B;//min + 3'b101: FCmpResultE = GT ? A : B;//max + 3'b010: FCmpResultE = {63'b0, EQ};//equal + 3'b001: FCmpResultE = {63'b0, LT};//less than + 3'b011: FCmpResultE = {63'b0, LT|EQ};//less than or equal + default: FCmpResultE = 64'b0; + endcase + end + +endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpucmp2.sv b/wally-pipelined/src/fpu/fpucmp2.sv index 42a780ac..ee14afb9 100755 --- a/wally-pipelined/src/fpu/fpucmp2.sv +++ b/wally-pipelined/src/fpu/fpucmp2.sv @@ -1,243 +1,243 @@ -// -// File name : fpcomp.v -// Title : Floating-Point Comparator -// project : FPU -// Library : fpcomp -// Author(s) : James E. Stine -// Purpose : definition of main unit to floating-point comparator -// notes : -// -// Copyright Oklahoma State University -// -// Floating Point Comparator (Algorithm) -// -// 1.) Performs sign-extension if the inputs are 32-bit integers. -// 2.) Perform a magnitude comparison on the lower 63 bits of the inputs -// 3.) 
Check for special cases (+0=-0, unordered, and infinite values) -// and correct for sign bits -// -// This module takes 64-bits inputs op1 and op2, VSS, and VDD -// signals, and a 2-bit signal Sel that indicates the type of -// operands being compared as indicated below. -// Sel Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 (unused) -// -// The comparator produces a 2-bit signal FCC, which -// indicates the result of the comparison: -// -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// -// It also produces an invalid operation flag, which is one -// if either of the input operands is a signaling NaN per 754 +// // +// // File name : fpcomp.v +// // Title : Floating-Point Comparator +// // project : FPU +// // Library : fpcomp +// // Author(s) : James E. Stine +// // Purpose : definition of main unit to floating-point comparator +// // notes : +// // +// // Copyright Oklahoma State University +// // +// // Floating Point Comparator (Algorithm) +// // +// // 1.) Performs sign-extension if the inputs are 32-bit integers. +// // 2.) Perform a magnitude comparison on the lower 63 bits of the inputs +// // 3.) Check for special cases (+0=-0, unordered, and infinite values) +// // and correct for sign bits +// // +// // This module takes 64-bits inputs op1 and op2, VSS, and VDD +// // signals, and a 2-bit signal Sel that indicates the type of +// // operands being compared as indicated below. 
+// // Sel Description +// // 00 double precision numbers +// // 01 single precision numbers +// // 10 half precision numbers +// // 11 (unused) +// // +// // The comparator produces a 2-bit signal FCC, which +// // indicates the result of the comparison: +// // +// // fcc decscription +// // 00 A = B +// // 01 A < B +// // 10 A > B +// // 11 A and B are unordered (i.e., A or B is NaN) +// // +// // It also produces an invalid operation flag, which is one +// // if either of the input operands is a signaling NaN per 754 -module fpucmp2 ( - input logic [63:0] op1, - input logic [63:0] op2, - input logic [1:0] Sel, - input logic [7:0] w, x, - input logic ANaN, BNaN, - input logic Azero, Bzero, - input logic [3:0] FOpCtrlM, - input logic FmtM, +// module fpucmp2 ( +// input logic [63:0] op1, +// input logic [63:0] op2, +// input logic [1:0] Sel, +// input logic [7:0] w, x, +// input logic ANaN, BNaN, +// input logic Azero, Bzero, +// input logic [3:0] FOpCtrlM, +// input logic FmtM, - output logic Invalid, // Invalid Operation - output logic [1:0] FCC, // Condition Codes - output logic [63:0] FCmpResultM); +// output logic Invalid, // Invalid Operation +// output logic [1:0] FCC, // Condition Codes +// output logic [63:0] FCmpResultM); - logic LT; // magnitude op1 < magnitude op2 - logic EQ; // magnitude op1 = magnitude op2 +// logic LT; // magnitude op1 < magnitude op2 +// logic EQ; // magnitude op1 = magnitude op2 - // Perform magnitude comparison between the 63 least signficant bits - // of the input operands. Only LT and EQ are returned, since GT can - // be determined from these values. - magcompare64b_2 magcomp2 (LT, EQ, w, x); +// // Perform magnitude comparison between the 63 least signficant bits +// // of the input operands. Only LT and EQ are returned, since GT can +// // be determined from these values. +// magcompare64b_2 magcomp2 (LT, EQ, w, x); - // Determine final values based on output of magnitude comparison, - // sign bits, and special case testing. 
- exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); +// // Determine final values based on output of magnitude comparison, +// // sign bits, and special case testing. +// exception_cmp_2 exc2 (.invalid(Invalid), .fcc(FCC), .LT_mag(LT), .EQ_mag(EQ), .ANaN(ANaN), .BNaN(BNaN), .Azero(Azero), .Bzero(Bzero), .Sel(Sel), .A(op1), .B(op2), .*); -endmodule // fpcomp +// endmodule // fpcomp -/*module magcompare2b (LT, GT, A, B); - - input logic [1:0] A; - input logic [1:0] B; - - output logic LT; - output logic GT; - - // Determine if A < B using a minimized sum-of-products expression - assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; - // Determine if A > B using a minimized sum-of-products expression - assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; - -endmodule*/ // magcompare2b - -// 2-bit magnitude comparator -// This module compares two 2-bit values A and B. LT is '1' if A < B -// and GT is '1'if A > B. LT and GT are both '0' if A = B. However, -// this version actually incorporates don't cares into the equation to -// simplify the optimization - -// module magcompare2c (LT, GT, A, B); +// /*module magcompare2b (LT, GT, A, B); // input logic [1:0] A; // input logic [1:0] B; -// output logic LT; -// output logic GT; +// output logic LT; +// output logic GT; -// assign LT = B[1] | (!A[1]&B[0]); -// assign GT = A[1] | (!B[1]&A[0]); +// // Determine if A < B using a minimized sum-of-products expression +// assign LT = ~A[1]&B[1] | ~A[1]&~A[0]&B[0] | ~A[0]&B[1]&B[0]; +// // Determine if A > B using a minimized sum-of-products expression +// assign GT = A[1]&~B[1] | A[1]&A[0]&~B[0] | A[0]&~B[1]&~B[0]; -// endmodule // magcompare2b +// endmodule*/ // magcompare2b -// This module compares two 64-bit values A and B. LT is '1' if A < B -// and EQ is '1'if A = B. LT and GT are both '0' if A > B. 
-// This structure was modified so -// that it only does a strict magnitdude comparison, and only -// returns flags for less than (LT) and eqaual to (EQ). It uses a tree -// of 63 2-bit magnitude comparators, followed by one OR gates. -// -// J. E. Stine and M. J. Schulte, "A combined two's complement and -// floating-point comparator," 2005 IEEE International Symposium on -// Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. -// doi: 10.1109/ISCAS.2005.1464531 +// // 2-bit magnitude comparator +// // This module compares two 2-bit values A and B. LT is '1' if A < B +// // and GT is '1'if A > B. LT and GT are both '0' if A = B. However, +// // this version actually incorporates don't cares into the equation to +// // simplify the optimization -module magcompare64b_2 (LT, EQ, w, x); +// // module magcompare2c (LT, GT, A, B); - input logic [7:0] w; - input logic [7:0] x; - logic [3:0] y; - logic [3:0] z; - logic [1:0] a; - logic [1:0] b; - logic GT; +// // input logic [1:0] A; +// // input logic [1:0] B; - output logic LT; - output logic EQ; +// // output logic LT; +// // output logic GT; + +// // assign LT = B[1] | (!A[1]&B[0]); +// // assign GT = A[1] | (!B[1]&A[0]); + +// // endmodule // magcompare2b + +// // This module compares two 64-bit values A and B. LT is '1' if A < B +// // and EQ is '1'if A = B. LT and GT are both '0' if A > B. +// // This structure was modified so +// // that it only does a strict magnitdude comparison, and only +// // returns flags for less than (LT) and eqaual to (EQ). It uses a tree +// // of 63 2-bit magnitude comparators, followed by one OR gates. +// // +// // J. E. Stine and M. J. Schulte, "A combined two's complement and +// // floating-point comparator," 2005 IEEE International Symposium on +// // Circuits and Systems, Kobe, 2005, pp. 89-92 Vol. 1. 
+// // doi: 10.1109/ISCAS.2005.1464531 + +// module magcompare64b_2 (LT, EQ, w, x); + +// input logic [7:0] w; +// input logic [7:0] x; +// logic [3:0] y; +// logic [3:0] z; +// logic [1:0] a; +// logic [1:0] b; +// logic GT; - magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); - magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); - magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); - magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); +// output logic LT; +// output logic EQ; - magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); - magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); +// magcompare2c mag39(y[0], z[0], x[1:0], w[1:0]); +// magcompare2c mag3A(y[1], z[1], x[3:2], w[3:2]); +// magcompare2c mag3B(y[2], z[2], x[5:4], w[5:4]); +// magcompare2c mag3C(y[3], z[3], x[7:6], w[7:6]); - magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - - assign EQ = ~(LT | GT); - -endmodule // magcompare64b - -// This module takes 64-bits inputs A and B, two magnitude comparison -// flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of -// operands being compared as indicated below. -// Sel Description -// 00 double precision numbers -// 01 single precision numbers -// 10 half precision numbers -// 11 bfloat precision numbers -// -// The comparator produces a 2-bit signal fcc, which -// indicates the result of the comparison as follows: -// fcc decscription -// 00 A = B -// 01 A < B -// 10 A > B -// 11 A and B are unordered (i.e., A or B is NaN) -// It also produces a invalid operation flag, which is one -// if either of the input operands is a signaling NaN. 
- -module exception_cmp_2 ( - input logic [63:0] A, - input logic [63:0] B, - input logic FmtM, - input logic LT_mag, - input logic EQ_mag, - input logic [1:0] Sel, - input logic [3:0] FOpCtrlM, +// magcompare2c mag3D(a[0], b[0], z[1:0], y[1:0]); +// magcompare2c mag3E(a[1], b[1], z[3:2], y[3:2]); - output logic invalid, - output logic [1:0] fcc, - output logic [63:0] FCmpResultM, +// magcompare2c mag3F(LT, GT, b[1:0], a[1:0]); - input logic Azero, - input logic Bzero, - input logic ANaN, - input logic BNaN); +// assign EQ = ~(LT | GT); + +// endmodule // magcompare64b + +// // This module takes 64-bits inputs A and B, two magnitude comparison +// // flags LT_mag and EQ_mag, and a 2-bit signal Sel that indicates the type of +// // operands being compared as indicated below. +// // Sel Description +// // 00 double precision numbers +// // 01 single precision numbers +// // 10 half precision numbers +// // 11 bfloat precision numbers +// // +// // The comparator produces a 2-bit signal fcc, which +// // indicates the result of the comparison as follows: +// // fcc decscription +// // 00 A = B +// // 01 A < B +// // 10 A > B +// // 11 A and B are unordered (i.e., A or B is NaN) +// // It also produces a invalid operation flag, which is one +// // if either of the input operands is a signaling NaN. + +// module exception_cmp_2 ( +// input logic [63:0] A, +// input logic [63:0] B, +// input logic FmtM, +// input logic LT_mag, +// input logic EQ_mag, +// input logic [1:0] Sel, +// input logic [3:0] FOpCtrlM, - logic dp; - logic sp; - logic hp; - logic ASNaN; - logic BSNaN; - logic UO; - logic GT; - logic LT; - logic EQ; - logic [62:0] sixtythreezeros = 63'h0; +// output logic invalid, +// output logic [1:0] fcc, +// output logic [63:0] FCmpResultM, - assign dp = !Sel[1]&!Sel[0]; - assign sp = !Sel[1]&Sel[0]; - assign hp = Sel[1]&!Sel[0]; - - // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating - // point comparison is being performed. 
- assign UO = (ANaN | BNaN); - - // Test if A or B is a signaling NaN. - assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); - assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); - - // If either A or B is a signaling NaN the "Invalid Operation" - // exception flag is set to one; otherwise it is zero. - assign invalid = (ASNaN | BSNaN); - - // A and B are equal if (their magnitudes are equal) AND ((their signs are - // equal) or (their magnitudes are zero AND they are floating point - // numbers)). Also, A and B are not equal if they are unordered. - assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); +// input logic Azero, +// input logic Bzero, +// input logic ANaN, +// input logic BNaN); - // A is less than B if (A is negative and B is posiive) OR - // (A and B are positive and the magnitude of A is less than - // the magnitude of B) or (A and B are negative integers and - // the magnitude of A is less than the magnitude of B) or - // (A and B are negative floating point numbers and - // the magnitude of A is greater than the magnitude of B). - // Also, A is not less than B if A and B are equal or unordered. - assign LT = ((~LT_mag & A[63] & B[63]) | - (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; +// logic dp; +// logic sp; +// logic hp; +// logic ASNaN; +// logic BSNaN; +// logic UO; +// logic GT; +// logic LT; +// logic EQ; +// logic [62:0] sixtythreezeros = 63'h0; + +// assign dp = !Sel[1]&!Sel[0]; +// assign sp = !Sel[1]&Sel[0]; +// assign hp = Sel[1]&!Sel[0]; + +// // Values are unordered if ((A is NaN) OR (B is NaN)) AND (a floating +// // point comparison is being performed. +// assign UO = (ANaN | BNaN); + +// // Test if A or B is a signaling NaN. +// assign ASNaN = ANaN & (sp&~A[53] | dp&~A[50] | hp&~A[56]); +// assign BSNaN = BNaN & (sp&~B[53] | dp&~B[50] | hp&~B[56]); + +// // If either A or B is a signaling NaN the "Invalid Operation" +// // exception flag is set to one; otherwise it is zero. 
+// assign invalid = (ASNaN | BSNaN); + +// // A and B are equal if (their magnitudes are equal) AND ((their signs are +// // equal) or (their magnitudes are zero AND they are floating point +// // numbers)). Also, A and B are not equal if they are unordered. +// assign EQ = (EQ_mag | (Azero&Bzero)) & (~UO); - // A is greater than B when LT, EQ, and UO are are false. - assign GT = ~(LT | EQ | UO); +// // A is less than B if (A is negative and B is posiive) OR +// // (A and B are positive and the magnitude of A is less than +// // the magnitude of B) or (A and B are negative integers and +// // the magnitude of A is less than the magnitude of B) or +// // (A and B are negative floating point numbers and +// // the magnitude of A is greater than the magnitude of B). +// // Also, A is not less than B if A and B are equal or unordered. +// assign LT = ((~LT_mag & A[63] & B[63]) | +// (LT_mag & ~(A[63] & B[63])))&~EQ&~UO; + +// // A is greater than B when LT, EQ, and UO are are false. +// assign GT = ~(LT | EQ | UO); - // Note: it may be possible to optimize the setting of fcc - // a little more, but it is probably not worth the effort. +// // Note: it may be possible to optimize the setting of fcc +// // a little more, but it is probably not worth the effort. - // Set the bits of fcc based on LT, GT, EQ, and UO - assign fcc[0] = LT | UO; - assign fcc[1] = GT | UO; +// // Set the bits of fcc based on LT, GT, EQ, and UO +// assign fcc[0] = LT | UO; +// assign fcc[1] = GT | UO; - always_comb begin - case (FOpCtrlM[2:0]) - 3'b111: FCmpResultM = LT ? A : B;//min - 3'b101: FCmpResultM = GT ? A : B;//max - 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal - 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than - 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal - default: FCmpResultM = 64'b0; - endcase - end +// always_comb begin +// case (FOpCtrlM[2:0]) +// 3'b111: FCmpResultM = LT ? 
A : B;//min +// 3'b101: FCmpResultM = GT ? A : B;//max +// 3'b010: FCmpResultM = FmtM ? {63'b0, EQ} : {31'b0, EQ, 32'b0};//equal +// 3'b001: FCmpResultM = FmtM ? {63'b0, LT} : {31'b0, LT, 32'b0};//less than +// 3'b011: FCmpResultM = FmtM ? {63'b0, LT|EQ} : {31'b0, LT|EQ, 32'b0};//less than or equal +// default: FCmpResultM = 64'b0; +// endcase +// end -endmodule // exception_cmp +// endmodule // exception_cmp diff --git a/wally-pipelined/src/fpu/fpuhazard.sv b/wally-pipelined/src/fpu/fpuhazard.sv index 959ef476..4d0895a7 100644 --- a/wally-pipelined/src/fpu/fpuhazard.sv +++ b/wally-pipelined/src/fpu/fpuhazard.sv @@ -26,47 +26,41 @@ `include "wally-config.vh" module fpuhazard( - input logic [4:0] Adr1, Adr2, Adr3, - input logic FWriteEnE, FWriteEnM, FWriteEnW, - input logic [4:0] RdE, RdM, RdW, - input logic FDivBusyE, - input logic RegWriteD, - input logic [2:0] FResultSelD, FResultSelE, - input logic IllegalFPUInstrD, - input logic FInput2UsedD, FInput3UsedD, - // Stall outputs - output logic FStallD, - output logic [1:0] FForwardInput1D, FForwardInput2D, - output logic FForwardInput3D + input logic [4:0] Adr1E, Adr2E, Adr3E, + input logic FWriteEnM, FWriteEnW, + input logic [4:0] RdM, RdW, + input logic [2:0] FResultSelM, + output logic FStallD, + output logic [1:0] ForwardXE, ForwardYE, ForwardZE ); always_comb begin // set ReadData as default - FForwardInput1D = 2'b00; - FForwardInput2D = 2'b00; - FForwardInput3D = 1'b0; - FStallD = FDivBusyE; - if (~IllegalFPUInstrD) begin -// if taking a value from int register - if ((Adr1 == RdE) & (FWriteEnE | ((FResultSelE == 3'b110) & RegWriteD))) - if (FResultSelE == 3'b110) FForwardInput1D = 2'b11; // choose SrcAM - else FStallD = 1'b1; // otherwise stall - else if ((Adr1 == RdM) & FWriteEnM) FForwardInput1D = 2'b01; // choose FPUResultDirW - else if ((Adr1 == RdW) & FWriteEnW) FForwardInput1D = 2'b11; // choose FPUResultDirE + ForwardXE = 2'b00; // choose FRD1E + ForwardYE = 2'b00; // choose FRD2E + ForwardZE = 2'b00; 
// choose FRD3E + FStallD = 0; + + if ((Adr1E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b100) ForwardXE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr1E == RdW) & FWriteEnW) ForwardXE = 2'b01; // choose FPUResult64W - if(FInput2UsedD) - if ((Adr2 == RdE) & FWriteEnE) FStallD = 1'b1; - else if ((Adr2 == RdM) & FWriteEnM) FForwardInput2D = 2'b01; // choose FPUResultDirW - else if ((Adr2 == RdW) & FWriteEnW) FForwardInput2D = 2'b10; // choose FPUResultDirE + if ((Adr2E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b100) ForwardYE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr2E == RdW) & FWriteEnW) ForwardYE = 2'b01; // choose FPUResult64W - - if(FInput3UsedD) - if ((Adr3 == RdE) & FWriteEnE) FStallD = 1'b1; - else if ((Adr3 == RdM) & FWriteEnM) FStallD = 1'b1; - else if ((Adr3 == RdW) & FWriteEnW) FForwardInput3D = 1'b1; // choose FPUResultDirE - end + + if ((Adr3E == RdM) & FWriteEnM) + // if the result will be FResM + if(FResultSelM == 3'b100) ForwardZE = 2'b10; // choose FResM + else FStallD = 1; // if the result won't be ready stall + else if ((Adr3E == RdW) & FWriteEnW) ForwardZE = 2'b01; // choose FPUResult64W end diff --git a/wally-pipelined/src/fpu/fsgn.sv b/wally-pipelined/src/fpu/fsgn.sv index 2850af86..62d0e7d7 100755 --- a/wally-pipelined/src/fpu/fsgn.sv +++ b/wally-pipelined/src/fpu/fsgn.sv @@ -1,8 +1,8 @@ //performs the fsgnj/fsgnjn/fsgnjx RISCV instructions -module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E); +module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, SrcXE, SrcYE); - input [63:0] FInput1E, FInput2E; + input [63:0] SrcXE, SrcYE; input [1:0] SgnOpCodeE; output [63:0] SgnResultE; output [4:0] SgnFlagsE; @@ -11,18 +11,18 @@ module fpusgn (SgnOpCodeE, SgnResultE, SgnFlagsE, FInput1E, FInput2E); //op code designation: // - //00 - fsgnj - directly copy over sign 
value of FInput2E - //01 - fsgnjn - negate sign value of FInput2E - //10 - fsgnjx - XOR sign values of FInput1E & FInput2E + //00 - fsgnj - directly copy over sign value of SrcYE + //01 - fsgnjn - negate sign value of SrcYE + //10 - fsgnjx - XOR sign values of SrcXE & SrcYE // - assign SgnResultE[63] = SgnOpCodeE[1] ? (FInput1E[63] ^ FInput2E[63]) : (FInput2E[63] ^ SgnOpCodeE[0]); - assign SgnResultE[62:0] = FInput1E[62:0]; + assign SgnResultE[63] = SgnOpCodeE[1] ? (SrcXE[63] ^ SrcYE[63]) : (SrcYE[63] ^ SgnOpCodeE[0]); + assign SgnResultE[62:0] = SrcXE[62:0]; //If the exponent is all ones, then the value is either Inf or NaN, //both of which will produce a QNaN/SNaN value of some sort. This will //set the invalid flag high. - assign AonesExp = FInput1E[62]&FInput1E[61]&FInput1E[60]&FInput1E[59]&FInput1E[58]&FInput1E[57]&FInput1E[56]&FInput1E[55]&FInput1E[54]&FInput1E[53]&FInput1E[52]; + assign AonesExp = SrcXE[62]&SrcXE[61]&SrcXE[60]&SrcXE[59]&SrcXE[58]&SrcXE[57]&SrcXE[56]&SrcXE[55]&SrcXE[54]&SrcXE[53]&SrcXE[52]; //the only flag that can occur during this operation is invalid //due to changing sign on already existing NaN diff --git a/wally-pipelined/src/generic/lzd.sv~ b/wally-pipelined/src/generic/lzd.sv~ deleted file mode 100755 index bfffe5e5..00000000 --- a/wally-pipelined/src/generic/lzd.sv~ +++ /dev/null @@ -1,195 +0,0 @@ -/////////////////////////////////////////// -// lzd.sv -// -// Written: James.Stine@okstate.edu 1 February 2021 -// Modified: -// -// Purpose: Integer Divide instructions -// -// A component of the Wally configurable RISC-V project. 
-// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" -/* verilator lint_off DECLFILENAME */ - -// Original idea came from V. G. Oklobdzija, "An algorithmic and novel -// design of a leading zero detector circuit: comparison with logic -// synthesis," in IEEE Transactions on Very Large Scale Integration -// (VLSI) Systems, vol. 2, no. 1, pp. 124-128, March 1994, doi: -// 10.1109/92.273153. 
- -// Modified to be more hierarchical - -module lz2 (P, V, B); - - input logic [1:0] B; - - output logic P; - output logic V; - - assign V = B[0] | B[1]; - assign P = B[0] & ~B[1]; - -endmodule // lz2 - -module lzd_hier #(parameter WIDTH=8) - (input logic [WIDTH-1:0] B, - output logic [$clog2(WIDTH)-1:0] ZP, - output logic ZV); - - if (WIDTH == 128) - lz128 lzd127 (ZP, ZV, B); - else if (WIDTH == 64) - lz64 lzd64 (ZP, ZV, B); - else if (WIDTH == 32) - lz32 lzd32 (ZP, ZV, B); - else if (WIDTH == 16) - lz16 lzd16 (ZP, ZV, B); - else if (WIDTH == 8) - lz8 lzd8 (ZP, ZV, B); - else if (WIDTH == 4) - lz4 lzd4 (ZP, ZV, B); - -endmodule // lzd_hier - -module lz4 (ZP, ZV, B); - - input logic [3:0] B; - - logic ZPa; - logic ZPb; - logic ZVa; - logic ZVb; - - output logic [1:0] ZP; - output logic ZV; - - lz2 l1(ZPa, ZVa, B[1:0]); - lz2 l2(ZPb, ZVb, B[3:2]); - - assign ZP[0:0] = ZVb ? ZPb : ZPa; - assign ZP[1] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule - -module lz8 (ZP, ZV, B); - - input logic [7:0] B; - - logic [1:0] ZPa; - logic [1:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [2:0] ZP; - output logic ZV; - - lz4 l1(ZPa, ZVa, B[3:0]); - lz4 l2(ZPb, ZVb, B[7:4]); - - assign ZP[1:0] = ZVb ? ZPb : ZPa; - assign ZP[2] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule - -module lz16 (ZP, ZV, B); - - input logic [15:0] B; - - logic [2:0] ZPa; - logic [2:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [3:0] ZP; - output logic ZV; - - lz8 l1(ZPa, ZVa, B[7:0]); - lz8 l2(ZPb, ZVb, B[15:8]); - - assign ZP[2:0] = ZVb ? ZPb : ZPa; - assign ZP[3] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz16 - -module lz32 (ZP, ZV, B); - - input logic [31:0] B; - - logic [3:0] ZPa; - logic [3:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [4:0] ZP; - output logic ZV; - - lz16 l1(ZPa, ZVa, B[15:0]); - lz16 l2(ZPb, ZVb, B[31:16]); - - assign ZP[3:0] = ZVb ? 
ZPb : ZPa; - assign ZP[4] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz32 - -module lz64 (ZP, ZV, B); - - input logic [63:0] B; - - logic [4:0] ZPa; - logic [4:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [5:0] ZP; - output logic ZV; - - lz32 l1(ZPa, ZVa, B[31:0]); - lz32 l2(ZPb, ZVb, B[63:32]); - - assign ZP[4:0] = ZVb ? ZPb : ZPa; - assign ZP[5] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz64 - -module lz128 (ZP, ZV, B); - - input logic [127:0] B; - - logic [5:0] ZPa; - logic [5:0] ZPb; - logic ZVa; - logic ZVb; - - output logic [6:0] ZP; - output logic ZV; - - lz64 l1(ZPa, ZVa, B[64:0]); - lz64 l2(ZPb, ZVb, B[127:63]); - - assign ZP[5:0] = ZVb ? ZPb : ZPa; - assign ZP[6] = ~ZVb; - assign ZV = ZVa | ZVb; - -endmodule // lz128 - -/* verilator lint_on DECLFILENAME */ diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index c61db2dc..f5552106 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -32,7 +32,7 @@ module hazard( input logic BPPredWrongE, CSRWritePendingDEM, RetM, TrapM, input logic LoadStallD, MulDivStallD, CSRRdStallD, input logic DCacheStall, ICacheStallF, - input logic FPUStallD, + input logic FPUStallD, FStallD, input logic DivBusyE,FDivBusyE, // Stall & flush outputs output logic StallF, StallD, StallE, StallM, StallW, @@ -56,7 +56,7 @@ module hazard( // If any stages are stalled, the first stage that isn't stalled must flush. 
assign StallFCause = CSRWritePendingDEM && ~(TrapM | RetM | BPPredWrongE); - assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous + assign StallDCause = (LoadStallD | MulDivStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallECause = DivBusyE | FDivBusyE; assign StallMCause = 0; assign StallWCause = DCacheStall | ICacheStallF; diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index b27541d4..16fd5a8f 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -45,11 +45,13 @@ module controller( output logic MemReadE, CSRReadE, // for Hazard Unit output logic [2:0] Funct3E, output logic MulDivE, W64E, - output logic JumpE, + output logic JumpE, + output logic [1:0] MemRWE, // Memory stage control signals input logic StallM, FlushM, output logic [1:0] MemRWM, - output logic CSRReadM, CSRWriteM, PrivilegedM, + output logic CSRReadM, CSRWriteM, PrivilegedM, + output logic SCE, output logic [1:0] AtomicM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit @@ -73,7 +75,7 @@ module controller( // pipelined control signals logic RegWriteE; logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; - logic [1:0] MemRWD, MemRWE; + logic [1:0] MemRWD; logic JumpD; logic BranchD, BranchE; logic [1:0] ALUOpD; @@ -140,6 +142,7 @@ module controller( ControlsD = `CTRLW'b1_000_00_00_011_0_00_0_0_1_0_0_1_00_0; // W-type Multiply/Divide else ControlsD = `CTRLW'b0_000_00_00_000_0_00_0_0_0_0_0_0_00_1; // non-implemented instruction + //7'b1010011: ControlsD = `CTRLW'b0_000_00_00_101_0_00_0_0_0_0_0_0_00_1; // FP 7'b1100011: ControlsD = `CTRLW'b0_010_00_00_000_1_01_0_0_0_0_0_0_00_0; // beq 7'b1100111: ControlsD = `CTRLW'b1_000_00_00_000_0_00_1_1_0_0_0_0_00_0; // 
jalr 7'b1101111: ControlsD = `CTRLW'b1_011_00_00_000_0_00_1_0_0_0_0_0_00_0; // jal @@ -202,7 +205,8 @@ module controller( assign PCSrcE = JumpE | BranchE & BranchTakenE; - assign MemReadE = MemRWE[1]; + assign MemReadE = MemRWE[1]; + assign SCE = (ResultSrcE == 3'b100); // Memory stage pipeline control register flopenrc #(15) controlregM(clk, reset, FlushM, ~StallM, diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 13db65a3..44a40045 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -37,6 +37,9 @@ module datapath ( input logic ALUSrcAE, ALUSrcBE, input logic TargetSrcE, input logic JumpE, + input logic IllegalFPUInstrE, + input logic [1:0] MemRWE, + input logic [`XLEN-1:0] FWriteDataE, input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, output logic [2:0] FlagsE, @@ -44,13 +47,13 @@ module datapath ( output logic [`XLEN-1:0] SrcAE, SrcBE, // Memory stage signals input logic StallM, FlushM, - input logic [`XLEN-1:0] FWriteDataM, + input logic FWriteIntM, + input logic [`XLEN-1:0] FIntResM, output logic [`XLEN-1:0] SrcAM, output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals input logic StallW, FlushW, input logic FWriteIntW, - input logic [`XLEN-1:0] FPUResultW, input logic RegWriteW, input logic SquashSCW, input logic [2:0] ResultSrcW, @@ -70,13 +73,14 @@ module datapath ( logic [`XLEN-1:0] RD1E, RD2E; logic [`XLEN-1:0] ExtImmE; - logic [`XLEN-1:0] PreSrcAE, SrcAE2, SrcBE2; + logic [`XLEN-1:0] PreSrcAE, PreSrcBE, SrcAE2, SrcBE2; logic [`XLEN-1:0] ALUResultE; logic [`XLEN-1:0] WriteDataE; logic [`XLEN-1:0] TargetBaseE; // Memory stage signals logic [`XLEN-1:0] ALUResultM; + logic [`XLEN-1:0] ResultM; // Writeback stage signals logic [`XLEN-1:0] SCResultW; logic [`XLEN-1:0] ALUResultW; @@ -88,8 +92,7 @@ module datapath ( assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; - //Mux for writting floating point - mux2 #(`XLEN) writedatamux(ResultW, 
FPUResultW, FWriteIntW, WriteDataW); + //Mux for writting floating point regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); extend ext(.InstrD(InstrD[31:7]), .*); @@ -102,11 +105,12 @@ module datapath ( flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux4 #(`XLEN) faemux(RD1E, WriteDataW, ALUResultM, FWriteDataM, ForwardAE, PreSrcAE); - mux4 #(`XLEN) fbemux(RD2E, WriteDataW, ALUResultM, FWriteDataM, ForwardBE, WriteDataE); + mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, PreSrcAE); + mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, PreSrcBE); + mux2 #(`XLEN) writedatamux(PreSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); mux2 #(`XLEN) srcamux(PreSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcamux2(SrcAE, PCLinkE, JumpE, SrcAE2); - mux2 #(`XLEN) srcbmux(WriteDataE, ExtImmE, ALUSrcBE, SrcBE); + mux2 #(`XLEN) srcbmux(PreSrcBE, ExtImmE, ALUSrcBE, SrcBE); mux2 #(`XLEN) srcbmux2(SrcBE, {`XLEN{1'b0}}, JumpE, SrcBE2); // *** May be able to remove this mux. 
alu #(`XLEN) alu(SrcAE2, SrcBE2, ALUControlE, ALUResultE, FlagsE); mux2 #(`XLEN) targetsrcmux(PCE, SrcAE, TargetSrcE, TargetBaseE); @@ -117,10 +121,11 @@ module datapath ( flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM); assign MemAdrM = ALUResultM; flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); - flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); + flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); + mux2 #(`XLEN) resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM); // Writeback stage pipeline register and logic - flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); + flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW); flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); // handle Store Conditional result if atomic extension supported @@ -131,11 +136,11 @@ module datapath ( assign SCResultW = 0; endgenerate - mux5 #(`XLEN) resultmux(ALUResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, ResultW); + mux5 #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW); /* -----\/----- EXCLUDED -----\/----- // This mux4:1 no longer needs to include PCLinkW. This is set correctly in the execution stage. // *** need to look at how the decoder is coded to fix. 
- mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW); + mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, WriteDataW); >>>>>>> bp -----/\----- EXCLUDED -----/\----- */ diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index cdc6d270..e7b3ff24 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -28,32 +28,31 @@ module forward( // Detect hazards input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, - input logic MemReadE, MulDivE, CSRReadE, - input logic RegWriteM, RegWriteW, - input logic DivDoneE, DivBusyE, - input logic FWriteIntE, FWriteIntM, FWriteIntW, + input logic MemReadE, MulDivE, CSRReadE, + input logic RegWriteM, RegWriteW, + input logic DivDoneE, DivBusyE, + input logic FWriteIntE, FWriteIntM, FWriteIntW, + input logic SCE, // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, - output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD + output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD ); always_comb begin ForwardAE = 2'b00; ForwardBE = 2'b00; if (Rs1E != 5'b0) - if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; + if ((Rs1E == RdM) & (RegWriteM|FWriteIntM)) ForwardAE = 2'b10; else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01; - else if ((Rs1E == RdM) & FWriteIntM) ForwardAE = 2'b11; if (Rs2E != 5'b0) - if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; + if ((Rs2E == RdM) & (RegWriteM|FWriteIntM)) ForwardBE = 2'b10; else if ((Rs2E == RdW) & (RegWriteW|FWriteIntW)) ForwardBE = 2'b01; - else if ((Rs2E == RdM) & FWriteIntM) ForwardBE = 2'b11; end // Stall on dependent operations that finish in Mem Stage and can't bypass in time assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); - assign LoadStallD = MemReadE & ((Rs1D == RdE) | (Rs2D == RdE)); + assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE)); assign MulDivStallD = MulDivE & ((Rs1D == 
RdE) | (Rs2D == RdE)) | MulDivE | DivBusyE; // *** extend with stalls for divide assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index bcffce8a..87e21d79 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -31,33 +31,34 @@ module ieu ( input logic [31:0] InstrD, input logic IllegalIEUInstrFaultD, output logic IllegalBaseInstrFaultD, - output logic RegWriteD, + output logic RegWriteD, // Execute Stage interface input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, input logic FWriteIntE, + input logic IllegalFPUInstrE, + input logic [`XLEN-1:0] FWriteDataE, output logic [`XLEN-1:0] PCTargetE, output logic MulDivE, W64E, output logic [2:0] Funct3E, output logic [`XLEN-1:0] SrcAE, SrcBE, + input logic FWriteIntM, + // Memory stage interface - input logic DataMisalignedM, // from LSU - input logic SquashSCW, // from LSU - output logic [1:0] MemRWM, // read/write control goes to LSU - output logic [1:0] AtomicM, // atomic control goes to LSU - output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU + input logic DataMisalignedM, // from LSU + input logic SquashSCW, // from LSU + output logic [1:0] MemRWM, // read/write control goes to LSU + output logic [1:0] AtomicM, // atomic control goes to LSU + output logic [`XLEN-1:0] MemAdrM, WriteDataM, // Address and write data to LSU - output logic [2:0] Funct3M, // size and signedness to LSU - - - input logic FWriteIntM, // from FPU - input logic [`XLEN-1:0] FWriteDataM, // from FPU - output logic [`XLEN-1:0] SrcAM, // to privilege and fpu + output logic [2:0] Funct3M, // size and signedness to LSU + output logic [`XLEN-1:0] SrcAM, // to privilege and fpu + input logic DataAccessFaultM, + input logic [`XLEN-1:0] FIntResM, // Writeback stage input logic [`XLEN-1:0] CSRReadValW, ReadDataW, MulDivResultW, - input logic FWriteIntW, - input logic [`XLEN-1:0] FPUResultW, + 
input logic FWriteIntW, // input logic [`XLEN-1:0] PCLinkW, output logic InstrValidM, InstrValidW, // hazards @@ -76,7 +77,8 @@ module ieu ( logic [4:0] ALUControlE; logic ALUSrcAE, ALUSrcBE; logic [2:0] ResultSrcW; - logic TargetSrcE; + logic TargetSrcE; + logic SCE; // forwarding signals logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW; @@ -84,6 +86,7 @@ module ieu ( logic RegWriteM, RegWriteW; logic MemReadE, CSRReadE; logic JumpE; + logic [1:0] MemRWE; controller c(.*); datapath dp(.*); diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index d59ec313..be44e198 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -87,21 +87,23 @@ module wallypipelinedhart logic PCSrcE; logic CSRWritePendingDEM; - logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD; logic DivDoneE; logic DivBusyE; - logic DivDoneW; - logic [4:0] SetFflagsM; - logic [2:0] FRM_REGW; - logic FloatRegWriteW; - logic [1:0] FMemRWM; logic RegWriteD; - logic [`XLEN-1:0] FWriteDataM; - logic SquashSCW; + logic LoadStallD, MulDivStallD, CSRRdStallD; + logic SquashSCM, SquashSCW; + // floating point unit signals + logic [2:0] FRM_REGW; + logic [1:0] FMemRWM, FMemRWE; logic FStallD; - logic FWriteIntE, FWriteIntW, FWriteIntM; + logic FWriteIntE, FWriteIntM, FWriteIntW; + logic [`XLEN-1:0] FWriteDataE; + logic [`XLEN-1:0] FIntResM; logic FDivBusyE; logic IllegalFPUInstrD, IllegalFPUInstrE; + logic FloatRegWriteW; + logic FPUStallD; + logic [4:0] SetFflagsM; logic [`XLEN-1:0] FPUResultW; // memory management unit signals @@ -185,20 +187,10 @@ module wallypipelinedhart ieu ieu(.*); // integer execution unit: integer register file, datapath and controller - mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); + // mux2 #(`XLEN) OutputInput2mux(WriteDataM, FWriteDataM, FMemRWM[0], WriteDatatmpM); pagetablewalker pagetablewalker(.HPTWRead(HPTWRead), .*); 
// can send addresses to ahblite, send out pagetablestall - // *** can connect to hazard unit - // changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed. - // Would need to insertinstruction as InstrD, not InstrF - /*ahblite ebu( - .InstrReadF(1'b0), - .InstrRData(), // hook up InstrF later - .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), - .*); */ - - // arbiter between IEU and pagetablewalker lsuArb arbiter(// HPTW connection .HPTWTranslate(MMUTranslate), @@ -208,12 +200,12 @@ module wallypipelinedhart .HPTWReady(MMUReady), .HPTWStall(HPTWStall), // CPU connection - .MemRWM(MemRWM|FMemRWM), + .MemRWM(MemRWM), .Funct3M(Funct3M), .AtomicM(AtomicM), .MemAdrM(MemAdrM), .StallW(StallW), - .WriteDataM(WriteDatatmpM), + .WriteDataM(WriteDataM), .ReadDataW(ReadDataW), .CommittedM(CommittedM), .SquashSCW(SquashSCW), @@ -259,7 +251,8 @@ module wallypipelinedhart ahblite ebu( //.InstrReadF(1'b0), //.InstrRData(InstrF), // hook up InstrF later - .WriteDataM(WriteDatatmpM), + .ISquashBusAccessF(1'b0), // *** temporary hack to disable PMP instruction fetch checking + .WriteDataM(WriteDataM), .MemSizeM(Funct3MfromLSU[1:0]), .UnsignedLoadM(Funct3MfromLSU[2]), .Funct7M(InstrM[31:25]), .HRDATAW(HRDATAW), diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 1bbe6124..11b8e562 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -539,8 +539,8 @@ string tests32f[] = '{ if (`M_SUPPORTED) tests = {tests, tests64m}; if (`A_SUPPORTED) tests = {tests, tests64a}; if (`MEM_VIRTMEM) tests = {tests, tests64mmu}; - if (`D_SUPPORTED) tests = {tests64d, tests}; if (`F_SUPPORTED) tests = {tests64f, tests}; + if (`D_SUPPORTED) tests = {tests64d, tests}; end //tests = {tests64a, tests}; end else begin // RV32 @@ -554,7 +554,7 @@ string tests32f[] = '{ if (`C_SUPPORTED % 2 == 1) tests = {tests, tests32ic}; else tests = {tests, 
tests32iNOc}; if (`M_SUPPORTED % 2 == 1) tests = {tests, tests32m}; - // if (`F_SUPPORTED) tests = {tests32f, tests}; + if (`F_SUPPORTED) tests = {tests32f, tests}; if (`A_SUPPORTED) tests = {tests, tests32a}; if (`MEM_VIRTMEM) tests = {tests, tests32mmu}; end diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index df8fad8c..6676d1a7 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -26,13 +26,15 @@ `include "wally-config.vh" module testbench(); - logic clk, reset; - logic [31:0] GPIOPinsIn; - logic [31:0] GPIOPinsOut, GPIOPinsEn; - - // instantiate device to be tested - logic [31:0] CheckInstrD; + + parameter waveOnICount = 2657000; // # of instructions at which to turn on waves in graphical sim + + /////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////// DUT ///////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + logic clk, reset; + logic [`AHBW-1:0] HRDATA; logic [31:0] HADDR; logic [`AHBW-1:0] HWDATA; @@ -45,155 +47,97 @@ module testbench(); logic HCLK, HRESETn; logic [`AHBW-1:0] HRDATAEXT; logic HREADYEXT, HRESPEXT; - logic UARTSout; - - logic ignoreRFwrite; - - parameter waveOnICount = 2060000; // # of instructions at which to turn on waves in graphical sim + logic [31:0] GPIOPinsIn; + logic [31:0] GPIOPinsOut, GPIOPinsEn; + logic UARTSin, UARTSout; assign GPIOPinsIn = 0; assign UARTSin = 1; - // instantiate processor and memories wallypipelinedsoc dut(.*); - /** - * Walk the page table stored in dtim according to sv39 logic and translate a - * virtual address to a physical address. - * - * See section 4.3.2 of the RISC-V Privileged specification for a full - * explanation of the below algorithm. 
- */ - function logic [`XLEN-1:0] adrTranslator( - input logic [`XLEN-1:0] adrIn); - begin - logic SvMode, PTE_R, PTE_X; - logic [`XLEN-1:0] SATP, PTE; - logic [55:0] BaseAdr, PAdr; - logic [8:0] VPN [2:0]; - logic [11:0] Offset; + /////////////////////////////////////////////////////////////////////////////// + //////////////////////// Signals & Shared Macros /////////////////////////// + //////////////////////// AKA stuff that comes first /////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Sorry if these have gotten decontextualized. + // Verilog expects them to be defined before they are used. - int i; + // ------------------- + // Signal Declarations + // ------------------- + // Testbench Core + integer instrs; + integer warningCount = 0; + string trashString; // should never be read from + logic [31:0] InstrMask; + logic forcedInstr; + logic [63:0] lastPCD; + logic PCDwrong; + // PC, Instr Checking + logic [`XLEN-1:0] PCW; + logic [63:0] lastInstrDExpected, lastPC, lastPC2; + integer data_file_PCF, scan_file_PCF; + integer data_file_PCD, scan_file_PCD; + integer data_file_PCM, scan_file_PCM; + integer data_file_PCW, scan_file_PCW; + string PCtextF, PCtextF2; + string PCtextD, PCtextD2; + string PCtextE; + string PCtextM; + string PCtextW; + logic [31:0] InstrFExpected, InstrDExpected, InstrMExpected, InstrWExpected; + logic [63:0] PCFexpected, PCDexpected, PCMexpected, PCWexpected; + // RegFile Write Checking + logic ignoreRFwrite; + logic [63:0] regExpected; + integer regNumExpected; + integer data_file_rf, scan_file_rf; + // Bus Unit Read/Write Checking + logic [63:0] readMask; + logic [`XLEN-1:0] readAdrExpected, readAdrTranslated; + logic [`XLEN-1:0] writeDataExpected, writeAdrExpected, writeAdrTranslated; + integer data_file_memR, scan_file_memR; + integer data_file_memW, scan_file_memW; + // CSR Checking + integer totalCSR = 0; + logic [99:0] StartCSRexpected[63:0]; + string 
StartCSRname[99:0]; + integer data_file_csr, scan_file_csr; + + // ----------- + // Error Macro + // ----------- + `define ERROR \ + #10; \ + $display("processed %0d instructions with %0d warnings", instrs, warningCount); \ + $stop; - // Grab the SATP register from privileged unit - SATP = dut.hart.priv.csr.SATP_REGW; + // ---------------- + // PC Updater Macro + // ---------------- + `define SCAN_PC(DATAFILE,SCANFILE,PCTEXT,PCTEXT2,CHECKINSTR,PCEXPECTED) \ + SCANFILE = $fscanf(DATAFILE, "%s\n", PCTEXT); \ + PCTEXT2 = ""; \ + while (PCTEXT2 != "***") begin \ + PCTEXT = {PCTEXT, " ", PCTEXT2}; \ + SCANFILE = $fscanf(DATAFILE, "%s\n", PCTEXT2); \ + end \ + SCANFILE = $fscanf(DATAFILE, "%x\n", CHECKINSTR); \ + SCANFILE = $fscanf(DATAFILE, "%x\n", PCEXPECTED); - // Split the virtual address into page number segments and offset - VPN[2] = adrIn[38:30]; - VPN[1] = adrIn[29:21]; - VPN[0] = adrIn[20:12]; - Offset = adrIn[11:0]; - - // We do not support sv48; only sv39 - SvMode = SATP[63]; - - // Only perform translation if translation is on and the processor is not - // in machine mode - if (SvMode && (dut.hart.priv.PrivilegeModeW != `M_MODE)) begin - BaseAdr = SATP[43:0] << 12; - - for (i = 2; i >= 0; i--) begin - PAdr = BaseAdr + (VPN[i] << 3); - - // dtim.RAM is 64-bit addressed. PAdr specifies a byte. We right shift - // by 3 (the PTE size) to get the requested 64-bit PTE. - PTE = dut.uncore.dtim.RAM[PAdr >> 3]; - PTE_R = PTE[1]; - PTE_X = PTE[3]; - if (PTE_R || PTE_X) begin - // Leaf page found - break; - end else begin - // Go to next level of table - BaseAdr = PTE[53:10] << 12; - end - end - - // Determine which parts of the PTE page number to use based on the - // level of the page table we reached. 
- if (i == 2) begin - // Gigapage - assign adrTranslator = {8'b0, PTE[53:28], VPN[1], VPN[0], Offset}; - end else if (i == 1) begin - // Megapage - assign adrTranslator = {8'b0, PTE[53:19], VPN[0], Offset}; - end else begin - // Kilopage - assign adrTranslator = {8'b0, PTE[53:10], Offset}; - end - end else begin - // Direct translation if address translation is not on - assign adrTranslator = adrIn; - end - end - endfunction - - // initialize test + /////////////////////////////////////////////////////////////////////////////// + //////////////////////////////// Testbench Core /////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // -------------- + // Initialization + // -------------- initial begin - ignoreRFwrite <= 0; + instrs = 0; + PCDwrong = 0; reset <= 1; # 22; reset <= 0; end - - // read pc trace file - integer data_file_PC, scan_file_PC; - initial begin - data_file_PC = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); - if (data_file_PC == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - - integer data_file_PCW, scan_file_PCW; - initial begin - data_file_PCW = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); - if (data_file_PCW == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - - // read register trace file - integer data_file_rf, scan_file_rf; - initial begin - data_file_rf = $fopen({`LINUX_TEST_VECTORS,"parsedRegs.txt"}, "r"); - if (data_file_rf == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - - // read CSR trace file - integer data_file_csr, scan_file_csr; - initial begin - data_file_csr = $fopen({`LINUX_TEST_VECTORS,"parsedCSRs.txt"}, "r"); - if (data_file_csr == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - - // read memreads trace file - integer data_file_memR, scan_file_memR; - initial begin - data_file_memR = $fopen({`LINUX_TEST_VECTORS,"parsedMemRead.txt"}, "r"); - if (data_file_memR == 
0) begin - $display("file couldn't be opened"); - $stop; - end - end - - // read memwrite trace file - integer data_file_memW, scan_file_memW; - initial begin - data_file_memW = $fopen({`LINUX_TEST_VECTORS,"parsedMemWrite.txt"}, "r"); - if (data_file_memW == 0) begin - $display("file couldn't be opened"); - $stop; - end - end - // initial loading of memories initial begin $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootdtim.RAM, 'h1000 >> 3); @@ -201,49 +145,247 @@ module testbench(); $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.memory); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.memory); end - - integer warningCount = 0; - integer instrs; - - //logic[63:0] adrTranslation[4:0]; - //string translationType[4:0] = {"rf", "writeAdr", "PCW", "PC", "readAdr"}; - //initial begin - // for(int i=0; i<5; i++) begin - // adrTranslation[i] = 64'b0; - // end - //end - - //function logic equal(logic[63:0] adr, logic[63:0] adrExpected, integer func); - // if (adr[11:0] !== adrExpected[11:0]) begin - // equal = 1'b0; - // end else begin - // equal = 1'b1; - // if ((adr+adrTranslation[func]) !== adrExpected) begin - // adrTranslation[func] = adrExpected - adr; - // $display("warning: probably new address translation %x for %s at instr %0d", adrTranslation[func], translationType[func], instrs); - // warningCount += 1; - // end - // end - //endfunction - - // pretty sure this isn't necessary anymore, but keeping this for now since its easier - function logic equal(logic[63:0] adr, logic[63:0] adrExpected, integer func); - equal = adr === adrExpected; - endfunction - - - `define ERROR \ - #10; \ - $display("processed %0d instructions with %0d warnings", instrs, warningCount); \ - $stop; - - logic [63:0] pcExpected; - logic [63:0] regExpected; - integer regNumExpected; - logic [`XLEN-1:0] PCW; + // ------- + // Running + // ------- + always + begin + clk <= 1; # 5; clk <= 0; # 5; + end + + // 
------------------------------------- + // Special warnings for important faults + // ------------------------------------- + always @(dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW) begin + if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs > 1) begin + $display("!!!!!! illegal instruction !!!!!!!!!!"); + $display("(as a reminder, MCAUSE and MEPC are set by this)"); + $display("at %0t ps, PCM %x, instr %0d, HADDR %x", $time, dut.hart.ifu.PCM, instrs, HADDR); + `ERROR + end + if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 5 && instrs != 0) begin + $display("!!!!!! illegal (physical) memory access !!!!!!!!!!"); + $display("(as a reminder, MCAUSE and MEPC are set by this)"); + $display("at %0t ps, PCM %x, instr %0d, HADDR %x", $time, dut.hart.ifu.PCM, instrs, HADDR); + `ERROR + end + end + + // ----------------------- + // RegFile Write Hijacking + // ----------------------- + always @(PCW or dut.hart.ieu.InstrValidW) begin + if(dut.hart.ieu.InstrValidW && PCW != 0) begin + // Hack to compensate for how Wally's MTIME may diverge from QEMU's MTIME (and that is okay) + if (PCtextW.substr(0,5) == "rdtime") begin + ignoreRFwrite <= 1; + scan_file_rf = $fscanf(data_file_rf, "%d\n", regNumExpected); + scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected); + force dut.hart.ieu.dp.regf.wd3 = regExpected; + // Hack to compensate for QEMU's incorrect MSTATUS + end else if (PCtextW.substr(0,3) == "csrr" && PCtextW.substr(10,16) == "mstatus") begin + force dut.hart.ieu.dp.regf.wd3 = dut.hart.ieu.dp.WriteDataW & ~64'ha00000000; + end else + release dut.hart.ieu.dp.regf.wd3; + end + end + + // ---------------- + // Big Chunky Block + // ---------------- + always @(reset or dut.hart.ifu.InstrRawD or dut.hart.ifu.PCD) begin// or negedge dut.hart.ifu.StallE) begin // Why do we care about StallE? Everything seems to run fine without it. + if(~HWRITE) begin // *** Should this need to consider HWRITE? 
+ #2; + // If PCD/InstrD aren't garbage + if (~reset && dut.hart.ifu.InstrRawD[15:0] !== {16{1'bx}} && dut.hart.ifu.PCD !== 64'h0) begin // && ~dut.hart.ifu.StallE) begin + // If Wally's PCD has updated + if (dut.hart.ifu.PCD !== lastPCD) begin + lastInstrDExpected = InstrDExpected; + lastPC <= dut.hart.ifu.PCD; + lastPC2 <= lastPC; + // If PCD isn't going to be flushed + if (~PCDwrong || lastPC == PCDexpected) begin + + // Stop if we've reached the end + if($feof(data_file_PCF)) begin + $display("no more PC data to read... CONGRATULATIONS!!!"); + `ERROR + end + + // Increment PC + `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); + `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); + + // NOP out certain instructions + if(dut.hart.ifu.PCD===PCDexpected) begin + if((dut.hart.ifu.PCD == 32'h80001dc6) || // for now, NOP out any stores to PLIC + (dut.hart.ifu.PCD == 32'h80001de0) || + (dut.hart.ifu.PCD == 32'h80001de2)) begin + $display("warning: NOPing out %s at PCD=%0x, instr %0d, time %0t", PCtextD, dut.hart.ifu.PCD, instrs, $time); + force InstrDExpected = 32'b0010011; + force dut.hart.ifu.InstrRawD = 32'b0010011; + while (clk != 0) #1; + while (clk != 1) #1; + release dut.hart.ifu.InstrRawD; + release InstrDExpected; + warningCount += 1; + forcedInstr = 1; + end else begin + forcedInstr = 0; + end + end + + // Increment instruction count + if (instrs <= 10 || (instrs <= 100 && instrs % 10 == 0) || + (instrs <= 1000 && instrs % 100 == 0) || (instrs <= 10000 && instrs % 1000 == 0) || + (instrs <= 100000 && instrs % 10000 == 0) || (instrs % 100000 == 0)) begin + $display("loaded %0d instructions", instrs); + end + instrs += 1; + + // Stop before bugs so "do" file can turn on waves + if (instrs == waveOnICount) begin + $display("turning on waves at %0d instructions", instrs); + $stop; + end + + // Check if PCD is going to be flushed due to a branch or jump + if (`BPRED_ENABLED) begin + 
PCDwrong = dut.hart.hzu.FlushD; //Old version: dut.hart.ifu.bpred.bpred.BPPredWrongE; <-- This old version failed to account for MRET. + end else begin + casex (lastInstrDExpected[31:0]) + 32'b00000000001000000000000001110011, // URET + 32'b00010000001000000000000001110011, // SRET + 32'b00110000001000000000000001110011, // MRET + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1101111, // JAL + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100111, // JALR + 32'bXXXXXXXXXXXXXXXXXXXXXXXXX1100011, // B + 32'bXXXXXXXXXXXXXXXX110XXXXXXXXXXX01, // C.BEQZ + 32'bXXXXXXXXXXXXXXXX111XXXXXXXXXXX01, // C.BNEZ + 32'bXXXXXXXXXXXXXXXX101XXXXXXXXXXX01: // C.J + PCDwrong = 1; + 32'bXXXXXXXXXXXXXXXX1001000000000010, // C.EBREAK: + 32'bXXXXXXXXXXXXXXXXX000XXXXX1110011: // Something that's not CSRR* + PCDwrong = 0; // tbh don't really know what should happen here + 32'b000110000000XXXXXXXXXXXXX1110011, // CSR* SATP, * + 32'bXXXXXXXXXXXXXXXX1000XXXXX0000010, // C.JR + 32'bXXXXXXXXXXXXXXXX1001XXXXX0000010: // C.JALR //this is RV64 only so no C.JAL + PCDwrong = 1; + default: + PCDwrong = 0; + endcase + end + + // Check PCD, InstrD + if (~PCDwrong && ~(dut.hart.ifu.PCD === PCDexpected)) begin + $display("%0t ps, instr %0d: PC does not equal PC expected: %x, %x", $time, instrs, dut.hart.ifu.PCD, PCDexpected); + `ERROR + end + InstrMask = InstrDExpected[1:0] == 2'b11 ? 32'hFFFFFFFF : 32'h0000FFFF; + if ((~forcedInstr) && (~PCDwrong) && ((InstrMask & dut.hart.ifu.InstrRawD) !== (InstrMask & InstrDExpected))) begin + $display("%0t ps, PCD %x, instr %0d: InstrD %x %s does not equal InstrDExpected %x %s", $time, dut.hart.ifu.PCD, instrs, dut.hart.ifu.InstrRawD, InstrDName, InstrDExpected, PCtextD); + `ERROR + end + + // Repeated instruction means QEMU had an interrupt which we need to spoof + if (PCFexpected == PCDexpected) begin + $display("Note at %0t ps, PCM %x %s, instr %0d: spoofing an interrupt", $time, dut.hart.ifu.PCM, PCtextM, instrs); + // Increment file pointers past the repeated instruction. 
+ `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); + `SCAN_PC(data_file_PCD, scan_file_PCD, PCtextD, PCtextD2, InstrDExpected, PCDexpected); + scan_file_memR = $fscanf(data_file_memR, "%x\n", readAdrExpected); + scan_file_memR = $fscanf(data_file_memR, "%x\n", HRDATA); + // Next force a timer interrupt (*** this may later need generalizing) + force dut.uncore.genblk1.clint.MTIME = dut.uncore.genblk1.clint.MTIMECMP + 1; + while (clk != 0) #1; + while (clk != 1) #1; + release dut.uncore.genblk1.clint.MTIME; + end + end + end + lastPCD = dut.hart.ifu.PCD; + end + end + end + + /////////////////////////////////////////////////////////////////////////////// + ///////////////////////////// PC,Instr Checking /////////////////////////////// + /////////////////////// (outside of Big Chunky Block) ///////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // -------------- + // Initialization + // -------------- + initial begin + data_file_PCF = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); + data_file_PCD = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); + data_file_PCM = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); + data_file_PCW = $fopen({`LINUX_TEST_VECTORS,"parsedPC.txt"}, "r"); + if (data_file_PCW == 0) begin + $display("file couldn't be opened"); + $stop; + end + // This makes sure PCF is one instr ahead of PCD + `SCAN_PC(data_file_PCF, scan_file_PCF, PCtextF, PCtextF2, InstrFExpected, PCFexpected); + // This makes sure PCM is one instr ahead of PCW + `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); + end + + // ------------------- + // Additional Hardware + // ------------------- flopenr #(`XLEN) PCWReg(clk, reset, ~dut.hart.ieu.dp.StallW, dut.hart.ifu.PCM, PCW); + // PCF stuff isn't actually checked + // it only exists for helping detecting duplicate instructions in PCD + // which are the result of interrupts hitting 
QEMU + // PCD checking already happens in "Big Chunky Block" + // PCM stuff isn't actually checked + // it only exists for helping detecting duplicate instructions in PCW + // which are the result of interrupts hitting QEMU + // ------------ + // PCW Checking + // ------------ + always @(PCW or dut.hart.ieu.InstrValidW) begin + if(dut.hart.ieu.InstrValidW && PCW != 0) begin + if($feof(data_file_PCW)) begin + $display("no more PC data to read"); + `ERROR + end + `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); + `SCAN_PC(data_file_PCW, scan_file_PCW, trashString, trashString, InstrWExpected, PCWexpected); + // If repeated instr + if (PCMexpected == PCWexpected) begin + // Increment file pointers past the repeated instruction. + `SCAN_PC(data_file_PCM, scan_file_PCM, trashString, trashString, InstrMExpected, PCMexpected); + `SCAN_PC(data_file_PCW, scan_file_PCW, trashString, trashString, InstrWExpected, PCWexpected); + end + if(~(PCW === PCWexpected)) begin + $display("%0t ps, instr %0d: PCW does not equal PCW expected: %x, %x", $time, instrs, PCW, PCWexpected); + `ERROR + end + end + end + + + /////////////////////////////////////////////////////////////////////////////// + /////////////////////////// RegFile Write Checking //////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // -------------- + // Initialization + // -------------- + initial begin + data_file_rf = $fopen({`LINUX_TEST_VECTORS,"parsedRegs.txt"}, "r"); + if (data_file_rf == 0) begin + $display("file couldn't be opened"); + $stop; + end + end + initial + ignoreRFwrite <= 0; + // -------- + // Checking + // -------- genvar i; generate for(i=1; i<32; i++) begin @@ -251,33 +393,32 @@ module testbench(); if ($time == 0) begin scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected); if (dut.hart.ieu.dp.regf.rf[i] != regExpected) begin - $display("%0t ps, instr %0d: rf[%0d] does not equal rf expected: 
%x, %x", $time, instrs, i, dut.hart.ieu.dp.regf.rf[i], regExpected); + $display("%0t ps, PCW %x, instr %0d: rf[%0d] does not equal rf expected: %x, %x", $time, PCW, instrs, i, dut.hart.ieu.dp.regf.rf[i], regExpected); `ERROR end end else begin - if (ignoreRFwrite) + if (ignoreRFwrite) // this allows other testbench elements to force WriteData to take on the next regExpected ignoreRFwrite <= 0; else begin scan_file_rf = $fscanf(data_file_rf, "%d\n", regNumExpected); scan_file_rf = $fscanf(data_file_rf, "%x\n", regExpected); end if (i != regNumExpected) begin - $display("%0t ps, instr %0d: wrong register changed: %0d, %0d expected to switch to %x from %x", $time, instrs, i, regNumExpected, regExpected, dut.hart.ieu.dp.regf.rf[regNumExpected]); + $display("%0t ps, PCW %x %s, instr %0d: wrong register changed: %0d, %0d expected to switch to %x from %x", $time, PCW, PCtextW, instrs, i, regNumExpected, regExpected, dut.hart.ieu.dp.regf.rf[regNumExpected]); `ERROR end - if (~equal(dut.hart.ieu.dp.regf.rf[i],regExpected, 0)) begin - $display("%0t ps, instr %0d: rf[%0d] does not equal rf expected: %x, %x", $time, instrs, i, dut.hart.ieu.dp.regf.rf[i], regExpected); + if (~(dut.hart.ieu.dp.regf.rf[i] === regExpected)) begin + $display("%0t ps, PCW %x %s, instr %0d: rf[%0d] does not equal rf expected: %x, %x", $time, PCW, PCtextW, instrs, i, dut.hart.ieu.dp.regf.rf[i], regExpected); `ERROR end - //if (dut.hart.ieu.dp.regf.rf[i] !== regExpected) begin - // force dut.hart.ieu.dp.regf.rf[i] = regExpected; - // release dut.hart.ieu.dp.regf.rf[i]; - //end end end end endgenerate + /////////////////////////////////////////////////////////////////////////////// + //////////////////////// Bus Unit Read/Write Checking ///////////////////////// + /////////////////////////////////////////////////////////////////////////////// // RAM and bootram are addressed in 64-bit blocks - this logic handles R/W // including subwords. 
Brief explanation on signals: // @@ -289,17 +430,33 @@ module testbench(); // In the linux boot, the processor spends the first ~5 instructions in // bootram, before jr jumps to main RAM - logic [63:0] readMask; + // -------------- + // Initialization + // -------------- + initial begin + data_file_memR = $fopen({`LINUX_TEST_VECTORS,"parsedMemRead.txt"}, "r"); + if (data_file_memR == 0) begin + $display("file couldn't be opened"); + $stop; + end + end + initial begin + data_file_memW = $fopen({`LINUX_TEST_VECTORS,"parsedMemWrite.txt"}, "r"); + if (data_file_memW == 0) begin + $display("file couldn't be opened"); + $stop; + end + end + + // ------------ + // Read Checker + // ------------ assign readMask = ((1 << (8*(1 << HSIZE))) - 1) << 8 * HADDR[2:0]; - - logic [`XLEN-1:0] readAdrExpected, readAdrTranslated; - always @(dut.HRDATA) begin #2; if (dut.hart.MemRWM[1] && (dut.hart.ebu.CaptureDataM) && dut.HRDATA !== {64{1'bx}}) begin - //$display("%0t", $time); if($feof(data_file_memR)) begin $display("no more memR data to read"); `ERROR @@ -307,36 +464,34 @@ module testbench(); scan_file_memR = $fscanf(data_file_memR, "%x\n", readAdrExpected); scan_file_memR = $fscanf(data_file_memR, "%x\n", HRDATA); assign readAdrTranslated = adrTranslator(readAdrExpected); - if (~equal(HADDR,readAdrTranslated,4)) begin - $display("%0t ps, instr %0d: HADDR does not equal readAdrExpected: %x, %x", $time, instrs, HADDR, readAdrTranslated); + if (~(HADDR === readAdrTranslated)) begin + $display("%0t ps, PCM %x %s, instr %0d: HADDR does not equal readAdrExpected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HADDR, readAdrTranslated); `ERROR end if ((readMask & HRDATA) !== (readMask & dut.HRDATA)) begin if (HADDR inside `LINUX_FIX_READ) begin - //$display("warning %0t ps, instr %0d, adr %0d: forcing HRDATA to expected: %x, %x", $time, instrs, HADDR, HRDATA, dut.HRDATA); + if (HADDR != 'h10000005) // Suppress the warning for UART LSR so we can read UART output + $display("warning 
%0t ps, PCM %x %s, instr %0d, adr %0d: forcing HRDATA to expected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HADDR, HRDATA, dut.HRDATA); force dut.uncore.HRDATA = HRDATA; #9; release dut.uncore.HRDATA; warningCount += 1; end else begin - $display("%0t ps, instr %0d: ExpectedHRDATA does not equal dut.HRDATA: %x, %x from address %x, %x", $time, instrs, HRDATA, dut.HRDATA, HADDR, HSIZE); + $display("%0t ps, PCM %x %s, instr %0d: ExpectedHRDATA does not equal dut.HRDATA: %x, %x from address %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HRDATA, dut.HRDATA, HADDR, HSIZE); `ERROR end end - //end else if(dut.hart.MemRWM[1]) begin - // $display("%x, %x, %x, %t", HADDR, dut.PCF, dut.HRDATA, $time); - end - end - logic [`XLEN-1:0] writeDataExpected, writeAdrExpected, writeAdrTranslated; - + // ------------- + // Write Checker + // ------------- // this might need to change //always @(HWDATA or HADDR or HSIZE or HWRITE) begin always @(negedge HWRITE) begin //#1; - if ($time != 0) begin + if (($time != 0) && ~dut.hart.hzu.FlushM) begin if($feof(data_file_memW)) begin $display("no more memW data to read"); `ERROR @@ -346,20 +501,28 @@ module testbench(); assign writeAdrTranslated = adrTranslator(writeAdrExpected); if (writeDataExpected != HWDATA && ~dut.uncore.HSELPLICD) begin - $display("%0t ps, instr %0d: HWDATA does not equal writeDataExpected: %x, %x", $time, instrs, HWDATA, writeDataExpected); + $display("%0t ps, PCM %x %s, instr %0d: HWDATA does not equal writeDataExpected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HWDATA, writeDataExpected); `ERROR end - if (~equal(writeAdrTranslated,HADDR,1) && ~dut.uncore.HSELPLICD) begin - $display("%0t ps, instr %0d: HADDR does not equal writeAdrExpected: %x, %x", $time, instrs, HADDR, writeAdrTranslated); + if (~(writeAdrTranslated === HADDR) && ~dut.uncore.HSELPLICD) begin + $display("%0t ps, PCM %x %s, instr %0d: HADDR does not equal writeAdrExpected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, HADDR, 
writeAdrTranslated); `ERROR end end end - integer totalCSR = 0; - logic [99:0] StartCSRexpected[63:0]; - string StartCSRname[99:0]; + /////////////////////////////////////////////////////////////////////////////// + //////////////////////////////// CSR Checking ///////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // -------------- + // Initialization + // -------------- initial begin + data_file_csr = $fopen({`LINUX_TEST_VECTORS,"parsedCSRs.txt"}, "r"); + if (data_file_csr == 0) begin + $display("file couldn't be opened"); + $stop; + end while(1) begin scan_file_csr = $fscanf(data_file_csr, "%s\n", StartCSRname[totalCSR]); if(StartCSRname[totalCSR] == "---") begin @@ -370,22 +533,10 @@ module testbench(); end end - always @(dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW) begin - if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 2 && instrs > 1) begin - $display("!!!!!! illegal instruction !!!!!!!!!!"); - $display("(as a reminder, MCAUSE and MEPC are set by this)"); - $display("at %0t ps, instr %0d, HADDR %x", $time, instrs, HADDR); - `ERROR - end - if (dut.hart.priv.csr.genblk1.csrm.MCAUSE_REGW == 5 && instrs != 0) begin - $display("!!!!!! 
illegal (physical) memory access !!!!!!!!!!"); - $display("(as a reminder, MCAUSE and MEPC are set by this)"); - $display("at %0t ps, instr %0d, HADDR %x", $time, instrs, HADDR); - `ERROR - end - end - - string MSTATUSstring = "MSTATUS"; + // -------------- + // Checker Macros + // -------------- + string MSTATUSstring = "MSTATUS"; //string variables seem to compare more reliably than string literals string SEPCstring = "SEPC"; string SCAUSEstring = "SCAUSE"; string SSTATUSstring = "SSTATUS"; @@ -394,7 +545,6 @@ module testbench(); string CSR; \ string ``CSR``name = `"CSR`"; \ string expected``CSR``name; \ - //CSR checking \ always @(``PATH``.``CSR``_REGW) begin \ if ($time > 1 && (`BUILDROOT != 1 || ``CSR``name != SSTATUSstring)) begin \ if (``CSR``name == SEPCstring) begin #1; end \ @@ -403,16 +553,16 @@ module testbench(); scan_file_csr = $fscanf(data_file_csr, "%s\n", expected``CSR``name); \ scan_file_csr = $fscanf(data_file_csr, "%x\n", expected``CSR``); \ if(expected``CSR``name.icompare(``CSR``name)) begin \ - $display("%0t ps, instr %0d: %s changed, expected %s", $time, instrs, `"CSR`", expected``CSR``name); \ + $display("%0t ps, PCM %x %s, instr %0d: %s changed, expected %s", $time, dut.hart.ifu.PCM, PCtextM, instrs, `"CSR`", expected``CSR``name); \ end \ if (``CSR``name == MSTATUSstring) begin \ if (``PATH``.``CSR``_REGW != ((``expected``CSR) | 64'ha00000000)) begin \ - $display("%0t ps, instr %0d: %s does not equal %s expected: %x, %x", $time, instrs, ``CSR``name, expected``CSR``name, ``PATH``.``CSR``_REGW, (``expected``CSR) | 64'ha00000000); \ + $display("%0t ps, PCM %x %s, instr %0d: %s (should be MSTATUS) does not equal %s expected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, ``CSR``name, expected``CSR``name, ``PATH``.``CSR``_REGW, (``expected``CSR) | 64'ha00000000); \ `ERROR \ end \ end else \ if (``PATH``.``CSR``_REGW != ``expected``CSR[$bits(``PATH``.``CSR``_REGW)-1:0]) begin \ - $display("%0t ps, instr %0d: %s does not equal %s expected: %x, 
%x", $time, instrs, ``CSR``name, expected``CSR``name, ``PATH``.``CSR``_REGW, ``expected``CSR); \ + $display("%0t ps, PCM %x %s, instr %0d: %s does not equal %s expected: %x, %x", $time, dut.hart.ifu.PCM, PCtextM, instrs, ``CSR``name, expected``CSR``name, ``PATH``.``CSR``_REGW, ``expected``CSR); \ `ERROR \ end \ end else begin \ @@ -420,7 +570,7 @@ module testbench(); for(integer j=0; j= 0; i--) begin + PAdr = BaseAdr + (VPN[i] << 3); + // dtim.RAM is 64-bit addressed. PAdr specifies a byte. We right shift + // by 3 (the PTE size) to get the requested 64-bit PTE. + PTE = dut.uncore.dtim.RAM[PAdr >> 3]; + PTE_R = PTE[1]; + PTE_X = PTE[3]; + if (PTE_R || PTE_X) begin + // Leaf page found + break; + end else begin + // Go to next level of table + BaseAdr = PTE[53:10] << 12; + end + end + // Determine which parts of the PTE page number to use based on the + // level of the page table we reached. + if (i == 2) begin + // Gigapage + assign adrTranslator = {8'b0, PTE[53:28], VPN[1], VPN[0], Offset}; + end else if (i == 1) begin + // Megapage + assign adrTranslator = {8'b0, PTE[53:19], VPN[0], Offset}; + end else begin + // Kilopage + assign adrTranslator = {8'b0, PTE[53:10], Offset}; + end + end else begin + // Direct translation if address translation is not on + assign adrTranslator = adrIn; + end + end + endfunction endmodule + + module instrTrackerTB( input logic clk, reset, input logic [31:0] InstrF,InstrD,InstrE,InstrM,InstrW, - output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); - - // stage Instr to Writeback for visualization - //flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); - + output string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); instrNameDecTB fdec(InstrF, InstrFName); instrNameDecTB ddec(InstrD, InstrDName); instrNameDecTB edec(InstrE, InstrEName);