diff --git a/.gitignore b/.gitignore index bd7e800df..3990c3823 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ __pycache__/ addins/riscv-arch-test/Makefile.include addins/riscv-tests/target addins/TestFloat-3e/build/Linux-x86_64-GCC/* -benchmarks/embench/wally*.json + #vsim work files to ignore transcript @@ -175,3 +175,6 @@ tests/fp/combined_IF_vectors/IF_vectors/*.tv sim/bp-results/*.log sim/branch*.log /tests/custom/fpga-test-sdc/bin/fpga-test-sdc +benchmarks/embench/wally*.json +benchmarks/embench/run* +sim/cfi.log diff --git a/.gitmodules b/.gitmodules index 9a4c7fbb8..054afa6fb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,16 +1,9 @@ [submodule "sky130/sky130_osu_sc_t12"] path = sky130/sky130_osu_sc_t12 url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/ -[submodule "addins/riscv-arch-test"] - path = addins/riscv-arch-test - url = https://github.com/riscv-non-isa/riscv-arch-test - ignore = dirty [submodule "addins/imperas-riscv-tests"] path = addins/imperas-riscv-tests url = https://github.com/riscv-ovpsim/imperas-riscv-tests -[submodule "addins/riscv-tests"] - path = addins/riscv-tests - url = https://github.com/riscv-software-src/riscv-tests [submodule "addins/riscv-dv"] path = addins/riscv-dv url = https://github.com/google/riscv-dv @@ -30,6 +23,9 @@ [submodule "addins/vivado-boards"] path = addins/vivado-boards url = https://github.com/Digilent/vivado-boards/ -[submodule "addins/vivado-risc-v"] - path = addins/vivado-risc-v - url = https://github.com/eugene-tarassov/vivado-risc-v.git +[submodule "addins/ahbsdc"] + path = addins/ahbsdc + url = git@github.com:jacobpease/ahbsdc.git +[submodule "addins/riscv-arch-test"] + path = addins/riscv-arch-test + url = https://github.com/riscv-non-isa/riscv-arch-test diff --git a/addins/ahbsdc b/addins/ahbsdc new file mode 160000 index 000000000..5df21aa66 --- /dev/null +++ b/addins/ahbsdc @@ -0,0 +1 @@ +Subproject commit 5df21aa6625eca120e64ea353ca641aff37d90b2 diff --git 
a/addins/embench-iot b/addins/embench-iot index 1480febc3..4c5eb8798 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 1480febc3ace5f471baeee4b1ae0d8fea16e4762 +Subproject commit 4c5eb87983f51ca7fcf7855306877b3d1c3aabf1 diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 197179fdc..eb0a38922 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 197179fdc9dfeeca821e848f373c897a3fdae86c +Subproject commit eb0a3892215ad2384702db02da1551a59701ec67 diff --git a/addins/riscv-tests b/addins/riscv-tests deleted file mode 160000 index cf04274f5..000000000 --- a/addins/riscv-tests +++ /dev/null @@ -1 +0,0 @@ -Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7 diff --git a/addins/vivado-risc-v b/addins/vivado-risc-v deleted file mode 160000 index c76a8613a..000000000 --- a/addins/vivado-risc-v +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c76a8613a177b3a04face2cb8e15dd07a8d2fc40 diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 97c7660c5..d7a18b7e2 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -3,6 +3,7 @@ # Compile Embench for Wally embench_dir = ../../addins/embench-iot +ARCH=rv32imac_zicsr all: build run: build size sim @@ -15,7 +16,7 @@ buildsize: build_speedopt_size build_sizeopt_size # uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size build_speedopt_speed: - $(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles" + $(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-O2 -nostartfiles -march=$(ARCH)" # 
remove files not used in embench1.0 When changing to 2.0, restore these files #rm -rf $(embench_dir)/bd_speedopt_speed/src/md5sum #rm -rf $(embench_dir)/bd_speedopt_speed/src/tarfind @@ -23,7 +24,7 @@ build_speedopt_speed: find $(embench_dir)/bd_speedopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done build_sizeopt_speed: - $(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-Os -nostartfiles" + $(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-Os -nostartfiles -march=$(ARCH)" # remove files not used in embench1.0 When changing to 2.0, restore these files #rm -rf $(embench_dir)/bd_sizeopt_speed/src/md5sum #rm -rf $(embench_dir)/bd_sizeopt_speed/src/tarfind @@ -32,10 +33,10 @@ build_sizeopt_speed: # uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size build_speedopt_size: - $(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-O2 -msave-restore" --dummy-libs="libgcc libm libc crt0" + $(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-O2 -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0" build_sizeopt_size: - $(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles 
../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0" + $(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-Os -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0" # builds dependencies, then launches modelsim and finally runs python wrapper script to present results sim: modelsim_build_memfile modelsim_run speed diff --git a/benchmarks/embench/embench_arch_sweep.py b/benchmarks/embench/embench_arch_sweep.py new file mode 100755 index 000000000..130a70581 --- /dev/null +++ b/benchmarks/embench/embench_arch_sweep.py @@ -0,0 +1,87 @@ +#!/usr/bin/python3 +# embench_arch_sweep.py +# David_Harris@hmc.edu 16 November 2023 +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +# Run embench on a variety of architectures and collate results + +import os +from datetime import datetime +import re +import collections + +#archs = ["rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr", "rv32imafdc_zba_zbb_zbc_zbs_zicsr"] +archs = ["rv32imafdc_zba_zbb_zbc_zbs_zicsr", "rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr"] + +def calcgeomean(d, arch): + progs = ["aha-mont64", "crc32", "cubic", "edn", "huffbench", "matmult-int", "minver", "nbody", "nettle-aes", "nettle-sha256", "nsichneu", "picojpeg", "qrduino", "sglib-combined", "slre", "st", "statemate", "ud", "wikisort"] + result = 1.0 + for p in progs: + #val = d[arch][p] + val = d[arch].get(p, 1.0) + result = result *float(val) + result = pow(result, (1.0/float(len(progs)))) + return result + +def tabulate_arch_sweep(directory): + for case in ["wallySizeOpt_size", "wallySpeedOpt_speed"]: + d = collections.defaultdict(dict) + for arch in archs: + file = case+"_"+arch+".json" + file_path = 
os.path.join(directory, file) + lines = [] + try: + f = open(file_path, "r") + lines = f.readlines() + except: + f.close() + #print(file_path+" does not exist") + for line in lines: + #print("File: "+file+" Line: "+line) + #p = re.compile('".*" : .*,') + p = r'"([^"]*)" : ([^,\n]+)' + match = re.search(p, line) + if match: + prog = match.group(1) + result = match.group(2); + d[arch][prog] = result; + #print(match.group(1)+" " + match.group(2)) + f.close() + for arch in [""] + archs: + print (arch, end="\t") + print("") + for prog in d[archs[0]]: + print(prog, end="\t") + for arch in archs: + entry = d[arch].get(prog, "n/a"); + print (entry, end="\t") + print("") + print("New geo mean", end="\t") + for arch in archs: + geomean = calcgeomean(d, arch) + print(geomean, end="\t") + print("") + +def run_arch_sweep(): + # make a folder whose name depends on the date + # Get current date + current_date = datetime.now() + # Format date as a string in the format YYYYMMDD + date_string = current_date.strftime('%Y%m%d_%H%M%S') + dir = "run_"+date_string + # Create a directory with the date string as its name + os.mkdir(dir) + + # make a directory with the current date as its name + + # sweep the runs and save the results in the run directory + for arch in archs: + os.system("make clean") + os.system("make run ARCH="+arch) + for res in ["SizeOpt_size", "SizeOpt_speed", "SpeedOpt_size", "SpeedOpt_speed"]: + os.system("mv -f wally"+res+".json "+dir+"/wally"+res+"_"+arch+".json") + return dir + +directory = run_arch_sweep() +#directory = "run_20231117_082325" +tabulate_arch_sweep(directory) \ No newline at end of file diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index 0015e2bba..35be93fd6 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -45,7 +45,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; -localparam ZICCLSM_SUPPORTED = 0; +localparam 
ZICCLSM_SUPPORTED = 1; localparam SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 3b306a005..52baad796 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -74,8 +74,8 @@ localparam ICACHE_LINELENINBITS = 32'd512; // Integer Divider Configuration // IDIV_BITSPERCYCLE must be 1, 2, or 4 -localparam IDIV_BITSPERCYCLE = 32'd4; -localparam IDIV_ON_FPU = 1; +localparam IDIV_BITSPERCYCLE = 32'd2; +localparam IDIV_ON_FPU = 0; // Legal number of PMP entries are 0, 16, or 64 localparam PMP_ENTRIES = 32'd16; @@ -169,7 +169,7 @@ localparam ZMMUL_SUPPORTED = 0; // FPU division architecture localparam RADIX = 32'd4; -localparam DIVCOPIES = 32'd4; +localparam DIVCOPIES = 32'd2; // bit manipulation localparam ZBA_SUPPORTED = 1; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index e00c9153d..84f4de599 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -150,7 +150,7 @@ localparam PLIC_SDC_ID = 32'd9; localparam BPRED_SUPPORTED = 1; localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT localparam BPRED_NUM_LHR = 32'd6; -localparam BPRED_SIZE = 32'd6; +localparam BPRED_SIZE = 32'd10; localparam BTB_SIZE = 32'd10; localparam RAS_SIZE = 32'd16; diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 48f02b848..55bca569f 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -93,16 +93,21 @@ localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF); localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2); localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 
S_BIAS : H_BIAS); +// divider r and rk (bits per digit, bits per cycle) +localparam LOGR = $clog2(RADIX); // r = log(R) bits per digit +localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated + +// intermediate division parameters not directly used in fdivsqrt hardware +localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right +//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step. +localparam DIVMINb = ((FPDIVMINb(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6))); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); -localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4))); +localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVMINb+1+NF) > (3*NF+4) ? 
(DIVMINb+1+NF) : (3*NF+4))); // Disable spurious Verilator warnings diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index 7e982fbde..67f85783c 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -179,13 +179,10 @@ localparam cvw_t P = '{ NORMSHIFTSZ : NORMSHIFTSZ, LOGNORMSHIFTSZ : LOGNORMSHIFTSZ, CORRSHIFTSZ : CORRSHIFTSZ, - DIVN : DIVN, LOGR : LOGR, RK : RK, - LOGRK : LOGRK, FPDUR : FPDUR, DURLEN : DURLEN, DIVb : DIVb, - DIVBLEN : DIVBLEN, - DIVa : DIVa + DIVBLEN : DIVBLEN }; diff --git a/fpga/constraints/marked_debug.txt b/fpga/constraints/marked_debug.txt index 3973fc451..582af32a8 100644 --- a/fpga/constraints/marked_debug.txt +++ b/fpga/constraints/marked_debug.txt @@ -1,131 +1,12 @@ -lsu/lsu.sv: logic IEUAdrM -lsu/lsu.sv: logic WriteDataM -lsu/lsu.sv: logic LSUHADDR -lsu/lsu.sv: logic HRDATA -lsu/lsu.sv: logic LSUHWDATA -lsu/lsu.sv: logic LSUHREADY -lsu/lsu.sv: logic LSUHWRITE -lsu/lsu.sv: logic LSUHSIZE -lsu/lsu.sv: logic LSUHBURST -lsu/lsu.sv: logic LSUHTRANS -lsu/lsu.sv: logic LSUHWSTRB -lsu/lsu.sv: logic IHAdrM -ieu/regfile.sv: logic rf -ieu/datapath.sv: logic RegWriteW -hazard/hazard.sv: logic BPPredWrongE -hazard/hazard.sv: logic LoadStallD -hazard/hazard.sv: logic FCvtIntStallD -hazard/hazard.sv: logic DivBusyE -hazard/hazard.sv: logic EcallFaultM -hazard/hazard.sv: logic WFIStallM -hazard/hazard.sv: logic StallF -hazard/hazard.sv: logic FlushD -cache/cachefsm.sv: statetype CurrState -wally/wallypipelinedcore.sv: logic TrapM -wally/wallypipelinedcore.sv: logic SrcAM -wally/wallypipelinedcore.sv: logic InstrM wally/wallypipelinedcore.sv: logic PCM -wally/wallypipelinedcore.sv: logic MemRWM +wally/wallypipelinedcore.sv: logic TrapM wally/wallypipelinedcore.sv: logic InstrValidM -wally/wallypipelinedcore.sv: logic WriteDataM -wally/wallypipelinedcore.sv: logic IEUAdrM -wally/wallypipelinedcore.sv: logic HRDATA -ifu/spill.sv: statetype CurrState -ifu/ifu.sv: logic IFUStallF -ifu/ifu.sv: logic 
IFUHADDR -ifu/ifu.sv: logic HRDATA -ifu/ifu.sv: logic IFUHREADY -ifu/ifu.sv: logic IFUHWRITE -ifu/ifu.sv: logic IFUHSIZE -ifu/ifu.sv: logic IFUHBURST -ifu/ifu.sv: logic IFUHTRANS -ifu/ifu.sv: logic PCF -ifu/ifu.sv: logic PCNextF -ifu/ifu.sv: logic PCPF -ifu/ifu.sv: logic PostSpillInstrRawF -mmu/hptw.sv: logic ITLBWriteF -mmu/hptw.sv: statetype WalkerState -privileged/csrs.sv: logic CSRSReadValM -privileged/csrs.sv: logic SEPC_REGW -privileged/csrs.sv: logic MIP_REGW -privileged/csrs.sv: logic SSCRATCH_REGW -privileged/csrs.sv: logic SCAUSE_REGW -privileged/csr.sv: logic CSRReadValM -privileged/csr.sv: logic CSRSrcM -privileged/csr.sv: logic CSRWriteValM -privileged/csr.sv: logic MSTATUS_REGW -privileged/trap.sv: logic InstrMisalignedFaultM -privileged/trap.sv: logic BreakpointFaultM -privileged/trap.sv: logic LoadAccessFaultM -privileged/trap.sv: logic LoadPageFaultM -privileged/trap.sv: logic mretM -privileged/trap.sv: logic MIP_REGW -privileged/trap.sv: logic PendingIntsM -privileged/privileged.sv: logic CSRReadM -privileged/privileged.sv: logic InterruptM -privileged/csrc.sv: logic HPMCOUNTER_REGW -privileged/csri.sv: logic MExtInt -privileged/csri.sv: logic MIP_REGW_writeabl -privileged/csrm.sv: logic MIP_REGW -privileged/csrm.sv: logic MEPC_REGW -privileged/csrm.sv: logic MEDELEG_REGW -privileged/csrm.sv: logic MIDELEG_REGW -privileged/csrm.sv: logic MSCRATCH_REGW -privileged/csrm.sv: logic MCAUSE_REGW -uncore/uart_apb.sv: logic SIN -uncore/uart_apb.sv: logic SOUT -uncore/uart_apb.sv: logic OUT1b -uncore/uartPC16550D.sv: logic RBR -uncore/uartPC16550D.sv: logic FCR -uncore/uartPC16550D.sv: logic IER -uncore/uartPC16550D.sv: logic MCR -uncore/uartPC16550D.sv: logic baudpulse -uncore/uartPC16550D.sv: statetype rxstate -uncore/uartPC16550D.sv: logic rxfifo -uncore/uartPC16550D.sv: logic txfifo -uncore/uartPC16550D.sv: logic rxfifohead -uncore/uartPC16550D.sv: logic rxfifoentries -uncore/uartPC16550D.sv: logic RXBR -uncore/uartPC16550D.sv: logic rxtimeoutcnt 
-uncore/uartPC16550D.sv: logic rxparityerr -uncore/uartPC16550D.sv: logic rxdataready -uncore/uartPC16550D.sv: logic rxfifoempty -uncore/uartPC16550D.sv: logic rxdata -uncore/uartPC16550D.sv: logic RXerrbit -uncore/uartPC16550D.sv: logic rxfullbitunwrapped -uncore/uartPC16550D.sv: logic txdata -uncore/uartPC16550D.sv: logic txnextbit -uncore/uartPC16550D.sv: logic txfifoempty -uncore/uartPC16550D.sv: logic fifoenabled -uncore/uartPC16550D.sv: logic RXerr -uncore/uartPC16550D.sv: logic THRE -uncore/uartPC16550D.sv: logic rxdataavailintr -uncore/uartPC16550D.sv: logic intrID -uncore/uncore.sv: logic HSELEXTSDCD -uncore/plic_apb.sv: logic MExtInt -uncore/plic_apb.sv: logic Din -uncore/plic_apb.sv: logic requests -uncore/plic_apb.sv: logic intPriority -uncore/plic_apb.sv: logic intInProgress -uncore/plic_apb.sv: logic intThreshold -uncore/plic_apb.sv: logic intEn -uncore/plic_apb.sv: logic intClaim -uncore/plic_apb.sv: logic irqMatrix -uncore/plic_apb.sv: logic priorities_with_irqs -uncore/plic_apb.sv: logic max_priority_with_irqs -uncore/plic_apb.sv: logic irqs_at_max_priority -uncore/plic_apb.sv: logic threshMask -uncore/clint_apb.sv: logic MTIME -uncore/clint_apb.sv: logic MTIMECMP -ebu/ebu.sv: logic HCLK -ebu/ebu.sv: logic HREADY -ebu/ebu.sv: logic HRESP -ebu/ebu.sv: logic HADDR -ebu/ebu.sv: logic HWRITE -ebu/ebu.sv: logic HSIZE -ebu/ebu.sv: logic HBURST -ebu/ebu.sv: logic HPROT -ebu/ebu.sv: logic HTRANS -ebu/ebu.sv: logic HMASTLOC -ebu/buscachefsm.sv: busstatetype CurrState -ebu/busfsm.sv: busstatetype CurrState +wally/wallypipelinedcore.sv: logic InstrM +lsu/lsu.sv: logic IEUAdrM +lsu/lsu.sv: logic PAdrM +lsu/lsu.sv: logic ReadDataM +lsu/lsu.sv: logic WriteDataM +lsu/lsu.sv: logic MemRWM +mmu/hptw.sv: logic SATP_REGW +privileged/csr.sv: logic MENVCFG_REGW +privileged/csr.sv: logic SENVCFG_REGW diff --git a/fpga/constraints/small-debug.xdc b/fpga/constraints/small-debug.xdc index 7bf498a79..8400b7281 100644 --- a/fpga/constraints/small-debug.xdc +++ 
b/fpga/constraints/small-debug.xdc @@ -53,6 +53,26 @@ set_property port_width 48 [get_debug_ports u_ila_0/probe6] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe6] connect_debug_port u_ila_0/probe6 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[21]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[62]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/hptw.hptw/SATP_REGW[63]}]] +create_debug_port u_ila_0 probe +set_property port_width 64 [get_debug_ports u_ila_0/probe7] +set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe7] +connect_debug_port u_ila_0/probe7 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[24]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[47]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[53]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/ReadDataM[63]} ]] + +create_debug_port u_ila_0 probe +set_property port_width 64 [get_debug_ports u_ila_0/probe8] +set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe8] +connect_debug_port u_ila_0/probe8 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[15]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[43]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[47]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/lsu/WriteDataM[63]} ]] + +create_debug_port u_ila_0 probe +set_property port_width 64 [get_debug_ports u_ila_0/probe9] +set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe9] +connect_debug_port u_ila_0/probe9 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[3]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[27]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[47]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[51]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/MENVCFG_REGW[63]} ]] + +create_debug_port u_ila_0 probe +set_property port_width 64 [get_debug_ports u_ila_0/probe10] +set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe10] +connect_debug_port u_ila_0/probe10 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[9]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[33]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[47]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[57]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/SENVCFG_REGW[63]} ]] + # the debug hub has issues with the clocks from the mmcm so lets give up an connect to the 100Mhz input clock. #connect_debug_port dbg_hub/clk [get_nets default_100mhz_clk] connect_debug_port dbg_hub/clk [get_nets CPUCLK] diff --git a/fpga/generator/wally.tcl b/fpga/generator/wally.tcl index d699c3d21..bad9981df 100644 --- a/fpga/generator/wally.tcl +++ b/fpga/generator/wally.tcl @@ -42,13 +42,9 @@ if {$board=="ArtyA7"} { # read in all other rtl read_verilog -sv [glob -type f ../src/CopiedFiles_do_not_add_to_repo/*/*.sv ../src/CopiedFiles_do_not_add_to_repo/*/*/*.sv] # *** Once the sdc is updated to use ahb changes these to system verilog. 
-read_verilog [glob -type f ../src/axi_sdc_controller.v] -read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_master.v] -read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_serial_host.v] -read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_master.v] -read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_serial_host.v] +read_verilog [glob -type f ../../addins/ahbsdc/sdc/*.v] -set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/vivado-risc-v/sdc} [current_fileset] +set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/ahbsdc/sdc} [current_fileset] if {$board=="ArtyA7"} { add_files -fileset constrs_1 -norecurse ../constraints/constraints-$board.xdc diff --git a/fpga/src/boot.mem b/fpga/src/boot.mem new file mode 100644 index 000000000..4ad2f0657 --- /dev/null +++ b/fpga/src/boot.mem @@ -0,0 +1,513 @@ +8001819300002197 +4281420141014081 +4481440143814301 +4681460145814501 +4881480147814701 +4a814a0149814901 +4c814c014b814b01 +4e814e014d814d01 +0110011b4f814f01 +059b45011161016e +0004063705fe0010 +1f6000ef8006061b +0ff003930000100f +4e952e3110060e37 +c602829b0053f2b7 +2023fe02dfe312fd +829b0053f2b7007e +fe02dfe312fdc602 +4de31efd000e2023 +059bf1402573fdd0 +0000061705e20870 +0010029b01260613 +68110002806702fe +0085179bf0080813 +038008130107f7b3 +480508a86c632781 +1533357902a87963 +38030000181700a8 +1c6301057833f268 +081a403018370808 +0105783342280813 +1815751308081063 +00367513c295e14d +654ded510207e793 +c1701ff00613f130 +0637c530fff6861b +664dcd10167d0200 +17fd001007b7c25c +859b5a5cc20cd21c +02062a23dfed0007 +4785fffd561c664d +4501461c06f59063 +4a1cc35c465cc31c +e29dc75c4a5cc71c +0c63086008138082 +1ae30a9008130105 +b7710017e793f905 +e793b75901d7e793 +5f5c674db7410197 +66cd02072e23dffd +fff78513ff7d5698 +40a0053300a03533 +bfb100a7e7938082 +e0a2715d8082557d +e486f052f44ef84a +fa13e85aec56fc26 +843289ae892a0086 
+00959993000a1463 +864ac4396b054a85 +0009859b4549870a +0004049b05540363 +86a66485008b7363 +870a87aaec7ff0ef +4531458146014681 +f0ef0207c9639c05 +17820094979beb1f +873e020541639381 +993e99ba020a1963 +870aa8094501f85d +e8bff0ef45454685 +60a64505fe0559e3 +79a2794274e26406 +61616b426ae27a02 +9301020497138082 +f40647057179b7f1 +d79867cdec26f022 +dff58b85571c674d +2423d35c03600793 +fffd571c674d0207 +0007a737b00026f3 +b00027f311f70713 +674dfef77de38f95 +4f5ccf9d8b895b1c +26f3cf5c0027e793 +071305f5e737b000 +8f95b00027f30ff7 +4f5c674dfef77de3 +b00026f3cf5c9bf5 +67f7071300989737 +7de38f95b00027f3 +458146014681fef7 +ddbff0ef4501870a +059346014681870a +dcbff0ef45211aa0 +1aa007134782e939 +816393d117d24411 +85220ff0041302e7 +614564e270a27402 +46e3da5ff0efa0cd +0207c7634782fe05 +458146014681870a +d8bff0ef03700513 +46014681870a87aa +0a900513403005b7 +4409bf7dfc07d9e3 +c3998b8583f9bfe1 +4681870a00846413 +f0ef450945814601 +870afa0540e3d59f +123405b746014681 +46e3d45ff0ef450d +870a77c14482f805 +85a6460146818cfd +4ae3d2dff0ef451d +d3d8470567cdf605 +000f4737b00026f3 +b00027f323f70713 +67cdfef77de38f95 +4681870a0007ae23 +0370051385a64601 +f2054fe3cf7ff0ef +458146014681870a +ce3ff0ef08600513 +4681870af20545e3 +4541200005934601 +f0055de3ccfff0ef +3023bf010113bf09 +4605842a86aa4081 +40113423850a4585 +86a265a6da5ff0ef +d99ff0ef04084605 +2201358322813603 +86a2260508700513 +d81ff0ef05629e0d +2a0135832a813603 +9e0d86a226054505 +3603d6bff0ef057e +0513320135833281 +9e0d86a226054010 +3083d53ff0ef0556 +4501400134034081 +0000808241010113 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 
+0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 
+0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 
+0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 
+0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +0000000000000000 +00600100d2e3ca40 diff --git a/linux/Makefile b/linux/Makefile index 3d880bc08..373142615 100644 --- a/linux/Makefile +++ b/linux/Makefile @@ -27,14 +27,6 @@ BINARIES := fw_jump.elf vmlinux busybox OBJDUMPS := $(foreach name, $(BINARIES), $(basename $(name) .elf)) OBJDUMPS := $(foreach name, $(OBJDUMPS), $(DIS)/$(name).objdump) -define linuxDir = -$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") -endef - -define busyboxDir = -$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") -endef - .PHONY: all generate disassemble install clean cleanDTB cleanDriver test all: @@ -46,8 +38,7 @@ all: # Temp rule for debugging test: - @echo $(linuxDir) - @echo $(busyboxDir) + echo $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") generate: $(DTB) $(IMAGES) @@ -74,11 +65,13 @@ $(DIS)/%.objdump: $(IMAGES)/%.elf $(DIS)/%.objdump: $(IMAGES)/% riscv64-unknown-elf-objdump -S $< >> $@ -$(IMAGES)/vmlinux: $(call linuxDir)/vmlinux - cp $< $@ +$(IMAGES)/vmlinux: + linuxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") ;\ + cp $$linuxDir/vmlinux $@ ;\ -$(IMAGES)/busybox: $(call busyboxDir)/busybox - cp $< $@ +$(IMAGES)/busybox: + busyboxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") ;\ + cp $$busyboxDir/busybox $@ ;\ # Generating new Buildroot directories -------------------------------- diff --git a/linux/devicetree/wally-vcu118.dts b/linux/devicetree/wally-vcu118.dts index e0257c9a6..20448609b 100644 --- a/linux/devicetree/wally-vcu118.dts +++ b/linux/devicetree/wally-vcu118.dts @@ -31,6 +31,7 @@ status = "okay"; compatible = 
"riscv"; riscv,isa = "rv64imafdcsu"; + riscv,isa-extensions = "imafdc", "sstc", "svinval", "svnapot", "svpbmt", "zba", "zbb", "zbc", "zbs", "zicbom", "zicbop", "zicbopz", "zicntr", "zicsr", "zifencei", "zihpm"; mmu-type = "riscv,sv48"; interrupt-controller { diff --git a/sim/imperas.ic b/sim/imperas.ic index adb10dcad..6ebc7a5b4 100644 --- a/sim/imperas.ic +++ b/sim/imperas.ic @@ -22,6 +22,9 @@ --override cpu/Zicbom=T --override cpu/Zicbop=T --override cpu/Zicboz=T +--override cmomp_bytes=64 # Zic64b +--override cmoz_bytes=64 # Zic64b +--override lr_sc_grain=64 # Za64rs # 64 KiB continuous huge pages supported --override cpu/Svpbmt=T @@ -40,7 +43,7 @@ --override cpu/reset_address=0x80000000 ---override cpu/unaligned=F +--override cpu/unaligned=T # Zicclsm (should be true) --override cpu/ignore_non_leaf_DAU=1 --override cpu/wfi_is_nop=T --override cpu/misa_Extensions_mask=0x0 @@ -88,7 +91,7 @@ # Add Imperas simulator application instruction tracing --verbose ---trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange --traceafter 0 +#--trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange --traceafter 300000000 --override cpu/debugflags=6 --override cpu/verbose=1 --override cpu/show_c_prefix=T diff --git a/sim/run-imperas-linux.sh b/sim/run-imperas-linux.sh index fd265cb9e..6a49f46e9 100755 --- a/sim/run-imperas-linux.sh +++ b/sim/run-imperas-linux.sh @@ -7,4 +7,4 @@ export OTHERFLAGS="+TRACE2LOG_ENABLE=1" #export OTHERFLAGS="+TRACE2LOG_ENABLE=1 +TRACE2LOG_AFTER=10500000" export OTHERFLAGS="" -vsim -c -do "do wally-linux-imperas.do buildroot buildroot-no-trace $::env(RISCV) 0 0 0" +vsim -c -do "do wally-linux-imperas.do buildroot buildroot-no-trace $::env(RISCV) 0 0 0" diff --git a/sim/wally-linux-imperas.do b/sim/wally-linux-imperas.do index f173f67c9..196c780be 100644 --- a/sim/wally-linux-imperas.do +++ b/sim/wally-linux-imperas.do @@ -40,6 +40,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { 
#-- Run the Simulation #run -all + run 7000 ms add log -recursive /* do linux-wave.do run -all @@ -87,9 +88,10 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { #run 100 ns #force -deposit testbench/dut/core/priv/priv/csr/csri/IE_REGW 16'h2aa #force -deposit testbench/dut/uncore/uncore/clint/clint/MTIMECMP 64'h1000 + run 7000 ms + add log -recursive /testbench/dut/* + do wave.do run 14000 ms - #add log -recursive /* - #do linux-wave.do #run -all exec ./slack-notifier/slack-notifier.py diff --git a/src/cvw.sv b/src/cvw.sv index 8b3c87a59..6ee18c27f 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -271,15 +271,12 @@ typedef struct packed { int CORRSHIFTSZ; // division constants - int DIVN ; int LOGR ; int RK ; - int LOGRK ; int FPDUR ; int DURLEN ; int DIVb ; int DIVBLEN ; - int DIVa ; } cvw_t; diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 5c5fa0f57..a4e20f229 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -45,8 +45,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( input logic IntDivE, W64E, output logic DivStickyM, output logic FDivBusyE, IFDivStartE, FDivDoneE, - output logic [P.NE+1:0] QeM, - output logic [P.DIVb:0] QmM, + output logic [P.NE+1:0] UeM, // Exponent result + output logic [P.DIVb:0] UmM, // Significand result output logic [P.XLEN-1:0] FIntDivResultM ); @@ -67,17 +67,17 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [P.DIVBLEN:0] nM, mM; // Shift amounts + logic [P.DIVBLEN-1:0] IntNormShiftM; // Integer normalizatoin shift amount logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .FmtE, .SqrtE, .XZeroE, 
.Funct3E, .QeM, .X, .D, .CyclesE, + .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, - .BZeroM, .nM, .mM, .AM, + .BZeroM, .IntNormShiftM, .AM, .IntDivM, .W64M, .ALTBM, .AsM, .BsM); fdivsqrtfsm #(P) fdivsqrtfsm( // FSM @@ -94,8 +94,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, - .QmM, .WZeroE, .DivStickyM, + .UmM, .WZeroE, .DivStickyM, // Int-specific - .nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, + .IntNormShiftM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index ed28c9355..1e6eda56c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -30,13 +30,11 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic IntDivE, - input logic [P.DIVBLEN:0] nE, + input logic [P.DIVBLEN-1:0] IntResultBitsE, output logic [P.DURLEN-1:0] CyclesE ); - logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits - // DIVN = P.NF+3 - // NS = NF + 1 - // N = NS or NS+2 for div/sqrt. + + logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits /* verilator lint_off WIDTH */ if (P.FPSIZES == 1) @@ -64,12 +62,21 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( P.Q_FMT: Nf = P.Q_NF; endcase + // Cycle logic + // P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk. 
+ // Integer division needs p fractional + r integer result bits + // FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits + // FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle. + // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk) + always_comb begin - if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below? - // if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 - else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); - else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES); + if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1 + else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits + + if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? 
IntResultBitsE : FPResultBitsE; + else ResultBitsE = FPResultBitsE; + + CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index 5531276df..cf243a84b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -28,16 +28,19 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] Fmt, - input logic [P.NE-1:0] Xe, Ye, + input logic [P.NE-1:0] Xe, Ye, // input exponents input logic Sqrt, input logic XZero, - input logic [P.DIVBLEN:0] ell, m, - output logic [P.NE+1:0] Qe + input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in Xe and Ye + output logic [P.NE+1:0] Ue // result exponent ); + logic [P.NE-2:0] Bias; logic [P.NE+1:0] SXExp; logic [P.NE+1:0] SExp; logic [P.NE+1:0] DExp; + + // Determine exponent bias according to the format if (P.FPSIZES == 1) begin assign Bias = (P.NE-1)'(P.BIAS); @@ -63,10 +66,14 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( 2'h2: Bias = (P.NE-1)'(P.H_BIAS); endcase end + + // Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS); assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias}; - // correct exponent for subnormal input's normalization shifts + // division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias}); - assign Qe = Sqrt ? SExp : DExp; + + // Select square root or division exponent + assign Ue = Sqrt ? 
SExp : DExp; endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv index 990e3f19f..cf398f570 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv @@ -28,12 +28,12 @@ module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) ( input logic up, uz, - input logic [P.DIVb+3:0] C, U, UM, - output logic [P.DIVb+3:0] F + input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms) + output logic [P.DIVb+3:0] F // Q4.DIVb ); - logic [P.DIVb+3:0] FP, FN, FZ; + logic [P.DIVb+3:0] FP, FN, FZ; // Q4.DIVb - // Generate for both positive and negative bits + // Generate for both positive and negative quotient digits assign FP = ~(U << 1) & C; assign FN = (UM << 1) | (C & ~(C << 2)); assign FZ = '0; diff --git a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv index fc648f5bd..e2cec1ab4 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv @@ -27,14 +27,14 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) ( - input logic [3:0] udigit, - input logic [P.DIVb+3:0] C, U, UM, - output logic [P.DIVb+3:0] F + input logic [3:0] udigit, // {2, 1, -1, -2}; all cold for zero + input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms) + output logic [P.DIVb+3:0] F // Q4.DIVb ); - logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; + logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; // Q4.DIVb - // Generate for both positive and negative bits - assign F2 = (~U << 2) & (C << 2); + // Generate for both positive and negative digits + assign F2 = (~U << 2) & (C << 2); // assign F1 = ~(U << 1) & C; assign F0 = '0; assign FN1 = (UM << 1) | (C & ~(C << 3)); diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 0e2cba90e..862d53b25 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -57,7 +57,7 
@@ module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) ( // terminate immediately on special cases assign FSpecialCaseE = XZeroE | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE; if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE; - else assign SpecialCaseE = FSpecialCaseE; + else assign SpecialCaseE = FSpecialCaseE; flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc always_ff @(posedge clk) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index 1d40e8d9a..863d94837 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -31,31 +31,31 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( input logic IFDivStartE, input logic FDivBusyE, input logic SqrtE, - input logic [P.DIVb+3:0] X, D, - output logic [P.DIVb:0] FirstU, FirstUM, - output logic [P.DIVb+1:0] FirstC, + input logic [P.DIVb+3:0] X, D, // Q4.DIVb + output logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb + output logic [P.DIVb+1:0] FirstC, // Q2.DIVb output logic Firstun, - output logic [P.DIVb+3:0] FirstWS, FirstWC + output logic [P.DIVb+3:0] FirstWS, FirstWC // Q4.DIVb ); /* verilator lint_off UNOPTFLAT */ - logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b - logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b - logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b - logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b - logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b - logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b - logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b - logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b - logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b - logic [P.DIVb+1:0] initC; // Q2.b + logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.DIVb + logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.DIVb + logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.DIVb + logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.DIVb + logic 
[P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb + logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.DIVb + logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.DIVb + logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.DIVb + logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.DIVb + logic [P.DIVb+1:0] initC; // Q2.DIVb logic [P.DIVCOPIES-1:0] un; - logic [P.DIVb+3:0] WSN, WCN; // Q4.b - logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b - logic [P.DIVb+1:0] NextC; - logic [P.DIVb:0] UMux, UMMux; - logic [P.DIVb:0] initU, initUM; + logic [P.DIVb+3:0] WSN, WCN; // Q4.DIVb + logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.DIVb + logic [P.DIVb+1:0] NextC; // Q2.DIVb + logic [P.DIVb:0] UMux, UMMux; // U1.DIVb + logic [P.DIVb:0] initU, initUM; // U1.DIVb /* verilator lint_on UNOPTFLAT */ // Top Muxes and Registers @@ -104,14 +104,14 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( for(i=0; $unsigned(i)>> P.LOGR; - assign UnsignedQuotM = {3'b000, PreQmM}; + assign UnsignedQuotM = {3'b000, PreUmM}; // Integer remainder: sticky and sign correction muxes assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative @@ -110,9 +109,8 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM); // Select quotient or remainder and do normalization shift - mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM); mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM); - assign PreIntResultM = $signed(PreResultM >>> NormShiftM); + assign PreIntResultM = $signed(PreResultM >>> IntNormShiftM); // special case logic // terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B| @@ -120,7 +118,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( if (BZeroM) begin // Divide by zero if (RemOpM) IntDivResultM = AM; else IntDivResultM = {(P.XLEN){1'b1}}; - end else if (ALTBM) begin // Numerator is zero + 
end else if (ALTBM) begin // Numerator is small if (RemOpM) IntDivResultM = AM; else IntDivResultM = '0; end else IntDivResultM = PreIntResultM[P.XLEN-1:0]; diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 6c397576a..145bf9a68 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -29,37 +29,39 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic clk, input logic IFDivStartE, - input logic [P.NF:0] Xm, Ym, - input logic [P.NE-1:0] Xe, Ye, + input logic [P.NF:0] Xm, Ym, // Floating-point significands + input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents input logic [P.FMTBITS-1:0] FmtE, input logic SqrtE, input logic XZeroE, input logic [2:0] Funct3E, - output logic [P.NE+1:0] QeM, - output logic [P.DIVb+3:0] X, D, + output logic [P.NE+1:0] UeM, // biased exponent of result + output logic [P.DIVb+3:0] X, D, // Q4.DIVb // Int-specific - input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B + input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU input logic IntDivE, W64E, + // Outputs output logic ISpecialCaseE, output logic [P.DURLEN-1:0] CyclesE, - output logic [P.DIVBLEN:0] nM, mM, + output logic [P.DIVBLEN-1:0] IntNormShiftM, output logic ALTBM, IntDivM, W64M, output logic AsM, BsM, BZeroM, output logic [P.XLEN-1:0] AM ); - logic [P.DIVb-1:0] Xfract, Dfract; - logic [P.DIVb:0] PreSqrtX; + logic [P.DIVb:0] Xnorm, Dnorm; logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed - logic [P.NE+1:0] QeE; // Quotient Exponent (FP only) - logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [P.NE+1:0] UeE; // Result Exponent (FP only) + logic [P.DIVb:0] IFX, IFD; // Correctly-sized 
inputs for iterator, selected from int or fp input + logic [P.DIVBLEN-1:0] mE, ell; // Leading zeros of inputs + logic [P.DIVBLEN-1:0] IntResultBitsE; // bits in integer result logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic SignedDivE; // signed division logic AsE, BsE; // Signs of integer inputs logic [P.XLEN-1:0] AE; // input A after W64 adjustment - logic ALTBE; + logic ALTBE; + logic EvenExp; ////////////////////////////////////////////////////// // Integer Preprocessing @@ -89,12 +91,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB); // Select integer or floating point inputs - mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX); - mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD); + mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX); + mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD); mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); end else begin // Int not supported - assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}}; - assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}}; + assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}}; + assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}}; assign NumerZeroE = XZeroE; end @@ -103,12 +105,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////// // count leading zeros for Subnorm FP and to normalize integer inputs - lzc #(P.DIVb) lzcX (IFX, ell); - lzc #(P.DIVb) lzcY (IFD, mE); + lzc #(P.DIVb+1) lzcX (IFX, ell); + lzc #(P.DIVb+1) lzcY (IFD, mE); - // Normalization shift: shift off leading one - assign Xfract = (IFX << ell) << 1; - assign Dfract = (IFD << mE) << 1; + // Normalization shift: shift leading one into most significant bit + assign Xnorm = (IFX << ell); 
+ assign Dnorm = (IFD << mE); ////////////////////////////////////////////////////// // Integer Right Shift to digit boundary @@ -117,31 +119,28 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////// if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported - logic [P.DIVBLEN:0] ZeroDiff, p; + logic [P.DIVBLEN-1:0] ZeroDiff, p; // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros - assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros) - mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p); + assign ALTBE = ZeroDiff[P.DIVBLEN-1]; // A less than B (A has more leading zeros) + mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p); + + /* verilator lint_off WIDTH */ + assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) + /* verilator lint_on WIDTH */ // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; - // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps - - if (P.LOGRK > 0) begin // more than 1 bit per cycle - logic [P.LOGRK-1:0] IntTrunc, RightShiftX; - logic [P.DIVBLEN:0] TotalIntBits, IntSteps; - /* verilator lint_off WIDTH */ - assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) - assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator - assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div - assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits - assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount - assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps + // calculate right shift amount RightShiftX to complete in discrete number of steps + if (P.RK > 1) begin // more than 1 bit per cycle + 
logic [$clog2(P.RK)-1:0] RightShiftX; + /* verilator lint_off WIDTH */ + assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount + assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting - assign nE = p; assign DivXShifted = DivX; end end else begin @@ -150,22 +149,53 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////////////////// // Floating-Point Preprocessing - // append leading 1 (for nonzero inputs) + // Extend to Q4.b format // shift square root to be in range [1/4, 1) // Normalized numbers are shifted right by 1 if the exponent is odd // Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. - // NOTE: there might be a discrepancy that X is never right shifted by 2. However - // it comes out in the wash and gives the right answer. Investigate later if possible. - ////////////////////////////////////////////////////// + ////////////////////////////////////////////////////// - assign DivX = {3'b000, ~NumerZeroE, Xfract}; + assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division // Sqrt is initialized on step one as R(X-1), so depends on Radix - mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); - if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; - else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; - mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); - + // If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter + // Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2) + // Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even + // Now (X-1) is negative.
Formed by placing all 1s in all four integer bits (in Q4.b form), keeping X in fraction bits + // Then multiply by R is left shift by r (1 or 2 for radix 2 or 4) + // This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4 + // Subtracting 2 is equivalent to adding 1110. Subtracting 4 is equivalent to adding 1100. Prepend leading 1s to do a free subtraction. + // This also means only one extra fractional bit is needed because we never shift right by more than 1. + // Radix Exponent odd Exponent Even + // 2 x-2 = 2(x/2 - 1) x/2 - 2 = 2(x/4 - 1) + // 4 2(x)-4 = 4(x/2 - 1) 2(x/2)-4 = 4(x/4 - 1) + // Summary: PreSqrtX = r(x/2or4 - 1) + + logic [P.DIVb:0] PreSqrtX; + assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even + mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even + if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1) + else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; // 2PreSqrtX - 4 = 4(PreSqrtX/2 - 1) + +/* + // Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift + // This saves one bit in DIVb because there is no initial right shift. + // However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s. + // That is an optimization for another day.
+ if (P.RADIX == 2) begin + logic [P.DIVb:0] PreSqrtX; // U1.DIVb + mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even + assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1) + end else begin + logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb + mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even + assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1) + end +*/ + + // Initialize X for division or square root + mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); + ////////////////////////////////////////////////////// // Selet integer or floating-point operands ////////////////////////////////////////////////////// @@ -176,28 +206,37 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( assign X = PreShiftX; end - // Divisior register - flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); + // Divisior register + flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D); // Floating-point exponent - fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); - flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM); + fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Ue(UeE)); + flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); + fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs + logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; + logic RemOpE; + + /* verilator lint_off WIDTH */ + assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. 
rn = Cycles * r * k - r ***explain + assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift + /* verilator lint_on WIDTH */ + assign RemOpE = Funct3E[1]; + mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE); + // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); - flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); - flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); - flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); - flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); - flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); + flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); + flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); + flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM); + flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM); + flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM); if (P.XLEN==64) - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/src/fpu/fdivsqrt/fdivsqrtstage2.sv index bb8d87234..40a2a5a01 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -29,33 +29,27 @@ /* verilator lint_off UNOPTFLAT */ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) ( - input logic [P.DIVb+3:0] D, DBar, - input logic [P.DIVb:0] U, UM, - input logic [P.DIVb+3:0] WS, WC, - input logic [P.DIVb+1:0] C, - input logic SqrtE, - output logic un, - output logic [P.DIVb+1:0] CNext, - output logic [P.DIVb:0] UNext, UMNext, - output logic [P.DIVb+3:0] WSNext, WCNext + input logic [P.DIVb+3:0] D, DBar, // Q4.DIVb + input logic [P.DIVb:0] U, UM, // U1.DIVb + 
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb + input logic [P.DIVb+1:0] C, // Q2.DIVb + input logic SqrtE, + output logic un, + output logic [P.DIVb+1:0] CNext, // Q2.DIVb + output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb + output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb ); /* verilator lint_on UNOPTFLAT */ - logic [P.DIVb+3:0] Dsel; - logic up, uz; - logic [P.DIVb+3:0] F; - logic [P.DIVb+3:0] AddIn; - logic [P.DIVb+3:0] WSA, WCA; + logic [P.DIVb+3:0] Dsel; // Q4.DIVb + logic up, uz; + logic [P.DIVb+3:0] F; // Q4.DIVb + logic [P.DIVb+3:0] AddIn; // Q4.DIVb + logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb - // Qmient Selection logic + // Quotient Selection logic // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un) - // q encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 - fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un); + fdivsqrtuslc2 uslc2(.WS(WS[P.DIVb+3:P.DIVb]), .WC(WC[P.DIVb+3:P.DIVb]), .up, .uz, .un); // Sqrt F generation. 
Extend C, U, UM to Q4.k fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); @@ -66,7 +60,7 @@ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) ( else if (uz) Dsel = '0; else Dsel = D; // un - // Partial Product Generation + // Residual Update // WSA, WCA = WS + WC - qD mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn); csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA); diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index c6477ec68..a24c1155f 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -27,40 +27,33 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( - input logic [P.DIVb+3:0] D, DBar, D2, DBar2, - input logic [P.DIVb:0] U,UM, - input logic [P.DIVb+3:0] WS, WC, - input logic [P.DIVb+1:0] C, - input logic SqrtE, j1, - output logic [P.DIVb+1:0] CNext, - output logic un, - output logic [P.DIVb:0] UNext, UMNext, - output logic [P.DIVb+3:0] WSNext, WCNext + input logic [P.DIVb+3:0] D, DBar, D2, DBar2, // Q4.DIVb + input logic [P.DIVb:0] U,UM, // U1.DIVb + input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb + input logic [P.DIVb+1:0] C, // Q2.DIVb + input logic SqrtE, j1, + output logic [P.DIVb+1:0] CNext, // Q2.DIVb + output logic un, + output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb + output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb ); - logic [P.DIVb+3:0] Dsel; - logic [3:0] udigit; - logic [P.DIVb+3:0] F; - logic [P.DIVb+3:0] AddIn; - logic [4:0] Smsbs; - logic [2:0] Dmsbs; - logic [7:0] WCmsbs, WSmsbs; - logic CarryIn; - logic [P.DIVb+3:0] WSA, WCA; + logic [P.DIVb+3:0] Dsel; // Q4.DIVb + logic [3:0] udigit; // {+2, +1, -1, -2} or 0000 for 0 + logic [P.DIVb+3:0] F; // Q4.DIVb + logic [P.DIVb+3:0] AddIn; // Q4.DIVb + logic [4:0] Smsbs; // U1.4 + logic [2:0] Dmsbs; // U0.3 drop leading 1 from D + logic [7:0] WCmsbs, WSmsbs; 
// U4.4 + logic CarryIn; + logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb // Digit Selection logic - // u encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 - assign Smsbs = U[P.DIVb:P.DIVb-4]; - assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; - assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; - assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; - - fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit); + assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root + assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1 + assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual + assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual + fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit); assign un = 1'b0; // unused for radix 4 // F generation logic diff --git a/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv b/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv index bde28cfba..c895fa2ce 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuotfc2.sv @@ -31,15 +31,15 @@ /////////////////////////////// module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) ( input logic up, un, - input logic [P.DIVb+1:0] C, - input logic [P.DIVb:0] U, UM, - output logic [P.DIVb:0] UNext, UMNext + input logic [P.DIVb+1:0] C, // Q2.DIVb + input logic [P.DIVb:0] U, UM, // U1.DIVb + output logic [P.DIVb:0] UNext, UMNext // U1.DIVb ); // The on-the-fly converter transfers the divsqrt // bits to the quotient as they come. 
- logic [P.DIVb:0] K; + logic [P.DIVb:0] K; // U1.DIVb one-hot - assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding + assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding always_comb begin if (up) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv b/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv index 403ccf051..b12b9174b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuotfc4.sv @@ -28,15 +28,15 @@ module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) ( input logic [3:0] udigit, - input logic [P.DIVb:0] U, UM, - input logic [P.DIVb:0] C, - output logic [P.DIVb:0] UNext, UMNext + input logic [P.DIVb:0] U, UM, // U1.DIVb + input logic [P.DIVb:0] C, // Q1.DIVb + output logic [P.DIVb:0] UNext, UMNext // U1.DIVb ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. // Use this otfc for division and square root. - logic [P.DIVb:0] K1, K2, K3; + logic [P.DIVb:0] K1, K2, K3; // U1.DIVb assign K1 = (C&~(C << 1)); // K assign K2 = ((C << 1)&~(C << 2)); // 2K assign K3 = (C & ~(C << 2)); // 3K diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel2.sv b/src/fpu/fdivsqrt/fdivsqrtuslc2.sv similarity index 57% rename from src/fpu/fdivsqrt/fdivsqrtqsel2.sv rename to src/fpu/fdivsqrt/fdivsqrtuslc2.sv index fe32924e1..e4fcfeadf 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc2.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrtqsel2.sv +// fdivsqrtuslc2.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu // Modified:13 January 2022 // -// Purpose: Radix 2 Quotient Digit Selection +// Purpose: Radix 2 Unified Quotient/Square Root Digit Selection // // Documentation: RISC-V System on Chip Design Chapter 13 // @@ -18,7 +18,7 @@ // except in compliance with the License, or, at your option, the Apache License version 2.0. 
You // may obtain a copy of the License at // -// https://solderpad.org/licenses/SHL-2.1/ +// https://solderpad.org/licenses/SHL-2.1/ // // Unless required by applicable law or agreed to in writing, any work distributed under the // License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, @@ -26,31 +26,26 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module fdivsqrtqsel2 ( - input logic [3:0] ps, pc, - output logic up, uz, un +module fdivsqrtuslc2 ( + input logic [3:0] WS, WC, // Q4.0 most significant bits of redundant residual + output logic up, uz, un // {+1, 0, -1} ); - logic [3:0] p, g; - logic magnitude, sign; + logic sign; + + // Carry chain logic determines if W = WS + WC = -1, < -1, > -1 to choose 0, -1, 1 respectively - // The quotient selection logic is presented for simplicity, not - // for efficiency. You can probably optimize your logic to - // select the proper divisor with less delay.
+ //if p2 * p1 * p0, W = -1 and choose digit of 0 + assign uz = ((WS[2]^WC[2]) & (WS[1]^WC[1]) & + (WS[0]^WC[0])); - // Quotient equations from EE371 lecture notes 13-20 - assign p = ps ^ pc; - assign g = ps & pc; - - assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) & - (ps[0]^pc[0])); - assign sign = (ps[3]^pc[3])^ - (ps[2] & pc[2] | ((ps[2]^pc[2]) & - (ps[1]&pc[1] | ((ps[1]^pc[1]) & - (ps[0]&pc[0]))))); + // Otherwise determine sign using carry chain: sign = p3 ^ g_2:0 + assign sign = (WS[3]^WC[3])^ + (WS[2] & WC[2] | ((WS[2]^WC[2]) & + (WS[1]&WC[1] | ((WS[1]^WC[1]) & + (WS[0]&WC[0]))))); // Produce digit = +1, 0, or -1 - assign up = magnitude & ~sign; - assign uz = ~magnitude; - assign un = magnitude & sign; + assign up = ~uz & ~sign; + assign un = ~uz & sign; endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4.sv similarity index 72% rename from src/fpu/fdivsqrt/fdivsqrtqsel4.sv rename to src/fpu/fdivsqrt/fdivsqrtuslc4.sv index de520bef2..b44b34a35 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrtqsel4.sv +// fdivsqrtuslc4.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu // Modified:13 January 2022 // -// Purpose: Radix 4 Quotient Digit Selection +// Purpose: Table-based Radix 4 Unified Quotient/Square Root Digit Selection // // Documentation: RISC-V System on Chip Design Chapter 13 // @@ -26,25 +26,25 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module fdivsqrtqsel4 ( - input logic [2:0] Dmsbs, - input logic [4:0] Smsbs, - input logic [7:0] WSmsbs, WCmsbs, +module fdivsqrtuslc4 ( + input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 + input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation + input logic [7:0] WSmsbs, WCmsbs, // Q4.4 redundant residual most significant bits input logic Sqrt, j1, - output logic [3:0] udigit + output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot ); - logic [6:0] Wmsbs; - logic [7:0] PreWmsbs; - logic [2:0] A; + logic [7:0] PreWmsbs; // Q4.4 nonredundant residual msbs + logic [6:0] Wmsbs; // Q4.3 truncated nonredundant residual + logic [2:0] A; // U0.3 upper bits of D or Smsbs, discarding integer bit - assign PreWmsbs = WCmsbs + WSmsbs; - assign Wmsbs = PreWmsbs[7:1]; + assign PreWmsbs = WCmsbs + WSmsbs; // add redundant residual to find msbs + assign Wmsbs = PreWmsbs[7:1]; // truncate least significant bit to Q4.3 to index table // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... 
// Wmsbs = | | - logic [3:0] USel4[1023:0]; + logic [3:0] USel4[1023:0]; // 1024-entry table indexed with 3 bits of A and 7 bits of Wmsbs // Prepopulate selection table; this is constant at compile time always_comb begin @@ -101,10 +101,10 @@ module fdivsqrtqsel4 ( // Select A always_comb if (Sqrt) begin - if (j1) A = 3'b101; - else if (Smsbs == 5'b10000) A = 3'b111; - else A = Smsbs[2:0]; - end else A = Dmsbs; + if (j1) A = 3'b101; // on first sqrt iteration A = .101 + else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111 + else A = Smsbs[2:0]; // otherwise use A = 2S (in U0.3 format) + end else A = Dmsbs; // division uses A = D (in U0.3 format, dropping leading 1) // Select quotient digit from lookup table based on A and W assign udigit = USel4[{A,Wmsbs}]; diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv similarity index 85% rename from src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv rename to src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv index 0eb3b71c0..ccb5e618a 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrtqsel4cmp.sv +// fdivsqrtuslc4cmp.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu // Modified:13 January 2022 // -// Purpose: Comparator-based Radix 4 Quotient Digit Selection +// Purpose: Comparator-based Radix 4 Unified Quotient/Square Root Digit Selection // // Documentation: RISC-V System on Chip Design Chapter 13 // @@ -26,12 +26,12 @@ // and limitations under the License.
//////////////////////////////////////////////////////////////////////////////////////////////// -module fdivsqrtqsel4cmp ( - input logic [2:0] Dmsbs, - input logic [4:0] Smsbs, - input logic [7:0] WSmsbs, WCmsbs, +module fdivsqrtuslc4cmp ( + input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1 + input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation + input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits input logic SqrtE, j1, - output logic [3:0] udigit + output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot ); logic [6:0] Wmsbs; logic [7:0] PreWmsbs; diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index f71999471..ffd9cf49a 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -133,8 +133,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) // divide signals - logic [P.DIVb:0] QmM; // fdivsqrt signifcand - logic [P.NE+1:0] QeM; // fdivsqrt exponent + logic [P.DIVb:0] UmM; // fdivsqrt signifcand + logic [P.NE+1:0] UeM; // fdivsqrt exponent logic DivStickyM; // fdivsqrt sticky bit logic FDivDoneE, IFDivStartE; // fdivsqrt control signals logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) @@ -242,8 +242,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, - .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, - .QmM, .FIntDivResultM); + .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM, + .UmM, .FIntDivResultM); // compare: fmin/fmax, flt/fle/feq fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), @@ -326,9 +326,9 @@ module fpu import cvw::*; #(parameter cvw_t P) ( 
////////////////////////////////////////////////////////////////////////////////////////// postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), - .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), + .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivUm(UmM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), - .FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), + .FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/src/fpu/postproc/divshiftcalc.sv b/src/fpu/postproc/divshiftcalc.sv index d560714db..380f8f5e6 100644 --- a/src/fpu/postproc/divshiftcalc.sv +++ b/src/fpu/postproc/divshiftcalc.sv @@ -27,8 +27,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module divshiftcalc import cvw::*; #(parameter cvw_t P) ( - input logic [P.DIVb:0] DivQm, // divsqrt significand - input logic [P.NE+1:0] DivQe, // divsqrt exponent + input logic [P.DIVb:0] DivUm, // divsqrt significand + input logic [P.NE+1:0] DivUe, // divsqrt exponent output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input output logic DivResSubnorm, // is the divsqrt result subnormal @@ -41,23 +41,23 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) ( // is the result subnormal // if the exponent is 1 then the result needs to be normalized then the result is 
Subnormalizes - assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]); + assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]); // if the result is subnormal - // 00000000x.xxxxxx... Exp = DivQe - // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 - // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 + // 00000000x.xxxxxx... Exp = DivUe + // .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1 + // .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 - // Left shift amount = DivQe+NF+1-1 - assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe; + // Left shift amount = DivUe+NF+1-1 + assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe; assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1]; // if the result is normalized - // 00000000x.xxxxxx... Exp = DivQe - // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivQe+1 - // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) + // 00000000x.xxxxxx... Exp = DivUe + // .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivUe+1 + // 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? Exp = DivUe-1 (determined after) // inital Left shift amount = NF // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF); @@ -68,5 +68,5 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) ( assign DivShiftAmt = DivResSubnorm ? 
DivSubnormShiftAmt : NormShift; // pre-shift the divider result for normalization - assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; + assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}}; endmodule diff --git a/src/fpu/postproc/postprocess.sv b/src/fpu/postproc/postprocess.sv index ee96b34d2..05db352cd 100644 --- a/src/fpu/postproc/postprocess.sv +++ b/src/fpu/postproc/postprocess.sv @@ -48,8 +48,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic DivSticky, // divider sticky bit - input logic [P.NE+1:0] DivQe, // divsqrt exponent - input logic [P.DIVb:0] DivQm, // divsqrt significand + input logic [P.NE+1:0] DivUe, // divsqrt exponent + input logic [P.DIVb:0] DivUm, // divsqrt significand // conversion signals input logic CvtCs, // the result's sign input logic [P.NE:0] CvtCe, // the calculated expoent @@ -91,7 +91,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // division singals logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input - logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift + logic [P.NE+1:0] Ue; // divsqrt corrected exponent after corretion shift logic DivByZero; // divide by zero flag logic DivResSubnorm; // is the divsqrt result subnormal logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed) @@ -146,7 +146,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); + divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); // select 
which unit's output to shift always_comb @@ -174,7 +174,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // correct for LZA/divsqrt error shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp, - .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); + .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivUe, .Ue, .FmaSZero, .Shifted, .FmaMe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -189,7 +189,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) ( // calulate result sign used in rounding unit roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); - round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, + round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Ue, .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf, .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me); diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index 0a5d9ecc5..e01ff376b 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -39,7 +39,7 @@ module round import cvw::*; #(parameter cvw_t P) ( // divsqrt input logic DivOp, // is a division opperation being done input logic DivSticky, // divsqrt sticky bit - input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent + input logic [P.NE+1:0] Ue, // the divsqrt calculated expoent // cvt input logic CvtOp, // is a convert opperation being done input logic ToInt, // is the cvt op a cvt to integer @@ -300,8 +300,8 @@ module round import cvw::*; #(parameter cvw_t P) ( case(PostProcSel) 2'b10: Me = FmaMe; // fma 2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt - // 2'b01: Me = DivDone ? Qe : '0; // divide - 2'b01: Me = Qe; // divide + // 2'b01: Me = DivDone ? 
Ue : '0; // divide + 2'b01: Me = Ue; // divide default: Me = '0; endcase diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index 9e0473667..f5860b42d 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -31,7 +31,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // divsqrt input logic DivOp, // is it a divsqrt opperation input logic DivResSubnorm, // is the divsqrt result subnormal - input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent + input logic [P.NE+1:0] DivUe, // the divsqrt result's exponent input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed) //fma input logic FmaOp, // is it an fma opperation @@ -41,7 +41,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // output output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction - output logic [P.NE+1:0] Qe // corrected exponent for divider + output logic [P.NE+1:0] Ue // corrected exponent for divider ); logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction @@ -61,7 +61,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // correct the shifting of the divsqrt caused by producing a result in (2, .5] range // condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm) - assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1)); + assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1)); assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2]; assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1]; mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); @@ -87,5 +87,5 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // the quotent is in the range [.5,2) if there is no early termination // if the quotent < 1 and not Subnormal then 
subtract 1 to account for the normalization shift - assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1}; + assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1}; endmodule diff --git a/src/fpu/unpackinput.sv b/src/fpu/unpackinput.sv index c551e8173..b3d7f901e 100644 --- a/src/fpu/unpackinput.sv +++ b/src/fpu/unpackinput.sv @@ -83,7 +83,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing always_comb if (BadNaNBox) begin -// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; end else PostBox = In; @@ -143,8 +142,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( if (BadNaNBox) begin case (Fmt) P.FMT: PostBox = In; -// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]}; -// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]}; P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}}; P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}}; default: PostBox = 'x; @@ -230,9 +227,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) ( if (BadNaNBox) begin case (Fmt) 2'b11: PostBox = In; -// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]}; -// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]}; -// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]}; 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}}; 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}}; 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, 
{(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}}; diff --git a/src/generic/mem/rom1p1r.sv b/src/generic/mem/rom1p1r.sv index 93f8c82df..617a779ff 100644 --- a/src/generic/mem/rom1p1r.sv +++ b/src/generic/mem/rom1p1r.sv @@ -33,7 +33,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) ); // Core Memory - logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; + (*rom_style="block" *) logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; // dh 10/30/23 ROM macros are presently commented out // because they don't point to a generated ROM @@ -41,15 +41,23 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); end if ((`USE_SRAM == 1) & (ADDR_WDITH == 7) & (DATA_WIDTH == 32)) begin - rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); + rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); - end else begin */ - always @ (posedge clk) - if(ce) dout <= ROM[addr]; + end else begin */ + + initial begin + if (PRELOAD_ENABLED) begin + $readmemh("$WALLY/fpga/src/boot.mem", ROM, 0); + end + end + + always @ (posedge clk) begin + if(ce) dout <= ROM[addr]; + end // for FPGA, initialize with zero-stage bootloader - if(PRELOAD_ENABLED) begin + /*if(PRELOAD_ENABLED) begin initial begin ROM[0]=64'h8001819300002197; ROM[1]=64'h4281420141014081; @@ -195,6 +203,6 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) ROM[141]=64'h0000808241010113; end // if (PRELOAD_ENABLED) - end + end*/ endmodule diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index cb70605c0..12bd83bc5 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -26,8 +26,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module hazard ( - // Detect hazards +module hazard import cvw::*; #(parameter cvw_t P) ( input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, input logic LSUStallM, IFUStallF, diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index bb7638514..8c366a2ef 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -131,7 +131,7 @@ module datapath import cvw::*; #(parameter cvw_t P) ( if (P.F_SUPPORTED) begin:fpmux mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM); mux2 #(P.XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW); - if (P.IDIV_ON_FPU) begin + if (P.IDIV_ON_FPU & P.F_SUPPORTED) begin mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW); end else begin assign MulDivResultW = MDUResultW; diff --git a/src/ifu/irom.sv b/src/ifu/irom.sv index 0d4286e43..0b29c72cf 100644 --- a/src/ifu/irom.sv +++ b/src/ifu/irom.sv @@ -39,7 +39,9 @@ module irom import cvw::*; #(parameter cvw_t P) ( logic [31:0] RawIROMInstrF; logic [2:1] AdrD; - rom1p1r #(ADDR_WDITH, P.XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull)); + // preload IROM with the FPGA bootloader by default so that it syntehsizes to something, avoiding having the IEU optimized away because instructions are all 0 + // the testbench replaces these dummy contents with the actual program of interest during simulation + rom1p1r #(ADDR_WDITH, P.XLEN, 1) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull)); if (P.XLEN == 32) assign RawIROMInstrF = IROMInstrFFull; else begin // IROM is aligned to XLEN words, but instructions are 32 bits. 
Select between the two diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index ebda946a4..f1a757f91 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -92,7 +92,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit ); - localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED; + localparam logic MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED; + localparam MLEN = MISALIGN_SUPPORT ? 2*P.LLEN : P.LLEN; // widen buffer for misaligned accessess logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer @@ -118,9 +119,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data /* verilator lint_off WIDTHEXPAND */ - logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data - logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] LSUWriteDataSpillM; // Final write data - logic [((MISALIGN_SUPPORT+1)*P.LLEN-1)/8:0] ByteMaskSpillM; // Selects which bytes within a word to write + logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data + logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data + logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data diff --git a/src/mdu/mdu.sv b/src/mdu/mdu.sv index 83327a460..e152fc6de 100644 --- a/src/mdu/mdu.sv +++ b/src/mdu/mdu.sv @@ -57,7 +57,7 @@ module mdu import cvw::*; #(parameter cvw_t P) ( // Start a divide when a new division instruction is received and the divider isn't already busy or finishing // When IDIV_ON_FPU 
is set, use the FPU divider instead // In ZMMUL, with M_SUPPORTED = 0, omit the divider - if ((P.IDIV_ON_FPU) || (!P.M_SUPPORTED)) begin:nodiv + if ((P.IDIV_ON_FPU & P.F_SUPPORTED) || (!P.M_SUPPORTED)) begin:nodiv assign QuotM = 0; assign RemM = 0; assign DivBusyE = 0; diff --git a/src/uncore/spi_apb.sv b/src/uncore/spi_apb.sv index 4db435be6..b0649bf93 100644 --- a/src/uncore/spi_apb.sv +++ b/src/uncore/spi_apb.sv @@ -2,10 +2,14 @@ // spi_apb.sv // // Written: Naiche Whyte-Aguayo nwhyteaguayo@g.hmc.edu 11/16/2022 - // // Purpose: SPI peripheral -// See FU540-C000-v1.0 for specifications +// +// SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it is consists of synchronous 8 byte transmit and recieve FIFOs connected to shift registers. +// The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing +// to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output, +// along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY. +// Current limitations: Flash read sequencer mode not implemented, dual and quad mode not supported // // A component of the Wally configurable RISC-V project. // @@ -25,19 +29,6 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -// Current limitations: Flash read sequencer mode not implemented, dual and quad modes untestable with current test plan. - -// Attempt to move from >= comparisons by initializing in FSM differently -// Parameterize SynchFIFO -// look at ReadIncrement/WriteIncrement delay necessity - -/* -SPI module is written to the specifications described in FU540-C000-v1.0. 
At the top level, it is consists of synchronous 8 byte transmit and recieve FIFOs connected to shift registers. -The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing -to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output, -along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY. -*/ - module spi_apb import cvw::*; #(parameter cvw_t P) ( input logic PCLK, PRESETn, input logic PSEL, @@ -54,27 +45,27 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( output logic SPIIntr ); - //SPI control registers. Refer to SiFive FU540-C000 manual + // SPI control registers. Refer to SiFive FU540-C000 manual logic [11:0] SckDiv; - logic [1:0] SckMode; - logic [1:0] ChipSelectID; - logic [3:0] ChipSelectDef; - logic [1:0] ChipSelectMode; + logic [1:0] SckMode; + logic [1:0] ChipSelectID; + logic [3:0] ChipSelectDef; + logic [1:0] ChipSelectMode; logic [15:0] Delay0, Delay1; - logic [4:0] Format; - logic [7:0] ReceiveData; - logic [2:0] TransmitWatermark, ReceiveWatermark; - logic [8:0] TransmitData; - logic [1:0] InterruptEnable, InterruptPending; + logic [4:0] Format; + logic [7:0] ReceiveData; + logic [2:0] TransmitWatermark, ReceiveWatermark; + logic [8:0] TransmitData; + logic [1:0] InterruptEnable, InterruptPending; - //Bus interface signals + // Bus interface signals logic [7:0] Entry; logic Memwrite; logic [31:0] Din, Dout; - logic TransmitInactive; //High when there is no transmission, used as hardware interlock signal + logic TransmitInactive; // High when there is no transmission, used as hardware interlock signal - //FIFO FSM signals - //Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1] + // FIFO FSM signals + // Watermark signals - TransmitReadMark = ip[0], 
ReceiveWriteMark = ip[1] logic TransmitWriteMark, TransmitReadMark, RecieveWriteMark, RecieveReadMark; logic TransmitFIFOWriteFull, TransmitFIFOReadEmpty; logic TransmitFIFOReadIncrement; @@ -83,75 +74,68 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( logic ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty; logic [7:0] TransmitFIFOReadData, ReceiveFIFOWriteData; logic [2:0] TransmitWriteWatermarkLevel, ReceiveReadWatermarkLevel; - logic [7:0] ReceiveShiftRegEndian; //reverses ReceiveShiftReg if Format[2] set (little endian transmission) + logic [7:0] ReceiveShiftRegEndian; // Reverses ReceiveShiftReg if Format[2] set (little endian transmission) - //Transmission signals + // Transmission signals logic sck; - logic [11:0] DivCounter; //counter for sck - logic SCLKenable; //flip flop enable high every sclk edge + logic [11:0] DivCounter; // Counter for sck + logic SCLKenable; // Flip flop enable high every sclk edge - //Delay signals - logic [8:0] ImplicitDelay1; //Adds implicit delay to cs-sck delay counter based on phase - logic [8:0] ImplicitDelay2; //Adds implicit delay to sck-cs delay counter based on phase - logic [8:0] CS_SCKCount; //Counter for cs-sck delay - logic [8:0] SCK_CSCount; //Counter for sck-cs delay - logic [8:0] InterCSCount; //Counter for inter cs delay - logic [8:0] InterXFRCount; //Counter for inter xfr delay - logic CS_SCKCompare; //Boolean comparison signal, high when CS_SCKCount >= cs-sck delay - logic SCK_CSCompare; //Boolean comparison signal, high when SCK_CSCount >= sck-cs delay - logic InterCSCompare; //Boolean comparison signal, high when InterCSCount >= inter cs delay - logic InterXFRCompare; //Boolean comparison signal, high when InterXFRCount >= inter xfr delay - logic ZeroDelayHoldMode; //High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0 + // Delay signals + logic [8:0] ImplicitDelay1; // Adds implicit delay to cs-sck delay counter based on phase + logic [8:0] ImplicitDelay2; // Adds implicit delay to sck-cs 
delay counter based on phase + logic [8:0] CS_SCKCount; // Counter for cs-sck delay + logic [8:0] SCK_CSCount; // Counter for sck-cs delay + logic [8:0] InterCSCount; // Counter for inter cs delay + logic [8:0] InterXFRCount; // Counter for inter xfr delay + logic ZeroDelayHoldMode; // High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0 - //Frame counting signals - logic [3:0] FrameCount; //Counter for number of frames in transmission - logic FrameCompare; //Boolean comparison signal, high when FrameCount = Format[7:4] - logic [3:0] ReceivePenultimateFrame; //Frame number - 1 - logic [3:0] ReceivePenultimateFrameCount; //Counter - logic ReceivePenultimateFrameBoolean; //High when penultimate frame in transmission has been reached + // Frame counting signals + logic [3:0] FrameCount; // Counter for number of frames in transmission + logic [3:0] ReceivePenultimateFrameCount; // Counter + logic ReceivePenultimateFrame; // High when penultimate frame in transmission has been reached - //State fsm signals - logic Active; //High when state is either Active1 or Active0 (during transmission) - logic Active0; //High when state is Active0 + // State fsm signals + logic Active; // High when state is either Active1 or Active0 (during transmission) + logic Active0; // High when state is Active0 - //Shift reg signals - logic ShiftEdge; //Determines which edge of sck to shift from TransmitShiftReg - logic [7:0] TransmitShiftReg; //Transmit shift register - logic [7:0] ReceiveShiftReg; //Receive shift register - logic SampleEdge; //Determines which edge of sck to sample from ReceiveShiftReg - logic [7:0] TransmitDataEndian; //Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB - logic TransmitShiftRegLoad; //Determines when to load TransmitShiftReg - logic ReceiveShiftFull; //High when receive shift register is full - logic TransmitShiftEmpty; //High when transmit shift register is empty - logic ShiftIn; //Determines whether 
to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST) - logic [3:0] LeftShiftAmount; //Determines left shift amount to left-align data when little endian - logic [7:0] ASR; //AlignedReceiveShiftReg + // Shift reg signals + logic ShiftEdge; // Determines which edge of sck to shift from TransmitShiftReg + logic [7:0] TransmitShiftReg; // Transmit shift register + logic [7:0] ReceiveShiftReg; // Receive shift register + logic SampleEdge; // Determines which edge of sck to sample from ReceiveShiftReg + logic [7:0] TransmitDataEndian; // Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB + logic TransmitShiftRegLoad; // Determines when to load TransmitShiftReg + logic ReceiveShiftFull; // High when receive shift register is full + logic TransmitShiftEmpty; // High when transmit shift register is empty + logic ShiftIn; // Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST) + logic [3:0] LeftShiftAmount; // Determines left shift amount to left-align data when little endian + logic [7:0] ASR; // AlignedReceiveShiftReg - //CS signals - logic [3:0] ChipSelectAuto; //Assigns ChipSelect value to selected CS signal based on CS ID - logic [3:0] ChipSelectInternal; //Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef - logic DelayMode; //Determines where to place implicit half cycle delay based on sck phase for CS assertion + // CS signals + logic [3:0] ChipSelectAuto; // Assigns ChipSelect value to selected CS signal based on CS ID + logic [3:0] ChipSelectInternal; // Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef + logic DelayMode; // Determines where to place implicit half cycle delay based on sck phase for CS assertion - //Miscellaneous signals delayed/early by 1 PCLK cycle - logic ReceiveShiftFullDelay; //Delays ReceiveShiftFull signal by 1 PCLK cycle - logic TransmitFIFOWriteIncrementDelay; //TransmitFIFOWriteIncrement delayed by 1 
PCLK cycle - logic ReceiveShiftFullDelayPCLK; //ReceiveShiftFull delayed by 1 PCLK cycle + // Miscellaneous signals delayed/early by 1 PCLK cycle + logic ReceiveShiftFullDelay; // Delays ReceiveShiftFull signal by 1 PCLK cycle + logic ReceiveShiftFullDelayPCLK; // ReceiveShiftFull delayed by 1 PCLK cycle logic TransmitFIFOReadEmptyDelay; - logic SCLKenableEarly; //SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0 + logic SCLKenableEarly; // SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0 - //APB access - assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses - assign Memwrite = PWRITE & PENABLE & PSEL; // only write in access phase - assign PREADY = TransmitInactive; // tie PREADY to transmission for hardware interlock + // APB access + assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses + assign Memwrite = PWRITE & PENABLE & PSEL; // Only write in access phase + assign PREADY = TransmitInactive; // Tie PREADY to transmission for hardware interlock - //Account for subword read/write circuitry + // Account for subword read/write circuitry // -- Note SPI registers are 32 bits no matter what; access them with LW SW. 
assign Din = PWDATA[31:0]; if (P.XLEN == 64) assign PRDATA = {Dout, Dout}; else assign PRDATA = Dout; - //Register access + // Register access always_ff@(posedge PCLK, negedge PRESETn) if (~PRESETn) begin SckDiv <= #1 12'd3; @@ -167,13 +151,12 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( ReceiveWatermark <= #1 3'b0; InterruptEnable <= #1 2'b0; InterruptPending <= #1 2'b0; - end else begin //writes - //According to FU540 spec: Once interrupt is pending, it will remain set until number - //of entries in tx/rx fifo is strictly more/less than tx/rxmark + end else begin // writes + /* verilator lint_off CASEINCOMPLETE */ if (Memwrite & TransmitInactive) - case(Entry) //flop to sample inputs + case(Entry) // flop to sample inputs 8'h00: SckDiv <= Din[11:0]; 8'h04: SckMode <= Din[1:0]; 8'h10: ChipSelectID <= Din[1:0]; @@ -188,18 +171,21 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( 8'h70: InterruptEnable <= Din[1:0]; endcase /* verilator lint_off CASEINCOMPLETE */ - //interrupt clearance + + // According to FU540 spec: Once interrupt is pending, it will remain set until number + // of entries in tx/rx fifo is strictly more/less than tx/rxmark InterruptPending[0] <= TransmitReadMark; InterruptPending[1] <= RecieveWriteMark; - case(Entry) // flop to sample inputs + + case(Entry) // Flop to sample inputs 8'h00: Dout <= #1 {20'b0, SckDiv}; 8'h04: Dout <= #1 {30'b0, SckMode}; 8'h10: Dout <= #1 {30'b0, ChipSelectID}; 8'h14: Dout <= #1 {28'b0, ChipSelectDef}; 8'h18: Dout <= #1 {30'b0, ChipSelectMode}; - 8'h28: Dout <= {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]}; - 8'h2C: Dout <= {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]}; - 8'h40: Dout <= {12'b0, Format[4:1], 13'b0, Format[0], 2'b0}; + 8'h28: Dout <= #1 {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]}; + 8'h2C: Dout <= #1 {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]}; + 8'h40: Dout <= #1 {12'b0, Format[4:1], 13'b0, Format[0], 2'b0}; 8'h48: Dout <= #1 {23'b0, TransmitFIFOWriteFull, 8'b0}; 8'h4C: Dout <= #1 {23'b0, 
ReceiveFIFOReadEmpty, ReceiveData[7:0]}; 8'h50: Dout <= #1 {29'b0, TransmitWatermark}; @@ -210,8 +196,9 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( endcase end - //SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1)) - //Generates a high signal at the rising and falling edge of SCLK by counting from 0 to SckDiv + // SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1)) + // Asserts SCLKenable at the rising and falling edge of SCLK by counting from 0 to SckDiv + // Active at 2x SCLK frequency to account for implicit half cycle delays and actions on both clock edges depending on phase assign SCLKenable = (DivCounter == SckDiv); assign SCLKenableEarly = ((DivCounter + 12'b1) == SckDiv); always_ff @(posedge PCLK, negedge PRESETn) @@ -219,44 +206,38 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( else if (SCLKenable) DivCounter <= 0; else DivCounter <= DivCounter + 12'b1; - //Boolean logic that tracks frame progression - assign FrameCompare = (FrameCount < Format[4:1]); - assign ReceivePenultimateFrameBoolean = ((FrameCount + 4'b0001) == Format[4:1]); + // Asserts when transmission is one frame before complete + assign ReceivePenultimateFrame = ((FrameCount + 4'b0001) == Format[4:1]); - //Computing delays + // Computing delays // When sckmode.pha = 0, an extra half-period delay is implicit in the cs-sck delay, and vice-versa for sck-cs assign ImplicitDelay1 = SckMode[0] ? 9'b0 : 9'b1; assign ImplicitDelay2 = SckMode[0] ? 
9'b1 : 9'b0; - assign CS_SCKCompare = CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1); - assign SCK_CSCompare = SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2); - assign InterCSCompare = (InterCSCount >= ({Delay1[7:0],1'b0})); - assign InterXFRCompare = (InterXFRCount >= ({Delay1[15:8], 1'b0})); + // Calculate when tx/rx shift registers are full/empty + TransmitShiftFSM TransmitShiftFSM(PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0, TransmitShiftEmpty); + ReceiveShiftFSM ReceiveShiftFSM(PCLK, PRESETn, SCLKenable, ReceivePenultimateFrame, SampleEdge, SckMode[0], ReceiveShiftFull); - //Calculate when tx/rx shift registers are full/empty - TransmitShiftFSM TransmitShiftFSM_1 (PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0, TransmitShiftEmpty); - ReceiveShiftFSM ReceiveShiftFSM_1 (PCLK, PRESETn, SCLKenable, ReceivePenultimateFrameBoolean, SampleEdge, SckMode[0], ReceiveShiftFull); - - //Calculate tx/rx fifo write and recieve increment signals - assign TransmitFIFOWriteIncrement = (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive); + // Calculate tx/rx fifo write and recieve increment signals always_ff @(posedge PCLK, negedge PRESETn) - if (~PRESETn) TransmitFIFOWriteIncrementDelay <= 0; - else TransmitFIFOWriteIncrementDelay <= TransmitFIFOWriteIncrement; + if (~PRESETn) TransmitFIFOWriteIncrement <= 0; + else TransmitFIFOWriteIncrement <= (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive); always_ff @(posedge PCLK, negedge PRESETn) if (~PRESETn) ReceiveFIFOReadIncrement <= 0; else ReceiveFIFOReadIncrement <= ((Entry == 8'h4C) & ~ReceiveFIFOReadEmpty & PSEL & ~ReceiveFIFOReadIncrement); - //Tx/Rx FIFOs - SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrementDelay, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0], TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, 
TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark); - SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel, ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark); + // Tx/Rx FIFOs + SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrement, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0], + TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark); + SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel, + ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark); always_ff @(posedge PCLK, negedge PRESETn) if (~PRESETn) TransmitFIFOReadEmptyDelay <= 1; else if (SCLKenable) TransmitFIFOReadEmptyDelay <= TransmitFIFOReadEmpty; - always_ff @(posedge PCLK, negedge PRESETn) if (~PRESETn) ReceiveShiftFullDelay <= 0; else if (SCLKenable) ReceiveShiftFullDelay <= ReceiveShiftFull; @@ -266,16 +247,16 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( assign TransmitShiftRegLoad = ~TransmitShiftEmpty & ~Active | (((ChipSelectMode == 2'b10) & ~|(Delay1[15:8])) & ((ReceiveShiftFullDelay | ReceiveShiftFull) & ~SampleEdge & ~TransmitFIFOReadEmpty)); - //Main FSM which controls SPI transmission + // Main FSM which controls SPI transmission typedef enum logic [2:0] {CS_INACTIVE, DELAY_0, ACTIVE_0, ACTIVE_1, DELAY_1,INTER_CS, INTER_XFR} statetype; statetype state; always_ff @(posedge PCLK, negedge PRESETn) - if (~PRESETn) begin state <= CS_INACTIVE; + if (~PRESETn) begin + state <= CS_INACTIVE; FrameCount <= 4'b0; - - /* verilator lint_off CASEINCOMPLETE */ end else if (SCLKenable) begin + /* verilator lint_off 
CASEINCOMPLETE */ case (state) CS_INACTIVE: begin CS_SCKCount <= 9'b1; @@ -288,7 +269,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( end DELAY_0: begin CS_SCKCount <= CS_SCKCount + 9'b1; - if (CS_SCKCompare) state <= ACTIVE_0; + if (CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1)) state <= ACTIVE_0; end ACTIVE_0: begin FrameCount <= FrameCount + 4'b1; @@ -296,7 +277,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( end ACTIVE_1: begin InterXFRCount <= 9'b1; - if (FrameCompare) state <= ACTIVE_0; + if (FrameCount < Format[4:1]) state <= ACTIVE_0; else if ((ChipSelectMode[1:0] == 2'b10) & ~|(Delay1[15:8]) & (~TransmitFIFOReadEmpty)) begin state <= ACTIVE_0; CS_SCKCount <= 9'b1; @@ -310,11 +291,11 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( end DELAY_1: begin SCK_CSCount <= SCK_CSCount + 9'b1; - if (SCK_CSCompare) state <= INTER_CS; + if (SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2)) state <= INTER_CS; end INTER_CS: begin InterCSCount <= InterCSCount + 9'b1; - if (InterCSCompare ) state <= CS_INACTIVE; + if (InterCSCount >= ({Delay1[7:0],1'b0})) state <= CS_INACTIVE; end INTER_XFR: begin CS_SCKCount <= 9'b1; @@ -322,13 +303,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( FrameCount <= 4'b0; InterCSCount <= 9'b10; InterXFRCount <= InterXFRCount + 9'b1; - if (InterXFRCompare & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0; + if ((InterXFRCount >= ({Delay1[15:8], 1'b0})) & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0; else if (~|ChipSelectMode[1:0]) state <= CS_INACTIVE; end endcase + /* verilator lint_off CASEINCOMPLETE */ end - /* verilator lint_off CASEINCOMPLETE */ + assign DelayMode = SckMode[0] ? (state == DELAY_1) : (state == ACTIVE_1 & ReceiveShiftFull); assign ChipSelectInternal = (state == CS_INACTIVE | state == INTER_CS | DelayMode & ~|(Delay0[15:8])) ? 
ChipSelectDef : ~ChipSelectDef; @@ -339,7 +321,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( assign TransmitInactive = ((state == INTER_CS) | (state == CS_INACTIVE) | (state == INTER_XFR) | (ReceiveShiftFullDelayPCLK & ZeroDelayHoldMode)); assign Active0 = (state == ACTIVE_0); - //Signal tracks which edge of sck to shift data + // Signal tracks which edge of sck to shift data always_comb case(SckMode[1:0]) 2'b00: ShiftEdge = ~sck & SCLKenable; @@ -349,36 +331,36 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( default: ShiftEdge = sck & SCLKenable; endcase - //Transmit shift register - assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0]; + // Transmit shift register + assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0]; always_ff @(posedge PCLK, negedge PRESETn) if(~PRESETn) TransmitShiftReg <= 8'b0; else if (TransmitShiftRegLoad) TransmitShiftReg <= TransmitDataEndian; - else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0}; + else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0}; assign SPIOut = TransmitShiftReg[7]; - //If in loopback mode, receive shift register is connected directly to module's output pins. Else, connected to SPIIn - //There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges + // If in loopback mode, receive shift register is connected directly to module's output pins. 
Else, connected to SPIIn + // There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges assign ShiftIn = P.SPI_LOOPBACK_TEST ? SPIOut : SPIIn; - //Receive shift register + // Receive shift register always_ff @(posedge PCLK, negedge PRESETn) if(~PRESETn) ReceiveShiftReg <= 8'b0; else if (SampleEdge & SCLKenable) begin - if (~Active) ReceiveShiftReg <= 8'b0; - else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn}; + if (~Active) ReceiveShiftReg <= 8'b0; + else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn}; end - //Aligns received data and reverses if little-endian + // Aligns received data and reverses if little-endian assign LeftShiftAmount = 4'h8 - Format[4:1]; assign ASR = ReceiveShiftReg << LeftShiftAmount[2:0]; assign ReceiveShiftRegEndian = Format[0] ? {ASR[0], ASR[1], ASR[2], ASR[3], ASR[4], ASR[5], ASR[6], ASR[7]} : ASR[7:0]; - //Interrupt logic: raise interrupt if any enabled interrupts are pending + // Interrupt logic: raise interrupt if any enabled interrupts are pending assign SPIIntr = |(InterruptPending & InterruptEnable); - //Chip select logic + // Chip select logic always_comb case(ChipSelectID[1:0]) 2'b00: ChipSelectAuto = {ChipSelectDef[3], ChipSelectDef[2], ChipSelectDef[1], ChipSelectInternal[0]}; @@ -390,14 +372,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) ( assign SPICS = ChipSelectMode[0] ? 
ChipSelectDef : ChipSelectAuto; endmodule -module SynchFIFO #(parameter M =3 , N= 8)( - input logic PCLK, wen, ren, PRESETn, - input logic winc,rinc, - input logic [N-1:0] wdata, - input logic [M-1:0] wwatermarklevel, rwatermarklevel, +module SynchFIFO #(parameter M=3, N=8)( // 2^M entries of N bits each + input logic PCLK, wen, ren, PRESETn, + input logic winc, rinc, + input logic [N-1:0] wdata, + input logic [M-1:0] wwatermarklevel, rwatermarklevel, output logic [N-1:0] rdata, - output logic wfull, rempty, - output logic wwatermark, rwatermark); + output logic wfull, rempty, + output logic wwatermark, rwatermark); /* Pointer FIFO using design elements from "Simulation and Synthesis Techniques for Asynchronous FIFO Design" by Clifford E. Cummings. Namely, M bit read and write pointers @@ -409,8 +391,6 @@ module SynchFIFO #(parameter M =3 , N= 8)( logic [N-1:0] mem[2**M]; logic [M:0] rptr, wptr; logic [M:0] rptrnext, wptrnext; - logic rempty_val; - logic wfull_val; logic [M-1:0] raddr; logic [M-1:0] waddr; @@ -428,53 +408,43 @@ module SynchFIFO #(parameter M =3 , N= 8)( end else begin if (wen) begin - wfull <= wfull_val; + wfull <= ({~wptrnext[M], wptrnext[M-1:0]} == rptr); wptr <= wptrnext; end if (ren) begin rptr <= rptrnext; - rempty <= rempty_val; + rempty <= (wptr == rptrnext); end end - + assign raddr = rptr[M-1:0]; - assign rptrnext = rptr + {3'b0, (rinc & ~rempty)}; - assign rempty_val = (wptr == rptrnext); + assign rptrnext = rptr + {{(M){1'b0}}, (rinc & ~rempty)}; assign rwatermark = ((waddr - raddr) < rwatermarklevel) & ~wfull; assign waddr = wptr[M-1:0]; assign wwatermark = ((waddr - raddr) > wwatermarklevel) | wfull; - assign wptrnext = wptr + {3'b0, (winc & ~wfull)}; - assign wfull_val = ({~wptrnext[M], wptrnext[M-1:0]} == rptr); + assign wptrnext = wptr + {{(M){1'b0}}, (winc & ~wfull)}; endmodule module TransmitShiftFSM( - input logic PCLK, PRESETn, - input logic TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0, + input logic PCLK, 
PRESETn, + input logic TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0, output logic TransmitShiftEmpty); - typedef enum logic [1:0] {TransmitShiftEmptyState, TransmitShiftHoldState, TransmitShiftNotEmptyState} statetype; - statetype TransmitState, TransmitNextState; always_ff @(posedge PCLK, negedge PRESETn) - if (~PRESETn) TransmitState <= TransmitShiftEmptyState; - else TransmitState <= TransmitNextState; + if (~PRESETn) TransmitShiftEmpty <= 1; + else if (TransmitShiftEmpty) begin + if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrame & Active0))) TransmitShiftEmpty <= 1; + else if (~TransmitFIFOReadEmpty) TransmitShiftEmpty <= 0; + end else begin + if (ReceivePenultimateFrame & Active0) TransmitShiftEmpty <= 1; + else TransmitShiftEmpty <= 0; + end - always_comb - case(TransmitState) - TransmitShiftEmptyState: begin - if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrameBoolean & Active0))) TransmitNextState = TransmitShiftEmptyState; - else if (~TransmitFIFOReadEmpty) TransmitNextState = TransmitShiftNotEmptyState; - end - TransmitShiftNotEmptyState: begin - if (ReceivePenultimateFrameBoolean & Active0) TransmitNextState = TransmitShiftEmptyState; - else TransmitNextState = TransmitShiftNotEmptyState; - end - endcase - assign TransmitShiftEmpty = (TransmitNextState == TransmitShiftEmptyState); endmodule module ReceiveShiftFSM( - input logic PCLK, PRESETn, SCLKenable, - input logic ReceivePenultimateFrameBoolean, SampleEdge, SckMode, + input logic PCLK, PRESETn, SCLKenable, + input logic ReceivePenultimateFrame, SampleEdge, SckMode, output logic ReceiveShiftFull ); typedef enum logic [1:0] {ReceiveShiftFullState, ReceiveShiftNotFullState, ReceiveShiftDelayState} statetype; @@ -484,17 +454,12 @@ module ReceiveShiftFSM( else if (SCLKenable) begin case (ReceiveState) ReceiveShiftFullState: ReceiveState <= ReceiveShiftNotFullState; - ReceiveShiftNotFullState: if (ReceivePenultimateFrameBoolean & 
(SampleEdge)) ReceiveState <= ReceiveShiftDelayState; + ReceiveShiftNotFullState: if (ReceivePenultimateFrame & (SampleEdge)) ReceiveState <= ReceiveShiftDelayState; else ReceiveState <= ReceiveShiftNotFullState; - ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState; + ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState; endcase end - assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState); + assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState); endmodule - - - - - diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 00b348660..46ffcac09 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( end // global stall and flush control - hazard hzu( + hazard #(P) hzu( .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LSUStallM, .IFUStallF, diff --git a/synthDC/Makefile b/synthDC/Makefile index e7918e3dc..7968a7b52 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -11,7 +11,7 @@ export MOD ?= orig # title to add a note in the synth's directory name TITLE = # tsmc28, sky130, and sky90 presently supported -export TECH ?= sky90 +export TECH ?= sky130 # MAXCORES allows parallel compilation, which is faster but less CPU-efficient # Avoid when doing sweeps of many optimization points in parallel export MAXCORES ?= 1 @@ -20,7 +20,7 @@ export MAXCORES ?= 1 export MAXOPT ?= 0 export DRIVE ?= FLOP export USESRAM ?= 0 - +export WIDTH ?= 32 time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) @@ -94,10 +94,10 @@ endif ifneq ($(MOD), orig) # PMP 0 - sed -i 's/PMP_ENTRIES \(64\|16\|0\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh + sed -i 's/PMP_ENTRIES.*\(64\|16\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh ifneq ($(MOD), PMP0) 
# no priv - sed -i 's/ZICSR_SUPPORTED *1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh + sed -i 's/ZICSR_SUPPORTED.*1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh ifneq ($(MOD), noPriv) # turn off FPU sed -i 's/1 *<< *3/0 << 3/' $(CONFIGDIR)/config.vh @@ -147,4 +147,4 @@ clean: rm -f power.saif rm -f Synopsys_stack_trace_*.txt rm -f crte_*.txt - \ No newline at end of file + diff --git a/synthDC/README.md b/synthDC/README.md index edbd57340..30a98a76b 100644 --- a/synthDC/README.md +++ b/synthDC/README.md @@ -5,7 +5,7 @@ This subdirectory contains synthesis scripts for use with Synopsys scripts/synth.tcl. Example Usage -make synth DESIGN=wallypipelinedcore FREQ=500 +make synth DESIGN=wallypipelinedcore FREQ=500 CONFIG=rv32e environment variables @@ -38,5 +38,25 @@ To run ppa analysis that hones into target frequency, you can type: python3 ppa/ppaSynth.py from the synthDC directory. This runs a sweep across all modules listed at the bottom of the ppaSynth.py file. +There are two options for running the sweep. The first option runs all modules for +all techs around a given frequency (i.e., freqs). The second option +will run all designs for the specific module based on bestSynths.csv +values. Because the second option comes second, its settings take priority. If the +second set of values is commented out, the first option runs and covers all widths. +WARNING: The first option may launch a large number of runs, which could use up all +of the available licenses. Therefore, care must be taken +to ensure that enough licenses are available for this first option. 
+##### Run specific syntheses + widths = [8, 16, 32, 64, 128] + modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8'] + techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn'] + freqs = [5000] + synthsToRun = allCombos(widths, modules, techs, freqs) + +##### Run a sweep based on best delay found in existing syntheses + module = 'adder' + width = 32 + tech = 'tsmc28psyn' + synthsToRun = freqSweep(module, width, tech) \ No newline at end of file diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 7a3f45ddd..d6f5933a9 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -252,7 +252,7 @@ if __name__ == '__main__': TechSpec = namedtuple("TechSpec", "color shape targfreq fo4 add32area add32lpower add32denergy") techdict = {} - techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 1440.600027, 714.057, 0.658023) + techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 2581, 18, 0.685) techdict['sky90'] = TechSpec('gray', 'o', args.sky90freq, 43.2e-3, 1440.600027, 714.057, 0.658023) techdict['tsmc28psyn'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533) diff --git a/synthDC/ppa/bestSynths.csv b/synthDC/ppa/bestSynths.csv index 7e3d35569..655f171a1 100644 --- a/synthDC/ppa/bestSynths.csv +++ b/synthDC/ppa/bestSynths.csv @@ -1,24 +1,74 @@ Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (nJ) -priorityencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078 -priorityencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348 -priorityencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111 -priorityencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981 -priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861 -add,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422 
-add,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417 -add,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014 -add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874 -add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755 +binencoder,sky130,8,1000,1.0000,50.960001,24.761,0.010685929975270078 +binencoder,sky130,16,1000,1.0000,136.220003,77.243,0.021773774467348 +binencoder,sky130,32,1000,1.0000,372.400007,189.626,0.04371111111111111 +binencoder,sky130,64,1000,1.0000,797.720015,382.205,0.07393850658857981 +binencoder,sky130,128,900,1.1111,1602.300031,610.009,0.1261366969785861 +adder,sky130,8,1700,0.588235,253.820005,154.438,0.10825587752870422 +adder,sky130,16,1300,0.7692307,722.260013,485.109,0.32460910944935417 +adder,sky130,32,1100,0.90909,1440.600027,714.057,0.6580226904376014 +adder,sky130,64,950,1.0526315,2781.240054,1050.0,0.9392239364188874 +adder,sky130,128,900,1.1111,6186.740118,2230.0,2.1480106100795755 +csa,sky130,8,1000,1.0000,266.560005,154.202,0.13650573115665163 +csa,sky130,16,1000,1.0000,533.12001,308.404,0.27263530601922104 +csa,sky130,32,1000,1.0000,1066.240021,616.808,0.5448072247308093 +csa,sky130,64,1000,1.0000,2132.480042,1230.0,1.0905412240768841 +csa,sky130,128,1000,1.0000,4264.960083,2470.0,2.178553363682347 +shifter,sky130,8,1000,1.0000,259.700005,196.451,0.07534088282874972 +shifter,sky130,16,1000,1.0000,666.400006,558.433,0.19552906110283155 +shifter,sky130,32,1000,1.0000,1475.880027,768.262,0.3807431082700759 +shifter,sky130,64,1000,1.0000,3914.120062,2680.0,1.144802541988198 +shifter,sky130,128,900,1.1111,9192.400136,6080.0,2.9008914525432616 +comparator,sky130,8,1700,0.588235,200.900004,136.6,0.05001033271337053 +comparator,sky130,16,1500,0.6666667,358.680007,189.253,0.06321553011448482 +comparator,sky130,32,1300,0.7692307,690.900013,315.709,0.10771793448084398 +comparator,sky130,64,1200,0.8333333,1372.980026,508.393,0.2048577820389901 
+comparator,sky130,128,1150,0.869565,2744.980052,796.047,0.34396273737011823 +flop,sky130,8,1000,1.0000,133.279999,64.8145,0.193835 +flop,sky130,16,1000,1.0000,266.5599975,129.629,0.38715000000000005 +flop,sky130,32,1000,1.0000,533.119995,259.258,0.7723000000000001 +flop,sky130,64,1000,1.0000,1066.23999,520.0,1.54955 +flop,sky130,128,1000,1.0000,2132.4799805,1035.0,3.094 +mux2,sky130,8,1000,1.0000,63.700001,21.541,0.01932440083034535 +mux2,sky130,16,1000,1.0000,119.560002,32.354,0.03884536082474227 +mux2,sky130,32,1000,1.0000,375.340008,259.372,0.13671796921846893 +mux2,sky130,64,1000,1.0000,479.220009,115.22,0.15148539160324087 +mux2,sky130,128,1000,1.0000,1302.420025,767.078,0.4665334665334665 +mux4,sky130,8,1000,1.0000,148.960002,66.984,0.04026661024121879 +mux4,sky130,16,1000,1.0000,392.0,398.313,0.1037037037037037 +mux4,sky130,32,1000,1.0000,594.860011,331.197,0.131617289946576 +mux4,sky130,64,1000,1.0000,899.640016,344.331,0.2862533692722372 +mux4,sky130,128,1000,1.0000,2013.900038,818.249,0.6094182825484764 +mux8,sky130,8,1000,1.0000,287.140006,116.648,0.06089260808926081 +mux8,sky130,16,1000,1.0000,582.120003,282.366,0.14455681142177274 +mux8,sky130,32,1000,1.0000,1319.079995,670.683,0.35777218376337316 +mux8,sky130,64,1000,1.0000,2132.48004,808.482,0.44287680660701995 +mux8,sky130,128,1000,1.0000,4575.620089,1830.0,0.9786276715410572 +mul,sky130,8,1000,1.0000,2194.220041,1440.0,1.421374045801527 +mul,sky130,16,1000,1.0000,7519.540137,4940.0,6.376128385155466 +mul,sky130,32,1000,1.0000,25200.700446,14900.0,24.931847968545217 +mul,sky130,64,1000,1.0000,86011.661365,42600.0,88.84651898734177 +mul,sky130,128,800,1.2500,296198.144128,114000.0,273.3148854961832 +binencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078 +binencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348 +binencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111 
+binencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981 +binencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861 +adder,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422 +adder,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417 +adder,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014 +adder,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874 +adder,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755 csa,sky90,8,5758,0.16536141368530738,266.560005,154.202,0.13650573115665163 csa,sky90,16,5931,0.1654056314280897,533.12001,308.404,0.27263530601922104 csa,sky90,32,5758,0.16536141368530738,1066.240021,616.808,0.5448072247308093 csa,sky90,64,5931,0.1654056314280897,2132.480042,1230.0,1.0905412240768841 csa,sky90,128,5931,0.1654056314280897,4264.960083,2470.0,2.178553363682347 -shiftleft,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972 -shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155 -shiftleft,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759 -shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198 -shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616 +shifter,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972 +shifter,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155 +shifter,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759 +shifter,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198 +shifter,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616 comparator,sky90,8,4839,0.20629126741062204,200.900004,136.6,0.05001033271337053 comparator,sky90,16,4018,0.24806303982080635,358.680007,189.253,0.06321553011448482 
comparator,sky90,32,3602,0.276293542476402,690.900013,315.709,0.10771793448084398 @@ -44,31 +94,31 @@ mux8,sky90,16,3362,0.295237998810232,582.120003,282.366,0.14455681142177274 mux8,sky90,32,3178,0.3140553102580239,1319.079995,670.683,0.35777218376337316 mux8,sky90,64,2906,0.3440756228492774,2132.48004,808.482,0.44287680660701995 mux8,sky90,128,2667,0.3749401308586427,4575.620089,1830.0,0.9786276715410572 -mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527 -mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466 -mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217 -mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177 -mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832 -priorityencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546 -priorityencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294 -priorityencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266 -priorityencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089 -priorityencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666 -add,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546 -add,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698 -add,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594 -add,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008 -add,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731 +mul,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527 +mul,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466 +mul,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217 +mul,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177 
+mul,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832 +binencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546 +binencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294 +binencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266 +binencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089 +binencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666 +adder,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546 +adder,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698 +adder,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594 +adder,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008 +adder,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731 csa,tsmc28,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921 csa,tsmc28,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842 csa,tsmc28,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941 csa,tsmc28,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076 csa,tsmc28,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363 -shiftleft,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006 -shiftleft,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719 -shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654 -shiftleft,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286 -shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039 +shifter,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006 +shifter,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719 +shifter,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654 
+shifter,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286 +shifter,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039 comparator,tsmc28,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243 comparator,tsmc28,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673 comparator,tsmc28,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319 @@ -94,8 +144,58 @@ mux8,tsmc28,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262 mux8,tsmc28,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814 mux8,tsmc28,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495 mux8,tsmc28,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426 -mult,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766 -mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547 -mult,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723 -mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251 -mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719 +mul,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766 +mul,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547 +mul,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723 +mul,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251 +mul,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719 +binencoder,tsmc28psyn,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546 +binencoder,tsmc28psyn,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294 +binencoder,tsmc28psyn,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266 +binencoder,tsmc28psyn,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089 +binencoder,tsmc28psyn,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666 
+adder,tsmc28psyn,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546 +adder,tsmc28psyn,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698 +adder,tsmc28psyn,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594 +adder,tsmc28psyn,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008 +adder,tsmc28psyn,128,7000,0.142857142857,907.452008,4360.0,0.3451183029643731 +csa,tsmc28psyn,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921 +csa,tsmc28psyn,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842 +csa,tsmc28psyn,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941 +csa,tsmc28psyn,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076 +csa,tsmc28psyn,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363 +shifter,tsmc28psyn,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006 +shifter,tsmc28psyn,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719 +shifter,tsmc28psyn,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654 +shifter,tsmc28psyn,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286 +shifter,tsmc28psyn,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039 +comparator,tsmc28psyn,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243 +comparator,tsmc28psyn,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673 +comparator,tsmc28psyn,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319 +comparator,tsmc28psyn,64,11080,0.09024670758122744,294.21,1250.0,0.0684115523465704 +comparator,tsmc28psyn,128,9371,0.10671119720414043,558.432,2400.0,0.12794792444776437 +flop,tsmc28psyn,8,10,0.048889000000002625,15.12,78.6345,0.027246000000000003 +flop,tsmc28psyn,16,10,0.048889000000002625,30.24,157.29,0.054290000000000005 +flop,tsmc28psyn,32,10,0.048889000000002625,60.4799995,314.5805,0.10908000000000001 
+flop,tsmc28psyn,64,10,0.048889000000002625,120.959999,630.0,0.21765500000000004 +flop,tsmc28psyn,128,10,0.048889000000002625,241.919998,1260.0,0.43579999999999997 +mux2,tsmc28psyn,8,29614,0.03374481252110488,16.758,114.564,0.005436617815897886 +mux2,tsmc28psyn,16,18767,0.053046021580433735,15.75,88.025,0.005142004582511856 +mux2,tsmc28psyn,32,17903,0.05585556035301346,32.130001,171.146,0.009897782494553985 +mux2,tsmc28psyn,64,18568,0.05371109651012495,91.35,523.884,0.027574321413183972 +mux2,tsmc28psyn,128,16637,0.05991099044298852,176.525999,941.106,0.05012923002945243 +mux4,tsmc28psyn,8,18151,0.055092383284667513,27.971999,133.963,0.008032615282904523 +mux4,tsmc28psyn,16,16486,0.06057952759917506,39.438,186.231,0.012556108213029236 +mux4,tsmc28psyn,32,15196,0.06580579126085812,69.174,324.969,0.023229797315082915 +mux4,tsmc28psyn,64,13926,0.07180612868016659,137.465999,648.086,0.04574177796926612 +mux4,tsmc28psyn,128,13090,0.07636619404125286,294.335997,1420.0,0.09358288770053477 +mux8,tsmc28psyn,8,12902,0.07750336319950395,44.604,214.286,0.0117501162610448 +mux8,tsmc28psyn,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262 +mux8,tsmc28psyn,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814 +mux8,tsmc28psyn,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495 +mux8,tsmc28psyn,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426 +mul,tsmc28psyn,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766 +mul,tsmc28psyn,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547 +mul,tsmc28psyn,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723 +mul,tsmc28psyn,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251 +mul,tsmc28psyn,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719 diff --git a/synthDC/ppa/ppaAnalyze.py b/synthDC/ppa/ppaAnalyze.py index 459a8520d..bd98e79be 100755 --- a/synthDC/ppa/ppaAnalyze.py +++ b/synthDC/ppa/ppaAnalyze.py @@ -18,93 
+18,115 @@ from collections import namedtuple import sklearn.metrics as skm # depricated, will need to replace with scikit-learn import os + def synthsfromcsv(filename): Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy") - with open(filename, newline='') as csvfile: + with open(filename, newline="") as csvfile: csvreader = csv.reader(csvfile) global allSynths allSynths = list(csvreader)[1:] for i in range(len(allSynths)): for j in range(len(allSynths[0])): - try: allSynths[i][j] = int(allSynths[i][j]) - except: - try: allSynths[i][j] = float(allSynths[i][j]) - except: pass + try: + allSynths[i][j] = int(allSynths[i][j]) + except: + try: + allSynths[i][j] = float(allSynths[i][j]) + except: + pass allSynths[i] = Synth(*allSynths[i]) return allSynths - -def synthsintocsv(): - ''' writes a CSV with one line for every available synthesis - each line contains the module, tech, width, target freq, and resulting metrics - ''' - print("This takes a moment...") - bashCommand = "find . -path '*runs/ppa*rv32e*' -prune" - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] - specReg = re.compile('[a-zA-Z0-9]+') - metricReg = re.compile('-?\d+\.\d+[e]?[-+]?\d*') + +def synthsintocsv(): + """writes a CSV with one line for every available synthesis + each line contains the module, tech, width, target freq, and resulting metrics + """ + print("This takes a moment...") + bashCommand = "find . 
-path '*runs/*' -prune" + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] + + specReg = re.compile("[a-zA-Z0-9]+") + metricReg = re.compile("-?\d+\.\d+[e]?[-+]?\d*") file = open("ppaData.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)']) + writer.writerow( + [ + "Module", + "Tech", + "Width", + "Target Freq", + "Delay", + "Area", + "L Power (nW)", + "D energy (nJ)", + ] + ) for oneSynth in allSynths: - module, width, risc, tech, freq = specReg.findall(oneSynth)[2:7] - tech = tech[:-2] + module, width, risc, tech, freq = specReg.findall(oneSynth)[1:6] metrics = [] - for phrase in [['Path Slack', 'qor'], ['Design Area', 'qor'], ['100', 'power']]: - bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*' + for phrase in [["Path Slack", "qor"], ["Design Area", "qor"], ["100", "power"]]: + bashCommand = 'grep "{}" ' + oneSynth[2:] + "/reports/*{}*" bashCommand = bashCommand.format(*phrase) - try: output = subprocess.check_output(['bash','-c', bashCommand]) - except: + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + except: print(module + width + tech + freq + " doesn't have reports") print("Consider running cleanup() first") nums = metricReg.findall(str(output)) nums = [float(m) for m in nums] metrics += nums - delay = 1000/int(freq) - metrics[0] + delay = 1000 / int(freq) - metrics[0] area = metrics[1] lpower = metrics[4] - denergy = (metrics[2] + metrics[3])/int(freq)*1000 # (switching + internal powers)*delay, more practical units for regression coefs + tpower = (metrics[2] + metrics[3] + metrics[4]*.000001) + denergy = ( + (tpower) / int(freq) * 1000 + ) # (switching + internal powers)*delay, more practical units for regression coefs - if ('flop' in module): # since two flops in each module - [area, lpower, denergy] = [n/2 for n in [area, lpower, denergy]] + if "flop" in module: 
# since two flops in each module + [area, lpower, denergy] = [n / 2 for n in [area, lpower, denergy]] writer.writerow([module, tech, width, freq, delay, area, lpower, denergy]) file.close() -def cleanup(): - ''' removes runs that didn't work - ''' - bashCommand = 'grep -r "Error" runs/ppa*/reports/*qor*' - try: - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] - for run in allSynths: - run = run.split('MHz')[0] - bc = 'rm -r '+ run + '*' - output = subprocess.check_output(['bash','-c', bc]) - except: pass - bashCommand = "find . -path '*runs/ppa*rv32e*' -prune" - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] +def cleanup(): + """removes runs that didn't work""" + bashCommand = 'grep -r "Error" runs/ppa*/reports/*qor*' + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] + for run in allSynths: + run = run.split("MHz")[0] + bc = "rm -r " + run + "*" + output = subprocess.check_output(["bash", "-c", bc]) + except: + pass + + bashCommand = "find . 
-path '*runs/*' -prune" + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] for oneSynth in allSynths: - for phrase in [['Path Length', 'qor']]: - bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*' + for phrase in [["Path Length", "qor"]]: + bashCommand = 'grep "{}" ' + oneSynth[2:] + "/reports/*{}*" bashCommand = bashCommand.format(*phrase) - try: output = subprocess.check_output(['bash','-c', bashCommand]) - except: - bc = 'rm -r '+ oneSynth[2:] - output = subprocess.check_output(['bash','-c', bc]) + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + except: + bc = "rm -r " + oneSynth[2:] + output = subprocess.check_output(["bash", "-c", bc]) print("All cleaned up!") + def getVals(tech, module, var, freq=None, width=None): - ''' for a specified tech, module, and variable/metric - returns a list of values for that metric in ascending width order - works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width - ''' + """for a specified tech, module, and variable/metric + returns a list of values for that metric in ascending width order + works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width + """ if width != None: widthsToGet = width @@ -114,85 +136,132 @@ def getVals(tech, module, var, freq=None, width=None): metric = [] widthL = [] - if (freq != None): + if freq != None: for oneSynth in allSynths: - if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module) & (oneSynth.width != 1): + if ( + (oneSynth.freq == freq) + & (oneSynth.tech == tech) + & (oneSynth.module == module) + & (oneSynth.width != 1) + ): widthL += [oneSynth.width] osdict = oneSynth._asdict() metric += [osdict[var]] - metric = [x for _, x in sorted(zip(widthL, metric))] # ordering + metric = [x for _, x in sorted(zip(widthL, metric))] # ordering else: for 
w in widthsToGet: for oneSynth in bestSynths: - if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module): + if ( + (oneSynth.width == w) + & (oneSynth.tech == tech) + & (oneSynth.module == module) + ): osdict = oneSynth._asdict() met = osdict[var] metric += [met] return metric + def csvOfBest(filename): bestSynths = [] for tech in [x.tech for x in techSpecs]: for mod in modules: for w in widths: - m = np.Inf # large number to start + m = np.Inf # large number to start best = None - for oneSynth in allSynths: # best achievable, rightmost green - if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == mod): - if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq): + for oneSynth in allSynths: # best achievable, rightmost green + if ( + (oneSynth.width == w) + & (oneSynth.tech == tech) + & (oneSynth.module == mod) + ): + if (oneSynth.delay < m) & ( + 1000 / oneSynth.delay > oneSynth.freq + ): m = oneSynth.delay best = oneSynth if (best != None) & (best not in bestSynths): bestSynths += [best] - + file = open(filename, "w") writer = csv.writer(file) - writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)']) + writer.writerow( + [ + "Module", + "Tech", + "Width", + "Target Freq", + "Delay", + "Area", + "L Power (nW)", + "D energy (nJ)", + ] + ) for synth in bestSynths: writer.writerow(list(synth)) file.close() return bestSynths - + + def genLegend(fits, coefs, r2=None, spec=None, ale=False): - ''' generates a list of two legend elements (or just an equation if no r2 or spec) - labels line with fit equation and dots with r squared of the fit - ''' + """generates a list of two legend elements (or just an equation if no r2 or spec) + labels line with fit equation and dots with r squared of the fit + """ coefsr = [str(sigfig(c, 2)) for c in coefs] if ale: - if (normAddWidth == 32): - sub = 'S' + if normAddWidth == 32: + sub = "S" elif normAddWidth != 1: - print('Equations 
are wrong, check normAddWidth') + print("Equations are wrong, check normAddWidth") else: - sub = 'N' + sub = "N" - eqDict = {'c': '', 'l': sub, 's': '$'+sub+'^2$', 'g': '$log_2$('+sub+')', 'n': ''+sub+'$log_2$('+sub+')'} - eq = '' - ind = 0 + eqDict = { + "c": "", + "l": sub, + "s": "$" + sub + "^2$", + "g": "$log_2$(" + sub + ")", + "n": "" + sub + "$log_2$(" + sub + ")", + } + eq = "" + ind = 0 for k in eqDict.keys(): if k in fits: - if str(coefsr[ind]) != '0': eq += " + " + coefsr[ind] + eqDict[k] + if str(coefsr[ind]) != "0": + eq += " + " + coefsr[ind] + eqDict[k] ind += 1 - eq = eq[3:] # chop off leading ' + ' + eq = eq[3:] # chop off leading ' + ' - if (r2==None) or (spec==None): + if (r2 == None) or (spec == None): return eq else: legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)] - legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label='$R^2$='+ str(round(r2, 4)))] + legend_elements += [ + lines.Line2D( + [0], + [0], + color=spec.color, + ls="", + marker=spec.shape, + label="$R^2$=" + str(round(r2, 4)), + ) + ] return legend_elements -def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, color=None): - ''' module: string module name - freq: int freq (MHz) - var: string delay, area, lpower, or denergy - fits: constant, linear, square, log2, Nlog2 - plots given variable vs width for all matching syntheses with regression - ''' + +def oneMetricPlot( + module, widths, var, freq=None, ax=None, fits="clsgn", norm=True, color=None +): + """module: string module name + freq: int freq (MHz) + var: string delay, area, lpower, or denergy + fits: constant, linear, square, log2, Nlog2 + plots given variable vs width for all matching syntheses with regression + """ singlePlot = True if ax or (freq == 10): singlePlot = False @@ -203,24 +272,27 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo allWidths = [] allMetrics = [] - ale = (var != 'delay') # if not 
delay, must be area, leakage, or energy + ale = var != "delay" # if not delay, must be area, leakage, or energy modFit = fitDict[module] fits = modFit[ale] if freq: - ls = '--' + ls = "--" else: - ls = '-' + ls = "-" for spec in techSpecs: + # print(f"Searching for module of spec {spec} and module {module} and var {var}") metric = getVals(spec.tech, module, var, freq=freq) - + # print(f"Found metric : {metric}") if norm: techdict = spec._asdict() norm = techdict[var] - metric = [m/norm for m in metric] + metric = [m / norm for m in metric] - if len(metric) == 5: # don't include the spec if we don't have points for all widths + if len(widths) == len(metric): + # don't include the spec if we don't have points for all widths + # print(f"Width \neq Metric") xp, pred, coefs, r2 = regress(widths, metric, fits, ale) fullLeg += genLegend(fits, coefs, r2, spec, ale=ale) c = color if color else spec.color @@ -229,44 +301,77 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo allWidths += widths allMetrics += metric - xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) - ax.plot(xp, pred, color='red', linestyle=ls) + # print(f"Widths passed into regress : {allWidths}") + if len(allWidths) > 0: + xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) + ax.plot(xp, pred, color="orange", linestyle=ls) + else: + xp, pred, coefs, r2 = regress(widths, metric, fits) + ax.plot(xp, pred, color="orange", linestyle=ls) if norm: - ylabeldic = {"lpower": "Leakage Power (add32)", "denergy": "Energy/Op (add32)", "area": "Area (add32)", "delay": "Delay (FO4)"} + ylabeldic = { + "lpower": "Leakage Power (add32)", + "denergy": "Energy/Op (add32)", + "area": "Area (add32)", + "delay": "Delay (FO4)", + } else: - ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (nJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"} + ylabeldic = { + "lpower": "Leakage Power (nW)", + "denergy": "Dynamic Energy (nJ)", + "area": "Area (sq 
microns)", + "delay": "Delay (ns)", + } ax.set_ylabel(ylabeldic[var]) ax.set_xticks(widths) - if singlePlot or (var == 'lpower') or (var == 'denergy'): + if singlePlot or (var == "lpower") or (var == "denergy"): ax.set_xlabel("Width (bits)") - if not singlePlot and ((var == 'delay') or (var == 'area')): - ax.tick_params(labelbottom=False) + if not singlePlot and ((var == "delay") or (var == "area")): + ax.tick_params(labelbottom=False) if singlePlot: fullLeg += genLegend(fits, coefs, r2, combined, ale=ale) - legLoc = 'upper left' if ale else 'center right' + legLoc = "upper left" if ale else "center right" ax.add_artist(ax.legend(handles=fullLeg, loc=legLoc)) - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" + titleStr = ( + " (target " + str(freq) + "MHz)" + if freq != None + else " (best achievable delay)" + ) ax.set_title(module + titleStr) - plt.savefig('.plots/'+ module + '_' + var + '.png') + plt.savefig(".plots/" + module + "_" + var + ".png") # plt.show() return r2 -def regress(widths, var, fits='clsgn', ale=False): - ''' fits a curve to the given points - returns lists of x and y values to plot that curve and coefs for the eq with r2 - ''' +def regress(widths, var, fits="clsgn", ale=False): + """fits a curve to the given points + returns lists of x and y values to plot that curve and coefs for the eq with r2 + """ + if len(var) != len(widths): + # print( + # f"There are not enough variables to match widths. 
Widths : {widths} Variables Found : {var}, padding to match may affect correctness (doing it anyways)\n" + # ) + if len(widths) > len(var): + while len(widths) > len(var): + var.append(0.0) + if len(var) > len(widths): + while len(var) > len(widths): + widths.append(0) + + # widths = [8, 16, 32, 64, 128] + # print(f"Regress var : {var}") + # print(f"Regress widths : {widths}") funcArr = genFuncs(fits) - xp = np.linspace(min(widths)/2, max(widths)*1.1, 200) + xp = np.linspace(min(widths) / 2, max(widths) * 1.1, 200) xpToCalc = xp if ale: - widths = [w/normAddWidth for w in widths] - xpToCalc = [x/normAddWidth for x in xp] + widths = [w / normAddWidth for w in widths] + xpToCalc = [x / normAddWidth for x in xp] mat = [] for w in widths: @@ -274,8 +379,9 @@ def regress(widths, var, fits='clsgn', ale=False): for func in funcArr: row += [func(w)] mat += [row] - - y = np.array(var, dtype=np.float) + + # var = [0, 1, 2, 3, 4] + y = np.array(var, dtype=np.float64) coefs = opt.nnls(mat, y)[0] yp = [] @@ -291,19 +397,22 @@ def regress(widths, var, fits='clsgn', ale=False): return xp, pred, coefs, r2 + def makeCoefTable(): - ''' writes CSV with each line containing the coefficients for a regression fit - to a particular combination of module, metric (including both techs, normalized) - ''' + """writes CSV with each line containing the coefficients for a regression fit + to a particular combination of module, metric (including both techs, normalized) + """ file = open("ppaFitting.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Metric', 'Target', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2']) + writer.writerow( + ["Module", "Metric", "Target", "1", "N", "N^2", "log2(N)", "Nlog2(N)", "R^2"] + ) for module in modules: for freq in [10, None]: - target = 'easy' if freq else 'hard' - for var in ['delay', 'area', 'lpower', 'denergy']: - ale = (var != 'delay') + target = "easy" if freq else "hard" + for var in ["delay", "area", "lpower", "denergy"]: + ale = var != 
"delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -312,12 +421,12 @@ def makeCoefTable(): metric = getVals(spec.tech, module, var, freq=freq) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] + metL += [m / norm for m in metric] - xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale) + xp, pred, coefs, r2 = regress(widths * 2, metL, fits, ale) coefs = np.ndarray.tolist(coefs) - coefsToWrite = [None]*5 - fitTerms = 'clsgn' + coefsToWrite = [None] * 5 + fitTerms = "clsgn" ind = 0 for i in range(len(fitTerms)): if fitTerms[i] in fits: @@ -328,25 +437,38 @@ def makeCoefTable(): file.close() + def sigfig(num, figs): - return '{:g}'.format(float('{:.{p}g}'.format(num, p=figs))) + return "{:g}".format(float("{:.{p}g}".format(num, p=figs))) + def makeEqTable(): - ''' writes CSV with each line containing the equations for fits for each metric - to a particular module (including both techs, normalized) - ''' + """writes CSV with each line containing the equations for fits for each metric + to a particular module (including both techs, normalized) + """ file = open("ppaEquations.csv", "w") writer = csv.writer(file) - writer.writerow(['Element', 'Best delay', 'Fast area', 'Fast leakage', 'Fast energy', 'Small area', 'Small leakage', 'Small energy']) + writer.writerow( + [ + "Element", + "Best delay", + "Fast area", + "Fast leakage", + "Fast energy", + "Small area", + "Small leakage", + "Small energy", + ] + ) for module in modules: eqs = [] for freq in [None, 10]: - for var in ['delay', 'area', 'lpower', 'denergy']: - if (var == 'delay') and (freq == 10): + for var in ["delay", "area", "lpower", "denergy"]: + if (var == "delay") and (freq == 10): pass else: - ale = (var != 'delay') + ale = var != "delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -355,9 +477,9 @@ def makeEqTable(): metric = getVals(spec.tech, module, var, freq=freq) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] 
+ metL += [m / norm for m in metric] - xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale) + xp, pred, coefs, r2 = regress(widths * 2, metL, fits, ale) coefs = np.ndarray.tolist(coefs) eqs += [genLegend(fits, coefs, ale=ale)] row = [module] + eqs @@ -365,93 +487,113 @@ def makeEqTable(): file.close() -def genFuncs(fits='clsgn'): - ''' helper function for regress() - returns array of functions with one for each term desired in the regression fit - ''' + +def genFuncs(fits="clsgn"): + """helper function for regress() + returns array of functions with one for each term desired in the regression fit + """ funcArr = [] - if 'c' in fits: + if "c" in fits: funcArr += [lambda x: 1] - if 'l' in fits: + if "l" in fits: funcArr += [lambda x: x] - if 's' in fits: + if "s" in fits: funcArr += [lambda x: x**2] - if 'g' in fits: + if "g" in fits: funcArr += [lambda x: np.log2(x)] - if 'n' in fits: - funcArr += [lambda x: x*np.log2(x)] + if "n" in fits: + funcArr += [lambda x: x * np.log2(x)] return funcArr + def noOutliers(median, freqs, delays, areas): - ''' returns a pared down list of freqs, delays, and areas - cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area - helper function to freqPlot() - ''' - f=[] - d=[] - a=[] + """returns a pared down list of freqs, delays, and areas + cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area + helper function to freqPlot() + """ + f = [] + d = [] + a = [] for i in range(len(freqs)): - norm = freqs[i]/median - if (norm > 0.4) & (norm<1.4): + norm = freqs[i] / median + if (norm > 0.4) & (norm < 1.4): f += [freqs[i]] d += [delays[i]] a += [areas[i]] - + return f, d, a + def freqPlot(tech, mod, width): - ''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width - ''' + """plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width""" 
freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: - if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech): - ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period + if ( + (mod == oneSynth.module) + & (width == oneSynth.width) + & (tech == oneSynth.tech) + ): + ind = ( + 1000 / oneSynth.delay < oneSynth.freq + ) # when delay is within target clock period freqsL[ind] += [oneSynth.freq] delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] median = np.median(list(flatten(freqsL))) - + f, (ax1, ax2) = plt.subplots(2, 1, sharex=True) for ax in (ax1, ax2): - ax.ticklabel_format(useOffset=False, style='plain') + ax.ticklabel_format(useOffset=False, style="plain") - for ind in [0,1]: + for ind in [0, 1]: areas = areasL[ind] delays = delaysL[ind] freqs = freqsL[ind] - freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses + freqs, delays, areas = noOutliers( + median, freqs, delays, areas + ) # comment out to see all syntheses - c = 'blue' if ind else 'green' + c = "blue" if ind else "green" ax1.scatter(freqs, delays, color=c) ax2.scatter(freqs, areas, color=c) - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'), - lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')] + legend_elements = [ + lines.Line2D( + [0], [0], color="green", ls="", marker="o", label="timing achieved" + ), + lines.Line2D([0], [0], color="blue", ls="", marker="o", label="slack violated"), + ] ax1.legend(handles=legend_elements) width = str(width) - + ax2.set_xlabel("Target Freq (MHz)") - ax1.set_ylabel('Delay (ns)') - ax2.set_ylabel('Area (sq microns)') - ax1.set_title(mod + '_' + width) - if ('mux' in mod) & ('d' in mod): + ax1.set_ylabel("Delay (ns)") + ax2.set_ylabel("Area (sq microns)") + ax1.set_title(mod + "_" + width) + if ("mux" in mod) & ("d" in mod): width = mod - mod = 
'muxd' - plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + width + '.png') + mod = "muxd" + plt.savefig("./plots/freqBuckshot/" + tech + "/" + mod + "/" + width + ".png") # plt.show() + def squareAreaDelay(tech, mod, width): - ''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width - ''' + """plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width""" global allSynths freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: - if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech): - ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period + if ( + (mod == oneSynth.module) + & (width == oneSynth.width) + & (tech == oneSynth.tech) + ): + ind = ( + 1000 / oneSynth.delay < oneSynth.freq + ) # when delay is within target clock period freqsL[ind] += [oneSynth.freq] delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] @@ -459,181 +601,212 @@ def squareAreaDelay(tech, mod, width): f, (ax1) = plt.subplots(1, 1) ax2 = ax1.twinx() - for ind in [0,1]: + for ind in [0, 1]: areas = areasL[ind] delays = delaysL[ind] targets = freqsL[ind] - targets = [1000/f for f in targets] - - targets, delays, areas = noOutliers(targets, delays, areas) # comment out to see all - + targets = [1000 / f for f in targets] + + targets, delays, areas = noOutliers( + targets, delays, areas + ) # comment out to see all + if not ind: achievedDelays = delays - c = 'blue' if ind else 'green' - ax1.scatter(targets, delays, marker='^', color=c) - ax2.scatter(targets, areas, marker='s', color=c) - - bestAchieved = min(achievedDelays) - - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='^', label='delay (timing achieved)'), - lines.Line2D([0], [0], color='green', ls='', marker='s', label='area (timing achieved)'), - lines.Line2D([0], [0], color='blue', ls='', marker='^', label='delay (timing 
violated)'), - lines.Line2D([0], [0], color='blue', ls='', marker='s', label='area (timing violated)')] + c = "blue" if ind else "green" + ax1.scatter(targets, delays, marker="^", color=c) + ax2.scatter(targets, areas, marker="s", color=c) + + bestAchieved = min(achievedDelays) + + legend_elements = [ + lines.Line2D( + [0], [0], color="green", ls="", marker="^", label="delay (timing achieved)" + ), + lines.Line2D( + [0], [0], color="green", ls="", marker="s", label="area (timing achieved)" + ), + lines.Line2D( + [0], [0], color="blue", ls="", marker="^", label="delay (timing violated)" + ), + lines.Line2D( + [0], [0], color="blue", ls="", marker="s", label="area (timing violated)" + ), + ] + + ax2.legend(handles=legend_elements, loc="upper left") - ax2.legend(handles=legend_elements, loc='upper left') - ax1.set_xlabel("Delay Targeted (ns)") ax1.set_ylabel("Delay Achieved (ns)") - ax2.set_ylabel('Area (sq microns)') - ax1.set_title(mod + '_' + str(width)) + ax2.set_ylabel("Area (sq microns)") + ax1.set_title(mod + "_" + str(width)) squarify(f) xvals = np.array(ax1.get_xlim()) - frac = (min(flatten(delaysL))-xvals[0])/(xvals[1]-xvals[0]) - areaLowerLim = min(flatten(areasL))-100 - areaUpperLim = max(flatten(areasL))/frac + areaLowerLim + frac = (min(flatten(delaysL)) - xvals[0]) / (xvals[1] - xvals[0]) + areaLowerLim = min(flatten(areasL)) - 100 + areaUpperLim = max(flatten(areasL)) / frac + areaLowerLim ax2.set_ylim([areaLowerLim, areaUpperLim]) ax1.plot(xvals, xvals, ls="--", c=".3") - ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls='--') + ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls="--") - plt.savefig('./plots/squareareadelay_' + mod + '_' + str(width) + '.png') + plt.savefig("./plots/squareareadelay_" + mod + "_" + str(width) + ".png") # plt.show() + def squarify(fig): - ''' helper function for squareAreaDelay() - forces matplotlib figure to be a square - ''' + """helper function for squareAreaDelay() + 
forces matplotlib figure to be a square + """ w, h = fig.get_size_inches() if w > h: t = fig.subplotpars.top b = fig.subplotpars.bottom - axs = h*(t-b) - l = (1.-axs/w)/2 - fig.subplots_adjust(left=l, right=1-l) + axs = h * (t - b) + l = (1.0 - axs / w) / 2 + fig.subplots_adjust(left=l, right=1 - l) else: t = fig.subplotpars.right b = fig.subplotpars.left - axs = w*(t-b) - l = (1.-axs/h)/2 - fig.subplots_adjust(bottom=l, top=1-l) + axs = w * (t - b) + l = (1.0 - axs / h) / 2 + fig.subplots_adjust(bottom=l, top=1 - l) + def plotPPA(mod, freq=None, norm=True, aleOpt=False): - ''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits - if no freq specified, uses the synthesis with best achievable delay for each width - overlays data from both techs - ''' - with mpl.rc_context({"figure.figsize": (7,3.46)}): + """for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits + if no freq specified, uses the synthesis with best achievable delay for each width + overlays data from both techs + """ + with mpl.rc_context({"figure.figsize": (7, 3.46)}): fig, axs = plt.subplots(2, 2) - arr = [['delay', 'area'], ['lpower', 'denergy']] + arr = [["delay", "area"], ["lpower", "denergy"]] freqs = [freq] - if aleOpt: freqs += [10] + if aleOpt: + freqs += [10] for i in [0, 1]: for j in [0, 1]: leg = [] for f in freqs: - if (arr[i][j]=='delay') and (f==10): + if (arr[i][j] == "delay") and (f == 10): pass else: - r2 = oneMetricPlot(mod, arr[i][j], ax=axs[i, j], freq=f, norm=norm) - ls = '--' if f else '-' - leg += [lines.Line2D([0], [0], color='red', label='$R^2$='+str(round(r2, 4)), linestyle=ls)] + # print(f"Pasing in widths {widths}") + r2 = oneMetricPlot( + mod, widths, arr[i][j], ax=axs[i, j], freq=f, norm=norm + ) + ls = "--" if f else "-" + leg += [ + lines.Line2D( + [0], + [0], + color="orange", + label="$R^2$=" + str(round(r2, 4)), + linestyle=ls, + ) + ] - if (mod in ['flop', 'csa']) & (arr[i][j] 
== 'delay'): + if (mod in ["flop", "csa"]) & (arr[i][j] == "delay"): axs[i, j].set_ylim(ymin=0) ytop = axs[i, j].get_ylim()[1] - axs[i, j].set_ylim(ymax=1.1*ytop) + axs[i, j].set_ylim(ymax=1.1 * ytop) else: axs[i, j].legend(handles=leg, handlelength=1.5) - - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else "" - plt.suptitle(mod + titleStr) - plt.tight_layout(pad=0.05, w_pad=1, h_pad=0.5, rect=(0,0,1,0.97)) - if freq != 10: - n = 'normalized' if norm else 'unnormalized' - saveStr = './plots/'+ n + '/' + mod + '.png' + titleStr = " (target " + str(freq) + "MHz)" if freq != None else "" + plt.suptitle(mod + titleStr) + plt.tight_layout(pad=0.05, w_pad=1, h_pad=0.5, rect=(0, 0, 1, 0.97)) + + if freq != 10: + n = "normalized" if norm else "unnormalized" + saveStr = "./plots/" + n + "/" + mod + "_" + ".png" + print(f"Saving to {saveStr}") plt.savefig(saveStr) # plt.show() + def makeLineLegend(): - ''' generates legend to accompany normalized plots - ''' - plt.rcParams["figure.figsize"] = (5.5,0.3) + """generates legend to accompany normalized plots""" + plt.rcParams["figure.figsize"] = (5.5, 0.3) fig = plt.figure() - fullLeg = [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')] - fullLeg += [lines.Line2D([0], [0], color='black', label='smallest', linestyle='--')] - fullLeg += [lines.Line2D([0], [0], color='blue', label='tsmc28', marker='^')] - fullLeg += [lines.Line2D([0], [0], color='green', label='sky90', marker='o')] - fullLeg += [lines.Line2D([0], [0], color='green', label='sky130', marker='+')] - fullLeg += [lines.Line2D([0], [0], color='red', label='combined', marker='_')] - fig.legend(handles=fullLeg, ncol=5, handlelength=1.4, loc='center') - saveStr = './plots/legend.png' + fullLeg = [lines.Line2D([0], [0], color="black", label="fastest", linestyle="-")] + fullLeg += [lines.Line2D([0], [0], color="black", label="smallest", linestyle="--")] + fullLeg += [lines.Line2D([0], [0], color="blue", label="tsmc28", marker="^")] + fullLeg 
+= [lines.Line2D([0], [0], color="blue", label="tsmc28psyn", marker="x")] + fullLeg += [lines.Line2D([0], [0], color="green", label="sky90", marker="o")] + fullLeg += [lines.Line2D([0], [0], color="purple", label="sky130", marker="+")] + fullLeg += [lines.Line2D([0], [0], color="orange", label="combined", marker="_")] + fig.legend(handles=fullLeg, ncol=5, handlelength=1.4, loc="center") + saveStr = "./plots/legend.png" plt.savefig(saveStr) -def muxPlot(fits='clsgn', norm=True): - ''' module: string module name - freq: int freq (MHz) - var: string delay, area, lpower, or denergy - fits: constant, linear, square, log2, Nlog2 - plots given variable vs width for all matching syntheses with regression - ''' + +def muxPlot(fits="clsgn", norm=True): + """module: string module name + freq: int freq (MHz) + var: string delay, area, lpower, or denergy + fits: constant, linear, square, log2, Nlog2 + plots given variable vs width for all matching syntheses with regression + """ ax = plt.gca() inputs = [2, 4, 8] - allInputs = inputs*2 + allInputs = inputs * 2 fullLeg = [] - for crit in ['data', 'control']: + for crit in ["data", "control"]: allMetrics = [] - muxes = ['mux2', 'mux4', 'mux8'] + muxes = ["mux2", "mux4", "mux8"] - if crit == 'data': - ls = '--' - muxes = [m + 'd' for m in muxes] - elif crit == 'control': - ls = '-' + if crit == "data": + ls = "--" + muxes = [m + "d" for m in muxes] + elif crit == "control": + ls = "-" for spec in techSpecs: metric = [] for module in muxes: - metric += getVals(spec.tech, module, 'delay', width=[1]) - + metric += getVals(spec.tech, module, "delay", width=[1]) + if norm: techdict = spec._asdict() - norm = techdict['delay'] - metric = [m/norm for m in metric] + norm = techdict["delay"] + metric = [m / norm for m in metric] # print(spec.tech, ' ', metric) - if len(metric) == 3: # don't include the spec if we don't have points for all + if ( + len(metric) == 3 + ): # don't include the spec if we don't have points for all xp, pred, coefs, 
r2 = regress(inputs, metric, fits, ale=False) ax.scatter(inputs, metric, color=spec.color, marker=spec.shape) ax.plot(xp, pred, color=spec.color, linestyle=ls) allMetrics += metric xp, pred, coefs, r2 = regress(allInputs, allMetrics, fits) - ax.plot(xp, pred, color='red', linestyle=ls) - fullLeg += [lines.Line2D([0], [0], color='red', label=crit, linestyle=ls)] - - ax.set_ylabel('Delay (FO4)') + ax.plot(xp, pred, color="red", linestyle=ls) + fullLeg += [lines.Line2D([0], [0], color="red", label=crit, linestyle=ls)] + + ax.set_ylabel("Delay (FO4)") ax.set_xticks(inputs) ax.set_xlabel("Number of inputs") - ax.set_title('mux timing') - - ax.legend(handles = fullLeg) - plt.savefig('./plots/mux.png') + ax.set_title("mux timing") + + ax.legend(handles=fullLeg) + plt.savefig("./plots/mux.png") + def stdDevError(): - ''' calculates std deviation and error for paper-writing purposes - ''' - for var in ['delay', 'area', 'lpower', 'denergy']: + """calculates std deviation and error for paper-writing purposes""" + for var in ["delay", "area", "lpower", "denergy"]: errlist = [] for module in modules: - ale = (var != 'delay') + ale = var != "delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -643,20 +816,20 @@ def stdDevError(): metric = getVals(spec.tech, module, var) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] + metL += [m / norm for m in metric] if ale: - ws = [w/normAddWidth for w in widths] + ws = [w / normAddWidth for w in widths] else: ws = widths - ws = ws*2 + ws = ws * 2 mat = [] for w in ws: row = [] for func in funcArr: row += [func(w)] mat += [row] - + y = np.array(metL, dtype=np.float) coefs = opt.nnls(mat, y)[0] @@ -665,65 +838,83 @@ def stdDevError(): n = [func(w) for func in funcArr] yp += [sum(np.multiply(coefs, n))] - if (var == 'delay') & (module == 'flop'): + if (var == "delay") & (module == "flop"): pass - elif (module == 'mult') & ale: + elif (module == "mult") & ale: pass else: for i in range(len(y)): - 
errlist += [abs(y[i]/yp[i]-1)] + errlist += [abs(y[i] / yp[i] - 1)] # print(module, ' ', var, ' ', np.mean(errlist[-10:])) - + avgErr = np.mean(errlist) stdv = np.std(errlist) - print(var, ' ', avgErr, ' ', stdv) + print(var, " ", avgErr, " ", stdv) + def makePlotDirectory(): - ''' creates plots directory in same level as this script to store plots in - ''' + """creates plots directory in same level as this script to store plots in""" current_directory = os.getcwd() - final_directory = os.path.join(current_directory, 'plots') + final_directory = os.path.join(current_directory, "plots") if not os.path.exists(final_directory): os.makedirs(final_directory) os.chdir(final_directory) - for folder in ['freqBuckshot', 'normalized', 'unnormalized']: + for folder in ["freqBuckshot", "normalized", "unnormalized"]: new_directory = os.path.join(final_directory, folder) if not os.path.exists(new_directory): os.makedirs(new_directory) os.chdir(new_directory) - if 'freq' in folder: - for tech in ['sky90', 'sky130', 'tsmc28']: + if "freq" in folder: + for tech in ["sky90", "sky130", "tsmc28", "tsmc28psyn"]: for mod in modules: tech_directory = os.path.join(new_directory, tech) mod_directory = os.path.join(tech_directory, mod) if not os.path.exists(mod_directory): os.makedirs(mod_directory) - os.chdir('..') - + os.chdir("..") + os.chdir(current_directory) - -if __name__ == '__main__': + + +if __name__ == "__main__": ############################## # set up stuff, global variables - widths = [8, 16, 32, 64, 128] - modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult'] - normAddWidth = 32 # divisor to use with N since normalizing to add_32 + widths = [8, 16, 32, 64, 128] + modules = ["adder"] - fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 's', 's'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']} 
fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l'])) + normAddWidth = 32 # divisor to use with N since normalizing to add_32 + + fitDict = { + "adder": ["cg", "l", "l"], + "mul": ["cg", "s", "s"], + "comparator": ["cg", "l", "l"], + "csa": ["c", "l", "l"], + "shifter": ["cg", "l", "ln"], + "flop": ["c", "l", "l"], + "binencoder": ["cg", "l", "l"], + } + fitDict.update(dict.fromkeys(["mux2", "mux4", "mux8"], ["cg", "l", "l"])) TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") - techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['sky130', 'red', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['tsmc28', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594]] + # FO4 delay information information + techSpecs = [ + #["sky90", "green", "o", 43.2e-3, 1440.600027, 714.057, 0.658022690438], + # Area/Lpower/Denergy needs to be corrected here (jes) + ["sky130", "orange", "o", 99.5e-3, 1440.600027, 714.057, 0.658022690438], + # ["tsmc28", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], + # ["tsmc28psyn", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], + ] techSpecs = [TechSpec(*t) for t in techSpecs] - combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0) + combined = TechSpec("combined fit", "red", "_", 0, 0, 0, 0) ############################## # cleanup() # run to remove garbage synth runs - synthsintocsv() # slow, run only when new synth runs to add to csv - - allSynths = synthsfromcsv('ppaData.csv') # your csv here! - bestSynths = csvOfBest('bestSynths.csv') + synthsintocsv() # slow, run only when new synth runs to add to csv + + allSynths = synthsfromcsv("ppaData.csv") # your csv here! 
+ bestSynths = csvOfBest("bestSynths.csv") makePlotDirectory() # ### other functions @@ -734,9 +925,10 @@ if __name__ == '__main__': for mod in modules: for w in widths: - freqPlot('sky90', mod, w) - #freqPlot('sky130', mod, w) - #freqPlot('tsmc28', mod, w) - #plotPPA(mod, norm=False) - #plotPPA(mod, aleOpt=True) - plt.close('all') + #freqPlot('sky90', mod, w) + freqPlot("sky130", mod, w) + # freqPlot('tsmc28', mod, w) + # freqPlot('tsmc28psyn', mod, w) + plotPPA(mod, norm=False) + plotPPA(mod, aleOpt=True) + plt.close("all") diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index d9d07c10d..315fa554a 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -12,13 +12,11 @@ from ppaAnalyze import synthsfromcsv def runCommand(module, width, tech, freq): command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq) - print('here we go') - - subprocess.Popen(command, shell=True) + subprocess.call(command, shell=True) def deleteRedundant(synthsToRun): '''removes any previous runs for the current synthesis specifications''' - synthStr = "rm -rf runs/ppa_{}_{}_rv32e_{}nm_{}_*" + synthStr = "rm -rf runs/{}_{}_rv32e_{}_{}_*" for synth in synthsToRun: bashCommand = synthStr.format(*synth) outputCPL = subprocess.check_output(['bash','-c', bashCommand]) @@ -34,8 +32,21 @@ def freqSweep(module, width, tech): synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]] return synthsToRun +def freqModuleSweep(widths, modules, tech): + synthsToRun = [] + arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8] + allSynths = synthsfromcsv('ppa/bestSynths.csv') + for w in widths: + for module in modules: + for synth in allSynths: + if (synth.module == str(module)) & (synth.tech == tech) & (synth.width == w): + f = 1000/synth.delay + for freq in [round(f+f*x/100) for x in arr]: + synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]] + return synthsToRun + def 
filterRedundant(synthsToRun): - bashCommand = "find . -path '*runs/ppa*rv32e*' -prune" + bashCommand = "find . -path '*runs/*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) specReg = re.compile('[a-zA-Z0-9]+') allSynths = output.decode("utf-8").split('\n')[:-1] @@ -59,21 +70,30 @@ def allCombos(widths, modules, techs, freqs): if __name__ == '__main__': - ##### Run specific syntheses + ##### Run specific syntheses for a specific frequency widths = [8, 16, 32, 64, 128] - modules = ['mult', 'add', 'shiftleft', 'flop', 'comparator', 'priorityencoder', 'add', 'csa', 'mux2', 'mux4', 'mux8'] - techs = ['sky90', 'tsmc28'] + modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8'] + techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn'] freqs = [5000] synthsToRun = allCombos(widths, modules, techs, freqs) ##### Run a sweep based on best delay found in existing syntheses - module = 'add' + module = 'adder' width = 32 - tech = 'sky90' + tech = 'tsmc28psyn' synthsToRun = freqSweep(module, width, tech) + + ##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses + modules = ['adder'] +# widths = [8, 16, 32, 64, 128] + widths = [32] + tech = 'sky130' + synthsToRun = freqModuleSweep(widths, modules, tech) ##### Only do syntheses for which a run doesn't already exist - synthsToRun = filterRedundant(synthsToRun) - + synthsToRun = filterRedundant(synthsToRun) pool = Pool(processes=25) - pool.starmap(runCommand, synthsToRun) + +pool.starmap(runCommand, synthsToRun) +pool.close() +pool.join() \ No newline at end of file diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index ba548869f..cd4d6ff27 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -18,7 +18,6 @@ suppress_message {VER-274} # Enable Multicore set_host_options -max_cores $::env(MAXCORES) - # get outputDir and configDir from environment (Makefile) set outputDir $::env(OUTPUTDIR) 
set cfg $::env(CONFIGDIR) @@ -26,6 +25,7 @@ set hdl_src "../src" set saifpower $::env(SAIFPOWER) set maxopt $::env(MAXOPT) set drive $::env(DRIVE) +set width $::env(WIDTH) eval file copy -force [glob ${cfg}/*.vh] {$outputDir/hdl/} eval file copy -force [glob ${hdl_src}/cvw.sv] {$outputDir/hdl/} @@ -88,7 +88,13 @@ if { [shell_is_in_topographical_mode] } { #set alib_library_analysis_path ./$outputDir define_design_lib WORK -path ./$outputDir/WORK analyze -f sverilog -lib WORK $my_verilog_files -elaborate $my_toplevel -lib WORK +# If wrapper=0, we want to run against a specific module and pass +# width to DC +if { $wrapper == 1 } { + elaborate $my_toplevel -lib WORK +} else { + elaborate $my_toplevel -lib WORK -parameters WIDTH=$width +} # Set the current_design current_design $my_toplevel @@ -447,4 +453,4 @@ set t2 [clock seconds] set t [expr $t2 - $t1] echo [expr $t/60] -quit \ No newline at end of file +quit diff --git a/synthDC/wallySynthAll.sh b/synthDC/wallySynthAll.sh new file mode 100755 index 000000000..9af40a379 --- /dev/null +++ b/synthDC/wallySynthAll.sh @@ -0,0 +1,14 @@ +# Run all Wally synthesis experiments from chapter 8 +# However, trying to run the freqsweeps at the same time maxes out licenses and some runs fail +#./wallySynth.py --freqsweep 330 --tech sky130 +#./wallySynth.py --freqsweep 870 --tech sky90 +#./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram +./wallySynth.py --configsweep --tech sky130 --targetfreq 330 +./wallySynth.py --configsweep --tech sky90 --targetfreq 870 +./wallySynth.py --configsweep --tech tsmc28psyn --targetfreq 2800 --usesram +./wallySynth.py --featuresweep --tech sky130 --targetfreq 330 +./wallySynth.py --featuresweep --tech sky90 --targetfreq 870 +./wallySynth.py --featuresweep --tech tsmc28psyn --targetfreq 2800 --usesram +# Extract summary data (run this by hand after all experiments finish) +#./extractSummary.py --sky130freq 330 --sky90freq 870 --tsmcfreq 2800 + diff --git a/testbench/testbench-fp.sv 
b/testbench/testbench-fp.sv index e5f215e07..662036439 100644 --- a/testbench/testbench-fp.sv +++ b/testbench/testbench-fp.sv @@ -115,8 +115,8 @@ module testbenchfp; logic FlushE; logic IFDivStartE; logic FDivDoneE; - logic [P.NE+1:0] QeM; - logic [P.DIVb:0] QmM; + logic [P.NE+1:0] UeM; + logic [P.DIVb:0] UmM; logic [P.XLEN-1:0] FIntDivResultM; logic ResMatch; // Check if result match logic FlagMatch; // Check if IEEE flags match @@ -145,9 +145,12 @@ module testbenchfp; initial begin // Information displayed for user on what is simulating - $display("\nThe start of simulation..."); - $display("This simulation for TEST is %s", TEST); - $display("This simulation for TEST is of the operand size of %s", TEST_SIZE); + //$display("\nThe start of simulation..."); + //$display("This simulation for TEST is %s", TEST); + //$display("This simulation for TEST is of the operand size of %s", TEST_SIZE); + + // $display("FPDUR %d %d DIVN %d LOGR %d RK %d RADIX %d DURLEN %d", FPDUR, DIVN, LOGR, RK, RADIX, DURLEN); + if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion // add the 128-bit cvtint tests to the to-be-tested list @@ -649,7 +652,7 @@ module testbenchfp; string tt0; tt0 = $psprintf("%s", Tests[TestNum]); testname = {pp, tt0}; - $display("Here you are %s", testname); + //$display("Here you are %s", testname); $display("\n\nRunning %s vectors ", Tests[TestNum]); $readmemh(testname, TestVectors); // set the test index to 0 @@ -705,7 +708,7 @@ module testbenchfp; end postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), - .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .OpCtrl(OpCtrlVal), .DivUm(Quot), .DivUe(DivCalcExp), .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE), .XZero(XZero), .YZero(YZero), 
.CvtShiftAmt(CvtShiftAmtE), @@ -734,8 +737,8 @@ module testbenchfp; .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0), - .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .QeM(DivCalcExp), - .QmM(Quot), + .StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp), + .UmM(Quot), .FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M), .Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM), .FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE)); diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 070a6cad7..ece7500d5 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -389,6 +389,7 @@ module testbench; assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz; assign SDCCmdIn = SDCCmd; + assign SDCDat = sd_dat_reg_t ? sd_dat_reg_o : sd_dat_i; assign SDCDatIn = SDCDat; -----/\----- EXCLUDED -----/\----- */ assign SDCIntr = '0;