Merge branch 'main' of github.com:ross144/cvw

This commit is contained in:
Rose Thompson 2023-11-20 11:29:45 -06:00
commit eed6f11df6
64 changed files with 1909 additions and 1057 deletions

5
.gitignore vendored
View File

@ -10,7 +10,7 @@ __pycache__/
addins/riscv-arch-test/Makefile.include
addins/riscv-tests/target
addins/TestFloat-3e/build/Linux-x86_64-GCC/*
benchmarks/embench/wally*.json
#vsim work files to ignore
transcript
@ -175,3 +175,6 @@ tests/fp/combined_IF_vectors/IF_vectors/*.tv
sim/bp-results/*.log
sim/branch*.log
/tests/custom/fpga-test-sdc/bin/fpga-test-sdc
benchmarks/embench/wally*.json
benchmarks/embench/run*
sim/cfi.log

16
.gitmodules vendored
View File

@ -1,16 +1,9 @@
[submodule "sky130/sky130_osu_sc_t12"]
path = sky130/sky130_osu_sc_t12
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/
[submodule "addins/riscv-arch-test"]
path = addins/riscv-arch-test
url = https://github.com/riscv-non-isa/riscv-arch-test
ignore = dirty
[submodule "addins/imperas-riscv-tests"]
path = addins/imperas-riscv-tests
url = https://github.com/riscv-ovpsim/imperas-riscv-tests
[submodule "addins/riscv-tests"]
path = addins/riscv-tests
url = https://github.com/riscv-software-src/riscv-tests
[submodule "addins/riscv-dv"]
path = addins/riscv-dv
url = https://github.com/google/riscv-dv
@ -30,6 +23,9 @@
[submodule "addins/vivado-boards"]
path = addins/vivado-boards
url = https://github.com/Digilent/vivado-boards/
[submodule "addins/vivado-risc-v"]
path = addins/vivado-risc-v
url = https://github.com/eugene-tarassov/vivado-risc-v.git
[submodule "addins/ahbsdc"]
path = addins/ahbsdc
url = git@github.com:jacobpease/ahbsdc.git
[submodule "addins/riscv-arch-test"]
path = addins/riscv-arch-test
url = https://github.com/riscv-non-isa/riscv-arch-test

1
addins/ahbsdc Submodule

@ -0,0 +1 @@
Subproject commit 5df21aa6625eca120e64ea353ca641aff37d90b2

@ -1 +1 @@
Subproject commit 1480febc3ace5f471baeee4b1ae0d8fea16e4762
Subproject commit 4c5eb87983f51ca7fcf7855306877b3d1c3aabf1

@ -1 +1 @@
Subproject commit 197179fdc9dfeeca821e848f373c897a3fdae86c
Subproject commit eb0a3892215ad2384702db02da1551a59701ec67

@ -1 +0,0 @@
Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7

@ -1 +0,0 @@
Subproject commit c76a8613a177b3a04face2cb8e15dd07a8d2fc40

View File

@ -3,6 +3,7 @@
# Compile Embench for Wally
embench_dir = ../../addins/embench-iot
ARCH=rv32imac_zicsr
all: build
run: build size sim
@ -15,7 +16,7 @@ buildsize: build_speedopt_size build_sizeopt_size
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
build_speedopt_speed:
$(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
$(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-O2 -nostartfiles -march=$(ARCH)"
# remove files not used in embench1.0 When changing to 2.0, restore these files
#rm -rf $(embench_dir)/bd_speedopt_speed/src/md5sum
#rm -rf $(embench_dir)/bd_speedopt_speed/src/tarfind
@ -23,7 +24,7 @@ build_speedopt_speed:
find $(embench_dir)/bd_speedopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
build_sizeopt_speed:
$(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-Os -nostartfiles"
$(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-Os -nostartfiles -march=$(ARCH)"
# remove files not used in embench1.0 When changing to 2.0, restore these files
#rm -rf $(embench_dir)/bd_sizeopt_speed/src/md5sum
#rm -rf $(embench_dir)/bd_sizeopt_speed/src/tarfind
@ -32,10 +33,10 @@ build_sizeopt_speed:
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
build_speedopt_size:
$(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-O2 -msave-restore" --dummy-libs="libgcc libm libc crt0"
$(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-O2 -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0"
build_sizeopt_size:
$(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
$(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-Os -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0"
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
sim: modelsim_build_memfile modelsim_run speed

View File

@ -0,0 +1,87 @@
#!/usr/bin/python3
# embench_arch_sweep.py
# David_Harris@hmc.edu 16 November 2023
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
# Run embench on a variety of architectures and collate results
import os
from datetime import datetime
import re
import collections
# Architectures to sweep; each string is passed to make as ARCH=<string> by run_arch_sweep().
# Order matters: tabulate_arch_sweep() takes its table rows from the programs found for archs[0].
# Alternative ordering, kept for reference:
#archs = ["rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr", "rv32imafdc_zba_zbb_zbc_zbs_zicsr"]
archs = ["rv32imafdc_zba_zbb_zbc_zbs_zicsr", "rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr"]
def calcgeomean(d, arch):
    """Return the geometric mean of the benchmark scores recorded for one architecture.

    d    -- mapping of architecture name -> {program name: score}; each score
            must be convertible with float() (numbers or numeric strings).
    arch -- key into d selecting the architecture to summarize.

    A program with no recorded score contributes a factor of 1.0, so it leaves
    the product unchanged while still counting toward the root.
    """
    benchmarks = (
        "aha-mont64", "crc32", "cubic", "edn", "huffbench", "matmult-int",
        "minver", "nbody", "nettle-aes", "nettle-sha256", "nsichneu",
        "picojpeg", "qrduino", "sglib-combined", "slre", "st", "statemate",
        "ud", "wikisort",
    )
    scores = d[arch]
    product = 1.0
    for name in benchmarks:
        product *= float(scores.get(name, 1.0))
    # n-th root of the product, where n is the fixed benchmark count
    return product ** (1.0 / len(benchmarks))
def tabulate_arch_sweep(directory):
    """Print tab-separated result tables for a completed architecture sweep.

    directory -- folder holding the per-architecture result files, named
                 <case>_<arch>.json (as saved by run_arch_sweep()).

    For each of the two tabulated cases, prints a header row of architecture
    names, one row per program (taken from the programs found for archs[0]),
    and a final "New geo mean" row computed by calcgeomean(). A results file
    that is missing or unreadable is treated as empty: its column shows "n/a"
    and its geometric mean uses the 1.0 default for every program.
    """
    # Captures one `"name" : value` pair from a line; the value stops at the
    # first comma or end of line.
    entry_pattern = re.compile(r'"([^"]*)" : ([^,\n]+)')
    for case in ["wallySizeOpt_size", "wallySpeedOpt_speed"]:
        d = collections.defaultdict(dict)
        for arch in archs:
            file_path = os.path.join(directory, case + "_" + arch + ".json")
            try:
                # The context manager closes the file on every path. The old
                # code called f.close() in the except branch, which raised
                # UnboundLocalError when the very first open() failed.
                with open(file_path, "r") as f:
                    lines = f.readlines()
            except (OSError, UnicodeError):
                # Best effort: a missing or unreadable file yields no entries.
                lines = []
            for line in lines:
                match = entry_pattern.search(line)
                if match:
                    d[arch][match.group(1)] = match.group(2)
        # Header row: empty corner cell, then one column per architecture.
        for arch in [""] + archs:
            print(arch, end="\t")
        print("")
        # One row per program reported for the first architecture.
        for prog in d[archs[0]]:
            print(prog, end="\t")
            for arch in archs:
                print(d[arch].get(prog, "n/a"), end="\t")
            print("")
        print("New geo mean", end="\t")
        for arch in archs:
            print(calcgeomean(d, arch), end="\t")
        print("")
def run_arch_sweep():
    """Build and run Embench once per architecture, collecting the result files.

    Creates a directory named run_<YYYYMMDD>_<HHMMSS> in the current working
    directory. For every entry in archs it runs "make clean" and
    "make run ARCH=<arch>", then moves the four wally*.json result files into
    that directory with the architecture appended to each name.

    Returns the name of the run directory.
    """
    # Timestamp includes the time of day as well as the date.
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    run_dir = "run_" + stamp
    os.mkdir(run_dir)

    result_kinds = ("SizeOpt_size", "SizeOpt_speed", "SpeedOpt_size", "SpeedOpt_speed")
    for arch in archs:
        os.system("make clean")
        os.system(f"make run ARCH={arch}")
        # Tag each result file with the architecture and file it under the run directory.
        for kind in result_kinds:
            os.system(f"mv -f wally{kind}.json {run_dir}/wally{kind}_{arch}.json")
    return run_dir
# Script entry: run the full sweep, then tabulate the results it saved.
directory = run_arch_sweep()
# To re-tabulate an earlier run without rebuilding, comment out the call above
# and point directory at the existing run folder instead:
#directory = "run_20231117_082325"
tabulate_arch_sweep(directory)

View File

@ -45,7 +45,7 @@ localparam SSTC_SUPPORTED = 1;
localparam ZICBOM_SUPPORTED = 1;
localparam ZICBOZ_SUPPORTED = 1;
localparam ZICBOP_SUPPORTED = 1;
localparam ZICCLSM_SUPPORTED = 0;
localparam ZICCLSM_SUPPORTED = 1;
localparam SVPBMT_SUPPORTED = 1;
localparam SVNAPOT_SUPPORTED = 1;
localparam SVINVAL_SUPPORTED = 1;

View File

@ -74,8 +74,8 @@ localparam ICACHE_LINELENINBITS = 32'd512;
// Integer Divider Configuration
// IDIV_BITSPERCYCLE must be 1, 2, or 4
localparam IDIV_BITSPERCYCLE = 32'd4;
localparam IDIV_ON_FPU = 1;
localparam IDIV_BITSPERCYCLE = 32'd2;
localparam IDIV_ON_FPU = 0;
// Legal number of PMP entries are 0, 16, or 64
localparam PMP_ENTRIES = 32'd16;
@ -169,7 +169,7 @@ localparam ZMMUL_SUPPORTED = 0;
// FPU division architecture
localparam RADIX = 32'd4;
localparam DIVCOPIES = 32'd4;
localparam DIVCOPIES = 32'd2;
// bit manipulation
localparam ZBA_SUPPORTED = 1;

View File

@ -150,7 +150,7 @@ localparam PLIC_SDC_ID = 32'd9;
localparam BPRED_SUPPORTED = 1;
localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
localparam BPRED_NUM_LHR = 32'd6;
localparam BPRED_SIZE = 32'd6;
localparam BPRED_SIZE = 32'd10;
localparam BTB_SIZE = 32'd10;
localparam RAS_SIZE = 32'd16;

View File

@ -93,16 +93,21 @@ localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF);
localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2);
localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS);
// divider r and rk (bits per digit, bits per cycle)
localparam LOGR = $clog2(RADIX); // r = log(R) bits per digit
localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated
// intermediate division parameters not directly used in fdivsqrt hardware
localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right
//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step.
localparam DIVMINb = ((FPDIVMINb<XLEN) & IDIV_ON_FPU) ? XLEN : FPDIVMINb; // minimum fractional bits b = max(XLEN, FPDIVMINb)
localparam RESBITS = DIVMINb + LOGR; // number of bits in a result: r integer + b fractional
// division constants
localparam DIVN = (((NF+2<XLEN) & IDIV_ON_FPU) ? XLEN : NF+2); // standard length of input
localparam LOGR = ($clog2(RADIX)); // r = log(R)
localparam RK = (LOGR*DIVCOPIES); // r*k used for intdiv preproc
localparam LOGRK = ($clog2(RK)); // log2(r*k)
localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4));
localparam DURLEN = ($clog2(FPDUR+1));
localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b)
localparam DIVBLEN = ($clog2(DIVb+1)-1);
localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result
localparam FPDUR = (RESBITS-1)/RK + 1 ; // ceiling((r+b)/rk)
localparam DIVb = FPDUR*RK - LOGR; // divsqrt fractional bits, so total number of bits is a multiple of rk after r integer bits
localparam DURLEN = $clog2(FPDUR); // enough bits to count the duration
localparam DIVBLEN = $clog2(DIVb); // enough bits to count number of fractional bits
// largest length in IEU/FPU
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF)); // max(XLEN, NF)
@ -110,7 +115,7 @@ localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($uns
localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6)));
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));
localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4)));
localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVMINb+1+NF) > (3*NF+4) ? (DIVMINb+1+NF) : (3*NF+4)));
// Disable spurious Verilator warnings

View File

@ -179,13 +179,10 @@ localparam cvw_t P = '{
NORMSHIFTSZ : NORMSHIFTSZ,
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
CORRSHIFTSZ : CORRSHIFTSZ,
DIVN : DIVN,
LOGR : LOGR,
RK : RK,
LOGRK : LOGRK,
FPDUR : FPDUR,
DURLEN : DURLEN,
DIVb : DIVb,
DIVBLEN : DIVBLEN,
DIVa : DIVa
DIVBLEN : DIVBLEN
};

View File

@ -1,131 +1,12 @@
lsu/lsu.sv: logic IEUAdrM
lsu/lsu.sv: logic WriteDataM
lsu/lsu.sv: logic LSUHADDR
lsu/lsu.sv: logic HRDATA
lsu/lsu.sv: logic LSUHWDATA
lsu/lsu.sv: logic LSUHREADY
lsu/lsu.sv: logic LSUHWRITE
lsu/lsu.sv: logic LSUHSIZE
lsu/lsu.sv: logic LSUHBURST
lsu/lsu.sv: logic LSUHTRANS
lsu/lsu.sv: logic LSUHWSTRB
lsu/lsu.sv: logic IHAdrM
ieu/regfile.sv: logic rf
ieu/datapath.sv: logic RegWriteW
hazard/hazard.sv: logic BPPredWrongE
hazard/hazard.sv: logic LoadStallD
hazard/hazard.sv: logic FCvtIntStallD
hazard/hazard.sv: logic DivBusyE
hazard/hazard.sv: logic EcallFaultM
hazard/hazard.sv: logic WFIStallM
hazard/hazard.sv: logic StallF
hazard/hazard.sv: logic FlushD
cache/cachefsm.sv: statetype CurrState
wally/wallypipelinedcore.sv: logic TrapM
wally/wallypipelinedcore.sv: logic SrcAM
wally/wallypipelinedcore.sv: logic InstrM
wally/wallypipelinedcore.sv: logic PCM
wally/wallypipelinedcore.sv: logic MemRWM
wally/wallypipelinedcore.sv: logic TrapM
wally/wallypipelinedcore.sv: logic InstrValidM
wally/wallypipelinedcore.sv: logic WriteDataM
wally/wallypipelinedcore.sv: logic IEUAdrM
wally/wallypipelinedcore.sv: logic HRDATA
ifu/spill.sv: statetype CurrState
ifu/ifu.sv: logic IFUStallF
ifu/ifu.sv: logic IFUHADDR
ifu/ifu.sv: logic HRDATA
ifu/ifu.sv: logic IFUHREADY
ifu/ifu.sv: logic IFUHWRITE
ifu/ifu.sv: logic IFUHSIZE
ifu/ifu.sv: logic IFUHBURST
ifu/ifu.sv: logic IFUHTRANS
ifu/ifu.sv: logic PCF
ifu/ifu.sv: logic PCNextF
ifu/ifu.sv: logic PCPF
ifu/ifu.sv: logic PostSpillInstrRawF
mmu/hptw.sv: logic ITLBWriteF
mmu/hptw.sv: statetype WalkerState
privileged/csrs.sv: logic CSRSReadValM
privileged/csrs.sv: logic SEPC_REGW
privileged/csrs.sv: logic MIP_REGW
privileged/csrs.sv: logic SSCRATCH_REGW
privileged/csrs.sv: logic SCAUSE_REGW
privileged/csr.sv: logic CSRReadValM
privileged/csr.sv: logic CSRSrcM
privileged/csr.sv: logic CSRWriteValM
privileged/csr.sv: logic MSTATUS_REGW
privileged/trap.sv: logic InstrMisalignedFaultM
privileged/trap.sv: logic BreakpointFaultM
privileged/trap.sv: logic LoadAccessFaultM
privileged/trap.sv: logic LoadPageFaultM
privileged/trap.sv: logic mretM
privileged/trap.sv: logic MIP_REGW
privileged/trap.sv: logic PendingIntsM
privileged/privileged.sv: logic CSRReadM
privileged/privileged.sv: logic InterruptM
privileged/csrc.sv: logic HPMCOUNTER_REGW
privileged/csri.sv: logic MExtInt
privileged/csri.sv: logic MIP_REGW_writeabl
privileged/csrm.sv: logic MIP_REGW
privileged/csrm.sv: logic MEPC_REGW
privileged/csrm.sv: logic MEDELEG_REGW
privileged/csrm.sv: logic MIDELEG_REGW
privileged/csrm.sv: logic MSCRATCH_REGW
privileged/csrm.sv: logic MCAUSE_REGW
uncore/uart_apb.sv: logic SIN
uncore/uart_apb.sv: logic SOUT
uncore/uart_apb.sv: logic OUT1b
uncore/uartPC16550D.sv: logic RBR
uncore/uartPC16550D.sv: logic FCR
uncore/uartPC16550D.sv: logic IER
uncore/uartPC16550D.sv: logic MCR
uncore/uartPC16550D.sv: logic baudpulse
uncore/uartPC16550D.sv: statetype rxstate
uncore/uartPC16550D.sv: logic rxfifo
uncore/uartPC16550D.sv: logic txfifo
uncore/uartPC16550D.sv: logic rxfifohead
uncore/uartPC16550D.sv: logic rxfifoentries
uncore/uartPC16550D.sv: logic RXBR
uncore/uartPC16550D.sv: logic rxtimeoutcnt
uncore/uartPC16550D.sv: logic rxparityerr
uncore/uartPC16550D.sv: logic rxdataready
uncore/uartPC16550D.sv: logic rxfifoempty
uncore/uartPC16550D.sv: logic rxdata
uncore/uartPC16550D.sv: logic RXerrbit
uncore/uartPC16550D.sv: logic rxfullbitunwrapped
uncore/uartPC16550D.sv: logic txdata
uncore/uartPC16550D.sv: logic txnextbit
uncore/uartPC16550D.sv: logic txfifoempty
uncore/uartPC16550D.sv: logic fifoenabled
uncore/uartPC16550D.sv: logic RXerr
uncore/uartPC16550D.sv: logic THRE
uncore/uartPC16550D.sv: logic rxdataavailintr
uncore/uartPC16550D.sv: logic intrID
uncore/uncore.sv: logic HSELEXTSDCD
uncore/plic_apb.sv: logic MExtInt
uncore/plic_apb.sv: logic Din
uncore/plic_apb.sv: logic requests
uncore/plic_apb.sv: logic intPriority
uncore/plic_apb.sv: logic intInProgress
uncore/plic_apb.sv: logic intThreshold
uncore/plic_apb.sv: logic intEn
uncore/plic_apb.sv: logic intClaim
uncore/plic_apb.sv: logic irqMatrix
uncore/plic_apb.sv: logic priorities_with_irqs
uncore/plic_apb.sv: logic max_priority_with_irqs
uncore/plic_apb.sv: logic irqs_at_max_priority
uncore/plic_apb.sv: logic threshMask
uncore/clint_apb.sv: logic MTIME
uncore/clint_apb.sv: logic MTIMECMP
ebu/ebu.sv: logic HCLK
ebu/ebu.sv: logic HREADY
ebu/ebu.sv: logic HRESP
ebu/ebu.sv: logic HADDR
ebu/ebu.sv: logic HWRITE
ebu/ebu.sv: logic HSIZE
ebu/ebu.sv: logic HBURST
ebu/ebu.sv: logic HPROT
ebu/ebu.sv: logic HTRANS
ebu/ebu.sv: logic HMASTLOC
ebu/buscachefsm.sv: busstatetype CurrState
ebu/busfsm.sv: busstatetype CurrState
wally/wallypipelinedcore.sv: logic InstrM
lsu/lsu.sv: logic IEUAdrM
lsu/lsu.sv: logic PAdrM
lsu/lsu.sv: logic ReadDataM
lsu/lsu.sv: logic WriteDataM
lsu/lsu.sv: logic MemRWM
mmu/hptw.sv: logic SATP_REGW
privileged/csr.sv: logic MENVCFG_REGW
privileged/csr.sv: logic SENVCFG_REGW

File diff suppressed because one or more lines are too long

View File

@ -42,13 +42,9 @@ if {$board=="ArtyA7"} {
# read in all other rtl
read_verilog -sv [glob -type f ../src/CopiedFiles_do_not_add_to_repo/*/*.sv ../src/CopiedFiles_do_not_add_to_repo/*/*/*.sv]
# *** Once the SDC is updated to use AHB, change these to SystemVerilog.
read_verilog [glob -type f ../src/axi_sdc_controller.v]
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_master.v]
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_serial_host.v]
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_master.v]
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_serial_host.v]
read_verilog [glob -type f ../../addins/ahbsdc/sdc/*.v]
set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/vivado-risc-v/sdc} [current_fileset]
set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/ahbsdc/sdc} [current_fileset]
if {$board=="ArtyA7"} {
add_files -fileset constrs_1 -norecurse ../constraints/constraints-$board.xdc

513
fpga/src/boot.mem Normal file
View File

@ -0,0 +1,513 @@
8001819300002197
4281420141014081
4481440143814301
4681460145814501
4881480147814701
4a814a0149814901
4c814c014b814b01
4e814e014d814d01
0110011b4f814f01
059b45011161016e
0004063705fe0010
1f6000ef8006061b
0ff003930000100f
4e952e3110060e37
c602829b0053f2b7
2023fe02dfe312fd
829b0053f2b7007e
fe02dfe312fdc602
4de31efd000e2023
059bf1402573fdd0
0000061705e20870
0010029b01260613
68110002806702fe
0085179bf0080813
038008130107f7b3
480508a86c632781
1533357902a87963
38030000181700a8
1c6301057833f268
081a403018370808
0105783342280813
1815751308081063
00367513c295e14d
654ded510207e793
c1701ff00613f130
0637c530fff6861b
664dcd10167d0200
17fd001007b7c25c
859b5a5cc20cd21c
02062a23dfed0007
4785fffd561c664d
4501461c06f59063
4a1cc35c465cc31c
e29dc75c4a5cc71c
0c63086008138082
1ae30a9008130105
b7710017e793f905
e793b75901d7e793
5f5c674db7410197
66cd02072e23dffd
fff78513ff7d5698
40a0053300a03533
bfb100a7e7938082
e0a2715d8082557d
e486f052f44ef84a
fa13e85aec56fc26
843289ae892a0086
00959993000a1463
864ac4396b054a85
0009859b4549870a
0004049b05540363
86a66485008b7363
870a87aaec7ff0ef
4531458146014681
f0ef0207c9639c05
17820094979beb1f
873e020541639381
993e99ba020a1963
870aa8094501f85d
e8bff0ef45454685
60a64505fe0559e3
79a2794274e26406
61616b426ae27a02
9301020497138082
f40647057179b7f1
d79867cdec26f022
dff58b85571c674d
2423d35c03600793
fffd571c674d0207
0007a737b00026f3
b00027f311f70713
674dfef77de38f95
4f5ccf9d8b895b1c
26f3cf5c0027e793
071305f5e737b000
8f95b00027f30ff7
4f5c674dfef77de3
b00026f3cf5c9bf5
67f7071300989737
7de38f95b00027f3
458146014681fef7
ddbff0ef4501870a
059346014681870a
dcbff0ef45211aa0
1aa007134782e939
816393d117d24411
85220ff0041302e7
614564e270a27402
46e3da5ff0efa0cd
0207c7634782fe05
458146014681870a
d8bff0ef03700513
46014681870a87aa
0a900513403005b7
4409bf7dfc07d9e3
c3998b8583f9bfe1
4681870a00846413
f0ef450945814601
870afa0540e3d59f
123405b746014681
46e3d45ff0ef450d
870a77c14482f805
85a6460146818cfd
4ae3d2dff0ef451d
d3d8470567cdf605
000f4737b00026f3
b00027f323f70713
67cdfef77de38f95
4681870a0007ae23
0370051385a64601
f2054fe3cf7ff0ef
458146014681870a
ce3ff0ef08600513
4681870af20545e3
4541200005934601
f0055de3ccfff0ef
3023bf010113bf09
4605842a86aa4081
40113423850a4585
86a265a6da5ff0ef
d99ff0ef04084605
2201358322813603
86a2260508700513
d81ff0ef05629e0d
2a0135832a813603
9e0d86a226054505
3603d6bff0ef057e
0513320135833281
9e0d86a226054010
3083d53ff0ef0556
4501400134034081
0000808241010113
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
0000000000000000
00600100d2e3ca40

View File

@ -27,14 +27,6 @@ BINARIES := fw_jump.elf vmlinux busybox
OBJDUMPS := $(foreach name, $(BINARIES), $(basename $(name) .elf))
OBJDUMPS := $(foreach name, $(OBJDUMPS), $(DIS)/$(name).objdump)
define linuxDir =
$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$")
endef
define busyboxDir =
$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$")
endef
.PHONY: all generate disassemble install clean cleanDTB cleanDriver test
all:
@ -46,8 +38,7 @@ all:
# Temp rule for debugging
test:
@echo $(linuxDir)
@echo $(busyboxDir)
echo $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$")
generate: $(DTB) $(IMAGES)
@ -74,11 +65,13 @@ $(DIS)/%.objdump: $(IMAGES)/%.elf
$(DIS)/%.objdump: $(IMAGES)/%
riscv64-unknown-elf-objdump -S $< >> $@
$(IMAGES)/vmlinux: $(call linuxDir)/vmlinux
cp $< $@
# Copy the kernel ELF out of the Buildroot build tree.
# The versioned linux-X.Y.Z directory is located by the shell when the recipe
# runs, not when the Makefile is parsed. The recipe fails with a clear message
# if no such directory is found, and the last line carries no trailing
# continuation so the recipe cannot absorb whatever line follows it.
$(IMAGES)/vmlinux:
	linuxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") ; \
	if [ -z "$$linuxDir" ]; then echo "No linux-X.Y.Z directory found under $(BUILDROOT)/output/build" >&2; exit 1; fi ; \
	cp "$$linuxDir/vmlinux" $@
$(IMAGES)/busybox: $(call busyboxDir)/busybox
cp $< $@
# Copy the busybox binary out of the Buildroot build tree.
# The versioned busybox-X.Y.Z directory is located by the shell when the recipe
# runs, not when the Makefile is parsed. The recipe fails with a clear message
# if no such directory is found, and the last line carries no trailing
# continuation so the recipe cannot absorb whatever line follows it.
$(IMAGES)/busybox:
	busyboxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") ; \
	if [ -z "$$busyboxDir" ]; then echo "No busybox-X.Y.Z directory found under $(BUILDROOT)/output/build" >&2; exit 1; fi ; \
	cp "$$busyboxDir/busybox" $@
# Generating new Buildroot directories --------------------------------

View File

@ -31,6 +31,7 @@
status = "okay";
compatible = "riscv";
riscv,isa = "rv64imafdcsu";
/* One string per extension: the base and single-letter extensions are listed
 * individually rather than packed as "imafdc". "zicboz" replaces the
 * misspelled "zicbopz". */
riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "sstc", "svinval", "svnapot", "svpbmt", "zba", "zbb", "zbc", "zbs", "zicbom", "zicbop", "zicboz", "zicntr", "zicsr", "zifencei", "zihpm";
mmu-type = "riscv,sv48";
interrupt-controller {

View File

@ -22,6 +22,9 @@
--override cpu/Zicbom=T
--override cpu/Zicbop=T
--override cpu/Zicboz=T
--override cmomp_bytes=64 # Zic64b
--override cmoz_bytes=64 # Zic64b
--override lr_sc_grain=64 # Za64rs
# 64 KiB continuous huge pages supported
--override cpu/Svpbmt=T
@ -40,7 +43,7 @@
--override cpu/reset_address=0x80000000
--override cpu/unaligned=F
--override cpu/unaligned=T # Zicclsm (should be true)
--override cpu/ignore_non_leaf_DAU=1
--override cpu/wfi_is_nop=T
--override cpu/misa_Extensions_mask=0x0
@ -88,7 +91,7 @@
# Add Imperas simulator application instruction tracing
--verbose
--trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange --traceafter 0
#--trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange --traceafter 300000000
--override cpu/debugflags=6 --override cpu/verbose=1
--override cpu/show_c_prefix=T

View File

@ -7,4 +7,4 @@ export OTHERFLAGS="+TRACE2LOG_ENABLE=1"
#export OTHERFLAGS="+TRACE2LOG_ENABLE=1 +TRACE2LOG_AFTER=10500000"
export OTHERFLAGS=""
vsim -c -do "do wally-linux-imperas.do buildroot buildroot-no-trace $::env(RISCV) 0 0 0"
vsim -c -do "do wally-linux-imperas.do buildroot buildroot-no-trace $::env(RISCV) 0 0 0"

View File

@ -40,6 +40,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
#-- Run the Simulation
#run -all
run 7000 ms
add log -recursive /*
do linux-wave.do
run -all
@ -87,9 +88,10 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
#run 100 ns
#force -deposit testbench/dut/core/priv/priv/csr/csri/IE_REGW 16'h2aa
#force -deposit testbench/dut/uncore/uncore/clint/clint/MTIMECMP 64'h1000
run 7000 ms
add log -recursive /testbench/dut/*
do wave.do
run 14000 ms
#add log -recursive /*
#do linux-wave.do
#run -all
exec ./slack-notifier/slack-notifier.py

View File

@ -271,15 +271,12 @@ typedef struct packed {
int CORRSHIFTSZ;
// division constants
int DIVN ;
int LOGR ;
int RK ;
int LOGRK ;
int FPDUR ;
int DURLEN ;
int DIVb ;
int DIVBLEN ;
int DIVa ;
} cvw_t;

View File

@ -45,8 +45,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
input logic IntDivE, W64E,
output logic DivStickyM,
output logic FDivBusyE, IFDivStartE, FDivDoneE,
output logic [P.NE+1:0] QeM,
output logic [P.DIVb:0] QmM,
output logic [P.NE+1:0] UeM, // Exponent result
output logic [P.DIVb:0] UmM, // Significand result
output logic [P.XLEN-1:0] FIntDivResultM
);
@ -67,17 +67,17 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
// Integer div/rem signals
logic BZeroM; // Denominator is zero
logic IntDivM; // Integer operation
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
logic [P.DIVBLEN-1:0] IntNormShiftM; // Integer normalization shift amount
logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
logic ISpecialCaseE; // Integer div/remainder special cases
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
.FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
// Int-specific
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
.BZeroM, .nM, .mM, .AM,
.BZeroM, .IntNormShiftM, .AM,
.IntDivM, .W64M, .ALTBM, .AsM, .BsM);
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
@ -94,8 +94,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
.QmM, .WZeroE, .DivStickyM,
.UmM, .WZeroE, .DivStickyM,
// Int-specific
.nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
.IntNormShiftM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
.FIntDivResultM);
endmodule

View File

@ -30,13 +30,11 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] FmtE,
input logic SqrtE,
input logic IntDivE,
input logic [P.DIVBLEN:0] nE,
input logic [P.DIVBLEN-1:0] IntResultBitsE,
output logic [P.DURLEN-1:0] CyclesE
);
logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
// DIVN = P.NF+3
// NS = NF + 1
// N = NS or NS+2 for div/sqrt.
logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits
/* verilator lint_off WIDTH */
if (P.FPSIZES == 1)
@ -64,12 +62,21 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
P.Q_FMT: Nf = P.Q_NF;
endcase
// Cycle logic
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
// Integer division needs p fractional + r integer result bits
// FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
// FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle.
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
always_comb begin
if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below?
// if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1
else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
else ResultBitsE = FPResultBitsE;
CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk)
end
/* verilator lint_on WIDTH */

View File

@ -28,16 +28,19 @@
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.FMTBITS-1:0] Fmt,
input logic [P.NE-1:0] Xe, Ye,
input logic [P.NE-1:0] Xe, Ye, // input exponents
input logic Sqrt,
input logic XZero,
input logic [P.DIVBLEN:0] ell, m,
output logic [P.NE+1:0] Qe
input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in the X and Y significands
output logic [P.NE+1:0] Ue // result exponent
);
logic [P.NE-2:0] Bias;
logic [P.NE+1:0] SXExp;
logic [P.NE+1:0] SExp;
logic [P.NE+1:0] DExp;
// Determine exponent bias according to the format
if (P.FPSIZES == 1) begin
assign Bias = (P.NE-1)'(P.BIAS);
@ -63,10 +66,14 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
endcase
end
// Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
// correct exponent for subnormal input's normalization shifts
// division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
assign Qe = Sqrt ? SExp : DExp;
// Select square root or division exponent
assign Ue = Sqrt ? SExp : DExp;
endmodule

View File

@ -28,12 +28,12 @@
module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
input logic up, uz,
input logic [P.DIVb+3:0] C, U, UM,
output logic [P.DIVb+3:0] F
input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms)
output logic [P.DIVb+3:0] F // Q4.DIVb
);
logic [P.DIVb+3:0] FP, FN, FZ;
logic [P.DIVb+3:0] FP, FN, FZ; // Q4.DIVb
// Generate for both positive and negative bits
// Generate for both positive and negative quotient digits
assign FP = ~(U << 1) & C;
assign FN = (UM << 1) | (C & ~(C << 2));
assign FZ = '0;

View File

@ -27,14 +27,14 @@
////////////////////////////////////////////////////////////////////////////////////////////////
module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
input logic [3:0] udigit,
input logic [P.DIVb+3:0] C, U, UM,
output logic [P.DIVb+3:0] F
input logic [3:0] udigit, // {2, 1, -1, -2}; all cold for zero
input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms)
output logic [P.DIVb+3:0] F // Q4.DIVb
);
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; // Q4.DIVb
// Generate for both positive and negative bits
assign F2 = (~U << 2) & (C << 2);
// Generate for both positive and negative digits
assign F2 = (~U << 2) & (C << 2); //
assign F1 = ~(U << 1) & C;
assign F0 = '0;
assign FN1 = (UM << 1) | (C & ~(C << 3));

View File

@ -57,7 +57,7 @@ module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) (
// terminate immediately on special cases
assign FSpecialCaseE = XZeroE | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
else assign SpecialCaseE = FSpecialCaseE;
else assign SpecialCaseE = FSpecialCaseE;
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
always_ff @(posedge clk) begin

View File

@ -31,31 +31,31 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
input logic IFDivStartE,
input logic FDivBusyE,
input logic SqrtE,
input logic [P.DIVb+3:0] X, D,
output logic [P.DIVb:0] FirstU, FirstUM,
output logic [P.DIVb+1:0] FirstC,
input logic [P.DIVb+3:0] X, D, // Q4.DIVb
output logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb
output logic [P.DIVb+1:0] FirstC, // Q2.DIVb
output logic Firstun,
output logic [P.DIVb+3:0] FirstWS, FirstWC
output logic [P.DIVb+3:0] FirstWS, FirstWC // Q4.DIVb
);
/* verilator lint_off UNOPTFLAT */
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b
logic [P.DIVb+1:0] initC; // Q2.b
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.DIVb
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.DIVb
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.DIVb
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.DIVb
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.DIVb
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.DIVb
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.DIVb
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.DIVb
logic [P.DIVb+1:0] initC; // Q2.DIVb
logic [P.DIVCOPIES-1:0] un;
logic [P.DIVb+3:0] WSN, WCN; // Q4.b
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b
logic [P.DIVb+1:0] NextC;
logic [P.DIVb:0] UMux, UMMux;
logic [P.DIVb:0] initU, initUM;
logic [P.DIVb+3:0] WSN, WCN; // Q4.DIVb
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.DIVb
logic [P.DIVb+1:0] NextC; // Q2.DIVb
logic [P.DIVb:0] UMux, UMMux; // U1.DIVb
logic [P.DIVb:0] initU, initUM; // U1.DIVb
/* verilator lint_on UNOPTFLAT */
// Top Muxes and Registers
@ -104,14 +104,14 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
if (P.RADIX == 2) begin: stage
fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end else begin: stage
logic j1;
assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
end
assign WS[i+1] = WSNext[i];
assign WC[i+1] = WCNext[i];

View File

@ -27,25 +27,25 @@
////////////////////////////////////////////////////////////////////////////////////////////////
module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
input logic clk, reset,
input logic StallM,
input logic [P.DIVb+3:0] WS, WC,
input logic [P.DIVb+3:0] D,
input logic [P.DIVb:0] FirstU, FirstUM,
input logic [P.DIVb+1:0] FirstC,
input logic SqrtE,
input logic Firstun, SqrtM, SpecialCaseM,
input logic [P.XLEN-1:0] AM,
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M,
input logic [P.DIVBLEN:0] nM, mM,
output logic [P.DIVb:0] QmM,
output logic WZeroE,
output logic DivStickyM,
output logic [P.XLEN-1:0] FIntDivResultM
input logic clk, reset,
input logic StallM,
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
input logic [P.DIVb+3:0] D, // Q4.DIVb
input logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb
input logic [P.DIVb+1:0] FirstC, // Q2.DIVb
input logic SqrtE,
input logic Firstun, SqrtM, SpecialCaseM,
input logic [P.XLEN-1:0] AM, // U/Q(XLEN.0)
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M,
input logic [P.DIVBLEN-1:0] IntNormShiftM,
output logic [P.DIVb:0] UmM, // U1.DIVb result significand
output logic WZeroE,
output logic DivStickyM,
output logic [P.XLEN-1:0] FIntDivResultM // U/Q(XLEN.0)
);
logic [P.DIVb+3:0] W, Sum;
logic [P.DIVb:0] PreQmM;
logic [P.DIVb:0] PreUmM;
logic NegStickyM;
logic weq0E, WZeroM;
logic [P.XLEN-1:0] IntDivResultM;
@ -86,22 +86,21 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
//////////////////////////
// If the result is not exact, the sticky should be set
assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide
assign DivStickyM = ~WZeroM & ~SpecialCaseM;
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
// Determine if sticky bit is negative
assign Sum = WC + WS;
assign NegStickyM = Sum[P.DIVb+3];
mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);
// Integer quotient or remainder correctoin, normalization, and special cases
// Integer quotient or remainder correction, normalization, and special cases
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
logic [P.DIVBLEN:0] NormShiftM;
logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
assign W = $signed(Sum) >>> P.LOGR;
assign UnsignedQuotM = {3'b000, PreQmM};
assign UnsignedQuotM = {3'b000, PreUmM};
// Integer remainder: sticky and sign correction muxes
assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
@ -110,9 +109,8 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
// Select quotient or remainder and do normalization shift
mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
assign PreIntResultM = $signed(PreResultM >>> IntNormShiftM);
// special case logic
// terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B|
@ -120,7 +118,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
if (BZeroM) begin // Divide by zero
if (RemOpM) IntDivResultM = AM;
else IntDivResultM = {(P.XLEN){1'b1}};
end else if (ALTBM) begin // Numerator is zero
end else if (ALTBM) begin // Numerator is small
if (RemOpM) IntDivResultM = AM;
else IntDivResultM = '0;
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];

View File

@ -29,37 +29,39 @@
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
input logic clk,
input logic IFDivStartE,
input logic [P.NF:0] Xm, Ym,
input logic [P.NE-1:0] Xe, Ye,
input logic [P.NF:0] Xm, Ym, // Floating-point significands
input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents
input logic [P.FMTBITS-1:0] FmtE,
input logic SqrtE,
input logic XZeroE,
input logic [2:0] Funct3E,
output logic [P.NE+1:0] QeM,
output logic [P.DIVb+3:0] X, D,
output logic [P.NE+1:0] UeM, // biased exponent of result
output logic [P.DIVb+3:0] X, D, // Q4.DIVb
// Int-specific
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU
input logic IntDivE, W64E,
// Outputs
output logic ISpecialCaseE,
output logic [P.DURLEN-1:0] CyclesE,
output logic [P.DIVBLEN:0] nM, mM,
output logic [P.DIVBLEN-1:0] IntNormShiftM,
output logic ALTBM, IntDivM, W64M,
output logic AsM, BsM, BZeroM,
output logic [P.XLEN-1:0] AM
);
logic [P.DIVb-1:0] Xfract, Dfract;
logic [P.DIVb:0] PreSqrtX;
logic [P.DIVb:0] Xnorm, Dnorm;
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
logic [P.NE+1:0] UeE; // Result Exponent (FP only)
logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
logic [P.DIVBLEN-1:0] mE, ell; // Leading zeros of inputs
logic [P.DIVBLEN-1:0] IntResultBitsE; // bits in integer result
logic NumerZeroE; // Numerator is zero (X or A)
logic AZeroE, BZeroE; // A or B is Zero for integer division
logic SignedDivE; // signed division
logic AsE, BsE; // Signs of integer inputs
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
logic ALTBE;
logic ALTBE;
logic EvenExp;
//////////////////////////////////////////////////////
// Integer Preprocessing
@ -89,12 +91,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
// Select integer or floating point inputs
mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
end else begin // Int not supported
assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
assign NumerZeroE = XZeroE;
end
@ -103,12 +105,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
//////////////////////////////////////////////////////
// count leading zeros for Subnorm FP and to normalize integer inputs
lzc #(P.DIVb) lzcX (IFX, ell);
lzc #(P.DIVb) lzcY (IFD, mE);
lzc #(P.DIVb+1) lzcX (IFX, ell);
lzc #(P.DIVb+1) lzcY (IFD, mE);
// Normalization shift: shift off leading one
assign Xfract = (IFX << ell) << 1;
assign Dfract = (IFD << mE) << 1;
// Normalization shift: shift leading one into most significant bit
assign Xnorm = (IFX << ell);
assign Dnorm = (IFD << mE);
//////////////////////////////////////////////////////
// Integer Right Shift to digit boundary
@ -117,31 +119,28 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
//////////////////////////////////////////////////////
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
logic [P.DIVBLEN:0] ZeroDiff, p;
logic [P.DIVBLEN-1:0] ZeroDiff, p;
// calculate number of fractional bits p
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
assign ALTBE = ZeroDiff[P.DIVBLEN-1]; // A less than B (A has more leading zeros)
mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);
/* verilator lint_off WIDTH */
assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
/* verilator lint_on WIDTH */
// Integer special cases (terminate immediately)
assign ISpecialCaseE = BZeroE | ALTBE;
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
if (P.LOGRK > 0) begin // more than 1 bit per cycle
logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
/* verilator lint_off WIDTH */
assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
// calculate right shift amount RightShiftX to complete in discrete number of steps
if (P.RK > 1) begin // more than 1 bit per cycle
logic [$clog2(P.RK)-1:0] RightShiftX;
/* verilator lint_off WIDTH */
assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps
/* verilator lint_on WIDTH */
end else begin // radix 2 1 copy doesn't require shifting
assign nE = p;
assign DivXShifted = DivX;
end
end else begin
@ -150,22 +149,53 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
//////////////////////////////////////////////////////
// Floating-Point Preprocessing
// append leading 1 (for nonzero inputs)
// Extend to Q4.b format
// shift square root to be in range [1/4, 1)
// Normalized numbers are shifted right by 1 if the exponent is odd
// Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
// NOTE: there might be a discrepancy that X is never right shifted by 2. However
// it comes out in the wash and gives the right answer. Investigate later if possible.
//////////////////////////////////////////////////////
//////////////////////////////////////////////////////
assign DivX = {3'b000, ~NumerZeroE, Xfract};
assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
// Sqrt is initialized on step one as R(X-1), so depends on Radix
mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
// If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
// Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
// Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
// Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b form), keeping X in fraction bits
// Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
// This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
// Subtracting 2 is equivalent to adding 1110. Subtracting 4 is equivalent to adding 1100. Prepend leading 1s to do a free subtraction.
// This also means only one extra fractional bit is needed because we never shift right by more than 1.
// Radix Exponent odd Exponent Even
// 2 x-2 = 2(x/2 - 1) x/2 - 2 = 2(x/4 - 1)
// 4 2(x)-4 = 4(x/2 - 1) 2(x/2)-4 = 4(x/4 - 1)
// Summary: SqrtX = r(x/2or4 - 1)
logic [P.DIVb:0] PreSqrtX;
assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; // 2PreSqrtX - 4 = 4(PreSqrtX/2 - 1)
/*
// Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
// This saves one bit in DIVb because there is no initial right shift.
// However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
// That is an optimization for another day.
if (P.RADIX == 2) begin
logic [P.DIVb:0] PreSqrtX; // U1.DIVb
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
end else begin
logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
end
*/
// Initialize X for division or square root
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
//////////////////////////////////////////////////////
// Select integer or floating-point operands
//////////////////////////////////////////////////////
@ -176,28 +206,37 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
assign X = PreShiftX;
end
// Divisor register
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
// Divisor register
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
// Floating-point exponent
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Ue(UeE));
flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
// Number of FSM cycles (to FSM)
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
if (P.IDIV_ON_FPU) begin:intpipelineregs
logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
logic RemOpE;
/* verilator lint_off WIDTH */
assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain
assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift
/* verilator lint_on WIDTH */
assign RemOpE = Funct3E[1];
mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
// pipeline registers
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
if (P.XLEN==64)
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
end
endmodule

View File

@ -29,33 +29,27 @@
/* verilator lint_off UNOPTFLAT */
module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb+3:0] D, DBar,
input logic [P.DIVb:0] U, UM,
input logic [P.DIVb+3:0] WS, WC,
input logic [P.DIVb+1:0] C,
input logic SqrtE,
output logic un,
output logic [P.DIVb+1:0] CNext,
output logic [P.DIVb:0] UNext, UMNext,
output logic [P.DIVb+3:0] WSNext, WCNext
input logic [P.DIVb+3:0] D, DBar, // Q4.DIVb
input logic [P.DIVb:0] U, UM, // U1.DIVb
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
input logic [P.DIVb+1:0] C, // Q2.DIVb
input logic SqrtE,
output logic un,
output logic [P.DIVb+1:0] CNext, // Q2.DIVb
output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb
output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb
);
/* verilator lint_on UNOPTFLAT */
logic [P.DIVb+3:0] Dsel;
logic up, uz;
logic [P.DIVb+3:0] F;
logic [P.DIVb+3:0] AddIn;
logic [P.DIVb+3:0] WSA, WCA;
logic [P.DIVb+3:0] Dsel; // Q4.DIVb
logic up, uz;
logic [P.DIVb+3:0] F; // Q4.DIVb
logic [P.DIVb+3:0] AddIn; // Q4.DIVb
logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb
// Qmient Selection logic
// Quotient Selection logic
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);
fdivsqrtuslc2 uslc2(.WS(WS[P.DIVb+3:P.DIVb]), .WC(WC[P.DIVb+3:P.DIVb]), .up, .uz, .un);
// Sqrt F generation. Extend C, U, UM to Q4.k
fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
@ -66,7 +60,7 @@ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
else if (uz) Dsel = '0;
else Dsel = D; // un
// Partial Product Generation
// Residual Update
// WSA, WCA = WS + WC - qD
mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);

View File

@ -27,40 +27,33 @@
////////////////////////////////////////////////////////////////////////////////////////////////
module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb+3:0] D, DBar, D2, DBar2,
input logic [P.DIVb:0] U,UM,
input logic [P.DIVb+3:0] WS, WC,
input logic [P.DIVb+1:0] C,
input logic SqrtE, j1,
output logic [P.DIVb+1:0] CNext,
output logic un,
output logic [P.DIVb:0] UNext, UMNext,
output logic [P.DIVb+3:0] WSNext, WCNext
input logic [P.DIVb+3:0] D, DBar, D2, DBar2, // Q4.DIVb
input logic [P.DIVb:0] U,UM, // U1.DIVb
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
input logic [P.DIVb+1:0] C, // Q2.DIVb
input logic SqrtE, j1,
output logic [P.DIVb+1:0] CNext, // Q2.DIVb
output logic un,
output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb
output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb
);
logic [P.DIVb+3:0] Dsel;
logic [3:0] udigit;
logic [P.DIVb+3:0] F;
logic [P.DIVb+3:0] AddIn;
logic [4:0] Smsbs;
logic [2:0] Dmsbs;
logic [7:0] WCmsbs, WSmsbs;
logic CarryIn;
logic [P.DIVb+3:0] WSA, WCA;
logic [P.DIVb+3:0] Dsel; // Q4.DIVb
logic [3:0] udigit; // {+2, +1, -1, -2} or 0000 for 0
logic [P.DIVb+3:0] F; // Q4.DIVb
logic [P.DIVb+3:0] AddIn; // Q4.DIVb
logic [4:0] Smsbs; // U1.4
logic [2:0] Dmsbs; // U0.3 drop leading 1 from D
logic [7:0] WCmsbs, WSmsbs; // Q4.4
logic CarryIn;
logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb
// Digit Selection logic
// u encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
assign Smsbs = U[P.DIVb:P.DIVb-4];
assign Dmsbs = D[P.DIVb-1:P.DIVb-3];
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root
assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual
fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
assign un = 1'b0; // unused for radix 4
// F generation logic

View File

@ -31,15 +31,15 @@
///////////////////////////////
module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) (
input logic up, un,
input logic [P.DIVb+1:0] C,
input logic [P.DIVb:0] U, UM,
output logic [P.DIVb:0] UNext, UMNext
input logic [P.DIVb+1:0] C, // Q2.DIVb
input logic [P.DIVb:0] U, UM, // U1.DIVb
output logic [P.DIVb:0] UNext, UMNext // U1.DIVb
);
// The on-the-fly converter transfers the divsqrt
// bits to the quotient as they come.
logic [P.DIVb:0] K;
logic [P.DIVb:0] K; // U1.DIVb one-hot
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
always_comb begin
if (up) begin

View File

@ -28,15 +28,15 @@
module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) (
input logic [3:0] udigit,
input logic [P.DIVb:0] U, UM,
input logic [P.DIVb:0] C,
output logic [P.DIVb:0] UNext, UMNext
input logic [P.DIVb:0] U, UM, // U1.DIVb
input logic [P.DIVb:0] C, // Q1.DIVb
output logic [P.DIVb:0] UNext, UMNext // U1.DIVb
);
// The on-the-fly converter transfers the square root
// bits to the quotient as they come.
// Use this otfc for division and square root.
logic [P.DIVb:0] K1, K2, K3;
logic [P.DIVb:0] K1, K2, K3; // U1.DIVb
assign K1 = (C&~(C << 1)); // K
assign K2 = ((C << 1)&~(C << 2)); // 2K
assign K3 = (C & ~(C << 2)); // 3K

View File

@ -1,10 +1,10 @@
///////////////////////////////////////////
// fdivsqrtqsel2.sv
// fdivsqrtuslc2.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Radix 2 Quotient Digit Selection
// Purpose: Radix 2 Unified Quotient/Square Root Digit Selection
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
@ -18,7 +18,7 @@
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
@ -26,31 +26,26 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module fdivsqrtqsel2 (
input logic [3:0] ps, pc,
output logic up, uz, un
module fdivsqrtuslc2 (
input logic [3:0] WS, WC, // Q4.0 most significant bits of redundant residual
output logic up, uz, un // {+1, 0, -1}
);
logic [3:0] p, g;
logic magnitude, sign;
logic sign;
// Carry chain logic determines if W = WS + WC = -1, < -1, > -1 to choose 0, -1, 1 respectively
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
//if p2 * p1 * p0, W = -1 and choose digit of 0
assign uz = ((WS[2]^WC[2]) & (WS[1]^WC[1]) &
(WS[0]^WC[0]));
// Quotient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) &
(ps[0]^pc[0]));
assign sign = (ps[3]^pc[3])^
(ps[2] & pc[2] | ((ps[2]^pc[2]) &
(ps[1]&pc[1] | ((ps[1]^pc[1]) &
(ps[0]&pc[0])))));
// Otherwise determine sign using carry chain: sign = p3 ^ g_2:0
assign sign = (WS[3]^WC[3])^
(WS[2] & WC[2] | ((WS[2]^WC[2]) &
(WS[1]&WC[1] | ((WS[1]^WC[1]) &
(WS[0]&WC[0])))));
// Produce digit = +1, 0, or -1
assign up = magnitude & ~sign;
assign uz = ~magnitude;
assign un = magnitude & sign;
assign up = ~uz & ~sign;
assign un = ~uz & sign;
endmodule

View File

@ -1,10 +1,10 @@
///////////////////////////////////////////
// fdivsqrtqsel4.sv
// fdivsqrtuslc4.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Radix 4 Quotient Digit Selection
// Purpose: Table-based Radix 4 Unified Quotient/Square Root Digit Selection
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
@ -26,25 +26,25 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module fdivsqrtqsel4 (
input logic [2:0] Dmsbs,
input logic [4:0] Smsbs,
input logic [7:0] WSmsbs, WCmsbs,
module fdivsqrtuslc4 (
input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1
input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation
input logic [7:0] WSmsbs, WCmsbs, // Q4.4 redundant residual most significant bits
input logic Sqrt, j1,
output logic [3:0] udigit
output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] A;
logic [7:0] PreWmsbs; // Q4.4 nonredundant residual msbs
logic [6:0] Wmsbs; // Q4.3 truncated nonredundant residual
logic [2:0] A; // U0.3 upper bits of D or Smsbs, discarding integer bit
assign PreWmsbs = WCmsbs + WSmsbs;
assign Wmsbs = PreWmsbs[7:1];
assign PreWmsbs = WCmsbs + WSmsbs; // add redundant residual to find msbs
assign Wmsbs = PreWmsbs[7:1]; // truncate least significant bit to Q4.3 to index table
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] USel4[1023:0];
logic [3:0] USel4[1023:0]; // 1024-entry table of 4-bit digits indexed with 3 bits of A and 7 bits of Wmsbs
// Prepopulate selection table; this is constant at compile time
always_comb begin
@ -101,10 +101,10 @@ module fdivsqrtqsel4 (
// Select A
always_comb
if (Sqrt) begin
if (j1) A = 3'b101;
else if (Smsbs == 5'b10000) A = 3'b111;
else A = Smsbs[2:0];
end else A = Dmsbs;
if (j1) A = 3'b101; // on first sqrt iteration A = .101
else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111
else A = Smsbs[2:0]; // otherwise use A = 2S (in U0.3 format)
end else A = Dmsbs; // division uses A = D (in U0.3 format, dropping leading 1)
// Select quotient digit from lookup table based on A and W
assign udigit = USel4[{A,Wmsbs}];

View File

@ -1,10 +1,10 @@
///////////////////////////////////////////
// fdivsqrtqsel4cmp.sv
// fdivsqrtuslc4cmp.sv
//
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
// Modified:13 January 2022
//
// Purpose: Comparator-based Radix 4 Quotient Digit Selection
// Purpose: Comparator-based Radix 4 Unified Quotient/Square Root Digit Selection
//
// Documentation: RISC-V System on Chip Design Chapter 13
//
@ -26,12 +26,12 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module fdivsqrtqsel4cmp (
input logic [2:0] Dmsbs,
input logic [4:0] Smsbs,
input logic [7:0] WSmsbs, WCmsbs,
module fdivsqrtuslc4cmp (
input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1
input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation
input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits
input logic SqrtE, j1,
output logic [3:0] udigit
output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;

View File

@ -133,8 +133,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
// divide signals
logic [P.DIVb:0] QmM; // fdivsqrt signifcand
logic [P.NE+1:0] QeM; // fdivsqrt exponent
logic [P.DIVb:0] UmM; // fdivsqrt significand
logic [P.NE+1:0] UeM; // fdivsqrt exponent
logic DivStickyM; // fdivsqrt sticky bit
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
@ -242,8 +242,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM, .FIntDivResultM);
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM,
.UmM, .FIntDivResultM);
// compare: fmin/fmax, flt/fle/feq
fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
@ -326,9 +326,9 @@ module fpu import cvw::*; #(parameter cvw_t P) (
//////////////////////////////////////////////////////////////////////////////////////////
postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivUm(UmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
.FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM),
.ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM),
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));

View File

@ -27,8 +27,8 @@
////////////////////////////////////////////////////////////////////////////////////////////////
module divshiftcalc import cvw::*; #(parameter cvw_t P) (
input logic [P.DIVb:0] DivQm, // divsqrt significand
input logic [P.NE+1:0] DivQe, // divsqrt exponent
input logic [P.DIVb:0] DivUm, // divsqrt significand
input logic [P.NE+1:0] DivUe, // divsqrt exponent
output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
output logic DivResSubnorm, // is the divsqrt result subnormal
@ -41,23 +41,23 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) (
// is the result subnormal
// the result is subnormal if the exponent is negative (sign bit set) or zero
assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]);
assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
// if the result is subnormal
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// 00000000x.xxxxxx... Exp = DivUe
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
// .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivQe+NF+1-1
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe;
// Left shift amount = DivUe+NF+1-1
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe;
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivQe+1
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// 00000000x.xxxxxx... Exp = DivUe
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivUe+1
// 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivUe-1 (determined after)
// inital Left shift amount = NF
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF);
@ -68,5 +68,5 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) (
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
// pre-shift the divider result for normalization
assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
endmodule

View File

@ -48,8 +48,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count
//divide signals
input logic DivSticky, // divider sticky bit
input logic [P.NE+1:0] DivQe, // divsqrt exponent
input logic [P.DIVb:0] DivQm, // divsqrt significand
input logic [P.NE+1:0] DivUe, // divsqrt exponent
input logic [P.DIVb:0] DivUm, // divsqrt significand
// conversion signals
input logic CvtCs, // the result's sign
input logic [P.NE:0] CvtCe, // the calculated expoent
@ -91,7 +91,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
// division signals
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shift amount
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
logic [P.NE+1:0] Ue; // divsqrt corrected exponent after correction shift
logic DivByZero; // divide by zero flag
logic DivResSubnorm; // is the divsqrt result subnormal
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
@ -146,7 +146,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
// select which unit's output to shift
always_comb
@ -174,7 +174,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
// correct for LZA/divsqrt error
shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivUe, .Ue, .FmaSZero, .Shifted, .FmaMe, .Mf);
///////////////////////////////////////////////////////////////////////////////
// Rounding
@ -189,7 +189,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
// calculate result sign used in rounding unit
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Ue,
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);

View File

@ -39,7 +39,7 @@ module round import cvw::*; #(parameter cvw_t P) (
// divsqrt
input logic DivOp, // is a division opperation being done
input logic DivSticky, // divsqrt sticky bit
input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent
input logic [P.NE+1:0] Ue, // the divsqrt calculated exponent
// cvt
input logic CvtOp, // is a convert opperation being done
input logic ToInt, // is the cvt op a cvt to integer
@ -300,8 +300,8 @@ module round import cvw::*; #(parameter cvw_t P) (
case(PostProcSel)
2'b10: Me = FmaMe; // fma
2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
// 2'b01: Me = DivDone ? Qe : '0; // divide
2'b01: Me = Qe; // divide
// 2'b01: Me = DivDone ? Ue : '0; // divide
2'b01: Me = Ue; // divide
default: Me = '0;
endcase

View File

@ -31,7 +31,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
// divsqrt
input logic DivOp, // is it a divsqrt opperation
input logic DivResSubnorm, // is the divsqrt result subnormal
input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent
input logic [P.NE+1:0] DivUe, // the divsqrt result's exponent
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
//fma
input logic FmaOp, // is it an fma opperation
@ -41,7 +41,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
// output
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
output logic [P.NE+1:0] Qe // corrected exponent for divider
output logic [P.NE+1:0] Ue // corrected exponent for divider
);
logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
@ -61,7 +61,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
// correct the shifting of the divsqrt caused by producing a result in [.5, 2) range
// condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
@ -87,5 +87,5 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
// the quotient is in the range [.5,2) if there is no early termination
// if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
endmodule

View File

@ -83,7 +83,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing
always_comb
if (BadNaNBox) begin
// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
end else
PostBox = In;
@ -143,8 +142,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
if (BadNaNBox) begin
case (Fmt)
P.FMT: PostBox = In;
// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]};
P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}};
default: PostBox = 'x;
@ -230,9 +227,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
if (BadNaNBox) begin
case (Fmt)
2'b11: PostBox = In;
// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]};
// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]};
// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]};
2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}};
2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}};
2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};

View File

@ -33,7 +33,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
);
// Core Memory
logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0];
(*rom_style="block" *) logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0];
// dh 10/30/23 ROM macros are presently commented out
// because they don't point to a generated ROM
@ -41,15 +41,23 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
end if ((`USE_SRAM == 1) & (ADDR_WDITH == 7) & (DATA_WIDTH == 32)) begin
rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
end else begin */
always @ (posedge clk)
if(ce) dout <= ROM[addr];
end else begin */
initial begin
if (PRELOAD_ENABLED) begin
$readmemh("$WALLY/fpga/src/boot.mem", ROM, 0);
end
end
always @ (posedge clk) begin
if(ce) dout <= ROM[addr];
end
// for FPGA, initialize with zero-stage bootloader
if(PRELOAD_ENABLED) begin
/*if(PRELOAD_ENABLED) begin
initial begin
ROM[0]=64'h8001819300002197;
ROM[1]=64'h4281420141014081;
@ -195,6 +203,6 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
ROM[141]=64'h0000808241010113;
end // if (PRELOAD_ENABLED)
end
end*/
endmodule

View File

@ -26,8 +26,7 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
module hazard (
// Detect hazards
module hazard import cvw::*; #(parameter cvw_t P) (
input logic BPWrongE, CSRWriteFenceM, RetM, TrapM,
input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD,
input logic LSUStallM, IFUStallF,

View File

@ -131,7 +131,7 @@ module datapath import cvw::*; #(parameter cvw_t P) (
if (P.F_SUPPORTED) begin:fpmux
mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
mux2 #(P.XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
if (P.IDIV_ON_FPU) begin
if (P.IDIV_ON_FPU & P.F_SUPPORTED) begin
mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
end else begin
assign MulDivResultW = MDUResultW;

View File

@ -39,7 +39,9 @@ module irom import cvw::*; #(parameter cvw_t P) (
logic [31:0] RawIROMInstrF;
logic [2:1] AdrD;
rom1p1r #(ADDR_WDITH, P.XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
// preload IROM with the FPGA bootloader by default so that it synthesizes to something, avoiding having the IEU optimized away because instructions are all 0
// the testbench replaces these dummy contents with the actual program of interest during simulation
rom1p1r #(ADDR_WDITH, P.XLEN, 1) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
if (P.XLEN == 32) assign RawIROMInstrF = IROMInstrFFull;
else begin
// IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two

View File

@ -92,7 +92,8 @@ module lsu import cvw::*; #(parameter cvw_t P) (
input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit
input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit
);
localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED;
localparam logic MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED;
localparam MLEN = MISALIGN_SUPPORT ? 2*P.LLEN : P.LLEN; // widen buffer for misaligned accesses
logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer
logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer
@ -118,9 +119,9 @@ module lsu import cvw::*; #(parameter cvw_t P) (
logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data
/* verilator lint_off WIDTHEXPAND */
logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data
logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] LSUWriteDataSpillM; // Final write data
logic [((MISALIGN_SUPPORT+1)*P.LLEN-1)/8:0] ByteMaskSpillM; // Selects which bytes within a word to write
logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data
logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data
logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write
/* verilator lint_on WIDTHEXPAND */
logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data
logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data

View File

@ -57,7 +57,7 @@ module mdu import cvw::*; #(parameter cvw_t P) (
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
// When IDIV_ON_FPU is set, use the FPU divider instead
// In ZMMUL, with M_SUPPORTED = 0, omit the divider
if ((P.IDIV_ON_FPU) || (!P.M_SUPPORTED)) begin:nodiv
if ((P.IDIV_ON_FPU & P.F_SUPPORTED) || (!P.M_SUPPORTED)) begin:nodiv
assign QuotM = 0;
assign RemM = 0;
assign DivBusyE = 0;

View File

@ -2,10 +2,14 @@
// spi_apb.sv
//
// Written: Naiche Whyte-Aguayo nwhyteaguayo@g.hmc.edu 11/16/2022
//
// Purpose: SPI peripheral
// See FU540-C000-v1.0 for specifications
//
// SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it consists of synchronous 8 byte transmit and receive FIFOs connected to shift registers.
// The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing
// to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output,
// along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY.
// Current limitations: Flash read sequencer mode not implemented, dual and quad mode not supported
//
// A component of the Wally configurable RISC-V project.
//
@ -25,19 +29,6 @@
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// Current limitations: Flash read sequencer mode not implemented, dual and quad modes untestable with current test plan.
// Attempt to move from >= comparisons by initializing in FSM differently
// Parameterize SynchFIFO
// look at ReadIncrement/WriteIncrement delay necessity
/*
SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it is consists of synchronous 8 byte transmit and recieve FIFOs connected to shift registers.
The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing
to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output,
along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY.
*/
module spi_apb import cvw::*; #(parameter cvw_t P) (
input logic PCLK, PRESETn,
input logic PSEL,
@ -54,27 +45,27 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
output logic SPIIntr
);
//SPI control registers. Refer to SiFive FU540-C000 manual
// SPI control registers. Refer to SiFive FU540-C000 manual
logic [11:0] SckDiv;
logic [1:0] SckMode;
logic [1:0] ChipSelectID;
logic [3:0] ChipSelectDef;
logic [1:0] ChipSelectMode;
logic [1:0] SckMode;
logic [1:0] ChipSelectID;
logic [3:0] ChipSelectDef;
logic [1:0] ChipSelectMode;
logic [15:0] Delay0, Delay1;
logic [4:0] Format;
logic [7:0] ReceiveData;
logic [2:0] TransmitWatermark, ReceiveWatermark;
logic [8:0] TransmitData;
logic [1:0] InterruptEnable, InterruptPending;
logic [4:0] Format;
logic [7:0] ReceiveData;
logic [2:0] TransmitWatermark, ReceiveWatermark;
logic [8:0] TransmitData;
logic [1:0] InterruptEnable, InterruptPending;
//Bus interface signals
// Bus interface signals
logic [7:0] Entry;
logic Memwrite;
logic [31:0] Din, Dout;
logic TransmitInactive; //High when there is no transmission, used as hardware interlock signal
logic TransmitInactive; // High when there is no transmission, used as hardware interlock signal
//FIFO FSM signals
//Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1]
// FIFO FSM signals
// Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1]
logic TransmitWriteMark, TransmitReadMark, RecieveWriteMark, RecieveReadMark;
logic TransmitFIFOWriteFull, TransmitFIFOReadEmpty;
logic TransmitFIFOReadIncrement;
@ -83,75 +74,68 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
logic ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty;
logic [7:0] TransmitFIFOReadData, ReceiveFIFOWriteData;
logic [2:0] TransmitWriteWatermarkLevel, ReceiveReadWatermarkLevel;
logic [7:0] ReceiveShiftRegEndian; //reverses ReceiveShiftReg if Format[2] set (little endian transmission)
logic [7:0] ReceiveShiftRegEndian; // Reverses ReceiveShiftReg if Format[2] set (little endian transmission)
//Transmission signals
// Transmission signals
logic sck;
logic [11:0] DivCounter; //counter for sck
logic SCLKenable; //flip flop enable high every sclk edge
logic [11:0] DivCounter; // Counter for sck
logic SCLKenable; // Flip flop enable high every sclk edge
//Delay signals
logic [8:0] ImplicitDelay1; //Adds implicit delay to cs-sck delay counter based on phase
logic [8:0] ImplicitDelay2; //Adds implicit delay to sck-cs delay counter based on phase
logic [8:0] CS_SCKCount; //Counter for cs-sck delay
logic [8:0] SCK_CSCount; //Counter for sck-cs delay
logic [8:0] InterCSCount; //Counter for inter cs delay
logic [8:0] InterXFRCount; //Counter for inter xfr delay
logic CS_SCKCompare; //Boolean comparison signal, high when CS_SCKCount >= cs-sck delay
logic SCK_CSCompare; //Boolean comparison signal, high when SCK_CSCount >= sck-cs delay
logic InterCSCompare; //Boolean comparison signal, high when InterCSCount >= inter cs delay
logic InterXFRCompare; //Boolean comparison signal, high when InterXFRCount >= inter xfr delay
logic ZeroDelayHoldMode; //High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
// Delay signals
logic [8:0] ImplicitDelay1; // Adds implicit delay to cs-sck delay counter based on phase
logic [8:0] ImplicitDelay2; // Adds implicit delay to sck-cs delay counter based on phase
logic [8:0] CS_SCKCount; // Counter for cs-sck delay
logic [8:0] SCK_CSCount; // Counter for sck-cs delay
logic [8:0] InterCSCount; // Counter for inter cs delay
logic [8:0] InterXFRCount; // Counter for inter xfr delay
logic ZeroDelayHoldMode; // High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
//Frame counting signals
logic [3:0] FrameCount; //Counter for number of frames in transmission
logic FrameCompare; //Boolean comparison signal, high when FrameCount = Format[7:4]
logic [3:0] ReceivePenultimateFrame; //Frame number - 1
logic [3:0] ReceivePenultimateFrameCount; //Counter
logic ReceivePenultimateFrameBoolean; //High when penultimate frame in transmission has been reached
// Frame counting signals
logic [3:0] FrameCount; // Counter for number of frames in transmission
logic [3:0] ReceivePenultimateFrameCount; // Counter
logic ReceivePenultimateFrame; // High when penultimate frame in transmission has been reached
//State fsm signals
logic Active; //High when state is either Active1 or Active0 (during transmission)
logic Active0; //High when state is Active0
// State fsm signals
logic Active; // High when state is either Active1 or Active0 (during transmission)
logic Active0; // High when state is Active0
//Shift reg signals
logic ShiftEdge; //Determines which edge of sck to shift from TransmitShiftReg
logic [7:0] TransmitShiftReg; //Transmit shift register
logic [7:0] ReceiveShiftReg; //Receive shift register
logic SampleEdge; //Determines which edge of sck to sample from ReceiveShiftReg
logic [7:0] TransmitDataEndian; //Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB
logic TransmitShiftRegLoad; //Determines when to load TransmitShiftReg
logic ReceiveShiftFull; //High when receive shift register is full
logic TransmitShiftEmpty; //High when transmit shift register is empty
logic ShiftIn; //Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST)
logic [3:0] LeftShiftAmount; //Determines left shift amount to left-align data when little endian
logic [7:0] ASR; //AlignedReceiveShiftReg
// Shift reg signals
logic ShiftEdge; // Determines which edge of sck to shift from TransmitShiftReg
logic [7:0] TransmitShiftReg; // Transmit shift register
logic [7:0] ReceiveShiftReg; // Receive shift register
logic SampleEdge; // Determines which edge of sck to sample from ReceiveShiftReg
logic [7:0] TransmitDataEndian; // Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB
logic TransmitShiftRegLoad; // Determines when to load TransmitShiftReg
logic ReceiveShiftFull; // High when receive shift register is full
logic TransmitShiftEmpty; // High when transmit shift register is empty
logic ShiftIn; // Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST)
logic [3:0] LeftShiftAmount; // Determines left shift amount to left-align data when little endian
logic [7:0] ASR; // AlignedReceiveShiftReg
//CS signals
logic [3:0] ChipSelectAuto; //Assigns ChipSelect value to selected CS signal based on CS ID
logic [3:0] ChipSelectInternal; //Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef
logic DelayMode; //Determines where to place implicit half cycle delay based on sck phase for CS assertion
// CS signals
logic [3:0] ChipSelectAuto; // Assigns ChipSelect value to selected CS signal based on CS ID
logic [3:0] ChipSelectInternal; // Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef
logic DelayMode; // Determines where to place implicit half cycle delay based on sck phase for CS assertion
//Miscellaneous signals delayed/early by 1 PCLK cycle
logic ReceiveShiftFullDelay; //Delays ReceiveShiftFull signal by 1 PCLK cycle
logic TransmitFIFOWriteIncrementDelay; //TransmitFIFOWriteIncrement delayed by 1 PCLK cycle
logic ReceiveShiftFullDelayPCLK; //ReceiveShiftFull delayed by 1 PCLK cycle
// Miscellaneous signals delayed/early by 1 PCLK cycle
logic ReceiveShiftFullDelay; // Delays ReceiveShiftFull signal by 1 PCLK cycle
logic ReceiveShiftFullDelayPCLK; // ReceiveShiftFull delayed by 1 PCLK cycle
logic TransmitFIFOReadEmptyDelay;
logic SCLKenableEarly; //SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
logic SCLKenableEarly; // SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
//APB access
assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses
assign Memwrite = PWRITE & PENABLE & PSEL; // only write in access phase
assign PREADY = TransmitInactive; // tie PREADY to transmission for hardware interlock
// APB access
assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses
assign Memwrite = PWRITE & PENABLE & PSEL; // Only write in access phase
assign PREADY = TransmitInactive; // Tie PREADY to transmission for hardware interlock
//Account for subword read/write circuitry
// Account for subword read/write circuitry
// -- Note SPI registers are 32 bits no matter what; access them with LW SW.
assign Din = PWDATA[31:0];
if (P.XLEN == 64) assign PRDATA = {Dout, Dout};
else assign PRDATA = Dout;
//Register access
// Register access
always_ff@(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin
SckDiv <= #1 12'd3;
@ -167,13 +151,12 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
ReceiveWatermark <= #1 3'b0;
InterruptEnable <= #1 2'b0;
InterruptPending <= #1 2'b0;
end else begin //writes
//According to FU540 spec: Once interrupt is pending, it will remain set until number
//of entries in tx/rx fifo is strictly more/less than tx/rxmark
end else begin // writes
/* verilator lint_off CASEINCOMPLETE */
if (Memwrite & TransmitInactive)
case(Entry) //flop to sample inputs
case(Entry) // flop to sample inputs
8'h00: SckDiv <= Din[11:0];
8'h04: SckMode <= Din[1:0];
8'h10: ChipSelectID <= Din[1:0];
@ -188,18 +171,21 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
8'h70: InterruptEnable <= Din[1:0];
endcase
/* verilator lint_off CASEINCOMPLETE */
//interrupt clearance
// According to FU540 spec: Once interrupt is pending, it will remain set until number
// of entries in tx/rx fifo is strictly more/less than tx/rxmark
InterruptPending[0] <= TransmitReadMark;
InterruptPending[1] <= RecieveWriteMark;
case(Entry) // flop to sample inputs
case(Entry) // Flop to sample inputs
8'h00: Dout <= #1 {20'b0, SckDiv};
8'h04: Dout <= #1 {30'b0, SckMode};
8'h10: Dout <= #1 {30'b0, ChipSelectID};
8'h14: Dout <= #1 {28'b0, ChipSelectDef};
8'h18: Dout <= #1 {30'b0, ChipSelectMode};
8'h28: Dout <= {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]};
8'h2C: Dout <= {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]};
8'h40: Dout <= {12'b0, Format[4:1], 13'b0, Format[0], 2'b0};
8'h28: Dout <= #1 {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]};
8'h2C: Dout <= #1 {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]};
8'h40: Dout <= #1 {12'b0, Format[4:1], 13'b0, Format[0], 2'b0};
8'h48: Dout <= #1 {23'b0, TransmitFIFOWriteFull, 8'b0};
8'h4C: Dout <= #1 {23'b0, ReceiveFIFOReadEmpty, ReceiveData[7:0]};
8'h50: Dout <= #1 {29'b0, TransmitWatermark};
@ -210,8 +196,9 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
endcase
end
//SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1))
//Generates a high signal at the rising and falling edge of SCLK by counting from 0 to SckDiv
// SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1))
// Asserts SCLKenable at the rising and falling edge of SCLK by counting from 0 to SckDiv
// Active at 2x SCLK frequency to account for implicit half cycle delays and actions on both clock edges depending on phase
assign SCLKenable = (DivCounter == SckDiv);
assign SCLKenableEarly = ((DivCounter + 12'b1) == SckDiv);
always_ff @(posedge PCLK, negedge PRESETn)
@ -219,44 +206,38 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
else if (SCLKenable) DivCounter <= 0;
else DivCounter <= DivCounter + 12'b1;
//Boolean logic that tracks frame progression
assign FrameCompare = (FrameCount < Format[4:1]);
assign ReceivePenultimateFrameBoolean = ((FrameCount + 4'b0001) == Format[4:1]);
// Asserts when transmission is one frame before complete
assign ReceivePenultimateFrame = ((FrameCount + 4'b0001) == Format[4:1]);
//Computing delays
// Computing delays
// When sckmode.pha = 0, an extra half-period delay is implicit in the cs-sck delay, and vice-versa for sck-cs
assign ImplicitDelay1 = SckMode[0] ? 9'b0 : 9'b1;
assign ImplicitDelay2 = SckMode[0] ? 9'b1 : 9'b0;
assign CS_SCKCompare = CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1);
assign SCK_CSCompare = SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2);
assign InterCSCompare = (InterCSCount >= ({Delay1[7:0],1'b0}));
assign InterXFRCompare = (InterXFRCount >= ({Delay1[15:8], 1'b0}));
// Calculate when tx/rx shift registers are full/empty
TransmitShiftFSM TransmitShiftFSM(PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0, TransmitShiftEmpty);
ReceiveShiftFSM ReceiveShiftFSM(PCLK, PRESETn, SCLKenable, ReceivePenultimateFrame, SampleEdge, SckMode[0], ReceiveShiftFull);
//Calculate when tx/rx shift registers are full/empty
TransmitShiftFSM TransmitShiftFSM_1 (PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0, TransmitShiftEmpty);
ReceiveShiftFSM ReceiveShiftFSM_1 (PCLK, PRESETn, SCLKenable, ReceivePenultimateFrameBoolean, SampleEdge, SckMode[0], ReceiveShiftFull);
//Calculate tx/rx fifo write and recieve increment signals
assign TransmitFIFOWriteIncrement = (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive);
// Calculate tx/rx fifo write and receive increment signals
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) TransmitFIFOWriteIncrementDelay <= 0;
else TransmitFIFOWriteIncrementDelay <= TransmitFIFOWriteIncrement;
if (~PRESETn) TransmitFIFOWriteIncrement <= 0;
else TransmitFIFOWriteIncrement <= (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive);
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) ReceiveFIFOReadIncrement <= 0;
else ReceiveFIFOReadIncrement <= ((Entry == 8'h4C) & ~ReceiveFIFOReadEmpty & PSEL & ~ReceiveFIFOReadIncrement);
//Tx/Rx FIFOs
SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrementDelay, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0], TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark);
SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel, ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark);
// Tx/Rx FIFOs
SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrement, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0],
TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark);
SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel,
ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark);
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) TransmitFIFOReadEmptyDelay <= 1;
else if (SCLKenable) TransmitFIFOReadEmptyDelay <= TransmitFIFOReadEmpty;
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) ReceiveShiftFullDelay <= 0;
else if (SCLKenable) ReceiveShiftFullDelay <= ReceiveShiftFull;
@ -266,16 +247,16 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
assign TransmitShiftRegLoad = ~TransmitShiftEmpty & ~Active | (((ChipSelectMode == 2'b10) & ~|(Delay1[15:8])) & ((ReceiveShiftFullDelay | ReceiveShiftFull) & ~SampleEdge & ~TransmitFIFOReadEmpty));
//Main FSM which controls SPI transmission
// Main FSM which controls SPI transmission
typedef enum logic [2:0] {CS_INACTIVE, DELAY_0, ACTIVE_0, ACTIVE_1, DELAY_1,INTER_CS, INTER_XFR} statetype;
statetype state;
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) begin state <= CS_INACTIVE;
if (~PRESETn) begin
state <= CS_INACTIVE;
FrameCount <= 4'b0;
/* verilator lint_off CASEINCOMPLETE */
end else if (SCLKenable) begin
/* verilator lint_off CASEINCOMPLETE */
case (state)
CS_INACTIVE: begin
CS_SCKCount <= 9'b1;
@ -288,7 +269,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
end
DELAY_0: begin
CS_SCKCount <= CS_SCKCount + 9'b1;
if (CS_SCKCompare) state <= ACTIVE_0;
if (CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1)) state <= ACTIVE_0;
end
ACTIVE_0: begin
FrameCount <= FrameCount + 4'b1;
@ -296,7 +277,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
end
ACTIVE_1: begin
InterXFRCount <= 9'b1;
if (FrameCompare) state <= ACTIVE_0;
if (FrameCount < Format[4:1]) state <= ACTIVE_0;
else if ((ChipSelectMode[1:0] == 2'b10) & ~|(Delay1[15:8]) & (~TransmitFIFOReadEmpty)) begin
state <= ACTIVE_0;
CS_SCKCount <= 9'b1;
@ -310,11 +291,11 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
end
DELAY_1: begin
SCK_CSCount <= SCK_CSCount + 9'b1;
if (SCK_CSCompare) state <= INTER_CS;
if (SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2)) state <= INTER_CS;
end
INTER_CS: begin
InterCSCount <= InterCSCount + 9'b1;
if (InterCSCompare ) state <= CS_INACTIVE;
if (InterCSCount >= ({Delay1[7:0],1'b0})) state <= CS_INACTIVE;
end
INTER_XFR: begin
CS_SCKCount <= 9'b1;
@ -322,13 +303,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
FrameCount <= 4'b0;
InterCSCount <= 9'b10;
InterXFRCount <= InterXFRCount + 9'b1;
if (InterXFRCompare & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0;
if ((InterXFRCount >= ({Delay1[15:8], 1'b0})) & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0;
else if (~|ChipSelectMode[1:0]) state <= CS_INACTIVE;
end
endcase
/* verilator lint_off CASEINCOMPLETE */
end
/* verilator lint_off CASEINCOMPLETE */
assign DelayMode = SckMode[0] ? (state == DELAY_1) : (state == ACTIVE_1 & ReceiveShiftFull);
assign ChipSelectInternal = (state == CS_INACTIVE | state == INTER_CS | DelayMode & ~|(Delay0[15:8])) ? ChipSelectDef : ~ChipSelectDef;
@ -339,7 +321,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
assign TransmitInactive = ((state == INTER_CS) | (state == CS_INACTIVE) | (state == INTER_XFR) | (ReceiveShiftFullDelayPCLK & ZeroDelayHoldMode));
assign Active0 = (state == ACTIVE_0);
//Signal tracks which edge of sck to shift data
// Signal tracks which edge of sck to shift data
always_comb
case(SckMode[1:0])
2'b00: ShiftEdge = ~sck & SCLKenable;
@ -349,36 +331,36 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
default: ShiftEdge = sck & SCLKenable;
endcase
//Transmit shift register
assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0];
// Transmit shift register
assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0];
always_ff @(posedge PCLK, negedge PRESETn)
if(~PRESETn) TransmitShiftReg <= 8'b0;
else if (TransmitShiftRegLoad) TransmitShiftReg <= TransmitDataEndian;
else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0};
else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0};
assign SPIOut = TransmitShiftReg[7];
//If in loopback mode, receive shift register is connected directly to module's output pins. Else, connected to SPIIn
//There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges
// If in loopback mode, receive shift register is connected directly to module's output pins. Else, connected to SPIIn
// There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges
assign ShiftIn = P.SPI_LOOPBACK_TEST ? SPIOut : SPIIn;
//Receive shift register
// Receive shift register
always_ff @(posedge PCLK, negedge PRESETn)
if(~PRESETn) ReceiveShiftReg <= 8'b0;
else if (SampleEdge & SCLKenable) begin
if (~Active) ReceiveShiftReg <= 8'b0;
else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn};
if (~Active) ReceiveShiftReg <= 8'b0;
else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn};
end
//Aligns received data and reverses if little-endian
// Aligns received data and reverses if little-endian
assign LeftShiftAmount = 4'h8 - Format[4:1];
assign ASR = ReceiveShiftReg << LeftShiftAmount[2:0];
assign ReceiveShiftRegEndian = Format[0] ? {ASR[0], ASR[1], ASR[2], ASR[3], ASR[4], ASR[5], ASR[6], ASR[7]} : ASR[7:0];
//Interrupt logic: raise interrupt if any enabled interrupts are pending
// Interrupt logic: raise interrupt if any enabled interrupts are pending
assign SPIIntr = |(InterruptPending & InterruptEnable);
//Chip select logic
// Chip select logic
always_comb
case(ChipSelectID[1:0])
2'b00: ChipSelectAuto = {ChipSelectDef[3], ChipSelectDef[2], ChipSelectDef[1], ChipSelectInternal[0]};
@ -390,14 +372,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
assign SPICS = ChipSelectMode[0] ? ChipSelectDef : ChipSelectAuto;
endmodule
module SynchFIFO #(parameter M =3 , N= 8)(
input logic PCLK, wen, ren, PRESETn,
input logic winc,rinc,
input logic [N-1:0] wdata,
input logic [M-1:0] wwatermarklevel, rwatermarklevel,
module SynchFIFO #(parameter M=3, N=8)( // 2^M entries of N bits each
input logic PCLK, wen, ren, PRESETn,
input logic winc, rinc,
input logic [N-1:0] wdata,
input logic [M-1:0] wwatermarklevel, rwatermarklevel,
output logic [N-1:0] rdata,
output logic wfull, rempty,
output logic wwatermark, rwatermark);
output logic wfull, rempty,
output logic wwatermark, rwatermark);
/* Pointer FIFO using design elements from "Simulation and Synthesis Techniques
for Asynchronous FIFO Design" by Clifford E. Cummings. Namely, M bit read and write pointers
@ -409,8 +391,6 @@ module SynchFIFO #(parameter M =3 , N= 8)(
logic [N-1:0] mem[2**M];
logic [M:0] rptr, wptr;
logic [M:0] rptrnext, wptrnext;
logic rempty_val;
logic wfull_val;
logic [M-1:0] raddr;
logic [M-1:0] waddr;
@ -428,53 +408,43 @@ module SynchFIFO #(parameter M =3 , N= 8)(
end
else begin
if (wen) begin
wfull <= wfull_val;
wfull <= ({~wptrnext[M], wptrnext[M-1:0]} == rptr);
wptr <= wptrnext;
end
if (ren) begin
rptr <= rptrnext;
rempty <= rempty_val;
rempty <= (wptr == rptrnext);
end
end
assign raddr = rptr[M-1:0];
assign rptrnext = rptr + {3'b0, (rinc & ~rempty)};
assign rempty_val = (wptr == rptrnext);
assign rptrnext = rptr + {{(M){1'b0}}, (rinc & ~rempty)};
assign rwatermark = ((waddr - raddr) < rwatermarklevel) & ~wfull;
assign waddr = wptr[M-1:0];
assign wwatermark = ((waddr - raddr) > wwatermarklevel) | wfull;
assign wptrnext = wptr + {3'b0, (winc & ~wfull)};
assign wfull_val = ({~wptrnext[M], wptrnext[M-1:0]} == rptr);
assign wptrnext = wptr + {{(M){1'b0}}, (winc & ~wfull)};
endmodule
module TransmitShiftFSM(
input logic PCLK, PRESETn,
input logic TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0,
input logic PCLK, PRESETn,
input logic TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0,
output logic TransmitShiftEmpty);
typedef enum logic [1:0] {TransmitShiftEmptyState, TransmitShiftHoldState, TransmitShiftNotEmptyState} statetype;
statetype TransmitState, TransmitNextState;
always_ff @(posedge PCLK, negedge PRESETn)
if (~PRESETn) TransmitState <= TransmitShiftEmptyState;
else TransmitState <= TransmitNextState;
if (~PRESETn) TransmitShiftEmpty <= 1;
else if (TransmitShiftEmpty) begin
if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrame & Active0))) TransmitShiftEmpty <= 1;
else if (~TransmitFIFOReadEmpty) TransmitShiftEmpty <= 0;
end else begin
if (ReceivePenultimateFrame & Active0) TransmitShiftEmpty <= 1;
else TransmitShiftEmpty <= 0;
end
always_comb
case(TransmitState)
TransmitShiftEmptyState: begin
if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrameBoolean & Active0))) TransmitNextState = TransmitShiftEmptyState;
else if (~TransmitFIFOReadEmpty) TransmitNextState = TransmitShiftNotEmptyState;
end
TransmitShiftNotEmptyState: begin
if (ReceivePenultimateFrameBoolean & Active0) TransmitNextState = TransmitShiftEmptyState;
else TransmitNextState = TransmitShiftNotEmptyState;
end
endcase
assign TransmitShiftEmpty = (TransmitNextState == TransmitShiftEmptyState);
endmodule
module ReceiveShiftFSM(
input logic PCLK, PRESETn, SCLKenable,
input logic ReceivePenultimateFrameBoolean, SampleEdge, SckMode,
input logic PCLK, PRESETn, SCLKenable,
input logic ReceivePenultimateFrame, SampleEdge, SckMode,
output logic ReceiveShiftFull
);
typedef enum logic [1:0] {ReceiveShiftFullState, ReceiveShiftNotFullState, ReceiveShiftDelayState} statetype;
@ -484,17 +454,12 @@ module ReceiveShiftFSM(
else if (SCLKenable) begin
case (ReceiveState)
ReceiveShiftFullState: ReceiveState <= ReceiveShiftNotFullState;
ReceiveShiftNotFullState: if (ReceivePenultimateFrameBoolean & (SampleEdge)) ReceiveState <= ReceiveShiftDelayState;
ReceiveShiftNotFullState: if (ReceivePenultimateFrame & (SampleEdge)) ReceiveState <= ReceiveShiftDelayState;
else ReceiveState <= ReceiveShiftNotFullState;
ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState;
ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState;
endcase
end
assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState);
assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState);
endmodule

View File

@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
end
// global stall and flush control
hazard hzu(
hazard #(P) hzu(
.BPWrongE, .CSRWriteFenceM, .RetM, .TrapM,
.LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD,
.LSUStallM, .IFUStallF,

View File

@ -11,7 +11,7 @@ export MOD ?= orig
# title to add a note in the synth's directory name
TITLE =
# tsmc28, sky130, and sky90 presently supported
export TECH ?= sky90
export TECH ?= sky130
# MAXCORES allows parallel compilation, which is faster but less CPU-efficient
# Avoid when doing sweeps of many optimization points in parallel
export MAXCORES ?= 1
@ -20,7 +20,7 @@ export MAXCORES ?= 1
export MAXOPT ?= 0
export DRIVE ?= FLOP
export USESRAM ?= 0
export WIDTH ?= 32
time := $(shell date +%F-%H-%M)
hash := $(shell git rev-parse --short HEAD)
@ -94,10 +94,10 @@ endif
ifneq ($(MOD), orig)
# PMP 0
sed -i 's/PMP_ENTRIES \(64\|16\|0\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh
sed -i 's/PMP_ENTRIES.*\(64\|16\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh
ifneq ($(MOD), PMP0)
# no priv
sed -i 's/ZICSR_SUPPORTED *1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh
sed -i 's/ZICSR_SUPPORTED.*1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh
ifneq ($(MOD), noPriv)
# turn off FPU
sed -i 's/1 *<< *3/0 << 3/' $(CONFIGDIR)/config.vh
@ -147,4 +147,4 @@ clean:
rm -f power.saif
rm -f Synopsys_stack_trace_*.txt
rm -f crte_*.txt

View File

@ -5,7 +5,7 @@ This subdirectory contains synthesis scripts for use with Synopsys
scripts/synth.tcl.
Example Usage
make synth DESIGN=wallypipelinedcore FREQ=500
make synth DESIGN=wallypipelinedcore FREQ=500 CONFIG=rv32e
environment variables
@ -38,5 +38,25 @@ To run ppa analysis that hones into target frequency, you can type:
python3 ppa/ppaSynth.py from the synthDC directory. This runs a sweep
across all modules listed at the bottom of the ppaSynth.py file.
There are two options for running the sweep. The first option runs all
modules for all techs around a given frequency (i.e., freqs). The second
option runs all designs for the specific module based on bestSynths.csv
values. Because the second option is assigned last, it has priority. If the
second set of values is commented out, the first option runs all widths.
WARNING: The first option may launch a large number of runs, which could
consume all of the available tool licenses. Therefore, take care to ensure
that enough licenses are available before using the first option.
##### Run specific syntheses
widths = [8, 16, 32, 64, 128]
modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8']
techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']
freqs = [5000]
synthsToRun = allCombos(widths, modules, techs, freqs)
##### Run a sweep based on best delay found in existing syntheses
module = 'adder'
width = 32
tech = 'tsmc28psyn'
synthsToRun = freqSweep(module, width, tech)

View File

@ -252,7 +252,7 @@ if __name__ == '__main__':
TechSpec = namedtuple("TechSpec", "color shape targfreq fo4 add32area add32lpower add32denergy")
techdict = {}
techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 1440.600027, 714.057, 0.658023)
techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 2581, 18, 0.685)
techdict['sky90'] = TechSpec('gray', 'o', args.sky90freq, 43.2e-3, 1440.600027, 714.057, 0.658023)
techdict['tsmc28psyn'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533)

View File

@ -1,24 +1,74 @@
Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (nJ)
priorityencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078
priorityencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348
priorityencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111
priorityencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981
priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861
add,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422
add,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417
add,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014
add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874
add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755
binencoder,sky130,8,1000,1.0000,50.960001,24.761,0.010685929975270078
binencoder,sky130,16,1000,1.0000,136.220003,77.243,0.021773774467348
binencoder,sky130,32,1000,1.0000,372.400007,189.626,0.04371111111111111
binencoder,sky130,64,1000,1.0000,797.720015,382.205,0.07393850658857981
binencoder,sky130,128,900,1.1111,1602.300031,610.009,0.1261366969785861
adder,sky130,8,1700,0.588235,253.820005,154.438,0.10825587752870422
adder,sky130,16,1300,0.7692307,722.260013,485.109,0.32460910944935417
adder,sky130,32,1100,0.90909,1440.600027,714.057,0.6580226904376014
adder,sky130,64,950,1.0526315,2781.240054,1050.0,0.9392239364188874
adder,sky130,128,900,1.1111,6186.740118,2230.0,2.1480106100795755
csa,sky130,8,1000,1.0000,266.560005,154.202,0.13650573115665163
csa,sky130,16,1000,1.0000,533.12001,308.404,0.27263530601922104
csa,sky130,32,1000,1.0000,1066.240021,616.808,0.5448072247308093
csa,sky130,64,1000,1.0000,2132.480042,1230.0,1.0905412240768841
csa,sky130,128,1000,1.0000,4264.960083,2470.0,2.178553363682347
shifter,sky130,8,1000,1.0000,259.700005,196.451,0.07534088282874972
shifter,sky130,16,1000,1.0000,666.400006,558.433,0.19552906110283155
shifter,sky130,32,1000,1.0000,1475.880027,768.262,0.3807431082700759
shifter,sky130,64,1000,1.0000,3914.120062,2680.0,1.144802541988198
shifter,sky130,128,900,1.1111,9192.400136,6080.0,2.9008914525432616
comparator,sky130,8,1700,0.588235,200.900004,136.6,0.05001033271337053
comparator,sky130,16,1500,0.6666667,358.680007,189.253,0.06321553011448482
comparator,sky130,32,1300,0.7692307,690.900013,315.709,0.10771793448084398
comparator,sky130,64,1200,0.8333333,1372.980026,508.393,0.2048577820389901
comparator,sky130,128,1150,0.869565,2744.980052,796.047,0.34396273737011823
flop,sky130,8,1000,1.0000,133.279999,64.8145,0.193835
flop,sky130,16,1000,1.0000,266.5599975,129.629,0.38715000000000005
flop,sky130,32,1000,1.0000,533.119995,259.258,0.7723000000000001
flop,sky130,64,1000,1.0000,1066.23999,520.0,1.54955
flop,sky130,128,1000,1.0000,2132.4799805,1035.0,3.094
mux2,sky130,8,1000,1.0000,63.700001,21.541,0.01932440083034535
mux2,sky130,16,1000,1.0000,119.560002,32.354,0.03884536082474227
mux2,sky130,32,1000,1.0000,375.340008,259.372,0.13671796921846893
mux2,sky130,64,1000,1.0000,479.220009,115.22,0.15148539160324087
mux2,sky130,128,1000,1.0000,1302.420025,767.078,0.4665334665334665
mux4,sky130,8,1000,1.0000,148.960002,66.984,0.04026661024121879
mux4,sky130,16,1000,1.0000,392.0,398.313,0.1037037037037037
mux4,sky130,32,1000,1.0000,594.860011,331.197,0.131617289946576
mux4,sky130,64,1000,1.0000,899.640016,344.331,0.2862533692722372
mux4,sky130,128,1000,1.0000,2013.900038,818.249,0.6094182825484764
mux8,sky130,8,1000,1.0000,287.140006,116.648,0.06089260808926081
mux8,sky130,16,1000,1.0000,582.120003,282.366,0.14455681142177274
mux8,sky130,32,1000,1.0000,1319.079995,670.683,0.35777218376337316
mux8,sky130,64,1000,1.0000,2132.48004,808.482,0.44287680660701995
mux8,sky130,128,1000,1.0000,4575.620089,1830.0,0.9786276715410572
mul,sky130,8,1000,1.0000,2194.220041,1440.0,1.421374045801527
mul,sky130,16,1000,1.0000,7519.540137,4940.0,6.376128385155466
mul,sky130,32,1000,1.0000,25200.700446,14900.0,24.931847968545217
mul,sky130,64,1000,1.0000,86011.661365,42600.0,88.84651898734177
mul,sky130,128,800,1.2500,296198.144128,114000.0,273.3148854961832
binencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078
binencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348
binencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111
binencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981
binencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861
adder,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422
adder,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417
adder,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014
adder,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874
adder,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755
csa,sky90,8,5758,0.16536141368530738,266.560005,154.202,0.13650573115665163
csa,sky90,16,5931,0.1654056314280897,533.12001,308.404,0.27263530601922104
csa,sky90,32,5758,0.16536141368530738,1066.240021,616.808,0.5448072247308093
csa,sky90,64,5931,0.1654056314280897,2132.480042,1230.0,1.0905412240768841
csa,sky90,128,5931,0.1654056314280897,4264.960083,2470.0,2.178553363682347
shiftleft,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972
shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155
shiftleft,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759
shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198
shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616
shifter,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972
shifter,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155
shifter,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759
shifter,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198
shifter,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616
comparator,sky90,8,4839,0.20629126741062204,200.900004,136.6,0.05001033271337053
comparator,sky90,16,4018,0.24806303982080635,358.680007,189.253,0.06321553011448482
comparator,sky90,32,3602,0.276293542476402,690.900013,315.709,0.10771793448084398
@ -44,31 +94,31 @@ mux8,sky90,16,3362,0.295237998810232,582.120003,282.366,0.14455681142177274
mux8,sky90,32,3178,0.3140553102580239,1319.079995,670.683,0.35777218376337316
mux8,sky90,64,2906,0.3440756228492774,2132.48004,808.482,0.44287680660701995
mux8,sky90,128,2667,0.3749401308586427,4575.620089,1830.0,0.9786276715410572
mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527
mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466
mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217
mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177
mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832
priorityencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
priorityencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
priorityencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
priorityencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
priorityencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
add,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
add,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
add,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
add,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
add,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731
mul,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527
mul,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466
mul,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217
mul,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177
mul,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832
binencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
binencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
binencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
binencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
binencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
adder,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
adder,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
adder,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
adder,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
adder,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731
csa,tsmc28,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921
csa,tsmc28,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842
csa,tsmc28,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941
csa,tsmc28,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076
csa,tsmc28,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363
shiftleft,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
shiftleft,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
shiftleft,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
shifter,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
shifter,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
shifter,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
shifter,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
shifter,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
comparator,tsmc28,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243
comparator,tsmc28,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673
comparator,tsmc28,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319
@ -94,8 +144,58 @@ mux8,tsmc28,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262
mux8,tsmc28,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814
mux8,tsmc28,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495
mux8,tsmc28,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426
mult,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
mult,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719
mul,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
mul,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
mul,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
mul,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
mul,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719
binencoder,tsmc28psyn,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
binencoder,tsmc28psyn,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
binencoder,tsmc28psyn,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
binencoder,tsmc28psyn,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
binencoder,tsmc28psyn,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
adder,tsmc28psyn,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
adder,tsmc28psyn,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
adder,tsmc28psyn,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
adder,tsmc28psyn,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
adder,tsmc28psyn,128,7000,0.142857142857,907.452008,4360.0,0.3451183029643731
csa,tsmc28psyn,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921
csa,tsmc28psyn,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842
csa,tsmc28psyn,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941
csa,tsmc28psyn,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076
csa,tsmc28psyn,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363
shifter,tsmc28psyn,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
shifter,tsmc28psyn,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
shifter,tsmc28psyn,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
shifter,tsmc28psyn,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
shifter,tsmc28psyn,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
comparator,tsmc28psyn,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243
comparator,tsmc28psyn,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673
comparator,tsmc28psyn,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319
comparator,tsmc28psyn,64,11080,0.09024670758122744,294.21,1250.0,0.0684115523465704
comparator,tsmc28psyn,128,9371,0.10671119720414043,558.432,2400.0,0.12794792444776437
flop,tsmc28psyn,8,10,0.048889000000002625,15.12,78.6345,0.027246000000000003
flop,tsmc28psyn,16,10,0.048889000000002625,30.24,157.29,0.054290000000000005
flop,tsmc28psyn,32,10,0.048889000000002625,60.4799995,314.5805,0.10908000000000001
flop,tsmc28psyn,64,10,0.048889000000002625,120.959999,630.0,0.21765500000000004
flop,tsmc28psyn,128,10,0.048889000000002625,241.919998,1260.0,0.43579999999999997
mux2,tsmc28psyn,8,29614,0.03374481252110488,16.758,114.564,0.005436617815897886
mux2,tsmc28psyn,16,18767,0.053046021580433735,15.75,88.025,0.005142004582511856
mux2,tsmc28psyn,32,17903,0.05585556035301346,32.130001,171.146,0.009897782494553985
mux2,tsmc28psyn,64,18568,0.05371109651012495,91.35,523.884,0.027574321413183972
mux2,tsmc28psyn,128,16637,0.05991099044298852,176.525999,941.106,0.05012923002945243
mux4,tsmc28psyn,8,18151,0.055092383284667513,27.971999,133.963,0.008032615282904523
mux4,tsmc28psyn,16,16486,0.06057952759917506,39.438,186.231,0.012556108213029236
mux4,tsmc28psyn,32,15196,0.06580579126085812,69.174,324.969,0.023229797315082915
mux4,tsmc28psyn,64,13926,0.07180612868016659,137.465999,648.086,0.04574177796926612
mux4,tsmc28psyn,128,13090,0.07636619404125286,294.335997,1420.0,0.09358288770053477
mux8,tsmc28psyn,8,12902,0.07750336319950395,44.604,214.286,0.0117501162610448
mux8,tsmc28psyn,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262
mux8,tsmc28psyn,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814
mux8,tsmc28psyn,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495
mux8,tsmc28psyn,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426
mul,tsmc28psyn,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
mul,tsmc28psyn,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
mul,tsmc28psyn,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
mul,tsmc28psyn,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
mul,tsmc28psyn,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719

1 Module Tech Width Target Freq Delay Area L Power (nW) D energy (nJ)
2 priorityencoder binencoder sky90 sky130 8 7683 1000 0.12508649056358195 1.0000 50.960001 24.761 0.010685929975270078
3 priorityencoder binencoder sky90 sky130 16 5773 1000 0.16977016282695304 1.0000 136.220003 77.243 0.021773774467348
4 priorityencoder binencoder sky90 sky130 32 4500 1000 0.2218912222222222 1.0000 372.400007 189.626 0.04371111111111111
5 priorityencoder binencoder sky90 sky130 64 4098 1000 0.2439914738897023 1.0000 797.720015 382.205 0.07393850658857981
6 priorityencoder binencoder sky90 sky130 128 3409 900 0.2933331557641537 1.1111 1602.300031 610.009 0.1261366969785861
7 add adder sky90 sky130 8 3658 1700 0.27337042810278844 0.588235 253.820005 154.438 0.10825587752870422
8 add adder sky90 sky130 16 2942 1300 0.3393218266485384 0.7692307 722.260013 485.109 0.32460910944935417
9 add adder sky90 sky130 32 2468 1100 0.40496338573743923 0.90909 1440.600027 714.057 0.6580226904376014
10 add adder sky90 sky130 64 2139 950 0.4674681813931744 1.0526315 2781.240054 1050.0 0.9392239364188874
11 add adder sky90 sky130 128 1885 900 0.5304949787798409 1.1111 6186.740118 2230.0 2.1480106100795755
12 csa sky130 8 1000 1.0000 266.560005 154.202 0.13650573115665163
13 csa sky130 16 1000 1.0000 533.12001 308.404 0.27263530601922104
14 csa sky130 32 1000 1.0000 1066.240021 616.808 0.5448072247308093
15 csa sky130 64 1000 1.0000 2132.480042 1230.0 1.0905412240768841
16 csa sky130 128 1000 1.0000 4264.960083 2470.0 2.178553363682347
17 shifter sky130 8 1000 1.0000 259.700005 196.451 0.07534088282874972
18 shifter sky130 16 1000 1.0000 666.400006 558.433 0.19552906110283155
19 shifter sky130 32 1000 1.0000 1475.880027 768.262 0.3807431082700759
20 shifter sky130 64 1000 1.0000 3914.120062 2680.0 1.144802541988198
21 shifter sky130 128 900 1.1111 9192.400136 6080.0 2.9008914525432616
22 comparator sky130 8 1700 0.588235 200.900004 136.6 0.05001033271337053
23 comparator sky130 16 1500 0.6666667 358.680007 189.253 0.06321553011448482
24 comparator sky130 32 1300 0.7692307 690.900013 315.709 0.10771793448084398
25 comparator sky130 64 1200 0.8333333 1372.980026 508.393 0.2048577820389901
26 comparator sky130 128 1150 0.869565 2744.980052 796.047 0.34396273737011823
27 flop sky130 8 1000 1.0000 133.279999 64.8145 0.193835
28 flop sky130 16 1000 1.0000 266.5599975 129.629 0.38715000000000005
29 flop sky130 32 1000 1.0000 533.119995 259.258 0.7723000000000001
30 flop sky130 64 1000 1.0000 1066.23999 520.0 1.54955
31 flop sky130 128 1000 1.0000 2132.4799805 1035.0 3.094
32 mux2 sky130 8 1000 1.0000 63.700001 21.541 0.01932440083034535
33 mux2 sky130 16 1000 1.0000 119.560002 32.354 0.03884536082474227
34 mux2 sky130 32 1000 1.0000 375.340008 259.372 0.13671796921846893
35 mux2 sky130 64 1000 1.0000 479.220009 115.22 0.15148539160324087
36 mux2 sky130 128 1000 1.0000 1302.420025 767.078 0.4665334665334665
37 mux4 sky130 8 1000 1.0000 148.960002 66.984 0.04026661024121879
38 mux4 sky130 16 1000 1.0000 392.0 398.313 0.1037037037037037
39 mux4 sky130 32 1000 1.0000 594.860011 331.197 0.131617289946576
40 mux4 sky130 64 1000 1.0000 899.640016 344.331 0.2862533692722372
41 mux4 sky130 128 1000 1.0000 2013.900038 818.249 0.6094182825484764
42 mux8 sky130 8 1000 1.0000 287.140006 116.648 0.06089260808926081
43 mux8 sky130 16 1000 1.0000 582.120003 282.366 0.14455681142177274
44 mux8 sky130 32 1000 1.0000 1319.079995 670.683 0.35777218376337316
45 mux8 sky130 64 1000 1.0000 2132.48004 808.482 0.44287680660701995
46 mux8 sky130 128 1000 1.0000 4575.620089 1830.0 0.9786276715410572
47 mul sky130 8 1000 1.0000 2194.220041 1440.0 1.421374045801527
48 mul sky130 16 1000 1.0000 7519.540137 4940.0 6.376128385155466
49 mul sky130 32 1000 1.0000 25200.700446 14900.0 24.931847968545217
50 mul sky130 64 1000 1.0000 86011.661365 42600.0 88.84651898734177
51 mul sky130 128 800 1.2500 296198.144128 114000.0 273.3148854961832
52 binencoder sky90 8 7683 0.12508649056358195 50.960001 24.761 0.010685929975270078
53 binencoder sky90 16 5773 0.16977016282695304 136.220003 77.243 0.021773774467348
54 binencoder sky90 32 4500 0.2218912222222222 372.400007 189.626 0.04371111111111111
55 binencoder sky90 64 4098 0.2439914738897023 797.720015 382.205 0.07393850658857981
56 binencoder sky90 128 3409 0.2933331557641537 1602.300031 610.009 0.1261366969785861
57 adder sky90 8 3658 0.27337042810278844 253.820005 154.438 0.10825587752870422
58 adder sky90 16 2942 0.3393218266485384 722.260013 485.109 0.32460910944935417
59 adder sky90 32 2468 0.40496338573743923 1440.600027 714.057 0.6580226904376014
60 adder sky90 64 2139 0.4674681813931744 2781.240054 1050.0 0.9392239364188874
61 adder sky90 128 1885 0.5304949787798409 6186.740118 2230.0 2.1480106100795755
62 csa sky90 8 5758 0.16536141368530738 266.560005 154.202 0.13650573115665163
63 csa sky90 16 5931 0.1654056314280897 533.12001 308.404 0.27263530601922104
64 csa sky90 32 5758 0.16536141368530738 1066.240021 616.808 0.5448072247308093
65 csa sky90 64 5931 0.1654056314280897 2132.480042 1230.0 1.0905412240768841
66 csa sky90 128 5931 0.1654056314280897 4264.960083 2470.0 2.178553363682347
67 shiftleft shifter sky90 8 4327 0.23025600254217704 259.700005 196.451 0.07534088282874972
68 shiftleft shifter sky90 16 3355 0.29803959314456036 666.400006 558.433 0.19552906110283155
69 shiftleft shifter sky90 32 2503 0.39951757530962845 1475.880027 768.262 0.3807431082700759
70 shiftleft shifter sky90 64 2203 0.45385946391284615 3914.120062 2680.0 1.144802541988198
71 shiftleft shifter sky90 128 1907 0.5242938489774515 9192.400136 6080.0 2.9008914525432616
72 comparator sky90 8 4839 0.20629126741062204 200.900004 136.6 0.05001033271337053
73 comparator sky90 16 4018 0.24806303982080635 358.680007 189.253 0.06321553011448482
74 comparator sky90 32 3602 0.276293542476402 690.900013 315.709 0.10771793448084398
94 mux8 sky90 32 3178 0.3140553102580239 1319.079995 670.683 0.35777218376337316
95 mux8 sky90 64 2906 0.3440756228492774 2132.48004 808.482 0.44287680660701995
96 mux8 sky90 128 2667 0.3749401308586427 4575.620089 1830.0 0.9786276715410572
97 mult mul sky90 8 1310 0.7631557786259543 2194.220041 1440.0 1.421374045801527
98 mult mul sky90 16 997 1.0029260270812437 7519.540137 4940.0 6.376128385155466
99 mult mul sky90 32 763 1.3106129895150722 25200.700446 14900.0 24.931847968545217
100 mult mul sky90 64 632 1.5822664810126583 86011.661365 42600.0 88.84651898734177
101 mult mul sky90 128 524 1.9083759465648855 296198.144128 114000.0 273.3148854961832
102 priorityencoder binencoder tsmc28 8 31335 0.031912196106590074 8.316 34.836 0.001716929950534546
103 priorityencoder binencoder tsmc28 16 21253 0.04703118086858326 21.672 78.026 0.004008845810003294
104 priorityencoder binencoder tsmc28 32 16464 0.06071258114674442 61.614 207.499 0.009323372206025266
105 priorityencoder binencoder tsmc28 64 13804 0.07239877021153289 137.466 425.592 0.01847290640394089
106 priorityencoder binencoder tsmc28 128 11440 0.0874065874125874 317.646 973.649 0.041171328671328666
107 add adder tsmc28 8 13838 0.07207477814713109 34.272 187.089 0.013311172134701546
108 add adder tsmc28 16 11521 0.08678002100512108 90.972001 475.207 0.03367763214998698
109 add adder tsmc28 32 9812 0.1018860211985324 209.286002 1060.0 0.08153281695882594
110 add adder tsmc28 64 8206 0.12185605215695831 388.836003 1770.0 0.1409943943456008
111 add adder tsmc28 128 7354 0.13597341881968997 907.452008 4360.0 0.3451183029643731
112 csa tsmc28 8 24524 0.040663382319360626 52.416 482.462 0.02173381177621921
113 csa tsmc28 16 24524 0.040663382319360626 104.832 964.99 0.04346762355243842
114 csa tsmc28 32 24524 0.040663382319360626 209.664 1930.0 0.08677214157559941
115 csa tsmc28 64 24524 0.040663382319360626 419.327999 3860.0 0.17342195400424076
116 csa tsmc28 128 24524 0.040663382319360626 838.655998 7720.0 0.3471701190670363
117 shiftleft shifter tsmc28 8 15202 0.0656078183133798 50.652 367.074 0.016991185370346006
118 shiftleft shifter tsmc28 16 11804 0.08465604506946797 127.511999 602.29 0.03388681802778719
119 shiftleft shifter tsmc28 32 9587 0.10430391697089808 384.803997 1940.0 0.10180452696359654
120 shiftleft shifter tsmc28 64 8272 0.12086674854932303 1041.263998 5460.0 0.2895309477756286
121 shiftleft shifter tsmc28 128 7023 0.14238329232521713 1836.953994 8670.0 0.566566994162039
122 comparator tsmc28 8 17422 0.05733769130983814 35.784 170.595 0.009488003673516243
123 comparator tsmc28 16 13736 0.07273839778683751 54.558 250.167 0.014349155503785673
124 comparator tsmc28 32 12139 0.08236710865804432 145.782 622.975 0.03567015404893319
144 mux8 tsmc28 32 11713 0.08517122410996329 172.115999 823.633 0.046956373260479814
145 mux8 tsmc28 64 11014 0.09067453550027238 304.163999 1460.0 0.08498274922825495
146 mux8 tsmc28 128 10474 0.09542350830628223 683.045996 2820.0 0.15705556616383426
147 mult mul tsmc28 8 5200 0.1922996923076923 577.206 4340.0 0.37769230769230766
148 mult mul tsmc28 16 3819 0.26184265147944485 1634.472002 11800.0 1.4553548049227547
149 mult mul tsmc28 32 3033 0.3295775611605671 6343.721998 47200.0 6.303330036267723
150 mult mul tsmc28 64 2390 0.4184090418410042 16045.092071 109000.0 18.54602510460251
151 mult mul tsmc28 128 1868 0.5353279057815846 44272.49428 262000.0 50.01177730192719
152 binencoder tsmc28psyn 8 31335 0.031912196106590074 8.316 34.836 0.001716929950534546
153 binencoder tsmc28psyn 16 21253 0.04703118086858326 21.672 78.026 0.004008845810003294
154 binencoder tsmc28psyn 32 16464 0.06071258114674442 61.614 207.499 0.009323372206025266
155 binencoder tsmc28psyn 64 13804 0.07239877021153289 137.466 425.592 0.01847290640394089
156 binencoder tsmc28psyn 128 11440 0.0874065874125874 317.646 973.649 0.041171328671328666
157 adder tsmc28psyn 8 13838 0.07207477814713109 34.272 187.089 0.013311172134701546
158 adder tsmc28psyn 16 11521 0.08678002100512108 90.972001 475.207 0.03367763214998698
159 adder tsmc28psyn 32 9812 0.1018860211985324 209.286002 1060.0 0.08153281695882594
160 adder tsmc28psyn 64 8206 0.12185605215695831 388.836003 1770.0 0.1409943943456008
161 adder tsmc28psyn 128 7000 0.142857142857 907.452008 4360.0 0.3451183029643731
162 csa tsmc28psyn 8 24524 0.040663382319360626 52.416 482.462 0.02173381177621921
163 csa tsmc28psyn 16 24524 0.040663382319360626 104.832 964.99 0.04346762355243842
164 csa tsmc28psyn 32 24524 0.040663382319360626 209.664 1930.0 0.08677214157559941
165 csa tsmc28psyn 64 24524 0.040663382319360626 419.327999 3860.0 0.17342195400424076
166 csa tsmc28psyn 128 24524 0.040663382319360626 838.655998 7720.0 0.3471701190670363
167 shifter tsmc28psyn 8 15202 0.0656078183133798 50.652 367.074 0.016991185370346006
168 shifter tsmc28psyn 16 11804 0.08465604506946797 127.511999 602.29 0.03388681802778719
169 shifter tsmc28psyn 32 9587 0.10430391697089808 384.803997 1940.0 0.10180452696359654
170 shifter tsmc28psyn 64 8272 0.12086674854932303 1041.263998 5460.0 0.2895309477756286
171 shifter tsmc28psyn 128 7023 0.14238329232521713 1836.953994 8670.0 0.566566994162039
172 comparator tsmc28psyn 8 17422 0.05733769130983814 35.784 170.595 0.009488003673516243
173 comparator tsmc28psyn 16 13736 0.07273839778683751 54.558 250.167 0.014349155503785673
174 comparator tsmc28psyn 32 12139 0.08236710865804432 145.782 622.975 0.03567015404893319
175 comparator tsmc28psyn 64 11080 0.09024670758122744 294.21 1250.0 0.0684115523465704
176 comparator tsmc28psyn 128 9371 0.10671119720414043 558.432 2400.0 0.12794792444776437
177 flop tsmc28psyn 8 10 0.048889000000002625 15.12 78.6345 0.027246000000000003
178 flop tsmc28psyn 16 10 0.048889000000002625 30.24 157.29 0.054290000000000005
179 flop tsmc28psyn 32 10 0.048889000000002625 60.4799995 314.5805 0.10908000000000001
180 flop tsmc28psyn 64 10 0.048889000000002625 120.959999 630.0 0.21765500000000004
181 flop tsmc28psyn 128 10 0.048889000000002625 241.919998 1260.0 0.43579999999999997
182 mux2 tsmc28psyn 8 29614 0.03374481252110488 16.758 114.564 0.005436617815897886
183 mux2 tsmc28psyn 16 18767 0.053046021580433735 15.75 88.025 0.005142004582511856
184 mux2 tsmc28psyn 32 17903 0.05585556035301346 32.130001 171.146 0.009897782494553985
185 mux2 tsmc28psyn 64 18568 0.05371109651012495 91.35 523.884 0.027574321413183972
186 mux2 tsmc28psyn 128 16637 0.05991099044298852 176.525999 941.106 0.05012923002945243
187 mux4 tsmc28psyn 8 18151 0.055092383284667513 27.971999 133.963 0.008032615282904523
188 mux4 tsmc28psyn 16 16486 0.06057952759917506 39.438 186.231 0.012556108213029236
189 mux4 tsmc28psyn 32 15196 0.06580579126085812 69.174 324.969 0.023229797315082915
190 mux4 tsmc28psyn 64 13926 0.07180612868016659 137.465999 648.086 0.04574177796926612
191 mux4 tsmc28psyn 128 13090 0.07636619404125286 294.335997 1420.0 0.09358288770053477
192 mux8 tsmc28psyn 8 12902 0.07750336319950395 44.604 214.286 0.0117501162610448
193 mux8 tsmc28psyn 16 12264 0.08147446510110894 128.771998 548.714 0.02666340508806262
194 mux8 tsmc28psyn 32 11713 0.08517122410996329 172.115999 823.633 0.046956373260479814
195 mux8 tsmc28psyn 64 11014 0.09067453550027238 304.163999 1460.0 0.08498274922825495
196 mux8 tsmc28psyn 128 10474 0.09542350830628223 683.045996 2820.0 0.15705556616383426
197 mul tsmc28psyn 8 5200 0.1922996923076923 577.206 4340.0 0.37769230769230766
198 mul tsmc28psyn 16 3819 0.26184265147944485 1634.472002 11800.0 1.4553548049227547
199 mul tsmc28psyn 32 3033 0.3295775611605671 6343.721998 47200.0 6.303330036267723
200 mul tsmc28psyn 64 2390 0.4184090418410042 16045.092071 109000.0 18.54602510460251
201 mul tsmc28psyn 128 1868 0.5353279057815846 44272.49428 262000.0 50.01177730192719

File diff suppressed because it is too large Load Diff

View File

@ -12,13 +12,11 @@ from ppaAnalyze import synthsfromcsv
def runCommand(module, width, tech, freq):
command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq)
print('here we go')
subprocess.Popen(command, shell=True)
subprocess.call(command, shell=True)
def deleteRedundant(synthsToRun):
'''removes any previous runs for the current synthesis specifications'''
synthStr = "rm -rf runs/ppa_{}_{}_rv32e_{}nm_{}_*"
synthStr = "rm -rf runs/{}_{}_rv32e_{}_{}_*"
for synth in synthsToRun:
bashCommand = synthStr.format(*synth)
outputCPL = subprocess.check_output(['bash','-c', bashCommand])
@ -34,8 +32,21 @@ def freqSweep(module, width, tech):
synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]]
return synthsToRun
def freqModuleSweep(widths, modules, tech):
    '''builds the list of syntheses for a frequency sweep across several modules and widths

    Every entry read from ppa/bestSynths.csv whose module, tech and width match
    one of the requested combinations contributes nine runs, with target
    frequencies offset by -8% to +8% (in 2% steps) from 1000/delay of that entry.
    NOTE(review): 1000/delay presumably converts a delay in ns to MHz - confirm.

    Returns a list of [module, width, tech, freq] entries; width and freq are
    converted to strings.
    '''
    percentOffsets = [-8, -6, -4, -2, 0, 2, 4, 6, 8]
    bestSynths = synthsfromcsv('ppa/bestSynths.csv')
    sweep = []
    for width in widths:
        for module in modules:
            for best in bestSynths:
                isMatch = (best.module == str(module)) & (best.tech == tech) & (best.width == width)
                if not isMatch:
                    continue
                # centre the sweep on the frequency implied by this entry's delay
                baseFreq = 1000/best.delay
                for pct in percentOffsets:
                    target = round(baseFreq+baseFreq*pct/100)
                    sweep.append([best.module, str(best.width), best.tech, str(target)])
    return sweep
def filterRedundant(synthsToRun):
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
bashCommand = "find . -path '*runs/*' -prune"
output = subprocess.check_output(['bash','-c', bashCommand])
specReg = re.compile('[a-zA-Z0-9]+')
allSynths = output.decode("utf-8").split('\n')[:-1]
@ -59,21 +70,30 @@ def allCombos(widths, modules, techs, freqs):
if __name__ == '__main__':
##### Run specific syntheses
##### Run specific syntheses for a specific frequency
widths = [8, 16, 32, 64, 128]
modules = ['mult', 'add', 'shiftleft', 'flop', 'comparator', 'priorityencoder', 'add', 'csa', 'mux2', 'mux4', 'mux8']
techs = ['sky90', 'tsmc28']
modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8']
techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']
freqs = [5000]
synthsToRun = allCombos(widths, modules, techs, freqs)
##### Run a sweep based on best delay found in existing syntheses
module = 'add'
module = 'adder'
width = 32
tech = 'sky90'
tech = 'tsmc28psyn'
synthsToRun = freqSweep(module, width, tech)
##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses
modules = ['adder']
# widths = [8, 16, 32, 64, 128]
widths = [32]
tech = 'sky130'
synthsToRun = freqModuleSweep(widths, modules, tech)
##### Only do syntheses for which a run doesn't already exist
synthsToRun = filterRedundant(synthsToRun)
synthsToRun = filterRedundant(synthsToRun)
pool = Pool(processes=25)
pool.starmap(runCommand, synthsToRun)
pool.starmap(runCommand, synthsToRun)
pool.close()
pool.join()

View File

@ -18,7 +18,6 @@ suppress_message {VER-274}
# Enable Multicore
set_host_options -max_cores $::env(MAXCORES)
# get outputDir and configDir from environment (Makefile)
set outputDir $::env(OUTPUTDIR)
set cfg $::env(CONFIGDIR)
@ -26,6 +25,7 @@ set hdl_src "../src"
set saifpower $::env(SAIFPOWER)
set maxopt $::env(MAXOPT)
set drive $::env(DRIVE)
set width $::env(WIDTH)
eval file copy -force [glob ${cfg}/*.vh] {$outputDir/hdl/}
eval file copy -force [glob ${hdl_src}/cvw.sv] {$outputDir/hdl/}
@ -88,7 +88,13 @@ if { [shell_is_in_topographical_mode] } {
#set alib_library_analysis_path ./$outputDir
define_design_lib WORK -path ./$outputDir/WORK
analyze -f sverilog -lib WORK $my_verilog_files
elaborate $my_toplevel -lib WORK
# Elaborate the top-level design.
# When wrapper is 1, the design is elaborated without any parameter override.
# Otherwise (wrapper=0) we want to run against a specific module, so the
# value of $width is passed to DC as the module's WIDTH parameter.
if { $wrapper == 1 } {
elaborate $my_toplevel -lib WORK
} else {
elaborate $my_toplevel -lib WORK -parameters WIDTH=$width
}
# Set the current_design
current_design $my_toplevel
@ -447,4 +453,4 @@ set t2 [clock seconds]
set t [expr $t2 - $t1]
echo [expr $t/60]
quit
quit

14
synthDC/wallySynthAll.sh Executable file
View File

@ -0,0 +1,14 @@
#!/bin/bash
# Run all Wally synthesis experiments from chapter 8.
# Each wallySynth.py invocation below is started only after the previous one returns.

# Frequency sweeps are left disabled: trying to run the freqsweeps at the same
# time maxes out licenses and some runs fail. Uncomment and run them separately.
#./wallySynth.py --freqsweep 330 --tech sky130
#./wallySynth.py --freqsweep 870 --tech sky90
#./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram

# Configuration sweeps, one per technology at its target frequency
./wallySynth.py --configsweep --tech sky130 --targetfreq 330
./wallySynth.py --configsweep --tech sky90 --targetfreq 870
./wallySynth.py --configsweep --tech tsmc28psyn --targetfreq 2800 --usesram

# Feature sweeps, one per technology at its target frequency
./wallySynth.py --featuresweep --tech sky130 --targetfreq 330
./wallySynth.py --featuresweep --tech sky90 --targetfreq 870
./wallySynth.py --featuresweep --tech tsmc28psyn --targetfreq 2800 --usesram

# Extract summary data (run this by hand after all experiments finish)
#./extractSummary.py --sky130freq 330 --sky90freq 870 --tsmcfreq 2800

View File

@ -115,8 +115,8 @@ module testbenchfp;
logic FlushE;
logic IFDivStartE;
logic FDivDoneE;
logic [P.NE+1:0] QeM;
logic [P.DIVb:0] QmM;
logic [P.NE+1:0] UeM;
logic [P.DIVb:0] UmM;
logic [P.XLEN-1:0] FIntDivResultM;
logic ResMatch; // Check if result match
logic FlagMatch; // Check if IEEE flags match
@ -145,9 +145,12 @@ module testbenchfp;
initial begin
// Information displayed for user on what is simulating
$display("\nThe start of simulation...");
$display("This simulation for TEST is %s", TEST);
$display("This simulation for TEST is of the operand size of %s", TEST_SIZE);
//$display("\nThe start of simulation...");
//$display("This simulation for TEST is %s", TEST);
//$display("This simulation for TEST is of the operand size of %s", TEST_SIZE);
// $display("FPDUR %d %d DIVN %d LOGR %d RK %d RADIX %d DURLEN %d", FPDUR, DIVN, LOGR, RK, RADIX, DURLEN);
if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad precision is supported
if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion
// add the 128-bit cvtint tests to the to-be-tested list
@ -649,7 +652,7 @@ module testbenchfp;
string tt0;
tt0 = $psprintf("%s", Tests[TestNum]);
testname = {pp, tt0};
$display("Here you are %s", testname);
//$display("Here you are %s", testname);
$display("\n\nRunning %s vectors ", Tests[TestNum]);
$readmemh(testname, TestVectors);
// set the test index to 0
@ -705,7 +708,7 @@ module testbenchfp;
end
postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
.OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
.OpCtrl(OpCtrlVal), .DivUm(Quot), .DivUe(DivCalcExp),
.Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
.XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
@ -734,8 +737,8 @@ module testbenchfp;
.XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero),
.XNaNE(XNaN), .YNaNE(YNaN),
.FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
.StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .QeM(DivCalcExp),
.QmM(Quot),
.StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp),
.UmM(Quot),
.FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
.Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
.FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));

View File

@ -389,6 +389,7 @@ module testbench;
assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz;
assign SDCCmdIn = SDCCmd;
assign SDCDat = sd_dat_reg_t ? sd_dat_reg_o : sd_dat_i;
assign SDCDatIn = SDCDat;
-----/\----- EXCLUDED -----/\----- */
assign SDCIntr = '0;