mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-03 10:15:19 +00:00
Merge branch 'main' of github.com:ross144/cvw
This commit is contained in:
commit
eed6f11df6
5
.gitignore
vendored
5
.gitignore
vendored
@ -10,7 +10,7 @@ __pycache__/
|
||||
addins/riscv-arch-test/Makefile.include
|
||||
addins/riscv-tests/target
|
||||
addins/TestFloat-3e/build/Linux-x86_64-GCC/*
|
||||
benchmarks/embench/wally*.json
|
||||
|
||||
|
||||
#vsim work files to ignore
|
||||
transcript
|
||||
@ -175,3 +175,6 @@ tests/fp/combined_IF_vectors/IF_vectors/*.tv
|
||||
sim/bp-results/*.log
|
||||
sim/branch*.log
|
||||
/tests/custom/fpga-test-sdc/bin/fpga-test-sdc
|
||||
benchmarks/embench/wally*.json
|
||||
benchmarks/embench/run*
|
||||
sim/cfi.log
|
||||
|
16
.gitmodules
vendored
16
.gitmodules
vendored
@ -1,16 +1,9 @@
|
||||
[submodule "sky130/sky130_osu_sc_t12"]
|
||||
path = sky130/sky130_osu_sc_t12
|
||||
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/
|
||||
[submodule "addins/riscv-arch-test"]
|
||||
path = addins/riscv-arch-test
|
||||
url = https://github.com/riscv-non-isa/riscv-arch-test
|
||||
ignore = dirty
|
||||
[submodule "addins/imperas-riscv-tests"]
|
||||
path = addins/imperas-riscv-tests
|
||||
url = https://github.com/riscv-ovpsim/imperas-riscv-tests
|
||||
[submodule "addins/riscv-tests"]
|
||||
path = addins/riscv-tests
|
||||
url = https://github.com/riscv-software-src/riscv-tests
|
||||
[submodule "addins/riscv-dv"]
|
||||
path = addins/riscv-dv
|
||||
url = https://github.com/google/riscv-dv
|
||||
@ -30,6 +23,9 @@
|
||||
[submodule "addins/vivado-boards"]
|
||||
path = addins/vivado-boards
|
||||
url = https://github.com/Digilent/vivado-boards/
|
||||
[submodule "addins/vivado-risc-v"]
|
||||
path = addins/vivado-risc-v
|
||||
url = https://github.com/eugene-tarassov/vivado-risc-v.git
|
||||
[submodule "addins/ahbsdc"]
|
||||
path = addins/ahbsdc
|
||||
url = git@github.com:jacobpease/ahbsdc.git
|
||||
[submodule "addins/riscv-arch-test"]
|
||||
path = addins/riscv-arch-test
|
||||
url = https://github.com/riscv-non-isa/riscv-arch-test
|
||||
|
1
addins/ahbsdc
Submodule
1
addins/ahbsdc
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 5df21aa6625eca120e64ea353ca641aff37d90b2
|
@ -1 +1 @@
|
||||
Subproject commit 1480febc3ace5f471baeee4b1ae0d8fea16e4762
|
||||
Subproject commit 4c5eb87983f51ca7fcf7855306877b3d1c3aabf1
|
@ -1 +1 @@
|
||||
Subproject commit 197179fdc9dfeeca821e848f373c897a3fdae86c
|
||||
Subproject commit eb0a3892215ad2384702db02da1551a59701ec67
|
@ -1 +0,0 @@
|
||||
Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7
|
@ -1 +0,0 @@
|
||||
Subproject commit c76a8613a177b3a04face2cb8e15dd07a8d2fc40
|
@ -3,6 +3,7 @@
|
||||
# Compile Embench for Wally
|
||||
|
||||
embench_dir = ../../addins/embench-iot
|
||||
ARCH=rv32imac_zicsr
|
||||
|
||||
all: build
|
||||
run: build size sim
|
||||
@ -15,7 +16,7 @@ buildsize: build_speedopt_size build_sizeopt_size
|
||||
|
||||
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
|
||||
build_speedopt_speed:
|
||||
$(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
|
||||
$(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-O2 -nostartfiles -march=$(ARCH)"
|
||||
# remove files not used in embench1.0 When changing to 2.0, restore these files
|
||||
#rm -rf $(embench_dir)/bd_speedopt_speed/src/md5sum
|
||||
#rm -rf $(embench_dir)/bd_speedopt_speed/src/tarfind
|
||||
@ -23,7 +24,7 @@ build_speedopt_speed:
|
||||
find $(embench_dir)/bd_speedopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
|
||||
|
||||
build_sizeopt_speed:
|
||||
$(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-Os -nostartfiles"
|
||||
$(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-Os -nostartfiles -march=$(ARCH)"
|
||||
# remove files not used in embench1.0 When changing to 2.0, restore these files
|
||||
#rm -rf $(embench_dir)/bd_sizeopt_speed/src/md5sum
|
||||
#rm -rf $(embench_dir)/bd_sizeopt_speed/src/tarfind
|
||||
@ -32,10 +33,10 @@ build_sizeopt_speed:
|
||||
|
||||
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
|
||||
build_speedopt_size:
|
||||
$(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-O2 -msave-restore" --dummy-libs="libgcc libm libc crt0"
|
||||
$(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-O2 -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0"
|
||||
|
||||
build_sizeopt_size:
|
||||
$(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
|
||||
$(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-Os -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0"
|
||||
|
||||
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
|
||||
sim: modelsim_build_memfile modelsim_run speed
|
||||
|
87
benchmarks/embench/embench_arch_sweep.py
Executable file
87
benchmarks/embench/embench_arch_sweep.py
Executable file
@ -0,0 +1,87 @@
|
||||
#!/usr/bin/python3
|
||||
# embench_arch_sweep.py
|
||||
# David_Harris@hmc.edu 16 November 2023
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
|
||||
# Run embench on a variety of architectures and collate results
|
||||
|
||||
import os
|
||||
from datetime import datetime
|
||||
import re
|
||||
import collections
|
||||
|
||||
#archs = ["rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr", "rv32imafdc_zba_zbb_zbc_zbs_zicsr"]
|
||||
archs = ["rv32imafdc_zba_zbb_zbc_zbs_zicsr", "rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr"]
|
||||
|
||||
def calcgeomean(d, arch, progs=None):
    """Return the geometric mean of one architecture's benchmark results.

    d     -- mapping of architecture name -> {program name: result}.
             Results may be numbers or numeric strings; each one is
             passed through float().
    arch  -- architecture whose results are summarized.
    progs -- program names to include in the mean. Defaults to the
             Embench program list below.

    A program with no recorded result contributes 1.0 to the product, so
    it still counts in the root but does not move the product itself.
    """
    if progs is None:
        progs = ["aha-mont64", "crc32", "cubic", "edn", "huffbench", "matmult-int", "minver", "nbody", "nettle-aes", "nettle-sha256", "nsichneu", "picojpeg", "qrduino", "sglib-combined", "slre", "st", "statemate", "ud", "wikisort"]
    if not progs:
        # Empty product; also avoids dividing by zero in the exponent.
        return 1.0
    # .get() keeps a lookup of an unknown arch from inserting a new key
    # when d is a defaultdict.
    results = d.get(arch, {})
    result = 1.0
    for p in progs:
        result = result * float(results.get(p, 1.0))
    return pow(result, 1.0 / float(len(progs)))
|
||||
|
||||
def tabulate_arch_sweep(directory):
    """Print tab-separated result tables for the runs stored in directory.

    For each case ("wallySizeOpt_size", "wallySpeedOpt_speed") and each
    architecture in the module-level `archs` list, the file
    <directory>/<case>_<arch>.json is scanned line by line for entries of
    the form  "name" : value.  One table per case is printed: a header row
    of architecture names, one row per program, and a final
    "New geo mean" row computed by calcgeomean().

    A results file that cannot be read is treated as having no entries;
    its column then shows "n/a" for every program.
    """
    # Matches one  "name" : value  entry; the value runs up to the next
    # comma or end of line.
    entry_re = re.compile(r'"([^"]*)" : ([^,\n]+)')

    for case in ["wallySizeOpt_size", "wallySpeedOpt_speed"]:
        d = collections.defaultdict(dict)
        for arch in archs:
            file_path = os.path.join(directory, case+"_"+arch+".json")
            try:
                # The with-block closes the handle on every path. The old
                # code called f.close() in the except branch, where f was
                # still unbound if the very first open() failed.
                with open(file_path, "r") as f:
                    lines = f.readlines()
            except (OSError, UnicodeDecodeError):
                lines = []
            for line in lines:
                match = entry_re.search(line)
                if match:
                    d[arch][match.group(1)] = match.group(2)

        # Header row: an empty corner cell followed by the arch names.
        for arch in [""] + archs:
            print(arch, end="\t")
        print("")

        # Row labels in first-seen order across all architectures, so a
        # missing file for the first arch does not empty the table.
        progs = list(dict.fromkeys(p for arch in archs for p in d[arch]))
        for prog in progs:
            print(prog, end="\t")
            for arch in archs:
                print(d[arch].get(prog, "n/a"), end="\t")
            print("")

        print("New geo mean", end="\t")
        for arch in archs:
            print(calcgeomean(d, arch), end="\t")
        print("")
|
||||
|
||||
def run_arch_sweep():
    """Run the benchmark build once per architecture and collect the JSON.

    Creates a directory named run_<YYYYMMDD_HHMMSS> from the current local
    time. For every entry of the module-level `archs` list it runs
    `make clean` and `make run ARCH=<arch>`, then moves the four
    wally<variant>.json files into that directory with the architecture
    appended to each name. Returns the directory name.
    """
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    run_dir = "run_" + stamp
    os.mkdir(run_dir)

    variants = ("SizeOpt_size", "SizeOpt_speed", "SpeedOpt_size", "SpeedOpt_speed")
    for arch in archs:
        os.system("make clean")
        os.system("make run ARCH=" + arch)
        for res in variants:
            source = "wally" + res + ".json"
            target = run_dir + "/wally" + res + "_" + arch + ".json"
            os.system("mv -f " + source + " " + target)
    return run_dir
|
||||
|
||||
directory = run_arch_sweep()
|
||||
#directory = "run_20231117_082325"
|
||||
tabulate_arch_sweep(directory)
|
@ -45,7 +45,7 @@ localparam SSTC_SUPPORTED = 1;
|
||||
localparam ZICBOM_SUPPORTED = 1;
|
||||
localparam ZICBOZ_SUPPORTED = 1;
|
||||
localparam ZICBOP_SUPPORTED = 1;
|
||||
localparam ZICCLSM_SUPPORTED = 0;
|
||||
localparam ZICCLSM_SUPPORTED = 1;
|
||||
localparam SVPBMT_SUPPORTED = 1;
|
||||
localparam SVNAPOT_SUPPORTED = 1;
|
||||
localparam SVINVAL_SUPPORTED = 1;
|
||||
|
@ -74,8 +74,8 @@ localparam ICACHE_LINELENINBITS = 32'd512;
|
||||
|
||||
// Integer Divider Configuration
|
||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||
localparam IDIV_BITSPERCYCLE = 32'd4;
|
||||
localparam IDIV_ON_FPU = 1;
|
||||
localparam IDIV_BITSPERCYCLE = 32'd2;
|
||||
localparam IDIV_ON_FPU = 0;
|
||||
|
||||
// Legal number of PMP entries are 0, 16, or 64
|
||||
localparam PMP_ENTRIES = 32'd16;
|
||||
@ -169,7 +169,7 @@ localparam ZMMUL_SUPPORTED = 0;
|
||||
|
||||
// FPU division architecture
|
||||
localparam RADIX = 32'd4;
|
||||
localparam DIVCOPIES = 32'd4;
|
||||
localparam DIVCOPIES = 32'd2;
|
||||
|
||||
// bit manipulation
|
||||
localparam ZBA_SUPPORTED = 1;
|
||||
|
@ -150,7 +150,7 @@ localparam PLIC_SDC_ID = 32'd9;
|
||||
localparam BPRED_SUPPORTED = 1;
|
||||
localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||
localparam BPRED_NUM_LHR = 32'd6;
|
||||
localparam BPRED_SIZE = 32'd6;
|
||||
localparam BPRED_SIZE = 32'd10;
|
||||
localparam BTB_SIZE = 32'd10;
|
||||
localparam RAS_SIZE = 32'd16;
|
||||
|
||||
|
@ -93,16 +93,21 @@ localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF);
|
||||
localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2);
|
||||
localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS);
|
||||
|
||||
// divider r and rk (bits per digit, bits per cycle)
|
||||
localparam LOGR = $clog2(RADIX); // r = log(R) bits per digit
|
||||
localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated
|
||||
|
||||
// intermediate division parameters not directly used in fdivsqrt hardware
|
||||
localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right
|
||||
//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step.
|
||||
localparam DIVMINb = ((FPDIVMINb<XLEN) & IDIV_ON_FPU) ? XLEN : FPDIVMINb; // minimum fractional bits b = max(XLEN, FPDIVMINb)
|
||||
localparam RESBITS = DIVMINb + LOGR; // number of bits in a result: r integer + b fractional
|
||||
|
||||
// division constants
|
||||
localparam DIVN = (((NF+2<XLEN) & IDIV_ON_FPU) ? XLEN : NF+2); // standard length of input
|
||||
localparam LOGR = ($clog2(RADIX)); // r = log(R)
|
||||
localparam RK = (LOGR*DIVCOPIES); // r*k used for intdiv preproc
|
||||
localparam LOGRK = ($clog2(RK)); // log2(r*k)
|
||||
localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4));
|
||||
localparam DURLEN = ($clog2(FPDUR+1));
|
||||
localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b)
|
||||
localparam DIVBLEN = ($clog2(DIVb+1)-1);
|
||||
localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result
|
||||
localparam FPDUR = (RESBITS-1)/RK + 1 ; // ceiling((r+b)/rk)
|
||||
localparam DIVb = FPDUR*RK - LOGR; // divsqrt fractional bits, so total number of bits is a multiple of rk after r integer bits
|
||||
localparam DURLEN = $clog2(FPDUR); // enough bits to count the duration
|
||||
localparam DIVBLEN = $clog2(DIVb); // enough bits to count number of fractional bits
|
||||
|
||||
// largest length in IEU/FPU
|
||||
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF)); // max(XLEN, NF)
|
||||
@ -110,7 +115,7 @@ localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($uns
|
||||
localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
|
||||
localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6)));
|
||||
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));
|
||||
localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4)));
|
||||
localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVMINb+1+NF) > (3*NF+4) ? (DIVMINb+1+NF) : (3*NF+4)));
|
||||
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
@ -179,13 +179,10 @@ localparam cvw_t P = '{
|
||||
NORMSHIFTSZ : NORMSHIFTSZ,
|
||||
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
|
||||
CORRSHIFTSZ : CORRSHIFTSZ,
|
||||
DIVN : DIVN,
|
||||
LOGR : LOGR,
|
||||
RK : RK,
|
||||
LOGRK : LOGRK,
|
||||
FPDUR : FPDUR,
|
||||
DURLEN : DURLEN,
|
||||
DIVb : DIVb,
|
||||
DIVBLEN : DIVBLEN,
|
||||
DIVa : DIVa
|
||||
DIVBLEN : DIVBLEN
|
||||
};
|
||||
|
@ -1,131 +1,12 @@
|
||||
lsu/lsu.sv: logic IEUAdrM
|
||||
lsu/lsu.sv: logic WriteDataM
|
||||
lsu/lsu.sv: logic LSUHADDR
|
||||
lsu/lsu.sv: logic HRDATA
|
||||
lsu/lsu.sv: logic LSUHWDATA
|
||||
lsu/lsu.sv: logic LSUHREADY
|
||||
lsu/lsu.sv: logic LSUHWRITE
|
||||
lsu/lsu.sv: logic LSUHSIZE
|
||||
lsu/lsu.sv: logic LSUHBURST
|
||||
lsu/lsu.sv: logic LSUHTRANS
|
||||
lsu/lsu.sv: logic LSUHWSTRB
|
||||
lsu/lsu.sv: logic IHAdrM
|
||||
ieu/regfile.sv: logic rf
|
||||
ieu/datapath.sv: logic RegWriteW
|
||||
hazard/hazard.sv: logic BPPredWrongE
|
||||
hazard/hazard.sv: logic LoadStallD
|
||||
hazard/hazard.sv: logic FCvtIntStallD
|
||||
hazard/hazard.sv: logic DivBusyE
|
||||
hazard/hazard.sv: logic EcallFaultM
|
||||
hazard/hazard.sv: logic WFIStallM
|
||||
hazard/hazard.sv: logic StallF
|
||||
hazard/hazard.sv: logic FlushD
|
||||
cache/cachefsm.sv: statetype CurrState
|
||||
wally/wallypipelinedcore.sv: logic TrapM
|
||||
wally/wallypipelinedcore.sv: logic SrcAM
|
||||
wally/wallypipelinedcore.sv: logic InstrM
|
||||
wally/wallypipelinedcore.sv: logic PCM
|
||||
wally/wallypipelinedcore.sv: logic MemRWM
|
||||
wally/wallypipelinedcore.sv: logic TrapM
|
||||
wally/wallypipelinedcore.sv: logic InstrValidM
|
||||
wally/wallypipelinedcore.sv: logic WriteDataM
|
||||
wally/wallypipelinedcore.sv: logic IEUAdrM
|
||||
wally/wallypipelinedcore.sv: logic HRDATA
|
||||
ifu/spill.sv: statetype CurrState
|
||||
ifu/ifu.sv: logic IFUStallF
|
||||
ifu/ifu.sv: logic IFUHADDR
|
||||
ifu/ifu.sv: logic HRDATA
|
||||
ifu/ifu.sv: logic IFUHREADY
|
||||
ifu/ifu.sv: logic IFUHWRITE
|
||||
ifu/ifu.sv: logic IFUHSIZE
|
||||
ifu/ifu.sv: logic IFUHBURST
|
||||
ifu/ifu.sv: logic IFUHTRANS
|
||||
ifu/ifu.sv: logic PCF
|
||||
ifu/ifu.sv: logic PCNextF
|
||||
ifu/ifu.sv: logic PCPF
|
||||
ifu/ifu.sv: logic PostSpillInstrRawF
|
||||
mmu/hptw.sv: logic ITLBWriteF
|
||||
mmu/hptw.sv: statetype WalkerState
|
||||
privileged/csrs.sv: logic CSRSReadValM
|
||||
privileged/csrs.sv: logic SEPC_REGW
|
||||
privileged/csrs.sv: logic MIP_REGW
|
||||
privileged/csrs.sv: logic SSCRATCH_REGW
|
||||
privileged/csrs.sv: logic SCAUSE_REGW
|
||||
privileged/csr.sv: logic CSRReadValM
|
||||
privileged/csr.sv: logic CSRSrcM
|
||||
privileged/csr.sv: logic CSRWriteValM
|
||||
privileged/csr.sv: logic MSTATUS_REGW
|
||||
privileged/trap.sv: logic InstrMisalignedFaultM
|
||||
privileged/trap.sv: logic BreakpointFaultM
|
||||
privileged/trap.sv: logic LoadAccessFaultM
|
||||
privileged/trap.sv: logic LoadPageFaultM
|
||||
privileged/trap.sv: logic mretM
|
||||
privileged/trap.sv: logic MIP_REGW
|
||||
privileged/trap.sv: logic PendingIntsM
|
||||
privileged/privileged.sv: logic CSRReadM
|
||||
privileged/privileged.sv: logic InterruptM
|
||||
privileged/csrc.sv: logic HPMCOUNTER_REGW
|
||||
privileged/csri.sv: logic MExtInt
|
||||
privileged/csri.sv: logic MIP_REGW_writeabl
|
||||
privileged/csrm.sv: logic MIP_REGW
|
||||
privileged/csrm.sv: logic MEPC_REGW
|
||||
privileged/csrm.sv: logic MEDELEG_REGW
|
||||
privileged/csrm.sv: logic MIDELEG_REGW
|
||||
privileged/csrm.sv: logic MSCRATCH_REGW
|
||||
privileged/csrm.sv: logic MCAUSE_REGW
|
||||
uncore/uart_apb.sv: logic SIN
|
||||
uncore/uart_apb.sv: logic SOUT
|
||||
uncore/uart_apb.sv: logic OUT1b
|
||||
uncore/uartPC16550D.sv: logic RBR
|
||||
uncore/uartPC16550D.sv: logic FCR
|
||||
uncore/uartPC16550D.sv: logic IER
|
||||
uncore/uartPC16550D.sv: logic MCR
|
||||
uncore/uartPC16550D.sv: logic baudpulse
|
||||
uncore/uartPC16550D.sv: statetype rxstate
|
||||
uncore/uartPC16550D.sv: logic rxfifo
|
||||
uncore/uartPC16550D.sv: logic txfifo
|
||||
uncore/uartPC16550D.sv: logic rxfifohead
|
||||
uncore/uartPC16550D.sv: logic rxfifoentries
|
||||
uncore/uartPC16550D.sv: logic RXBR
|
||||
uncore/uartPC16550D.sv: logic rxtimeoutcnt
|
||||
uncore/uartPC16550D.sv: logic rxparityerr
|
||||
uncore/uartPC16550D.sv: logic rxdataready
|
||||
uncore/uartPC16550D.sv: logic rxfifoempty
|
||||
uncore/uartPC16550D.sv: logic rxdata
|
||||
uncore/uartPC16550D.sv: logic RXerrbit
|
||||
uncore/uartPC16550D.sv: logic rxfullbitunwrapped
|
||||
uncore/uartPC16550D.sv: logic txdata
|
||||
uncore/uartPC16550D.sv: logic txnextbit
|
||||
uncore/uartPC16550D.sv: logic txfifoempty
|
||||
uncore/uartPC16550D.sv: logic fifoenabled
|
||||
uncore/uartPC16550D.sv: logic RXerr
|
||||
uncore/uartPC16550D.sv: logic THRE
|
||||
uncore/uartPC16550D.sv: logic rxdataavailintr
|
||||
uncore/uartPC16550D.sv: logic intrID
|
||||
uncore/uncore.sv: logic HSELEXTSDCD
|
||||
uncore/plic_apb.sv: logic MExtInt
|
||||
uncore/plic_apb.sv: logic Din
|
||||
uncore/plic_apb.sv: logic requests
|
||||
uncore/plic_apb.sv: logic intPriority
|
||||
uncore/plic_apb.sv: logic intInProgress
|
||||
uncore/plic_apb.sv: logic intThreshold
|
||||
uncore/plic_apb.sv: logic intEn
|
||||
uncore/plic_apb.sv: logic intClaim
|
||||
uncore/plic_apb.sv: logic irqMatrix
|
||||
uncore/plic_apb.sv: logic priorities_with_irqs
|
||||
uncore/plic_apb.sv: logic max_priority_with_irqs
|
||||
uncore/plic_apb.sv: logic irqs_at_max_priority
|
||||
uncore/plic_apb.sv: logic threshMask
|
||||
uncore/clint_apb.sv: logic MTIME
|
||||
uncore/clint_apb.sv: logic MTIMECMP
|
||||
ebu/ebu.sv: logic HCLK
|
||||
ebu/ebu.sv: logic HREADY
|
||||
ebu/ebu.sv: logic HRESP
|
||||
ebu/ebu.sv: logic HADDR
|
||||
ebu/ebu.sv: logic HWRITE
|
||||
ebu/ebu.sv: logic HSIZE
|
||||
ebu/ebu.sv: logic HBURST
|
||||
ebu/ebu.sv: logic HPROT
|
||||
ebu/ebu.sv: logic HTRANS
|
||||
ebu/ebu.sv: logic HMASTLOC
|
||||
ebu/buscachefsm.sv: busstatetype CurrState
|
||||
ebu/busfsm.sv: busstatetype CurrState
|
||||
wally/wallypipelinedcore.sv: logic InstrM
|
||||
lsu/lsu.sv: logic IEUAdrM
|
||||
lsu/lsu.sv: logic PAdrM
|
||||
lsu/lsu.sv: logic ReadDataM
|
||||
lsu/lsu.sv: logic WriteDataM
|
||||
lsu/lsu.sv: logic MemRWM
|
||||
mmu/hptw.sv: logic SATP_REGW
|
||||
privileged/csr.sv: logic MENVCFG_REGW
|
||||
privileged/csr.sv: logic SENVCFG_REGW
|
||||
|
File diff suppressed because one or more lines are too long
@ -42,13 +42,9 @@ if {$board=="ArtyA7"} {
|
||||
# read in all other rtl
|
||||
read_verilog -sv [glob -type f ../src/CopiedFiles_do_not_add_to_repo/*/*.sv ../src/CopiedFiles_do_not_add_to_repo/*/*/*.sv]
|
||||
# *** Once the sdc is updated to use ahb changes these to system verilog.
|
||||
read_verilog [glob -type f ../src/axi_sdc_controller.v]
|
||||
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_master.v]
|
||||
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_serial_host.v]
|
||||
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_master.v]
|
||||
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_serial_host.v]
|
||||
read_verilog [glob -type f ../../addins/ahbsdc/sdc/*.v]
|
||||
|
||||
set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/vivado-risc-v/sdc} [current_fileset]
|
||||
set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/ahbsdc/sdc} [current_fileset]
|
||||
|
||||
if {$board=="ArtyA7"} {
|
||||
add_files -fileset constrs_1 -norecurse ../constraints/constraints-$board.xdc
|
||||
|
513
fpga/src/boot.mem
Normal file
513
fpga/src/boot.mem
Normal file
@ -0,0 +1,513 @@
|
||||
8001819300002197
|
||||
4281420141014081
|
||||
4481440143814301
|
||||
4681460145814501
|
||||
4881480147814701
|
||||
4a814a0149814901
|
||||
4c814c014b814b01
|
||||
4e814e014d814d01
|
||||
0110011b4f814f01
|
||||
059b45011161016e
|
||||
0004063705fe0010
|
||||
1f6000ef8006061b
|
||||
0ff003930000100f
|
||||
4e952e3110060e37
|
||||
c602829b0053f2b7
|
||||
2023fe02dfe312fd
|
||||
829b0053f2b7007e
|
||||
fe02dfe312fdc602
|
||||
4de31efd000e2023
|
||||
059bf1402573fdd0
|
||||
0000061705e20870
|
||||
0010029b01260613
|
||||
68110002806702fe
|
||||
0085179bf0080813
|
||||
038008130107f7b3
|
||||
480508a86c632781
|
||||
1533357902a87963
|
||||
38030000181700a8
|
||||
1c6301057833f268
|
||||
081a403018370808
|
||||
0105783342280813
|
||||
1815751308081063
|
||||
00367513c295e14d
|
||||
654ded510207e793
|
||||
c1701ff00613f130
|
||||
0637c530fff6861b
|
||||
664dcd10167d0200
|
||||
17fd001007b7c25c
|
||||
859b5a5cc20cd21c
|
||||
02062a23dfed0007
|
||||
4785fffd561c664d
|
||||
4501461c06f59063
|
||||
4a1cc35c465cc31c
|
||||
e29dc75c4a5cc71c
|
||||
0c63086008138082
|
||||
1ae30a9008130105
|
||||
b7710017e793f905
|
||||
e793b75901d7e793
|
||||
5f5c674db7410197
|
||||
66cd02072e23dffd
|
||||
fff78513ff7d5698
|
||||
40a0053300a03533
|
||||
bfb100a7e7938082
|
||||
e0a2715d8082557d
|
||||
e486f052f44ef84a
|
||||
fa13e85aec56fc26
|
||||
843289ae892a0086
|
||||
00959993000a1463
|
||||
864ac4396b054a85
|
||||
0009859b4549870a
|
||||
0004049b05540363
|
||||
86a66485008b7363
|
||||
870a87aaec7ff0ef
|
||||
4531458146014681
|
||||
f0ef0207c9639c05
|
||||
17820094979beb1f
|
||||
873e020541639381
|
||||
993e99ba020a1963
|
||||
870aa8094501f85d
|
||||
e8bff0ef45454685
|
||||
60a64505fe0559e3
|
||||
79a2794274e26406
|
||||
61616b426ae27a02
|
||||
9301020497138082
|
||||
f40647057179b7f1
|
||||
d79867cdec26f022
|
||||
dff58b85571c674d
|
||||
2423d35c03600793
|
||||
fffd571c674d0207
|
||||
0007a737b00026f3
|
||||
b00027f311f70713
|
||||
674dfef77de38f95
|
||||
4f5ccf9d8b895b1c
|
||||
26f3cf5c0027e793
|
||||
071305f5e737b000
|
||||
8f95b00027f30ff7
|
||||
4f5c674dfef77de3
|
||||
b00026f3cf5c9bf5
|
||||
67f7071300989737
|
||||
7de38f95b00027f3
|
||||
458146014681fef7
|
||||
ddbff0ef4501870a
|
||||
059346014681870a
|
||||
dcbff0ef45211aa0
|
||||
1aa007134782e939
|
||||
816393d117d24411
|
||||
85220ff0041302e7
|
||||
614564e270a27402
|
||||
46e3da5ff0efa0cd
|
||||
0207c7634782fe05
|
||||
458146014681870a
|
||||
d8bff0ef03700513
|
||||
46014681870a87aa
|
||||
0a900513403005b7
|
||||
4409bf7dfc07d9e3
|
||||
c3998b8583f9bfe1
|
||||
4681870a00846413
|
||||
f0ef450945814601
|
||||
870afa0540e3d59f
|
||||
123405b746014681
|
||||
46e3d45ff0ef450d
|
||||
870a77c14482f805
|
||||
85a6460146818cfd
|
||||
4ae3d2dff0ef451d
|
||||
d3d8470567cdf605
|
||||
000f4737b00026f3
|
||||
b00027f323f70713
|
||||
67cdfef77de38f95
|
||||
4681870a0007ae23
|
||||
0370051385a64601
|
||||
f2054fe3cf7ff0ef
|
||||
458146014681870a
|
||||
ce3ff0ef08600513
|
||||
4681870af20545e3
|
||||
4541200005934601
|
||||
f0055de3ccfff0ef
|
||||
3023bf010113bf09
|
||||
4605842a86aa4081
|
||||
40113423850a4585
|
||||
86a265a6da5ff0ef
|
||||
d99ff0ef04084605
|
||||
2201358322813603
|
||||
86a2260508700513
|
||||
d81ff0ef05629e0d
|
||||
2a0135832a813603
|
||||
9e0d86a226054505
|
||||
3603d6bff0ef057e
|
||||
0513320135833281
|
||||
9e0d86a226054010
|
||||
3083d53ff0ef0556
|
||||
4501400134034081
|
||||
0000808241010113
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
0000000000000000
|
||||
00600100d2e3ca40
|
@ -27,14 +27,6 @@ BINARIES := fw_jump.elf vmlinux busybox
|
||||
OBJDUMPS := $(foreach name, $(BINARIES), $(basename $(name) .elf))
|
||||
OBJDUMPS := $(foreach name, $(OBJDUMPS), $(DIS)/$(name).objdump)
|
||||
|
||||
define linuxDir =
|
||||
$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$")
|
||||
endef
|
||||
|
||||
define busyboxDir =
|
||||
$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$")
|
||||
endef
|
||||
|
||||
.PHONY: all generate disassemble install clean cleanDTB cleanDriver test
|
||||
|
||||
all:
|
||||
@ -46,8 +38,7 @@ all:
|
||||
|
||||
# Temp rule for debugging
|
||||
test:
|
||||
@echo $(linuxDir)
|
||||
@echo $(busyboxDir)
|
||||
echo $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$")
|
||||
|
||||
generate: $(DTB) $(IMAGES)
|
||||
|
||||
@ -74,11 +65,13 @@ $(DIS)/%.objdump: $(IMAGES)/%.elf
|
||||
$(DIS)/%.objdump: $(IMAGES)/%
|
||||
riscv64-unknown-elf-objdump -S $< >> $@
|
||||
|
||||
$(IMAGES)/vmlinux: $(call linuxDir)/vmlinux
|
||||
cp $< $@
|
||||
$(IMAGES)/vmlinux:
|
||||
linuxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") ;\
|
||||
cp $$linuxDir/vmlinux $@ ;\
|
||||
|
||||
$(IMAGES)/busybox: $(call busyboxDir)/busybox
|
||||
cp $< $@
|
||||
$(IMAGES)/busybox:
|
||||
busyboxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") ;\
|
||||
cp $$busyboxDir/busybox $@ ;\
|
||||
|
||||
# Generating new Buildroot directories --------------------------------
|
||||
|
||||
|
@ -31,6 +31,7 @@
|
||||
status = "okay";
|
||||
compatible = "riscv";
|
||||
riscv,isa = "rv64imafdcsu";
|
||||
riscv,isa-extensions = "imafdc", "sstc", "svinval", "svnapot", "svpbmt", "zba", "zbb", "zbc", "zbs", "zicbom", "zicbop", "zicbopz", "zicntr", "zicsr", "zifencei", "zihpm";
|
||||
mmu-type = "riscv,sv48";
|
||||
|
||||
interrupt-controller {
|
||||
|
@ -22,6 +22,9 @@
|
||||
--override cpu/Zicbom=T
|
||||
--override cpu/Zicbop=T
|
||||
--override cpu/Zicboz=T
|
||||
--override cmomp_bytes=64 # Zic64b
|
||||
--override cmoz_bytes=64 # Zic64b
|
||||
--override lr_sc_grain=64 # Za64rs
|
||||
|
||||
# 64 KiB continuous huge pages supported
|
||||
--override cpu/Svpbmt=T
|
||||
@ -40,7 +43,7 @@
|
||||
|
||||
--override cpu/reset_address=0x80000000
|
||||
|
||||
--override cpu/unaligned=F
|
||||
--override cpu/unaligned=T # Zicclsm (should be true)
|
||||
--override cpu/ignore_non_leaf_DAU=1
|
||||
--override cpu/wfi_is_nop=T
|
||||
--override cpu/misa_Extensions_mask=0x0
|
||||
@ -88,7 +91,7 @@
|
||||
|
||||
# Add Imperas simulator application instruction tracing
|
||||
--verbose
|
||||
--trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange --traceafter 0
|
||||
#--trace --tracechange --traceshowicount --tracemode -tracemem ASX --monitornetschange --traceafter 300000000
|
||||
--override cpu/debugflags=6 --override cpu/verbose=1
|
||||
--override cpu/show_c_prefix=T
|
||||
|
||||
|
@ -7,4 +7,4 @@ export OTHERFLAGS="+TRACE2LOG_ENABLE=1"
|
||||
#export OTHERFLAGS="+TRACE2LOG_ENABLE=1 +TRACE2LOG_AFTER=10500000"
|
||||
export OTHERFLAGS=""
|
||||
|
||||
vsim -c -do "do wally-linux-imperas.do buildroot buildroot-no-trace $::env(RISCV) 0 0 0"
|
||||
vsim -c -do "do wally-linux-imperas.do buildroot buildroot-no-trace $::env(RISCV) 0 0 0"
|
||||
|
@ -40,6 +40,7 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
|
||||
#-- Run the Simulation
|
||||
#run -all
|
||||
run 7000 ms
|
||||
add log -recursive /*
|
||||
do linux-wave.do
|
||||
run -all
|
||||
@ -87,9 +88,10 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} {
|
||||
#run 100 ns
|
||||
#force -deposit testbench/dut/core/priv/priv/csr/csri/IE_REGW 16'h2aa
|
||||
#force -deposit testbench/dut/uncore/uncore/clint/clint/MTIMECMP 64'h1000
|
||||
run 7000 ms
|
||||
add log -recursive /testbench/dut/*
|
||||
do wave.do
|
||||
run 14000 ms
|
||||
#add log -recursive /*
|
||||
#do linux-wave.do
|
||||
#run -all
|
||||
|
||||
exec ./slack-notifier/slack-notifier.py
|
||||
|
@ -271,15 +271,12 @@ typedef struct packed {
|
||||
int CORRSHIFTSZ;
|
||||
|
||||
// division constants
|
||||
int DIVN ;
|
||||
int LOGR ;
|
||||
int RK ;
|
||||
int LOGRK ;
|
||||
int FPDUR ;
|
||||
int DURLEN ;
|
||||
int DIVb ;
|
||||
int DIVBLEN ;
|
||||
int DIVa ;
|
||||
|
||||
} cvw_t;
|
||||
|
||||
|
@ -45,8 +45,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
||||
input logic IntDivE, W64E,
|
||||
output logic DivStickyM,
|
||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||
output logic [P.NE+1:0] QeM,
|
||||
output logic [P.DIVb:0] QmM,
|
||||
output logic [P.NE+1:0] UeM, // Exponent result
|
||||
output logic [P.DIVb:0] UmM, // Significand result
|
||||
output logic [P.XLEN-1:0] FIntDivResultM
|
||||
);
|
||||
|
||||
@ -67,17 +67,17 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
||||
// Integer div/rem signals
|
||||
logic BZeroM; // Denominator is zero
|
||||
logic IntDivM; // Integer operation
|
||||
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
|
||||
logic [P.DIVBLEN-1:0] IntNormShiftM; // Integer normalizatoin shift amount
|
||||
logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor
|
||||
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||
|
||||
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
|
||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
|
||||
.FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
|
||||
// Int-specific
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
|
||||
.BZeroM, .nM, .mM, .AM,
|
||||
.BZeroM, .IntNormShiftM, .AM,
|
||||
.IntDivM, .W64M, .ALTBM, .AsM, .BsM);
|
||||
|
||||
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
|
||||
@ -94,8 +94,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
||||
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
|
||||
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
|
||||
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
|
||||
.QmM, .WZeroE, .DivStickyM,
|
||||
.UmM, .WZeroE, .DivStickyM,
|
||||
// Int-specific
|
||||
.nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
|
||||
.IntNormShiftM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
|
||||
.FIntDivResultM);
|
||||
endmodule
|
||||
|
@ -30,13 +30,11 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic SqrtE,
|
||||
input logic IntDivE,
|
||||
input logic [P.DIVBLEN:0] nE,
|
||||
input logic [P.DIVBLEN-1:0] IntResultBitsE,
|
||||
output logic [P.DURLEN-1:0] CyclesE
|
||||
);
|
||||
logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
|
||||
// DIVN = P.NF+3
|
||||
// NS = NF + 1
|
||||
// N = NS or NS+2 for div/sqrt.
|
||||
|
||||
logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
if (P.FPSIZES == 1)
|
||||
@ -64,12 +62,21 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
||||
P.Q_FMT: Nf = P.Q_NF;
|
||||
endcase
|
||||
|
||||
// Cycle logic
|
||||
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
|
||||
// Integer division needs p fractional + r integer result bits
|
||||
// FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
|
||||
// FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle.
|
||||
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
|
||||
|
||||
always_comb begin
|
||||
if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below?
|
||||
// if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
|
||||
else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
|
||||
if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
|
||||
else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
|
||||
if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1
|
||||
else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits
|
||||
|
||||
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
|
||||
else ResultBitsE = FPResultBitsE;
|
||||
|
||||
CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk)
|
||||
end
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
|
@ -28,16 +28,19 @@
|
||||
|
||||
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] Fmt,
|
||||
input logic [P.NE-1:0] Xe, Ye,
|
||||
input logic [P.NE-1:0] Xe, Ye, // input exponents
|
||||
input logic Sqrt,
|
||||
input logic XZero,
|
||||
input logic [P.DIVBLEN:0] ell, m,
|
||||
output logic [P.NE+1:0] Qe
|
||||
input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in Xe and Ye
|
||||
output logic [P.NE+1:0] Ue // result exponent
|
||||
);
|
||||
|
||||
logic [P.NE-2:0] Bias;
|
||||
logic [P.NE+1:0] SXExp;
|
||||
logic [P.NE+1:0] SExp;
|
||||
logic [P.NE+1:0] DExp;
|
||||
|
||||
// Determine exponent bias according to the format
|
||||
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign Bias = (P.NE-1)'(P.BIAS);
|
||||
@ -63,10 +66,14 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
||||
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
|
||||
endcase
|
||||
end
|
||||
|
||||
// Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms
|
||||
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
|
||||
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
|
||||
|
||||
// correct exponent for subnormal input's normalization shifts
|
||||
// division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
|
||||
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
||||
assign Qe = Sqrt ? SExp : DExp;
|
||||
|
||||
// Select square root or division exponent
|
||||
assign Ue = Sqrt ? SExp : DExp;
|
||||
endmodule
|
||||
|
@ -28,12 +28,12 @@
|
||||
|
||||
module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic up, uz,
|
||||
input logic [P.DIVb+3:0] C, U, UM,
|
||||
output logic [P.DIVb+3:0] F
|
||||
input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms)
|
||||
output logic [P.DIVb+3:0] F // Q4.DIVb
|
||||
);
|
||||
logic [P.DIVb+3:0] FP, FN, FZ;
|
||||
logic [P.DIVb+3:0] FP, FN, FZ; // Q4.DIVb
|
||||
|
||||
// Generate for both positive and negative bits
|
||||
// Generate for both positive and negative quotient digits
|
||||
assign FP = ~(U << 1) & C;
|
||||
assign FN = (UM << 1) | (C & ~(C << 2));
|
||||
assign FZ = '0;
|
||||
|
@ -27,14 +27,14 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [3:0] udigit,
|
||||
input logic [P.DIVb+3:0] C, U, UM,
|
||||
output logic [P.DIVb+3:0] F
|
||||
input logic [3:0] udigit, // {2, 1, -1, -2}; all cold for zero
|
||||
input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms)
|
||||
output logic [P.DIVb+3:0] F // Q4.DIVb
|
||||
);
|
||||
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
|
||||
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; // Q4.DIVb
|
||||
|
||||
// Generate for both positive and negative bits
|
||||
assign F2 = (~U << 2) & (C << 2);
|
||||
// Generate for both positive and negative digits
|
||||
assign F2 = (~U << 2) & (C << 2); //
|
||||
assign F1 = ~(U << 1) & C;
|
||||
assign F0 = '0;
|
||||
assign FN1 = (UM << 1) | (C & ~(C << 3));
|
||||
|
@ -57,7 +57,7 @@ module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) (
|
||||
// terminate immediately on special cases
|
||||
assign FSpecialCaseE = XZeroE | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
|
||||
if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
|
||||
else assign SpecialCaseE = FSpecialCaseE;
|
||||
else assign SpecialCaseE = FSpecialCaseE;
|
||||
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
|
@ -31,31 +31,31 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
|
||||
input logic IFDivStartE,
|
||||
input logic FDivBusyE,
|
||||
input logic SqrtE,
|
||||
input logic [P.DIVb+3:0] X, D,
|
||||
output logic [P.DIVb:0] FirstU, FirstUM,
|
||||
output logic [P.DIVb+1:0] FirstC,
|
||||
input logic [P.DIVb+3:0] X, D, // Q4.DIVb
|
||||
output logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb
|
||||
output logic [P.DIVb+1:0] FirstC, // Q2.DIVb
|
||||
output logic Firstun,
|
||||
output logic [P.DIVb+3:0] FirstWS, FirstWC
|
||||
output logic [P.DIVb+3:0] FirstWS, FirstWC // Q4.DIVb
|
||||
);
|
||||
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b
|
||||
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b
|
||||
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b
|
||||
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b
|
||||
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b
|
||||
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b
|
||||
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b
|
||||
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b
|
||||
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b
|
||||
logic [P.DIVb+1:0] initC; // Q2.b
|
||||
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.DIVb
|
||||
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb
|
||||
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.DIVb
|
||||
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.DIVb
|
||||
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.DIVb
|
||||
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.DIVb
|
||||
logic [P.DIVb+1:0] initC; // Q2.DIVb
|
||||
logic [P.DIVCOPIES-1:0] un;
|
||||
|
||||
logic [P.DIVb+3:0] WSN, WCN; // Q4.b
|
||||
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b
|
||||
logic [P.DIVb+1:0] NextC;
|
||||
logic [P.DIVb:0] UMux, UMMux;
|
||||
logic [P.DIVb:0] initU, initUM;
|
||||
logic [P.DIVb+3:0] WSN, WCN; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.DIVb
|
||||
logic [P.DIVb+1:0] NextC; // Q2.DIVb
|
||||
logic [P.DIVb:0] UMux, UMMux; // U1.DIVb
|
||||
logic [P.DIVb:0] initU, initUM; // U1.DIVb
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
|
||||
// Top Muxes and Registers
|
||||
@ -104,14 +104,14 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
|
||||
for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
|
||||
if (P.RADIX == 2) begin: stage
|
||||
fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
end else begin: stage
|
||||
logic j1;
|
||||
assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
|
||||
fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
end
|
||||
assign WS[i+1] = WSNext[i];
|
||||
assign WC[i+1] = WCNext[i];
|
||||
|
@ -27,25 +27,25 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk, reset,
|
||||
input logic StallM,
|
||||
input logic [P.DIVb+3:0] WS, WC,
|
||||
input logic [P.DIVb+3:0] D,
|
||||
input logic [P.DIVb:0] FirstU, FirstUM,
|
||||
input logic [P.DIVb+1:0] FirstC,
|
||||
input logic SqrtE,
|
||||
input logic Firstun, SqrtM, SpecialCaseM,
|
||||
input logic [P.XLEN-1:0] AM,
|
||||
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M,
|
||||
input logic [P.DIVBLEN:0] nM, mM,
|
||||
output logic [P.DIVb:0] QmM,
|
||||
output logic WZeroE,
|
||||
output logic DivStickyM,
|
||||
output logic [P.XLEN-1:0] FIntDivResultM
|
||||
input logic clk, reset,
|
||||
input logic StallM,
|
||||
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||
input logic [P.DIVb+3:0] D, // Q4.DIVb
|
||||
input logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb
|
||||
input logic [P.DIVb+1:0] FirstC, // Q2.DIVb
|
||||
input logic SqrtE,
|
||||
input logic Firstun, SqrtM, SpecialCaseM,
|
||||
input logic [P.XLEN-1:0] AM, // U/Q(XLEN.0)
|
||||
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M,
|
||||
input logic [P.DIVBLEN-1:0] IntNormShiftM,
|
||||
output logic [P.DIVb:0] UmM, // U1.DIVb result significand
|
||||
output logic WZeroE,
|
||||
output logic DivStickyM,
|
||||
output logic [P.XLEN-1:0] FIntDivResultM // U/Q(XLEN.0)
|
||||
);
|
||||
|
||||
logic [P.DIVb+3:0] W, Sum;
|
||||
logic [P.DIVb:0] PreQmM;
|
||||
logic [P.DIVb:0] PreUmM;
|
||||
logic NegStickyM;
|
||||
logic weq0E, WZeroM;
|
||||
logic [P.XLEN-1:0] IntDivResultM;
|
||||
@ -86,22 +86,21 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
||||
//////////////////////////
|
||||
|
||||
// If the result is not exact, the sticky should be set
|
||||
assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide
|
||||
assign DivStickyM = ~WZeroM & ~SpecialCaseM;
|
||||
|
||||
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
|
||||
// Determine if sticky bit is negative
|
||||
assign Sum = WC + WS;
|
||||
assign NegStickyM = Sum[P.DIVb+3];
|
||||
mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
||||
mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
||||
mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
|
||||
mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);
|
||||
|
||||
// Integer quotient or remainder correctoin, normalization, and special cases
|
||||
// Integer quotient or remainder correction, normalization, and special cases
|
||||
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
|
||||
logic [P.DIVBLEN:0] NormShiftM;
|
||||
logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
||||
logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
|
||||
|
||||
assign W = $signed(Sum) >>> P.LOGR;
|
||||
assign UnsignedQuotM = {3'b000, PreQmM};
|
||||
assign UnsignedQuotM = {3'b000, PreUmM};
|
||||
|
||||
// Integer remainder: sticky and sign correction muxes
|
||||
assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
|
||||
@ -110,9 +109,8 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
||||
mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
|
||||
|
||||
// Select quotient or remainder and do normalization shift
|
||||
mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
|
||||
mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
|
||||
assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
|
||||
assign PreIntResultM = $signed(PreResultM >>> IntNormShiftM);
|
||||
|
||||
// special case logic
|
||||
// terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B|
|
||||
@ -120,7 +118,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
||||
if (BZeroM) begin // Divide by zero
|
||||
if (RemOpM) IntDivResultM = AM;
|
||||
else IntDivResultM = {(P.XLEN){1'b1}};
|
||||
end else if (ALTBM) begin // Numerator is zero
|
||||
end else if (ALTBM) begin // Numerator is small
|
||||
if (RemOpM) IntDivResultM = AM;
|
||||
else IntDivResultM = '0;
|
||||
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];
|
||||
|
@ -29,37 +29,39 @@
|
||||
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic IFDivStartE,
|
||||
input logic [P.NF:0] Xm, Ym,
|
||||
input logic [P.NE-1:0] Xe, Ye,
|
||||
input logic [P.NF:0] Xm, Ym, // Floating-point significands
|
||||
input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic SqrtE,
|
||||
input logic XZeroE,
|
||||
input logic [2:0] Funct3E,
|
||||
output logic [P.NE+1:0] QeM,
|
||||
output logic [P.DIVb+3:0] X, D,
|
||||
output logic [P.NE+1:0] UeM, // biased exponent of result
|
||||
output logic [P.DIVb+3:0] X, D, // Q4.DIVb
|
||||
// Int-specific
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU
|
||||
input logic IntDivE, W64E,
|
||||
// Outputs
|
||||
output logic ISpecialCaseE,
|
||||
output logic [P.DURLEN-1:0] CyclesE,
|
||||
output logic [P.DIVBLEN:0] nM, mM,
|
||||
output logic [P.DIVBLEN-1:0] IntNormShiftM,
|
||||
output logic ALTBM, IntDivM, W64M,
|
||||
output logic AsM, BsM, BZeroM,
|
||||
output logic [P.XLEN-1:0] AM
|
||||
);
|
||||
|
||||
logic [P.DIVb-1:0] Xfract, Dfract;
|
||||
logic [P.DIVb:0] PreSqrtX;
|
||||
logic [P.DIVb:0] Xnorm, Dnorm;
|
||||
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||
logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
|
||||
logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
||||
logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
|
||||
logic [P.NE+1:0] UeE; // Result Exponent (FP only)
|
||||
logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
||||
logic [P.DIVBLEN-1:0] mE, ell; // Leading zeros of inputs
|
||||
logic [P.DIVBLEN-1:0] IntResultBitsE; // bits in integer result
|
||||
logic NumerZeroE; // Numerator is zero (X or A)
|
||||
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
||||
logic SignedDivE; // signed division
|
||||
logic AsE, BsE; // Signs of integer inputs
|
||||
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
|
||||
logic ALTBE;
|
||||
logic ALTBE;
|
||||
logic EvenExp;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Integer Preprocessing
|
||||
@ -89,12 +91,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
|
||||
|
||||
// Select integer or floating point inputs
|
||||
mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
|
||||
mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
|
||||
mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
|
||||
mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
|
||||
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
|
||||
end else begin // Int not supported
|
||||
assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
|
||||
assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
|
||||
assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
|
||||
assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
|
||||
assign NumerZeroE = XZeroE;
|
||||
end
|
||||
|
||||
@ -103,12 +105,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
// count leading zeros for Subnorm FP and to normalize integer inputs
|
||||
lzc #(P.DIVb) lzcX (IFX, ell);
|
||||
lzc #(P.DIVb) lzcY (IFD, mE);
|
||||
lzc #(P.DIVb+1) lzcX (IFX, ell);
|
||||
lzc #(P.DIVb+1) lzcY (IFD, mE);
|
||||
|
||||
// Normalization shift: shift off leading one
|
||||
assign Xfract = (IFX << ell) << 1;
|
||||
assign Dfract = (IFD << mE) << 1;
|
||||
// Normalization shift: shift leading one into most significant bit
|
||||
assign Xnorm = (IFX << ell);
|
||||
assign Dnorm = (IFD << mE);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Integer Right Shift to digit boundary
|
||||
@ -117,31 +119,28 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
|
||||
logic [P.DIVBLEN:0] ZeroDiff, p;
|
||||
logic [P.DIVBLEN-1:0] ZeroDiff, p;
|
||||
|
||||
// calculate number of fractional bits p
|
||||
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
||||
assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
|
||||
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
|
||||
assign ALTBE = ZeroDiff[P.DIVBLEN-1]; // A less than B (A has more leading zeros)
|
||||
mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
// Integer special cases (terminate immediately)
|
||||
assign ISpecialCaseE = BZeroE | ALTBE;
|
||||
|
||||
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
|
||||
|
||||
if (P.LOGRK > 0) begin // more than 1 bit per cycle
|
||||
logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
|
||||
logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
|
||||
/* verilator lint_off WIDTH */
|
||||
assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
||||
assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
|
||||
assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
|
||||
assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
|
||||
assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
|
||||
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
|
||||
// calculate right shift amount RightShiftX to complete in discrete number of steps
|
||||
if (P.RK > 1) begin // more than 1 bit per cycle
|
||||
logic [$clog2(P.RK)-1:0] RightShiftX;
|
||||
/* verilator lint_off WIDTH */
|
||||
assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
|
||||
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps
|
||||
/* verilator lint_on WIDTH */
|
||||
end else begin // radix 2 1 copy doesn't require shifting
|
||||
assign nE = p;
|
||||
assign DivXShifted = DivX;
|
||||
end
|
||||
end else begin
|
||||
@ -150,22 +149,53 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Floating-Point Preprocessing
|
||||
// append leading 1 (for nonzero inputs)
|
||||
// Extend to Q4.b format
|
||||
// shift square root to be in range [1/4, 1)
|
||||
// Normalized numbers are shifted right by 1 if the exponent is odd
|
||||
// Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
|
||||
// NOTE: there might be a discrepancy that X is never right shifted by 2. However
|
||||
// it comes out in the wash and gives the right answer. Investigate later if possible.
|
||||
//////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
assign DivX = {3'b000, ~NumerZeroE, Xfract};
|
||||
assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
|
||||
|
||||
// Sqrt is initialized on step one as R(X-1), so depends on Radix
|
||||
mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
|
||||
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
|
||||
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
|
||||
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||
|
||||
// If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
|
||||
// Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
|
||||
// Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
|
||||
// Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b form), keeping X in fraction bits
|
||||
// Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
|
||||
// This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
|
||||
// Subtracting 2 is equivalent to adding 1110. Subtracting 4 is equivalent to adding 1100. Prepend leading 1s to do a free subtraction.
|
||||
// This also means only one extra fractional bit is needed because we never shift right by more than 1.
|
||||
// Radix Exponent odd Exponent Even
|
||||
// 2 x-2 = 2(x/2 - 1) x/2 - 2 = 2(x/4 - 1)
|
||||
// 4 2(x)-4 = 4(x/2 - 1) 2(x/2)-4 = 4(x/4 - 1)
|
||||
// Summary: PreSqrtX = r(x/2or4 - 1)
|
||||
|
||||
logic [P.DIVb:0] PreSqrtX;
|
||||
assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
|
||||
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
||||
else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; // 2PreSqrtX - 4 = 4(PreSqrtX/2 - 1)
|
||||
|
||||
/*
|
||||
// Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
|
||||
// This saves one bit in DIVb because there is no initial right shift.
|
||||
// However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
|
||||
// That is an optimization for another day.
|
||||
if (P.RADIX == 2) begin
|
||||
logic [P.DIVb:0] PreSqrtX; // U1.DIVb
|
||||
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||
assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
||||
end else begin
|
||||
logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
|
||||
mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
|
||||
assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
|
||||
end
|
||||
*/
|
||||
|
||||
// Initialize X for division or square root
|
||||
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Select integer or floating-point operands
|
||||
//////////////////////////////////////////////////////
|
||||
@ -176,28 +206,37 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
assign X = PreShiftX;
|
||||
end
|
||||
|
||||
// Divisor register
|
||||
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
|
||||
// Divisor register
|
||||
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
|
||||
|
||||
// Floating-point exponent
|
||||
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
|
||||
flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
|
||||
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Ue(UeE));
|
||||
flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
|
||||
|
||||
// Number of FSM cycles (to FSM)
|
||||
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
|
||||
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
|
||||
|
||||
if (P.IDIV_ON_FPU) begin:intpipelineregs
|
||||
logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
|
||||
logic RemOpE;
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain
|
||||
assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift
|
||||
/* verilator lint_on WIDTH */
|
||||
assign RemOpE = Funct3E[1];
|
||||
mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
|
||||
|
||||
// pipeline registers
|
||||
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
|
||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
||||
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
||||
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
|
||||
flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
||||
flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
|
||||
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
||||
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
|
||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
||||
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
||||
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
|
||||
flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
|
||||
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
||||
if (P.XLEN==64)
|
||||
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
||||
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
@ -29,33 +29,27 @@
|
||||
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.DIVb+3:0] D, DBar,
|
||||
input logic [P.DIVb:0] U, UM,
|
||||
input logic [P.DIVb+3:0] WS, WC,
|
||||
input logic [P.DIVb+1:0] C,
|
||||
input logic SqrtE,
|
||||
output logic un,
|
||||
output logic [P.DIVb+1:0] CNext,
|
||||
output logic [P.DIVb:0] UNext, UMNext,
|
||||
output logic [P.DIVb+3:0] WSNext, WCNext
|
||||
input logic [P.DIVb+3:0] D, DBar, // Q4.DIVb
|
||||
input logic [P.DIVb:0] U, UM, // U1.DIVb
|
||||
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||
input logic [P.DIVb+1:0] C, // Q2.DIVb
|
||||
input logic SqrtE,
|
||||
output logic un,
|
||||
output logic [P.DIVb+1:0] CNext, // Q2.DIVb
|
||||
output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb
|
||||
output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb
|
||||
);
|
||||
/* verilator lint_on UNOPTFLAT */
|
||||
|
||||
logic [P.DIVb+3:0] Dsel;
|
||||
logic up, uz;
|
||||
logic [P.DIVb+3:0] F;
|
||||
logic [P.DIVb+3:0] AddIn;
|
||||
logic [P.DIVb+3:0] WSA, WCA;
|
||||
logic [P.DIVb+3:0] Dsel; // Q4.DIVb
|
||||
logic up, uz;
|
||||
logic [P.DIVb+3:0] F; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] AddIn; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb
|
||||
|
||||
// Qmient Selection logic
|
||||
// Quotient Selection logic
|
||||
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
|
||||
// q encoding:
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);
|
||||
fdivsqrtuslc2 uslc2(.WS(WS[P.DIVb+3:P.DIVb]), .WC(WC[P.DIVb+3:P.DIVb]), .up, .uz, .un);
|
||||
|
||||
// Sqrt F generation. Extend C, U, UM to Q4.k
|
||||
fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||
@ -66,7 +60,7 @@ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
||||
else if (uz) Dsel = '0;
|
||||
else Dsel = D; // un
|
||||
|
||||
// Partial Product Generation
|
||||
// Residual Update
|
||||
// WSA, WCA = WS + WC - qD
|
||||
mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
|
||||
csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
|
||||
|
@ -27,40 +27,33 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.DIVb+3:0] D, DBar, D2, DBar2,
|
||||
input logic [P.DIVb:0] U,UM,
|
||||
input logic [P.DIVb+3:0] WS, WC,
|
||||
input logic [P.DIVb+1:0] C,
|
||||
input logic SqrtE, j1,
|
||||
output logic [P.DIVb+1:0] CNext,
|
||||
output logic un,
|
||||
output logic [P.DIVb:0] UNext, UMNext,
|
||||
output logic [P.DIVb+3:0] WSNext, WCNext
|
||||
input logic [P.DIVb+3:0] D, DBar, D2, DBar2, // Q4.DIVb
|
||||
input logic [P.DIVb:0] U,UM, // U1.DIVb
|
||||
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||
input logic [P.DIVb+1:0] C, // Q2.DIVb
|
||||
input logic SqrtE, j1,
|
||||
output logic [P.DIVb+1:0] CNext, // Q2.DIVb
|
||||
output logic un,
|
||||
output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb
|
||||
output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb
|
||||
);
|
||||
|
||||
logic [P.DIVb+3:0] Dsel;
|
||||
logic [3:0] udigit;
|
||||
logic [P.DIVb+3:0] F;
|
||||
logic [P.DIVb+3:0] AddIn;
|
||||
logic [4:0] Smsbs;
|
||||
logic [2:0] Dmsbs;
|
||||
logic [7:0] WCmsbs, WSmsbs;
|
||||
logic CarryIn;
|
||||
logic [P.DIVb+3:0] WSA, WCA;
|
||||
logic [P.DIVb+3:0] Dsel; // Q4.DIVb
|
||||
logic [3:0] udigit; // {+2, +1, -1, -2} or 0000 for 0
|
||||
logic [P.DIVb+3:0] F; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] AddIn; // Q4.DIVb
|
||||
logic [4:0] Smsbs; // U1.4
|
||||
logic [2:0] Dmsbs; // U0.3 drop leading 1 from D
|
||||
logic [7:0] WCmsbs, WSmsbs; // U4.4
|
||||
logic CarryIn;
|
||||
logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb
|
||||
|
||||
// Digit Selection logic
|
||||
// u encoding:
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
assign Smsbs = U[P.DIVb:P.DIVb-4];
|
||||
assign Dmsbs = D[P.DIVb-1:P.DIVb-3];
|
||||
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];
|
||||
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];
|
||||
|
||||
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
||||
assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root
|
||||
assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1
|
||||
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual
|
||||
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual
|
||||
fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
||||
assign un = 1'b0; // unused for radix 4
|
||||
|
||||
// F generation logic
|
||||
|
@ -31,15 +31,15 @@
|
||||
///////////////////////////////
|
||||
module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic up, un,
|
||||
input logic [P.DIVb+1:0] C,
|
||||
input logic [P.DIVb:0] U, UM,
|
||||
output logic [P.DIVb:0] UNext, UMNext
|
||||
input logic [P.DIVb+1:0] C, // Q2.DIVb
|
||||
input logic [P.DIVb:0] U, UM, // U1.DIVb
|
||||
output logic [P.DIVb:0] UNext, UMNext // U1.DIVb
|
||||
);
|
||||
// The on-the-fly converter transfers the divsqrt
|
||||
// bits to the quotient as they come.
|
||||
logic [P.DIVb:0] K;
|
||||
logic [P.DIVb:0] K; // U1.DIVb one-hot
|
||||
|
||||
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
|
||||
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
|
||||
|
||||
always_comb begin
|
||||
if (up) begin
|
||||
|
@ -28,15 +28,15 @@
|
||||
|
||||
module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [3:0] udigit,
|
||||
input logic [P.DIVb:0] U, UM,
|
||||
input logic [P.DIVb:0] C,
|
||||
output logic [P.DIVb:0] UNext, UMNext
|
||||
input logic [P.DIVb:0] U, UM, // U1.DIVb
|
||||
input logic [P.DIVb:0] C, // Q1.DIVb
|
||||
output logic [P.DIVb:0] UNext, UMNext // U1.DIVb
|
||||
);
|
||||
// The on-the-fly converter transfers the square root
|
||||
// bits to the quotient as they come.
|
||||
// Use this otfc for division and square root.
|
||||
|
||||
logic [P.DIVb:0] K1, K2, K3;
|
||||
logic [P.DIVb:0] K1, K2, K3; // U1.DIVb
|
||||
assign K1 = (C&~(C << 1)); // K
|
||||
assign K2 = ((C << 1)&~(C << 2)); // 2K
|
||||
assign K3 = (C & ~(C << 2)); // 3K
|
||||
|
@ -1,10 +1,10 @@
|
||||
///////////////////////////////////////////
|
||||
// fdivsqrtqsel2.sv
|
||||
// fdivsqrtuslc2.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Radix 2 Quotient Digit Selection
|
||||
// Purpose: Radix 2 Unified Quotient/Square Root Digit Selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
@ -18,7 +18,7 @@
|
||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
@ -26,31 +26,26 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtqsel2 (
|
||||
input logic [3:0] ps, pc,
|
||||
output logic up, uz, un
|
||||
module fdivsqrtuslc2 (
|
||||
input logic [3:0] WS, WC, // Q4.0 most significant bits of redundant residual
|
||||
output logic up, uz, un // {+1, 0, -1}
|
||||
);
|
||||
|
||||
logic [3:0] p, g;
|
||||
logic magnitude, sign;
|
||||
logic sign;
|
||||
|
||||
// Carry chain logic determines if W = WS + WC = -1, < -1, > -1 to choose 0, -1, 1 respectively
|
||||
|
||||
// The quotient selection logic is presented for simplicity, not
|
||||
// for efficiency. You can probably optimize your logic to
|
||||
// select the proper divisor with less delay.
|
||||
//if p2 * p1 * p0, W = -1 and choose digit of 0
|
||||
assign uz = ((WS[2]^WC[2]) & (WS[1]^WC[1]) &
|
||||
(WS[0]^WC[0]));
|
||||
|
||||
// Quotient equations from EE371 lecture notes 13-20
|
||||
assign p = ps ^ pc;
|
||||
assign g = ps & pc;
|
||||
|
||||
assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) &
|
||||
(ps[0]^pc[0]));
|
||||
assign sign = (ps[3]^pc[3])^
|
||||
(ps[2] & pc[2] | ((ps[2]^pc[2]) &
|
||||
(ps[1]&pc[1] | ((ps[1]^pc[1]) &
|
||||
(ps[0]&pc[0])))));
|
||||
// Otherwise determine sign using carry chain: sign = p3 ^ g_2:0
|
||||
assign sign = (WS[3]^WC[3])^
|
||||
(WS[2] & WC[2] | ((WS[2]^WC[2]) &
|
||||
(WS[1]&WC[1] | ((WS[1]^WC[1]) &
|
||||
(WS[0]&WC[0])))));
|
||||
|
||||
// Produce digit = +1, 0, or -1
|
||||
assign up = magnitude & ~sign;
|
||||
assign uz = ~magnitude;
|
||||
assign un = magnitude & sign;
|
||||
assign up = ~uz & ~sign;
|
||||
assign un = ~uz & sign;
|
||||
endmodule
|
@ -1,10 +1,10 @@
|
||||
///////////////////////////////////////////
|
||||
// fdivsqrtqsel4.sv
|
||||
// fdivsqrtuslc4.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Radix 4 Quotient Digit Selection
|
||||
// Purpose: Table-based Radix 4 Unified Quotient/Square Root Digit Selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
@ -26,25 +26,25 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtqsel4 (
|
||||
input logic [2:0] Dmsbs,
|
||||
input logic [4:0] Smsbs,
|
||||
input logic [7:0] WSmsbs, WCmsbs,
|
||||
module fdivsqrtuslc4 (
|
||||
input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1
|
||||
input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation
|
||||
input logic [7:0] WSmsbs, WCmsbs, // Q4.4 redundant residual most significant bits
|
||||
input logic Sqrt, j1,
|
||||
output logic [3:0] udigit
|
||||
output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot
|
||||
);
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
||||
logic [2:0] A;
|
||||
logic [7:0] PreWmsbs; // Q4.4 nonredundant residual msbs
|
||||
logic [6:0] Wmsbs; // Q4.3 truncated nonredundant residual
|
||||
logic [2:0] A; // U0.3 upper bits of D or Smsbs, discarding integer bit
|
||||
|
||||
assign PreWmsbs = WCmsbs + WSmsbs;
|
||||
assign Wmsbs = PreWmsbs[7:1];
|
||||
assign PreWmsbs = WCmsbs + WSmsbs; // add redundant residual to find msbs
|
||||
assign Wmsbs = PreWmsbs[7:1]; // truncate least significant bit to Q4.3 to index table
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
// W = xxxx.xxx...
|
||||
// Wmsbs = | |
|
||||
|
||||
logic [3:0] USel4[1023:0];
|
||||
logic [3:0] USel4[1023:0]; // 1024-entry table of 4-bit digits, indexed with 3 bits of A and 7 bits of Wmsbs
|
||||
|
||||
// Prepopulate selection table; this is constant at compile time
|
||||
always_comb begin
|
||||
@ -101,10 +101,10 @@ module fdivsqrtqsel4 (
|
||||
// Select A
|
||||
always_comb
|
||||
if (Sqrt) begin
|
||||
if (j1) A = 3'b101;
|
||||
else if (Smsbs == 5'b10000) A = 3'b111;
|
||||
else A = Smsbs[2:0];
|
||||
end else A = Dmsbs;
|
||||
if (j1) A = 3'b101; // on first sqrt iteration A = .101
|
||||
else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111
|
||||
else A = Smsbs[2:0]; // otherwise use A = 2S (in U0.3 format)
|
||||
end else A = Dmsbs; // division uses A = D (in U0.3 format, dropping leading 1)
|
||||
|
||||
// Select quotient digit from lookup table based on A and W
|
||||
assign udigit = USel4[{A,Wmsbs}];
|
@ -1,10 +1,10 @@
|
||||
///////////////////////////////////////////
|
||||
// fdivsqrtqsel4cmp.sv
|
||||
// fdivsqrtuslc4cmp.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Comparator-based Radix 4 Quotient Digit Selection
|
||||
// Purpose: Comparator-based Radix 4 Unified Quotient/Square Root Digit Selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
@ -26,12 +26,12 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtqsel4cmp (
|
||||
input logic [2:0] Dmsbs,
|
||||
input logic [4:0] Smsbs,
|
||||
input logic [7:0] WSmsbs, WCmsbs,
|
||||
module fdivsqrtuslc4cmp (
|
||||
input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1
|
||||
input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation
|
||||
input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits
|
||||
input logic SqrtE, j1,
|
||||
output logic [3:0] udigit
|
||||
output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot
|
||||
);
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
@ -133,8 +133,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
||||
|
||||
// divide signals
|
||||
logic [P.DIVb:0] QmM; // fdivsqrt signifcand
|
||||
logic [P.NE+1:0] QeM; // fdivsqrt exponent
|
||||
logic [P.DIVb:0] UmM; // fdivsqrt significand
|
||||
logic [P.NE+1:0] UeM; // fdivsqrt exponent
|
||||
logic DivStickyM; // fdivsqrt sticky bit
|
||||
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
|
||||
logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
||||
@ -242,8 +242,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
|
||||
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
|
||||
.QmM, .FIntDivResultM);
|
||||
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM,
|
||||
.UmM, .FIntDivResultM);
|
||||
|
||||
// compare: fmin/fmax, flt/fle/feq
|
||||
fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
|
||||
@ -326,9 +326,9 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
|
||||
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
|
||||
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivUm(UmM), .FmaSs(SsM),
|
||||
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
|
||||
.FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
|
||||
.FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
|
||||
.CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM),
|
||||
.ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM),
|
||||
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
|
||||
|
@ -27,8 +27,8 @@
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module divshiftcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.DIVb:0] DivQm, // divsqrt significand
|
||||
input logic [P.NE+1:0] DivQe, // divsqrt exponent
|
||||
input logic [P.DIVb:0] DivUm, // divsqrt significand
|
||||
input logic [P.NE+1:0] DivUe, // divsqrt exponent
|
||||
output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
|
||||
output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
|
||||
output logic DivResSubnorm, // is the divsqrt result subnormal
|
||||
@ -41,23 +41,23 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
// is the result subnormal
|
||||
// if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes
|
||||
assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]);
|
||||
assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
|
||||
|
||||
// if the result is subnormal
|
||||
// 00000000x.xxxxxx... Exp = DivQe
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
|
||||
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
|
||||
// 00000000x.xxxxxx... Exp = DivUe
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
|
||||
// .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1
|
||||
// .0000xxxxxxxxxxx... >> 1 Exp = 1
|
||||
// Left shift amount = DivQe+NF+1-1
|
||||
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe;
|
||||
// Left shift amount = DivUe+NF+1-1
|
||||
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe;
|
||||
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
|
||||
|
||||
// if the result is normalized
|
||||
// 00000000x.xxxxxx... Exp = DivQe
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
|
||||
// 00000000.xxxxxxx... << NF Exp = DivQe+1
|
||||
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
|
||||
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
|
||||
// 00000000x.xxxxxx... Exp = DivUe
|
||||
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
|
||||
// 00000000.xxxxxxx... << NF Exp = DivUe+1
|
||||
// 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards)
|
||||
// 00000000xx.xxxxx... << 1? Exp = DivUe-1 (determined after)
|
||||
// initial left shift amount = NF
|
||||
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
|
||||
assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF);
|
||||
@ -68,5 +68,5 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) (
|
||||
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
|
||||
|
||||
// pre-shift the divider result for normalization
|
||||
assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
|
||||
assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
|
||||
endmodule
|
||||
|
@ -48,8 +48,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count
|
||||
//divide signals
|
||||
input logic DivSticky, // divider sticky bit
|
||||
input logic [P.NE+1:0] DivQe, // divsqrt exponent
|
||||
input logic [P.DIVb:0] DivQm, // divsqrt significand
|
||||
input logic [P.NE+1:0] DivUe, // divsqrt exponent
|
||||
input logic [P.DIVb:0] DivUm, // divsqrt significand
|
||||
// conversion signals
|
||||
input logic CvtCs, // the result's sign
|
||||
input logic [P.NE:0] CvtCe, // the calculated expoent
|
||||
@ -91,7 +91,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
||||
// division signals
|
||||
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
|
||||
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
|
||||
logic [P.NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
|
||||
logic [P.NE+1:0] Ue; // divsqrt corrected exponent after correction shift
|
||||
logic DivByZero; // divide by zero flag
|
||||
logic DivResSubnorm; // is the divsqrt result subnormal
|
||||
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
|
||||
@ -146,7 +146,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
||||
fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
|
||||
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
|
||||
divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||
divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||
|
||||
// select which unit's output to shift
|
||||
always_comb
|
||||
@ -174,7 +174,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
// correct for LZA/divsqrt error
|
||||
shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
|
||||
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
|
||||
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivUe, .Ue, .FmaSZero, .Shifted, .FmaMe, .Mf);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Rounding
|
||||
@ -189,7 +189,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
||||
// calculate result sign used in rounding unit
|
||||
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
|
||||
|
||||
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
|
||||
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Ue,
|
||||
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
|
||||
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
|
||||
|
||||
|
@ -39,7 +39,7 @@ module round import cvw::*; #(parameter cvw_t P) (
|
||||
// divsqrt
|
||||
input logic DivOp, // is a division opperation being done
|
||||
input logic DivSticky, // divsqrt sticky bit
|
||||
input logic [P.NE+1:0] Qe, // the divsqrt calculated expoent
|
||||
input logic [P.NE+1:0] Ue, // the divsqrt calculated expoent
|
||||
// cvt
|
||||
input logic CvtOp, // is a convert opperation being done
|
||||
input logic ToInt, // is the cvt op a cvt to integer
|
||||
@ -300,8 +300,8 @@ module round import cvw::*; #(parameter cvw_t P) (
|
||||
case(PostProcSel)
|
||||
2'b10: Me = FmaMe; // fma
|
||||
2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
|
||||
// 2'b01: Me = DivDone ? Qe : '0; // divide
|
||||
2'b01: Me = Qe; // divide
|
||||
// 2'b01: Me = DivDone ? Ue : '0; // divide
|
||||
2'b01: Me = Ue; // divide
|
||||
default: Me = '0;
|
||||
endcase
|
||||
|
||||
|
@ -31,7 +31,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
||||
// divsqrt
|
||||
input logic DivOp, // is it a divsqrt opperation
|
||||
input logic DivResSubnorm, // is the divsqrt result subnormal
|
||||
input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent
|
||||
input logic [P.NE+1:0] DivUe, // the divsqrt result's exponent
|
||||
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
|
||||
//fma
|
||||
input logic FmaOp, // is it an fma opperation
|
||||
@ -41,7 +41,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
||||
// output
|
||||
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
|
||||
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
|
||||
output logic [P.NE+1:0] Qe // corrected exponent for divider
|
||||
output logic [P.NE+1:0] Ue // corrected exponent for divider
|
||||
);
|
||||
|
||||
logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||
@ -61,7 +61,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
// correct the shifting of the divsqrt caused by producing a result in (2, .5] range
|
||||
// condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
|
||||
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
|
||||
assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
|
||||
assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
|
||||
assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
|
||||
mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
|
||||
@ -87,5 +87,5 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
// the quotient is in the range [.5,2) if there is no early termination
|
||||
// if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
|
||||
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
|
||||
assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
|
||||
endmodule
|
||||
|
@ -83,7 +83,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
|
||||
assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing
|
||||
always_comb
|
||||
if (BadNaNBox) begin
|
||||
// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
|
||||
PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
|
||||
end else
|
||||
PostBox = In;
|
||||
@ -143,8 +142,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
|
||||
if (BadNaNBox) begin
|
||||
case (Fmt)
|
||||
P.FMT: PostBox = In;
|
||||
// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
|
||||
// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]};
|
||||
P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
|
||||
P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}};
|
||||
default: PostBox = 'x;
|
||||
@ -230,9 +227,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
|
||||
if (BadNaNBox) begin
|
||||
case (Fmt)
|
||||
2'b11: PostBox = In;
|
||||
// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]};
|
||||
// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]};
|
||||
// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]};
|
||||
2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}};
|
||||
2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}};
|
||||
2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};
|
||||
|
@ -33,7 +33,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
|
||||
);
|
||||
|
||||
// Core Memory
|
||||
logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0];
|
||||
(*rom_style="block" *) logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0];
|
||||
|
||||
// dh 10/30/23 ROM macros are presently commented out
|
||||
// because they don't point to a generated ROM
|
||||
@ -41,15 +41,23 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
|
||||
rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
|
||||
|
||||
end if ((`USE_SRAM == 1) & (ADDR_WDITH == 7) & (DATA_WIDTH == 32)) begin
|
||||
rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
|
||||
rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
|
||||
|
||||
end else begin */
|
||||
always @ (posedge clk)
|
||||
if(ce) dout <= ROM[addr];
|
||||
end else begin */
|
||||
|
||||
initial begin
|
||||
if (PRELOAD_ENABLED) begin
|
||||
$readmemh("$WALLY/fpga/src/boot.mem", ROM, 0);
|
||||
end
|
||||
end
|
||||
|
||||
always @ (posedge clk) begin
|
||||
if(ce) dout <= ROM[addr];
|
||||
end
|
||||
|
||||
|
||||
// for FPGA, initialize with zero-stage bootloader
|
||||
if(PRELOAD_ENABLED) begin
|
||||
/*if(PRELOAD_ENABLED) begin
|
||||
initial begin
|
||||
ROM[0]=64'h8001819300002197;
|
||||
ROM[1]=64'h4281420141014081;
|
||||
@ -195,6 +203,6 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
|
||||
ROM[141]=64'h0000808241010113;
|
||||
|
||||
end // if (PRELOAD_ENABLED)
|
||||
end
|
||||
end*/
|
||||
|
||||
endmodule
|
||||
|
@ -26,8 +26,7 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module hazard (
|
||||
// Detect hazards
|
||||
module hazard import cvw::*; #(parameter cvw_t P) (
|
||||
input logic BPWrongE, CSRWriteFenceM, RetM, TrapM,
|
||||
input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD,
|
||||
input logic LSUStallM, IFUStallF,
|
||||
|
@ -131,7 +131,7 @@ module datapath import cvw::*; #(parameter cvw_t P) (
|
||||
if (P.F_SUPPORTED) begin:fpmux
|
||||
mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||
mux2 #(P.XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
|
||||
if (P.IDIV_ON_FPU) begin
|
||||
if (P.IDIV_ON_FPU & P.F_SUPPORTED) begin
|
||||
mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
|
||||
end else begin
|
||||
assign MulDivResultW = MDUResultW;
|
||||
|
@ -39,7 +39,9 @@ module irom import cvw::*; #(parameter cvw_t P) (
|
||||
logic [31:0] RawIROMInstrF;
|
||||
logic [2:1] AdrD;
|
||||
|
||||
rom1p1r #(ADDR_WDITH, P.XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
|
||||
// preload IROM with the FPGA bootloader by default so that it synthesizes to something, avoiding having the IEU optimized away because instructions are all 0
|
||||
// the testbench replaces these dummy contents with the actual program of interest during simulation
|
||||
rom1p1r #(ADDR_WDITH, P.XLEN, 1) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
|
||||
if (P.XLEN == 32) assign RawIROMInstrF = IROMInstrFFull;
|
||||
else begin
|
||||
// IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two
|
||||
|
@ -92,7 +92,8 @@ module lsu import cvw::*; #(parameter cvw_t P) (
|
||||
input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit
|
||||
input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit
|
||||
);
|
||||
localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED;
|
||||
localparam logic MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED;
|
||||
localparam MLEN = MISALIGN_SUPPORT ? 2*P.LLEN : P.LLEN; // widen buffer for misaligned accessess
|
||||
|
||||
logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer
|
||||
logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer
|
||||
@ -118,9 +119,9 @@ module lsu import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data
|
||||
/* verilator lint_off WIDTHEXPAND */
|
||||
logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data
|
||||
logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] LSUWriteDataSpillM; // Final write data
|
||||
logic [((MISALIGN_SUPPORT+1)*P.LLEN-1)/8:0] ByteMaskSpillM; // Selects which bytes within a word to write
|
||||
logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data
|
||||
logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data
|
||||
logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write
|
||||
/* verilator lint_on WIDTHEXPAND */
|
||||
logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data
|
||||
logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data
|
||||
|
@ -57,7 +57,7 @@ module mdu import cvw::*; #(parameter cvw_t P) (
|
||||
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
|
||||
// When IDIV_ON_FPU is set, use the FPU divider instead
|
||||
// In ZMMUL, with M_SUPPORTED = 0, omit the divider
|
||||
if ((P.IDIV_ON_FPU) || (!P.M_SUPPORTED)) begin:nodiv
|
||||
if ((P.IDIV_ON_FPU & P.F_SUPPORTED) || (!P.M_SUPPORTED)) begin:nodiv
|
||||
assign QuotM = 0;
|
||||
assign RemM = 0;
|
||||
assign DivBusyE = 0;
|
||||
|
@ -2,10 +2,14 @@
|
||||
// spi_apb.sv
|
||||
//
|
||||
// Written: Naiche Whyte-Aguayo nwhyteaguayo@g.hmc.edu 11/16/2022
|
||||
|
||||
//
|
||||
// Purpose: SPI peripheral
|
||||
// See FU540-C000-v1.0 for specifications
|
||||
//
|
||||
// SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it consists of synchronous 8 byte transmit and receive FIFOs connected to shift registers.
|
||||
// The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing
|
||||
// to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output,
|
||||
// along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY.
|
||||
// Current limitations: Flash read sequencer mode not implemented, dual and quad mode not supported
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
@ -25,19 +29,6 @@
|
||||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Current limitations: Flash read sequencer mode not implemented, dual and quad modes untestable with current test plan.
|
||||
|
||||
// Attempt to move from >= comparisons by initializing in FSM differently
|
||||
// Parameterize SynchFIFO
|
||||
// look at ReadIncrement/WriteIncrement delay necessity
|
||||
|
||||
/*
|
||||
SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it is consists of synchronous 8 byte transmit and recieve FIFOs connected to shift registers.
|
||||
The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing
|
||||
to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output,
|
||||
along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY.
|
||||
*/
|
||||
|
||||
module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
input logic PCLK, PRESETn,
|
||||
input logic PSEL,
|
||||
@ -54,27 +45,27 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
output logic SPIIntr
|
||||
);
|
||||
|
||||
//SPI control registers. Refer to SiFive FU540-C000 manual
|
||||
// SPI control registers. Refer to SiFive FU540-C000 manual
|
||||
logic [11:0] SckDiv;
|
||||
logic [1:0] SckMode;
|
||||
logic [1:0] ChipSelectID;
|
||||
logic [3:0] ChipSelectDef;
|
||||
logic [1:0] ChipSelectMode;
|
||||
logic [1:0] SckMode;
|
||||
logic [1:0] ChipSelectID;
|
||||
logic [3:0] ChipSelectDef;
|
||||
logic [1:0] ChipSelectMode;
|
||||
logic [15:0] Delay0, Delay1;
|
||||
logic [4:0] Format;
|
||||
logic [7:0] ReceiveData;
|
||||
logic [2:0] TransmitWatermark, ReceiveWatermark;
|
||||
logic [8:0] TransmitData;
|
||||
logic [1:0] InterruptEnable, InterruptPending;
|
||||
logic [4:0] Format;
|
||||
logic [7:0] ReceiveData;
|
||||
logic [2:0] TransmitWatermark, ReceiveWatermark;
|
||||
logic [8:0] TransmitData;
|
||||
logic [1:0] InterruptEnable, InterruptPending;
|
||||
|
||||
//Bus interface signals
|
||||
// Bus interface signals
|
||||
logic [7:0] Entry;
|
||||
logic Memwrite;
|
||||
logic [31:0] Din, Dout;
|
||||
logic TransmitInactive; //High when there is no transmission, used as hardware interlock signal
|
||||
logic TransmitInactive; // High when there is no transmission, used as hardware interlock signal
|
||||
|
||||
//FIFO FSM signals
|
||||
//Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1]
|
||||
// FIFO FSM signals
|
||||
// Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1]
|
||||
logic TransmitWriteMark, TransmitReadMark, RecieveWriteMark, RecieveReadMark;
|
||||
logic TransmitFIFOWriteFull, TransmitFIFOReadEmpty;
|
||||
logic TransmitFIFOReadIncrement;
|
||||
@ -83,75 +74,68 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
logic ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty;
|
||||
logic [7:0] TransmitFIFOReadData, ReceiveFIFOWriteData;
|
||||
logic [2:0] TransmitWriteWatermarkLevel, ReceiveReadWatermarkLevel;
|
||||
logic [7:0] ReceiveShiftRegEndian; //reverses ReceiveShiftReg if Format[2] set (little endian transmission)
|
||||
logic [7:0] ReceiveShiftRegEndian; // Reverses ReceiveShiftReg if Format[2] set (little endian transmission)
|
||||
|
||||
//Transmission signals
|
||||
// Transmission signals
|
||||
logic sck;
|
||||
logic [11:0] DivCounter; //counter for sck
|
||||
logic SCLKenable; //flip flop enable high every sclk edge
|
||||
logic [11:0] DivCounter; // Counter for sck
|
||||
logic SCLKenable; // Flip flop enable high every sclk edge
|
||||
|
||||
//Delay signals
|
||||
logic [8:0] ImplicitDelay1; //Adds implicit delay to cs-sck delay counter based on phase
|
||||
logic [8:0] ImplicitDelay2; //Adds implicit delay to sck-cs delay counter based on phase
|
||||
logic [8:0] CS_SCKCount; //Counter for cs-sck delay
|
||||
logic [8:0] SCK_CSCount; //Counter for sck-cs delay
|
||||
logic [8:0] InterCSCount; //Counter for inter cs delay
|
||||
logic [8:0] InterXFRCount; //Counter for inter xfr delay
|
||||
logic CS_SCKCompare; //Boolean comparison signal, high when CS_SCKCount >= cs-sck delay
|
||||
logic SCK_CSCompare; //Boolean comparison signal, high when SCK_CSCount >= sck-cs delay
|
||||
logic InterCSCompare; //Boolean comparison signal, high when InterCSCount >= inter cs delay
|
||||
logic InterXFRCompare; //Boolean comparison signal, high when InterXFRCount >= inter xfr delay
|
||||
logic ZeroDelayHoldMode; //High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
|
||||
// Delay signals
|
||||
logic [8:0] ImplicitDelay1; // Adds implicit delay to cs-sck delay counter based on phase
|
||||
logic [8:0] ImplicitDelay2; // Adds implicit delay to sck-cs delay counter based on phase
|
||||
logic [8:0] CS_SCKCount; // Counter for cs-sck delay
|
||||
logic [8:0] SCK_CSCount; // Counter for sck-cs delay
|
||||
logic [8:0] InterCSCount; // Counter for inter cs delay
|
||||
logic [8:0] InterXFRCount; // Counter for inter xfr delay
|
||||
logic ZeroDelayHoldMode; // High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
|
||||
|
||||
//Frame counting signals
|
||||
logic [3:0] FrameCount; //Counter for number of frames in transmission
|
||||
logic FrameCompare; //Boolean comparison signal, high when FrameCount = Format[7:4]
|
||||
logic [3:0] ReceivePenultimateFrame; //Frame number - 1
|
||||
logic [3:0] ReceivePenultimateFrameCount; //Counter
|
||||
logic ReceivePenultimateFrameBoolean; //High when penultimate frame in transmission has been reached
|
||||
// Frame counting signals
|
||||
logic [3:0] FrameCount; // Counter for number of frames in transmission
|
||||
logic [3:0] ReceivePenultimateFrameCount; // Counter
|
||||
logic ReceivePenultimateFrame; // High when penultimate frame in transmission has been reached
|
||||
|
||||
//State fsm signals
|
||||
logic Active; //High when state is either Active1 or Active0 (during transmission)
|
||||
logic Active0; //High when state is Active0
|
||||
// State fsm signals
|
||||
logic Active; // High when state is either Active1 or Active0 (during transmission)
|
||||
logic Active0; // High when state is Active0
|
||||
|
||||
//Shift reg signals
|
||||
logic ShiftEdge; //Determines which edge of sck to shift from TransmitShiftReg
|
||||
logic [7:0] TransmitShiftReg; //Transmit shift register
|
||||
logic [7:0] ReceiveShiftReg; //Receive shift register
|
||||
logic SampleEdge; //Determines which edge of sck to sample from ReceiveShiftReg
|
||||
logic [7:0] TransmitDataEndian; //Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB
|
||||
logic TransmitShiftRegLoad; //Determines when to load TransmitShiftReg
|
||||
logic ReceiveShiftFull; //High when receive shift register is full
|
||||
logic TransmitShiftEmpty; //High when transmit shift register is empty
|
||||
logic ShiftIn; //Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST)
|
||||
logic [3:0] LeftShiftAmount; //Determines left shift amount to left-align data when little endian
|
||||
logic [7:0] ASR; //AlignedReceiveShiftReg
|
||||
// Shift reg signals
|
||||
logic ShiftEdge; // Determines which edge of sck to shift from TransmitShiftReg
|
||||
logic [7:0] TransmitShiftReg; // Transmit shift register
|
||||
logic [7:0] ReceiveShiftReg; // Receive shift register
|
||||
logic SampleEdge; // Determines which edge of sck to sample from ReceiveShiftReg
|
||||
logic [7:0] TransmitDataEndian; // Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB
|
||||
logic TransmitShiftRegLoad; // Determines when to load TransmitShiftReg
|
||||
logic ReceiveShiftFull; // High when receive shift register is full
|
||||
logic TransmitShiftEmpty; // High when transmit shift register is empty
|
||||
logic ShiftIn; // Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST)
|
||||
logic [3:0] LeftShiftAmount; // Determines left shift amount to left-align data when little endian
|
||||
logic [7:0] ASR; // AlignedReceiveShiftReg
|
||||
|
||||
//CS signals
|
||||
logic [3:0] ChipSelectAuto; //Assigns ChipSelect value to selected CS signal based on CS ID
|
||||
logic [3:0] ChipSelectInternal; //Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef
|
||||
logic DelayMode; //Determines where to place implicit half cycle delay based on sck phase for CS assertion
|
||||
// CS signals
|
||||
logic [3:0] ChipSelectAuto; // Assigns ChipSelect value to selected CS signal based on CS ID
|
||||
logic [3:0] ChipSelectInternal; // Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef
|
||||
logic DelayMode; // Determines where to place implicit half cycle delay based on sck phase for CS assertion
|
||||
|
||||
//Miscellaneous signals delayed/early by 1 PCLK cycle
|
||||
logic ReceiveShiftFullDelay; //Delays ReceiveShiftFull signal by 1 PCLK cycle
|
||||
logic TransmitFIFOWriteIncrementDelay; //TransmitFIFOWriteIncrement delayed by 1 PCLK cycle
|
||||
logic ReceiveShiftFullDelayPCLK; //ReceiveShiftFull delayed by 1 PCLK cycle
|
||||
// Miscellaneous signals delayed/early by 1 PCLK cycle
|
||||
logic ReceiveShiftFullDelay; // Delays ReceiveShiftFull signal by 1 PCLK cycle
|
||||
logic ReceiveShiftFullDelayPCLK; // ReceiveShiftFull delayed by 1 PCLK cycle
|
||||
logic TransmitFIFOReadEmptyDelay;
|
||||
logic SCLKenableEarly; //SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
|
||||
logic SCLKenableEarly; // SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
|
||||
|
||||
//APB access
|
||||
assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses
|
||||
assign Memwrite = PWRITE & PENABLE & PSEL; // only write in access phase
|
||||
assign PREADY = TransmitInactive; // tie PREADY to transmission for hardware interlock
|
||||
// APB access
|
||||
assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses
|
||||
assign Memwrite = PWRITE & PENABLE & PSEL; // Only write in access phase
|
||||
assign PREADY = TransmitInactive; // Tie PREADY to transmission for hardware interlock
|
||||
|
||||
//Account for subword read/write circuitry
|
||||
// Account for subword read/write circuitry
|
||||
// -- Note SPI registers are 32 bits no matter what; access them with LW SW.
|
||||
|
||||
assign Din = PWDATA[31:0];
|
||||
if (P.XLEN == 64) assign PRDATA = {Dout, Dout};
|
||||
else assign PRDATA = Dout;
|
||||
|
||||
//Register access
|
||||
// Register access
|
||||
always_ff@(posedge PCLK, negedge PRESETn)
|
||||
if (~PRESETn) begin
|
||||
SckDiv <= #1 12'd3;
|
||||
@ -167,13 +151,12 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
ReceiveWatermark <= #1 3'b0;
|
||||
InterruptEnable <= #1 2'b0;
|
||||
InterruptPending <= #1 2'b0;
|
||||
end else begin //writes
|
||||
//According to FU540 spec: Once interrupt is pending, it will remain set until number
|
||||
//of entries in tx/rx fifo is strictly more/less than tx/rxmark
|
||||
end else begin // writes
|
||||
|
||||
|
||||
/* verilator lint_off CASEINCOMPLETE */
|
||||
if (Memwrite & TransmitInactive)
|
||||
case(Entry) //flop to sample inputs
|
||||
case(Entry) // flop to sample inputs
|
||||
8'h00: SckDiv <= Din[11:0];
|
||||
8'h04: SckMode <= Din[1:0];
|
||||
8'h10: ChipSelectID <= Din[1:0];
|
||||
@ -188,18 +171,21 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
8'h70: InterruptEnable <= Din[1:0];
|
||||
endcase
|
||||
/* verilator lint_off CASEINCOMPLETE */
|
||||
//interrupt clearance
|
||||
|
||||
// According to FU540 spec: Once interrupt is pending, it will remain set until number
|
||||
// of entries in tx/rx fifo is strictly more/less than tx/rxmark
|
||||
InterruptPending[0] <= TransmitReadMark;
|
||||
InterruptPending[1] <= RecieveWriteMark;
|
||||
case(Entry) // flop to sample inputs
|
||||
|
||||
case(Entry) // Flop to sample inputs
|
||||
8'h00: Dout <= #1 {20'b0, SckDiv};
|
||||
8'h04: Dout <= #1 {30'b0, SckMode};
|
||||
8'h10: Dout <= #1 {30'b0, ChipSelectID};
|
||||
8'h14: Dout <= #1 {28'b0, ChipSelectDef};
|
||||
8'h18: Dout <= #1 {30'b0, ChipSelectMode};
|
||||
8'h28: Dout <= {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]};
|
||||
8'h2C: Dout <= {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]};
|
||||
8'h40: Dout <= {12'b0, Format[4:1], 13'b0, Format[0], 2'b0};
|
||||
8'h28: Dout <= #1 {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]};
|
||||
8'h2C: Dout <= #1 {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]};
|
||||
8'h40: Dout <= #1 {12'b0, Format[4:1], 13'b0, Format[0], 2'b0};
|
||||
8'h48: Dout <= #1 {23'b0, TransmitFIFOWriteFull, 8'b0};
|
||||
8'h4C: Dout <= #1 {23'b0, ReceiveFIFOReadEmpty, ReceiveData[7:0]};
|
||||
8'h50: Dout <= #1 {29'b0, TransmitWatermark};
|
||||
@ -210,8 +196,9 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
endcase
|
||||
end
|
||||
|
||||
//SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1))
|
||||
//Generates a high signal at the rising and falling edge of SCLK by counting from 0 to SckDiv
|
||||
// SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1))
|
||||
// Asserts SCLKenable at the rising and falling edge of SCLK by counting from 0 to SckDiv
|
||||
// Active at 2x SCLK frequency to account for implicit half cycle delays and actions on both clock edges depending on phase
|
||||
assign SCLKenable = (DivCounter == SckDiv);
|
||||
assign SCLKenableEarly = ((DivCounter + 12'b1) == SckDiv);
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
@ -219,44 +206,38 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
else if (SCLKenable) DivCounter <= 0;
|
||||
else DivCounter <= DivCounter + 12'b1;
|
||||
|
||||
//Boolean logic that tracks frame progression
|
||||
assign FrameCompare = (FrameCount < Format[4:1]);
|
||||
assign ReceivePenultimateFrameBoolean = ((FrameCount + 4'b0001) == Format[4:1]);
|
||||
// Asserts when transmission is one frame before complete
|
||||
assign ReceivePenultimateFrame = ((FrameCount + 4'b0001) == Format[4:1]);
|
||||
|
||||
//Computing delays
|
||||
// Computing delays
|
||||
// When sckmode.pha = 0, an extra half-period delay is implicit in the cs-sck delay, and vice-versa for sck-cs
|
||||
assign ImplicitDelay1 = SckMode[0] ? 9'b0 : 9'b1;
|
||||
assign ImplicitDelay2 = SckMode[0] ? 9'b1 : 9'b0;
|
||||
|
||||
assign CS_SCKCompare = CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1);
|
||||
assign SCK_CSCompare = SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2);
|
||||
assign InterCSCompare = (InterCSCount >= ({Delay1[7:0],1'b0}));
|
||||
assign InterXFRCompare = (InterXFRCount >= ({Delay1[15:8], 1'b0}));
|
||||
// Calculate when tx/rx shift registers are full/empty
|
||||
TransmitShiftFSM TransmitShiftFSM(PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0, TransmitShiftEmpty);
|
||||
ReceiveShiftFSM ReceiveShiftFSM(PCLK, PRESETn, SCLKenable, ReceivePenultimateFrame, SampleEdge, SckMode[0], ReceiveShiftFull);
|
||||
|
||||
//Calculate when tx/rx shift registers are full/empty
|
||||
TransmitShiftFSM TransmitShiftFSM_1 (PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0, TransmitShiftEmpty);
|
||||
ReceiveShiftFSM ReceiveShiftFSM_1 (PCLK, PRESETn, SCLKenable, ReceivePenultimateFrameBoolean, SampleEdge, SckMode[0], ReceiveShiftFull);
|
||||
|
||||
//Calculate tx/rx fifo write and recieve increment signals
|
||||
assign TransmitFIFOWriteIncrement = (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive);
|
||||
// Calculate tx/rx fifo write and receive increment signals
|
||||
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if (~PRESETn) TransmitFIFOWriteIncrementDelay <= 0;
|
||||
else TransmitFIFOWriteIncrementDelay <= TransmitFIFOWriteIncrement;
|
||||
if (~PRESETn) TransmitFIFOWriteIncrement <= 0;
|
||||
else TransmitFIFOWriteIncrement <= (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive);
|
||||
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if (~PRESETn) ReceiveFIFOReadIncrement <= 0;
|
||||
else ReceiveFIFOReadIncrement <= ((Entry == 8'h4C) & ~ReceiveFIFOReadEmpty & PSEL & ~ReceiveFIFOReadIncrement);
|
||||
|
||||
//Tx/Rx FIFOs
|
||||
SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrementDelay, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0], TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark);
|
||||
SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel, ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark);
|
||||
// Tx/Rx FIFOs
|
||||
SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrement, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0],
|
||||
TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark);
|
||||
SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel,
|
||||
ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark);
|
||||
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if (~PRESETn) TransmitFIFOReadEmptyDelay <= 1;
|
||||
else if (SCLKenable) TransmitFIFOReadEmptyDelay <= TransmitFIFOReadEmpty;
|
||||
|
||||
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if (~PRESETn) ReceiveShiftFullDelay <= 0;
|
||||
else if (SCLKenable) ReceiveShiftFullDelay <= ReceiveShiftFull;
|
||||
@ -266,16 +247,16 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
|
||||
assign TransmitShiftRegLoad = ~TransmitShiftEmpty & ~Active | (((ChipSelectMode == 2'b10) & ~|(Delay1[15:8])) & ((ReceiveShiftFullDelay | ReceiveShiftFull) & ~SampleEdge & ~TransmitFIFOReadEmpty));
|
||||
|
||||
//Main FSM which controls SPI transmission
|
||||
// Main FSM which controls SPI transmission
|
||||
typedef enum logic [2:0] {CS_INACTIVE, DELAY_0, ACTIVE_0, ACTIVE_1, DELAY_1,INTER_CS, INTER_XFR} statetype;
|
||||
statetype state;
|
||||
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if (~PRESETn) begin state <= CS_INACTIVE;
|
||||
if (~PRESETn) begin
|
||||
state <= CS_INACTIVE;
|
||||
FrameCount <= 4'b0;
|
||||
|
||||
/* verilator lint_off CASEINCOMPLETE */
|
||||
end else if (SCLKenable) begin
|
||||
/* verilator lint_off CASEINCOMPLETE */
|
||||
case (state)
|
||||
CS_INACTIVE: begin
|
||||
CS_SCKCount <= 9'b1;
|
||||
@ -288,7 +269,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
end
|
||||
DELAY_0: begin
|
||||
CS_SCKCount <= CS_SCKCount + 9'b1;
|
||||
if (CS_SCKCompare) state <= ACTIVE_0;
|
||||
if (CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1)) state <= ACTIVE_0;
|
||||
end
|
||||
ACTIVE_0: begin
|
||||
FrameCount <= FrameCount + 4'b1;
|
||||
@ -296,7 +277,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
end
|
||||
ACTIVE_1: begin
|
||||
InterXFRCount <= 9'b1;
|
||||
if (FrameCompare) state <= ACTIVE_0;
|
||||
if (FrameCount < Format[4:1]) state <= ACTIVE_0;
|
||||
else if ((ChipSelectMode[1:0] == 2'b10) & ~|(Delay1[15:8]) & (~TransmitFIFOReadEmpty)) begin
|
||||
state <= ACTIVE_0;
|
||||
CS_SCKCount <= 9'b1;
|
||||
@ -310,11 +291,11 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
end
|
||||
DELAY_1: begin
|
||||
SCK_CSCount <= SCK_CSCount + 9'b1;
|
||||
if (SCK_CSCompare) state <= INTER_CS;
|
||||
if (SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2)) state <= INTER_CS;
|
||||
end
|
||||
INTER_CS: begin
|
||||
InterCSCount <= InterCSCount + 9'b1;
|
||||
if (InterCSCompare ) state <= CS_INACTIVE;
|
||||
if (InterCSCount >= ({Delay1[7:0],1'b0})) state <= CS_INACTIVE;
|
||||
end
|
||||
INTER_XFR: begin
|
||||
CS_SCKCount <= 9'b1;
|
||||
@ -322,13 +303,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
FrameCount <= 4'b0;
|
||||
InterCSCount <= 9'b10;
|
||||
InterXFRCount <= InterXFRCount + 9'b1;
|
||||
if (InterXFRCompare & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0;
|
||||
if ((InterXFRCount >= ({Delay1[15:8], 1'b0})) & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0;
|
||||
else if (~|ChipSelectMode[1:0]) state <= CS_INACTIVE;
|
||||
end
|
||||
endcase
|
||||
/* verilator lint_off CASEINCOMPLETE */
|
||||
end
|
||||
|
||||
/* verilator lint_off CASEINCOMPLETE */
|
||||
|
||||
|
||||
assign DelayMode = SckMode[0] ? (state == DELAY_1) : (state == ACTIVE_1 & ReceiveShiftFull);
|
||||
assign ChipSelectInternal = (state == CS_INACTIVE | state == INTER_CS | DelayMode & ~|(Delay0[15:8])) ? ChipSelectDef : ~ChipSelectDef;
|
||||
@ -339,7 +321,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
assign TransmitInactive = ((state == INTER_CS) | (state == CS_INACTIVE) | (state == INTER_XFR) | (ReceiveShiftFullDelayPCLK & ZeroDelayHoldMode));
|
||||
assign Active0 = (state == ACTIVE_0);
|
||||
|
||||
//Signal tracks which edge of sck to shift data
|
||||
// Signal tracks which edge of sck to shift data
|
||||
always_comb
|
||||
case(SckMode[1:0])
|
||||
2'b00: ShiftEdge = ~sck & SCLKenable;
|
||||
@ -349,36 +331,36 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
default: ShiftEdge = sck & SCLKenable;
|
||||
endcase
|
||||
|
||||
//Transmit shift register
|
||||
assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0];
|
||||
// Transmit shift register
|
||||
assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0];
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if(~PRESETn) TransmitShiftReg <= 8'b0;
|
||||
else if (TransmitShiftRegLoad) TransmitShiftReg <= TransmitDataEndian;
|
||||
else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0};
|
||||
else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0};
|
||||
|
||||
assign SPIOut = TransmitShiftReg[7];
|
||||
|
||||
//If in loopback mode, receive shift register is connected directly to module's output pins. Else, connected to SPIIn
|
||||
//There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges
|
||||
// If in loopback mode, receive shift register is connected directly to module's output pins. Else, connected to SPIIn
|
||||
// There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges
|
||||
assign ShiftIn = P.SPI_LOOPBACK_TEST ? SPIOut : SPIIn;
|
||||
|
||||
//Receive shift register
|
||||
// Receive shift register
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if(~PRESETn) ReceiveShiftReg <= 8'b0;
|
||||
else if (SampleEdge & SCLKenable) begin
|
||||
if (~Active) ReceiveShiftReg <= 8'b0;
|
||||
else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn};
|
||||
if (~Active) ReceiveShiftReg <= 8'b0;
|
||||
else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn};
|
||||
end
|
||||
|
||||
//Aligns received data and reverses if little-endian
|
||||
// Aligns received data and reverses if little-endian
|
||||
assign LeftShiftAmount = 4'h8 - Format[4:1];
|
||||
assign ASR = ReceiveShiftReg << LeftShiftAmount[2:0];
|
||||
assign ReceiveShiftRegEndian = Format[0] ? {ASR[0], ASR[1], ASR[2], ASR[3], ASR[4], ASR[5], ASR[6], ASR[7]} : ASR[7:0];
|
||||
|
||||
//Interrupt logic: raise interrupt if any enabled interrupts are pending
|
||||
// Interrupt logic: raise interrupt if any enabled interrupts are pending
|
||||
assign SPIIntr = |(InterruptPending & InterruptEnable);
|
||||
|
||||
//Chip select logic
|
||||
// Chip select logic
|
||||
always_comb
|
||||
case(ChipSelectID[1:0])
|
||||
2'b00: ChipSelectAuto = {ChipSelectDef[3], ChipSelectDef[2], ChipSelectDef[1], ChipSelectInternal[0]};
|
||||
@ -390,14 +372,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||
assign SPICS = ChipSelectMode[0] ? ChipSelectDef : ChipSelectAuto;
|
||||
endmodule
|
||||
|
||||
module SynchFIFO #(parameter M =3 , N= 8)(
|
||||
input logic PCLK, wen, ren, PRESETn,
|
||||
input logic winc,rinc,
|
||||
input logic [N-1:0] wdata,
|
||||
input logic [M-1:0] wwatermarklevel, rwatermarklevel,
|
||||
module SynchFIFO #(parameter M=3, N=8)( // 2^M entries of N bits each
|
||||
input logic PCLK, wen, ren, PRESETn,
|
||||
input logic winc, rinc,
|
||||
input logic [N-1:0] wdata,
|
||||
input logic [M-1:0] wwatermarklevel, rwatermarklevel,
|
||||
output logic [N-1:0] rdata,
|
||||
output logic wfull, rempty,
|
||||
output logic wwatermark, rwatermark);
|
||||
output logic wfull, rempty,
|
||||
output logic wwatermark, rwatermark);
|
||||
|
||||
/* Pointer FIFO using design elements from "Simulation and Synthesis Techniques
|
||||
for Asynchronous FIFO Design" by Clifford E. Cummings. Namely, M bit read and write pointers
|
||||
@ -409,8 +391,6 @@ module SynchFIFO #(parameter M =3 , N= 8)(
|
||||
logic [N-1:0] mem[2**M];
|
||||
logic [M:0] rptr, wptr;
|
||||
logic [M:0] rptrnext, wptrnext;
|
||||
logic rempty_val;
|
||||
logic wfull_val;
|
||||
logic [M-1:0] raddr;
|
||||
logic [M-1:0] waddr;
|
||||
|
||||
@ -428,53 +408,43 @@ module SynchFIFO #(parameter M =3 , N= 8)(
|
||||
end
|
||||
else begin
|
||||
if (wen) begin
|
||||
wfull <= wfull_val;
|
||||
wfull <= ({~wptrnext[M], wptrnext[M-1:0]} == rptr);
|
||||
wptr <= wptrnext;
|
||||
end
|
||||
if (ren) begin
|
||||
rptr <= rptrnext;
|
||||
rempty <= rempty_val;
|
||||
rempty <= (wptr == rptrnext);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign raddr = rptr[M-1:0];
|
||||
assign rptrnext = rptr + {3'b0, (rinc & ~rempty)};
|
||||
assign rempty_val = (wptr == rptrnext);
|
||||
assign rptrnext = rptr + {{(M){1'b0}}, (rinc & ~rempty)};
|
||||
assign rwatermark = ((waddr - raddr) < rwatermarklevel) & ~wfull;
|
||||
assign waddr = wptr[M-1:0];
|
||||
assign wwatermark = ((waddr - raddr) > wwatermarklevel) | wfull;
|
||||
assign wptrnext = wptr + {3'b0, (winc & ~wfull)};
|
||||
assign wfull_val = ({~wptrnext[M], wptrnext[M-1:0]} == rptr);
|
||||
assign wptrnext = wptr + {{(M){1'b0}}, (winc & ~wfull)};
|
||||
endmodule
|
||||
|
||||
module TransmitShiftFSM(
|
||||
input logic PCLK, PRESETn,
|
||||
input logic TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0,
|
||||
input logic PCLK, PRESETn,
|
||||
input logic TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0,
|
||||
output logic TransmitShiftEmpty);
|
||||
|
||||
typedef enum logic [1:0] {TransmitShiftEmptyState, TransmitShiftHoldState, TransmitShiftNotEmptyState} statetype;
|
||||
statetype TransmitState, TransmitNextState;
|
||||
always_ff @(posedge PCLK, negedge PRESETn)
|
||||
if (~PRESETn) TransmitState <= TransmitShiftEmptyState;
|
||||
else TransmitState <= TransmitNextState;
|
||||
if (~PRESETn) TransmitShiftEmpty <= 1;
|
||||
else if (TransmitShiftEmpty) begin
|
||||
if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrame & Active0))) TransmitShiftEmpty <= 1;
|
||||
else if (~TransmitFIFOReadEmpty) TransmitShiftEmpty <= 0;
|
||||
end else begin
|
||||
if (ReceivePenultimateFrame & Active0) TransmitShiftEmpty <= 1;
|
||||
else TransmitShiftEmpty <= 0;
|
||||
end
|
||||
|
||||
always_comb
|
||||
case(TransmitState)
|
||||
TransmitShiftEmptyState: begin
|
||||
if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrameBoolean & Active0))) TransmitNextState = TransmitShiftEmptyState;
|
||||
else if (~TransmitFIFOReadEmpty) TransmitNextState = TransmitShiftNotEmptyState;
|
||||
end
|
||||
TransmitShiftNotEmptyState: begin
|
||||
if (ReceivePenultimateFrameBoolean & Active0) TransmitNextState = TransmitShiftEmptyState;
|
||||
else TransmitNextState = TransmitShiftNotEmptyState;
|
||||
end
|
||||
endcase
|
||||
assign TransmitShiftEmpty = (TransmitNextState == TransmitShiftEmptyState);
|
||||
endmodule
|
||||
|
||||
module ReceiveShiftFSM(
|
||||
input logic PCLK, PRESETn, SCLKenable,
|
||||
input logic ReceivePenultimateFrameBoolean, SampleEdge, SckMode,
|
||||
input logic PCLK, PRESETn, SCLKenable,
|
||||
input logic ReceivePenultimateFrame, SampleEdge, SckMode,
|
||||
output logic ReceiveShiftFull
|
||||
);
|
||||
typedef enum logic [1:0] {ReceiveShiftFullState, ReceiveShiftNotFullState, ReceiveShiftDelayState} statetype;
|
||||
@ -484,17 +454,12 @@ module ReceiveShiftFSM(
|
||||
else if (SCLKenable) begin
|
||||
case (ReceiveState)
|
||||
ReceiveShiftFullState: ReceiveState <= ReceiveShiftNotFullState;
|
||||
ReceiveShiftNotFullState: if (ReceivePenultimateFrameBoolean & (SampleEdge)) ReceiveState <= ReceiveShiftDelayState;
|
||||
ReceiveShiftNotFullState: if (ReceivePenultimateFrame & (SampleEdge)) ReceiveState <= ReceiveShiftDelayState;
|
||||
else ReceiveState <= ReceiveShiftNotFullState;
|
||||
ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState;
|
||||
ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState);
|
||||
assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState);
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
|
||||
end
|
||||
|
||||
// global stall and flush control
|
||||
hazard hzu(
|
||||
hazard #(P) hzu(
|
||||
.BPWrongE, .CSRWriteFenceM, .RetM, .TrapM,
|
||||
.LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD,
|
||||
.LSUStallM, .IFUStallF,
|
||||
|
@ -11,7 +11,7 @@ export MOD ?= orig
|
||||
# title to add a note in the synth's directory name
|
||||
TITLE =
|
||||
# tsmc28, sky130, and sky90 presently supported
|
||||
export TECH ?= sky90
|
||||
export TECH ?= sky130
|
||||
# MAXCORES allows parallel compilation, which is faster but less CPU-efficient
|
||||
# Avoid when doing sweeps of many optimization points in parallel
|
||||
export MAXCORES ?= 1
|
||||
@ -20,7 +20,7 @@ export MAXCORES ?= 1
|
||||
export MAXOPT ?= 0
|
||||
export DRIVE ?= FLOP
|
||||
export USESRAM ?= 0
|
||||
|
||||
export WIDTH ?= 32
|
||||
|
||||
time := $(shell date +%F-%H-%M)
|
||||
hash := $(shell git rev-parse --short HEAD)
|
||||
@ -94,10 +94,10 @@ endif
|
||||
|
||||
ifneq ($(MOD), orig)
|
||||
# PMP 0
|
||||
sed -i 's/PMP_ENTRIES \(64\|16\|0\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh
|
||||
sed -i 's/PMP_ENTRIES.*\(64\|16\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh
|
||||
ifneq ($(MOD), PMP0)
|
||||
# no priv
|
||||
sed -i 's/ZICSR_SUPPORTED *1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh
|
||||
sed -i 's/ZICSR_SUPPORTED.*1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh
|
||||
ifneq ($(MOD), noPriv)
|
||||
# turn off FPU
|
||||
sed -i 's/1 *<< *3/0 << 3/' $(CONFIGDIR)/config.vh
|
||||
@ -147,4 +147,4 @@ clean:
|
||||
rm -f power.saif
|
||||
rm -f Synopsys_stack_trace_*.txt
|
||||
rm -f crte_*.txt
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@ This subdirectory contains synthesis scripts for use with Synopsys
|
||||
scripts/synth.tcl.
|
||||
|
||||
Example Usage
|
||||
make synth DESIGN=wallypipelinedcore FREQ=500
|
||||
make synth DESIGN=wallypipelinedcore FREQ=500 CONFIG=rv32e
|
||||
|
||||
environment variables
|
||||
|
||||
@ -38,5 +38,25 @@ To run ppa analysis that hones into target frequency, you can type:
|
||||
python3 ppa/ppaSynth.py from the synthDC directory. This runs a sweep
|
||||
across all modules listed at the bottom of the ppaSynth.py file.
|
||||
|
||||
There are two options for running the sweep. The first option runs all modules for
|
||||
all techs around a given frequency (i.e., freqs). The second option
|
||||
will run all designs for the specific module based on bestSynths.csv
|
||||
values. Because the second option is assigned last, it takes priority. If the
|
||||
second set of values is commented out, it will run all widths.
|
||||
|
||||
WARNING: The first option may launch many runs that could consume all
|
||||
the available licenses. Therefore, care must be taken
|
||||
to be sure that enough licenses are available for this first option.
|
||||
|
||||
##### Run specific syntheses
|
||||
widths = [8, 16, 32, 64, 128]
|
||||
modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8']
|
||||
techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']
|
||||
freqs = [5000]
|
||||
synthsToRun = allCombos(widths, modules, techs, freqs)
|
||||
|
||||
##### Run a sweep based on best delay found in existing syntheses
|
||||
module = 'adder'
|
||||
width = 32
|
||||
tech = 'tsmc28psyn'
|
||||
synthsToRun = freqSweep(module, width, tech)
|
@ -252,7 +252,7 @@ if __name__ == '__main__':
|
||||
|
||||
TechSpec = namedtuple("TechSpec", "color shape targfreq fo4 add32area add32lpower add32denergy")
|
||||
techdict = {}
|
||||
techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 1440.600027, 714.057, 0.658023)
|
||||
techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 2581, 18, 0.685)
|
||||
techdict['sky90'] = TechSpec('gray', 'o', args.sky90freq, 43.2e-3, 1440.600027, 714.057, 0.658023)
|
||||
techdict['tsmc28psyn'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533)
|
||||
|
||||
|
@ -1,24 +1,74 @@
|
||||
Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (nJ)
|
||||
priorityencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078
|
||||
priorityencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348
|
||||
priorityencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111
|
||||
priorityencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981
|
||||
priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861
|
||||
add,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422
|
||||
add,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417
|
||||
add,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014
|
||||
add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874
|
||||
add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755
|
||||
binencoder,sky130,8,1000,1.0000,50.960001,24.761,0.010685929975270078
|
||||
binencoder,sky130,16,1000,1.0000,136.220003,77.243,0.021773774467348
|
||||
binencoder,sky130,32,1000,1.0000,372.400007,189.626,0.04371111111111111
|
||||
binencoder,sky130,64,1000,1.0000,797.720015,382.205,0.07393850658857981
|
||||
binencoder,sky130,128,900,1.1111,1602.300031,610.009,0.1261366969785861
|
||||
adder,sky130,8,1700,0.588235,253.820005,154.438,0.10825587752870422
|
||||
adder,sky130,16,1300,0.7692307,722.260013,485.109,0.32460910944935417
|
||||
adder,sky130,32,1100,0.90909,1440.600027,714.057,0.6580226904376014
|
||||
adder,sky130,64,950,1.0526315,2781.240054,1050.0,0.9392239364188874
|
||||
adder,sky130,128,900,1.1111,6186.740118,2230.0,2.1480106100795755
|
||||
csa,sky130,8,1000,1.0000,266.560005,154.202,0.13650573115665163
|
||||
csa,sky130,16,1000,1.0000,533.12001,308.404,0.27263530601922104
|
||||
csa,sky130,32,1000,1.0000,1066.240021,616.808,0.5448072247308093
|
||||
csa,sky130,64,1000,1.0000,2132.480042,1230.0,1.0905412240768841
|
||||
csa,sky130,128,1000,1.0000,4264.960083,2470.0,2.178553363682347
|
||||
shifter,sky130,8,1000,1.0000,259.700005,196.451,0.07534088282874972
|
||||
shifter,sky130,16,1000,1.0000,666.400006,558.433,0.19552906110283155
|
||||
shifter,sky130,32,1000,1.0000,1475.880027,768.262,0.3807431082700759
|
||||
shifter,sky130,64,1000,1.0000,3914.120062,2680.0,1.144802541988198
|
||||
shifter,sky130,128,900,1.1111,9192.400136,6080.0,2.9008914525432616
|
||||
comparator,sky130,8,1700,0.588235,200.900004,136.6,0.05001033271337053
|
||||
comparator,sky130,16,1500,0.6666667,358.680007,189.253,0.06321553011448482
|
||||
comparator,sky130,32,1300,0.7692307,690.900013,315.709,0.10771793448084398
|
||||
comparator,sky130,64,1200,0.8333333,1372.980026,508.393,0.2048577820389901
|
||||
comparator,sky130,128,1150,0.869565,2744.980052,796.047,0.34396273737011823
|
||||
flop,sky130,8,1000,1.0000,133.279999,64.8145,0.193835
|
||||
flop,sky130,16,1000,1.0000,266.5599975,129.629,0.38715000000000005
|
||||
flop,sky130,32,1000,1.0000,533.119995,259.258,0.7723000000000001
|
||||
flop,sky130,64,1000,1.0000,1066.23999,520.0,1.54955
|
||||
flop,sky130,128,1000,1.0000,2132.4799805,1035.0,3.094
|
||||
mux2,sky130,8,1000,1.0000,63.700001,21.541,0.01932440083034535
|
||||
mux2,sky130,16,1000,1.0000,119.560002,32.354,0.03884536082474227
|
||||
mux2,sky130,32,1000,1.0000,375.340008,259.372,0.13671796921846893
|
||||
mux2,sky130,64,1000,1.0000,479.220009,115.22,0.15148539160324087
|
||||
mux2,sky130,128,1000,1.0000,1302.420025,767.078,0.4665334665334665
|
||||
mux4,sky130,8,1000,1.0000,148.960002,66.984,0.04026661024121879
|
||||
mux4,sky130,16,1000,1.0000,392.0,398.313,0.1037037037037037
|
||||
mux4,sky130,32,1000,1.0000,594.860011,331.197,0.131617289946576
|
||||
mux4,sky130,64,1000,1.0000,899.640016,344.331,0.2862533692722372
|
||||
mux4,sky130,128,1000,1.0000,2013.900038,818.249,0.6094182825484764
|
||||
mux8,sky130,8,1000,1.0000,287.140006,116.648,0.06089260808926081
|
||||
mux8,sky130,16,1000,1.0000,582.120003,282.366,0.14455681142177274
|
||||
mux8,sky130,32,1000,1.0000,1319.079995,670.683,0.35777218376337316
|
||||
mux8,sky130,64,1000,1.0000,2132.48004,808.482,0.44287680660701995
|
||||
mux8,sky130,128,1000,1.0000,4575.620089,1830.0,0.9786276715410572
|
||||
mul,sky130,8,1000,1.0000,2194.220041,1440.0,1.421374045801527
|
||||
mul,sky130,16,1000,1.0000,7519.540137,4940.0,6.376128385155466
|
||||
mul,sky130,32,1000,1.0000,25200.700446,14900.0,24.931847968545217
|
||||
mul,sky130,64,1000,1.0000,86011.661365,42600.0,88.84651898734177
|
||||
mul,sky130,128,800,1.2500,296198.144128,114000.0,273.3148854961832
|
||||
binencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078
|
||||
binencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348
|
||||
binencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111
|
||||
binencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981
|
||||
binencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861
|
||||
adder,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422
|
||||
adder,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417
|
||||
adder,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014
|
||||
adder,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874
|
||||
adder,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755
|
||||
csa,sky90,8,5758,0.16536141368530738,266.560005,154.202,0.13650573115665163
|
||||
csa,sky90,16,5931,0.1654056314280897,533.12001,308.404,0.27263530601922104
|
||||
csa,sky90,32,5758,0.16536141368530738,1066.240021,616.808,0.5448072247308093
|
||||
csa,sky90,64,5931,0.1654056314280897,2132.480042,1230.0,1.0905412240768841
|
||||
csa,sky90,128,5931,0.1654056314280897,4264.960083,2470.0,2.178553363682347
|
||||
shiftleft,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972
|
||||
shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155
|
||||
shiftleft,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759
|
||||
shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198
|
||||
shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616
|
||||
shifter,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972
|
||||
shifter,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155
|
||||
shifter,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759
|
||||
shifter,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198
|
||||
shifter,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616
|
||||
comparator,sky90,8,4839,0.20629126741062204,200.900004,136.6,0.05001033271337053
|
||||
comparator,sky90,16,4018,0.24806303982080635,358.680007,189.253,0.06321553011448482
|
||||
comparator,sky90,32,3602,0.276293542476402,690.900013,315.709,0.10771793448084398
|
||||
@ -44,31 +94,31 @@ mux8,sky90,16,3362,0.295237998810232,582.120003,282.366,0.14455681142177274
|
||||
mux8,sky90,32,3178,0.3140553102580239,1319.079995,670.683,0.35777218376337316
|
||||
mux8,sky90,64,2906,0.3440756228492774,2132.48004,808.482,0.44287680660701995
|
||||
mux8,sky90,128,2667,0.3749401308586427,4575.620089,1830.0,0.9786276715410572
|
||||
mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527
|
||||
mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466
|
||||
mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217
|
||||
mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177
|
||||
mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832
|
||||
priorityencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
|
||||
priorityencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
|
||||
priorityencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
|
||||
priorityencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
|
||||
priorityencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
|
||||
add,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
|
||||
add,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
|
||||
add,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
|
||||
add,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
|
||||
add,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731
|
||||
mul,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527
|
||||
mul,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466
|
||||
mul,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217
|
||||
mul,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177
|
||||
mul,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832
|
||||
binencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
|
||||
binencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
|
||||
binencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
|
||||
binencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
|
||||
binencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
|
||||
adder,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
|
||||
adder,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
|
||||
adder,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
|
||||
adder,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
|
||||
adder,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731
|
||||
csa,tsmc28,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921
|
||||
csa,tsmc28,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842
|
||||
csa,tsmc28,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941
|
||||
csa,tsmc28,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076
|
||||
csa,tsmc28,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363
|
||||
shiftleft,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
|
||||
shiftleft,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
|
||||
shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
|
||||
shiftleft,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
|
||||
shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
|
||||
shifter,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
|
||||
shifter,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
|
||||
shifter,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
|
||||
shifter,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
|
||||
shifter,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
|
||||
comparator,tsmc28,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243
|
||||
comparator,tsmc28,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673
|
||||
comparator,tsmc28,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319
|
||||
@ -94,8 +144,58 @@ mux8,tsmc28,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262
|
||||
mux8,tsmc28,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814
|
||||
mux8,tsmc28,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495
|
||||
mux8,tsmc28,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426
|
||||
mult,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
|
||||
mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
|
||||
mult,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
|
||||
mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
|
||||
mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719
|
||||
mul,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
|
||||
mul,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
|
||||
mul,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
|
||||
mul,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
|
||||
mul,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719
|
||||
binencoder,tsmc28psyn,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
|
||||
binencoder,tsmc28psyn,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
|
||||
binencoder,tsmc28psyn,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
|
||||
binencoder,tsmc28psyn,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
|
||||
binencoder,tsmc28psyn,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
|
||||
adder,tsmc28psyn,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
|
||||
adder,tsmc28psyn,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
|
||||
adder,tsmc28psyn,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
|
||||
adder,tsmc28psyn,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
|
||||
adder,tsmc28psyn,128,7000,0.142857142857,907.452008,4360.0,0.3451183029643731
|
||||
csa,tsmc28psyn,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921
|
||||
csa,tsmc28psyn,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842
|
||||
csa,tsmc28psyn,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941
|
||||
csa,tsmc28psyn,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076
|
||||
csa,tsmc28psyn,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363
|
||||
shifter,tsmc28psyn,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
|
||||
shifter,tsmc28psyn,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
|
||||
shifter,tsmc28psyn,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
|
||||
shifter,tsmc28psyn,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
|
||||
shifter,tsmc28psyn,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
|
||||
comparator,tsmc28psyn,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243
|
||||
comparator,tsmc28psyn,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673
|
||||
comparator,tsmc28psyn,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319
|
||||
comparator,tsmc28psyn,64,11080,0.09024670758122744,294.21,1250.0,0.0684115523465704
|
||||
comparator,tsmc28psyn,128,9371,0.10671119720414043,558.432,2400.0,0.12794792444776437
|
||||
flop,tsmc28psyn,8,10,0.048889000000002625,15.12,78.6345,0.027246000000000003
|
||||
flop,tsmc28psyn,16,10,0.048889000000002625,30.24,157.29,0.054290000000000005
|
||||
flop,tsmc28psyn,32,10,0.048889000000002625,60.4799995,314.5805,0.10908000000000001
|
||||
flop,tsmc28psyn,64,10,0.048889000000002625,120.959999,630.0,0.21765500000000004
|
||||
flop,tsmc28psyn,128,10,0.048889000000002625,241.919998,1260.0,0.43579999999999997
|
||||
mux2,tsmc28psyn,8,29614,0.03374481252110488,16.758,114.564,0.005436617815897886
|
||||
mux2,tsmc28psyn,16,18767,0.053046021580433735,15.75,88.025,0.005142004582511856
|
||||
mux2,tsmc28psyn,32,17903,0.05585556035301346,32.130001,171.146,0.009897782494553985
|
||||
mux2,tsmc28psyn,64,18568,0.05371109651012495,91.35,523.884,0.027574321413183972
|
||||
mux2,tsmc28psyn,128,16637,0.05991099044298852,176.525999,941.106,0.05012923002945243
|
||||
mux4,tsmc28psyn,8,18151,0.055092383284667513,27.971999,133.963,0.008032615282904523
|
||||
mux4,tsmc28psyn,16,16486,0.06057952759917506,39.438,186.231,0.012556108213029236
|
||||
mux4,tsmc28psyn,32,15196,0.06580579126085812,69.174,324.969,0.023229797315082915
|
||||
mux4,tsmc28psyn,64,13926,0.07180612868016659,137.465999,648.086,0.04574177796926612
|
||||
mux4,tsmc28psyn,128,13090,0.07636619404125286,294.335997,1420.0,0.09358288770053477
|
||||
mux8,tsmc28psyn,8,12902,0.07750336319950395,44.604,214.286,0.0117501162610448
|
||||
mux8,tsmc28psyn,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262
|
||||
mux8,tsmc28psyn,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814
|
||||
mux8,tsmc28psyn,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495
|
||||
mux8,tsmc28psyn,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426
|
||||
mul,tsmc28psyn,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
|
||||
mul,tsmc28psyn,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
|
||||
mul,tsmc28psyn,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
|
||||
mul,tsmc28psyn,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
|
||||
mul,tsmc28psyn,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719
|
||||
|
|
File diff suppressed because it is too large
Load Diff
@ -12,13 +12,11 @@ from ppaAnalyze import synthsfromcsv
|
||||
|
||||
def runCommand(module, width, tech, freq):
|
||||
command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq)
|
||||
print('here we go')
|
||||
|
||||
subprocess.Popen(command, shell=True)
|
||||
subprocess.call(command, shell=True)
|
||||
|
||||
def deleteRedundant(synthsToRun):
|
||||
'''removes any previous runs for the current synthesis specifications'''
|
||||
synthStr = "rm -rf runs/ppa_{}_{}_rv32e_{}nm_{}_*"
|
||||
synthStr = "rm -rf runs/{}_{}_rv32e_{}_{}_*"
|
||||
for synth in synthsToRun:
|
||||
bashCommand = synthStr.format(*synth)
|
||||
outputCPL = subprocess.check_output(['bash','-c', bashCommand])
|
||||
@ -34,8 +32,21 @@ def freqSweep(module, width, tech):
|
||||
synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]]
|
||||
return synthsToRun
|
||||
|
||||
def freqModuleSweep(widths, modules, tech):
|
||||
synthsToRun = []
|
||||
arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8]
|
||||
allSynths = synthsfromcsv('ppa/bestSynths.csv')
|
||||
for w in widths:
|
||||
for module in modules:
|
||||
for synth in allSynths:
|
||||
if (synth.module == str(module)) & (synth.tech == tech) & (synth.width == w):
|
||||
f = 1000/synth.delay
|
||||
for freq in [round(f+f*x/100) for x in arr]:
|
||||
synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]]
|
||||
return synthsToRun
|
||||
|
||||
def filterRedundant(synthsToRun):
|
||||
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
|
||||
bashCommand = "find . -path '*runs/*' -prune"
|
||||
output = subprocess.check_output(['bash','-c', bashCommand])
|
||||
specReg = re.compile('[a-zA-Z0-9]+')
|
||||
allSynths = output.decode("utf-8").split('\n')[:-1]
|
||||
@ -59,21 +70,30 @@ def allCombos(widths, modules, techs, freqs):
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
##### Run specific syntheses
|
||||
##### Run specific syntheses for a specific frequency
|
||||
widths = [8, 16, 32, 64, 128]
|
||||
modules = ['mult', 'add', 'shiftleft', 'flop', 'comparator', 'priorityencoder', 'add', 'csa', 'mux2', 'mux4', 'mux8']
|
||||
techs = ['sky90', 'tsmc28']
|
||||
modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8']
|
||||
techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']
|
||||
freqs = [5000]
|
||||
synthsToRun = allCombos(widths, modules, techs, freqs)
|
||||
|
||||
##### Run a sweep based on best delay found in existing syntheses
|
||||
module = 'add'
|
||||
module = 'adder'
|
||||
width = 32
|
||||
tech = 'sky90'
|
||||
tech = 'tsmc28psyn'
|
||||
synthsToRun = freqSweep(module, width, tech)
|
||||
|
||||
##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses
|
||||
modules = ['adder']
|
||||
# widths = [8, 16, 32, 64, 128]
|
||||
widths = [32]
|
||||
tech = 'sky130'
|
||||
synthsToRun = freqModuleSweep(widths, modules, tech)
|
||||
|
||||
##### Only do syntheses for which a run doesn't already exist
|
||||
synthsToRun = filterRedundant(synthsToRun)
|
||||
|
||||
synthsToRun = filterRedundant(synthsToRun)
|
||||
pool = Pool(processes=25)
|
||||
pool.starmap(runCommand, synthsToRun)
|
||||
|
||||
pool.starmap(runCommand, synthsToRun)
|
||||
pool.close()
|
||||
pool.join()
|
@ -18,7 +18,6 @@ suppress_message {VER-274}
|
||||
# Enable Multicore
|
||||
set_host_options -max_cores $::env(MAXCORES)
|
||||
|
||||
|
||||
# get outputDir and configDir from environment (Makefile)
|
||||
set outputDir $::env(OUTPUTDIR)
|
||||
set cfg $::env(CONFIGDIR)
|
||||
@ -26,6 +25,7 @@ set hdl_src "../src"
|
||||
set saifpower $::env(SAIFPOWER)
|
||||
set maxopt $::env(MAXOPT)
|
||||
set drive $::env(DRIVE)
|
||||
set width $::env(WIDTH)
|
||||
|
||||
eval file copy -force [glob ${cfg}/*.vh] {$outputDir/hdl/}
|
||||
eval file copy -force [glob ${hdl_src}/cvw.sv] {$outputDir/hdl/}
|
||||
@ -88,7 +88,13 @@ if { [shell_is_in_topographical_mode] } {
|
||||
#set alib_library_analysis_path ./$outputDir
|
||||
define_design_lib WORK -path ./$outputDir/WORK
|
||||
analyze -f sverilog -lib WORK $my_verilog_files
|
||||
elaborate $my_toplevel -lib WORK
|
||||
# If wrapper=0, we want to run against a specific module and pass
|
||||
# width to DC
|
||||
if { $wrapper == 1 } {
|
||||
elaborate $my_toplevel -lib WORK
|
||||
} else {
|
||||
elaborate $my_toplevel -lib WORK -parameters WIDTH=$width
|
||||
}
|
||||
|
||||
# Set the current_design
|
||||
current_design $my_toplevel
|
||||
@ -447,4 +453,4 @@ set t2 [clock seconds]
|
||||
set t [expr $t2 - $t1]
|
||||
echo [expr $t/60]
|
||||
|
||||
quit
|
||||
quit
|
||||
|
14
synthDC/wallySynthAll.sh
Executable file
14
synthDC/wallySynthAll.sh
Executable file
@ -0,0 +1,14 @@
|
||||
# Run all Wally synthesis experiments from chapter 8
|
||||
# However, trying to run the freqsweeps at the same time maxes out licenses and some runs fail
|
||||
#./wallySynth.py --freqsweep 330 --tech sky130
|
||||
#./wallySynth.py --freqsweep 870 --tech sky90
|
||||
#./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram
|
||||
./wallySynth.py --configsweep --tech sky130 --targetfreq 330
|
||||
./wallySynth.py --configsweep --tech sky90 --targetfreq 870
|
||||
./wallySynth.py --configsweep --tech tsmc28psyn --targetfreq 2800 --usesram
|
||||
./wallySynth.py --featuresweep --tech sky130 --targetfreq 330
|
||||
./wallySynth.py --featuresweep --tech sky90 --targetfreq 870
|
||||
./wallySynth.py --featuresweep --tech tsmc28psyn --targetfreq 2800 --usesram
|
||||
# Extract summary data (run this by hand after all experiments finish)
|
||||
#./extractSummary.py --sky130freq 330 --sky90freq 870 --tsmcfreq 2800
|
||||
|
@ -115,8 +115,8 @@ module testbenchfp;
|
||||
logic FlushE;
|
||||
logic IFDivStartE;
|
||||
logic FDivDoneE;
|
||||
logic [P.NE+1:0] QeM;
|
||||
logic [P.DIVb:0] QmM;
|
||||
logic [P.NE+1:0] UeM;
|
||||
logic [P.DIVb:0] UmM;
|
||||
logic [P.XLEN-1:0] FIntDivResultM;
|
||||
logic ResMatch; // Check if result match
|
||||
logic FlagMatch; // Check if IEEE flags match
|
||||
@ -145,9 +145,12 @@ module testbenchfp;
|
||||
|
||||
initial begin
|
||||
// Information displayed for user on what is simulating
|
||||
$display("\nThe start of simulation...");
|
||||
$display("This simulation for TEST is %s", TEST);
|
||||
$display("This simulation for TEST is of the operand size of %s", TEST_SIZE);
|
||||
//$display("\nThe start of simulation...");
|
||||
//$display("This simulation for TEST is %s", TEST);
|
||||
//$display("This simulation for TEST is of the operand size of %s", TEST_SIZE);
|
||||
|
||||
// $display("FPDUR %d %d DIVN %d LOGR %d RK %d RADIX %d DURLEN %d", FPDUR, DIVN, LOGR, RK, RADIX, DURLEN);
|
||||
|
||||
if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported
|
||||
if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion
|
||||
// add the 128-bit cvtint tests to the to-be-tested list
|
||||
@ -649,7 +652,7 @@ module testbenchfp;
|
||||
string tt0;
|
||||
tt0 = $psprintf("%s", Tests[TestNum]);
|
||||
testname = {pp, tt0};
|
||||
$display("Here you are %s", testname);
|
||||
//$display("Here you are %s", testname);
|
||||
$display("\n\nRunning %s vectors ", Tests[TestNum]);
|
||||
$readmemh(testname, TestVectors);
|
||||
// set the test index to 0
|
||||
@ -705,7 +708,7 @@ module testbenchfp;
|
||||
end
|
||||
|
||||
postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
|
||||
.OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
|
||||
.OpCtrl(OpCtrlVal), .DivUm(Quot), .DivUe(DivCalcExp),
|
||||
.Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
|
||||
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
|
||||
.XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
|
||||
@ -734,8 +737,8 @@ module testbenchfp;
|
||||
.XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN),
|
||||
.FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
|
||||
.StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .QeM(DivCalcExp),
|
||||
.QmM(Quot),
|
||||
.StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp),
|
||||
.UmM(Quot),
|
||||
.FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
|
||||
.Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
|
||||
.FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
|
||||
|
@ -389,6 +389,7 @@ module testbench;
|
||||
|
||||
assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz;
|
||||
assign SDCCmdIn = SDCCmd;
|
||||
assign SDCDat = sd_dat_reg_t ? sd_dat_reg_o : sd_dat_i;
|
||||
assign SDCDatIn = SDCDat;
|
||||
-----/\----- EXCLUDED -----/\----- */
|
||||
assign SDCIntr = '0;
|
||||
|
Loading…
Reference in New Issue
Block a user