mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
Merge branch 'main' of https://github.com/openhwgroup/cvw
This commit is contained in:
commit
b137759b45
5
.gitignore
vendored
5
.gitignore
vendored
@ -10,7 +10,7 @@ __pycache__/
|
|||||||
addins/riscv-arch-test/Makefile.include
|
addins/riscv-arch-test/Makefile.include
|
||||||
addins/riscv-tests/target
|
addins/riscv-tests/target
|
||||||
addins/TestFloat-3e/build/Linux-x86_64-GCC/*
|
addins/TestFloat-3e/build/Linux-x86_64-GCC/*
|
||||||
benchmarks/embench/wally*.json
|
|
||||||
|
|
||||||
#vsim work files to ignore
|
#vsim work files to ignore
|
||||||
transcript
|
transcript
|
||||||
@ -175,3 +175,6 @@ tests/fp/combined_IF_vectors/IF_vectors/*.tv
|
|||||||
sim/bp-results/*.log
|
sim/bp-results/*.log
|
||||||
sim/branch*.log
|
sim/branch*.log
|
||||||
/tests/custom/fpga-test-sdc/bin/fpga-test-sdc
|
/tests/custom/fpga-test-sdc/bin/fpga-test-sdc
|
||||||
|
benchmarks/embench/wally*.json
|
||||||
|
benchmarks/embench/run*
|
||||||
|
sim/cfi.log
|
||||||
|
16
.gitmodules
vendored
16
.gitmodules
vendored
@ -1,16 +1,9 @@
|
|||||||
[submodule "sky130/sky130_osu_sc_t12"]
|
[submodule "sky130/sky130_osu_sc_t12"]
|
||||||
path = sky130/sky130_osu_sc_t12
|
path = sky130/sky130_osu_sc_t12
|
||||||
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/
|
url = https://foss-eda-tools.googlesource.com/skywater-pdk/libs/sky130_osu_sc_t12/
|
||||||
[submodule "addins/riscv-arch-test"]
|
|
||||||
path = addins/riscv-arch-test
|
|
||||||
url = https://github.com/riscv-non-isa/riscv-arch-test
|
|
||||||
ignore = dirty
|
|
||||||
[submodule "addins/imperas-riscv-tests"]
|
[submodule "addins/imperas-riscv-tests"]
|
||||||
path = addins/imperas-riscv-tests
|
path = addins/imperas-riscv-tests
|
||||||
url = https://github.com/riscv-ovpsim/imperas-riscv-tests
|
url = https://github.com/riscv-ovpsim/imperas-riscv-tests
|
||||||
[submodule "addins/riscv-tests"]
|
|
||||||
path = addins/riscv-tests
|
|
||||||
url = https://github.com/riscv-software-src/riscv-tests
|
|
||||||
[submodule "addins/riscv-dv"]
|
[submodule "addins/riscv-dv"]
|
||||||
path = addins/riscv-dv
|
path = addins/riscv-dv
|
||||||
url = https://github.com/google/riscv-dv
|
url = https://github.com/google/riscv-dv
|
||||||
@ -30,6 +23,9 @@
|
|||||||
[submodule "addins/vivado-boards"]
|
[submodule "addins/vivado-boards"]
|
||||||
path = addins/vivado-boards
|
path = addins/vivado-boards
|
||||||
url = https://github.com/Digilent/vivado-boards/
|
url = https://github.com/Digilent/vivado-boards/
|
||||||
[submodule "addins/vivado-risc-v"]
|
[submodule "addins/ahbsdc"]
|
||||||
path = addins/vivado-risc-v
|
path = addins/ahbsdc
|
||||||
url = https://github.com/eugene-tarassov/vivado-risc-v.git
|
url = git@github.com:jacobpease/ahbsdc.git
|
||||||
|
[submodule "addins/riscv-arch-test"]
|
||||||
|
path = addins/riscv-arch-test
|
||||||
|
url = https://github.com/riscv-non-isa/riscv-arch-test
|
||||||
|
1
addins/ahbsdc
Submodule
1
addins/ahbsdc
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 5df21aa6625eca120e64ea353ca641aff37d90b2
|
@ -1 +1 @@
|
|||||||
Subproject commit 1480febc3ace5f471baeee4b1ae0d8fea16e4762
|
Subproject commit 4c5eb87983f51ca7fcf7855306877b3d1c3aabf1
|
@ -1 +1 @@
|
|||||||
Subproject commit 197179fdc9dfeeca821e848f373c897a3fdae86c
|
Subproject commit eb0a3892215ad2384702db02da1551a59701ec67
|
@ -1 +0,0 @@
|
|||||||
Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7
|
|
@ -1 +0,0 @@
|
|||||||
Subproject commit c76a8613a177b3a04face2cb8e15dd07a8d2fc40
|
|
@ -3,6 +3,7 @@
|
|||||||
# Compile Embench for Wally
|
# Compile Embench for Wally
|
||||||
|
|
||||||
embench_dir = ../../addins/embench-iot
|
embench_dir = ../../addins/embench-iot
|
||||||
|
ARCH=rv32imac_zicsr
|
||||||
|
|
||||||
all: build
|
all: build
|
||||||
run: build size sim
|
run: build size sim
|
||||||
@ -15,7 +16,7 @@ buildsize: build_speedopt_size build_sizeopt_size
|
|||||||
|
|
||||||
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
|
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
|
||||||
build_speedopt_speed:
|
build_speedopt_speed:
|
||||||
$(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
|
$(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-O2 -nostartfiles -march=$(ARCH)"
|
||||||
# remove files not used in embench1.0 When changing to 2.0, restore these files
|
# remove files not used in embench1.0 When changing to 2.0, restore these files
|
||||||
#rm -rf $(embench_dir)/bd_speedopt_speed/src/md5sum
|
#rm -rf $(embench_dir)/bd_speedopt_speed/src/md5sum
|
||||||
#rm -rf $(embench_dir)/bd_speedopt_speed/src/tarfind
|
#rm -rf $(embench_dir)/bd_speedopt_speed/src/tarfind
|
||||||
@ -23,7 +24,7 @@ build_speedopt_speed:
|
|||||||
find $(embench_dir)/bd_speedopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
|
find $(embench_dir)/bd_speedopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
|
||||||
|
|
||||||
build_sizeopt_speed:
|
build_sizeopt_speed:
|
||||||
$(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-Os -nostartfiles"
|
$(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S -march=$(ARCH)" --cflags="-Os -nostartfiles -march=$(ARCH)"
|
||||||
# remove files not used in embench1.0 When changing to 2.0, restore these files
|
# remove files not used in embench1.0 When changing to 2.0, restore these files
|
||||||
#rm -rf $(embench_dir)/bd_sizeopt_speed/src/md5sum
|
#rm -rf $(embench_dir)/bd_sizeopt_speed/src/md5sum
|
||||||
#rm -rf $(embench_dir)/bd_sizeopt_speed/src/tarfind
|
#rm -rf $(embench_dir)/bd_sizeopt_speed/src/tarfind
|
||||||
@ -32,10 +33,10 @@ build_sizeopt_speed:
|
|||||||
|
|
||||||
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
|
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
|
||||||
build_speedopt_size:
|
build_speedopt_size:
|
||||||
$(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-O2 -msave-restore" --dummy-libs="libgcc libm libc crt0"
|
$(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-O2 -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0"
|
||||||
|
|
||||||
build_sizeopt_size:
|
build_sizeopt_size:
|
||||||
$(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
|
$(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S -march=$(ARCH)" --cflags="-Os -msave-restore -march=$(ARCH)" --dummy-libs="libgcc libm libc crt0"
|
||||||
|
|
||||||
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
|
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
|
||||||
sim: modelsim_build_memfile modelsim_run speed
|
sim: modelsim_build_memfile modelsim_run speed
|
||||||
|
87
benchmarks/embench/embench_arch_sweep.py
Executable file
87
benchmarks/embench/embench_arch_sweep.py
Executable file
@ -0,0 +1,87 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# embench_arch_sweep.py
|
||||||
|
# David_Harris@hmc.edu 16 November 2023
|
||||||
|
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||||
|
|
||||||
|
# Run embench on a variety of architectures and collate results
|
||||||
|
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
import collections
|
||||||
|
|
||||||
|
# Architectures (-march strings) to sweep. run_arch_sweep() builds and runs them in
# this order; tabulate_arch_sweep() prints one column per entry and takes its row
# names from the results of the first entry.
#archs = ["rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr", "rv32imafdc_zba_zbb_zbc_zbs_zicsr"]
|
||||||
|
archs = ["rv32imafdc_zba_zbb_zbc_zbs_zicsr", "rv32i_zicsr", "rv32im_zicsr", "rv32imc_zicsr", "rv32imc_zba_zbb_zbc_zbs_zicsr"]
|
||||||
|
|
||||||
|
def calcgeomean(d, arch):
    """Return the geometric mean of the benchmark results recorded for ``arch``.

    Args:
        d: mapping of architecture name -> {program name: result}. Results may be
           numbers or numeric strings; each is converted with float().
        arch: key into ``d`` selecting the architecture to summarize.

    Returns:
        The geometric mean, as a float, over the fixed list of programs below.
        A program with no recorded result contributes a neutral factor of 1.0,
        but is still counted in the root.
    """
    progs = ["aha-mont64", "crc32", "cubic", "edn", "huffbench", "matmult-int", "minver", "nbody", "nettle-aes", "nettle-sha256", "nsichneu", "picojpeg", "qrduino", "sglib-combined", "slre", "st", "statemate", "ud", "wikisort"]
    results = d[arch]
    product = 1.0
    for p in progs:
        # Missing entries default to 1.0 so that they leave the product unchanged.
        product = product * float(results.get(p, 1.0))
    # n-th root of the product, where n is the number of programs in the fixed list.
    return pow(product, (1.0/float(len(progs))))
|
||||||
|
|
||||||
|
def tabulate_arch_sweep(directory):
    """Print tab-separated result tables for the sweep stored in ``directory``.

    For each of the "wallySizeOpt_size" and "wallySpeedOpt_speed" cases, reads
    ``<directory>/<case>_<arch>.json`` for every architecture in ``archs``,
    extracts each `"name" : value` pair with a regular expression, and prints:
      * a header row of architecture names,
      * one row per program found for the first architecture ("n/a" where another
        architecture has no value for that program),
      * a final "New geo mean" row computed by calcgeomean().

    A results file that is missing or unreadable is treated as empty rather than
    aborting the tabulation.

    Args:
        directory: path of the run directory holding the per-architecture files.
    """
    # One `"name" : value` pair per line; the value stops at a comma or end of line.
    pattern = re.compile(r'"([^"]*)" : ([^,\n]+)')

    for case in ["wallySizeOpt_size", "wallySpeedOpt_speed"]:
        d = collections.defaultdict(dict)
        for arch in archs:
            file_path = os.path.join(directory, case+"_"+arch+".json")
            try:
                # The context manager closes the file on every path. The previous
                # code called f.close() in the except branch, where f is unbound
                # if open() itself failed.
                with open(file_path, "r") as f:
                    lines = f.readlines()
            except (OSError, UnicodeError):
                # Best effort: an absent or unreadable file contributes no entries.
                lines = []
            for line in lines:
                match = pattern.search(line)
                if match:
                    d[arch][match.group(1)] = match.group(2)

        # Header row: an empty first cell, then one column per architecture.
        for arch in [""] + archs:
            print(arch, end="\t")
        print("")

        # Rows are keyed by the programs found for the first architecture.
        for prog in d[archs[0]]:
            print(prog, end="\t")
            for arch in archs:
                print(d[arch].get(prog, "n/a"), end="\t")
            print("")

        print("New geo mean", end="\t")
        for arch in archs:
            print(calcgeomean(d, arch), end="\t")
        print("")
|
||||||
|
|
||||||
|
def run_arch_sweep():
    """Build and run embench for every architecture in ``archs`` and collect the results.

    Creates a directory named ``run_<YYYYMMDD>_<HHMMSS>`` in the current working
    directory. For each architecture it runs ``make clean`` and
    ``make run ARCH=<arch>``, then moves each ``wally<case>.json`` result file
    into that directory as ``wally<case>_<arch>.json``.

    Returns:
        The name of the run directory (relative to the current working directory).

    Raises:
        OSError: if the run directory cannot be created (for example, it already exists).
    """
    import sys  # local import: only needed for the missing-result warning below

    # Timestamp includes the time of day so that several sweeps on one date do not collide.
    date_string = datetime.now().strftime('%Y%m%d_%H%M%S')
    run_dir = "run_"+date_string
    os.mkdir(run_dir)

    # Sweep the runs and save the results in the run directory.
    for arch in archs:
        os.system("make clean")
        os.system("make run ARCH="+arch)
        for res in ["SizeOpt_size", "SizeOpt_speed", "SpeedOpt_size", "SpeedOpt_speed"]:
            src = "wally"+res+".json"
            dst = os.path.join(run_dir, "wally"+res+"_"+arch+".json")
            if os.path.exists(src):
                # Overwrites an existing destination, like `mv -f`.
                os.replace(src, dst)
            else:
                # Keep going: a failed build for one case should not stop the sweep.
                print("warning: "+src+" not found for ARCH="+arch, file=sys.stderr)
    return run_dir
|
||||||
|
|
||||||
|
# Script entry point: run the full sweep, then tabulate the results found in the
# run directory it returns.
directory = run_arch_sweep()
|
||||||
|
# NOTE(review): presumably enabled in place of the call above to re-tabulate an
# existing run directory without rebuilding - confirm.
#directory = "run_20231117_082325"
|
||||||
|
tabulate_arch_sweep(directory)
|
@ -74,8 +74,8 @@ localparam ICACHE_LINELENINBITS = 32'd512;
|
|||||||
|
|
||||||
// Integer Divider Configuration
|
// Integer Divider Configuration
|
||||||
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
// IDIV_BITSPERCYCLE must be 1, 2, or 4
|
||||||
localparam IDIV_BITSPERCYCLE = 32'd4;
|
localparam IDIV_BITSPERCYCLE = 32'd2;
|
||||||
localparam IDIV_ON_FPU = 1;
|
localparam IDIV_ON_FPU = 0;
|
||||||
|
|
||||||
// Legal number of PMP entries are 0, 16, or 64
|
// Legal number of PMP entries are 0, 16, or 64
|
||||||
localparam PMP_ENTRIES = 32'd16;
|
localparam PMP_ENTRIES = 32'd16;
|
||||||
@ -169,7 +169,7 @@ localparam ZMMUL_SUPPORTED = 0;
|
|||||||
|
|
||||||
// FPU division architecture
|
// FPU division architecture
|
||||||
localparam RADIX = 32'd4;
|
localparam RADIX = 32'd4;
|
||||||
localparam DIVCOPIES = 32'd4;
|
localparam DIVCOPIES = 32'd2;
|
||||||
|
|
||||||
// bit manipulation
|
// bit manipulation
|
||||||
localparam ZBA_SUPPORTED = 1;
|
localparam ZBA_SUPPORTED = 1;
|
||||||
|
@ -150,7 +150,7 @@ localparam PLIC_SDC_ID = 32'd9;
|
|||||||
localparam BPRED_SUPPORTED = 1;
|
localparam BPRED_SUPPORTED = 1;
|
||||||
localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT
|
||||||
localparam BPRED_NUM_LHR = 32'd6;
|
localparam BPRED_NUM_LHR = 32'd6;
|
||||||
localparam BPRED_SIZE = 32'd6;
|
localparam BPRED_SIZE = 32'd10;
|
||||||
localparam BTB_SIZE = 32'd10;
|
localparam BTB_SIZE = 32'd10;
|
||||||
localparam RAS_SIZE = 32'd16;
|
localparam RAS_SIZE = 32'd16;
|
||||||
|
|
||||||
|
@ -93,16 +93,21 @@ localparam NF2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_NF : H_NF);
|
|||||||
localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2);
|
localparam FMT2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? 2'd0 : 2'd2);
|
||||||
localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS);
|
localparam BIAS2 = ((F_SUPPORTED & (LEN1 != S_LEN)) ? S_BIAS : H_BIAS);
|
||||||
|
|
||||||
|
// divider r and rk (bits per digit, bits per cycle)
|
||||||
|
localparam LOGR = $clog2(RADIX); // r = log(R) bits per digit
|
||||||
|
localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated
|
||||||
|
|
||||||
|
// intermediate division parameters not directly used in fdivsqrt hardware
|
||||||
|
localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right
|
||||||
|
//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step.
|
||||||
|
localparam DIVMINb = ((FPDIVMINb<XLEN) & IDIV_ON_FPU) ? XLEN : FPDIVMINb; // minimum fractional bits b = max(XLEN, FPDIVMINb)
|
||||||
|
localparam RESBITS = DIVMINb + LOGR; // number of bits in a result: r integer + b fractional
|
||||||
|
|
||||||
// division constants
|
// division constants
|
||||||
localparam DIVN = (((NF+2<XLEN) & IDIV_ON_FPU) ? XLEN : NF+2); // standard length of input
|
localparam FPDUR = (RESBITS-1)/RK + 1 ; // ceiling((r+b)/rk)
|
||||||
localparam LOGR = ($clog2(RADIX)); // r = log(R)
|
localparam DIVb = FPDUR*RK - LOGR; // divsqrt fractional bits, so total number of bits is a multiple of rk after r integer bits
|
||||||
localparam RK = (LOGR*DIVCOPIES); // r*k used for intdiv preproc
|
localparam DURLEN = $clog2(FPDUR); // enough bits to count the duration
|
||||||
localparam LOGRK = ($clog2(RK)); // log2(r*k)
|
localparam DIVBLEN = $clog2(DIVb); // enough bits to count number of fractional bits
|
||||||
localparam FPDUR = ((DIVN+1+(LOGR*DIVCOPIES))/(LOGR*DIVCOPIES)+(RADIX/4));
|
|
||||||
localparam DURLEN = ($clog2(FPDUR+1));
|
|
||||||
localparam DIVb = (FPDUR*LOGR*DIVCOPIES-1); // canonical fdiv size (b)
|
|
||||||
localparam DIVBLEN = ($clog2(DIVb+1)-1);
|
|
||||||
localparam DIVa = (DIVb+1-XLEN); // used for idiv on fpu: Shift residual right by b - (XLEN-1) to put remainder in lsbs of integer result
|
|
||||||
|
|
||||||
// largest length in IEU/FPU
|
// largest length in IEU/FPU
|
||||||
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF)); // max(XLEN, NF)
|
localparam CVTLEN = ((NF<XLEN) ? (XLEN) : (NF)); // max(XLEN, NF)
|
||||||
@ -110,7 +115,7 @@ localparam LLEN = (($unsigned(FLEN)<$unsigned(XLEN)) ? ($unsigned(XLEN)) : ($uns
|
|||||||
localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
|
localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1));
|
||||||
localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6)));
|
localparam NORMSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVb + 1 +NF+1) > (3*NF+6) ? (DIVb + 1 +NF+1) : (3*NF+6)));
|
||||||
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));
|
localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ));
|
||||||
localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVN+1+NF) > (3*NF+4) ? (DIVN+1+NF) : (3*NF+4)));
|
localparam CORRSHIFTSZ = (((CVTLEN+NF+1)>(DIVb + 1 +NF+1) & (CVTLEN+NF+1)>(3*NF+6)) ? (CVTLEN+NF+1) : ((DIVMINb+1+NF) > (3*NF+4) ? (DIVMINb+1+NF) : (3*NF+4)));
|
||||||
|
|
||||||
|
|
||||||
// Disable spurious Verilator warnings
|
// Disable spurious Verilator warnings
|
||||||
|
@ -179,13 +179,10 @@ localparam cvw_t P = '{
|
|||||||
NORMSHIFTSZ : NORMSHIFTSZ,
|
NORMSHIFTSZ : NORMSHIFTSZ,
|
||||||
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
|
LOGNORMSHIFTSZ : LOGNORMSHIFTSZ,
|
||||||
CORRSHIFTSZ : CORRSHIFTSZ,
|
CORRSHIFTSZ : CORRSHIFTSZ,
|
||||||
DIVN : DIVN,
|
|
||||||
LOGR : LOGR,
|
LOGR : LOGR,
|
||||||
RK : RK,
|
RK : RK,
|
||||||
LOGRK : LOGRK,
|
|
||||||
FPDUR : FPDUR,
|
FPDUR : FPDUR,
|
||||||
DURLEN : DURLEN,
|
DURLEN : DURLEN,
|
||||||
DIVb : DIVb,
|
DIVb : DIVb,
|
||||||
DIVBLEN : DIVBLEN,
|
DIVBLEN : DIVBLEN
|
||||||
DIVa : DIVa
|
|
||||||
};
|
};
|
||||||
|
@ -42,13 +42,9 @@ if {$board=="ArtyA7"} {
|
|||||||
# read in all other rtl
|
# read in all other rtl
|
||||||
read_verilog -sv [glob -type f ../src/CopiedFiles_do_not_add_to_repo/*/*.sv ../src/CopiedFiles_do_not_add_to_repo/*/*/*.sv]
|
read_verilog -sv [glob -type f ../src/CopiedFiles_do_not_add_to_repo/*/*.sv ../src/CopiedFiles_do_not_add_to_repo/*/*/*.sv]
|
||||||
# *** Once the sdc is updated to use ahb changes these to system verilog.
|
# *** Once the sdc is updated to use ahb changes these to system verilog.
|
||||||
read_verilog [glob -type f ../src/axi_sdc_controller.v]
|
read_verilog [glob -type f ../../addins/ahbsdc/sdc/*.v]
|
||||||
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_master.v]
|
|
||||||
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_serial_host.v]
|
|
||||||
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_master.v]
|
|
||||||
read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_serial_host.v]
|
|
||||||
|
|
||||||
set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/vivado-risc-v/sdc} [current_fileset]
|
set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/ahbsdc/sdc} [current_fileset]
|
||||||
|
|
||||||
if {$board=="ArtyA7"} {
|
if {$board=="ArtyA7"} {
|
||||||
add_files -fileset constrs_1 -norecurse ../constraints/constraints-$board.xdc
|
add_files -fileset constrs_1 -norecurse ../constraints/constraints-$board.xdc
|
||||||
|
513
fpga/src/boot.mem
Normal file
513
fpga/src/boot.mem
Normal file
@ -0,0 +1,513 @@
|
|||||||
|
8001819300002197
|
||||||
|
4281420141014081
|
||||||
|
4481440143814301
|
||||||
|
4681460145814501
|
||||||
|
4881480147814701
|
||||||
|
4a814a0149814901
|
||||||
|
4c814c014b814b01
|
||||||
|
4e814e014d814d01
|
||||||
|
0110011b4f814f01
|
||||||
|
059b45011161016e
|
||||||
|
0004063705fe0010
|
||||||
|
1f6000ef8006061b
|
||||||
|
0ff003930000100f
|
||||||
|
4e952e3110060e37
|
||||||
|
c602829b0053f2b7
|
||||||
|
2023fe02dfe312fd
|
||||||
|
829b0053f2b7007e
|
||||||
|
fe02dfe312fdc602
|
||||||
|
4de31efd000e2023
|
||||||
|
059bf1402573fdd0
|
||||||
|
0000061705e20870
|
||||||
|
0010029b01260613
|
||||||
|
68110002806702fe
|
||||||
|
0085179bf0080813
|
||||||
|
038008130107f7b3
|
||||||
|
480508a86c632781
|
||||||
|
1533357902a87963
|
||||||
|
38030000181700a8
|
||||||
|
1c6301057833f268
|
||||||
|
081a403018370808
|
||||||
|
0105783342280813
|
||||||
|
1815751308081063
|
||||||
|
00367513c295e14d
|
||||||
|
654ded510207e793
|
||||||
|
c1701ff00613f130
|
||||||
|
0637c530fff6861b
|
||||||
|
664dcd10167d0200
|
||||||
|
17fd001007b7c25c
|
||||||
|
859b5a5cc20cd21c
|
||||||
|
02062a23dfed0007
|
||||||
|
4785fffd561c664d
|
||||||
|
4501461c06f59063
|
||||||
|
4a1cc35c465cc31c
|
||||||
|
e29dc75c4a5cc71c
|
||||||
|
0c63086008138082
|
||||||
|
1ae30a9008130105
|
||||||
|
b7710017e793f905
|
||||||
|
e793b75901d7e793
|
||||||
|
5f5c674db7410197
|
||||||
|
66cd02072e23dffd
|
||||||
|
fff78513ff7d5698
|
||||||
|
40a0053300a03533
|
||||||
|
bfb100a7e7938082
|
||||||
|
e0a2715d8082557d
|
||||||
|
e486f052f44ef84a
|
||||||
|
fa13e85aec56fc26
|
||||||
|
843289ae892a0086
|
||||||
|
00959993000a1463
|
||||||
|
864ac4396b054a85
|
||||||
|
0009859b4549870a
|
||||||
|
0004049b05540363
|
||||||
|
86a66485008b7363
|
||||||
|
870a87aaec7ff0ef
|
||||||
|
4531458146014681
|
||||||
|
f0ef0207c9639c05
|
||||||
|
17820094979beb1f
|
||||||
|
873e020541639381
|
||||||
|
993e99ba020a1963
|
||||||
|
870aa8094501f85d
|
||||||
|
e8bff0ef45454685
|
||||||
|
60a64505fe0559e3
|
||||||
|
79a2794274e26406
|
||||||
|
61616b426ae27a02
|
||||||
|
9301020497138082
|
||||||
|
f40647057179b7f1
|
||||||
|
d79867cdec26f022
|
||||||
|
dff58b85571c674d
|
||||||
|
2423d35c03600793
|
||||||
|
fffd571c674d0207
|
||||||
|
0007a737b00026f3
|
||||||
|
b00027f311f70713
|
||||||
|
674dfef77de38f95
|
||||||
|
4f5ccf9d8b895b1c
|
||||||
|
26f3cf5c0027e793
|
||||||
|
071305f5e737b000
|
||||||
|
8f95b00027f30ff7
|
||||||
|
4f5c674dfef77de3
|
||||||
|
b00026f3cf5c9bf5
|
||||||
|
67f7071300989737
|
||||||
|
7de38f95b00027f3
|
||||||
|
458146014681fef7
|
||||||
|
ddbff0ef4501870a
|
||||||
|
059346014681870a
|
||||||
|
dcbff0ef45211aa0
|
||||||
|
1aa007134782e939
|
||||||
|
816393d117d24411
|
||||||
|
85220ff0041302e7
|
||||||
|
614564e270a27402
|
||||||
|
46e3da5ff0efa0cd
|
||||||
|
0207c7634782fe05
|
||||||
|
458146014681870a
|
||||||
|
d8bff0ef03700513
|
||||||
|
46014681870a87aa
|
||||||
|
0a900513403005b7
|
||||||
|
4409bf7dfc07d9e3
|
||||||
|
c3998b8583f9bfe1
|
||||||
|
4681870a00846413
|
||||||
|
f0ef450945814601
|
||||||
|
870afa0540e3d59f
|
||||||
|
123405b746014681
|
||||||
|
46e3d45ff0ef450d
|
||||||
|
870a77c14482f805
|
||||||
|
85a6460146818cfd
|
||||||
|
4ae3d2dff0ef451d
|
||||||
|
d3d8470567cdf605
|
||||||
|
000f4737b00026f3
|
||||||
|
b00027f323f70713
|
||||||
|
67cdfef77de38f95
|
||||||
|
4681870a0007ae23
|
||||||
|
0370051385a64601
|
||||||
|
f2054fe3cf7ff0ef
|
||||||
|
458146014681870a
|
||||||
|
ce3ff0ef08600513
|
||||||
|
4681870af20545e3
|
||||||
|
4541200005934601
|
||||||
|
f0055de3ccfff0ef
|
||||||
|
3023bf010113bf09
|
||||||
|
4605842a86aa4081
|
||||||
|
40113423850a4585
|
||||||
|
86a265a6da5ff0ef
|
||||||
|
d99ff0ef04084605
|
||||||
|
2201358322813603
|
||||||
|
86a2260508700513
|
||||||
|
d81ff0ef05629e0d
|
||||||
|
2a0135832a813603
|
||||||
|
9e0d86a226054505
|
||||||
|
3603d6bff0ef057e
|
||||||
|
0513320135833281
|
||||||
|
9e0d86a226054010
|
||||||
|
3083d53ff0ef0556
|
||||||
|
4501400134034081
|
||||||
|
0000808241010113
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
0000000000000000
|
||||||
|
00600100d2e3ca40
|
@ -27,14 +27,6 @@ BINARIES := fw_jump.elf vmlinux busybox
|
|||||||
OBJDUMPS := $(foreach name, $(BINARIES), $(basename $(name) .elf))
|
OBJDUMPS := $(foreach name, $(BINARIES), $(basename $(name) .elf))
|
||||||
OBJDUMPS := $(foreach name, $(OBJDUMPS), $(DIS)/$(name).objdump)
|
OBJDUMPS := $(foreach name, $(OBJDUMPS), $(DIS)/$(name).objdump)
|
||||||
|
|
||||||
define linuxDir =
|
|
||||||
$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$")
|
|
||||||
endef
|
|
||||||
|
|
||||||
define busyboxDir =
|
|
||||||
$(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$")
|
|
||||||
endef
|
|
||||||
|
|
||||||
.PHONY: all generate disassemble install clean cleanDTB cleanDriver test
|
.PHONY: all generate disassemble install clean cleanDTB cleanDriver test
|
||||||
|
|
||||||
all:
|
all:
|
||||||
@ -46,8 +38,7 @@ all:
|
|||||||
|
|
||||||
# Temp rule for debugging
|
# Temp rule for debugging
|
||||||
test:
|
test:
|
||||||
@echo $(linuxDir)
|
echo $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$")
|
||||||
@echo $(busyboxDir)
|
|
||||||
|
|
||||||
generate: $(DTB) $(IMAGES)
|
generate: $(DTB) $(IMAGES)
|
||||||
|
|
||||||
@ -74,11 +65,13 @@ $(DIS)/%.objdump: $(IMAGES)/%.elf
|
|||||||
$(DIS)/%.objdump: $(IMAGES)/%
|
$(DIS)/%.objdump: $(IMAGES)/%
|
||||||
riscv64-unknown-elf-objdump -S $< >> $@
|
riscv64-unknown-elf-objdump -S $< >> $@
|
||||||
|
|
||||||
$(IMAGES)/vmlinux: $(call linuxDir)/vmlinux
|
$(IMAGES)/vmlinux:
|
||||||
cp $< $@
|
linuxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") ;\
|
||||||
|
cp $$linuxDir/vmlinux $@ ;\
|
||||||
|
|
||||||
$(IMAGES)/busybox: $(call busyboxDir)/busybox
|
$(IMAGES)/busybox:
|
||||||
cp $< $@
|
busyboxDir=$$(find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") ;\
|
||||||
|
cp $$busyboxDir/busybox $@ ;\
|
||||||
|
|
||||||
# Generating new Buildroot directories --------------------------------
|
# Generating new Buildroot directories --------------------------------
|
||||||
|
|
||||||
|
@ -18,12 +18,13 @@
|
|||||||
# More extensions
|
# More extensions
|
||||||
--override cpu/Zcb=T
|
--override cpu/Zcb=T
|
||||||
|
|
||||||
--override cpu/unaligned=T
|
|
||||||
|
|
||||||
# Cache block operations
|
# Cache block operations
|
||||||
--override cpu/Zicbom=T
|
--override cpu/Zicbom=T
|
||||||
--override cpu/Zicbop=T
|
--override cpu/Zicbop=T
|
||||||
--override cpu/Zicboz=T
|
--override cpu/Zicboz=T
|
||||||
|
--override cmomp_bytes=64 # Zic64b
|
||||||
|
--override cmoz_bytes=64 # Zic64b
|
||||||
|
--override lr_sc_grain=64 # Za64rs
|
||||||
|
|
||||||
# 64 KiB continuous huge pages supported
|
# 64 KiB continuous huge pages supported
|
||||||
--override cpu/Svpbmt=T
|
--override cpu/Svpbmt=T
|
||||||
@ -42,6 +43,7 @@
|
|||||||
|
|
||||||
--override cpu/reset_address=0x80000000
|
--override cpu/reset_address=0x80000000
|
||||||
|
|
||||||
|
--override cpu/unaligned=T # Zicclsm (should be true)
|
||||||
--override cpu/ignore_non_leaf_DAU=1
|
--override cpu/ignore_non_leaf_DAU=1
|
||||||
--override cpu/wfi_is_nop=T
|
--override cpu/wfi_is_nop=T
|
||||||
--override cpu/misa_Extensions_mask=0x0
|
--override cpu/misa_Extensions_mask=0x0
|
||||||
|
@ -271,15 +271,12 @@ typedef struct packed {
|
|||||||
int CORRSHIFTSZ;
|
int CORRSHIFTSZ;
|
||||||
|
|
||||||
// division constants
|
// division constants
|
||||||
int DIVN ;
|
|
||||||
int LOGR ;
|
int LOGR ;
|
||||||
int RK ;
|
int RK ;
|
||||||
int LOGRK ;
|
|
||||||
int FPDUR ;
|
int FPDUR ;
|
||||||
int DURLEN ;
|
int DURLEN ;
|
||||||
int DIVb ;
|
int DIVb ;
|
||||||
int DIVBLEN ;
|
int DIVBLEN ;
|
||||||
int DIVa ;
|
|
||||||
|
|
||||||
} cvw_t;
|
} cvw_t;
|
||||||
|
|
||||||
|
@ -45,8 +45,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
|||||||
input logic IntDivE, W64E,
|
input logic IntDivE, W64E,
|
||||||
output logic DivStickyM,
|
output logic DivStickyM,
|
||||||
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
output logic FDivBusyE, IFDivStartE, FDivDoneE,
|
||||||
output logic [P.NE+1:0] QeM,
|
output logic [P.NE+1:0] UeM, // Exponent result
|
||||||
output logic [P.DIVb:0] QmM,
|
output logic [P.DIVb:0] UmM, // Significand result
|
||||||
output logic [P.XLEN-1:0] FIntDivResultM
|
output logic [P.XLEN-1:0] FIntDivResultM
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -67,17 +67,17 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
|||||||
// Integer div/rem signals
|
// Integer div/rem signals
|
||||||
logic BZeroM; // Denominator is zero
|
logic BZeroM; // Denominator is zero
|
||||||
logic IntDivM; // Integer operation
|
logic IntDivM; // Integer operation
|
||||||
logic [P.DIVBLEN:0] nM, mM; // Shift amounts
|
logic [P.DIVBLEN-1:0] IntNormShiftM; // Integer normalization shift amount
|
||||||
logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor
|
logic ALTBM, AsM, BsM, W64M; // Special handling for postprocessor
|
||||||
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
logic [P.XLEN-1:0] AM; // Original Numerator for postprocessor
|
||||||
logic ISpecialCaseE; // Integer div/remainder special cases
|
logic ISpecialCaseE; // Integer div/remainder special cases
|
||||||
|
|
||||||
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
|
fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor
|
||||||
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
.clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
|
||||||
.FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
|
.FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE,
|
||||||
// Int-specific
|
// Int-specific
|
||||||
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
|
.ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
|
||||||
.BZeroM, .nM, .mM, .AM,
|
.BZeroM, .IntNormShiftM, .AM,
|
||||||
.IntDivM, .W64M, .ALTBM, .AsM, .BsM);
|
.IntDivM, .W64M, .ALTBM, .AsM, .BsM);
|
||||||
|
|
||||||
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
|
fdivsqrtfsm #(P) fdivsqrtfsm( // FSM
|
||||||
@ -94,8 +94,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) (
|
|||||||
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
|
fdivsqrtpostproc #(P) fdivsqrtpostproc( // Postprocessor
|
||||||
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
|
.clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC,
|
||||||
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
|
.SqrtE, .Firstun, .SqrtM, .SpecialCaseM,
|
||||||
.QmM, .WZeroE, .DivStickyM,
|
.UmM, .WZeroE, .DivStickyM,
|
||||||
// Int-specific
|
// Int-specific
|
||||||
.nM, .mM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
|
.IntNormShiftM, .ALTBM, .AsM, .BsM, .BZeroM, .W64M, .RemOpM(Funct3M[1]), .AM,
|
||||||
.FIntDivResultM);
|
.FIntDivResultM);
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -30,13 +30,11 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
|||||||
input logic [P.FMTBITS-1:0] FmtE,
|
input logic [P.FMTBITS-1:0] FmtE,
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
input logic IntDivE,
|
input logic IntDivE,
|
||||||
input logic [P.DIVBLEN:0] nE,
|
input logic [P.DIVBLEN-1:0] IntResultBitsE,
|
||||||
output logic [P.DURLEN-1:0] CyclesE
|
output logic [P.DURLEN-1:0] CyclesE
|
||||||
);
|
);
|
||||||
logic [P.DURLEN+1:0] Nf, fbits; // number of fractional bits
|
|
||||||
// DIVN = P.NF+3
|
logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits
|
||||||
// NS = NF + 1
|
|
||||||
// N = NS or NS+2 for div/sqrt.
|
|
||||||
|
|
||||||
/* verilator lint_off WIDTH */
|
/* verilator lint_off WIDTH */
|
||||||
if (P.FPSIZES == 1)
|
if (P.FPSIZES == 1)
|
||||||
@ -64,12 +62,21 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
|||||||
P.Q_FMT: Nf = P.Q_NF;
|
P.Q_FMT: Nf = P.Q_NF;
|
||||||
endcase
|
endcase
|
||||||
|
|
||||||
|
// Cycle logic
|
||||||
|
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
|
||||||
|
// Integer division needs p fractional + r integer result bits
|
||||||
|
// FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
|
||||||
|
// FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle.
|
||||||
|
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
|
||||||
|
|
||||||
always_comb begin
|
always_comb begin
|
||||||
if (SqrtE) fbits = Nf + 2 + 1; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 *** unclear why it works with just +1; is it related to DIVCOPIES logic below?
|
if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1
|
||||||
// if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
|
else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits
|
||||||
else fbits = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
|
|
||||||
if (P.IDIV_ON_FPU) CyclesE = IntDivE ? ((nE + 1)/P.DIVCOPIES) : (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
|
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
|
||||||
else CyclesE = (fbits + (P.LOGR*P.DIVCOPIES)-1)/(P.LOGR*P.DIVCOPIES);
|
else ResultBitsE = FPResultBitsE;
|
||||||
|
|
||||||
|
CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk)
|
||||||
end
|
end
|
||||||
/* verilator lint_on WIDTH */
|
/* verilator lint_on WIDTH */
|
||||||
|
|
||||||
|
@ -28,16 +28,19 @@
|
|||||||
|
|
||||||
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic [P.FMTBITS-1:0] Fmt,
|
input logic [P.FMTBITS-1:0] Fmt,
|
||||||
input logic [P.NE-1:0] Xe, Ye,
|
input logic [P.NE-1:0] Xe, Ye, // input exponents
|
||||||
input logic Sqrt,
|
input logic Sqrt,
|
||||||
input logic XZero,
|
input logic XZero,
|
||||||
input logic [P.DIVBLEN:0] ell, m,
|
input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in Xe and Ye
|
||||||
output logic [P.NE+1:0] Qe
|
output logic [P.NE+1:0] Ue // result exponent
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [P.NE-2:0] Bias;
|
logic [P.NE-2:0] Bias;
|
||||||
logic [P.NE+1:0] SXExp;
|
logic [P.NE+1:0] SXExp;
|
||||||
logic [P.NE+1:0] SExp;
|
logic [P.NE+1:0] SExp;
|
||||||
logic [P.NE+1:0] DExp;
|
logic [P.NE+1:0] DExp;
|
||||||
|
|
||||||
|
// Determine exponent bias according to the format
|
||||||
|
|
||||||
if (P.FPSIZES == 1) begin
|
if (P.FPSIZES == 1) begin
|
||||||
assign Bias = (P.NE-1)'(P.BIAS);
|
assign Bias = (P.NE-1)'(P.BIAS);
|
||||||
@ -63,10 +66,14 @@ module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
|||||||
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
|
2'h2: Bias = (P.NE-1)'(P.H_BIAS);
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
|
// Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms
|
||||||
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
|
assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS);
|
||||||
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
|
assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias};
|
||||||
|
|
||||||
// correct exponent for subnormal input's normalization shifts
|
// division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms
|
||||||
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias});
|
||||||
assign Qe = Sqrt ? SExp : DExp;
|
|
||||||
|
// Select square root or division exponent
|
||||||
|
assign Ue = Sqrt ? SExp : DExp;
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -28,12 +28,12 @@
|
|||||||
|
|
||||||
module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic up, uz,
|
input logic up, uz,
|
||||||
input logic [P.DIVb+3:0] C, U, UM,
|
input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms)
|
||||||
output logic [P.DIVb+3:0] F
|
output logic [P.DIVb+3:0] F // Q4.DIVb
|
||||||
);
|
);
|
||||||
logic [P.DIVb+3:0] FP, FN, FZ;
|
logic [P.DIVb+3:0] FP, FN, FZ; // Q4.DIVb
|
||||||
|
|
||||||
// Generate for both positive and negative bits
|
// Generate for both positive and negative quotient digits
|
||||||
assign FP = ~(U << 1) & C;
|
assign FP = ~(U << 1) & C;
|
||||||
assign FN = (UM << 1) | (C & ~(C << 2));
|
assign FN = (UM << 1) | (C & ~(C << 2));
|
||||||
assign FZ = '0;
|
assign FZ = '0;
|
||||||
|
@ -27,14 +27,14 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic [3:0] udigit,
|
input logic [3:0] udigit, // {2, 1, -1, -2}; all cold for zero
|
||||||
input logic [P.DIVb+3:0] C, U, UM,
|
input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms)
|
||||||
output logic [P.DIVb+3:0] F
|
output logic [P.DIVb+3:0] F // Q4.DIVb
|
||||||
);
|
);
|
||||||
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
|
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; // Q4.DIVb
|
||||||
|
|
||||||
// Generate for both positive and negative bits
|
// Generate for both positive and negative digits
|
||||||
assign F2 = (~U << 2) & (C << 2);
|
assign F2 = (~U << 2) & (C << 2); //
|
||||||
assign F1 = ~(U << 1) & C;
|
assign F1 = ~(U << 1) & C;
|
||||||
assign F0 = '0;
|
assign F0 = '0;
|
||||||
assign FN1 = (UM << 1) | (C & ~(C << 3));
|
assign FN1 = (UM << 1) | (C & ~(C << 3));
|
||||||
|
@ -57,7 +57,7 @@ module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) (
|
|||||||
// terminate immediately on special cases
|
// terminate immediately on special cases
|
||||||
assign FSpecialCaseE = XZeroE | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
|
assign FSpecialCaseE = XZeroE | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
|
||||||
if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
|
if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
|
||||||
else assign SpecialCaseE = FSpecialCaseE;
|
else assign SpecialCaseE = FSpecialCaseE;
|
||||||
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
||||||
|
|
||||||
always_ff @(posedge clk) begin
|
always_ff @(posedge clk) begin
|
||||||
|
@ -31,31 +31,31 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
|
|||||||
input logic IFDivStartE,
|
input logic IFDivStartE,
|
||||||
input logic FDivBusyE,
|
input logic FDivBusyE,
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
input logic [P.DIVb+3:0] X, D,
|
input logic [P.DIVb+3:0] X, D, // Q4.DIVb
|
||||||
output logic [P.DIVb:0] FirstU, FirstUM,
|
output logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb
|
||||||
output logic [P.DIVb+1:0] FirstC,
|
output logic [P.DIVb+1:0] FirstC, // Q2.DIVb
|
||||||
output logic Firstun,
|
output logic Firstun,
|
||||||
output logic [P.DIVb+3:0] FirstWS, FirstWC
|
output logic [P.DIVb+3:0] FirstWS, FirstWC // Q4.DIVb
|
||||||
);
|
);
|
||||||
|
|
||||||
/* verilator lint_off UNOPTFLAT */
|
/* verilator lint_off UNOPTFLAT */
|
||||||
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.b
|
logic [P.DIVb+3:0] WSNext[P.DIVCOPIES-1:0]; // Q4.DIVb
|
||||||
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.b
|
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.DIVb
|
||||||
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.b
|
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.DIVb
|
||||||
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.b
|
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.DIVb
|
||||||
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.b
|
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb
|
||||||
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.b
|
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.DIVb
|
||||||
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.b
|
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.DIVb
|
||||||
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.b
|
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.DIVb
|
||||||
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.b
|
logic [P.DIVb+1:0] C[P.DIVCOPIES:0]; // Q2.DIVb
|
||||||
logic [P.DIVb+1:0] initC; // Q2.b
|
logic [P.DIVb+1:0] initC; // Q2.DIVb
|
||||||
logic [P.DIVCOPIES-1:0] un;
|
logic [P.DIVCOPIES-1:0] un;
|
||||||
|
|
||||||
logic [P.DIVb+3:0] WSN, WCN; // Q4.b
|
logic [P.DIVb+3:0] WSN, WCN; // Q4.DIVb
|
||||||
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.b
|
logic [P.DIVb+3:0] DBar, D2, DBar2; // Q4.DIVb
|
||||||
logic [P.DIVb+1:0] NextC;
|
logic [P.DIVb+1:0] NextC; // Q2.DIVb
|
||||||
logic [P.DIVb:0] UMux, UMMux;
|
logic [P.DIVb:0] UMux, UMMux; // U1.DIVb
|
||||||
logic [P.DIVb:0] initU, initUM;
|
logic [P.DIVb:0] initU, initUM; // U1.DIVb
|
||||||
/* verilator lint_on UNOPTFLAT */
|
/* verilator lint_on UNOPTFLAT */
|
||||||
|
|
||||||
// Top Muxes and Registers
|
// Top Muxes and Registers
|
||||||
@ -104,14 +104,14 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
|
|||||||
for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
|
for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
|
||||||
if (P.RADIX == 2) begin: stage
|
if (P.RADIX == 2) begin: stage
|
||||||
fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
|
fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
|
||||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||||
end else begin: stage
|
end else begin: stage
|
||||||
logic j1;
|
logic j1;
|
||||||
assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
|
assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
|
||||||
fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
|
fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
|
||||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||||
end
|
end
|
||||||
assign WS[i+1] = WSNext[i];
|
assign WS[i+1] = WSNext[i];
|
||||||
assign WC[i+1] = WCNext[i];
|
assign WC[i+1] = WCNext[i];
|
||||||
|
@ -27,25 +27,25 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic clk, reset,
|
input logic clk, reset,
|
||||||
input logic StallM,
|
input logic StallM,
|
||||||
input logic [P.DIVb+3:0] WS, WC,
|
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||||
input logic [P.DIVb+3:0] D,
|
input logic [P.DIVb+3:0] D, // Q4.DIVb
|
||||||
input logic [P.DIVb:0] FirstU, FirstUM,
|
input logic [P.DIVb:0] FirstU, FirstUM, // U1.DIVb
|
||||||
input logic [P.DIVb+1:0] FirstC,
|
input logic [P.DIVb+1:0] FirstC, // Q2.DIVb
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
input logic Firstun, SqrtM, SpecialCaseM,
|
input logic Firstun, SqrtM, SpecialCaseM,
|
||||||
input logic [P.XLEN-1:0] AM,
|
input logic [P.XLEN-1:0] AM, // U/Q(XLEN.0)
|
||||||
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M,
|
input logic RemOpM, ALTBM, BZeroM, AsM, BsM, W64M,
|
||||||
input logic [P.DIVBLEN:0] nM, mM,
|
input logic [P.DIVBLEN-1:0] IntNormShiftM,
|
||||||
output logic [P.DIVb:0] QmM,
|
output logic [P.DIVb:0] UmM, // U1.DIVb result significand
|
||||||
output logic WZeroE,
|
output logic WZeroE,
|
||||||
output logic DivStickyM,
|
output logic DivStickyM,
|
||||||
output logic [P.XLEN-1:0] FIntDivResultM
|
output logic [P.XLEN-1:0] FIntDivResultM // U/Q(XLEN.0)
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [P.DIVb+3:0] W, Sum;
|
logic [P.DIVb+3:0] W, Sum;
|
||||||
logic [P.DIVb:0] PreQmM;
|
logic [P.DIVb:0] PreUmM;
|
||||||
logic NegStickyM;
|
logic NegStickyM;
|
||||||
logic weq0E, WZeroM;
|
logic weq0E, WZeroM;
|
||||||
logic [P.XLEN-1:0] IntDivResultM;
|
logic [P.XLEN-1:0] IntDivResultM;
|
||||||
@ -86,22 +86,21 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
//////////////////////////
|
//////////////////////////
|
||||||
|
|
||||||
// If the result is not exact, the sticky should be set
|
// If the result is not exact, the sticky should be set
|
||||||
assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide
|
assign DivStickyM = ~WZeroM & ~SpecialCaseM;
|
||||||
|
|
||||||
// Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed.
|
// Determine if sticky bit is negative
|
||||||
assign Sum = WC + WS;
|
assign Sum = WC + WS;
|
||||||
assign NegStickyM = Sum[P.DIVb+3];
|
assign NegStickyM = Sum[P.DIVb+3];
|
||||||
mux2 #(P.DIVb+1) preqmmux(FirstU, FirstUM, NegStickyM, PreQmM); // Select U or U-1 depending on negative sticky bit
|
mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit
|
||||||
mux2 #(P.DIVb+1) qmmux(PreQmM, (PreQmM << 1), SqrtM, QmM);
|
mux2 #(P.DIVb+1) ummux(PreUmM, (PreUmM << 1), SqrtM, UmM);
|
||||||
|
|
||||||
// Integer quotient or remainder correctoin, normalization, and special cases
|
// Integer quotient or remainder correction, normalization, and special cases
|
||||||
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
|
if (P.IDIV_ON_FPU) begin:intpostproc // Int supported
|
||||||
logic [P.DIVBLEN:0] NormShiftM;
|
|
||||||
logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
logic [P.DIVb+3:0] UnsignedQuotM, NormRemM, NormRemDM, NormQuotM;
|
||||||
logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
|
logic signed [P.DIVb+3:0] PreResultM, PreIntResultM;
|
||||||
|
|
||||||
assign W = $signed(Sum) >>> P.LOGR;
|
assign W = $signed(Sum) >>> P.LOGR;
|
||||||
assign UnsignedQuotM = {3'b000, PreQmM};
|
assign UnsignedQuotM = {3'b000, PreUmM};
|
||||||
|
|
||||||
// Integer remainder: sticky and sign correction muxes
|
// Integer remainder: sticky and sign correction muxes
|
||||||
assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
|
assign NegQuotM = AsM ^ BsM; // Integer Quotient is negative
|
||||||
@ -110,9 +109,8 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
|
mux2 #(P.DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
|
||||||
|
|
||||||
// Select quotient or remainder and do normalization shift
|
// Select quotient or remainder and do normalization shift
|
||||||
mux2 #(P.DIVBLEN+1) normshiftmux(((P.DIVBLEN+1)'(P.DIVb) - (nM * (P.DIVBLEN+1)'(P.LOGR))), (mM + (P.DIVBLEN+1)'(P.DIVa)), RemOpM, NormShiftM);
|
|
||||||
mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
|
mux2 #(P.DIVb+4) presresultmux(NormQuotM, NormRemM, RemOpM, PreResultM);
|
||||||
assign PreIntResultM = $signed(PreResultM >>> NormShiftM);
|
assign PreIntResultM = $signed(PreResultM >>> IntNormShiftM);
|
||||||
|
|
||||||
// special case logic
|
// special case logic
|
||||||
// terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B|
|
// terminates immediately when B is Zero (div 0) or |A| has more leading 0s than |B|
|
||||||
@ -120,7 +118,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
if (BZeroM) begin // Divide by zero
|
if (BZeroM) begin // Divide by zero
|
||||||
if (RemOpM) IntDivResultM = AM;
|
if (RemOpM) IntDivResultM = AM;
|
||||||
else IntDivResultM = {(P.XLEN){1'b1}};
|
else IntDivResultM = {(P.XLEN){1'b1}};
|
||||||
end else if (ALTBM) begin // Numerator is zero
|
end else if (ALTBM) begin // Numerator is small
|
||||||
if (RemOpM) IntDivResultM = AM;
|
if (RemOpM) IntDivResultM = AM;
|
||||||
else IntDivResultM = '0;
|
else IntDivResultM = '0;
|
||||||
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];
|
end else IntDivResultM = PreIntResultM[P.XLEN-1:0];
|
||||||
|
@ -29,37 +29,39 @@
|
|||||||
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic clk,
|
input logic clk,
|
||||||
input logic IFDivStartE,
|
input logic IFDivStartE,
|
||||||
input logic [P.NF:0] Xm, Ym,
|
input logic [P.NF:0] Xm, Ym, // Floating-point significands
|
||||||
input logic [P.NE-1:0] Xe, Ye,
|
input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents
|
||||||
input logic [P.FMTBITS-1:0] FmtE,
|
input logic [P.FMTBITS-1:0] FmtE,
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
input logic XZeroE,
|
input logic XZeroE,
|
||||||
input logic [2:0] Funct3E,
|
input logic [2:0] Funct3E,
|
||||||
output logic [P.NE+1:0] QeM,
|
output logic [P.NE+1:0] UeM, // biased exponent of result
|
||||||
output logic [P.DIVb+3:0] X, D,
|
output logic [P.DIVb+3:0] X, D, // Q4.DIVb
|
||||||
// Int-specific
|
// Int-specific
|
||||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU
|
||||||
input logic IntDivE, W64E,
|
input logic IntDivE, W64E,
|
||||||
|
// Outputs
|
||||||
output logic ISpecialCaseE,
|
output logic ISpecialCaseE,
|
||||||
output logic [P.DURLEN-1:0] CyclesE,
|
output logic [P.DURLEN-1:0] CyclesE,
|
||||||
output logic [P.DIVBLEN:0] nM, mM,
|
output logic [P.DIVBLEN-1:0] IntNormShiftM,
|
||||||
output logic ALTBM, IntDivM, W64M,
|
output logic ALTBM, IntDivM, W64M,
|
||||||
output logic AsM, BsM, BZeroM,
|
output logic AsM, BsM, BZeroM,
|
||||||
output logic [P.XLEN-1:0] AM
|
output logic [P.XLEN-1:0] AM
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [P.DIVb-1:0] Xfract, Dfract;
|
logic [P.DIVb:0] Xnorm, Dnorm;
|
||||||
logic [P.DIVb:0] PreSqrtX;
|
|
||||||
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||||
logic [P.NE+1:0] QeE; // Quotient Exponent (FP only)
|
logic [P.NE+1:0] UeE; // Result Exponent (FP only)
|
||||||
logic [P.DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
||||||
logic [P.DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs
|
logic [P.DIVBLEN-1:0] mE, ell; // Leading zeros of inputs
|
||||||
|
logic [P.DIVBLEN-1:0] IntResultBitsE; // bits in integer result
|
||||||
logic NumerZeroE; // Numerator is zero (X or A)
|
logic NumerZeroE; // Numerator is zero (X or A)
|
||||||
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
logic AZeroE, BZeroE; // A or B is Zero for integer division
|
||||||
logic SignedDivE; // signed division
|
logic SignedDivE; // signed division
|
||||||
logic AsE, BsE; // Signs of integer inputs
|
logic AsE, BsE; // Signs of integer inputs
|
||||||
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
|
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
|
||||||
logic ALTBE;
|
logic ALTBE;
|
||||||
|
logic EvenExp;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Integer Preprocessing
|
// Integer Preprocessing
|
||||||
@ -89,12 +91,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
|
mux2 #(P.XLEN) posbmux(BE, -BE, BsE, PosB);
|
||||||
|
|
||||||
// Select integer or floating point inputs
|
// Select integer or floating point inputs
|
||||||
mux2 #(P.DIVb) ifxmux({Xm, {(P.DIVb-P.NF-1){1'b0}}}, {PosA, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFX);
|
mux2 #(P.DIVb+1) ifxmux({Xm, {(P.DIVb-P.NF){1'b0}}}, {PosA, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFX);
|
||||||
mux2 #(P.DIVb) ifdmux({Ym, {(P.DIVb-P.NF-1){1'b0}}}, {PosB, {(P.DIVb-P.XLEN){1'b0}}}, IntDivE, IFD);
|
mux2 #(P.DIVb+1) ifdmux({Ym, {(P.DIVb-P.NF){1'b0}}}, {PosB, {(P.DIVb-P.XLEN+1){1'b0}}}, IntDivE, IFD);
|
||||||
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
|
mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
|
||||||
end else begin // Int not supported
|
end else begin // Int not supported
|
||||||
assign IFX = {Xm, {(P.DIVb-P.NF-1){1'b0}}};
|
assign IFX = {Xm, {(P.DIVb-P.NF){1'b0}}};
|
||||||
assign IFD = {Ym, {(P.DIVb-P.NF-1){1'b0}}};
|
assign IFD = {Ym, {(P.DIVb-P.NF){1'b0}}};
|
||||||
assign NumerZeroE = XZeroE;
|
assign NumerZeroE = XZeroE;
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -103,12 +105,12 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
|
|
||||||
// count leading zeros for Subnorm FP and to normalize integer inputs
|
// count leading zeros for Subnorm FP and to normalize integer inputs
|
||||||
lzc #(P.DIVb) lzcX (IFX, ell);
|
lzc #(P.DIVb+1) lzcX (IFX, ell);
|
||||||
lzc #(P.DIVb) lzcY (IFD, mE);
|
lzc #(P.DIVb+1) lzcY (IFD, mE);
|
||||||
|
|
||||||
// Normalization shift: shift off leading one
|
// Normalization shift: shift leading one into most significant bit
|
||||||
assign Xfract = (IFX << ell) << 1;
|
assign Xnorm = (IFX << ell);
|
||||||
assign Dfract = (IFD << mE) << 1;
|
assign Dnorm = (IFD << mE);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Integer Right Shift to digit boundary
|
// Integer Right Shift to digit boundary
|
||||||
@ -117,31 +119,28 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
|
|
||||||
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
|
if (P.IDIV_ON_FPU) begin:intrightshift // Int Supported
|
||||||
logic [P.DIVBLEN:0] ZeroDiff, p;
|
logic [P.DIVBLEN-1:0] ZeroDiff, p;
|
||||||
|
|
||||||
// calculate number of fractional bits p
|
// calculate number of fractional bits p
|
||||||
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
assign ZeroDiff = mE - ell; // Difference in number of leading zeros
|
||||||
assign ALTBE = ZeroDiff[P.DIVBLEN]; // A less than B (A has more leading zeros)
|
assign ALTBE = ZeroDiff[P.DIVBLEN-1]; // A less than B (A has more leading zeros)
|
||||||
mux2 #(P.DIVBLEN+1) pmux(ZeroDiff, '0, ALTBE, p);
|
mux2 #(P.DIVBLEN) pmux(ZeroDiff, '0, ALTBE, p);
|
||||||
|
|
||||||
|
/* verilator lint_off WIDTH */
|
||||||
|
assign IntResultBitsE = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
||||||
|
/* verilator lint_on WIDTH */
|
||||||
|
|
||||||
// Integer special cases (terminate immediately)
|
// Integer special cases (terminate immediately)
|
||||||
assign ISpecialCaseE = BZeroE | ALTBE;
|
assign ISpecialCaseE = BZeroE | ALTBE;
|
||||||
|
|
||||||
// calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
|
// calculate right shift amount RightShiftX to complete in discrete number of steps
|
||||||
|
if (P.RK > 1) begin // more than 1 bit per cycle
|
||||||
if (P.LOGRK > 0) begin // more than 1 bit per cycle
|
logic [$clog2(P.RK)-1:0] RightShiftX;
|
||||||
logic [P.LOGRK-1:0] IntTrunc, RightShiftX;
|
/* verilator lint_off WIDTH */
|
||||||
logic [P.DIVBLEN:0] TotalIntBits, IntSteps;
|
assign RightShiftX = P.RK - 1 - ((IntResultBitsE - 1) % P.RK); // Right shift amount
|
||||||
/* verilator lint_off WIDTH */
|
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in n steps
|
||||||
assign TotalIntBits = P.LOGR + p; // Total number of result bits (r integer bits plus p fractional bits)
|
|
||||||
assign IntTrunc = TotalIntBits % P.RK; // Truncation check for ceiling operator
|
|
||||||
assign IntSteps = (TotalIntBits >> P.LOGRK) + |IntTrunc; // Number of steps for int div
|
|
||||||
assign nE = (IntSteps * P.DIVCOPIES) - 1; // Fractional digits
|
|
||||||
assign RightShiftX = P.RK - 1 - ((TotalIntBits - 1) % P.RK); // Right shift amount
|
|
||||||
assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps
|
|
||||||
/* verilator lint_on WIDTH */
|
/* verilator lint_on WIDTH */
|
||||||
end else begin // radix 2 1 copy doesn't require shifting
|
end else begin // radix 2 1 copy doesn't require shifting
|
||||||
assign nE = p;
|
|
||||||
assign DivXShifted = DivX;
|
assign DivXShifted = DivX;
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
@ -150,22 +149,53 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Floating-Point Preprocessing
|
// Floating-Point Preprocessing
|
||||||
// append leading 1 (for nonzero inputs)
|
// Extend to Q4.b format
|
||||||
// shift square root to be in range [1/4, 1)
|
// shift square root to be in range [1/4, 1)
|
||||||
// Normalized numbers are shifted right by 1 if the exponent is odd
|
// Normalized numbers are shifted right by 1 if the exponent is odd
|
||||||
// Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
|
// Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
|
||||||
// NOTE: there might be a discrepancy that X is never right shifted by 2. However
|
//////////////////////////////////////////////////////
|
||||||
// it comes out in the wash and gives the right answer. Investigate later if possible.
|
|
||||||
//////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
assign DivX = {3'b000, ~NumerZeroE, Xfract};
|
assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
|
||||||
|
|
||||||
// Sqrt is initialized on step one as R(X-1), so depends on Radix
|
// Sqrt is initialized on step one as R(X-1), so depends on Radix
|
||||||
mux2 #(P.DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[P.DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
|
// If X = 0, then special case logic sets sqrt = 0 so this portion doesn't matter
|
||||||
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
|
// Otherwise, X has a leading 1 after possible normalization shift and is now in range [1, 2)
|
||||||
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
|
// Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
|
||||||
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
// Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b form), keeping X in fraction bits
|
||||||
|
// Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
|
||||||
|
// This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
|
||||||
|
// Subtracting 2 is equivalent to adding 1110. Subtracting 4 is equivalent to adding 1100. Prepend leading 1s to do a free subtraction.
|
||||||
|
// This also means only one extra fractional bit is needed because we never shift right by more than 1.
|
||||||
|
// Radix Exponent odd Exponent Even
|
||||||
|
// 2 x-2 = 2(x/2 - 1) x/2 - 2 = 2(x/4 - 1)
|
||||||
|
// 4 2(x)-4 = 4(x/2 - 1)) 2(x/2)-4 = 4(x/4 - 1)
|
||||||
|
// Summary: PreSqrtX = r(x/2or4 - 1)
|
||||||
|
|
||||||
|
logic [P.DIVb:0] PreSqrtX;
|
||||||
|
assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
|
||||||
|
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||||
|
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
||||||
|
else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; // 2PreSqrtX - 4 = 4(PreSqrtX/2 - 1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
// Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
|
||||||
|
// This saves one bit in DIVb because there is no initial right shift.
|
||||||
|
// However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
|
||||||
|
// That is an optimization for another day.
|
||||||
|
if (P.RADIX == 2) begin
|
||||||
|
logic [P.DIVb:0] PreSqrtX; // U1.DIVb
|
||||||
|
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||||
|
assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
||||||
|
end else begin
|
||||||
|
logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
|
||||||
|
mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
|
||||||
|
assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
|
||||||
|
end
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Initialize X for division or square root
|
||||||
|
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
// Select integer or floating-point operands
|
// Select integer or floating-point operands
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
@ -176,28 +206,37 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||||||
assign X = PreShiftX;
|
assign X = PreShiftX;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Divisor register
|
// Divisor register
|
||||||
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
|
flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D);
|
||||||
|
|
||||||
// Floating-point exponent
|
// Floating-point exponent
|
||||||
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
|
fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Ue(UeE));
|
||||||
flopen #(P.NE+2) expreg(clk, IFDivStartE, QeE, QeM);
|
flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM);
|
||||||
|
|
||||||
// Number of FSM cycles (to FSM)
|
// Number of FSM cycles (to FSM)
|
||||||
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
|
fdivsqrtcycles #(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE);
|
||||||
|
|
||||||
if (P.IDIV_ON_FPU) begin:intpipelineregs
|
if (P.IDIV_ON_FPU) begin:intpipelineregs
|
||||||
|
logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE;
|
||||||
|
logic RemOpE;
|
||||||
|
|
||||||
|
/* verilator lint_off WIDTH */
|
||||||
|
assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain
|
||||||
|
assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift
|
||||||
|
/* verilator lint_on WIDTH */
|
||||||
|
assign RemOpE = Funct3E[1];
|
||||||
|
mux2 #(P.DIVBLEN) normshiftmux(IntDivNormShiftE, IntRemNormShiftE, RemOpE, IntNormShiftE);
|
||||||
|
|
||||||
// pipeline registers
|
// pipeline registers
|
||||||
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
|
flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM);
|
||||||
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM);
|
||||||
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM);
|
||||||
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM);
|
||||||
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
|
flopen #(1) bsignreg(clk, IFDivStartE, BsE, BsM);
|
||||||
flopen #(P.DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM);
|
flopen #(P.DIVBLEN) nsreg(clk, IFDivStartE, IntNormShiftE, IntNormShiftM);
|
||||||
flopen #(P.DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM);
|
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
||||||
flopen #(P.XLEN) srcareg(clk, IFDivStartE, AE, AM);
|
|
||||||
if (P.XLEN==64)
|
if (P.XLEN==64)
|
||||||
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M);
|
||||||
end
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -29,33 +29,27 @@
|
|||||||
|
|
||||||
/* verilator lint_off UNOPTFLAT */
|
/* verilator lint_off UNOPTFLAT */
|
||||||
module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic [P.DIVb+3:0] D, DBar,
|
input logic [P.DIVb+3:0] D, DBar, // Q4.DIVb
|
||||||
input logic [P.DIVb:0] U, UM,
|
input logic [P.DIVb:0] U, UM, // U1.DIVb
|
||||||
input logic [P.DIVb+3:0] WS, WC,
|
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||||
input logic [P.DIVb+1:0] C,
|
input logic [P.DIVb+1:0] C, // Q2.DIVb
|
||||||
input logic SqrtE,
|
input logic SqrtE,
|
||||||
output logic un,
|
output logic un,
|
||||||
output logic [P.DIVb+1:0] CNext,
|
output logic [P.DIVb+1:0] CNext, // Q2.DIVb
|
||||||
output logic [P.DIVb:0] UNext, UMNext,
|
output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb
|
||||||
output logic [P.DIVb+3:0] WSNext, WCNext
|
output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb
|
||||||
);
|
);
|
||||||
/* verilator lint_on UNOPTFLAT */
|
/* verilator lint_on UNOPTFLAT */
|
||||||
|
|
||||||
logic [P.DIVb+3:0] Dsel;
|
logic [P.DIVb+3:0] Dsel; // Q4.DIVb
|
||||||
logic up, uz;
|
logic up, uz;
|
||||||
logic [P.DIVb+3:0] F;
|
logic [P.DIVb+3:0] F; // Q4.DIVb
|
||||||
logic [P.DIVb+3:0] AddIn;
|
logic [P.DIVb+3:0] AddIn; // Q4.DIVb
|
||||||
logic [P.DIVb+3:0] WSA, WCA;
|
logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb
|
||||||
|
|
||||||
// Qmient Selection logic
|
// Quotient Selection logic
|
||||||
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
|
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
|
||||||
// q encoding:
|
fdivsqrtuslc2 uslc2(.WS(WS[P.DIVb+3:P.DIVb]), .WC(WC[P.DIVb+3:P.DIVb]), .up, .uz, .un);
|
||||||
// 1000 = +2
|
|
||||||
// 0100 = +1
|
|
||||||
// 0000 = 0
|
|
||||||
// 0010 = -1
|
|
||||||
// 0001 = -2
|
|
||||||
fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);
|
|
||||||
|
|
||||||
// Sqrt F generation. Extend C, U, UM to Q4.k
|
// Sqrt F generation. Extend C, U, UM to Q4.k
|
||||||
fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||||
@ -66,7 +60,7 @@ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
|||||||
else if (uz) Dsel = '0;
|
else if (uz) Dsel = '0;
|
||||||
else Dsel = D; // un
|
else Dsel = D; // un
|
||||||
|
|
||||||
// Partial Product Generation
|
// Residual Update
|
||||||
// WSA, WCA = WS + WC - qD
|
// WSA, WCA = WS + WC - qD
|
||||||
mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
|
mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
|
||||||
csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
|
csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
|
||||||
|
@ -27,40 +27,33 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic [P.DIVb+3:0] D, DBar, D2, DBar2,
|
input logic [P.DIVb+3:0] D, DBar, D2, DBar2, // Q4.DIVb
|
||||||
input logic [P.DIVb:0] U,UM,
|
input logic [P.DIVb:0] U,UM, // U1.DIVb
|
||||||
input logic [P.DIVb+3:0] WS, WC,
|
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||||
input logic [P.DIVb+1:0] C,
|
input logic [P.DIVb+1:0] C, // Q2.DIVb
|
||||||
input logic SqrtE, j1,
|
input logic SqrtE, j1,
|
||||||
output logic [P.DIVb+1:0] CNext,
|
output logic [P.DIVb+1:0] CNext, // Q2.DIVb
|
||||||
output logic un,
|
output logic un,
|
||||||
output logic [P.DIVb:0] UNext, UMNext,
|
output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb
|
||||||
output logic [P.DIVb+3:0] WSNext, WCNext
|
output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [P.DIVb+3:0] Dsel;
|
logic [P.DIVb+3:0] Dsel; // Q4.DIVb
|
||||||
logic [3:0] udigit;
|
logic [3:0] udigit; // {+2, +1, -1, -2} or 0000 for 0
|
||||||
logic [P.DIVb+3:0] F;
|
logic [P.DIVb+3:0] F; // Q4.DIVb
|
||||||
logic [P.DIVb+3:0] AddIn;
|
logic [P.DIVb+3:0] AddIn; // Q4.DIVb
|
||||||
logic [4:0] Smsbs;
|
logic [4:0] Smsbs; // U1.4
|
||||||
logic [2:0] Dmsbs;
|
logic [2:0] Dmsbs; // U0.3 drop leading 1 from D
|
||||||
logic [7:0] WCmsbs, WSmsbs;
|
logic [7:0] WCmsbs, WSmsbs; // U4.4
|
||||||
logic CarryIn;
|
logic CarryIn;
|
||||||
logic [P.DIVb+3:0] WSA, WCA;
|
logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb
|
||||||
|
|
||||||
// Digit Selection logic
|
// Digit Selection logic
|
||||||
// u encoding:
|
assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root
|
||||||
// 1000 = +2
|
assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1
|
||||||
// 0100 = +1
|
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual
|
||||||
// 0000 = 0
|
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual
|
||||||
// 0010 = -1
|
fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
||||||
// 0001 = -2
|
|
||||||
assign Smsbs = U[P.DIVb:P.DIVb-4];
|
|
||||||
assign Dmsbs = D[P.DIVb-1:P.DIVb-3];
|
|
||||||
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4];
|
|
||||||
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4];
|
|
||||||
|
|
||||||
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
|
||||||
assign un = 1'b0; // unused for radix 4
|
assign un = 1'b0; // unused for radix 4
|
||||||
|
|
||||||
// F generation logic
|
// F generation logic
|
||||||
|
@ -31,15 +31,15 @@
|
|||||||
///////////////////////////////
|
///////////////////////////////
|
||||||
module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtuotfc2 import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic up, un,
|
input logic up, un,
|
||||||
input logic [P.DIVb+1:0] C,
|
input logic [P.DIVb+1:0] C, // Q2.DIVb
|
||||||
input logic [P.DIVb:0] U, UM,
|
input logic [P.DIVb:0] U, UM, // U1.DIVb
|
||||||
output logic [P.DIVb:0] UNext, UMNext
|
output logic [P.DIVb:0] UNext, UMNext // U1.DIVb
|
||||||
);
|
);
|
||||||
// The on-the-fly converter transfers the divsqrt
|
// The on-the-fly converter transfers the divsqrt
|
||||||
// bits to the quotient as they come.
|
// bits to the quotient as they come.
|
||||||
logic [P.DIVb:0] K;
|
logic [P.DIVb:0] K; // U1.DIVb one-hot
|
||||||
|
|
||||||
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
|
assign K = (C[P.DIVb:0] & ~(C[P.DIVb:0] << 1)); // Thermometer to one hot encoding
|
||||||
|
|
||||||
always_comb begin
|
always_comb begin
|
||||||
if (up) begin
|
if (up) begin
|
||||||
|
@ -28,15 +28,15 @@
|
|||||||
|
|
||||||
module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) (
|
module fdivsqrtuotfc4 import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic [3:0] udigit,
|
input logic [3:0] udigit,
|
||||||
input logic [P.DIVb:0] U, UM,
|
input logic [P.DIVb:0] U, UM, // U1.DIVb
|
||||||
input logic [P.DIVb:0] C,
|
input logic [P.DIVb:0] C, // Q1.DIVb
|
||||||
output logic [P.DIVb:0] UNext, UMNext
|
output logic [P.DIVb:0] UNext, UMNext // U1.DIVb
|
||||||
);
|
);
|
||||||
// The on-the-fly converter transfers the square root
|
// The on-the-fly converter transfers the square root
|
||||||
// bits to the quotient as they come.
|
// bits to the quotient as they come.
|
||||||
// Use this otfc for division and square root.
|
// Use this otfc for division and square root.
|
||||||
|
|
||||||
logic [P.DIVb:0] K1, K2, K3;
|
logic [P.DIVb:0] K1, K2, K3; // U1.DIVb
|
||||||
assign K1 = (C&~(C << 1)); // K
|
assign K1 = (C&~(C << 1)); // K
|
||||||
assign K2 = ((C << 1)&~(C << 2)); // 2K
|
assign K2 = ((C << 1)&~(C << 2)); // 2K
|
||||||
assign K3 = (C & ~(C << 2)); // 3K
|
assign K3 = (C & ~(C << 2)); // 3K
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
// fdivsqrtqsel2.sv
|
// fdivsqrtuslc2.sv
|
||||||
//
|
//
|
||||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||||
// Modified:13 January 2022
|
// Modified:13 January 2022
|
||||||
//
|
//
|
||||||
// Purpose: Radix 2 Quotient Digit Selection
|
// Purpose: Radix 2 Unified Quotient/Square Root Digit Selection
|
||||||
//
|
//
|
||||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||||
//
|
//
|
||||||
@ -18,7 +18,7 @@
|
|||||||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||||
// may obtain a copy of the License at
|
// may obtain a copy of the License at
|
||||||
//
|
//
|
||||||
// https://solderpad.org/licenses/SHL-2.1/
|
// https://solderpad.org/licenses/SHL-2.1/
|
||||||
//
|
//
|
||||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||||
@ -26,31 +26,26 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
module fdivsqrtqsel2 (
|
module fdivsqrtuslc2 (
|
||||||
input logic [3:0] ps, pc,
|
input logic [3:0] WS, WC, // Q4.0 most significant bits of redundant residual
|
||||||
output logic up, uz, un
|
output logic up, uz, un // {+1, 0, -1}
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [3:0] p, g;
|
logic sign;
|
||||||
logic magnitude, sign;
|
|
||||||
|
// Carry chain logic determines if W = WS + WC = -1, < -1, > -1 to choose 0, -1, 1 respectively
|
||||||
|
|
||||||
// The quotient selection logic is presented for simplicity, not
|
//if p2 * p1 * p0, W = -1 and choose digit of 0
|
||||||
// for efficiency. You can probably optimize your logic to
|
assign uz = ((WS[2]^WC[2]) & (WS[1]^WC[1]) &
|
||||||
// select the proper divisor with less delay.
|
(WS[0]^WC[0]));
|
||||||
|
|
||||||
// Quotient equations from EE371 lecture notes 13-20
|
// Otherwise determine sign using carry chain: sign = p3 ^ g_2:0
|
||||||
assign p = ps ^ pc;
|
assign sign = (WS[3]^WC[3])^
|
||||||
assign g = ps & pc;
|
(WS[2] & WC[2] | ((WS[2]^WC[2]) &
|
||||||
|
(WS[1]&WC[1] | ((WS[1]^WC[1]) &
|
||||||
assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) &
|
(WS[0]&WC[0])))));
|
||||||
(ps[0]^pc[0]));
|
|
||||||
assign sign = (ps[3]^pc[3])^
|
|
||||||
(ps[2] & pc[2] | ((ps[2]^pc[2]) &
|
|
||||||
(ps[1]&pc[1] | ((ps[1]^pc[1]) &
|
|
||||||
(ps[0]&pc[0])))));
|
|
||||||
|
|
||||||
// Produce digit = +1, 0, or -1
|
// Produce digit = +1, 0, or -1
|
||||||
assign up = magnitude & ~sign;
|
assign up = ~uz & ~sign;
|
||||||
assign uz = ~magnitude;
|
assign un = ~uz & sign;
|
||||||
assign un = magnitude & sign;
|
|
||||||
endmodule
|
endmodule
|
@ -1,10 +1,10 @@
|
|||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
// fdivsqrtqsel4.sv
|
// fdivsqrtuslc4.sv
|
||||||
//
|
//
|
||||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||||
// Modified:13 January 2022
|
// Modified:13 January 2022
|
||||||
//
|
//
|
||||||
// Purpose: Radix 4 Quotient Digit Selection
|
// Purpose: Table-based Radix 4 Unified Quotient/Square Root Digit Selection
|
||||||
//
|
//
|
||||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||||
//
|
//
|
||||||
@ -26,25 +26,25 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
module fdivsqrtqsel4 (
|
module fdivsqrtuslc4 (
|
||||||
input logic [2:0] Dmsbs,
|
input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1
|
||||||
input logic [4:0] Smsbs,
|
input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation
|
||||||
input logic [7:0] WSmsbs, WCmsbs,
|
input logic [7:0] WSmsbs, WCmsbs, // Q4.4 redundant residual most significant bits
|
||||||
input logic Sqrt, j1,
|
input logic Sqrt, j1,
|
||||||
output logic [3:0] udigit
|
output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot
|
||||||
);
|
);
|
||||||
logic [6:0] Wmsbs;
|
logic [7:0] PreWmsbs; // Q4.4 nonredundant residual msbs
|
||||||
logic [7:0] PreWmsbs;
|
logic [6:0] Wmsbs; // Q4.3 truncated nonredundant residual
|
||||||
logic [2:0] A;
|
logic [2:0] A; // U0.3 upper bits of D or Smsbs, discarding integer bit
|
||||||
|
|
||||||
assign PreWmsbs = WCmsbs + WSmsbs;
|
assign PreWmsbs = WCmsbs + WSmsbs; // add redundant residual to find msbs
|
||||||
assign Wmsbs = PreWmsbs[7:1];
|
assign Wmsbs = PreWmsbs[7:1]; // truncate least significant bit to Q4.3 to index table
|
||||||
// D = 0001.xxx...
|
// D = 0001.xxx...
|
||||||
// Dmsbs = | |
|
// Dmsbs = | |
|
||||||
// W = xxxx.xxx...
|
// W = xxxx.xxx...
|
||||||
// Wmsbs = | |
|
// Wmsbs = | |
|
||||||
|
|
||||||
logic [3:0] USel4[1023:0];
|
logic [3:0] USel4[1023:0]; // 1024-bit table indexed with 3 bits of A and 7 bits of Wmsbs
|
||||||
|
|
||||||
// Prepopulate selection table; this is constant at compile time
|
// Prepopulate selection table; this is constant at compile time
|
||||||
always_comb begin
|
always_comb begin
|
||||||
@ -101,10 +101,10 @@ module fdivsqrtqsel4 (
|
|||||||
// Select A
|
// Select A
|
||||||
always_comb
|
always_comb
|
||||||
if (Sqrt) begin
|
if (Sqrt) begin
|
||||||
if (j1) A = 3'b101;
|
if (j1) A = 3'b101; // on first sqrt iteration A = .101
|
||||||
else if (Smsbs == 5'b10000) A = 3'b111;
|
else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111
|
||||||
else A = Smsbs[2:0];
|
else A = Smsbs[2:0]; // otherwise use A = 2S (in U0.3 format)
|
||||||
end else A = Dmsbs;
|
end else A = Dmsbs; // for division, A = D (in U0.3 format, dropping leading 1)
|
||||||
|
|
||||||
// Select quotient digit from lookup table based on A and W
|
// Select quotient digit from lookup table based on A and W
|
||||||
assign udigit = USel4[{A,Wmsbs}];
|
assign udigit = USel4[{A,Wmsbs}];
|
@ -1,10 +1,10 @@
|
|||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
// fdivsqrtqsel4cmp.sv
|
// fdivsqrtuslc4cmp.sv
|
||||||
//
|
//
|
||||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||||
// Modified:13 January 2022
|
// Modified:13 January 2022
|
||||||
//
|
//
|
||||||
// Purpose: Comparator-based Radix 4 Quotient Digit Selection
|
// Purpose: Comparator-based Radix 4 Unified Quotient/Square Root Digit Selection
|
||||||
//
|
//
|
||||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||||
//
|
//
|
||||||
@ -26,12 +26,12 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
module fdivsqrtqsel4cmp (
|
module fdivsqrtuslc4cmp (
|
||||||
input logic [2:0] Dmsbs,
|
input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1
|
||||||
input logic [4:0] Smsbs,
|
input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation
|
||||||
input logic [7:0] WSmsbs, WCmsbs,
|
input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits
|
||||||
input logic SqrtE, j1,
|
input logic SqrtE, j1,
|
||||||
output logic [3:0] udigit
|
output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot
|
||||||
);
|
);
|
||||||
logic [6:0] Wmsbs;
|
logic [6:0] Wmsbs;
|
||||||
logic [7:0] PreWmsbs;
|
logic [7:0] PreWmsbs;
|
@ -133,8 +133,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
|||||||
logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
logic [P.XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
|
||||||
|
|
||||||
// divide signals
|
// divide signals
|
||||||
logic [P.DIVb:0] QmM; // fdivsqrt significand
|
logic [P.DIVb:0] UmM; // fdivsqrt significand
|
||||||
logic [P.NE+1:0] QeM; // fdivsqrt exponent
|
logic [P.NE+1:0] UeM; // fdivsqrt exponent
|
||||||
logic DivStickyM; // fdivsqrt sticky bit
|
logic DivStickyM; // fdivsqrt sticky bit
|
||||||
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
|
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
|
||||||
logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
logic [P.XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
|
||||||
@ -242,8 +242,8 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
|||||||
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
|
||||||
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
|
||||||
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
|
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
|
||||||
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
|
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM,
|
||||||
.QmM, .FIntDivResultM);
|
.UmM, .FIntDivResultM);
|
||||||
|
|
||||||
// compare: fmin/fmax, flt/fle/feq
|
// compare: fmin/fmax, flt/fle/feq
|
||||||
fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
|
fcmp #(P) fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
|
||||||
@ -326,9 +326,9 @@ module fpu import cvw::*; #(parameter cvw_t P) (
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
|
postprocess #(P) postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
|
||||||
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
|
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivUm(UmM), .FmaSs(SsM),
|
||||||
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
|
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
|
||||||
.FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
|
.FmaSm(SmM), .DivUe(UeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
|
||||||
.CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM),
|
.CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM),
|
||||||
.ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM),
|
.ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM),
|
||||||
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
|
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
|
||||||
|
@ -27,8 +27,8 @@
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
module divshiftcalc import cvw::*; #(parameter cvw_t P) (
|
module divshiftcalc import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic [P.DIVb:0] DivQm, // divsqrt significand
|
input logic [P.DIVb:0] DivUm, // divsqrt significand
|
||||||
input logic [P.NE+1:0] DivQe, // divsqrt exponent
|
input logic [P.NE+1:0] DivUe, // divsqrt exponent
|
||||||
output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
|
output logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
|
||||||
output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
|
output logic [P.NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
|
||||||
output logic DivResSubnorm, // is the divsqrt result subnormal
|
output logic DivResSubnorm, // is the divsqrt result subnormal
|
||||||
@ -41,23 +41,23 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) (
|
|||||||
|
|
||||||
// is the result subnormal
|
// is the result subnormal
|
||||||
// if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes
|
// if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes
|
||||||
assign DivResSubnorm = DivQe[P.NE+1]|(~|DivQe[P.NE+1:0]);
|
assign DivResSubnorm = DivUe[P.NE+1]|(~|DivUe[P.NE+1:0]);
|
||||||
|
|
||||||
// if the result is subnormal
|
// if the result is subnormal
|
||||||
// 00000000x.xxxxxx... Exp = DivQe
|
// 00000000x.xxxxxx... Exp = DivUe
|
||||||
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
|
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
|
||||||
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
|
// .00xxxxxxxxxxxxx... << DivUe+NF+1 Exp = +1
|
||||||
// .0000xxxxxxxxxxx... >> 1 Exp = 1
|
// .0000xxxxxxxxxxx... >> 1 Exp = 1
|
||||||
// Left shift amount = DivQe+NF+1-1
|
// Left shift amount = DivUe+NF+1-1
|
||||||
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivQe;
|
assign DivSubnormShift = (P.NE+2)'(P.NF)+DivUe;
|
||||||
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
|
assign DivSubnormShiftPos = ~DivSubnormShift[P.NE+1];
|
||||||
|
|
||||||
// if the result is normalized
|
// if the result is normalized
|
||||||
// 00000000x.xxxxxx... Exp = DivQe
|
// 00000000x.xxxxxx... Exp = DivUe
|
||||||
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
|
// .00000000xxxxxxx... >> NF+1 Exp = DivUe+NF+1
|
||||||
// 00000000.xxxxxxx... << NF Exp = DivQe+1
|
// 00000000.xxxxxxx... << NF Exp = DivUe+1
|
||||||
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
|
// 00000000x.xxxxxx... << NF Exp = DivUe (extra shift done afterwards)
|
||||||
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
|
// 00000000xx.xxxxx... << 1? Exp = DivUe-1 (determined after)
|
||||||
// initial Left shift amount = NF
|
// initial Left shift amount = NF
|
||||||
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
|
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
|
||||||
assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF);
|
assign NormShift = (P.LOGNORMSHIFTSZ)'(P.NF);
|
||||||
@ -68,5 +68,5 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) (
|
|||||||
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
|
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
|
||||||
|
|
||||||
// pre-shift the divider result for normalization
|
// pre-shift the divider result for normalization
|
||||||
assign DivShiftIn = {{P.NF{1'b0}}, DivQm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
|
assign DivShiftIn = {{P.NF{1'b0}}, DivUm, {P.NORMSHIFTSZ-P.DIVb-1-P.NF{1'b0}}};
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -48,8 +48,8 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
|||||||
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count
|
input logic [$clog2(3*P.NF+5)-1:0] FmaSCnt, // the normalization shift count
|
||||||
//divide signals
|
//divide signals
|
||||||
input logic DivSticky, // divider sticky bit
|
input logic DivSticky, // divider sticky bit
|
||||||
input logic [P.NE+1:0] DivQe, // divsqrt exponent
|
input logic [P.NE+1:0] DivUe, // divsqrt exponent
|
||||||
input logic [P.DIVb:0] DivQm, // divsqrt significand
|
input logic [P.DIVb:0] DivUm, // divsqrt significand
|
||||||
// conversion signals
|
// conversion signals
|
||||||
input logic CvtCs, // the result's sign
|
input logic CvtCs, // the result's sign
|
||||||
input logic [P.NE:0] CvtCe, // the calculated exponent
|
input logic [P.NE:0] CvtCe, // the calculated exponent
|
||||||
@ -91,7 +91,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
|||||||
// division signals
|
// division signals
|
||||||
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shift amount
|
logic [P.LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shift amount
|
||||||
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
|
logic [P.NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
|
||||||
logic [P.NE+1:0] Qe; // divsqrt corrected exponent after correction shift
|
logic [P.NE+1:0] Ue; // divsqrt corrected exponent after correction shift
|
||||||
logic DivByZero; // divide by zero flag
|
logic DivByZero; // divide by zero flag
|
||||||
logic DivResSubnorm; // is the divsqrt result subnormal
|
logic DivResSubnorm; // is the divsqrt result subnormal
|
||||||
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
|
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amount positive (not underflowed)
|
||||||
@ -146,7 +146,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
|||||||
fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
|
fmashiftcalc #(P) fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
|
||||||
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
|
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
|
||||||
|
|
||||||
divshiftcalc #(P) divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
|
divshiftcalc #(P) divshiftcalc(.DivUe, .DivUm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
|
||||||
|
|
||||||
// select which unit's output to shift
|
// select which unit's output to shift
|
||||||
always_comb
|
always_comb
|
||||||
@ -174,7 +174,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
|||||||
|
|
||||||
// correct for LZA/divsqrt error
|
// correct for LZA/divsqrt error
|
||||||
shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
|
shiftcorrection #(P) shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
|
||||||
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
|
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivUe, .Ue, .FmaSZero, .Shifted, .FmaMe, .Mf);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Rounding
|
// Rounding
|
||||||
@ -189,7 +189,7 @@ module postprocess import cvw::*; #(parameter cvw_t P) (
|
|||||||
// calculate result sign used in rounding unit
|
// calculate result sign used in rounding unit
|
||||||
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
|
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
|
||||||
|
|
||||||
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
|
round #(P) round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Ue,
|
||||||
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
|
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
|
||||||
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
|
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ module round import cvw::*; #(parameter cvw_t P) (
|
|||||||
// divsqrt
|
// divsqrt
|
||||||
input logic DivOp, // is a division operation being done
|
input logic DivOp, // is a division operation being done
|
||||||
input logic DivSticky, // divsqrt sticky bit
|
input logic DivSticky, // divsqrt sticky bit
|
||||||
input logic [P.NE+1:0] Qe, // the divsqrt calculated exponent
|
input logic [P.NE+1:0] Ue, // the divsqrt calculated exponent
|
||||||
// cvt
|
// cvt
|
||||||
input logic CvtOp, // is a convert operation being done
|
input logic CvtOp, // is a convert operation being done
|
||||||
input logic ToInt, // is the cvt op a cvt to integer
|
input logic ToInt, // is the cvt op a cvt to integer
|
||||||
@ -300,8 +300,8 @@ module round import cvw::*; #(parameter cvw_t P) (
|
|||||||
case(PostProcSel)
|
case(PostProcSel)
|
||||||
2'b10: Me = FmaMe; // fma
|
2'b10: Me = FmaMe; // fma
|
||||||
2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
|
2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
|
||||||
// 2'b01: Me = DivDone ? Qe : '0; // divide
|
// 2'b01: Me = DivDone ? Ue : '0; // divide
|
||||||
2'b01: Me = Qe; // divide
|
2'b01: Me = Ue; // divide
|
||||||
default: Me = '0;
|
default: Me = '0;
|
||||||
endcase
|
endcase
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
|||||||
// divsqrt
|
// divsqrt
|
||||||
input logic DivOp, // is it a divsqrt operation
|
input logic DivOp, // is it a divsqrt operation
|
||||||
input logic DivResSubnorm, // is the divsqrt result subnormal
|
input logic DivResSubnorm, // is the divsqrt result subnormal
|
||||||
input logic [P.NE+1:0] DivQe, // the divsqrt result's exponent
|
input logic [P.NE+1:0] DivUe, // the divsqrt result's exponent
|
||||||
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
|
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
|
||||||
//fma
|
//fma
|
||||||
input logic FmaOp, // is it an fma operation
|
input logic FmaOp, // is it an fma operation
|
||||||
@ -41,7 +41,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
|||||||
// output
|
// output
|
||||||
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
|
output logic [P.NE+1:0] FmaMe, // exponent of the normalized sum
|
||||||
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
|
output logic [P.CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction
|
||||||
output logic [P.NE+1:0] Qe // corrected exponent for divider
|
output logic [P.NE+1:0] Ue // corrected exponent for divider
|
||||||
);
|
);
|
||||||
|
|
||||||
logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
|
logic [3*P.NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
|
||||||
@ -61,7 +61,7 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
|||||||
|
|
||||||
// correct the shifting of the divsqrt caused by producing a result in the [.5, 2) range
|
// correct the shifting of the divsqrt caused by producing a result in the [.5, 2) range
|
||||||
// condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
|
// condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
|
||||||
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
|
assign LeftShiftQm = (LZAPlus1|(DivUe==1&~LZAPlus1));
|
||||||
assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
|
assign CorrQm0 = Shifted[P.NORMSHIFTSZ-3:P.NORMSHIFTSZ-P.CORRSHIFTSZ-2];
|
||||||
assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
|
assign CorrQm1 = Shifted[P.NORMSHIFTSZ-2:P.NORMSHIFTSZ-P.CORRSHIFTSZ-1];
|
||||||
mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
|
mux2 #(P.CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
|
||||||
@ -87,5 +87,5 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) (
|
|||||||
|
|
||||||
// the quotient is in the range [.5,2) if there is no early termination
|
// the quotient is in the range [.5,2) if there is no early termination
|
||||||
// if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
|
// if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
|
||||||
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(P.NE+1)'(0), ~LZAPlus1};
|
assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1};
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -83,7 +83,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
|
|||||||
assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing
|
assign BadNaNBox = ~(Fmt|(&In[P.FLEN-1:P.LEN1])); // Check NaN boxing
|
||||||
always_comb
|
always_comb
|
||||||
if (BadNaNBox) begin
|
if (BadNaNBox) begin
|
||||||
// PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
|
|
||||||
PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
|
PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
|
||||||
end else
|
end else
|
||||||
PostBox = In;
|
PostBox = In;
|
||||||
@ -143,8 +142,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
|
|||||||
if (BadNaNBox) begin
|
if (BadNaNBox) begin
|
||||||
case (Fmt)
|
case (Fmt)
|
||||||
P.FMT: PostBox = In;
|
P.FMT: PostBox = In;
|
||||||
// P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, In[P.LEN1-P.NE1-3:0]};
|
|
||||||
// P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, In[P.LEN2-P.NE2-3:0]};
|
|
||||||
P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
|
P.FMT1: PostBox = {{(P.FLEN-P.LEN1){1'b1}}, 1'b1, {(P.NE1+1){1'b1}}, {(P.LEN1-P.NE1-2){1'b0}}};
|
||||||
P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}};
|
P.FMT2: PostBox = {{(P.FLEN-P.LEN2){1'b1}}, 1'b1, {(P.NE2+1){1'b1}}, {(P.LEN2-P.NE2-2){1'b0}}};
|
||||||
default: PostBox = 'x;
|
default: PostBox = 'x;
|
||||||
@ -230,9 +227,6 @@ module unpackinput import cvw::*; #(parameter cvw_t P) (
|
|||||||
if (BadNaNBox) begin
|
if (BadNaNBox) begin
|
||||||
case (Fmt)
|
case (Fmt)
|
||||||
2'b11: PostBox = In;
|
2'b11: PostBox = In;
|
||||||
// 2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, In[P.D_LEN-P.D_NE-3:0]};
|
|
||||||
// 2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, In[P.S_LEN-P.S_NE-3:0]};
|
|
||||||
// 2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, In[P.H_LEN-P.H_NE-3:0]};
|
|
||||||
2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}};
|
2'b01: PostBox = {{(P.Q_LEN-P.D_LEN){1'b1}}, 1'b1, {(P.D_NE+1){1'b1}}, {(P.D_LEN-P.D_NE-2){1'b0}}};
|
||||||
2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}};
|
2'b00: PostBox = {{(P.Q_LEN-P.S_LEN){1'b1}}, 1'b1, {(P.S_NE+1){1'b1}}, {(P.S_LEN-P.S_NE-2){1'b0}}};
|
||||||
2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};
|
2'b10: PostBox = {{(P.Q_LEN-P.H_LEN){1'b1}}, 1'b1, {(P.H_NE+1){1'b1}}, {(P.H_LEN-P.H_NE-2){1'b0}}};
|
||||||
|
@ -33,7 +33,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Core Memory
|
// Core Memory
|
||||||
logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0];
|
(*rom_style="block" *) logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0];
|
||||||
|
|
||||||
// dh 10/30/23 ROM macros are presently commented out
|
// dh 10/30/23 ROM macros are presently commented out
|
||||||
// because they don't point to a generated ROM
|
// because they don't point to a generated ROM
|
||||||
@ -41,15 +41,23 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
|
|||||||
rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
|
rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
|
||||||
|
|
||||||
end if ((`USE_SRAM == 1) & (ADDR_WDITH == 7) & (DATA_WIDTH == 32)) begin
|
end if ((`USE_SRAM == 1) & (ADDR_WDITH == 7) & (DATA_WIDTH == 32)) begin
|
||||||
rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
|
rom1p1r_128x32 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout));
|
||||||
|
|
||||||
end else begin */
|
end else begin */
|
||||||
always @ (posedge clk)
|
|
||||||
if(ce) dout <= ROM[addr];
|
initial begin
|
||||||
|
if (PRELOAD_ENABLED) begin
|
||||||
|
$readmemh("$WALLY/fpga/src/boot.mem", ROM, 0);
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
always @ (posedge clk) begin
|
||||||
|
if(ce) dout <= ROM[addr];
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
// for FPGA, initialize with zero-stage bootloader
|
// for FPGA, initialize with zero-stage bootloader
|
||||||
if(PRELOAD_ENABLED) begin
|
/*if(PRELOAD_ENABLED) begin
|
||||||
initial begin
|
initial begin
|
||||||
ROM[0]=64'h8001819300002197;
|
ROM[0]=64'h8001819300002197;
|
||||||
ROM[1]=64'h4281420141014081;
|
ROM[1]=64'h4281420141014081;
|
||||||
@ -195,6 +203,6 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0)
|
|||||||
ROM[141]=64'h0000808241010113;
|
ROM[141]=64'h0000808241010113;
|
||||||
|
|
||||||
end // if (PRELOAD_ENABLED)
|
end // if (PRELOAD_ENABLED)
|
||||||
end
|
end*/
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
@ -26,8 +26,7 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
module hazard (
|
module hazard import cvw::*; #(parameter cvw_t P) (
|
||||||
// Detect hazards
|
|
||||||
input logic BPWrongE, CSRWriteFenceM, RetM, TrapM,
|
input logic BPWrongE, CSRWriteFenceM, RetM, TrapM,
|
||||||
input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD,
|
input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD,
|
||||||
input logic LSUStallM, IFUStallF,
|
input logic LSUStallM, IFUStallF,
|
||||||
|
@ -131,7 +131,7 @@ module datapath import cvw::*; #(parameter cvw_t P) (
|
|||||||
if (P.F_SUPPORTED) begin:fpmux
|
if (P.F_SUPPORTED) begin:fpmux
|
||||||
mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
mux2 #(P.XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
|
||||||
mux2 #(P.XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
|
mux2 #(P.XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, FCvtIntW, IFCvtResultW);
|
||||||
if (P.IDIV_ON_FPU) begin
|
if (P.IDIV_ON_FPU & P.F_SUPPORTED) begin
|
||||||
mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
|
mux2 #(P.XLEN) divresultmuxW(MDUResultW, FIntDivResultW, IntDivW, MulDivResultW);
|
||||||
end else begin
|
end else begin
|
||||||
assign MulDivResultW = MDUResultW;
|
assign MulDivResultW = MDUResultW;
|
||||||
|
@ -39,7 +39,9 @@ module irom import cvw::*; #(parameter cvw_t P) (
|
|||||||
logic [31:0] RawIROMInstrF;
|
logic [31:0] RawIROMInstrF;
|
||||||
logic [2:1] AdrD;
|
logic [2:1] AdrD;
|
||||||
|
|
||||||
rom1p1r #(ADDR_WDITH, P.XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
|
// preload IROM with the FPGA bootloader by default so that it synthesizes to something, avoiding having the IEU optimized away because instructions are all 0
|
||||||
|
// the testbench replaces these dummy contents with the actual program of interest during simulation
|
||||||
|
rom1p1r #(ADDR_WDITH, P.XLEN, 1) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull));
|
||||||
if (P.XLEN == 32) assign RawIROMInstrF = IROMInstrFFull;
|
if (P.XLEN == 32) assign RawIROMInstrF = IROMInstrFFull;
|
||||||
else begin
|
else begin
|
||||||
// IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two
|
// IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two
|
||||||
|
@ -92,7 +92,8 @@ module lsu import cvw::*; #(parameter cvw_t P) (
|
|||||||
input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit
|
input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit
|
||||||
input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit
|
input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit
|
||||||
);
|
);
|
||||||
localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED;
|
localparam logic MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED;
|
||||||
|
localparam MLEN = MISALIGN_SUPPORT ? 2*P.LLEN : P.LLEN; // widen buffer for misaligned accesses
|
||||||
|
|
||||||
logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer
|
logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer
|
||||||
logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer
|
logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer
|
||||||
@ -118,9 +119,9 @@ module lsu import cvw::*; #(parameter cvw_t P) (
|
|||||||
|
|
||||||
logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data
|
logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data
|
||||||
/* verilator lint_off WIDTHEXPAND */
|
/* verilator lint_off WIDTHEXPAND */
|
||||||
logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data
|
logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data
|
||||||
logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] LSUWriteDataSpillM; // Final write data
|
logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data
|
||||||
logic [((MISALIGN_SUPPORT+1)*P.LLEN-1)/8:0] ByteMaskSpillM; // Selects which bytes within a word to write
|
logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write
|
||||||
/* verilator lint_on WIDTHEXPAND */
|
/* verilator lint_on WIDTHEXPAND */
|
||||||
logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data
|
logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data
|
||||||
logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data
|
logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data
|
||||||
|
@ -57,7 +57,7 @@ module mdu import cvw::*; #(parameter cvw_t P) (
|
|||||||
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
|
// Start a divide when a new division instruction is received and the divider isn't already busy or finishing
|
||||||
// When IDIV_ON_FPU is set, use the FPU divider instead
|
// When IDIV_ON_FPU is set, use the FPU divider instead
|
||||||
// In ZMMUL, with M_SUPPORTED = 0, omit the divider
|
// In ZMMUL, with M_SUPPORTED = 0, omit the divider
|
||||||
if ((P.IDIV_ON_FPU) || (!P.M_SUPPORTED)) begin:nodiv
|
if ((P.IDIV_ON_FPU & P.F_SUPPORTED) || (!P.M_SUPPORTED)) begin:nodiv
|
||||||
assign QuotM = 0;
|
assign QuotM = 0;
|
||||||
assign RemM = 0;
|
assign RemM = 0;
|
||||||
assign DivBusyE = 0;
|
assign DivBusyE = 0;
|
||||||
|
@ -2,10 +2,14 @@
|
|||||||
// spi_apb.sv
|
// spi_apb.sv
|
||||||
//
|
//
|
||||||
// Written: Naiche Whyte-Aguayo nwhyteaguayo@g.hmc.edu 11/16/2022
|
// Written: Naiche Whyte-Aguayo nwhyteaguayo@g.hmc.edu 11/16/2022
|
||||||
|
|
||||||
//
|
//
|
||||||
// Purpose: SPI peripheral
|
// Purpose: SPI peripheral
|
||||||
// See FU540-C000-v1.0 for specifications
|
//
|
||||||
|
// SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it consists of synchronous 8 byte transmit and receive FIFOs connected to shift registers.
|
||||||
|
// The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing
|
||||||
|
// to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output,
|
||||||
|
// along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY.
|
||||||
|
// Current limitations: Flash read sequencer mode not implemented, dual and quad mode not supported
|
||||||
//
|
//
|
||||||
// A component of the Wally configurable RISC-V project.
|
// A component of the Wally configurable RISC-V project.
|
||||||
//
|
//
|
||||||
@ -25,19 +29,6 @@
|
|||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// Current limitations: Flash read sequencer mode not implemented, dual and quad modes untestable with current test plan.
|
|
||||||
|
|
||||||
// Attempt to move from >= comparisons by initializing in FSM differently
|
|
||||||
// Parameterize SynchFIFO
|
|
||||||
// look at ReadIncrement/WriteIncrement delay necessity
|
|
||||||
|
|
||||||
/*
|
|
||||||
SPI module is written to the specifications described in FU540-C000-v1.0. At the top level, it is consists of synchronous 8 byte transmit and recieve FIFOs connected to shift registers.
|
|
||||||
The FIFOs are connected to WALLY by an apb control register interface, which includes various control registers for modifying the SPI transmission along with registers for writing
|
|
||||||
to the transmit FIFO and reading from the receive FIFO. The transmissions themselves are then controlled by a finite state machine. The SPI module uses 4 tristate pins for SPI input/output,
|
|
||||||
along with a 4 bit Chip Select signal, a clock signal, and an interrupt signal to WALLY.
|
|
||||||
*/
|
|
||||||
|
|
||||||
module spi_apb import cvw::*; #(parameter cvw_t P) (
|
module spi_apb import cvw::*; #(parameter cvw_t P) (
|
||||||
input logic PCLK, PRESETn,
|
input logic PCLK, PRESETn,
|
||||||
input logic PSEL,
|
input logic PSEL,
|
||||||
@ -54,27 +45,27 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
output logic SPIIntr
|
output logic SPIIntr
|
||||||
);
|
);
|
||||||
|
|
||||||
//SPI control registers. Refer to SiFive FU540-C000 manual
|
// SPI control registers. Refer to SiFive FU540-C000 manual
|
||||||
logic [11:0] SckDiv;
|
logic [11:0] SckDiv;
|
||||||
logic [1:0] SckMode;
|
logic [1:0] SckMode;
|
||||||
logic [1:0] ChipSelectID;
|
logic [1:0] ChipSelectID;
|
||||||
logic [3:0] ChipSelectDef;
|
logic [3:0] ChipSelectDef;
|
||||||
logic [1:0] ChipSelectMode;
|
logic [1:0] ChipSelectMode;
|
||||||
logic [15:0] Delay0, Delay1;
|
logic [15:0] Delay0, Delay1;
|
||||||
logic [4:0] Format;
|
logic [4:0] Format;
|
||||||
logic [7:0] ReceiveData;
|
logic [7:0] ReceiveData;
|
||||||
logic [2:0] TransmitWatermark, ReceiveWatermark;
|
logic [2:0] TransmitWatermark, ReceiveWatermark;
|
||||||
logic [8:0] TransmitData;
|
logic [8:0] TransmitData;
|
||||||
logic [1:0] InterruptEnable, InterruptPending;
|
logic [1:0] InterruptEnable, InterruptPending;
|
||||||
|
|
||||||
//Bus interface signals
|
// Bus interface signals
|
||||||
logic [7:0] Entry;
|
logic [7:0] Entry;
|
||||||
logic Memwrite;
|
logic Memwrite;
|
||||||
logic [31:0] Din, Dout;
|
logic [31:0] Din, Dout;
|
||||||
logic TransmitInactive; //High when there is no transmission, used as hardware interlock signal
|
logic TransmitInactive; // High when there is no transmission, used as hardware interlock signal
|
||||||
|
|
||||||
//FIFO FSM signals
|
// FIFO FSM signals
|
||||||
//Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1]
|
// Watermark signals - TransmitReadMark = ip[0], ReceiveWriteMark = ip[1]
|
||||||
logic TransmitWriteMark, TransmitReadMark, RecieveWriteMark, RecieveReadMark;
|
logic TransmitWriteMark, TransmitReadMark, RecieveWriteMark, RecieveReadMark;
|
||||||
logic TransmitFIFOWriteFull, TransmitFIFOReadEmpty;
|
logic TransmitFIFOWriteFull, TransmitFIFOReadEmpty;
|
||||||
logic TransmitFIFOReadIncrement;
|
logic TransmitFIFOReadIncrement;
|
||||||
@ -83,75 +74,68 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
logic ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty;
|
logic ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty;
|
||||||
logic [7:0] TransmitFIFOReadData, ReceiveFIFOWriteData;
|
logic [7:0] TransmitFIFOReadData, ReceiveFIFOWriteData;
|
||||||
logic [2:0] TransmitWriteWatermarkLevel, ReceiveReadWatermarkLevel;
|
logic [2:0] TransmitWriteWatermarkLevel, ReceiveReadWatermarkLevel;
|
||||||
logic [7:0] ReceiveShiftRegEndian; //reverses ReceiveShiftReg if Format[2] set (little endian transmission)
|
logic [7:0] ReceiveShiftRegEndian; // Reverses ReceiveShiftReg if Format[2] set (little endian transmission)
|
||||||
|
|
||||||
//Transmission signals
|
// Transmission signals
|
||||||
logic sck;
|
logic sck;
|
||||||
logic [11:0] DivCounter; //counter for sck
|
logic [11:0] DivCounter; // Counter for sck
|
||||||
logic SCLKenable; //flip flop enable high every sclk edge
|
logic SCLKenable; // Flip flop enable high every sclk edge
|
||||||
|
|
||||||
//Delay signals
|
// Delay signals
|
||||||
logic [8:0] ImplicitDelay1; //Adds implicit delay to cs-sck delay counter based on phase
|
logic [8:0] ImplicitDelay1; // Adds implicit delay to cs-sck delay counter based on phase
|
||||||
logic [8:0] ImplicitDelay2; //Adds implicit delay to sck-cs delay counter based on phase
|
logic [8:0] ImplicitDelay2; // Adds implicit delay to sck-cs delay counter based on phase
|
||||||
logic [8:0] CS_SCKCount; //Counter for cs-sck delay
|
logic [8:0] CS_SCKCount; // Counter for cs-sck delay
|
||||||
logic [8:0] SCK_CSCount; //Counter for sck-cs delay
|
logic [8:0] SCK_CSCount; // Counter for sck-cs delay
|
||||||
logic [8:0] InterCSCount; //Counter for inter cs delay
|
logic [8:0] InterCSCount; // Counter for inter cs delay
|
||||||
logic [8:0] InterXFRCount; //Counter for inter xfr delay
|
logic [8:0] InterXFRCount; // Counter for inter xfr delay
|
||||||
logic CS_SCKCompare; //Boolean comparison signal, high when CS_SCKCount >= cs-sck delay
|
logic ZeroDelayHoldMode; // High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
|
||||||
logic SCK_CSCompare; //Boolean comparison signal, high when SCK_CSCount >= sck-cs delay
|
|
||||||
logic InterCSCompare; //Boolean comparison signal, high when InterCSCount >= inter cs delay
|
|
||||||
logic InterXFRCompare; //Boolean comparison signal, high when InterXFRCount >= inter xfr delay
|
|
||||||
logic ZeroDelayHoldMode; //High when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
|
|
||||||
|
|
||||||
//Frame counting signals
|
// Frame counting signals
|
||||||
logic [3:0] FrameCount; //Counter for number of frames in transmission
|
logic [3:0] FrameCount; // Counter for number of frames in transmission
|
||||||
logic FrameCompare; //Boolean comparison signal, high when FrameCount = Format[7:4]
|
logic [3:0] ReceivePenultimateFrameCount; // Counter
|
||||||
logic [3:0] ReceivePenultimateFrame; //Frame number - 1
|
logic ReceivePenultimateFrame; // High when penultimate frame in transmission has been reached
|
||||||
logic [3:0] ReceivePenultimateFrameCount; //Counter
|
|
||||||
logic ReceivePenultimateFrameBoolean; //High when penultimate frame in transmission has been reached
|
|
||||||
|
|
||||||
//State fsm signals
|
// State fsm signals
|
||||||
logic Active; //High when state is either Active1 or Active0 (during transmission)
|
logic Active; // High when state is either Active1 or Active0 (during transmission)
|
||||||
logic Active0; //High when state is Active0
|
logic Active0; // High when state is Active0
|
||||||
|
|
||||||
//Shift reg signals
|
// Shift reg signals
|
||||||
logic ShiftEdge; //Determines which edge of sck to shift from TransmitShiftReg
|
logic ShiftEdge; // Determines which edge of sck to shift from TransmitShiftReg
|
||||||
logic [7:0] TransmitShiftReg; //Transmit shift register
|
logic [7:0] TransmitShiftReg; // Transmit shift register
|
||||||
logic [7:0] ReceiveShiftReg; //Receive shift register
|
logic [7:0] ReceiveShiftReg; // Receive shift register
|
||||||
logic SampleEdge; //Determines which edge of sck to sample from ReceiveShiftReg
|
logic SampleEdge; // Determines which edge of sck to sample from ReceiveShiftReg
|
||||||
logic [7:0] TransmitDataEndian; //Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB
|
logic [7:0] TransmitDataEndian; // Reverses TransmitData from txFIFO if littleendian, since TransmitReg always shifts MSB
|
||||||
logic TransmitShiftRegLoad; //Determines when to load TransmitShiftReg
|
logic TransmitShiftRegLoad; // Determines when to load TransmitShiftReg
|
||||||
logic ReceiveShiftFull; //High when receive shift register is full
|
logic ReceiveShiftFull; // High when receive shift register is full
|
||||||
logic TransmitShiftEmpty; //High when transmit shift register is empty
|
logic TransmitShiftEmpty; // High when transmit shift register is empty
|
||||||
logic ShiftIn; //Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST)
|
logic ShiftIn; // Determines whether to shift from SPIIn or SPIOut (if SPI_LOOPBACK_TEST)
|
||||||
logic [3:0] LeftShiftAmount; //Determines left shift amount to left-align data when little endian
|
logic [3:0] LeftShiftAmount; // Determines left shift amount to left-align data when little endian
|
||||||
logic [7:0] ASR; //AlignedReceiveShiftReg
|
logic [7:0] ASR; // AlignedReceiveShiftReg
|
||||||
|
|
||||||
//CS signals
|
// CS signals
|
||||||
logic [3:0] ChipSelectAuto; //Assigns ChipSelect value to selected CS signal based on CS ID
|
logic [3:0] ChipSelectAuto; // Assigns ChipSelect value to selected CS signal based on CS ID
|
||||||
logic [3:0] ChipSelectInternal; //Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef
|
logic [3:0] ChipSelectInternal; // Defines what each ChipSelect signal should be based on transmission status and ChipSelectDef
|
||||||
logic DelayMode; //Determines where to place implicit half cycle delay based on sck phase for CS assertion
|
logic DelayMode; // Determines where to place implicit half cycle delay based on sck phase for CS assertion
|
||||||
|
|
||||||
//Miscellaneous signals delayed/early by 1 PCLK cycle
|
// Miscellaneous signals delayed/early by 1 PCLK cycle
|
||||||
logic ReceiveShiftFullDelay; //Delays ReceiveShiftFull signal by 1 PCLK cycle
|
logic ReceiveShiftFullDelay; // Delays ReceiveShiftFull signal by 1 PCLK cycle
|
||||||
logic TransmitFIFOWriteIncrementDelay; //TransmitFIFOWriteIncrement delayed by 1 PCLK cycle
|
logic ReceiveShiftFullDelayPCLK; // ReceiveShiftFull delayed by 1 PCLK cycle
|
||||||
logic ReceiveShiftFullDelayPCLK; //ReceiveShiftFull delayed by 1 PCLK cycle
|
|
||||||
logic TransmitFIFOReadEmptyDelay;
|
logic TransmitFIFOReadEmptyDelay;
|
||||||
logic SCLKenableEarly; //SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
|
logic SCLKenableEarly; // SCLKenable 1 PCLK cycle early, needed for on time register changes when ChipSelectMode is hold and Delay1[15:8] (InterXFR delay) is 0
|
||||||
|
|
||||||
//APB access
|
// APB access
|
||||||
assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses
|
assign Entry = {PADDR[7:2],2'b00}; // 32-bit word-aligned accesses
|
||||||
assign Memwrite = PWRITE & PENABLE & PSEL; // only write in access phase
|
assign Memwrite = PWRITE & PENABLE & PSEL; // Only write in access phase
|
||||||
assign PREADY = TransmitInactive; // tie PREADY to transmission for hardware interlock
|
assign PREADY = TransmitInactive; // Tie PREADY to transmission for hardware interlock
|
||||||
|
|
||||||
//Account for subword read/write circuitry
|
// Account for subword read/write circuitry
|
||||||
// -- Note SPI registers are 32 bits no matter what; access them with LW SW.
|
// -- Note SPI registers are 32 bits no matter what; access them with LW SW.
|
||||||
|
|
||||||
assign Din = PWDATA[31:0];
|
assign Din = PWDATA[31:0];
|
||||||
if (P.XLEN == 64) assign PRDATA = {Dout, Dout};
|
if (P.XLEN == 64) assign PRDATA = {Dout, Dout};
|
||||||
else assign PRDATA = Dout;
|
else assign PRDATA = Dout;
|
||||||
|
|
||||||
//Register access
|
// Register access
|
||||||
always_ff@(posedge PCLK, negedge PRESETn)
|
always_ff@(posedge PCLK, negedge PRESETn)
|
||||||
if (~PRESETn) begin
|
if (~PRESETn) begin
|
||||||
SckDiv <= #1 12'd3;
|
SckDiv <= #1 12'd3;
|
||||||
@ -167,13 +151,12 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
ReceiveWatermark <= #1 3'b0;
|
ReceiveWatermark <= #1 3'b0;
|
||||||
InterruptEnable <= #1 2'b0;
|
InterruptEnable <= #1 2'b0;
|
||||||
InterruptPending <= #1 2'b0;
|
InterruptPending <= #1 2'b0;
|
||||||
end else begin //writes
|
end else begin // writes
|
||||||
//According to FU540 spec: Once interrupt is pending, it will remain set until number
|
|
||||||
//of entries in tx/rx fifo is strictly more/less than tx/rxmark
|
|
||||||
|
|
||||||
/* verilator lint_off CASEINCOMPLETE */
|
/* verilator lint_off CASEINCOMPLETE */
|
||||||
if (Memwrite & TransmitInactive)
|
if (Memwrite & TransmitInactive)
|
||||||
case(Entry) //flop to sample inputs
|
case(Entry) // flop to sample inputs
|
||||||
8'h00: SckDiv <= Din[11:0];
|
8'h00: SckDiv <= Din[11:0];
|
||||||
8'h04: SckMode <= Din[1:0];
|
8'h04: SckMode <= Din[1:0];
|
||||||
8'h10: ChipSelectID <= Din[1:0];
|
8'h10: ChipSelectID <= Din[1:0];
|
||||||
@ -188,18 +171,21 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
8'h70: InterruptEnable <= Din[1:0];
|
8'h70: InterruptEnable <= Din[1:0];
|
||||||
endcase
|
endcase
|
||||||
/* verilator lint_off CASEINCOMPLETE */
|
/* verilator lint_off CASEINCOMPLETE */
|
||||||
//interrupt clearance
|
|
||||||
|
// According to FU540 spec: Once interrupt is pending, it will remain set until number
|
||||||
|
// of entries in tx/rx fifo is strictly more/less than tx/rxmark
|
||||||
InterruptPending[0] <= TransmitReadMark;
|
InterruptPending[0] <= TransmitReadMark;
|
||||||
InterruptPending[1] <= RecieveWriteMark;
|
InterruptPending[1] <= RecieveWriteMark;
|
||||||
case(Entry) // flop to sample inputs
|
|
||||||
|
case(Entry) // Flop to sample inputs
|
||||||
8'h00: Dout <= #1 {20'b0, SckDiv};
|
8'h00: Dout <= #1 {20'b0, SckDiv};
|
||||||
8'h04: Dout <= #1 {30'b0, SckMode};
|
8'h04: Dout <= #1 {30'b0, SckMode};
|
||||||
8'h10: Dout <= #1 {30'b0, ChipSelectID};
|
8'h10: Dout <= #1 {30'b0, ChipSelectID};
|
||||||
8'h14: Dout <= #1 {28'b0, ChipSelectDef};
|
8'h14: Dout <= #1 {28'b0, ChipSelectDef};
|
||||||
8'h18: Dout <= #1 {30'b0, ChipSelectMode};
|
8'h18: Dout <= #1 {30'b0, ChipSelectMode};
|
||||||
8'h28: Dout <= {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]};
|
8'h28: Dout <= #1 {8'b0, Delay0[15:8], 8'b0, Delay0[7:0]};
|
||||||
8'h2C: Dout <= {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]};
|
8'h2C: Dout <= #1 {8'b0, Delay1[15:8], 8'b0, Delay1[7:0]};
|
||||||
8'h40: Dout <= {12'b0, Format[4:1], 13'b0, Format[0], 2'b0};
|
8'h40: Dout <= #1 {12'b0, Format[4:1], 13'b0, Format[0], 2'b0};
|
||||||
8'h48: Dout <= #1 {23'b0, TransmitFIFOWriteFull, 8'b0};
|
8'h48: Dout <= #1 {23'b0, TransmitFIFOWriteFull, 8'b0};
|
||||||
8'h4C: Dout <= #1 {23'b0, ReceiveFIFOReadEmpty, ReceiveData[7:0]};
|
8'h4C: Dout <= #1 {23'b0, ReceiveFIFOReadEmpty, ReceiveData[7:0]};
|
||||||
8'h50: Dout <= #1 {29'b0, TransmitWatermark};
|
8'h50: Dout <= #1 {29'b0, TransmitWatermark};
|
||||||
@ -210,8 +196,9 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
//SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1))
|
// SPI enable generation, where SCLK = PCLK/(2*(SckDiv + 1))
|
||||||
//Generates a high signal at the rising and falling edge of SCLK by counting from 0 to SckDiv
|
// Asserts SCLKenable at the rising and falling edge of SCLK by counting from 0 to SckDiv
|
||||||
|
// Active at 2x SCLK frequency to account for implicit half cycle delays and actions on both clock edges depending on phase
|
||||||
assign SCLKenable = (DivCounter == SckDiv);
|
assign SCLKenable = (DivCounter == SckDiv);
|
||||||
assign SCLKenableEarly = ((DivCounter + 12'b1) == SckDiv);
|
assign SCLKenableEarly = ((DivCounter + 12'b1) == SckDiv);
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
@ -219,44 +206,38 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
else if (SCLKenable) DivCounter <= 0;
|
else if (SCLKenable) DivCounter <= 0;
|
||||||
else DivCounter <= DivCounter + 12'b1;
|
else DivCounter <= DivCounter + 12'b1;
|
||||||
|
|
||||||
//Boolean logic that tracks frame progression
|
// Asserts when transmission is one frame before complete
|
||||||
assign FrameCompare = (FrameCount < Format[4:1]);
|
assign ReceivePenultimateFrame = ((FrameCount + 4'b0001) == Format[4:1]);
|
||||||
assign ReceivePenultimateFrameBoolean = ((FrameCount + 4'b0001) == Format[4:1]);
|
|
||||||
|
|
||||||
//Computing delays
|
// Computing delays
|
||||||
// When sckmode.pha = 0, an extra half-period delay is implicit in the cs-sck delay, and vice-versa for sck-cs
|
// When sckmode.pha = 0, an extra half-period delay is implicit in the cs-sck delay, and vice-versa for sck-cs
|
||||||
assign ImplicitDelay1 = SckMode[0] ? 9'b0 : 9'b1;
|
assign ImplicitDelay1 = SckMode[0] ? 9'b0 : 9'b1;
|
||||||
assign ImplicitDelay2 = SckMode[0] ? 9'b1 : 9'b0;
|
assign ImplicitDelay2 = SckMode[0] ? 9'b1 : 9'b0;
|
||||||
|
|
||||||
assign CS_SCKCompare = CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1);
|
// Calculate when tx/rx shift registers are full/empty
|
||||||
assign SCK_CSCompare = SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2);
|
TransmitShiftFSM TransmitShiftFSM(PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0, TransmitShiftEmpty);
|
||||||
assign InterCSCompare = (InterCSCount >= ({Delay1[7:0],1'b0}));
|
ReceiveShiftFSM ReceiveShiftFSM(PCLK, PRESETn, SCLKenable, ReceivePenultimateFrame, SampleEdge, SckMode[0], ReceiveShiftFull);
|
||||||
assign InterXFRCompare = (InterXFRCount >= ({Delay1[15:8], 1'b0}));
|
|
||||||
|
|
||||||
//Calculate when tx/rx shift registers are full/empty
|
// Calculate tx/rx fifo write and receive increment signals
|
||||||
TransmitShiftFSM TransmitShiftFSM_1 (PCLK, PRESETn, TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0, TransmitShiftEmpty);
|
|
||||||
ReceiveShiftFSM ReceiveShiftFSM_1 (PCLK, PRESETn, SCLKenable, ReceivePenultimateFrameBoolean, SampleEdge, SckMode[0], ReceiveShiftFull);
|
|
||||||
|
|
||||||
//Calculate tx/rx fifo write and recieve increment signals
|
|
||||||
assign TransmitFIFOWriteIncrement = (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive);
|
|
||||||
|
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
if (~PRESETn) TransmitFIFOWriteIncrementDelay <= 0;
|
if (~PRESETn) TransmitFIFOWriteIncrement <= 0;
|
||||||
else TransmitFIFOWriteIncrementDelay <= TransmitFIFOWriteIncrement;
|
else TransmitFIFOWriteIncrement <= (Memwrite & (Entry == 8'h48) & ~TransmitFIFOWriteFull & TransmitInactive);
|
||||||
|
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
if (~PRESETn) ReceiveFIFOReadIncrement <= 0;
|
if (~PRESETn) ReceiveFIFOReadIncrement <= 0;
|
||||||
else ReceiveFIFOReadIncrement <= ((Entry == 8'h4C) & ~ReceiveFIFOReadEmpty & PSEL & ~ReceiveFIFOReadIncrement);
|
else ReceiveFIFOReadIncrement <= ((Entry == 8'h4C) & ~ReceiveFIFOReadEmpty & PSEL & ~ReceiveFIFOReadIncrement);
|
||||||
|
|
||||||
//Tx/Rx FIFOs
|
// Tx/Rx FIFOs
|
||||||
SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrementDelay, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0], TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark);
|
SynchFIFO #(3,8) txFIFO(PCLK, 1'b1, SCLKenable, PRESETn, TransmitFIFOWriteIncrement, TransmitShiftEmpty, TransmitData[7:0], TransmitWriteWatermarkLevel, TransmitWatermark[2:0],
|
||||||
SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel, ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark);
|
TransmitFIFOReadData[7:0], TransmitFIFOWriteFull, TransmitFIFOReadEmpty, TransmitWriteMark, TransmitReadMark);
|
||||||
|
SynchFIFO #(3,8) rxFIFO(PCLK, SCLKenable, 1'b1, PRESETn, ReceiveShiftFullDelay, ReceiveFIFOReadIncrement, ReceiveShiftRegEndian, ReceiveWatermark[2:0], ReceiveReadWatermarkLevel,
|
||||||
|
ReceiveData[7:0], ReceiveFIFOWriteFull, ReceiveFIFOReadEmpty, RecieveWriteMark, RecieveReadMark);
|
||||||
|
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
if (~PRESETn) TransmitFIFOReadEmptyDelay <= 1;
|
if (~PRESETn) TransmitFIFOReadEmptyDelay <= 1;
|
||||||
else if (SCLKenable) TransmitFIFOReadEmptyDelay <= TransmitFIFOReadEmpty;
|
else if (SCLKenable) TransmitFIFOReadEmptyDelay <= TransmitFIFOReadEmpty;
|
||||||
|
|
||||||
|
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
if (~PRESETn) ReceiveShiftFullDelay <= 0;
|
if (~PRESETn) ReceiveShiftFullDelay <= 0;
|
||||||
else if (SCLKenable) ReceiveShiftFullDelay <= ReceiveShiftFull;
|
else if (SCLKenable) ReceiveShiftFullDelay <= ReceiveShiftFull;
|
||||||
@ -266,16 +247,16 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
|
|
||||||
assign TransmitShiftRegLoad = ~TransmitShiftEmpty & ~Active | (((ChipSelectMode == 2'b10) & ~|(Delay1[15:8])) & ((ReceiveShiftFullDelay | ReceiveShiftFull) & ~SampleEdge & ~TransmitFIFOReadEmpty));
|
assign TransmitShiftRegLoad = ~TransmitShiftEmpty & ~Active | (((ChipSelectMode == 2'b10) & ~|(Delay1[15:8])) & ((ReceiveShiftFullDelay | ReceiveShiftFull) & ~SampleEdge & ~TransmitFIFOReadEmpty));
|
||||||
|
|
||||||
//Main FSM which controls SPI transmission
|
// Main FSM which controls SPI transmission
|
||||||
typedef enum logic [2:0] {CS_INACTIVE, DELAY_0, ACTIVE_0, ACTIVE_1, DELAY_1,INTER_CS, INTER_XFR} statetype;
|
typedef enum logic [2:0] {CS_INACTIVE, DELAY_0, ACTIVE_0, ACTIVE_1, DELAY_1,INTER_CS, INTER_XFR} statetype;
|
||||||
statetype state;
|
statetype state;
|
||||||
|
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
if (~PRESETn) begin state <= CS_INACTIVE;
|
if (~PRESETn) begin
|
||||||
|
state <= CS_INACTIVE;
|
||||||
FrameCount <= 4'b0;
|
FrameCount <= 4'b0;
|
||||||
|
|
||||||
/* verilator lint_off CASEINCOMPLETE */
|
|
||||||
end else if (SCLKenable) begin
|
end else if (SCLKenable) begin
|
||||||
|
/* verilator lint_off CASEINCOMPLETE */
|
||||||
case (state)
|
case (state)
|
||||||
CS_INACTIVE: begin
|
CS_INACTIVE: begin
|
||||||
CS_SCKCount <= 9'b1;
|
CS_SCKCount <= 9'b1;
|
||||||
@ -288,7 +269,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
end
|
end
|
||||||
DELAY_0: begin
|
DELAY_0: begin
|
||||||
CS_SCKCount <= CS_SCKCount + 9'b1;
|
CS_SCKCount <= CS_SCKCount + 9'b1;
|
||||||
if (CS_SCKCompare) state <= ACTIVE_0;
|
if (CS_SCKCount >= (({Delay0[7:0], 1'b0}) + ImplicitDelay1)) state <= ACTIVE_0;
|
||||||
end
|
end
|
||||||
ACTIVE_0: begin
|
ACTIVE_0: begin
|
||||||
FrameCount <= FrameCount + 4'b1;
|
FrameCount <= FrameCount + 4'b1;
|
||||||
@ -296,7 +277,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
end
|
end
|
||||||
ACTIVE_1: begin
|
ACTIVE_1: begin
|
||||||
InterXFRCount <= 9'b1;
|
InterXFRCount <= 9'b1;
|
||||||
if (FrameCompare) state <= ACTIVE_0;
|
if (FrameCount < Format[4:1]) state <= ACTIVE_0;
|
||||||
else if ((ChipSelectMode[1:0] == 2'b10) & ~|(Delay1[15:8]) & (~TransmitFIFOReadEmpty)) begin
|
else if ((ChipSelectMode[1:0] == 2'b10) & ~|(Delay1[15:8]) & (~TransmitFIFOReadEmpty)) begin
|
||||||
state <= ACTIVE_0;
|
state <= ACTIVE_0;
|
||||||
CS_SCKCount <= 9'b1;
|
CS_SCKCount <= 9'b1;
|
||||||
@ -310,11 +291,11 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
end
|
end
|
||||||
DELAY_1: begin
|
DELAY_1: begin
|
||||||
SCK_CSCount <= SCK_CSCount + 9'b1;
|
SCK_CSCount <= SCK_CSCount + 9'b1;
|
||||||
if (SCK_CSCompare) state <= INTER_CS;
|
if (SCK_CSCount >= (({Delay0[15:8], 1'b0}) + ImplicitDelay2)) state <= INTER_CS;
|
||||||
end
|
end
|
||||||
INTER_CS: begin
|
INTER_CS: begin
|
||||||
InterCSCount <= InterCSCount + 9'b1;
|
InterCSCount <= InterCSCount + 9'b1;
|
||||||
if (InterCSCompare ) state <= CS_INACTIVE;
|
if (InterCSCount >= ({Delay1[7:0],1'b0})) state <= CS_INACTIVE;
|
||||||
end
|
end
|
||||||
INTER_XFR: begin
|
INTER_XFR: begin
|
||||||
CS_SCKCount <= 9'b1;
|
CS_SCKCount <= 9'b1;
|
||||||
@ -322,13 +303,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
FrameCount <= 4'b0;
|
FrameCount <= 4'b0;
|
||||||
InterCSCount <= 9'b10;
|
InterCSCount <= 9'b10;
|
||||||
InterXFRCount <= InterXFRCount + 9'b1;
|
InterXFRCount <= InterXFRCount + 9'b1;
|
||||||
if (InterXFRCompare & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0;
|
if ((InterXFRCount >= ({Delay1[15:8], 1'b0})) & ~TransmitFIFOReadEmptyDelay) state <= ACTIVE_0;
|
||||||
else if (~|ChipSelectMode[1:0]) state <= CS_INACTIVE;
|
else if (~|ChipSelectMode[1:0]) state <= CS_INACTIVE;
|
||||||
end
|
end
|
||||||
endcase
|
endcase
|
||||||
|
/* verilator lint_off CASEINCOMPLETE */
|
||||||
end
|
end
|
||||||
|
|
||||||
/* verilator lint_off CASEINCOMPLETE */
|
|
||||||
|
|
||||||
assign DelayMode = SckMode[0] ? (state == DELAY_1) : (state == ACTIVE_1 & ReceiveShiftFull);
|
assign DelayMode = SckMode[0] ? (state == DELAY_1) : (state == ACTIVE_1 & ReceiveShiftFull);
|
||||||
assign ChipSelectInternal = (state == CS_INACTIVE | state == INTER_CS | DelayMode & ~|(Delay0[15:8])) ? ChipSelectDef : ~ChipSelectDef;
|
assign ChipSelectInternal = (state == CS_INACTIVE | state == INTER_CS | DelayMode & ~|(Delay0[15:8])) ? ChipSelectDef : ~ChipSelectDef;
|
||||||
@ -339,7 +321,7 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
assign TransmitInactive = ((state == INTER_CS) | (state == CS_INACTIVE) | (state == INTER_XFR) | (ReceiveShiftFullDelayPCLK & ZeroDelayHoldMode));
|
assign TransmitInactive = ((state == INTER_CS) | (state == CS_INACTIVE) | (state == INTER_XFR) | (ReceiveShiftFullDelayPCLK & ZeroDelayHoldMode));
|
||||||
assign Active0 = (state == ACTIVE_0);
|
assign Active0 = (state == ACTIVE_0);
|
||||||
|
|
||||||
//Signal tracks which edge of sck to shift data
|
// Signal tracks which edge of sck to shift data
|
||||||
always_comb
|
always_comb
|
||||||
case(SckMode[1:0])
|
case(SckMode[1:0])
|
||||||
2'b00: ShiftEdge = ~sck & SCLKenable;
|
2'b00: ShiftEdge = ~sck & SCLKenable;
|
||||||
@ -349,36 +331,36 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
default: ShiftEdge = sck & SCLKenable;
|
default: ShiftEdge = sck & SCLKenable;
|
||||||
endcase
|
endcase
|
||||||
|
|
||||||
//Transmit shift register
|
// Transmit shift register
|
||||||
assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0];
|
assign TransmitDataEndian = Format[0] ? {TransmitFIFOReadData[0], TransmitFIFOReadData[1], TransmitFIFOReadData[2], TransmitFIFOReadData[3], TransmitFIFOReadData[4], TransmitFIFOReadData[5], TransmitFIFOReadData[6], TransmitFIFOReadData[7]} : TransmitFIFOReadData[7:0];
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
if(~PRESETn) TransmitShiftReg <= 8'b0;
|
if(~PRESETn) TransmitShiftReg <= 8'b0;
|
||||||
else if (TransmitShiftRegLoad) TransmitShiftReg <= TransmitDataEndian;
|
else if (TransmitShiftRegLoad) TransmitShiftReg <= TransmitDataEndian;
|
||||||
else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0};
|
else if (ShiftEdge & Active) TransmitShiftReg <= {TransmitShiftReg[6:0], 1'b0};
|
||||||
|
|
||||||
assign SPIOut = TransmitShiftReg[7];
|
assign SPIOut = TransmitShiftReg[7];
|
||||||
|
|
||||||
//If in loopback mode, receive shift register is connected directly to module's output pins. Else, connected to SPIIn
|
// If in loopback mode, receive shift register is connected directly to module's output pins. Else, connected to SPIIn
|
||||||
//There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges
|
// There are no setup/hold time issues because transmit shift register and receive shift register always shift/sample on opposite edges
|
||||||
assign ShiftIn = P.SPI_LOOPBACK_TEST ? SPIOut : SPIIn;
|
assign ShiftIn = P.SPI_LOOPBACK_TEST ? SPIOut : SPIIn;
|
||||||
|
|
||||||
//Receive shift register
|
// Receive shift register
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
if(~PRESETn) ReceiveShiftReg <= 8'b0;
|
if(~PRESETn) ReceiveShiftReg <= 8'b0;
|
||||||
else if (SampleEdge & SCLKenable) begin
|
else if (SampleEdge & SCLKenable) begin
|
||||||
if (~Active) ReceiveShiftReg <= 8'b0;
|
if (~Active) ReceiveShiftReg <= 8'b0;
|
||||||
else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn};
|
else ReceiveShiftReg <= {ReceiveShiftReg[6:0], ShiftIn};
|
||||||
end
|
end
|
||||||
|
|
||||||
//Aligns received data and reverses if little-endian
|
// Aligns received data and reverses if little-endian
|
||||||
assign LeftShiftAmount = 4'h8 - Format[4:1];
|
assign LeftShiftAmount = 4'h8 - Format[4:1];
|
||||||
assign ASR = ReceiveShiftReg << LeftShiftAmount[2:0];
|
assign ASR = ReceiveShiftReg << LeftShiftAmount[2:0];
|
||||||
assign ReceiveShiftRegEndian = Format[0] ? {ASR[0], ASR[1], ASR[2], ASR[3], ASR[4], ASR[5], ASR[6], ASR[7]} : ASR[7:0];
|
assign ReceiveShiftRegEndian = Format[0] ? {ASR[0], ASR[1], ASR[2], ASR[3], ASR[4], ASR[5], ASR[6], ASR[7]} : ASR[7:0];
|
||||||
|
|
||||||
//Interrupt logic: raise interrupt if any enabled interrupts are pending
|
// Interrupt logic: raise interrupt if any enabled interrupts are pending
|
||||||
assign SPIIntr = |(InterruptPending & InterruptEnable);
|
assign SPIIntr = |(InterruptPending & InterruptEnable);
|
||||||
|
|
||||||
//Chip select logic
|
// Chip select logic
|
||||||
always_comb
|
always_comb
|
||||||
case(ChipSelectID[1:0])
|
case(ChipSelectID[1:0])
|
||||||
2'b00: ChipSelectAuto = {ChipSelectDef[3], ChipSelectDef[2], ChipSelectDef[1], ChipSelectInternal[0]};
|
2'b00: ChipSelectAuto = {ChipSelectDef[3], ChipSelectDef[2], ChipSelectDef[1], ChipSelectInternal[0]};
|
||||||
@ -390,14 +372,14 @@ module spi_apb import cvw::*; #(parameter cvw_t P) (
|
|||||||
assign SPICS = ChipSelectMode[0] ? ChipSelectDef : ChipSelectAuto;
|
assign SPICS = ChipSelectMode[0] ? ChipSelectDef : ChipSelectAuto;
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
module SynchFIFO #(parameter M =3 , N= 8)(
|
module SynchFIFO #(parameter M=3, N=8)( // 2^M entries of N bits each
|
||||||
input logic PCLK, wen, ren, PRESETn,
|
input logic PCLK, wen, ren, PRESETn,
|
||||||
input logic winc,rinc,
|
input logic winc, rinc,
|
||||||
input logic [N-1:0] wdata,
|
input logic [N-1:0] wdata,
|
||||||
input logic [M-1:0] wwatermarklevel, rwatermarklevel,
|
input logic [M-1:0] wwatermarklevel, rwatermarklevel,
|
||||||
output logic [N-1:0] rdata,
|
output logic [N-1:0] rdata,
|
||||||
output logic wfull, rempty,
|
output logic wfull, rempty,
|
||||||
output logic wwatermark, rwatermark);
|
output logic wwatermark, rwatermark);
|
||||||
|
|
||||||
/* Pointer FIFO using design elements from "Simulation and Synthesis Techniques
|
/* Pointer FIFO using design elements from "Simulation and Synthesis Techniques
|
||||||
for Asynchronous FIFO Design" by Clifford E. Cummings. Namely, M bit read and write pointers
|
for Asynchronous FIFO Design" by Clifford E. Cummings. Namely, M bit read and write pointers
|
||||||
@ -409,8 +391,6 @@ module SynchFIFO #(parameter M =3 , N= 8)(
|
|||||||
logic [N-1:0] mem[2**M];
|
logic [N-1:0] mem[2**M];
|
||||||
logic [M:0] rptr, wptr;
|
logic [M:0] rptr, wptr;
|
||||||
logic [M:0] rptrnext, wptrnext;
|
logic [M:0] rptrnext, wptrnext;
|
||||||
logic rempty_val;
|
|
||||||
logic wfull_val;
|
|
||||||
logic [M-1:0] raddr;
|
logic [M-1:0] raddr;
|
||||||
logic [M-1:0] waddr;
|
logic [M-1:0] waddr;
|
||||||
|
|
||||||
@ -428,53 +408,43 @@ module SynchFIFO #(parameter M =3 , N= 8)(
|
|||||||
end
|
end
|
||||||
else begin
|
else begin
|
||||||
if (wen) begin
|
if (wen) begin
|
||||||
wfull <= wfull_val;
|
wfull <= ({~wptrnext[M], wptrnext[M-1:0]} == rptr);
|
||||||
wptr <= wptrnext;
|
wptr <= wptrnext;
|
||||||
end
|
end
|
||||||
if (ren) begin
|
if (ren) begin
|
||||||
rptr <= rptrnext;
|
rptr <= rptrnext;
|
||||||
rempty <= rempty_val;
|
rempty <= (wptr == rptrnext);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign raddr = rptr[M-1:0];
|
assign raddr = rptr[M-1:0];
|
||||||
assign rptrnext = rptr + {3'b0, (rinc & ~rempty)};
|
assign rptrnext = rptr + {{(M){1'b0}}, (rinc & ~rempty)};
|
||||||
assign rempty_val = (wptr == rptrnext);
|
|
||||||
assign rwatermark = ((waddr - raddr) < rwatermarklevel) & ~wfull;
|
assign rwatermark = ((waddr - raddr) < rwatermarklevel) & ~wfull;
|
||||||
assign waddr = wptr[M-1:0];
|
assign waddr = wptr[M-1:0];
|
||||||
assign wwatermark = ((waddr - raddr) > wwatermarklevel) | wfull;
|
assign wwatermark = ((waddr - raddr) > wwatermarklevel) | wfull;
|
||||||
assign wptrnext = wptr + {3'b0, (winc & ~wfull)};
|
assign wptrnext = wptr + {{(M){1'b0}}, (winc & ~wfull)};
|
||||||
assign wfull_val = ({~wptrnext[M], wptrnext[M-1:0]} == rptr);
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
module TransmitShiftFSM(
|
module TransmitShiftFSM(
|
||||||
input logic PCLK, PRESETn,
|
input logic PCLK, PRESETn,
|
||||||
input logic TransmitFIFOReadEmpty, ReceivePenultimateFrameBoolean, Active0,
|
input logic TransmitFIFOReadEmpty, ReceivePenultimateFrame, Active0,
|
||||||
output logic TransmitShiftEmpty);
|
output logic TransmitShiftEmpty);
|
||||||
|
|
||||||
typedef enum logic [1:0] {TransmitShiftEmptyState, TransmitShiftHoldState, TransmitShiftNotEmptyState} statetype;
|
|
||||||
statetype TransmitState, TransmitNextState;
|
|
||||||
always_ff @(posedge PCLK, negedge PRESETn)
|
always_ff @(posedge PCLK, negedge PRESETn)
|
||||||
if (~PRESETn) TransmitState <= TransmitShiftEmptyState;
|
if (~PRESETn) TransmitShiftEmpty <= 1;
|
||||||
else TransmitState <= TransmitNextState;
|
else if (TransmitShiftEmpty) begin
|
||||||
|
if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrame & Active0))) TransmitShiftEmpty <= 1;
|
||||||
|
else if (~TransmitFIFOReadEmpty) TransmitShiftEmpty <= 0;
|
||||||
|
end else begin
|
||||||
|
if (ReceivePenultimateFrame & Active0) TransmitShiftEmpty <= 1;
|
||||||
|
else TransmitShiftEmpty <= 0;
|
||||||
|
end
|
||||||
|
|
||||||
always_comb
|
|
||||||
case(TransmitState)
|
|
||||||
TransmitShiftEmptyState: begin
|
|
||||||
if (TransmitFIFOReadEmpty | (~TransmitFIFOReadEmpty & (ReceivePenultimateFrameBoolean & Active0))) TransmitNextState = TransmitShiftEmptyState;
|
|
||||||
else if (~TransmitFIFOReadEmpty) TransmitNextState = TransmitShiftNotEmptyState;
|
|
||||||
end
|
|
||||||
TransmitShiftNotEmptyState: begin
|
|
||||||
if (ReceivePenultimateFrameBoolean & Active0) TransmitNextState = TransmitShiftEmptyState;
|
|
||||||
else TransmitNextState = TransmitShiftNotEmptyState;
|
|
||||||
end
|
|
||||||
endcase
|
|
||||||
assign TransmitShiftEmpty = (TransmitNextState == TransmitShiftEmptyState);
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
module ReceiveShiftFSM(
|
module ReceiveShiftFSM(
|
||||||
input logic PCLK, PRESETn, SCLKenable,
|
input logic PCLK, PRESETn, SCLKenable,
|
||||||
input logic ReceivePenultimateFrameBoolean, SampleEdge, SckMode,
|
input logic ReceivePenultimateFrame, SampleEdge, SckMode,
|
||||||
output logic ReceiveShiftFull
|
output logic ReceiveShiftFull
|
||||||
);
|
);
|
||||||
typedef enum logic [1:0] {ReceiveShiftFullState, ReceiveShiftNotFullState, ReceiveShiftDelayState} statetype;
|
typedef enum logic [1:0] {ReceiveShiftFullState, ReceiveShiftNotFullState, ReceiveShiftDelayState} statetype;
|
||||||
@ -484,17 +454,12 @@ module ReceiveShiftFSM(
|
|||||||
else if (SCLKenable) begin
|
else if (SCLKenable) begin
|
||||||
case (ReceiveState)
|
case (ReceiveState)
|
||||||
ReceiveShiftFullState: ReceiveState <= ReceiveShiftNotFullState;
|
ReceiveShiftFullState: ReceiveState <= ReceiveShiftNotFullState;
|
||||||
ReceiveShiftNotFullState: if (ReceivePenultimateFrameBoolean & (SampleEdge)) ReceiveState <= ReceiveShiftDelayState;
|
ReceiveShiftNotFullState: if (ReceivePenultimateFrame & (SampleEdge)) ReceiveState <= ReceiveShiftDelayState;
|
||||||
else ReceiveState <= ReceiveShiftNotFullState;
|
else ReceiveState <= ReceiveShiftNotFullState;
|
||||||
ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState;
|
ReceiveShiftDelayState: ReceiveState <= ReceiveShiftFullState;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState);
|
assign ReceiveShiftFull = SckMode ? (ReceiveState == ReceiveShiftFullState) : (ReceiveState == ReceiveShiftDelayState);
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) (
|
|||||||
end
|
end
|
||||||
|
|
||||||
// global stall and flush control
|
// global stall and flush control
|
||||||
hazard hzu(
|
hazard #(P) hzu(
|
||||||
.BPWrongE, .CSRWriteFenceM, .RetM, .TrapM,
|
.BPWrongE, .CSRWriteFenceM, .RetM, .TrapM,
|
||||||
.LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD,
|
.LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD,
|
||||||
.LSUStallM, .IFUStallF,
|
.LSUStallM, .IFUStallF,
|
||||||
|
@ -11,7 +11,7 @@ export MOD ?= orig
|
|||||||
# title to add a note in the synth's directory name
|
# title to add a note in the synth's directory name
|
||||||
TITLE =
|
TITLE =
|
||||||
# tsmc28, sky130, and sky90 presently supported
|
# tsmc28, sky130, and sky90 presently supported
|
||||||
export TECH ?= sky90
|
export TECH ?= sky130
|
||||||
# MAXCORES allows parallel compilation, which is faster but less CPU-efficient
|
# MAXCORES allows parallel compilation, which is faster but less CPU-efficient
|
||||||
# Avoid when doing sweeps of many optimization points in parallel
|
# Avoid when doing sweeps of many optimization points in parallel
|
||||||
export MAXCORES ?= 1
|
export MAXCORES ?= 1
|
||||||
@ -20,7 +20,7 @@ export MAXCORES ?= 1
|
|||||||
export MAXOPT ?= 0
|
export MAXOPT ?= 0
|
||||||
export DRIVE ?= FLOP
|
export DRIVE ?= FLOP
|
||||||
export USESRAM ?= 0
|
export USESRAM ?= 0
|
||||||
|
export WIDTH ?= 32
|
||||||
|
|
||||||
time := $(shell date +%F-%H-%M)
|
time := $(shell date +%F-%H-%M)
|
||||||
hash := $(shell git rev-parse --short HEAD)
|
hash := $(shell git rev-parse --short HEAD)
|
||||||
@ -94,10 +94,10 @@ endif
|
|||||||
|
|
||||||
ifneq ($(MOD), orig)
|
ifneq ($(MOD), orig)
|
||||||
# PMP 0
|
# PMP 0
|
||||||
sed -i 's/PMP_ENTRIES \(64\|16\|0\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh
|
sed -i 's/PMP_ENTRIES.*\(64\|16\)/PMP_ENTRIES = 0;/' $(CONFIGDIR)/config.vh
|
||||||
ifneq ($(MOD), PMP0)
|
ifneq ($(MOD), PMP0)
|
||||||
# no priv
|
# no priv
|
||||||
sed -i 's/ZICSR_SUPPORTED *1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh
|
sed -i 's/ZICSR_SUPPORTED.*1/ZICSR_SUPPORTED = 0;/' $(CONFIGDIR)/config.vh
|
||||||
ifneq ($(MOD), noPriv)
|
ifneq ($(MOD), noPriv)
|
||||||
# turn off FPU
|
# turn off FPU
|
||||||
sed -i 's/1 *<< *3/0 << 3/' $(CONFIGDIR)/config.vh
|
sed -i 's/1 *<< *3/0 << 3/' $(CONFIGDIR)/config.vh
|
||||||
@ -147,4 +147,4 @@ clean:
|
|||||||
rm -f power.saif
|
rm -f power.saif
|
||||||
rm -f Synopsys_stack_trace_*.txt
|
rm -f Synopsys_stack_trace_*.txt
|
||||||
rm -f crte_*.txt
|
rm -f crte_*.txt
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ This subdirectory contains synthesis scripts for use with Synopsys
|
|||||||
scripts/synth.tcl.
|
scripts/synth.tcl.
|
||||||
|
|
||||||
Example Usage
|
Example Usage
|
||||||
make synth DESIGN=wallypipelinedcore FREQ=500
|
make synth DESIGN=wallypipelinedcore FREQ=500 CONFIG=rv32e
|
||||||
|
|
||||||
environment variables
|
environment variables
|
||||||
|
|
||||||
@ -38,5 +38,25 @@ To run ppa analysis that hones into target frequency, you can type:
|
|||||||
python3 ppa/ppaSynth.py from the synthDC directory. This runs a sweep
|
python3 ppa/ppaSynth.py from the synthDC directory. This runs a sweep
|
||||||
across all modules listed at the bottom of the ppaSynth.py file.
|
across all modules listed at the bottom of the ppaSynth.py file.
|
||||||
|
|
||||||
|
There are two options for running the sweep. The first option runs all modules for
|
||||||
|
all techs around a given frequency (i.e., freqs). The second option
|
||||||
|
will run all designs for the specific module based on bestSynths.csv
|
||||||
|
values. Because the second option is assigned last, it takes priority. If the
|
||||||
|
second set of values is commented out, it will run all widths.
|
||||||
|
|
||||||
|
WARNING: The first option may launch many runs that could consume all
|
||||||
|
the available licenses. Therefore, care must be taken
|
||||||
|
to be sure that enough licenses are available for this first option.
|
||||||
|
|
||||||
|
##### Run specific syntheses
|
||||||
|
widths = [8, 16, 32, 64, 128]
|
||||||
|
modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8']
|
||||||
|
techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']
|
||||||
|
freqs = [5000]
|
||||||
|
synthsToRun = allCombos(widths, modules, techs, freqs)
|
||||||
|
|
||||||
|
##### Run a sweep based on best delay found in existing syntheses
|
||||||
|
module = 'adder'
|
||||||
|
width = 32
|
||||||
|
tech = 'tsmc28psyn'
|
||||||
|
synthsToRun = freqSweep(module, width, tech)
|
@ -252,7 +252,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
TechSpec = namedtuple("TechSpec", "color shape targfreq fo4 add32area add32lpower add32denergy")
|
TechSpec = namedtuple("TechSpec", "color shape targfreq fo4 add32area add32lpower add32denergy")
|
||||||
techdict = {}
|
techdict = {}
|
||||||
techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 1440.600027, 714.057, 0.658023)
|
techdict['sky130'] = TechSpec('green', 'o', args.sky130freq, 99.5e-3, 2581, 18, 0.685)
|
||||||
techdict['sky90'] = TechSpec('gray', 'o', args.sky90freq, 43.2e-3, 1440.600027, 714.057, 0.658023)
|
techdict['sky90'] = TechSpec('gray', 'o', args.sky90freq, 43.2e-3, 1440.600027, 714.057, 0.658023)
|
||||||
techdict['tsmc28psyn'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533)
|
techdict['tsmc28psyn'] = TechSpec('blue', 's', args.tsmcfreq, 12.2e-3, 209.286002, 1060.0, .081533)
|
||||||
|
|
||||||
|
@ -1,24 +1,74 @@
|
|||||||
Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (nJ)
|
Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (nJ)
|
||||||
priorityencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078
|
binencoder,sky130,8,1000,1.0000,50.960001,24.761,0.010685929975270078
|
||||||
priorityencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348
|
binencoder,sky130,16,1000,1.0000,136.220003,77.243,0.021773774467348
|
||||||
priorityencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111
|
binencoder,sky130,32,1000,1.0000,372.400007,189.626,0.04371111111111111
|
||||||
priorityencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981
|
binencoder,sky130,64,1000,1.0000,797.720015,382.205,0.07393850658857981
|
||||||
priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861
|
binencoder,sky130,128,900,1.1111,1602.300031,610.009,0.1261366969785861
|
||||||
add,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422
|
adder,sky130,8,1700,0.588235,253.820005,154.438,0.10825587752870422
|
||||||
add,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417
|
adder,sky130,16,1300,0.7692307,722.260013,485.109,0.32460910944935417
|
||||||
add,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014
|
adder,sky130,32,1100,0.90909,1440.600027,714.057,0.6580226904376014
|
||||||
add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874
|
adder,sky130,64,950,1.0526315,2781.240054,1050.0,0.9392239364188874
|
||||||
add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755
|
adder,sky130,128,900,1.1111,6186.740118,2230.0,2.1480106100795755
|
||||||
|
csa,sky130,8,1000,1.0000,266.560005,154.202,0.13650573115665163
|
||||||
|
csa,sky130,16,1000,1.0000,533.12001,308.404,0.27263530601922104
|
||||||
|
csa,sky130,32,1000,1.0000,1066.240021,616.808,0.5448072247308093
|
||||||
|
csa,sky130,64,1000,1.0000,2132.480042,1230.0,1.0905412240768841
|
||||||
|
csa,sky130,128,1000,1.0000,4264.960083,2470.0,2.178553363682347
|
||||||
|
shifter,sky130,8,1000,1.0000,259.700005,196.451,0.07534088282874972
|
||||||
|
shifter,sky130,16,1000,1.0000,666.400006,558.433,0.19552906110283155
|
||||||
|
shifter,sky130,32,1000,1.0000,1475.880027,768.262,0.3807431082700759
|
||||||
|
shifter,sky130,64,1000,1.0000,3914.120062,2680.0,1.144802541988198
|
||||||
|
shifter,sky130,128,900,1.1111,9192.400136,6080.0,2.9008914525432616
|
||||||
|
comparator,sky130,8,1700,0.588235,200.900004,136.6,0.05001033271337053
|
||||||
|
comparator,sky130,16,1500,0.6666667,358.680007,189.253,0.06321553011448482
|
||||||
|
comparator,sky130,32,1300,0.7692307,690.900013,315.709,0.10771793448084398
|
||||||
|
comparator,sky130,64,1200,0.8333333,1372.980026,508.393,0.2048577820389901
|
||||||
|
comparator,sky130,128,1150,0.869565,2744.980052,796.047,0.34396273737011823
|
||||||
|
flop,sky130,8,1000,1.0000,133.279999,64.8145,0.193835
|
||||||
|
flop,sky130,16,1000,1.0000,266.5599975,129.629,0.38715000000000005
|
||||||
|
flop,sky130,32,1000,1.0000,533.119995,259.258,0.7723000000000001
|
||||||
|
flop,sky130,64,1000,1.0000,1066.23999,520.0,1.54955
|
||||||
|
flop,sky130,128,1000,1.0000,2132.4799805,1035.0,3.094
|
||||||
|
mux2,sky130,8,1000,1.0000,63.700001,21.541,0.01932440083034535
|
||||||
|
mux2,sky130,16,1000,1.0000,119.560002,32.354,0.03884536082474227
|
||||||
|
mux2,sky130,32,1000,1.0000,375.340008,259.372,0.13671796921846893
|
||||||
|
mux2,sky130,64,1000,1.0000,479.220009,115.22,0.15148539160324087
|
||||||
|
mux2,sky130,128,1000,1.0000,1302.420025,767.078,0.4665334665334665
|
||||||
|
mux4,sky130,8,1000,1.0000,148.960002,66.984,0.04026661024121879
|
||||||
|
mux4,sky130,16,1000,1.0000,392.0,398.313,0.1037037037037037
|
||||||
|
mux4,sky130,32,1000,1.0000,594.860011,331.197,0.131617289946576
|
||||||
|
mux4,sky130,64,1000,1.0000,899.640016,344.331,0.2862533692722372
|
||||||
|
mux4,sky130,128,1000,1.0000,2013.900038,818.249,0.6094182825484764
|
||||||
|
mux8,sky130,8,1000,1.0000,287.140006,116.648,0.06089260808926081
|
||||||
|
mux8,sky130,16,1000,1.0000,582.120003,282.366,0.14455681142177274
|
||||||
|
mux8,sky130,32,1000,1.0000,1319.079995,670.683,0.35777218376337316
|
||||||
|
mux8,sky130,64,1000,1.0000,2132.48004,808.482,0.44287680660701995
|
||||||
|
mux8,sky130,128,1000,1.0000,4575.620089,1830.0,0.9786276715410572
|
||||||
|
mul,sky130,8,1000,1.0000,2194.220041,1440.0,1.421374045801527
|
||||||
|
mul,sky130,16,1000,1.0000,7519.540137,4940.0,6.376128385155466
|
||||||
|
mul,sky130,32,1000,1.0000,25200.700446,14900.0,24.931847968545217
|
||||||
|
mul,sky130,64,1000,1.0000,86011.661365,42600.0,88.84651898734177
|
||||||
|
mul,sky130,128,800,1.2500,296198.144128,114000.0,273.3148854961832
|
||||||
|
binencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078
|
||||||
|
binencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348
|
||||||
|
binencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111
|
||||||
|
binencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981
|
||||||
|
binencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861
|
||||||
|
adder,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422
|
||||||
|
adder,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417
|
||||||
|
adder,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014
|
||||||
|
adder,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874
|
||||||
|
adder,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755
|
||||||
csa,sky90,8,5758,0.16536141368530738,266.560005,154.202,0.13650573115665163
|
csa,sky90,8,5758,0.16536141368530738,266.560005,154.202,0.13650573115665163
|
||||||
csa,sky90,16,5931,0.1654056314280897,533.12001,308.404,0.27263530601922104
|
csa,sky90,16,5931,0.1654056314280897,533.12001,308.404,0.27263530601922104
|
||||||
csa,sky90,32,5758,0.16536141368530738,1066.240021,616.808,0.5448072247308093
|
csa,sky90,32,5758,0.16536141368530738,1066.240021,616.808,0.5448072247308093
|
||||||
csa,sky90,64,5931,0.1654056314280897,2132.480042,1230.0,1.0905412240768841
|
csa,sky90,64,5931,0.1654056314280897,2132.480042,1230.0,1.0905412240768841
|
||||||
csa,sky90,128,5931,0.1654056314280897,4264.960083,2470.0,2.178553363682347
|
csa,sky90,128,5931,0.1654056314280897,4264.960083,2470.0,2.178553363682347
|
||||||
shiftleft,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972
|
shifter,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972
|
||||||
shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155
|
shifter,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155
|
||||||
shiftleft,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759
|
shifter,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759
|
||||||
shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198
|
shifter,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198
|
||||||
shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616
|
shifter,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616
|
||||||
comparator,sky90,8,4839,0.20629126741062204,200.900004,136.6,0.05001033271337053
|
comparator,sky90,8,4839,0.20629126741062204,200.900004,136.6,0.05001033271337053
|
||||||
comparator,sky90,16,4018,0.24806303982080635,358.680007,189.253,0.06321553011448482
|
comparator,sky90,16,4018,0.24806303982080635,358.680007,189.253,0.06321553011448482
|
||||||
comparator,sky90,32,3602,0.276293542476402,690.900013,315.709,0.10771793448084398
|
comparator,sky90,32,3602,0.276293542476402,690.900013,315.709,0.10771793448084398
|
||||||
@ -44,31 +94,31 @@ mux8,sky90,16,3362,0.295237998810232,582.120003,282.366,0.14455681142177274
|
|||||||
mux8,sky90,32,3178,0.3140553102580239,1319.079995,670.683,0.35777218376337316
|
mux8,sky90,32,3178,0.3140553102580239,1319.079995,670.683,0.35777218376337316
|
||||||
mux8,sky90,64,2906,0.3440756228492774,2132.48004,808.482,0.44287680660701995
|
mux8,sky90,64,2906,0.3440756228492774,2132.48004,808.482,0.44287680660701995
|
||||||
mux8,sky90,128,2667,0.3749401308586427,4575.620089,1830.0,0.9786276715410572
|
mux8,sky90,128,2667,0.3749401308586427,4575.620089,1830.0,0.9786276715410572
|
||||||
mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527
|
mul,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527
|
||||||
mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466
|
mul,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466
|
||||||
mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217
|
mul,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217
|
||||||
mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177
|
mul,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177
|
||||||
mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832
|
mul,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832
|
||||||
priorityencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
|
binencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
|
||||||
priorityencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
|
binencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
|
||||||
priorityencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
|
binencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
|
||||||
priorityencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
|
binencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
|
||||||
priorityencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
|
binencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
|
||||||
add,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
|
adder,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
|
||||||
add,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
|
adder,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
|
||||||
add,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
|
adder,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
|
||||||
add,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
|
adder,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
|
||||||
add,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731
|
adder,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731
|
||||||
csa,tsmc28,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921
|
csa,tsmc28,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921
|
||||||
csa,tsmc28,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842
|
csa,tsmc28,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842
|
||||||
csa,tsmc28,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941
|
csa,tsmc28,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941
|
||||||
csa,tsmc28,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076
|
csa,tsmc28,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076
|
||||||
csa,tsmc28,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363
|
csa,tsmc28,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363
|
||||||
shiftleft,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
|
shifter,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
|
||||||
shiftleft,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
|
shifter,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
|
||||||
shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
|
shifter,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
|
||||||
shiftleft,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
|
shifter,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
|
||||||
shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
|
shifter,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
|
||||||
comparator,tsmc28,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243
|
comparator,tsmc28,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243
|
||||||
comparator,tsmc28,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673
|
comparator,tsmc28,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673
|
||||||
comparator,tsmc28,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319
|
comparator,tsmc28,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319
|
||||||
@ -94,8 +144,58 @@ mux8,tsmc28,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262
|
|||||||
mux8,tsmc28,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814
|
mux8,tsmc28,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814
|
||||||
mux8,tsmc28,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495
|
mux8,tsmc28,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495
|
||||||
mux8,tsmc28,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426
|
mux8,tsmc28,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426
|
||||||
mult,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
|
mul,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
|
||||||
mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
|
mul,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
|
||||||
mult,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
|
mul,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
|
||||||
mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
|
mul,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
|
||||||
mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719
|
mul,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719
|
||||||
|
binencoder,tsmc28psyn,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
|
||||||
|
binencoder,tsmc28psyn,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
|
||||||
|
binencoder,tsmc28psyn,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
|
||||||
|
binencoder,tsmc28psyn,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
|
||||||
|
binencoder,tsmc28psyn,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
|
||||||
|
adder,tsmc28psyn,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
|
||||||
|
adder,tsmc28psyn,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
|
||||||
|
adder,tsmc28psyn,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
|
||||||
|
adder,tsmc28psyn,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
|
||||||
|
adder,tsmc28psyn,128,7000,0.142857142857,907.452008,4360.0,0.3451183029643731
|
||||||
|
csa,tsmc28psyn,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921
|
||||||
|
csa,tsmc28psyn,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842
|
||||||
|
csa,tsmc28psyn,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941
|
||||||
|
csa,tsmc28psyn,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076
|
||||||
|
csa,tsmc28psyn,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363
|
||||||
|
shifter,tsmc28psyn,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
|
||||||
|
shifter,tsmc28psyn,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
|
||||||
|
shifter,tsmc28psyn,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
|
||||||
|
shifter,tsmc28psyn,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
|
||||||
|
shifter,tsmc28psyn,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
|
||||||
|
comparator,tsmc28psyn,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243
|
||||||
|
comparator,tsmc28psyn,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673
|
||||||
|
comparator,tsmc28psyn,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319
|
||||||
|
comparator,tsmc28psyn,64,11080,0.09024670758122744,294.21,1250.0,0.0684115523465704
|
||||||
|
comparator,tsmc28psyn,128,9371,0.10671119720414043,558.432,2400.0,0.12794792444776437
|
||||||
|
flop,tsmc28psyn,8,10,0.048889000000002625,15.12,78.6345,0.027246000000000003
|
||||||
|
flop,tsmc28psyn,16,10,0.048889000000002625,30.24,157.29,0.054290000000000005
|
||||||
|
flop,tsmc28psyn,32,10,0.048889000000002625,60.4799995,314.5805,0.10908000000000001
|
||||||
|
flop,tsmc28psyn,64,10,0.048889000000002625,120.959999,630.0,0.21765500000000004
|
||||||
|
flop,tsmc28psyn,128,10,0.048889000000002625,241.919998,1260.0,0.43579999999999997
|
||||||
|
mux2,tsmc28psyn,8,29614,0.03374481252110488,16.758,114.564,0.005436617815897886
|
||||||
|
mux2,tsmc28psyn,16,18767,0.053046021580433735,15.75,88.025,0.005142004582511856
|
||||||
|
mux2,tsmc28psyn,32,17903,0.05585556035301346,32.130001,171.146,0.009897782494553985
|
||||||
|
mux2,tsmc28psyn,64,18568,0.05371109651012495,91.35,523.884,0.027574321413183972
|
||||||
|
mux2,tsmc28psyn,128,16637,0.05991099044298852,176.525999,941.106,0.05012923002945243
|
||||||
|
mux4,tsmc28psyn,8,18151,0.055092383284667513,27.971999,133.963,0.008032615282904523
|
||||||
|
mux4,tsmc28psyn,16,16486,0.06057952759917506,39.438,186.231,0.012556108213029236
|
||||||
|
mux4,tsmc28psyn,32,15196,0.06580579126085812,69.174,324.969,0.023229797315082915
|
||||||
|
mux4,tsmc28psyn,64,13926,0.07180612868016659,137.465999,648.086,0.04574177796926612
|
||||||
|
mux4,tsmc28psyn,128,13090,0.07636619404125286,294.335997,1420.0,0.09358288770053477
|
||||||
|
mux8,tsmc28psyn,8,12902,0.07750336319950395,44.604,214.286,0.0117501162610448
|
||||||
|
mux8,tsmc28psyn,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262
|
||||||
|
mux8,tsmc28psyn,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814
|
||||||
|
mux8,tsmc28psyn,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495
|
||||||
|
mux8,tsmc28psyn,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426
|
||||||
|
mul,tsmc28psyn,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
|
||||||
|
mul,tsmc28psyn,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
|
||||||
|
mul,tsmc28psyn,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
|
||||||
|
mul,tsmc28psyn,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
|
||||||
|
mul,tsmc28psyn,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -12,13 +12,11 @@ from ppaAnalyze import synthsfromcsv
|
|||||||
|
|
||||||
def runCommand(module, width, tech, freq):
|
def runCommand(module, width, tech, freq):
|
||||||
command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq)
|
command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq)
|
||||||
print('here we go')
|
subprocess.call(command, shell=True)
|
||||||
|
|
||||||
subprocess.Popen(command, shell=True)
|
|
||||||
|
|
||||||
def deleteRedundant(synthsToRun):
|
def deleteRedundant(synthsToRun):
|
||||||
'''removes any previous runs for the current synthesis specifications'''
|
'''removes any previous runs for the current synthesis specifications'''
|
||||||
synthStr = "rm -rf runs/ppa_{}_{}_rv32e_{}nm_{}_*"
|
synthStr = "rm -rf runs/{}_{}_rv32e_{}_{}_*"
|
||||||
for synth in synthsToRun:
|
for synth in synthsToRun:
|
||||||
bashCommand = synthStr.format(*synth)
|
bashCommand = synthStr.format(*synth)
|
||||||
outputCPL = subprocess.check_output(['bash','-c', bashCommand])
|
outputCPL = subprocess.check_output(['bash','-c', bashCommand])
|
||||||
@ -34,8 +32,21 @@ def freqSweep(module, width, tech):
|
|||||||
synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]]
|
synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]]
|
||||||
return synthsToRun
|
return synthsToRun
|
||||||
|
|
||||||
|
def freqModuleSweep(widths, modules, tech):
    '''builds a frequency sweep around the best recorded delay for each requested module/width in one tech

    Every entry read from ppa/bestSynths.csv whose module, tech and width match
    a requested combination contributes nine sweep points, from -8% to +8% of
    the centre frequency in 2% steps. The centre frequency is 1000/delay
    (presumably delay in ns giving MHz -- confirm against ppaAnalyze).

    Returns a list of [module, width, tech, freq] lists, with width and freq
    converted to strings.
    '''
    synthsToRun = []
    arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8]  # sweep offsets, in percent of the centre frequency
    allSynths = synthsfromcsv('ppa/bestSynths.csv')
    for w in widths:
        for module in modules:
            for synth in allSynths:
                # logical `and` short-circuits; the comparisons were previously joined with bitwise `&`
                if synth.module == str(module) and synth.tech == tech and synth.width == w:
                    f = 1000/synth.delay
                    for freq in [round(f+f*x/100) for x in arr]:
                        synthsToRun += [[synth.module, str(synth.width), synth.tech, str(freq)]]
    return synthsToRun
|
||||||
|
|
||||||
def filterRedundant(synthsToRun):
|
def filterRedundant(synthsToRun):
|
||||||
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
|
bashCommand = "find . -path '*runs/*' -prune"
|
||||||
output = subprocess.check_output(['bash','-c', bashCommand])
|
output = subprocess.check_output(['bash','-c', bashCommand])
|
||||||
specReg = re.compile('[a-zA-Z0-9]+')
|
specReg = re.compile('[a-zA-Z0-9]+')
|
||||||
allSynths = output.decode("utf-8").split('\n')[:-1]
|
allSynths = output.decode("utf-8").split('\n')[:-1]
|
||||||
@ -59,21 +70,30 @@ def allCombos(widths, modules, techs, freqs):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
##### Run specific syntheses
|
##### Run specific syntheses for a specific frequency
|
||||||
widths = [8, 16, 32, 64, 128]
|
widths = [8, 16, 32, 64, 128]
|
||||||
modules = ['mult', 'add', 'shiftleft', 'flop', 'comparator', 'priorityencoder', 'add', 'csa', 'mux2', 'mux4', 'mux8']
|
modules = ['mul', 'adder', 'shifter', 'flop', 'comparator', 'binencoder', 'csa', 'mux2', 'mux4', 'mux8']
|
||||||
techs = ['sky90', 'tsmc28']
|
techs = ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']
|
||||||
freqs = [5000]
|
freqs = [5000]
|
||||||
synthsToRun = allCombos(widths, modules, techs, freqs)
|
synthsToRun = allCombos(widths, modules, techs, freqs)
|
||||||
|
|
||||||
##### Run a sweep based on best delay found in existing syntheses
|
##### Run a sweep based on best delay found in existing syntheses
|
||||||
module = 'add'
|
module = 'adder'
|
||||||
width = 32
|
width = 32
|
||||||
tech = 'sky90'
|
tech = 'tsmc28psyn'
|
||||||
synthsToRun = freqSweep(module, width, tech)
|
synthsToRun = freqSweep(module, width, tech)
|
||||||
|
|
||||||
|
##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses
|
||||||
|
modules = ['adder']
|
||||||
|
# widths = [8, 16, 32, 64, 128]
|
||||||
|
widths = [32]
|
||||||
|
tech = 'sky130'
|
||||||
|
synthsToRun = freqModuleSweep(widths, modules, tech)
|
||||||
|
|
||||||
##### Only do syntheses for which a run doesn't already exist
|
##### Only do syntheses for which a run doesn't already exist
|
||||||
synthsToRun = filterRedundant(synthsToRun)
|
synthsToRun = filterRedundant(synthsToRun)
|
||||||
|
|
||||||
pool = Pool(processes=25)
|
pool = Pool(processes=25)
|
||||||
pool.starmap(runCommand, synthsToRun)
|
|
||||||
|
pool.starmap(runCommand, synthsToRun)
|
||||||
|
pool.close()
|
||||||
|
pool.join()
|
@ -18,7 +18,6 @@ suppress_message {VER-274}
|
|||||||
# Enable Multicore
|
# Enable Multicore
|
||||||
set_host_options -max_cores $::env(MAXCORES)
|
set_host_options -max_cores $::env(MAXCORES)
|
||||||
|
|
||||||
|
|
||||||
# get outputDir and configDir from environment (Makefile)
|
# get outputDir and configDir from environment (Makefile)
|
||||||
set outputDir $::env(OUTPUTDIR)
|
set outputDir $::env(OUTPUTDIR)
|
||||||
set cfg $::env(CONFIGDIR)
|
set cfg $::env(CONFIGDIR)
|
||||||
@ -26,6 +25,7 @@ set hdl_src "../src"
|
|||||||
set saifpower $::env(SAIFPOWER)
|
set saifpower $::env(SAIFPOWER)
|
||||||
set maxopt $::env(MAXOPT)
|
set maxopt $::env(MAXOPT)
|
||||||
set drive $::env(DRIVE)
|
set drive $::env(DRIVE)
|
||||||
|
set width $::env(WIDTH)
|
||||||
|
|
||||||
eval file copy -force [glob ${cfg}/*.vh] {$outputDir/hdl/}
|
eval file copy -force [glob ${cfg}/*.vh] {$outputDir/hdl/}
|
||||||
eval file copy -force [glob ${hdl_src}/cvw.sv] {$outputDir/hdl/}
|
eval file copy -force [glob ${hdl_src}/cvw.sv] {$outputDir/hdl/}
|
||||||
@ -88,7 +88,13 @@ if { [shell_is_in_topographical_mode] } {
|
|||||||
#set alib_library_analysis_path ./$outputDir
|
#set alib_library_analysis_path ./$outputDir
|
||||||
define_design_lib WORK -path ./$outputDir/WORK
|
define_design_lib WORK -path ./$outputDir/WORK
|
||||||
analyze -f sverilog -lib WORK $my_verilog_files
|
analyze -f sverilog -lib WORK $my_verilog_files
|
||||||
elaborate $my_toplevel -lib WORK
|
# If wrapper=0, we want to run against a specific module and pass
|
||||||
|
# width to DC
|
||||||
|
if { $wrapper == 1 } {
|
||||||
|
elaborate $my_toplevel -lib WORK
|
||||||
|
} else {
|
||||||
|
elaborate $my_toplevel -lib WORK -parameters WIDTH=$width
|
||||||
|
}
|
||||||
|
|
||||||
# Set the current_design
|
# Set the current_design
|
||||||
current_design $my_toplevel
|
current_design $my_toplevel
|
||||||
@ -447,4 +453,4 @@ set t2 [clock seconds]
|
|||||||
set t [expr $t2 - $t1]
|
set t [expr $t2 - $t1]
|
||||||
echo [expr $t/60]
|
echo [expr $t/60]
|
||||||
|
|
||||||
quit
|
quit
|
||||||
|
14
synthDC/wallySynthAll.sh
Executable file
14
synthDC/wallySynthAll.sh
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
# Run all Wally synthesis experiments from chapter 8
|
||||||
|
# However, trying to run the freqsweeps at the same time maxes out licenses and some runs fail
|
||||||
|
#./wallySynth.py --freqsweep 330 --tech sky130
|
||||||
|
#./wallySynth.py --freqsweep 870 --tech sky90
|
||||||
|
#./wallySynth.py --freqsweep 2800 --tech tsmc28psyn --usesram
|
||||||
|
./wallySynth.py --configsweep --tech sky130 --targetfreq 330
|
||||||
|
./wallySynth.py --configsweep --tech sky90 --targetfreq 870
|
||||||
|
./wallySynth.py --configsweep --tech tsmc28psyn --targetfreq 2800 --usesram
|
||||||
|
./wallySynth.py --featuresweep --tech sky130 --targetfreq 330
|
||||||
|
./wallySynth.py --featuresweep --tech sky90 --targetfreq 870
|
||||||
|
./wallySynth.py --featuresweep --tech tsmc28psyn --targetfreq 2800 --usesram
|
||||||
|
# Extract summary data (run this by hand after all experiments finish)
|
||||||
|
#./extractSummary.py --sky130freq 330 --sky90freq 870 --tsmcfreq 2800
|
||||||
|
|
@ -115,8 +115,8 @@ module testbenchfp;
|
|||||||
logic FlushE;
|
logic FlushE;
|
||||||
logic IFDivStartE;
|
logic IFDivStartE;
|
||||||
logic FDivDoneE;
|
logic FDivDoneE;
|
||||||
logic [P.NE+1:0] QeM;
|
logic [P.NE+1:0] UeM;
|
||||||
logic [P.DIVb:0] QmM;
|
logic [P.DIVb:0] UmM;
|
||||||
logic [P.XLEN-1:0] FIntDivResultM;
|
logic [P.XLEN-1:0] FIntDivResultM;
|
||||||
logic ResMatch; // Check if result match
|
logic ResMatch; // Check if result match
|
||||||
logic FlagMatch; // Check if IEEE flags match
|
logic FlagMatch; // Check if IEEE flags match
|
||||||
@ -145,9 +145,12 @@ module testbenchfp;
|
|||||||
|
|
||||||
initial begin
|
initial begin
|
||||||
// Information displayed for user on what is simulating
|
// Information displayed for user on what is simulating
|
||||||
$display("\nThe start of simulation...");
|
//$display("\nThe start of simulation...");
|
||||||
$display("This simulation for TEST is %s", TEST);
|
//$display("This simulation for TEST is %s", TEST);
|
||||||
$display("This simulation for TEST is of the operand size of %s", TEST_SIZE);
|
//$display("This simulation for TEST is of the operand size of %s", TEST_SIZE);
|
||||||
|
|
||||||
|
// $display("FPDUR %d %d DIVN %d LOGR %d RK %d RADIX %d DURLEN %d", FPDUR, DIVN, LOGR, RK, RADIX, DURLEN);
|
||||||
|
|
||||||
if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported
|
if (P.Q_SUPPORTED & (TEST_SIZE == "QP" | TEST_SIZE == "all")) begin // if Quad percision is supported
|
||||||
if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion
|
if (TEST === "cvtint" | TEST === "all") begin // if testing integer conversion
|
||||||
// add the 128-bit cvtint tests to the to-be-tested list
|
// add the 128-bit cvtint tests to the to-be-tested list
|
||||||
@ -649,7 +652,7 @@ module testbenchfp;
|
|||||||
string tt0;
|
string tt0;
|
||||||
tt0 = $psprintf("%s", Tests[TestNum]);
|
tt0 = $psprintf("%s", Tests[TestNum]);
|
||||||
testname = {pp, tt0};
|
testname = {pp, tt0};
|
||||||
$display("Here you are %s", testname);
|
//$display("Here you are %s", testname);
|
||||||
$display("\n\nRunning %s vectors ", Tests[TestNum]);
|
$display("\n\nRunning %s vectors ", Tests[TestNum]);
|
||||||
$readmemh(testname, TestVectors);
|
$readmemh(testname, TestVectors);
|
||||||
// set the test index to 0
|
// set the test index to 0
|
||||||
@ -705,7 +708,7 @@ module testbenchfp;
|
|||||||
end
|
end
|
||||||
|
|
||||||
postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
|
postprocess #(P) postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
|
||||||
.OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
|
.OpCtrl(OpCtrlVal), .DivUm(Quot), .DivUe(DivCalcExp),
|
||||||
.Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
|
.Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), .FmaSs(Ss),
|
||||||
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
|
.XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResSubnormUf(CvtResSubnormUfE),
|
||||||
.XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
|
.XZero(XZero), .YZero(YZero), .CvtShiftAmt(CvtShiftAmtE),
|
||||||
@ -734,8 +737,8 @@ module testbenchfp;
|
|||||||
.XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero),
|
.XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero),
|
||||||
.XNaNE(XNaN), .YNaNE(YNaN),
|
.XNaNE(XNaN), .YNaNE(YNaN),
|
||||||
.FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
|
.FDivStartE(DivStart), .IDivStartE(1'b0), .W64E(1'b0),
|
||||||
.StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .QeM(DivCalcExp),
|
.StallM(1'b0), .DivStickyM(DivSticky), .FDivBusyE, .UeM(DivCalcExp),
|
||||||
.QmM(Quot),
|
.UmM(Quot),
|
||||||
.FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
|
.FlushE(1'b0), .ForwardedSrcAE('0), .ForwardedSrcBE('0), .Funct3M(Funct3M),
|
||||||
.Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
|
.Funct3E(Funct3E), .IntDivE(1'b0), .FIntDivResultM(FIntDivResultM),
|
||||||
.FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
|
.FDivDoneE(FDivDoneE), .IFDivStartE(IFDivStartE));
|
||||||
|
@ -389,6 +389,7 @@ module testbench;
|
|||||||
|
|
||||||
assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz;
|
assign SDCCmd = SDCCmdOE ? SDCCmdOut : 1'bz;
|
||||||
assign SDCCmdIn = SDCCmd;
|
assign SDCCmdIn = SDCCmd;
|
||||||
|
assign SDCDat = sd_dat_reg_t ? sd_dat_reg_o : sd_dat_i;
|
||||||
assign SDCDatIn = SDCDat;
|
assign SDCDatIn = SDCDat;
|
||||||
-----/\----- EXCLUDED -----/\----- */
|
-----/\----- EXCLUDED -----/\----- */
|
||||||
assign SDCIntr = '0;
|
assign SDCIntr = '0;
|
||||||
|
Loading…
Reference in New Issue
Block a user