This commit is contained in:
David Harris 2022-06-21 22:45:28 +00:00
commit d865a1ce95
70 changed files with 7197 additions and 2392 deletions

2
.gitignore vendored
View File

@ -32,7 +32,7 @@ testsBP/*/*/*.elf*
testsBP/*/OBJ/*
testsBP/*/*.a
tests/wally-riscv-arch-test/riscv-test-suite/*/I/*/*
tests/riscof/riscof_work*/*
tests/riscof/riscof_work/
tests/riscof/config32.ini
tests/riscof/config64.ini
tests/linux-testgen/linux-testvectors/*

View File

@ -4,20 +4,29 @@
embench_dir = ../../addins/embench-iot
all: sim size
all: build sim size
allClean: clean all
build: buildspeed buildsize
buildspeed: build_speedopt_speed build_sizeopt_speed
buildsize: build_speedopt_size build_sizeopt_size
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed
buildspeed:
$(embench_dir)/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
find $(embench_dir)/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
# uses the build_all.py python file to build the speed benchmarks in addins/embench-iot/bd_speedopt_speed/ (compiled with -O2) and addins/embench-iot/bd_sizeopt_speed/ (compiled with -Os)
build_speedopt_speed:
$(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
find $(embench_dir)/bd_speedopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for size
buildsize:
$(embench_dir)/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
build_sizeopt_speed:
$(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-Os -nostartfiles"
find $(embench_dir)/bd_sizeopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
# uses the build_all.py python file to build the size benchmarks in addins/embench-iot/bd_speedopt_size/ (compiled with -O2) and addins/embench-iot/bd_sizeopt_size/ (compiled with -Os)
build_speedopt_size:
$(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-O2 -msave-restore" --dummy-libs="libgcc libm libc crt0"
build_sizeopt_size:
$(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
sim: modelsim_build_memfile modelsim_run speed
@ -28,35 +37,37 @@ modelsim_run:
cd ../../benchmarks/embench/
# builds the objdump based on the compiled c elf files
objdump: buildspeed
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
objdump:
find $(embench_dir)/bd_*_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
# build memfiles, objdump.lab and objdump.addr files
modelsim_build_memfile: objdump
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find $(embench_dir)/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
find $(embench_dir)/bd_*_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find $(embench_dir)/bd_*_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
# builds the tests for speed, runs them on spike and then launches python script to present results
# note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and modelsim
# you'll need to manually remove one of the two .output files, or run make clean
spike: buildspeed objdump spike_run speed
spike: buildspeed spike_run speed
# command to run spike on all of the benchmarks
spike_run:
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
find $(embench_dir)/bd_*opt_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
# python wrapper to present results of embench size benchmark
size: buildsize
$(embench_dir)/benchmark_size.py --builddir=bd_size --json-output > wallySize.json
$(embench_dir)/benchmark_size.py --builddir=bd_speedopt_size --json-output > wallySpeedOpt_size.json
$(embench_dir)/benchmark_size.py --builddir=bd_sizeopt_size --json-output > wallySizeOpt_size.json
# python wrapper to present results of embench speed benchmark
speed:
$(embench_dir)/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeed.json
$(embench_dir)/benchmark_speed.py --builddir=bd_sizeopt_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySizeOpt_speed.json
$(embench_dir)/benchmark_speed.py --builddir=bd_speedopt_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeedOpt_speed.json
# deletes all files
clean:
rm -rf $(embench_dir)/bd_speed/
rm -rf $(embench_dir)/bd_size/
rm -rf $(embench_dir)/bd_*_speed/
rm -rf $(embench_dir)/bd_*_size/
allclean: clean
rm -rf $(embench_dir)/logs/

View File

@ -3,9 +3,8 @@ import subprocess
import sys
import json
import plotly.graph_objects as go
from plotly.subplots import make_subplots
coremarkData = {}
embenchData = {}
debug = True
def loadCoremark():
@ -21,61 +20,85 @@ def loadCoremark():
if (debug): print(coremarkData)
return coremarkData
def loadEmbench():
def loadEmbench(embenchPath, embenchData):
"""loads the embench data dictionary"""
embenchPath = "embench/wallySpeed.json"
f = open(embenchPath)
embenchData = json.load(f)
if (debug): print(embenchData)
return embenchData
def graphEmbench(embenchData):
ydata = list(embenchData["speed results"]["detailed speed results"].keys()) + ["speed geometric mean","speed geometric sd","speed geometric range"]
xdata = list(embenchData["speed results"]["detailed speed results"].values()) + [embenchData["speed results"]["speed geometric mean"],embenchData["speed results"]["speed geometric sd"],embenchData["speed results"]["speed geometric range"]]
fig = go.Figure(go.Bar(
def graphEmbench(embenchSpeedOpt_SpeedData, embenchSizeOpt_SpeedData, embenchSpeedOpt_SizeData, embenchSizeOpt_SizeData):
fig = make_subplots(rows=2, cols=4,
# subplot_titles( "Wally's Embench Cycles and Instret (with -O2)","Wally's Embench Cycles Per Instruction (with -O2)"))
subplot_titles=( "Wally's Embench Cycles and Instret (with -O2)","Wally's Embench Cycles Per Instruction (with -O2)","Wally's Embench Speed Score (with -O2)","Wally's Embench Size Score (with -O2)",
"Wally's Embench Cycles and Instret (with -Os)","Wally's Embench Cycles Per Instruction (with -Os)","Wally's Embench Speed Score (with -Os)","Wally's Embench Size Score (with -Os)"))
ydata = list(embenchSpeedOpt_SpeedData["speed results"]["detailed speed results"].keys()) + ["speed geometric mean","speed geometric sd","speed geometric range"]
xdata = list(embenchSpeedOpt_SpeedData["speed results"]["detailed speed results"].values()) + [embenchSpeedOpt_SpeedData["speed results"]["speed geometric mean"],embenchSpeedOpt_SpeedData["speed results"]["speed geometric sd"],embenchSpeedOpt_SpeedData["speed results"]["speed geometric range"]]
fig.add_trace( go.Bar(
y=ydata,
x=xdata,
orientation='h'))
textposition='outside', text=xdata,
orientation='h'),
row=1,col=3)
fig.show()
ydata = list(embenchSizeOpt_SpeedData["speed results"]["detailed speed results"].keys()) + ["speed geometric mean","speed geometric sd","speed geometric range"]
xdata = list(embenchSizeOpt_SpeedData["speed results"]["detailed speed results"].values()) + [embenchSizeOpt_SpeedData["speed results"]["speed geometric mean"],embenchSizeOpt_SpeedData["speed results"]["speed geometric sd"],embenchSizeOpt_SpeedData["speed results"]["speed geometric range"]]
fig.add_trace( go.Bar(
y=ydata,
x=xdata,
textposition='outside', text=xdata,
orientation='h'),
row=2,col=3)
ydata = list(embenchSpeedOpt_SizeData["size results"]["detailed size results"].keys()) + ["size geometric mean","size geometric sd","size geometric range"]
xdata = list(embenchSpeedOpt_SizeData["size results"]["detailed size results"].values()) + [embenchSpeedOpt_SizeData["size results"]["size geometric mean"],embenchSpeedOpt_SizeData["size results"]["size geometric sd"],embenchSpeedOpt_SizeData["size results"]["size geometric range"]]
fig.add_trace( go.Bar(
y=ydata,
x=xdata,
textposition='outside', text=xdata,
orientation='h'),
row=1,col=4)
ydata = list(embenchSizeOpt_SizeData["size results"]["detailed size results"].keys()) + ["size geometric mean","size geometric sd","size geometric range"]
xdata = list(embenchSizeOpt_SizeData["size results"]["detailed size results"].values()) + [embenchSizeOpt_SizeData["size results"]["size geometric mean"],embenchSizeOpt_SizeData["size results"]["size geometric sd"],embenchSizeOpt_SizeData["size results"]["size geometric range"]]
fig.add_trace( go.Bar(
y=ydata,
x=xdata,
textposition='outside', text=xdata,
orientation='h'),
row=2,col=4)
# facet_row="Score", facet_col="Optimization Flag",
# category_orders={"Score": ["Cycles & Instr", "CPI", "SpeedScore", "SizeScore"],
# "Optimization Flag": ["O2", "Os"]}),
# orientation='h')
fig.update_layout(height=1500,width=4000, title_text="Wally Embench Scores", showlegend=False)
fig.write_image("figure.png", engine="kaleido")
# fig.show()
def main():
coremarkData = loadCoremark()
embenchData = loadEmbench()
graphEmbench(embenchData)
coremarkData = {}
embenchSizeOpt_SpeedData = {}
embenchSpeedOpt_SpeedData = {}
embenchSizeOpt_SizeData = {}
embenchSpeedOpt_SizeData = {}
# coremarkData = loadCoremark()
embenchSpeedOpt_SpeedData = loadEmbench("embench/wallySpeedOpt_speed.json", embenchSpeedOpt_SpeedData)
embenchSizeOpt_SpeedData = loadEmbench("embench/wallySizeOpt_speed.json", embenchSizeOpt_SpeedData)
embenchSpeedOpt_SizeData = loadEmbench("embench/wallySpeedOpt_size.json", embenchSpeedOpt_SizeData)
embenchSizeOpt_SizeData = loadEmbench("embench/wallySizeOpt_size.json", embenchSizeOpt_SizeData)
graphEmbench(embenchSpeedOpt_SpeedData, embenchSizeOpt_SpeedData, embenchSpeedOpt_SizeData, embenchSizeOpt_SizeData)
if __name__ == '__main__':
sys.exit(main())
# x =
# y =
# df = px.data.tips()
# fig = px.bar(df, x="total_bill", y="day", orientation='h')
# fig.show()
# import plotly.express as px
# result = sp.run(['ls', '-l'], stdout=sp.PIPE)
# result.stdout
# fig = go.Figure( go.Bar(
# x=[],
# y=[],
# color="species",
# facet_col="species",
# title="Using update_traces() With Plotly Express Figures"),
# orientation='h')
# fig.show()
#
# "ls -Art ../addins/embench-iot/logs/*speed* | tail -n 1 " # gets most recent embench speed log
# "ls -Art ../addins/embench-iot/logs/*size* | tail -n 1 " # gets most recent embench size log
## get coremark score
# cat coremarkPath | grep "CoreMark 1.0" | cut -d ':' -f 2 | cut -d " " -f 2
# cat coremarkPath | grep "MTIME" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
# cat coremarkPath | grep "MINSTRET" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
# "ls -Art ../addins/embench-iot/logs/*speed* | tail -n 1 " # gets most recent embench speed log

View File

@ -95,6 +95,7 @@
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))

View File

@ -3,7 +3,7 @@ make allclean:
make all
make clean:
make clean -C ../../addins/riscv-arch-test
make clean -C ../../tests/riscof
make clean -C ../../tests/wally-riscv-arch-test
# make allclean -C ../../tests/imperas-riscv-tests
@ -15,8 +15,8 @@ make all:
#make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs
# Build riscv-arch-test 64 and 32-bit versions
make -C ../../addins/riscv-arch-test --jobs
make -C ../../addins/riscv-arch-test XLEN=32 --jobs
make -C ../../tests/riscof/ --jobs
make -C ../../tests/riscof/ XLEN=32 --jobs
# Build wally-riscv-arch-test
make -C ../../tests/wally-riscv-arch-test/ --jobs

View File

@ -1,6 +1,6 @@
ROOT := ../..
SUFFIX := work
ARCHDIR := $(ROOT)/addins/riscv-arch-test
ARCHDIR := $(ROOT)/tests/riscof
WALLYDIR:= $(ROOT)/tests/wally-riscv-arch-test
# IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
# ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) $(IMPERASDIR)/$(SUFFIX)
@ -8,8 +8,9 @@ IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX)
ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf")
OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump")
MEMFILES ?= $(ELFFILES:.elf=.elf.memfile)
ADDRFILES ?= $(ELFFILES:.elf=.elf.objdump.addr)
ADDRFILES ?= $(OBJDUMPFILES:.objdump=.objdump.addr)
print:
echo "files in $(ALLDIRS) are $(ELFFILES)."

View File

@ -121,11 +121,11 @@ module fctrl (
assign FmtD = 0;
else if (`FPSIZES == 2)begin
logic [1:0] FmtTmp;
assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = (`FMT == FmtTmp);
end
else if (`FPSIZES == 3|`FPSIZES == 4)
assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
// Final Res Sel:
// fp int

View File

@ -34,13 +34,14 @@ module fpu (
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, // instruction from IFU
input logic [`XLEN-1:0] ReadDataW,// Read data from memory
input logic [`FLEN-1:0] ReadDataW,// Read data from memory
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU
input logic FlushE, FlushM, FlushW, // flush signals from HZU
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled?
output logic FRegWriteM, // FP register write enable
output logic FpLoadM, // Fp load instruction?
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
@ -348,6 +349,8 @@ module fpu (
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
assign FpLoadM = FResSelM[1];
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
@ -378,21 +381,7 @@ module fpu (
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// - for load instruction
generate
if(`FPSIZES == 1) assign ReadResW = {{`FLEN-`XLEN{1'b1}}, ReadDataW};
else if(`FPSIZES == 2)
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ReadDataW[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ReadDataW[`S_LEN-1:0]},
{{`FLEN-`D_LEN{1'b1}}, ReadDataW[`D_LEN-1:0]},
{{`FLEN-`H_LEN{1'b1}}, ReadDataW[`H_LEN-1:0]},
{{`FLEN-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); // NaN boxing zeroes
endgenerate
// select the result to be written to the FP register
mux2 #(`FLEN) FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
mux2 #(`FLEN) FPUResultMux (FpResW, ReadDataW, FResSelW[1], FPUResultW);
endmodule // fpu

View File

@ -80,7 +80,7 @@ module comparator #(parameter WIDTH=64) (
assign flags = {eq, lt, ltu};
endmodule
// This comaprator is best
// This comparator is best
module comparator_dc_flip #(parameter WIDTH=64) (
input logic [WIDTH-1:0] a, b,
input logic sgnd,
@ -94,7 +94,7 @@ module comparator_dc_flip #(parameter WIDTH=64) (
assign bf = {b[WIDTH-1] ^ sgnd, b[WIDTH-2:0]};
// behavioral description gives best results
assign eq = (af == bf);
assign eq = (a == b);
assign lt = (af < bf);
assign flags = {eq, lt};
endmodule

View File

@ -41,7 +41,7 @@ module controller(
output logic IllegalBaseInstrFaultD,
// Execute stage control signals
input logic StallE, FlushE,
input logic [2:0] FlagsE,
input logic [1:0] FlagsE,
input logic FWriteIntE,
output logic PCSrcE, // for datapath and Hazard Unit
output logic [2:0] ALUControlE,
@ -52,6 +52,7 @@ module controller(
output logic MDUE, W64E,
output logic JumpE,
output logic SCE,
output logic BranchSignedE,
// Memory stage control signals
input logic StallM, FlushM,
output logic [1:0] MemRWM,
@ -211,8 +212,9 @@ module controller(
{IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MDUE, AtomicE, InvalidateICacheE, FlushDCacheE, FencePendingE, InstrValidE});
// Branch Logic
assign {eqE, ltE, ltuE} = FlagsE;
mux4 #(1) branchflagmux(eqE, 1'b0, ltE, ltuE, Funct3E[2:1], BranchFlagE);
assign BranchSignedE = ~(Funct3E[2:1] == 2'b11);
assign {eqE, ltE} = FlagsE;
mux3 #(1) branchflagmux(eqE, 1'b0, ltE, Funct3E[2:1], BranchFlagE);
assign BranchTakenE = BranchFlagE ^ Funct3E[0];
assign PCSrcE = JumpE | BranchE & BranchTakenE;

View File

@ -43,11 +43,12 @@ module datapath (
input logic ALUSrcAE, ALUSrcBE,
input logic ALUResultSrcE,
input logic JumpE,
input logic BranchSignedE,
input logic IllegalFPUInstrE,
input logic [`XLEN-1:0] FWriteDataE,
input logic [`XLEN-1:0] PCE,
input logic [`XLEN-1:0] PCLinkE,
output logic [2:0] FlagsE,
output logic [1:0] FlagsE,
output logic [`XLEN-1:0] IEUAdrE,
output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
// Memory stage signals
@ -63,9 +64,9 @@ module datapath (
input logic [2:0] ResultSrcW,
input logic [`XLEN-1:0] FCvtIntResW,
input logic [1:0] FResSelW,
output logic [`XLEN-1:0] ReadDataW,
input logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
// Hazard Unit signals
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E,
output logic [4:0] RdE, RdM, RdW
@ -106,7 +107,7 @@ module datapath (
mux3 #(`XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE);
mux3 #(`XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE);
comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, FlagsE);
comparator_dc_flip #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE);
mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE);
mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ExtImmE, ALUSrcBE, SrcBE);
alu #(`XLEN) alu(SrcAE, SrcBE, ALUControlE, Funct3E, ALUResultE, IEUAdrE);
@ -121,7 +122,6 @@ module datapath (
// Writeback stage pipeline register and logic
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
flopen #(`XLEN) ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
// floating point interactions: fcvt, fp stores
if (`F_SUPPORTED) begin:fpmux

View File

@ -60,11 +60,11 @@ module ieu (
output logic InvalidateICacheM, FlushDCacheM,
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
input logic [1:0] FResSelW,
input logic [`XLEN-1:0] FCvtIntResW,
output logic [4:0] RdW,
output logic [`XLEN-1:0] ReadDataW,
input logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
output logic InstrValidM,
// hazards
@ -78,7 +78,7 @@ module ieu (
);
logic [2:0] ImmSrcD;
logic [2:0] FlagsE;
logic [1:0] FlagsE;
logic [2:0] ALUControlE;
logic ALUSrcAE, ALUSrcBE;
logic [2:0] ResultSrcW;
@ -93,23 +93,24 @@ module ieu (
logic RegWriteM, RegWriteW;
logic MemReadE, CSRReadE;
logic JumpE;
logic BranchSignedE;
controller c(
.clk, .reset, .StallD, .FlushD, .InstrD, .ImmSrcD,
.IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD, .StallE, .FlushE, .FlagsE, .FWriteIntE,
.PCSrcE, .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .MemReadE, .CSRReadE,
.Funct3E, .MDUE, .W64E, .JumpE, .StallM, .FlushM, .MemRWM,
.CSRReadM, .CSRWriteM, .PrivilegedM, .SCE, .AtomicM, .Funct3M,
.Funct3E, .MDUE, .W64E, .JumpE, .SCE, .BranchSignedE, .StallM, .FlushM, .MemRWM,
.CSRReadM, .CSRWriteM, .PrivilegedM, .AtomicM, .Funct3M,
.RegWriteM, .InvalidateICacheM, .FlushDCacheM, .InstrValidM, .FWriteIntM,
.StallW, .FlushW, .RegWriteW, .ResultSrcW, .CSRWriteFencePendingDEM, .StoreStallD);
datapath dp(
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE,
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .BranchSignedE, .IllegalFPUInstrE,
.FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
.CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw(
.Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,

View File

@ -30,12 +30,32 @@
`include "wally-config.vh"
module bigendianswap (
module bigendianswap #(parameter LEN=`XLEN) (
input logic BigEndianM,
input logic [`XLEN-1:0] a,
output logic [`XLEN-1:0] y);
input logic [LEN-1:0] a,
output logic [LEN-1:0] y);
if(`XLEN == 64) begin
if(LEN == 128) begin
always_comb
if (BigEndianM) begin // swap endianness
y[127:120] = a[7:0];
y[119:112] = a[15:8];
y[111:104] = a[23:16];
y[103:96] = a[31:24];
y[95:88] = a[39:32];
y[87:80] = a[47:40];
y[79:72] = a[55:48];
y[71:64] = a[63:56];
y[63:56] = a[71:64];
y[55:48] = a[79:72];
y[47:40] = a[87:80];
y[39:32] = a[95:88];
y[31:24] = a[103:96];
y[23:16] = a[111:104];
y[15:8] = a[119:112];
y[7:0] = a[127:120];
end else y = a;
end else if(LEN == 64) begin
always_comb
if (BigEndianM) begin // swap endianness
y[63:56] = a[7:0];

View File

@ -51,11 +51,13 @@ module lsu (
input logic [`XLEN-1:0] IEUAdrE,
(* mark_debug = "true" *)output logic [`XLEN-1:0] IEUAdrM,
input logic [`XLEN-1:0] WriteDataE,
output logic [`XLEN-1:0] ReadDataM,
output logic [`LLEN-1:0] ReadDataW,
// cpu privilege
input logic [1:0] PrivilegeModeW,
input logic BigEndianM,
input logic sfencevmaM,
// fpu
input logic FpLoadM,
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM,
output logic LoadMisalignedFaultM, LoadAccessFaultM,
@ -110,6 +112,7 @@ module lsu (
logic [`XLEN-1:0] LSUWriteDataM;
logic [(`XLEN-1)/8:0] ByteMaskM;
logic [`XLEN-1:0] WriteDataM;
logic [`LLEN-1:0] ReadDataM;
// *** TO DO: Burst mode
@ -128,7 +131,7 @@ module lsu (
.DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM,
.TrapM, .DCacheStallM, .SATP_REGW, .PCF,
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW,
.ReadDataM, .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M,
.ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M,
.IEUAdrExtM, .PTE, .LSUWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE,
.LSUAdrE, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW,
.IgnoreRequestTLB, .IgnoreRequestTrapM);
@ -187,8 +190,8 @@ module lsu (
// Either Data Cache or Data Tightly Integrated Memory or just bus interface
/////////////////////////////////////////////////////////////////////////////////////////////
logic [`XLEN-1:0] AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM;
logic [`XLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
logic [`XLEN-1:0] ReadDataWordMuxM;
logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
logic [`LLEN-1:0] ReadDataWordMuxM;
logic IgnoreRequest;
logic SelUncachedAdr;
assign IgnoreRequest = IgnoreRequestTLB | IgnoreRequestTrapM;
@ -197,7 +200,7 @@ module lsu (
// *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW.
// Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM,
.ReadDataWordM, .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
.ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
.DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM),
.DCacheMiss, .DCacheAccess);
end
@ -222,14 +225,14 @@ module lsu (
.SelUncachedAdr, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM,
.BusStall, .BusCommittedM);
mux2 #(`XLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1(DCacheBusWriteData[`XLEN-1:0]),
mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}),
.s(SelUncachedAdr), .y(ReadDataWordMuxM));
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM), .d1(FinalWriteDataM),
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM),
.s(SelUncachedAdr), .y(LSUBusHWDATA));
if(CACHE_ENABLED) begin : dcache
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`XLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount,
@ -253,7 +256,7 @@ module lsu (
// Atomic operations
/////////////////////////////////////////////////////////////////////////////////////////////
if (`A_SUPPORTED) begin:atomic
atomic atomic(.clk, .reset, .StallW, .ReadDataM, .LSUWriteDataM, .LSUPAdrM,
atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .LSUWriteDataM, .LSUPAdrM,
.LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest,
.AMOWriteDataM, .SquashSCW, .LSURWM);
end else begin:lrsc
@ -266,7 +269,13 @@ module lsu (
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM);
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
.Funct3M(LSUFunct3M), .ReadDataM);
.FpLoadM, .Funct3M(LSUFunct3M), .ReadDataM);
/////////////////////////////////////////////////////////////////////////////////////////////
// MW Pipeline Register
/////////////////////////////////////////////////////////////////////////////////////////////
flopen #(`LLEN) ReadDataMWReg(clk, ~StallW, ReadDataM, ReadDataW);
/////////////////////////////////////////////////////////////////////////////////////////////
// Big Endian Byte Swapper
@ -274,8 +283,8 @@ module lsu (
// swap the bytes when read from big-endian memory
/////////////////////////////////////////////////////////////////////////////////////////////
if (`BIGENDIAN_SUPPORTED) begin:endian
bigendianswap storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
bigendianswap loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
end else begin
assign FinalWriteDataM = LittleEndianWriteDataM;
assign LittleEndianReadDataWordM = ReadDataWordM;

View File

@ -32,10 +32,11 @@
module subwordread
(
input logic [`XLEN-1:0] ReadDataWordMuxM,
input logic [`LLEN-1:0] ReadDataWordMuxM,
input logic [2:0] LSUPAdrM,
input logic [2:0] Funct3M,
output logic [`XLEN-1:0] ReadDataM
input logic FpLoadM,
output logic [`LLEN-1:0] ReadDataM
);
logic [7:0] ByteM;
@ -74,18 +75,31 @@ module subwordread
1'b1: WordM = ReadDataWordMuxM[63:32];
endcase
// sign extension
logic [63:0] DblWordM;
assign DblWordM = ReadDataWordMuxM[63:0];
// sign extension/ NaN boxing
always_comb
case(Funct3M)
3'b000: ReadDataM = {{56{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: ReadDataM = {{32{WordM[31]}}, WordM[31:0]}; // lw
3'b011: ReadDataM = ReadDataWordMuxM; // ld
3'b100: ReadDataM = {56'b0, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {48'b0, HalfwordM[15:0]}; // lhu
3'b110: ReadDataM = {32'b0, WordM[31:0]}; // lwu
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: if(`ZFH_SUPPORTED)
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: if(`F_SUPPORTED)
ReadDataM = {{`LLEN-32{WordM[31]|FpLoadM}}, WordM[31:0]}; // lw/flw
else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw
3'b011: if(`D_SUPPORTED)
ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadM}}, DblWordM[63:0]}; // ld/fld
else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld
3'b100: if(`Q_SUPPORTED)
ReadDataM = FpLoadM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
else
ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
3'b110: ReadDataM = {{`LLEN-32{1'b0}}, WordM[31:0]}; // lwu
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
endcase
end else begin:swrmux // 32-bit
// byte mux
always_comb
@ -105,13 +119,18 @@ module subwordread
// sign extension
always_comb
case(Funct3M)
3'b000: ReadDataM = {{24{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: ReadDataM = ReadDataWordMuxM; // lw
3'b100: ReadDataM = {24'b0, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {16'b0, HalfwordM[15:0]}; // lhu
default: ReadDataM = ReadDataWordMuxM;
case(Funct3M)
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: if(`ZFH_SUPPORTED)
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: if(`F_SUPPORTED)
ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadM}}, ReadDataWordMuxM[31:0]}; // lw/flw
else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw
3'b011: ReadDataM = ReadDataWordMuxM; // fld
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
endcase
end
endmodule

View File

@ -98,6 +98,7 @@ module wallypipelinedcore (
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FRegWriteM;
logic FPUStallD;
logic FpLoadM;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
@ -128,8 +129,7 @@ module wallypipelinedcore (
logic [`XLEN-1:0] IEUAdrE;
(* mark_debug = "true" *) logic [`XLEN-1:0] WriteDataE;
(* mark_debug = "true" *) logic [`XLEN-1:0] IEUAdrM;
(* mark_debug = "true" *) logic [`XLEN-1:0] ReadDataM;
logic [`XLEN-1:0] ReadDataW;
logic [`LLEN-1:0] ReadDataW;
logic CommittedM;
// AHB ifu interface
@ -229,8 +229,8 @@ module wallypipelinedcore (
.RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
// Writeback stage
.CSRReadValW, .ReadDataM, .MDUResultW,
.RdW, .ReadDataW,
.CSRReadValW, .MDUResultW,
.RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
.InstrValidM,
.FCvtIntResW,
.FResSelW,
@ -253,9 +253,10 @@ module wallypipelinedcore (
.AtomicM, .TrapM,
.CommittedM, .DCacheMiss, .DCacheAccess,
.SquashSCW,
.FpLoadM,
//.DataMisalignedM(DataMisalignedM),
.IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataM, .FlushDCacheM,
.ReadDataW, .FlushDCacheM,
// connected to ahb (all stay the same)
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
@ -383,13 +384,14 @@ module wallypipelinedcore (
.clk, .reset,
.FRM_REGW, // Rounding mode from CSR
.InstrD, // instruction from IFU
.ReadDataW,// Read data from memory
.ReadDataW(ReadDataW[`FLEN-1:0]),// Read data from memory
.ForwardedSrcAE, // Integer input being processed (from IEU)
.StallE, .StallM, .StallW, // stall signals from HZU
.FlushE, .FlushM, .FlushW, // flush signals from HZU
.RdM, .RdW, // which FP register to write to (from IEU)
.STATUS_FS, // is floating-point enabled?
.FRegWriteM, // FP register write enable
.FpLoadM,
.FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory

View File

@ -1,4 +1,4 @@
all: exptestgen testgen qslc_r4a2
all: exptestgen testgen qslc_r4a2 qslc_r4a2b
sqrttestgen: sqrttestgen.c
gcc sqrttestgen.c -o sqrttestgen -lm
@ -15,5 +15,9 @@ qslc_r4a2: qslc_r4a2.c
gcc qslc_r4a2.c -o qslc_r4a2 -lm
./qslc_r4a2 > qslc_r4a2.sv
qslc_r4a2b: qslc_r4a2b.c
gcc qslc_r4a2b.c -o qslc_r4a2b -lm
./qslc_r4a2b > qslc_r4a2b.tv
# Remove every generator binary this Makefile builds. Generated tables
# (e.g. qslc_r4a2b.tv) are checked in and are deliberately left alone.
.PHONY: clean
clean:
	rm -f testgen exptestgen sqrttestgen qslc_r4a2 qslc_r4a2b

View File

@ -1 +1,2 @@
verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv

1048
pipelined/srt/qsel4.sv Normal file

File diff suppressed because it is too large Load Diff

BIN
pipelined/srt/qslc_r4a2b Executable file

Binary file not shown.

190
pipelined/srt/qslc_r4a2b.c Normal file
View File

@ -0,0 +1,190 @@
/*
Program: qslc_r4a2b.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: Writes x to stdout as a fixed-point binary string of
             bits_to_left + bits_to_right digits, with no radix point
             and no trailing newline. The first digit has weight
             2^(bits_to_left-1). A negative x is wrapped by adding
             2^bits_to_left (two's complement). A magnitude below
             2^-bits_to_right is printed as all zeros.
Argument List: double x The value to be converted
               int bits_to_left Number of bits left of radix point
               int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
    const int first_pos = 1 - bits_to_left;
    int pos;

    /* Too small to register in any printed position: emit zeros only. */
    if (fabs(x) < pow(2.0, (double) -bits_to_right)) {
        for (pos = first_pos; pos <= bits_to_right; pos++)
            putchar('0');
        return;
    }

    /* Map a negative value onto its two's-complement image. */
    if (x < 0.0)
        x += pow(2.0, (double) bits_to_left);

    /* Peel off one binary weight per position, most significant first. */
    for (pos = first_pos; pos <= bits_to_right; pos++) {
        double weight = pow(2.0, (double) -pos);
        int bit = !(x < weight);

        putchar(bit ? '1' : '0');
        if (bit)
            x -= weight;
    }
}
/*
Function: main
Description: Prints the radix-4 (a=2) quotient selection table, one hex
             digit per line. The outer loop walks the DIVISOR_SIZE-bit
             divisor index; the inner loop walks every TOT_SIZE-bit
             pattern of the truncated partial remainder, read as a
             two's-complement value (0..63, then -64..-1).

             The signed value is computed explicitly rather than by
             incrementing a plain `int` bit-field, whose signedness and
             wraparound are implementation-defined. Loop bounds are
             integer shifts rather than floating-point pow().
Return value: 0
*/
int main(void) {
    /*
    4 bits for Radix 4 (a=2)
    1000 = +2
    0100 = +1
    0000 = 0
    0010 = -1
    0001 = -2
    */
    /* Hex digit printed for each one-hot code, highest threshold first. */
    static const char code[4] = {'8', '4', '0', '2'};
    /* Per divisor index: lowest remainder that selects +2, +1, 0, -1.
       Anything below the last threshold selects -2 (printed as '1'). */
    static const int threshold[][4] = {
        {12, 4, -4, -13},
        {14, 4, -6, -15},
        {15, 4, -6, -16},
        {16, 4, -6, -18},
        {18, 6, -8, -20},
        {20, 6, -8, -20},
        {20, 8, -8, -22},
        {24, 8, -8, -24}
    };
    const int num_rows = (int) (sizeof threshold / sizeof threshold[0]);
    const int num_divisors = 1 << DIVISOR_SIZE;
    const int num_patterns = 1 << TOT_SIZE;
    int divisor, pattern, k;

    for (divisor = 0; divisor < num_divisors; divisor++) {
        for (pattern = 0; pattern < num_patterns; pattern++) {
            /* Reinterpret the TOT_SIZE-bit pattern as two's complement. */
            int tot = (pattern < num_patterns / 2) ? pattern
                                                   : pattern - num_patterns;
            char out = '1';               /* -2 unless a threshold is met */

            if (divisor >= num_rows) {
                out = 'X';                /* no table row for this divisor */
            } else {
                for (k = 0; k < 4; k++) {
                    if (tot >= threshold[divisor][k]) {
                        out = code[k];
                        break;
                    }
                }
            }
            putchar(out);
            putchar('\n');
        }
    }
    return 0;
}

1024
pipelined/srt/qslc_r4a2b.tv Normal file

File diff suppressed because it is too large Load Diff

2
pipelined/srt/sim-srt4 Executable file
View File

@ -0,0 +1,2 @@
# Run the srt-radix4.do macro in vsim (no -c flag, so the GUI is shown).
vsim -do "do srt-radix4.do"

1
pipelined/srt/sim-srt4-batch Executable file
View File

@ -0,0 +1 @@
# Run the srt-radix4.do macro in vsim from the shell; -c suppresses the GUI.
vsim -c -do "do srt-radix4.do"

View File

@ -0,0 +1,31 @@
# srt-radix4.do
#
# David_Harris@hmc.edu 19 October 2021
# Use this srt-radix4.do file to simulate the radix-4 SRT divider testbench.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do srt-radix4.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do srt-radix4.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library (delete any stale one first so every run starts clean)
if [file exists work] {
vdel -all
}
vlib work
# compile the divider, its testbench, and the generic cells they instantiate
vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
vopt +acc work.testbenchradix4 -o workopt
vsim workopt
# display the signals of the testbench, the divider, and its qsel4/otfc4 submodules
add wave /testbenchradix4/*
add wave /testbenchradix4/srtradix4/*
add wave /testbenchradix4/srtradix4/qsel4/*
add wave /testbenchradix4/srtradix4/otfc4/*
# Run the Simulation
run -all

323
pipelined/srt/srt-radix4.sv Normal file
View File

@ -0,0 +1,323 @@
///////////////////////////////////////////
// srt-radix4.sv
//
// Written: David_Harris@hmc.edu 13 January 2022
// Modified:
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF)
// srtradix4: radix-4 SRT divider datapath. Each cycle qsel4 picks a quotient
// digit q in {-2,-1,0,+1,+2} (one-hot, see encoding below), the carry-save
// partial remainder is shifted left by two bits and q*D is subtracted, and
// otfc4 accumulates the digits into Quot.
// NOTE(review): Rem and Flags are never assigned in this module, and Stall,
// Flush, intExp and intSign are not used yet.
module srtradix4 (
input logic clk,
input logic Start,
input logic Stall, // *** multiple pipe stages
input logic Flush, // *** multiple pipe stages
// Floating Point Inputs
// later add exponents, signs, special cases
input logic XSign, YSign,
input logic [`NE-1:0] XExp, YExp,
input logic [`NF-1:0] XFrac, YFrac,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic rsign,
output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers
output logic [`NE-1:0] rExp,
output logic [3:0] Flags
);
// logic qp, qz, qm; // quotient is +1, 0, or -1
logic [3:0] q; // one-hot quotient digit from qsel4 (all zeros = digit 0)
logic [`NE-1:0] calcExp;
logic calcSign;
logic [`DIVLEN-1:0] X, Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN; // partial remainder, sum word: registered / adder output / next
logic [`DIVLEN+3:0] WC, WCA, WCN; // partial remainder, carry word: registered / adder output / next
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; // divisor, ~D, 2D, ~(2D), and the selected addend
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - the assumed leading one is added here (4'b0001 prefix) since all numbers are normalized
// *** wait what about zero? is that a special case? can the divider handle it?
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA/WCA, shifted left by two bits (radix 4), into the flip-flops
// *** what does N and A stand for?
// *** change shift amount for radix4
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D);
// Quotient Selection logic
// Given the divisor and the carry-save partial remainder, select a quotient digit in -2..+2
// *** change this for radix 4 - generate w/ stine code
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
qsel4 qsel4(.D, .WS, .WC, .q);
// Store the exponent and sign until division is done
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign);
// Divisor Selection logic
// *** radix 4 change to choose -2 to 2
// - choose the negative version of what's being selected, since the csa computes W - q*D
// - for positive digits Dsel is only the one's complement (~D or ~(2D));
//   the missing +1 is supplied through the csa carry-in below
assign DBar = ~D;
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
assign D2 = {D[`DIVLEN+2:0], 1'b0};
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
4'b0010: Dsel = D;
4'b0001: Dsel = D2;
default: Dsel = {`DIVLEN+4{1'bx}};
endcase
// Partial Remainder Generation
// WSA, WCA = WS + WC - qD
// cin = |q[3:2] is 1 exactly for q = +2 or +1, completing the two's-complement negation of Dsel
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
//*** change for radix 4
otfc4 #(`DIVLEN) otfc4(clk, Start, q, Quot);
expcalc expcalc(.XExp, .YExp, .calcExp);
signcalc signcalc(.XSign, .YSign, .calcSign);
endmodule
////////////////
// Submodules //
////////////////
///////////////////
// Preprocessing //
///////////////////
// srtpreproc: produces the DIVLEN-bit dividend X and divisor D handed to the
// divider. With Int set they come from the shifted magnitudes of SrcA/SrcB;
// otherwise from XFrac/YFrac padded with low-order zeros.
// NOTE(review): Fmt, W64 and Sqrt are accepted but not referenced in the body.
module srtpreproc (
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [`NF-1:0] XFrac, YFrac,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN-1:0] X, D,
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
output logic intSign // Quotient integer sign
);
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
logic [`XLEN-1:0] PosA, PosB;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
// Magnitudes: negate an operand only when it is signed and its top bit is set
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// presumably lzc returns the leading-zero count of its input - confirm against lzc.sv
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
// Widen the integer magnitudes to DIVLEN bits by appending low-order zeros
assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// Note the asymmetry: the divisor is shifted one position further than the dividend
assign PreprocA = ExtraA << zeroCntA;
assign PreprocB = ExtraB << (zeroCntB + 1);
// Floating-point fractions are widened to DIVLEN bits the same way
assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}};
assign X = Int ? PreprocA : PreprocX;
assign D = Int ? PreprocB : PreprocY;
assign intExp = zeroCntB - zeroCntA + 1;
// Integer quotient is negative when the operands are signed and their sign bits differ
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
endmodule
/////////////////////////////////
// Quotient Selection, Radix 2 //
/////////////////////////////////
// qsel2: radix-2 quotient digit selection. Looks only at the top four bits of
// the carry-save partial remainder (ps = sum word, pc = carry word) and
// produces a one-hot digit: qp (+1), qz (0) or qm (-1).
// NOTE(review): srtradix4 in this file instantiates qsel4, not this module.
module qsel2 ( // *** eventually just change to 4 bits
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz, qm
);
logic [`DIVLEN+3:`DIVLEN] p, g; // per-bit propagate (xor) and generate (and) of ps and pc
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Quotient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
// magnitude is 0 only when all three lower propagate bits are 1
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
// carry out of the three lower bits, feeding the top (sign) position
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign #1 sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
(ps[52]&pc[52]))))); */
// Produce quotient = +1, 0, or -1
assign #1 qp = magnitude & ~sign;
assign #1 qz = ~magnitude;
assign #1 qm = magnitude & sign;
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 4 //
///////////////////////////////////
module otfc4 #(parameter N=65) (
input logic clk,
input logic Start,
input logic [3:0] q,
output logic [N-1:0] r
);
// The on-the-fly converter transfers the quotient
// digits to the quotient as they come.
//
// This code follows the pseudocode presented in the
// floating point chapter of the book, extended to
// radix 4: every cycle two bits are appended for a
// digit q in {-2,-1,0,+1,+2} (one-hot in q[3:0],
// all zeros meaning digit 0).
//
// QM is Q-1. It allows us to write negative digits
// without using a costly CPA.
logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
// QR and QMR are the low N+1 bits of Q and QM: the two
// most significant bits are dropped so that appending
// two new bits gives back an (N+3)-bit value.
logic [N:0] QR, QMR;
// if starting a new division set Q to 0 and QM to -1
mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux);
mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux);
flop #(N+3) Qreg(clk, QMux, Q);
flop #(N+3) QMreg(clk, QMMux, QM);
// shift Q (quotient) and QM (quotient-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how do the 0 concatenation numbers work?
always_comb begin
QR = Q[N:0];
QMR = QM[N:0]; // Shift Q and QM
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
// Pick the N-bit result window: Q[N+1:2] when the top bit of Q is set, otherwise Q[N:1]
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
  input  logic [N-1:0] in1, in2, in3,
  input  logic         cin,
  output logic [N-1:0] out1, out2
);
  // 3:2 carry-save adder. out1 is the bitwise sum word and out2 the
  // carry word, so in1 + in2 + in3 + cin is represented by out1 + out2.
  // The carry word is the per-bit majority shifted left by one; the
  // vacated least significant bit carries cin, which is how a negated
  // divisor gets its two's-complement +1.
  logic [N-2:0] majority;

  // A carry is produced wherever at least two of the three inputs are 1.
  assign majority = (in1[N-2:0] & in2[N-2:0]) |
                    (in1[N-2:0] & in3[N-2:0]) |
                    (in2[N-2:0] & in3[N-2:0]);

  assign #1 out1 = (in1 ^ in2) ^ in3;
  assign #1 out2 = {majority, cin};
endmodule
//////////////
// expcalc //
//////////////
module expcalc(
  input  logic [`NE-1:0] XExp, YExp,
  output logic [`NE-1:0] calcExp
);
  // Quotient exponent: XExp - YExp + `BIAS, all in NE-bit wraparound arithmetic.
  logic [`NE-1:0] expDiff;

  assign expDiff = XExp - YExp;
  assign calcExp = expDiff + (`NE)'(`BIAS);
endmodule
//////////////
// signcalc //
//////////////
module signcalc(
  input  logic XSign, YSign,
  output logic calcSign
);
  // The result is negative exactly when the operand signs differ.
  assign calcSign = (XSign != YSign);
endmodule

View File

@ -1 +1,3 @@
add wave -noupdate /testbench/clk
add wave -noupdate /testbench/*
add wave -noupdate /testbench/srt/*
add wave -noupdate /testbench/srt/otfc2/*

View File

@ -1,17 +1,26 @@
CC = gcc
CFLAGS = -lm
LIBS =
OBJS = disp.o srt4div.o
CC = gcc
CFLAGS = -lm
LIBS =
OBJS4 = disp.o srt4div.o
OBJS2 = disp.o srt2div.o
srt4div: $(OBJS)
$(CC) -g -O3 -o srt4div $(OBJS) $(CFLAGS)
all: srt4div srt2div
disp.o: disp.h disp.c
$(CC) -g -c -o disp.o disp.c $(CFLAGS)
$(CC) -g -c -o disp.o disp.c
srt4div.o: srt4div.c
$(CC) -g -c -o srt4div.o srt4div.c $(CFLAGS)
$(CC) -g -c -o srt4div.o srt4div.c
srt2div.o: srt2div.c
$(CC) -g -c -o srt2div.o srt2div.c
srt4div: $(OBJS4)
$(CC) -g -O3 -o srt4div $(OBJS4) $(CFLAGS)
srt2div: $(OBJS2)
$(CC) -g -O3 -o srt2div $(OBJS2) $(CFLAGS)
clean:
rm -f *.o *~

30
pipelined/srt/stine/notes Normal file
View File

@ -0,0 +1,30 @@
Dividend x = (0.10101111)_2, divisor d = (0.11000101)_2 (i = 16(0.1100)_2 = 12)
X = 175 (xAF)
D = 197 (xC5)
X = 175/256 = 0.68359375
D = 197/256 = 0.76953125
Note: Add lg(r) extra iterations due to shifting of computed q
q_{computed} = q / radix
./srt4div 0.68359375 0.76953125 8 10
r=2
X = 0.10011111
D = 0.11000101
X = 159 (9F)
D = 197 (C5)
X = 159/256 = 0.62109375
D = 197/256 = 0.76953125
./srt2div 0.62109375 0.76953125 8 9

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 165 KiB

BIN
pipelined/srt/stine/srt2div Executable file

Binary file not shown.

114
pipelined/srt/stine/srt2div.c Executable file
View File

@ -0,0 +1,114 @@
#include "disp.h"
// QSLC (quotient-digit selection) for division by recurrence
// with r=2 using a CPA - See 5.109 EL
// Returns +1 for prem >= 0.5, 0 for -0.5 <= prem < 0.5, and -1 otherwise.
// D is accepted for symmetry with other selection functions but is not used.
int qst (double D, double prem) {
  (void) D;
  // For Debugging
  printf("rw --> %lg\n", prem);
  return (prem >= 0.5)  ?  1
       : (prem >= -0.5) ?  0
       :                  -1;
}
/*
  This routine performs a radix-2 SRT division
  algorithm.  The user inputs the numerator, the denominator,
  the number of iterations, and the precision used when rounding
  and displaying values.  It assumes that 0.5 <= D < 1.

  Exits with status 1 and a usage message when an argument is
  missing or cannot be parsed as a number.
*/
int main(int argc, char* argv[]) {
  double P, Pnext, N, D, Q, RQ, RD, scale;
  int q;
  int num_iter, i;
  int prec;
  int radix = 2;

  // Short-circuit evaluation keeps argv[1..4] from being read when absent;
  // checking sscanf guards against running with uninitialized operands.
  if (argc < 5 ||
      sscanf(argv[1],"%lg", &N) != 1 ||
      sscanf(argv[2],"%lg", &D) != 1 ||
      sscanf(argv[3],"%d", &num_iter) != 1 ||
      sscanf(argv[4],"%d", &prec) != 1) {
    fprintf(stderr,
            "Usage: %s numerator denominator num_iterations prec\n",
            argv[0]);
    exit(1);
  }

  // Round to precision
  N = rne(N, prec);
  D = rne(D, prec);
  printf("N = ");
  disp_bin(N, 3, prec, stdout);
  printf("\n");
  printf("D = ");
  disp_bin(D, 3, prec, stdout);
  printf("\n");

  Q = 0;
  // Pre-scale the dividend by 1/radix; undone when the result is reported
  P = N * pow(2.0, -log2(radix));
  printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n",
         N, D, N/D, num_iter);
  for (scale = 1, i = 0; i < num_iter; i++) {
    scale = scale * pow(2.0, -log2(radix));
    q = qst(flr(2*D, 1), 2*P);
    // Recurrence: W[n+1] = radix*W[n] - q*D.  Computed before printing so
    // the line labelled W[n+1] shows the new remainder, not the old one.
    Pnext = radix * P - q * D;
    printf("2*W[n] = ");
    disp_bin(radix*P, 3, prec, stdout);
    printf("\n");
    printf("q*D = ");
    disp_bin(q*D, 3, prec, stdout);
    printf("\n");
    printf("W[n+1] = ");
    disp_bin(Pnext ,3, prec, stdout);
    printf("\n");
    P = Pnext;
    Q = Q + q*scale;
    printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P);
    printf("i = %d, q = %d", i, q);
    printf(", Q = ");
    disp_bin(Q, 3, prec, stdout);
    printf(", W = ");
    disp_bin(P, 3, prec, stdout);
    printf("\n\n");
  }
  // A negative final remainder means the quotient is one ulp too large
  if (P < 0) {
    Q = Q - scale;
    P = P + D;
    printf("\nCorrecting Negative Remainder\n");
    printf("Q = %1.18lf, W = %1.18lf\n", Q, P);
    printf("Q = ");
    disp_bin(Q, 3, prec, stdout);
    printf(", W = ");
    disp_bin(P, 3, prec, stdout);
    printf("\n");
  }

  // Output Results
  RQ = N/D;
  // Since q_{computed} = q / radix, multiply by radix
  RD = Q * radix;
  printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
  printf("true = ");
  disp_bin(RQ, 3, prec, stdout);
  printf(", computed = ");
  disp_bin(RD, 3, prec, stdout);
  printf("\n\n");
  printf("REM = %1.18lf \n", P);
  printf("REM = ");
  disp_bin(P, 3, prec, stdout);
  printf("\n\n");
  return 0;
}

View File

@ -0,0 +1,508 @@
%
% PD Region for Np = 3; Nd = 4;
% w/CPA
%
% Clear all variables and screen
clear
clf
% Define the number of bits (input Dividend)
n = 4;
%
% Define Divisor Range
% Normalized Floating Point [Dmin,Dmax] = [1,2]
% Normalized Fixed Point [Dmin, Dmax] =[1/2,1]
% (the fixed-point range [1/2,1] is the one set below)
%
Dminimum = 1.0/2;
Dmaximum = 2.0/2;
% Define an ulp
ulp = 2^(-n);
% radix = beta
beta = 4;
% rho = redundancy factor -> SHOULD ALWAYS BE >= 1/2
%
% SD representations have alpha < beta - 1
%
% alpha = ceil(beta/2) minimally redundant
% alpha = beta -1 maximally redundant (rho = 1)
% alpha = (beta-1)/2 nonredundant
% alpha > beta - 1 over-redundant
%
rho = 2/3;
% Calculation of max digit set
alpha = rho*(beta-1);
% q contains the digit set -alpha..alpha as a column vector
q = [];
for i = -alpha:alpha
q = [q; i];
end
% 4r(i-1)/D values
hold on
% figure(1)
grid off
% Robertson-diagram scaffold: x, z, y, w and u are computed for each digit,
% but every plot call that used them is commented out, so this loop draws nothing.
for i = 1:length(q)
x = -rho+q(i):ulp:rho+q(i);
% Plot redundancy (overlap) Positive
z = [rho+q(i),rho+q(i)];
y = [x(length(x))-q(i),0];
% Plot redundancy (overlap) Negative
if (i ~= length(q))
w = [-rho+q(i+1)-q(i+1),0];
u = [-rho+q(i+1),-rho+q(i+1)];
% plot(u,w,'b')
end
% plot(x,x-q(i))
% plot(z,y,'r')
end
% title('Robertson Diagram for Radix-4 SRT Divison')
% Np and Nd set the grid steps used later in the script:
% ulpp = 2^-Np along the P axis and ulpd = 2^-Nd along the divisor axis
Np = 3;
Nd = 4;
Dmin = Dminimum;
Dmax = Dmaximum;
ulpd = 2^(-Nd);
ulpp = 2^(-Np);
%
% Plot Atkins P-D plot
% Normalized Floating Point [Dmin,Dmax] = [1,2]
% Normalized Fixed Point [Dmin, Dmax] =[1/2,1]
%
Dmin = Dminimum;
Dmax = Dmaximum;
% For every digit q(i) draw its selection-interval bounds over the divisor
% range: upper bound (rho+q)*D in blue, lower bound (-rho+q)*D in red.
% Note D is sampled with ulp here, which equals ulpd only because n == Nd.
for i = 1:length(q)
D = Dmin:ulp:Dmax;
P1 = (rho+q(i))*D;
P2 = (-rho+q(i))*D;
hold on
p1 = plot(D,P1);
p1.Color = '#0000ff';
p2 = plot(D,P2);
p2.Color = '#ff0000';
axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
xticks(D)
p1.LineWidth = 2.0;
p2.LineWidth = 2.0;
end
% Let's make x/y axis binary
% disp_bin is defined outside this script; presumably it returns the binary
% text of a value for the given integer/fraction bit counts - TODO confirm
% its return type, since the results are concatenated with [ ] below.
j = [];
for i=1:length(D)
j = [j disp_bin(D(i), 1, 4)];
end
% y labels for -2.5:0.5:2.5; yk2 collects the numeric values but is not
% used anywhere else in this script
yk = [];
yk2 = [];
for i=-2.5:0.5:2.5;
yk = [yk disp_bin(i, 3, 3)];
yk2 = [yk2 i];
end
xtickangle(90)
xticklabels(j)
yticklabels(yk)
% Draw the grid of allowed points on the P-D plot: vertical lines at every
% divisor step (ulpd) and horizontal lines at every remainder step (ulpp).
% The horizontal lines use ones(1,length(j)) so each is drawn once;
% ones(length(j)) would build a square matrix and overplot the same
% line length(j) times.
% Positive Portions
i = 0:ulpp:rho*beta*Dmaximum;
for j = Dmin:ulpd:Dmax
plot(j*ones(1,length(i)),i,'k')
end
j = Dmin:ulpd:Dmax;
for i = 0:ulpp:rho*beta*Dmaximum
plot(j,i*ones(1,length(j)),'k')
end
% Negative Portions
i = 0:-ulpp:rho*-beta*Dmaximum;
for j = Dmin:ulpd:Dmax
plot(j*ones(1,length(i)),i,'k')
end
j = Dmin:ulpd:Dmax;
for i = 0:-ulpp:-rho*beta*Dmaximum
plot(j,i*ones(1,length(j)),'k')
end
% Labels and Printing
% Axis labels are nudged away from the tick labels by adjusting Position
xlh = xlabel(['Divisor (d)']);
%xlh.FontSize = 18;
xlh.Position(2) = xlh.Position(2) - 0.1;
ylh = ylabel(['P = 4 \cdot w_i']);
ylh.Position(1) = ylh.Position(1)-0.02;
%ylh.FontSize = 18;
% Containment Values (placed manually although not bad)
% One entry per divisor grid point in x2 = Dmin:ulpd:Dmax. m1b and m0 are the
% exact negatives of m2 and m1. Presumably these are the selection thresholds
% between adjacent quotient digits - confirm against the P-D derivation.
m2 = [3/4 7/8 1.0 1.0 5/4 5/4 5/4 3/2 3/2];
m1 = [1/4 1/4 1/4 1/4 1/2 1/2 1/2 1/2 1/2];
m0 = [-1/4 -1/4 -1/4 -1/4 -1/2 -1/2 -1/2 -1/2 -1/2];
m1b = [-3/4 -7/8 -1 -1 -5/4 -5/4 -5/4 -3/2 -3/2];
x2 = Dmin:ulpd:Dmax;
% Draw each threshold as a thick purple staircase over the divisor axis
s2 = stairs(x2, m2);
s2.Color = '#8f08d1';
s2.LineWidth = 3.0;
%s2.LineStyle = '--';
s1 = stairs(x2, m1);
s1.Color = '#8f08d1';
s1.LineWidth = 3.0;
s0 = stairs(x2, m0);
s0.Color = '#8f08d1';
s0.LineWidth = 3.0;
s1b = stairs(x2, m1b);
s1b.Color = '#8f08d1';
s1b.LineWidth = 3.0;
% Place manually Quotient (ugh)
% Every cell of the P-D grid gets a text label with its quotient digit.
% j holds the cell-centre divisor of each column and i the label height of
% each row, top to bottom. Reading a column downwards, the digits always
% come in the order 2, 1, 0, -1, -2, so each column is stored as the
% number of consecutive rows per digit.
j = Dmin+ulpd/2:ulpd:Dmax;
i = rho*beta*Dmaximum-ulpp*3/4:-ulpp:-rho*beta*Dmaximum;
digit_labels = {'2', '1', '0', '-1', '-2'};
% rows = divisor columns 1..8, columns = run length of each digit label
% NOTE(review): the last column (9 8 8 4 13) is not symmetric like the
% others; it is reproduced as-is from the hand-placed labels.
run_lengths = [15 4 4 4 15
               14 5 4 5 14
               13 6 4 6 13
               13 6 4 6 13
               11 6 8 6 11
               11 6 8 6 11
               11 6 8 6 11
                9 8 8 4 13];
for col = 1:size(run_lengths, 1)
    row = 0;
    for d = 1:numel(digit_labels)
        for k = 1:run_lengths(col, d)
            row = row + 1;
            text(j(col), i(row), digit_labels{d})
        end
    end
end
print -dpng pd_cpa.png

View File

@ -0,0 +1,333 @@
%
% Start from an empty workspace and an empty figure
clear
clf
% Number of bits (input dividend)
n = 4;
%
% Divisor range
%   Normalized floating point: [Dmin, Dmax] = [1, 2]
%   Normalized fixed point:    [Dmin, Dmax] = [1/2, 1]
%
Dminimum = 1.0/2;
Dmaximum = 2.0/2;
% One unit in the last place of an n-bit fraction
ulp = 2^(-n);
% beta is the radix
beta = 4;
% rho is the redundancy factor -> should always be >= 1/2
%
% SD representations have alpha < beta - 1
%
%   alpha = ceil(beta/2)   minimally redundant
%   alpha = beta - 1       maximally redundant (rho = 1)
%   alpha = (beta-1)/2     nonredundant
%   alpha > beta - 1       over-redundant
%
rho = 2/3;
% Largest digit magnitude in the digit set
alpha = rho*(beta-1);
% q holds the digit set -alpha..alpha as a column vector
q = (-alpha:alpha).';
% 4r(i-1)/D values
hold on
% figure(1)
grid off
% Robertson-diagram geometry for every digit. All plot calls below are
% commented out, so this loop only leaves x, z, y, w and u in the workspace.
for k = 1:numel(q)
  digit = q(k);
  x = -rho+digit:ulp:rho+digit;
  % Redundancy (overlap) segment, positive side
  z = [rho+digit,rho+digit];
  y = [x(end)-digit,0];
  % Redundancy (overlap) segment, negative side (needs the next digit)
  if (k ~= numel(q))
    nextDigit = q(k+1);
    w = [-rho+nextDigit-nextDigit,0];
    u = [-rho+nextDigit,-rho+nextDigit];
    % plot(u,w,'b')
  end
  % plot(x,x-digit)
  % plot(z,y,'r')
end
% title('Robertson Diagram for Radix-4 SRT Division')
% Grid resolution: Nd fractional bits for the divisor axis, Np for the
% P axis.
Np = 3;
Nd = 3;
ulpd = 2^(-Nd);
ulpp = 2^(-Np);
%
% Plot Atkins P-D plot
% Normalized Floating Point [Dmin,Dmax] = [1,2]
% Normalized Fixed Point [Dmin, Dmax] =[1/2,1]
%
Dmin = Dminimum;
Dmax = Dmaximum;
% Divisor sample points; the same for every digit, so computed once.
D = Dmin:ulpd:Dmax;
hold on
for i = 1:length(q)
  % Lines P = (q(i)+rho)*D (blue) and P = (q(i)-rho)*D (red)
  P1 = (rho+q(i))*D;
  P2 = (-rho+q(i))*D;
  p1 = plot(D,P1,'b','LineWidth',2.0);
  p2 = plot(D,P2,'r','LineWidth',2.0);
end
% Axis limits and x ticks do not depend on the digit; set them once after
% all lines are drawn.
axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
xticks(D)
% Let's make x axis binary
% One label per divisor sample in D (these are the x ticks set earlier).
% NOTE(review): assumes disp_bin returns one label per call that
% concatenates into a label list -- confirm against disp_bin.
D = Dmin:ulpd:Dmax;
j = [];
for i=1:length(D)
  j = [j disp_bin(D(i), 1, 3)];
end
% y-axis: labels in yk, and the tick values they were generated from in yk2
yk = [];
yk2 = [];
for i=-2.5:0.5:2.5
  yk = [yk disp_bin(i, 3, 3)];
  yk2 = [yk2 i];
end
xtickangle(90)
xticklabels(j)
% Pin the y ticks to the values the labels were built from; otherwise the
% labels are applied to whatever ticks the axes chose automatically.
yticks(yk2)
yticklabels(yk)
% Draw the grid of allowed points on the P-D plot: vertical lines at every
% multiple of ulpd between Dmin and Dmax, horizontal lines at every multiple
% of ulpp between -rho*beta*Dmaximum and +rho*beta*Dmaximum.
dGrid = Dmin:ulpd:Dmax;
pMax = rho*beta*Dmaximum;
% Positive Portions
pPos = 0:ulpp:pMax;
for d = dGrid
  plot(d*ones(1,length(pPos)),pPos,'k');
end
for p = pPos
  % ones(1,N), not ones(N): a square matrix here would draw the same
  % horizontal line N times.
  plot(dGrid,p*ones(1,length(dGrid)),'k');
end
% Negative Portions
pNeg = 0:-ulpp:-pMax;
for d = dGrid
  plot(d*ones(1,length(pNeg)),pNeg,'k');
end
for p = pNeg
  plot(dGrid,p*ones(1,length(dGrid)),'k');
end
% Axis captions, each nudged away from its axis and enlarged
xlh = xlabel('Divisor (d)');
xlh.Position = xlh.Position - [0 0.1 0];
set(xlh, 'FontSize', 18);
ylh = ylabel('P = 4 \cdot w_i');
ylh.Position = ylh.Position - [0.02 0 0];
set(ylh, 'FontSize', 18);
% Containment values (placed manually although not bad).
% One staircase level per divisor sample in x2; presumably these are the
% boundaries between quotient digits 2|1, 1|0, 0|-1 and -1|-2 -- confirm.
m2 = [5/6 1.0 5/4 11/8 11/8];
m1 = [1/4 1/4 1/2 1/2 1/2];
m0 = [-1/4 -1/4 -1/2 -1/2 -1/2];
m1b = [-5/6 -1 -5/4 -11/8 -11/8];
x2 = Dmin:ulpd:Dmax;
s2 = stairs(x2, m2);
s1 = stairs(x2, m1);
s0 = stairs(x2, m0);
s1b = stairs(x2, m1b);
% All four staircases share one colour and line width
set([s2 s1 s0 s1b], 'Color', '#8f08d1', 'LineWidth', 3.0);
% Place manually Quotient (ugh)
% j: midpoint of each divisor interval; i: label height for each grid row,
% listed from the top of the plot downward.
j = Dmin+ulpd/2:ulpd:Dmax;
i = rho*beta*Dmaximum-ulpp*3/4:-ulpp:-rho*beta*Dmaximum;
% One cell per divisor column. Each {label, count} row is a run of 'count'
% consecutive grid rows (top to bottom) carrying that label; every column
% covers 42 rows in total.
colRuns = { ...
  {'2',14; 'Full Precision',1; '1',4; '0',4; '-1',4; 'Full Precision',1; '-2',14}, ...
  {'2',13; '1',6; '0',4; '-1',6; '-2',13}, ...
  {'2',11; '1',6; '0',8; '-1',6; '-2',11}, ...
  {'2',10; '1',7; '0',8; '-1',7; '-2',10} ...
};
nFull = 0;   % number of 'Full Precision' annotations placed so far
for col = 1:numel(colRuns)
  runs = colRuns{col};
  row = 0;
  for r = 1:size(runs,1)
    label = runs{r,1};
    for k = 1:runs{r,2}
      row = row + 1;
      if strcmp(label, 'Full Precision')
        % The two annotations use a larger font and keep their handles
        h = text(j(col), i(row), label, 'FontSize', 16);
        nFull = nFull + 1;
        if nFull == 1
          error1 = h;
        else
          error2 = h;
        end
      else
        text(j(col), i(row), label)
      end
    end
  end
end
print -dpng pd_bad.png

View File

@ -0,0 +1,855 @@
%
% Start from an empty workspace and an empty figure
clear
clf
% Number of bits (input dividend)
n = 4;
%
% Divisor range
%   Normalized floating point: [Dmin, Dmax] = [1, 2]
%   Normalized fixed point:    [Dmin, Dmax] = [1/2, 1]
%
Dminimum = 1.0/2;
Dmaximum = 2.0/2;
% One unit in the last place of an n-bit fraction
ulp = 2^(-n);
% beta is the radix
beta = 4;
% rho is the redundancy factor -> should always be >= 1/2
%
% SD representations have alpha < beta - 1
%
%   alpha = ceil(beta/2)   minimally redundant
%   alpha = beta - 1       maximally redundant (rho = 1)
%   alpha = (beta-1)/2     nonredundant
%   alpha > beta - 1       over-redundant
%
rho = 2/3;
% Largest digit magnitude in the digit set
alpha = rho*(beta-1);
% q holds the digit set -alpha..alpha as a column vector
q = (-alpha:alpha).';
% 4r(i-1)/D values
hold on
% figure(1)
grid off
% Robertson-diagram geometry for every digit. All plot calls below are
% commented out, so this loop only leaves x, z, y, w and u in the workspace.
for k = 1:numel(q)
  digit = q(k);
  x = -rho+digit:ulp:rho+digit;
  % Redundancy (overlap) segment, positive side
  z = [rho+digit,rho+digit];
  y = [x(end)-digit,0];
  % Redundancy (overlap) segment, negative side (needs the next digit)
  if (k ~= numel(q))
    nextDigit = q(k+1);
    w = [-rho+nextDigit-nextDigit,0];
    u = [-rho+nextDigit,-rho+nextDigit];
    % plot(u,w,'b')
  end
  % plot(x,x-digit)
  % plot(z,y,'r')
end
% title('Robertson Diagram for Radix-4 SRT Division')
%
% Plot Atkins P-D plot
% Normalized Floating Point [Dmin,Dmax] = [1,2]
% Normalized Fixed Point [Dmin, Dmax] =[1/2,1]
%
Dmin = Dminimum;
Dmax = Dmaximum;
% Divisor sample points; the same for every digit, so computed once.
D = Dmin:ulp:Dmax;
hold on
for i = 1:length(q)
  % Lines P = (q(i)+rho)*D (blue) and P = (q(i)-rho)*D (red)
  P1 = (rho+q(i))*D;
  P2 = (-rho+q(i))*D;
  p1 = plot(D,P1,'b','LineWidth',2.0);
  p2 = plot(D,P2,'r','LineWidth',2.0);
end
% Axis limits and x ticks do not depend on the digit; set them once after
% all lines are drawn.
axis([Dmin Dmax -beta*rho*Dmaximum beta*rho*Dmaximum])
xticks(D)
% Let's make x axis binary
% One label per divisor sample in D (these are the x ticks set earlier).
% NOTE(review): assumes disp_bin returns one label per call that
% concatenates into a label list -- confirm against disp_bin.
j = [];
for i=1:length(D)
  j = [j disp_bin(D(i), 1, 4)];
end
% y-axis: labels in yk, and the tick values they were generated from in yk2
yk = [];
yk2 = [];
for i=-2.5:0.5:2.5
  yk = [yk disp_bin(i, 3, 4)];
  yk2 = [yk2 i];
end
xtickangle(90)
xticklabels(j)
% Pin the y ticks to the values the labels were built from; otherwise the
% labels are applied to whatever ticks the axes chose automatically.
yticks(yk2)
yticklabels(yk)
% Grid resolution: Nd fractional bits for the divisor axis, Np for the
% P axis.
Np = 4;
Nd = 4;
Dmin = Dminimum;
Dmax = Dmaximum;
ulpd = 2^(-Nd);
ulpp = 2^(-Np);
% Draw the grid of allowed points on the P-D plot: vertical lines at every
% multiple of ulpd between Dmin and Dmax, horizontal lines at every multiple
% of ulpp between -rho*beta*Dmaximum and +rho*beta*Dmaximum.
dGrid = Dmin:ulpd:Dmax;
pMax = rho*beta*Dmaximum;
% Positive Portions
pPos = 0:ulpp:pMax;
for d = dGrid
  plot(d*ones(1,length(pPos)),pPos,'k');
end
for p = pPos
  % ones(1,N), not ones(N): a square matrix here would draw the same
  % horizontal line N times.
  plot(dGrid,p*ones(1,length(dGrid)),'k');
end
% Negative Portions
pNeg = 0:-ulpp:-pMax;
for d = dGrid
  plot(d*ones(1,length(pNeg)),pNeg,'k');
end
for p = pNeg
  plot(dGrid,p*ones(1,length(dGrid)),'k');
end
% Axis captions, each nudged away from its axis
xlh = xlabel('Divisor (d)');
xlh.Position = xlh.Position - [0 0.1 0];
%xlh.FontSize = 18;
ylh = ylabel('P = 4 \cdot w_i');
ylh.Position = ylh.Position - [0.02 0 0];
%ylh.FontSize = 18;
% Containment values (placed manually although not bad).
% One staircase level per divisor sample in x2; presumably these are the
% boundaries between quotient digits 2|1, 1|0, 0|-1 and -1|-2 -- confirm.
m2 = [3/4 7/8 15/16 1.0 9/8 19/16 5/4 6/4 6/4];
m1 = [1/4 1/4 1/4 1/4 3/8 3/8 1/2 1/2 1/2];
m0 = [-1/4 -3/8 -3/8 -3/8 -1/2 -1/2 -1/2 -1/2 -1/2];
m1b = [-13/16 -15/16 -1 -9/8 -5/4 -5/4 -11/8 -6/4 -6/4];
x2 = Dmin:ulpd:Dmax;
s2 = stairs(x2, m2);
s1 = stairs(x2, m1);
s0 = stairs(x2, m0);
s1b = stairs(x2, m1b);
% All four staircases share one colour and line width
set([s2 s1 s0 s1b], 'Color', '#8f08d1', 'LineWidth', 3.0);
% Place manually Quotient (ugh)
% j: midpoint of each divisor interval; i: label height for each grid row,
% listed from the top of the plot downward.
j = Dmin+ulpd/2:ulpd:Dmax;
i = rho*beta*Dmaximum-ulpp:-ulpp:-rho*beta*Dmaximum;
% Labels in the order they appear from the top of a column to the bottom
digitLabels = {'2', '1', '0', '-1', '-2'};
% runLengths(c,d): number of consecutive grid rows in divisor column c that
% carry digitLabels{d}. Every row sums to 84.
runLengths = [ ...
  30  8  8  9 29; ...  % column 1
  28 10 10  9 27; ...  % column 2
  27 11 10 10 26; ...  % column 3
  26 12 10 12 24; ...  % column 4
  24 12 14 12 22; ...  % column 5
  23 13 14 12 22; ...  % column 6
  22 12 16 14 20; ...  % column 7
  18 16 16 16 18];     % column 8
for col = 1:size(runLengths,1)
  row = 0;
  for d = 1:numel(digitLabels)
    for k = 1:runLengths(col,d)
      row = row + 1;
      text(j(col), i(row), digitLabels{d})
    end
  end
end
orient('landscape')
print -dpng 'pd_csa.png'

Binary file not shown.

View File

@ -1,83 +1,45 @@
#include "disp.h"
#include <math.h>
// QSLC is for division by recurrence for
// r=4 using a CPA - See Table 5.9 EL
int qslc (double prem, double d) {
int q;
// For Debugging
printf("d --> %lg\n", d);
printf("rw --> %lg\n", prem);
if ((d>=0.0)&&(d<1.0)) {
if (prem>=1.0)
q = 2;
else if (prem>=0.25)
q = 1;
else if (prem>=-0.25)
q = 0;
else if (prem >= -1)
q = -1;
else
q = -2;
return q;
}
if ((d>=1.0)&&(d<2.0)) {
if (prem>=2.0)
q = 2;
else if (prem>=0.66667)
q = 1;
else if (prem>=-0.6667)
q = 0;
else if (prem >= -2)
q = -1;
else
q = -2;
return q;
}
if ((d>=2.0)&&(d<3.0)) {
if (prem>=4.0)
q = 2;
else if (prem>=1.25)
q = 1;
else if (prem>=-1.25)
q = 0;
else if (prem >= -4)
q = -1;
else
q = -2;
return q;
}
if ((d>=3.0)&&(d<4.0)) {
if (prem>=5.0)
if ((d>=8.0)&&(d<9.0)) {
if (prem>=6.0)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -5)
else if (prem >= -6)
q = -1;
else
q = -2;
return q;
}
if ((d>=4.0)&&(d<5.0)) {
if (prem>=6.66667)
if ((d>=9.0)&&(d<10.0)) {
if (prem>=7)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -6.66667)
else if (prem >= -7.0) // negated upper threshold, as in every other divisor range; with +7.0 this branch was unreachable and q = -1 was never selected
q = -1;
else
q = -2;
return q;
}
if ((d>=5.0)&&(d<6.0)) {
if ((d>=10.0)&&(d<11.0)) {
if (prem>=8.0)
q = 2;
else if (prem>=2.0)
@ -91,7 +53,21 @@ int qslc (double prem, double d) {
return q;
}
if ((d>=6.0)&&(d<7.0)) {
if ((d>=11.0)&&(d<12.0)) {
if (prem>=8.0)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -8.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=12.0)&&(d<13.0)) {
if (prem>=10.0)
q = 2;
else if (prem>=4.0)
@ -105,21 +81,35 @@ int qslc (double prem, double d) {
return q;
}
if ((d>=7.0)&&(d<8.0)) {
if (prem>=11.0)
if ((d>=13.0)&&(d<14.0)) {
if (prem>=10.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -11.0)
else if (prem >= -10.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=8.0)&&(d<9.0)) {
if ((d>=14.0)&&(d<15.0)) {
if (prem>=10.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -10.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=15.0)&&(d<16.0)) {
if (prem>=12.0)
q = 2;
else if (prem>=4.0)
@ -133,106 +123,9 @@ int qslc (double prem, double d) {
return q;
}
if ((d>=9.0)&&(d<10.0)) {
if (prem>=15.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -15.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=10.0)&&(d<11.0)) {
if (prem>=15.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -15.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=11.0)&&(d<12.0)) {
if (prem>=16.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -16.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=12.0)&&(d<13.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=13.0)&&(d<14.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=14.0)&&(d<15.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=15.0)&&(d<16.0)) {
if (prem>=24.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -24.0)
q = -1;
else
q = -2;
return q;
}
}
/*
This routine performs a radix-4 SRT division
algorithm. The user inputs the numerator, the denominator,
@ -246,6 +139,8 @@ int main(int argc, char* argv[]) {
int q;
int num_iter, i;
int prec;
int radix = 4;
if (argc < 5) {
fprintf(stderr,
"Usage: %s numerator denominator num_iterations prec\n",
@ -267,27 +162,29 @@ int main(int argc, char* argv[]) {
printf("\n");
Q = 0;
P = N*0.25;
P = N * pow(2.0, -log2(radix));
printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n",
N, D, N/D, num_iter);
for (scale = 1, i = 0; i < num_iter; i++) {
// Shift by r
scale = scale*0.25;
q = qslc(flr((4*P)*16,3), D*16);
//q = -q;
scale = scale * pow(2.0, -log2(radix));
// (4*P)*8 because of footnote in Table 5.9, page 296 EL
// i.e., real value = shown value / 8
// D*16 since we use 4 bits of D (1 bit known)
q = qslc(flr((radix * P) * 8, 3), D*16);
printf("4*W[n] = ");
disp_bin(4*P,3,prec,stdout);
disp_bin(radix*P, 3, prec, stdout);
printf("\n");
printf("q*D = ");
disp_bin(q*D,3,prec,stdout);
disp_bin(q*D, 3, prec, stdout);
printf("\n");
printf("W[n+1] = ");
disp_bin(P ,3,prec,stdout);
disp_bin(P ,3, prec, stdout);
printf("\n");
// Recurrence
P = 4*P - q*D;
P = radix * P - q * D;
// OTFC
Q = Q + q*scale;
Q = Q + q * scale;
printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P);
printf("i = %d, q = %d", i, q);
printf(", Q = ");
@ -296,8 +193,9 @@ int main(int argc, char* argv[]) {
disp_bin(P, 3, prec, stdout);
printf("\n\n");
}
// Is shifted partial remainder negative?
if (P < 0) {
Q = Q - scale;
Q = Q - pow(2.0, -prec);
P = P + D;
printf("\nCorrecting Negative Remainder\n");
printf("Q = %1.18lf, W = %1.18lf\n", Q, P);
@ -306,9 +204,12 @@ int main(int argc, char* argv[]) {
printf(", W = ");
disp_bin(P, 3, prec, stdout);
printf("\n");
}
RQ = flr(N/D, (double) prec);
RD = Q*4;
}
// Output Results
RQ = flr(N/D, prec);
// Since q_{computed} = q / radix, multiply by radix
RD = Q * radix;
printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
printf("true = ");
disp_bin(RQ, 3, prec, stdout);

View File

@ -0,0 +1,148 @@
`include "wally-config.vh"
`define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF)
/////////////
// counter //
/////////////
module counter(input  logic clk,
               input  logic req,
               output logic done);

   // Number of rising clock edges seen since the last request
   logic [5:0] count;

   // Sequencer for the divider iterations.
   //   count: cleared on an edge where req is high, otherwise incremented.
   //   done:  set on the edge where count equals `DIVLEN/2+1; cleared on a
   //          later edge where done or req is high and count no longer
   //          matches.
   // Every assignment is non-blocking with a #1 delay, so both if-chains
   // see the values of count, done and req from before the edge.
   always @(posedge clk)
     begin
        // iteration counter
        if (req) count <= #1 0;
        else     count <= #1 count+1;

        // completion flag
        if (count == `DIVLEN/2+1) done <= #1 1;
        else if (done | req)      done <= #1 0;
     end
endmodule
///////////
// clock //
///////////
// Placeholder clock module: it only declares the clk output as a logic
// variable and contains no statements that drive it.
module clock(output logic clk);
endmodule
//////////
// testbench //
//////////
// Self-checking testbench for the srtradix4 divider.
// Test vectors are read from the file "testvectors"; each 192-bit entry holds
// operand a, operand b and the expected result r (see the `mema/`memb/`memr
// bit ranges below).  The operands are split into sign/exponent/fraction and
// applied to the divider; when the counter signals done, the result sign,
// exponent and the top 52 quotient bits are compared with the expected value.
// Simulation stops on the first mismatch or when the vectors run out.
module testbenchradix4;
logic clk;
logic req;
logic done;
// Operands as 64-bit words and split into 1-bit sign, 11-bit exponent and
// 52-bit fraction fields.
logic [63:0] a, b;
logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp;
logic asign, bsign;
// r holds the top 52 bits of Quot.  rOTFC and QuotOTFC are declared but not
// referenced anywhere else in this module.
logic [51:0] r, rOTFC;
logic [`DIVLEN-1:0] Quot, QuotOTFC;
// NOTE(review): rp and rm are not referenced anywhere else in this module.
logic [54:0] rp, rm; // positive quotient digits
// Test parameters
parameter MEM_SIZE = 40000;
parameter MEM_WIDTH = 64+64+64;
// Bit ranges of the expected result, operand b and operand a within a vector.
`define memr 63:0
`define memb 127:64
`define mema 191:128
// Test registers
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a
// bit field of an array
// correctr: expected result for the division currently being checked.
// nextr:    expected result belonging to the vector most recently loaded.
// diffn/diffp: r minus expected fraction, and expected fraction minus r.
logic [63:0] correctr, nextr, diffn, diffp;
logic [10:0] rExp;
logic rsign;
integer testnum, errors;
// Divider
// Start is driven by req; Stall, Flush, SrcA, SrcB, Fmt, W64, Signed, Int and
// Sqrt are tied to constant zero, and the Rem and Flags outputs are left
// unconnected.
srtradix4 srtradix4(.clk, .Start(req),
.Stall(1'b0), .Flush(1'b0),
.XExp(aExp), .YExp(bExp), .rExp,
.XSign(asign), .YSign(bsign), .rsign,
.XFrac(afrac), .YFrac(bfrac),
.SrcA('0), .SrcB('0), .Fmt(2'b00),
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
.Quot, .Rem(), .Flags());
// Counter
counter counter(clk, req, done);
// Free-running clock: high for 17 time units, low for 17 (period 34).
initial
forever
begin
clk = 1; #17;
clk = 0; #17;
end
// Read test vectors from disk
// Loads the vector file, applies vector 0 to the divider inputs, remembers its
// expected result in nextr and issues the first request.
initial
begin
testnum = 0;
errors = 0;
$readmemh ("testvectors", Tests);
Vec = Tests[testnum];
a = Vec[`mema];
{asign, aExp, afrac} = a;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
r = Quot[`DIVLEN-1:`DIVLEN - 52];
req <= 1;
end
// Apply directed test vectors read from file.
always @(posedge clk)
begin
// Sample the top 52 quotient bits on every clock edge.
r = Quot[`DIVLEN-1:`DIVLEN - 52];
if (done) begin
// Request the next division, then check the one that just finished.
req <= 1;
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
// Fail on a sign or exponent mismatch, on a signed fraction difference
// greater than 1 in either direction, or when a difference is entirely X.
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin
errors = errors+1;
$display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
$display("failed\n");
$stop;
end
// An all-X afrac is treated as the end of the test vectors.
// NOTE(review): presumably entries past the end of the file stay X after
// $readmemh -- confirm against the vector file format.
if (afrac === 52'hxxxxxxxxxxxxx)
begin
$display("%d Tests completed successfully", testnum);
$stop;
end
end
if (req)
begin
// Drop the request, make the previously loaded expected result the one to
// check against, and load the next vector onto the divider inputs.
req <= 0;
correctr = nextr;
testnum = testnum+1;
Vec = Tests[testnum];
// Printed before a and b are overwritten, so this shows the operands that
// were applied up to this clock edge.
$display("a = %h b = %h",a,b);
a = Vec[`mema];
{asign, aExp, afrac} = a;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
end
end
endmodule

View File

@ -7,7 +7,7 @@ module counter(input logic clk,
input logic req,
output logic done);
logic [5:0] count;
logic [7:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
@ -17,7 +17,7 @@ module counter(input logic clk,
always @(posedge clk)
begin
if (count == 54) done <= #1 1;
if (count == `DIVLEN+1) done <= #1 1;
else if (done | req) done <= #1 0;
if (req) count <= #1 0;
else count <= #1 count+1;
@ -110,12 +110,14 @@ module testbench;
always @(posedge clk)
begin
r = Quot[`DIVLEN:`DIVLEN - 52];
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
if (done)
begin
req <= #5 1;
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1)) // check if accurate to 1 ulp
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin
errors = errors+1;
$display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);

View File

@ -559,11 +559,11 @@ module testbench;
if ((dut.core.lsu.LSUPAdrM == 'h10000002) | (dut.core.lsu.LSUPAdrM == 'h10000005) | (dut.core.lsu.LSUPAdrM == 'h10000006)) begin \
if(!NO_SPOOFING) begin \
$display("%tns, %d instrs: Overwrite UART's Register in memory stage.", $time, AttemptedInstructionCount); \
force dut.core.ieu.dp.ReadDataM = ExpectedMemReadDataM; \
force dut.core.lsu.ReadDataM = ExpectedMemReadDataM; \
end \
end else \
if(!NO_SPOOFING) \
release dut.core.ieu.dp.ReadDataM; \
release dut.core.lsu.ReadDataM; \
if(textM.substr(0,5) == "rdtime") begin \
//$display("%tns, %d instrs: Overwrite MTIME_CLINT on read of MTIME in memory stage.", $time, InstrCountW-1); \
if(!NO_SPOOFING) \

View File

@ -51,7 +51,6 @@ module testbench;
string tests[];
logic [3:0] dummy;
string ProgramAddrMapFile, ProgramLabelMapFile;
logic [`AHBW-1:0] HRDATAEXT;
logic HREADYEXT, HRESPEXT;
logic [31:0] HADDR;
@ -65,6 +64,9 @@ logic [3:0] dummy;
logic HCLK, HRESETn;
logic [`XLEN-1:0] PCW;
string ProgramAddrMapFile, ProgramLabelMapFile;
integer ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 };
logic DCacheFlushDone, DCacheFlushStart;
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
@ -117,6 +119,7 @@ logic [3:0] dummy;
"wally32i": tests = wally32i;
"wally32e": tests = wally32e;
"wally32priv": tests = wally32priv;
"wally32periph": tests = wally32periph;
"embench": tests = embench;
endcase
end
@ -127,7 +130,7 @@ logic [3:0] dummy;
end
string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile;
integer outputFilePointer, ProgramLabelMap, ProgramAddrMap;
integer outputFilePointer;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout;
@ -193,6 +196,9 @@ logic [3:0] dummy;
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
// declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array
// to expand, add more elements to this array and initialize them to zero (also initialize them to zero at the start of the next test)
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
$display("Read memfile %s", memfilename);
reset_ext = 1; # 42; reset_ext = 0;
end
@ -217,30 +223,12 @@ logic [3:0] dummy;
end
// Termination condition (i.e. we finished running current test)
if (DCacheFlushDone) begin
// Gets the memory location of begin_signature
adrstr = "0";
ProgramLabelMap = $fopen(ProgramLabelMapFile, "r");
ProgramAddrMap = $fopen(ProgramAddrMapFile, "r");
if (ProgramLabelMap & ProgramAddrMap) begin // check we found both files
while (!$feof(ProgramLabelMap)) begin
string label;
integer returncode;
returncode = $fgets(label, ProgramLabelMap);
returncode = $fgets(adrstr, ProgramAddrMap);
if (label == "begin_signature\n") begin
if (DEBUG) $display("%s begin_signature adrstr: %s", TEST, adrstr);
break;
end
end
end
if (adrstr == "0") begin
integer begin_signature_addr;
begin_signature_addr = ProgramAddrLabelArray["begin_signature"];
if (!begin_signature_addr)
$display("begin_signature addr not found in %s", ProgramLabelMapFile);
end
$fclose(ProgramLabelMap);
$fclose(ProgramAddrMap);
testadr = ($unsigned(adrstr.atohex()))/(`XLEN/8);
testadrNoBase = (adrstr.atohex() - `RAM_BASE)/(`XLEN/8);
testadr = ($unsigned(begin_signature_addr))/(`XLEN/8);
testadrNoBase = (begin_signature_addr - `RAM_BASE)/(`XLEN/8);
#600; // give time for instructions in pipeline to finish
if (TEST == "embench") begin
// Writes contents of begin_signature to .sim.output file
@ -262,8 +250,10 @@ logic [3:0] dummy;
for(i=0; i<SIGNATURESIZE; i=i+1) begin
sig32[i] = 'bx;
end
// riscof tests have a different signature, tests[0] == "1" refers to RISCVARCHTESTs
if (tests[0] == "1") signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
else signame = {pathname, tests[test], ".signature.output"};
// read signature, reformat in 64 bits if necessary
signame = {pathname, tests[test], ".signature.output"};
$readmemh(signame, sig32);
i = 0;
while (i < SIGNATURESIZE) begin
@ -318,7 +308,6 @@ logic [3:0] dummy;
end
end
// move onto the next test, check to see if we're done
// test = test + 2;
test = test + 1;
if (test == tests.size()) begin
if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
@ -336,6 +325,8 @@ logic [3:0] dummy;
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
ProgramAddrLabelArray = '{ "begin_signature" : 0, "tohost" : 0 };
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
$display("Read memfile %s", memfilename);
reset_ext = 1; # 47; reset_ext = 0;
end
@ -363,7 +354,8 @@ logic [3:0] dummy;
(dut.core.ieu.dp.regf.we3 &
dut.core.ieu.dp.regf.a3 == 3 &
dut.core.ieu.dp.regf.wd3 == 1)) |
(dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM;
((dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM ) |
((dut.core.lsu.IEUAdrM == ProgramAddrLabelArray["tohost"]) & InstrMName == "SW" );
DCacheFlushFSM DCacheFlushFSM(.clk(clk),
.reset(reset),
@ -395,7 +387,7 @@ module riscvassertions;
assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
// assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported");
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
@ -523,5 +515,26 @@ module copyShadow
end
end
endmodule
endmodule
// Fill in the addresses of the labels that are already keys of
// ProgramAddrLabelArray.
//
// The label map file and the address map file are read in lock step, one
// whitespace-delimited token per iteration from each; the n-th address is
// taken to belong to the n-th label.  Whenever a label read from the file
// exists as a key in the array, its entry is overwritten with the matching
// address string parsed as hexadecimal.  Labels not present as keys are
// ignored, and keys never seen in the file keep their incoming value.
//
//   ProgramAddrMapFile   - path of the file holding one hex address per entry
//   ProgramLabelMapFile  - path of the file holding one label per entry
//   ProgramAddrLabelArray - associative array (label -> address), updated in place
//
// If either file cannot be opened the array is left untouched.
task automatic updateProgramAddrLabelArray;
  input string ProgramAddrMapFile, ProgramLabelMapFile;
  inout integer ProgramAddrLabelArray [string];
  integer ProgramLabelMapFP, ProgramAddrMapFP;
  ProgramLabelMapFP = $fopen(ProgramLabelMapFile, "r");
  ProgramAddrMapFP  = $fopen(ProgramAddrMapFile, "r");
  // $fopen returns 0 on failure.  Test each descriptor explicitly instead of
  // bitwise-ANDing the two handles together.
  if ((ProgramLabelMapFP != 0) && (ProgramAddrMapFP != 0)) begin
    while (!$feof(ProgramLabelMapFP)) begin
      string label, adrstr;
      integer labelcode, adrcode;
      labelcode = $fscanf(ProgramLabelMapFP, "%s\n", label);
      adrcode   = $fscanf(ProgramAddrMapFP, "%s\n", adrstr);
      // Stop as soon as either file runs out of tokens so a stale label or
      // address from the previous iteration is never paired up and stored.
      if ((labelcode != 1) || (adrcode != 1)) break;
      if (ProgramAddrLabelArray.exists(label))
        ProgramAddrLabelArray[label] = adrstr.atohex();
    end
  end
  // Only close descriptors that were actually opened.
  if (ProgramLabelMapFP != 0) $fclose(ProgramLabelMapFP);
  if (ProgramAddrMapFP != 0) $fclose(ProgramAddrMapFP);
endtask

File diff suppressed because it is too large Load Diff

View File

@ -29,7 +29,7 @@ k = 3 6
ifeq ($(TECH), sky130)
FREQS = 25 50 100 150 200 250 300 350 400
else
else ifeq ($(TECH), sky90)
FREQS = 500 550 600 650 700 750 800 850 900 950 1000
endif

View File

@ -1,101 +1,101 @@
Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (fJ)
priorityencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,1.0685929975270078e-05
priorityencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,2.1773774467348e-05
priorityencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,4.371111111111111e-05
priorityencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,7.393850658857981e-05
priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.0001261366969785861
add,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.00010825587752870422
add,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.00032460910944935416
add,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.0006580226904376014
add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.0009392239364188873
add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,0.0021480106100795755
csa,sky90,8,5758,0.16536141368530738,266.560005,154.202,0.00013650573115665162
csa,sky90,16,5931,0.1654056314280897,533.12001,308.404,0.00027263530601922105
csa,sky90,32,5758,0.16536141368530738,1066.240021,616.808,0.0005448072247308093
csa,sky90,64,5931,0.1654056314280897,2132.480042,1230.0,0.0010905412240768842
csa,sky90,128,5931,0.1654056314280897,4264.960083,2470.0,0.002178553363682347
shiftleft,sky90,8,4327,0.23025600254217704,259.700005,196.451,7.534088282874971e-05
shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.00019552906110283157
shiftleft,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.0003807431082700759
shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,0.001144802541988198
shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,0.0029008914525432616
comparator,sky90,8,4839,0.20629126741062204,200.900004,136.6,5.001033271337053e-05
comparator,sky90,16,4018,0.24806303982080635,358.680007,189.253,6.321553011448482e-05
comparator,sky90,32,3602,0.276293542476402,690.900013,315.709,0.00010771793448084398
comparator,sky90,64,3129,0.31954192361776923,1372.980026,508.393,0.0002048577820389901
comparator,sky90,128,2791,0.35824651809387315,2744.980052,796.047,0.0003439627373701182
flop,sky90,8,10,0.1143419999999935,133.279999,64.8145,0.000193835
flop,sky90,16,10,0.1143419999999935,266.5599975,129.629,0.00038715000000000006
flop,sky90,32,10,0.1143419999999935,533.119995,259.258,0.0007723000000000001
flop,sky90,64,10,0.1143419999999935,1066.23999,520.0,0.00154955
flop,sky90,128,10,0.1143419999999935,2132.4799805,1035.0,0.003094
mux2,sky90,8,5299,0.1883518518588413,63.700001,21.541,1.932440083034535e-05
mux2,sky90,16,4850,0.20207356701030926,119.560002,32.354,3.884536082474227e-05
mux2,sky90,32,5003,0.19908807195682593,375.340008,259.372,0.00013671796921846892
mux2,sky90,64,3989,0.24961239583855604,479.220007,148.175,0.00016570569064928555
mux2,sky90,128,4004,0.24974824975024976,1302.420025,767.078,0.0004665334665334665
mux4,sky90,8,4661,0.21448923471358078,164.640002,88.494,4.31452478009011e-05
mux4,sky90,16,4392,0.22421770309653916,359.659999,419.855,0.0001006375227686703
mux4,sky90,32,4118,0.24283532831471588,594.860011,331.197,0.00013161728994657602
mux4,sky90,64,3710,0.26931477897574124,899.640016,344.331,0.00028625336927223723
mux4,sky90,128,3167,0.31575023618566467,2016.840039,722.109,0.0005917271866119355
mux8,sky90,8,3585,0.2789170278940028,287.140006,116.648,6.089260808926081e-05
mux8,sky90,16,3362,0.295237998810232,582.120003,282.366,0.00014455681142177274
mux8,sky90,32,3178,0.3140553102580239,1319.079995,670.683,0.0003577721837633732
mux8,sky90,64,2906,0.3440756228492774,2132.48004,808.482,0.0004428768066070199
mux8,sky90,128,2667,0.3749401308586427,4575.620089,1830.0,0.0009786276715410573
mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,0.0014213740458015268
mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,0.0063761283851554666
mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,0.024931847968545216
mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,0.08884651898734176
mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,0.2733148854961832
priorityencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,1.716929950534546e-06
priorityencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,4.008845810003294e-06
priorityencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,9.323372206025267e-06
priorityencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,1.847290640394089e-05
priorityencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,4.117132867132867e-05
add,tsmc28,8,13838,0.07207477814713109,34.272,187.089,1.3311172134701546e-05
add,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,3.367763214998698e-05
add,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,8.153281695882594e-05
add,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.0001409943943456008
add,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.00034511830296437315
csa,tsmc28,8,24524,0.040663382319360626,52.416,482.462,2.173381177621921e-05
csa,tsmc28,16,24524,0.040663382319360626,104.832,964.99,4.346762355243842e-05
csa,tsmc28,32,24524,0.040663382319360626,209.664,1930.0,8.677214157559942e-05
csa,tsmc28,64,24524,0.040663382319360626,419.327999,3860.0,0.00017342195400424075
csa,tsmc28,128,24524,0.040663382319360626,838.655998,7720.0,0.00034717011906703634
shiftleft,tsmc28,8,15202,0.0656078183133798,50.652,367.074,1.6991185370346006e-05
shiftleft,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,3.388681802778719e-05
shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.00010180452696359654
shiftleft,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.0002895309477756286
shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.000566566994162039
comparator,tsmc28,8,17422,0.05733769130983814,35.784,170.595,9.488003673516243e-06
comparator,tsmc28,16,13736,0.07273839778683751,54.558,250.167,1.4349155503785673e-05
comparator,tsmc28,32,12139,0.08236710865804432,145.782,622.975,3.567015404893319e-05
comparator,tsmc28,64,11080,0.09024670758122744,294.21,1250.0,6.84115523465704e-05
comparator,tsmc28,128,9371,0.10671119720414043,558.432,2400.0,0.00012794792444776438
flop,tsmc28,8,10,0.048889000000002625,15.12,78.6345,2.7246000000000003e-05
flop,tsmc28,16,10,0.048889000000002625,30.24,157.29,5.4290000000000004e-05
flop,tsmc28,32,10,0.048889000000002625,60.4799995,314.5805,0.00010908
flop,tsmc28,64,10,0.048889000000002625,120.959999,630.0,0.00021765500000000003
flop,tsmc28,128,10,0.048889000000002625,241.919998,1260.0,0.00043579999999999997
mux2,tsmc28,8,29614,0.03374481252110488,16.758,114.564,5.436617815897886e-06
mux2,tsmc28,16,18767,0.053046021580433735,15.75,88.025,5.142004582511856e-06
mux2,tsmc28,32,17903,0.05585556035301346,32.130001,171.146,9.897782494553986e-06
mux2,tsmc28,64,18568,0.05371109651012495,91.35,523.884,2.757432141318397e-05
mux2,tsmc28,128,16637,0.05991099044298852,176.525999,941.106,5.012923002945243e-05
mux4,tsmc28,8,18151,0.055092383284667513,27.971999,133.963,8.032615282904523e-06
mux4,tsmc28,16,16486,0.06057952759917506,39.438,186.231,1.2556108213029237e-05
mux4,tsmc28,32,15196,0.06580579126085812,69.174,324.969,2.3229797315082915e-05
mux4,tsmc28,64,13926,0.07180612868016659,137.465999,648.086,4.5741777969266124e-05
mux4,tsmc28,128,13090,0.07636619404125286,294.335997,1420.0,9.358288770053477e-05
mux8,tsmc28,8,12902,0.07750336319950395,44.604,214.286,1.17501162610448e-05
mux8,tsmc28,16,12264,0.08147446510110894,128.771998,548.714,2.666340508806262e-05
mux8,tsmc28,32,11713,0.08517122410996329,172.115999,823.633,4.695637326047981e-05
mux8,tsmc28,64,11014,0.09067453550027238,304.163999,1460.0,8.498274922825495e-05
mux8,tsmc28,128,10474,0.09542350830628223,683.045996,2820.0,0.00015705556616383426
mult,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.00037769230769230767
mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,0.0014553548049227546
mult,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,0.0063033300362677225
mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,0.01854602510460251
mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,0.05001177730192719
Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (nJ)
priorityencoder,sky90,8,7683,0.12508649056358195,50.960001,24.761,0.010685929975270078
priorityencoder,sky90,16,5773,0.16977016282695304,136.220003,77.243,0.021773774467348
priorityencoder,sky90,32,4500,0.2218912222222222,372.400007,189.626,0.04371111111111111
priorityencoder,sky90,64,4098,0.2439914738897023,797.720015,382.205,0.07393850658857981
priorityencoder,sky90,128,3409,0.2933331557641537,1602.300031,610.009,0.1261366969785861
add,sky90,8,3658,0.27337042810278844,253.820005,154.438,0.10825587752870422
add,sky90,16,2942,0.3393218266485384,722.260013,485.109,0.32460910944935417
add,sky90,32,2468,0.40496338573743923,1440.600027,714.057,0.6580226904376014
add,sky90,64,2139,0.4674681813931744,2781.240054,1050.0,0.9392239364188874
add,sky90,128,1885,0.5304949787798409,6186.740118,2230.0,2.1480106100795755
csa,sky90,8,5758,0.16536141368530738,266.560005,154.202,0.13650573115665163
csa,sky90,16,5931,0.1654056314280897,533.12001,308.404,0.27263530601922104
csa,sky90,32,5758,0.16536141368530738,1066.240021,616.808,0.5448072247308093
csa,sky90,64,5931,0.1654056314280897,2132.480042,1230.0,1.0905412240768841
csa,sky90,128,5931,0.1654056314280897,4264.960083,2470.0,2.178553363682347
shiftleft,sky90,8,4327,0.23025600254217704,259.700005,196.451,0.07534088282874972
shiftleft,sky90,16,3355,0.29803959314456036,666.400006,558.433,0.19552906110283155
shiftleft,sky90,32,2503,0.39951757530962845,1475.880027,768.262,0.3807431082700759
shiftleft,sky90,64,2203,0.45385946391284615,3914.120062,2680.0,1.144802541988198
shiftleft,sky90,128,1907,0.5242938489774515,9192.400136,6080.0,2.9008914525432616
comparator,sky90,8,4839,0.20629126741062204,200.900004,136.6,0.05001033271337053
comparator,sky90,16,4018,0.24806303982080635,358.680007,189.253,0.06321553011448482
comparator,sky90,32,3602,0.276293542476402,690.900013,315.709,0.10771793448084398
comparator,sky90,64,3129,0.31954192361776923,1372.980026,508.393,0.2048577820389901
comparator,sky90,128,2791,0.35824651809387315,2744.980052,796.047,0.34396273737011823
flop,sky90,8,10,0.1143419999999935,133.279999,64.8145,0.193835
flop,sky90,16,10,0.1143419999999935,266.5599975,129.629,0.38715000000000005
flop,sky90,32,10,0.1143419999999935,533.119995,259.258,0.7723000000000001
flop,sky90,64,10,0.1143419999999935,1066.23999,520.0,1.54955
flop,sky90,128,10,0.1143419999999935,2132.4799805,1035.0,3.094
mux2,sky90,8,5299,0.1883518518588413,63.700001,21.541,0.01932440083034535
mux2,sky90,16,4850,0.20207356701030926,119.560002,32.354,0.03884536082474227
mux2,sky90,32,5003,0.19908807195682593,375.340008,259.372,0.13671796921846893
mux2,sky90,64,4073,0.24517727326295113,479.220009,115.22,0.15148539160324087
mux2,sky90,128,4004,0.24974824975024976,1302.420025,767.078,0.4665334665334665
mux4,sky90,8,4726,0.21123742953872196,148.960002,66.984,0.04026661024121879
mux4,sky90,16,4455,0.2231388911335578,392.0,398.313,0.1037037037037037
mux4,sky90,32,4118,0.24283532831471588,594.860011,331.197,0.131617289946576
mux4,sky90,64,3710,0.26931477897574124,899.640016,344.331,0.2862533692722372
mux4,sky90,128,3249,0.3077210113881194,2013.900038,818.249,0.6094182825484764
mux8,sky90,8,3585,0.2789170278940028,287.140006,116.648,0.06089260808926081
mux8,sky90,16,3362,0.295237998810232,582.120003,282.366,0.14455681142177274
mux8,sky90,32,3178,0.3140553102580239,1319.079995,670.683,0.35777218376337316
mux8,sky90,64,2906,0.3440756228492774,2132.48004,808.482,0.44287680660701995
mux8,sky90,128,2667,0.3749401308586427,4575.620089,1830.0,0.9786276715410572
mult,sky90,8,1310,0.7631557786259543,2194.220041,1440.0,1.421374045801527
mult,sky90,16,997,1.0029260270812437,7519.540137,4940.0,6.376128385155466
mult,sky90,32,763,1.3106129895150722,25200.700446,14900.0,24.931847968545217
mult,sky90,64,632,1.5822664810126583,86011.661365,42600.0,88.84651898734177
mult,sky90,128,524,1.9083759465648855,296198.144128,114000.0,273.3148854961832
priorityencoder,tsmc28,8,31335,0.031912196106590074,8.316,34.836,0.001716929950534546
priorityencoder,tsmc28,16,21253,0.04703118086858326,21.672,78.026,0.004008845810003294
priorityencoder,tsmc28,32,16464,0.06071258114674442,61.614,207.499,0.009323372206025266
priorityencoder,tsmc28,64,13804,0.07239877021153289,137.466,425.592,0.01847290640394089
priorityencoder,tsmc28,128,11440,0.0874065874125874,317.646,973.649,0.041171328671328666
add,tsmc28,8,13838,0.07207477814713109,34.272,187.089,0.013311172134701546
add,tsmc28,16,11521,0.08678002100512108,90.972001,475.207,0.03367763214998698
add,tsmc28,32,9812,0.1018860211985324,209.286002,1060.0,0.08153281695882594
add,tsmc28,64,8206,0.12185605215695831,388.836003,1770.0,0.1409943943456008
add,tsmc28,128,7354,0.13597341881968997,907.452008,4360.0,0.3451183029643731
csa,tsmc28,8,24524,0.040663382319360626,52.416,482.462,0.02173381177621921
csa,tsmc28,16,24524,0.040663382319360626,104.832,964.99,0.04346762355243842
csa,tsmc28,32,24524,0.040663382319360626,209.664,1930.0,0.08677214157559941
csa,tsmc28,64,24524,0.040663382319360626,419.327999,3860.0,0.17342195400424076
csa,tsmc28,128,24524,0.040663382319360626,838.655998,7720.0,0.3471701190670363
shiftleft,tsmc28,8,15202,0.0656078183133798,50.652,367.074,0.016991185370346006
shiftleft,tsmc28,16,11804,0.08465604506946797,127.511999,602.29,0.03388681802778719
shiftleft,tsmc28,32,9587,0.10430391697089808,384.803997,1940.0,0.10180452696359654
shiftleft,tsmc28,64,8272,0.12086674854932303,1041.263998,5460.0,0.2895309477756286
shiftleft,tsmc28,128,7023,0.14238329232521713,1836.953994,8670.0,0.566566994162039
comparator,tsmc28,8,17422,0.05733769130983814,35.784,170.595,0.009488003673516243
comparator,tsmc28,16,13736,0.07273839778683751,54.558,250.167,0.014349155503785673
comparator,tsmc28,32,12139,0.08236710865804432,145.782,622.975,0.03567015404893319
comparator,tsmc28,64,11080,0.09024670758122744,294.21,1250.0,0.0684115523465704
comparator,tsmc28,128,9371,0.10671119720414043,558.432,2400.0,0.12794792444776437
flop,tsmc28,8,10,0.048889000000002625,15.12,78.6345,0.027246000000000003
flop,tsmc28,16,10,0.048889000000002625,30.24,157.29,0.054290000000000005
flop,tsmc28,32,10,0.048889000000002625,60.4799995,314.5805,0.10908000000000001
flop,tsmc28,64,10,0.048889000000002625,120.959999,630.0,0.21765500000000004
flop,tsmc28,128,10,0.048889000000002625,241.919998,1260.0,0.43579999999999997
mux2,tsmc28,8,29614,0.03374481252110488,16.758,114.564,0.005436617815897886
mux2,tsmc28,16,18767,0.053046021580433735,15.75,88.025,0.005142004582511856
mux2,tsmc28,32,17903,0.05585556035301346,32.130001,171.146,0.009897782494553985
mux2,tsmc28,64,18568,0.05371109651012495,91.35,523.884,0.027574321413183972
mux2,tsmc28,128,16637,0.05991099044298852,176.525999,941.106,0.05012923002945243
mux4,tsmc28,8,18151,0.055092383284667513,27.971999,133.963,0.008032615282904523
mux4,tsmc28,16,16486,0.06057952759917506,39.438,186.231,0.012556108213029236
mux4,tsmc28,32,15196,0.06580579126085812,69.174,324.969,0.023229797315082915
mux4,tsmc28,64,13926,0.07180612868016659,137.465999,648.086,0.04574177796926612
mux4,tsmc28,128,13090,0.07636619404125286,294.335997,1420.0,0.09358288770053477
mux8,tsmc28,8,12902,0.07750336319950395,44.604,214.286,0.0117501162610448
mux8,tsmc28,16,12264,0.08147446510110894,128.771998,548.714,0.02666340508806262
mux8,tsmc28,32,11713,0.08517122410996329,172.115999,823.633,0.046956373260479814
mux8,tsmc28,64,11014,0.09067453550027238,304.163999,1460.0,0.08498274922825495
mux8,tsmc28,128,10474,0.09542350830628223,683.045996,2820.0,0.15705556616383426
mult,tsmc28,8,5200,0.1922996923076923,577.206,4340.0,0.37769230769230766
mult,tsmc28,16,3819,0.26184265147944485,1634.472002,11800.0,1.4553548049227547
mult,tsmc28,32,3033,0.3295775611605671,6343.721998,47200.0,6.303330036267723
mult,tsmc28,64,2390,0.4184090418410042,16045.092071,109000.0,18.54602510460251
mult,tsmc28,128,1868,0.5353279057815846,44272.49428,262000.0,50.01177730192719

1 Module Tech Width Target Freq Delay Area L Power (nW) D energy (fJ) D energy (nJ)
2 priorityencoder sky90 8 7683 0.12508649056358195 50.960001 24.761 1.0685929975270078e-05 0.010685929975270078
3 priorityencoder sky90 16 5773 0.16977016282695304 136.220003 77.243 2.1773774467348e-05 0.021773774467348
4 priorityencoder sky90 32 4500 0.2218912222222222 372.400007 189.626 4.371111111111111e-05 0.04371111111111111
5 priorityencoder sky90 64 4098 0.2439914738897023 797.720015 382.205 7.393850658857981e-05 0.07393850658857981
6 priorityencoder sky90 128 3409 0.2933331557641537 1602.300031 610.009 0.0001261366969785861 0.1261366969785861
7 add sky90 8 3658 0.27337042810278844 253.820005 154.438 0.00010825587752870422 0.10825587752870422
8 add sky90 16 2942 0.3393218266485384 722.260013 485.109 0.00032460910944935416 0.32460910944935417
9 add sky90 32 2468 0.40496338573743923 1440.600027 714.057 0.0006580226904376014 0.6580226904376014
10 add sky90 64 2139 0.4674681813931744 2781.240054 1050.0 0.0009392239364188873 0.9392239364188874
11 add sky90 128 1885 0.5304949787798409 6186.740118 2230.0 0.0021480106100795755 2.1480106100795755
12 csa sky90 8 5758 0.16536141368530738 266.560005 154.202 0.00013650573115665162 0.13650573115665163
13 csa sky90 16 5931 0.1654056314280897 533.12001 308.404 0.00027263530601922105 0.27263530601922104
14 csa sky90 32 5758 0.16536141368530738 1066.240021 616.808 0.0005448072247308093 0.5448072247308093
15 csa sky90 64 5931 0.1654056314280897 2132.480042 1230.0 0.0010905412240768842 1.0905412240768841
16 csa sky90 128 5931 0.1654056314280897 4264.960083 2470.0 0.002178553363682347 2.178553363682347
17 shiftleft sky90 8 4327 0.23025600254217704 259.700005 196.451 7.534088282874971e-05 0.07534088282874972
18 shiftleft sky90 16 3355 0.29803959314456036 666.400006 558.433 0.00019552906110283157 0.19552906110283155
19 shiftleft sky90 32 2503 0.39951757530962845 1475.880027 768.262 0.0003807431082700759 0.3807431082700759
20 shiftleft sky90 64 2203 0.45385946391284615 3914.120062 2680.0 0.001144802541988198 1.144802541988198
21 shiftleft sky90 128 1907 0.5242938489774515 9192.400136 6080.0 0.0029008914525432616 2.9008914525432616
22 comparator sky90 8 4839 0.20629126741062204 200.900004 136.6 5.001033271337053e-05 0.05001033271337053
23 comparator sky90 16 4018 0.24806303982080635 358.680007 189.253 6.321553011448482e-05 0.06321553011448482
24 comparator sky90 32 3602 0.276293542476402 690.900013 315.709 0.00010771793448084398 0.10771793448084398
25 comparator sky90 64 3129 0.31954192361776923 1372.980026 508.393 0.0002048577820389901 0.2048577820389901
26 comparator sky90 128 2791 0.35824651809387315 2744.980052 796.047 0.0003439627373701182 0.34396273737011823
27 flop sky90 8 10 0.1143419999999935 133.279999 64.8145 0.000193835 0.193835
28 flop sky90 16 10 0.1143419999999935 266.5599975 129.629 0.00038715000000000006 0.38715000000000005
29 flop sky90 32 10 0.1143419999999935 533.119995 259.258 0.0007723000000000001 0.7723000000000001
30 flop sky90 64 10 0.1143419999999935 1066.23999 520.0 0.00154955 1.54955
31 flop sky90 128 10 0.1143419999999935 2132.4799805 1035.0 0.003094 3.094
32 mux2 sky90 8 5299 0.1883518518588413 63.700001 21.541 1.932440083034535e-05 0.01932440083034535
33 mux2 sky90 16 4850 0.20207356701030926 119.560002 32.354 3.884536082474227e-05 0.03884536082474227
34 mux2 sky90 32 5003 0.19908807195682593 375.340008 259.372 0.00013671796921846892 0.13671796921846893
35 mux2 sky90 64 3989 4073 0.24961239583855604 0.24517727326295113 479.220007 479.220009 148.175 115.22 0.00016570569064928555 0.15148539160324087
36 mux2 sky90 128 4004 0.24974824975024976 1302.420025 767.078 0.0004665334665334665 0.4665334665334665
37 mux4 sky90 8 4661 4726 0.21448923471358078 0.21123742953872196 164.640002 148.960002 88.494 66.984 4.31452478009011e-05 0.04026661024121879
38 mux4 sky90 16 4392 4455 0.22421770309653916 0.2231388911335578 359.659999 392.0 419.855 398.313 0.0001006375227686703 0.1037037037037037
39 mux4 sky90 32 4118 0.24283532831471588 594.860011 331.197 0.00013161728994657602 0.131617289946576
40 mux4 sky90 64 3710 0.26931477897574124 899.640016 344.331 0.00028625336927223723 0.2862533692722372
41 mux4 sky90 128 3167 3249 0.31575023618566467 0.3077210113881194 2016.840039 2013.900038 722.109 818.249 0.0005917271866119355 0.6094182825484764
42 mux8 sky90 8 3585 0.2789170278940028 287.140006 116.648 6.089260808926081e-05 0.06089260808926081
43 mux8 sky90 16 3362 0.295237998810232 582.120003 282.366 0.00014455681142177274 0.14455681142177274
44 mux8 sky90 32 3178 0.3140553102580239 1319.079995 670.683 0.0003577721837633732 0.35777218376337316
45 mux8 sky90 64 2906 0.3440756228492774 2132.48004 808.482 0.0004428768066070199 0.44287680660701995
46 mux8 sky90 128 2667 0.3749401308586427 4575.620089 1830.0 0.0009786276715410573 0.9786276715410572
47 mult sky90 8 1310 0.7631557786259543 2194.220041 1440.0 0.0014213740458015268 1.421374045801527
48 mult sky90 16 997 1.0029260270812437 7519.540137 4940.0 0.0063761283851554666 6.376128385155466
49 mult sky90 32 763 1.3106129895150722 25200.700446 14900.0 0.024931847968545216 24.931847968545217
50 mult sky90 64 632 1.5822664810126583 86011.661365 42600.0 0.08884651898734176 88.84651898734177
51 mult sky90 128 524 1.9083759465648855 296198.144128 114000.0 0.2733148854961832 273.3148854961832
52 priorityencoder tsmc28 8 31335 0.031912196106590074 8.316 34.836 1.716929950534546e-06 0.001716929950534546
53 priorityencoder tsmc28 16 21253 0.04703118086858326 21.672 78.026 4.008845810003294e-06 0.004008845810003294
54 priorityencoder tsmc28 32 16464 0.06071258114674442 61.614 207.499 9.323372206025267e-06 0.009323372206025266
55 priorityencoder tsmc28 64 13804 0.07239877021153289 137.466 425.592 1.847290640394089e-05 0.01847290640394089
56 priorityencoder tsmc28 128 11440 0.0874065874125874 317.646 973.649 4.117132867132867e-05 0.041171328671328666
57 add tsmc28 8 13838 0.07207477814713109 34.272 187.089 1.3311172134701546e-05 0.013311172134701546
58 add tsmc28 16 11521 0.08678002100512108 90.972001 475.207 3.367763214998698e-05 0.03367763214998698
59 add tsmc28 32 9812 0.1018860211985324 209.286002 1060.0 8.153281695882594e-05 0.08153281695882594
60 add tsmc28 64 8206 0.12185605215695831 388.836003 1770.0 0.0001409943943456008 0.1409943943456008
61 add tsmc28 128 7354 0.13597341881968997 907.452008 4360.0 0.00034511830296437315 0.3451183029643731
62 csa tsmc28 8 24524 0.040663382319360626 52.416 482.462 2.173381177621921e-05 0.02173381177621921
63 csa tsmc28 16 24524 0.040663382319360626 104.832 964.99 4.346762355243842e-05 0.04346762355243842
64 csa tsmc28 32 24524 0.040663382319360626 209.664 1930.0 8.677214157559942e-05 0.08677214157559941
65 csa tsmc28 64 24524 0.040663382319360626 419.327999 3860.0 0.00017342195400424075 0.17342195400424076
66 csa tsmc28 128 24524 0.040663382319360626 838.655998 7720.0 0.00034717011906703634 0.3471701190670363
67 shiftleft tsmc28 8 15202 0.0656078183133798 50.652 367.074 1.6991185370346006e-05 0.016991185370346006
68 shiftleft tsmc28 16 11804 0.08465604506946797 127.511999 602.29 3.388681802778719e-05 0.03388681802778719
69 shiftleft tsmc28 32 9587 0.10430391697089808 384.803997 1940.0 0.00010180452696359654 0.10180452696359654
70 shiftleft tsmc28 64 8272 0.12086674854932303 1041.263998 5460.0 0.0002895309477756286 0.2895309477756286
71 shiftleft tsmc28 128 7023 0.14238329232521713 1836.953994 8670.0 0.000566566994162039 0.566566994162039
72 comparator tsmc28 8 17422 0.05733769130983814 35.784 170.595 9.488003673516243e-06 0.009488003673516243
73 comparator tsmc28 16 13736 0.07273839778683751 54.558 250.167 1.4349155503785673e-05 0.014349155503785673
74 comparator tsmc28 32 12139 0.08236710865804432 145.782 622.975 3.567015404893319e-05 0.03567015404893319
75 comparator tsmc28 64 11080 0.09024670758122744 294.21 1250.0 6.84115523465704e-05 0.0684115523465704
76 comparator tsmc28 128 9371 0.10671119720414043 558.432 2400.0 0.00012794792444776438 0.12794792444776437
77 flop tsmc28 8 10 0.048889000000002625 15.12 78.6345 2.7246000000000003e-05 0.027246000000000003
78 flop tsmc28 16 10 0.048889000000002625 30.24 157.29 5.4290000000000004e-05 0.054290000000000005
79 flop tsmc28 32 10 0.048889000000002625 60.4799995 314.5805 0.00010908 0.10908000000000001
80 flop tsmc28 64 10 0.048889000000002625 120.959999 630.0 0.00021765500000000003 0.21765500000000004
81 flop tsmc28 128 10 0.048889000000002625 241.919998 1260.0 0.00043579999999999997 0.43579999999999997
82 mux2 tsmc28 8 29614 0.03374481252110488 16.758 114.564 5.436617815897886e-06 0.005436617815897886
83 mux2 tsmc28 16 18767 0.053046021580433735 15.75 88.025 5.142004582511856e-06 0.005142004582511856
84 mux2 tsmc28 32 17903 0.05585556035301346 32.130001 171.146 9.897782494553986e-06 0.009897782494553985
85 mux2 tsmc28 64 18568 0.05371109651012495 91.35 523.884 2.757432141318397e-05 0.027574321413183972
86 mux2 tsmc28 128 16637 0.05991099044298852 176.525999 941.106 5.012923002945243e-05 0.05012923002945243
87 mux4 tsmc28 8 18151 0.055092383284667513 27.971999 133.963 8.032615282904523e-06 0.008032615282904523
88 mux4 tsmc28 16 16486 0.06057952759917506 39.438 186.231 1.2556108213029237e-05 0.012556108213029236
89 mux4 tsmc28 32 15196 0.06580579126085812 69.174 324.969 2.3229797315082915e-05 0.023229797315082915
90 mux4 tsmc28 64 13926 0.07180612868016659 137.465999 648.086 4.5741777969266124e-05 0.04574177796926612
91 mux4 tsmc28 128 13090 0.07636619404125286 294.335997 1420.0 9.358288770053477e-05 0.09358288770053477
92 mux8 tsmc28 8 12902 0.07750336319950395 44.604 214.286 1.17501162610448e-05 0.0117501162610448
93 mux8 tsmc28 16 12264 0.08147446510110894 128.771998 548.714 2.666340508806262e-05 0.02666340508806262
94 mux8 tsmc28 32 11713 0.08517122410996329 172.115999 823.633 4.695637326047981e-05 0.046956373260479814
95 mux8 tsmc28 64 11014 0.09067453550027238 304.163999 1460.0 8.498274922825495e-05 0.08498274922825495
96 mux8 tsmc28 128 10474 0.09542350830628223 683.045996 2820.0 0.00015705556616383426 0.15705556616383426
97 mult tsmc28 8 5200 0.1922996923076923 577.206 4340.0 0.00037769230769230767 0.37769230769230766
98 mult tsmc28 16 3819 0.26184265147944485 1634.472002 11800.0 0.0014553548049227546 1.4553548049227547
99 mult tsmc28 32 3033 0.3295775611605671 6343.721998 47200.0 0.0063033300362677225 6.303330036267723
100 mult tsmc28 64 2390 0.4184090418410042 16045.092071 109000.0 0.01854602510460251 18.54602510460251
101 mult tsmc28 128 1868 0.5353279057815846 44272.49428 262000.0 0.05001177730192719 50.01177730192719

View File

@ -1,28 +0,0 @@
#!/bin/bash
# David_Harris@hmc.edu and Madeleine Masser-Frye 11 May 2022
# Run PPA experiments on different modules
#
# Flow: clear earlier PPA run directories, start every synthesis below as a
# background job, wait for all of them, then grep the QoR reports for the
# critical path length and design area.

# Start one synthesis in the background.
#   $1 = design name (e.g. ppa_add_16)
#   $2 = value passed to make as FREQ
launch_synth() {
    make synth DESIGN="$1" TECH=sky90 DRIVE=INV FREQ="$2" MAXOPT=1 &
}

# Remove results left over from previous PPA runs
rm -rf runs/ppa*

# Adders
launch_synth ppa_add_16 10
# NOTE(review): FREQ=1 here, while the neighbouring low-target runs use 10 — confirm this is intended
launch_synth ppa_add_32 1
launch_synth ppa_add_64 10
launch_synth ppa_add_16 4000
launch_synth ppa_add_32 4000
launch_synth ppa_add_64 4000
launch_synth ppa_add_16 5000
launch_synth ppa_add_32 5000
launch_synth ppa_add_64 5000
launch_synth ppa_add_16 6000
launch_synth ppa_add_32 6000
launch_synth ppa_add_64 6000

# Comparators
launch_synth ppa_comparator_16 10
launch_synth ppa_comparator_32 10
launch_synth ppa_comparator_64 10
launch_synth ppa_comparator_16 8000
launch_synth ppa_comparator_32 8000
launch_synth ppa_comparator_64 8000
launch_synth ppa_comparator_16 10000
launch_synth ppa_comparator_32 10000
launch_synth ppa_comparator_64 10000

# Block until every background synthesis has exited, then summarize
wait
grep "Critical Path Length" runs/ppa_*/reports/*qor*
grep "Design Area" runs/ppa_*/reports/*qor*

View File

@ -41,7 +41,7 @@ def synthsintocsv():
file = open("ppaData.csv", "w")
writer = csv.writer(file)
writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (fJ)'])
writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)'])
for oneSynth in allSynths:
module, width, risc, tech, freq = specReg.findall(oneSynth)[2:7]
@ -60,7 +60,7 @@ def synthsintocsv():
delay = 1000/int(freq) - metrics[0]
area = metrics[1]
lpower = metrics[4]
denergy = (metrics[2] + metrics[3])/int(freq) # (switching + internal powers)*delay, more practical units for regression coefs
denergy = (metrics[2] + metrics[3])/int(freq)*1000 # (switching + internal powers)*delay, more practical units for regression coefs
if ('flop' in module): # since two flops in each module
[area, lpower, denergy] = [n/2 for n in [area, lpower, denergy]]
@ -85,7 +85,7 @@ def cleanup():
output = subprocess.check_output(['bash','-c', bashCommand])
allSynths = output.decode("utf-8").split('\n')[:-1]
for oneSynth in allSynths:
for phrase in [['Path Length', 'qor']]: #, ['Design Area', 'qor'], ['100', 'power']]:
for phrase in [['Path Length', 'qor']]:
bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*'
bashCommand = bashCommand.format(*phrase)
try: output = subprocess.check_output(['bash','-c', bashCommand])
@ -131,54 +131,18 @@ def csvOfBest(filename):
for w in widths:
m = np.Inf # large number to start
best = None
if ([mod, tech, w] in leftblue):
for oneSynth in allSynths: # leftmost blue
if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == mod):
if (oneSynth.freq < m) & (1000/oneSynth.delay < oneSynth.freq):
# if ([mod, tech, w] != ['mux2', 'sky90', 128]) or (oneSynth.area < 1100):
m = oneSynth.freq
best = oneSynth
else:
for oneSynth in allSynths: # best achievable, rightmost green
if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == mod):
if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq):
m = oneSynth.delay
best = oneSynth
# contenders = []
# delays = []
# for oneSynth in allSynths: # choose synth w minimal delay
# if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == mod):
# contenders += [oneSynth]
# delays += [oneSynth.delay]
# if oneSynth.delay < m:
# m = oneSynth.delay
# best = oneSynth
# for oneSynth in contenders: # if m is min delay, choose best area within s as percent of m
# s = oneSynth.delay/m - 1
# if s < 0.1:
# if oneSynth.area < best.area:
# best = oneSynth
# bestval = 1.9 # score algorithm
# for oneSynth in contenders:
# delaydif = abs(1 - oneSynth.delay/best.delay)
# areadif = 1 - oneSynth.area/best.area
# try: val = areadif/delaydif
# except: val = 1
# # if (oneSynth.width == 64) & (oneSynth.tech == 'sky90') & (oneSynth.module == 'comparator'):
# # print(oneSynth.freq, ' ', delaydif, ' ', areadif, ' ', val)
# if val > bestval:
# bestval = val
# best = oneSynth
for oneSynth in allSynths: # best achievable, rightmost green
if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == mod):
if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq):
m = oneSynth.delay
best = oneSynth
if (best != None) & (best not in bestSynths):
bestSynths += [best]
file = open(filename, "w")
writer = csv.writer(file)
writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (fJ)'])
writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)'])
for synth in bestSynths:
writer.writerow(list(synth))
file.close()
@ -265,7 +229,7 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, colo
if norm:
ylabeldic = {"lpower": "Leakage Power (add32)", "denergy": "Energy/Op (add32)", "area": "Area (add32)", "delay": "Delay (FO4)"}
else:
ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (fJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"}
ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (nJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"}
ax.set_ylabel(ylabeldic[var])
ax.set_xticks(widths)
@ -549,13 +513,12 @@ def squarify(fig):
l = (1.-axs/h)/2
fig.subplots_adjust(bottom=l, top=1-l)
def plotPPA(mod, freq=None, norm=True, aleOpt=False):
''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits
if no freq specified, uses the synthesis with best achievable delay for each width
overlays data from both techs
'''
plt.rcParams["figure.figsize"] = (7,3.6)
plt.rcParams["figure.figsize"] = (7,3.46)
fig, axs = plt.subplots(2, 2)
arr = [['delay', 'area'], ['lpower', 'denergy']]
@ -591,13 +554,7 @@ def plotPPA(mod, freq=None, norm=True, aleOpt=False):
plt.savefig(saveStr)
# plt.show()
def plotBestAreas(mod):
    ''' mod: string module name
        opens a new figure, calls oneMetricPlot for the 'area' metric of mod with freq=10,
        titles the plot and saves it as ./plots/bestAreas/<mod>.png
    '''
    plt.subplots(1, 1)  # start a fresh single-axes figure; the returned handles are not needed
    oneMetricPlot(mod, 'area', freq=10)
    title = mod + ' Optimized Areas (target freq 10MHz)'
    plt.title(title)
    outPath = './plots/bestAreas/' + mod + '.png'
    plt.savefig(outPath)
def makeDaLegend():
def makeLineLegend():
plt.rcParams["figure.figsize"] = (5.5,0.3)
fig = plt.figure()
fullLeg = [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')]
@ -609,25 +566,6 @@ def makeDaLegend():
saveStr = './plots/PPA/legend.png'
plt.savefig(saveStr)
def calcAvgRsq():
    ''' reads ppaFitting.csv (header row skipped) and prints the mean of column index 8
        (presumably the R^2 column - confirm against the csv header) for two groups of rows:
        rows whose first field contains 'mux', and all other rows
        rows that contain both an 'easy' field and a 'delay' field are left out of both groups
    '''
    with open('ppaFitting.csv', newline='') as fitFile:
        rows = list(csv.reader(fitFile))[1:]

    muxVals = []
    otherVals = []
    for row in rows:
        if ('easy' in row) and ('delay' in row):
            continue
        if 'mux' in row[0]:
            muxVals.append(float(row[8]))
        elif row[8] != '0.0':  # only the non-mux group drops entries recorded as '0.0'
            otherVals.append(float(row[8]))

    print('Others: ', np.mean(otherVals))
    print('Muxes: ', np.mean(muxVals))
def muxPlot(fits='clsgn', norm=True):
''' module: string module name
freq: int freq (MHz)
@ -660,6 +598,7 @@ def muxPlot(fits='clsgn', norm=True):
techdict = spec._asdict()
norm = techdict['delay']
metric = [m/norm for m in metric]
# print(spec.tech, ' ', metric)
if len(metric) == 3: # don't include the spec if we don't have points for all
xp, pred, coefs, r2 = regress(inputs, metric, fits, ale=False)
@ -679,23 +618,71 @@ def muxPlot(fits='clsgn', norm=True):
ax.legend(handles = fullLeg)
plt.savefig('./plots/PPA/mux.png')
def stdDevError():
    ''' for each metric (delay, area, lpower, denergy), refits every module's normalized data
        from all techSpecs with opt.nnls and prints the metric name followed by the mean and
        standard deviation of the relative fit error |y/yp - 1| collected over the modules
        flop delay and the mult area/lpower/denergy fits are left out of the error list
        relies on the module-level globals modules, fitDict, techSpecs, widths and normAddWidth
    '''
    for var in ['delay', 'area', 'lpower', 'denergy']:
        errlist = []
        for module in modules:
            ale = (var != 'delay')
            # ale indexes fitDict[module] as 0 (delay) or 1 (any other metric)
            funcArr = genFuncs(fitDict[module][ale])

            # metric values from every tech, each divided by that tech's normalization constant
            metL = []
            for spec in techSpecs:
                norm = spec._asdict()[var]
                metL += [m/norm for m in getVals(spec.tech, module, var)]

            if ale:
                ws = [w/normAddWidth for w in widths]
            else:
                ws = list(widths)
            # one copy of the width list per tech so the rows line up with metL
            # (assumes getVals returns one value per width - TODO confirm)
            ws = ws*len(techSpecs)

            mat = [[func(w) for func in funcArr] for w in ws]

            # builtin float: the np.float alias no longer exists in current NumPy
            y = np.array(metL, dtype=float)
            coefs = opt.nnls(mat, y)[0]
            yp = [sum(np.multiply(coefs, row)) for row in mat]

            if (var == 'delay') and (module == 'flop'):
                continue
            if (module == 'mult') and ale:
                continue
            errlist += [abs(y[i]/yp[i] - 1) for i in range(len(y))]

        avgErr = np.mean(errlist)
        stdv = np.std(errlist)
        print(var, ' ', avgErr, ' ', stdv)
if __name__ == '__main__':
##############################
# set up stuff, global variables
widths = [8, 16, 32, 64, 128]
modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult'] # 'mux2d', 'mux4d', 'mux8d']
modules = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mux2', 'mux4', 'mux8', 'mult'] #, 'mux2d', 'mux4d', 'mux8d']
normAddWidth = 32 # divisor to use with N since normalizing to add_32
fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 'ls', 'ls'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']}
fitDict = {'add': ['cg', 'l', 'l'], 'mult': ['cg', 's', 's'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shiftleft': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'priorityencoder': ['cg', 'l', 'l']}
fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l']))
leftblue = [] #[['mux2', 'tsmc28', 8], ['mux4', 'sky90', 16]]
TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy")
techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1330.84, 582.81, 520.66], ['tsmc28', 'blue', '^', 12.2e-3, 209.29, 1060, 81.43]]
techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['tsmc28', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594]]
techSpecs = [TechSpec(*t) for t in techSpecs]
combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0)
# invz1arealeakage = [['sky90', 1.96, 1.98], ['gf32', .351, .3116], ['tsmc28', .252, 1.09]] #['gf32', 'purple', 's', 15e-3]
##############################
# cleanup() # run to remove garbage synth runs
@ -704,21 +691,21 @@ if __name__ == '__main__':
allSynths = synthsfromcsv('ppaData.csv') # your csv here!
bestSynths = csvOfBest('bestSynths.csv')
# ### plotting examples
# ### function examples
# squareAreaDelay('sky90', 'add', 32)
# oneMetricPlot('add', 'area')
# oneMetricPlot('mult', 'lpower')
# freqPlot('sky90', 'mux4', 16)
# plotBestAreas('add')
# makeCoefTable()
# calcAvgRsq()
# makeEqTable()
# makeDaLegend()
# makeLineLegend()
# muxPlot()
# stdDevError()
# for mod in modules:
# # plotPPA(mod, norm=False)
# # plotPPA(mod, aleOpt=True)
# for w in widths:
# freqPlot('sky90', mod, w)
# freqPlot('tsmc28', mod, w)
# plt.close('all')
for mod in modules:
plotPPA(mod, norm=False)
plotPPA(mod, aleOpt=True)
for w in widths:
freqPlot('sky90', mod, w)
freqPlot('tsmc28', mod, w)
plt.close('all')

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +1,11 @@
Element,Best delay,Fast area,Fast leakage,Fast energy,Small area,Small leakage,Small energy
priorityencoder,0.98$log_2$(N),0.33S,0.25S,0.093S,0.15S,0.046S,0.00046S
add,1.8 + 1.4$log_2$(N),1.1S,0.95S,1S,0.34S,0.16S,0.025S
csa,3.6,0.93S,1.5S,1.1S,0.34S,0.16S,0.00055S
shiftleft,0.48 + 1.6$log_2$(N),1.9S,2.3S,1.5S,0.8S,0.29S,0.0059S
comparator,2 + 0.94$log_2$(N),0.6S,0.47S,0.31S,0.34S,0.16S,0.00089S
flop,3.3,0.34S,0.37S,0.0012S,0.34S,0.37S,0.0012S
mux2,2.8 + 0.38$log_2$(N),0.2S,0.18S,0.16S,0.15S,0.12S,0.0011S
mux4,3.1 + 0.51$log_2$(N),0.36S,0.32S,0.28S,0.28S,0.11S,0.0021S
mux8,5 + 0.45$log_2$(N),0.76S,0.66S,0.45S,0.55S,0.24S,0.0029S
mult,6$log_2$(N),13S + 10$S^2$,26S + 7.3$S^2$,42S + 25$S^2$,1.1S + 7.9$S^2$,1S + 3.4$S^2$,2.1$S^2$
priorityencoder,0.98$log_2$(N),0.32S,0.22S,0.087S,0.14S,0.044S,0.033S
add,1.9 + 1.4$log_2$(N),1S,0.89S,0.91S,0.32S,0.15S,0.33S
csa,3.6,0.87S,1.3S,0.95S,0.33S,0.15S,0.31S
shiftleft,0.46 + 1.6$log_2$(N),1.9S,2.1S,1.4S,0.77S,0.28S,0.48S
comparator,2.1 + 0.91$log_2$(N),0.58S,0.44S,0.27S,0.33S,0.15S,0.12S
flop,3.3,0.33S,0.33S,1.3S,0.33S,0.33S,1.3S
mux2,2.6 + 0.41$log_2$(N),0.21S,0.23S,0.16S,0.14S,0.11S,0.14S
mux4,3.1 + 0.5$log_2$(N),0.35S,0.31S,0.26S,0.27S,0.11S,0.19S
mux8,4.9 + 0.46$log_2$(N),0.8S,0.66S,0.43S,0.53S,0.23S,0.24S
mult,6$log_2$(N),13$S^2$,13$S^2$,33$S^2$,7.8$S^2$,3.5$S^2$,14$S^2$

1 Element Best delay Fast area Fast leakage Fast energy Small area Small leakage Small energy
2 priorityencoder 0.98$log_2$(N) 0.33S 0.32S 0.25S 0.22S 0.093S 0.087S 0.15S 0.14S 0.046S 0.044S 0.00046S 0.033S
3 add 1.8 + 1.4$log_2$(N) 1.9 + 1.4$log_2$(N) 1.1S 1S 0.95S 0.89S 1S 0.91S 0.34S 0.32S 0.16S 0.15S 0.025S 0.33S
4 csa 3.6 0.93S 0.87S 1.5S 1.3S 1.1S 0.95S 0.34S 0.33S 0.16S 0.15S 0.00055S 0.31S
5 shiftleft 0.48 + 1.6$log_2$(N) 0.46 + 1.6$log_2$(N) 1.9S 2.3S 2.1S 1.5S 1.4S 0.8S 0.77S 0.29S 0.28S 0.0059S 0.48S
6 comparator 2 + 0.94$log_2$(N) 2.1 + 0.91$log_2$(N) 0.6S 0.58S 0.47S 0.44S 0.31S 0.27S 0.34S 0.33S 0.16S 0.15S 0.00089S 0.12S
7 flop 3.3 0.34S 0.33S 0.37S 0.33S 0.0012S 1.3S 0.34S 0.33S 0.37S 0.33S 0.0012S 1.3S
8 mux2 2.8 + 0.38$log_2$(N) 2.6 + 0.41$log_2$(N) 0.2S 0.21S 0.18S 0.23S 0.16S 0.15S 0.14S 0.12S 0.11S 0.0011S 0.14S
9 mux4 3.1 + 0.51$log_2$(N) 3.1 + 0.5$log_2$(N) 0.36S 0.35S 0.32S 0.31S 0.28S 0.26S 0.28S 0.27S 0.11S 0.0021S 0.19S
10 mux8 5 + 0.45$log_2$(N) 4.9 + 0.46$log_2$(N) 0.76S 0.8S 0.66S 0.45S 0.43S 0.55S 0.53S 0.24S 0.23S 0.0029S 0.24S
11 mult 6$log_2$(N) 13S + 10$S^2$ 13$S^2$ 26S + 7.3$S^2$ 13$S^2$ 42S + 25$S^2$ 33$S^2$ 1.1S + 7.9$S^2$ 7.8$S^2$ 1S + 3.4$S^2$ 3.5$S^2$ 2.1$S^2$ 14$S^2$

View File

@ -1,41 +1,81 @@
Module,Metric,1,N,N^2,log2(N),Nlog2(N),R^2
priorityencoder,delay,4.865032478368464,,,1.0346781590203091,,0.990533246983837
priorityencoder,area,,0.3296349181169891,,,,0.9718942704677337
priorityencoder,lpower,,0.2508481588069769,,,,0.9418329012771585
priorityencoder,denergy,,0.09327161156406552,,,,0.8065924672945542
add,delay,8.961254531683414,,,1.4310340215065527,,0.9564367595740637
add,area,,1.0710989265923485,,,,0.988580182173048
add,lpower,,0.9470245397661955,,,,0.9951383820581323
add,denergy,,0.9954952282287014,,,,0.9928308616130285
csa,delay,3.590384717869601,,,,,0.0
csa,area,,0.9312877569527923,,,,0.999393942859829
csa,lpower,,1.5320774877598933,,,,0.9400384192534433
csa,denergy,,1.1454135769936609,,,,0.9735205275004183
shiftleft,delay,8.66019468793489,,,1.6351711913499432,,0.9873681453602638
shiftleft,area,,1.9102134686740575,,,,0.9466461680123697
shiftleft,lpower,,2.277088275290811,,,,0.9624044038708768
shiftleft,denergy,,1.4931073444617051,,,,0.9454881696599784
comparator,delay,6.680678539086959,,,0.9397668550976327,,0.98789326603378
comparator,area,,0.6003877936704982,,,,0.9672416909621802
comparator,lpower,,0.46756802348373877,,,,0.8609362596824635
comparator,denergy,,0.3089180049610159,,,,0.8267293340232036
flop,delay,3.3270503187614153,,,,,0.0
flop,area,,0.34478305655859876,,,,0.9433629202566682
flop,lpower,,0.3707856336608904,,,,0.9170347531086821
flop,denergy,,0.0011765517257429892,,,,0.688648230209356
mux2,delay,4.732514086885074,,,0.38138175938205005,,0.5638177354804589
mux2,area,,0.19794547955000782,,,,0.9753613114571431
mux2,lpower,,0.1881638557015794,,,,0.7572248871637561
mux2,denergy,,0.16278100836605952,,,,0.9811112115671446
mux4,delay,5.67790744523475,,,0.5081925137582493,,0.8316415055210026
mux4,area,,0.35778033738856435,,,,0.9880049722019894
mux4,lpower,,0.32236674794207065,,,,0.8279138454959137
mux4,denergy,,0.28073375091037084,,,,0.9943662618662574
mux8,delay,7.252700330388384,,,0.45254210999717837,,0.8464368692304263
mux8,area,,0.7614128432326613,,,,0.9863118376555963
mux8,lpower,,0.6570734849206145,,,,0.9855956038468652
mux8,denergy,,0.4496346388149245,,,,0.9785597135426944
mult,delay,29.562138166420393,,,6.711916207386673,,0.9833266087176287
mult,area,,,13.838943348894976,,,0.9875861886135875
mult,lpower,,,14.380577146903335,,,0.9349609233308782
mult,denergy,,,36.51397409545879,,,0.9719012952478829
Module,Metric,Target,1,N,N^2,log2(N),Nlog2(N),R^2
priorityencoder,delay,easy,0.0,,,6.815655848737334,,0.5471505976585844
priorityencoder,area,easy,,0.14996313076366272,,,,0.9751246139683207
priorityencoder,lpower,easy,,0.04628232776780845,,,,0.6646449382421588
priorityencoder,denergy,easy,,0.00046338953826781273,,,,0.8843605490100168
priorityencoder,delay,hard,0.0,,,0.9775747670327015,,0.987274834491306
priorityencoder,area,hard,,0.3296349181169891,,,,0.9718942704677337
priorityencoder,lpower,hard,,0.2508481588069769,,,,0.9418329012771585
priorityencoder,denergy,hard,,0.09327161156406552,,,,0.8065924672945542
add,delay,easy,0.0,,,27.820556626526365,,0.5330240516496716
add,area,easy,,0.33740563909904386,,,,0.9968636684818916
add,lpower,easy,,0.1559461482654009,,,,0.7403794995975848
add,denergy,easy,,0.025219887569037786,,,,0.6462978476180771
add,delay,hard,1.8060844241506506,,,1.4310340215065525,,0.9564367595740637
add,area,hard,,1.0710989265923485,,,,0.988580182173048
add,lpower,hard,,0.9470245397661955,,,,0.9951383820581323
add,denergy,hard,,0.9954952282287014,,,,0.9928308616130285
csa,delay,easy,5.827386725865409,,,,,0.0
csa,area,easy,,0.3404841239399024,,,,0.9966821820865757
csa,lpower,easy,,0.1555300133584381,,,,0.7410756093594764
csa,denergy,easy,,0.0005478126632729184,,,,0.5798854696439455
csa,delay,hard,3.590384717869601,,,,,0.0
csa,area,hard,,0.9312877569527923,,,,0.9993939428598292
csa,lpower,hard,,1.5320774877598933,,,,0.9400384192534433
csa,denergy,hard,,1.1454135769936609,,,,0.9735205275004183
shiftleft,delay,easy,0.0,,,5.744537363106859,,0.7778961884907117
shiftleft,area,easy,,0.8000093911038876,,,,0.9576897492378456
shiftleft,lpower,easy,,0.28913702096331206,,,,0.623995386847899
shiftleft,denergy,easy,,0.005924456057944899,,,,0.7200057939838627
shiftleft,delay,hard,0.48433873118517795,,,1.635171191349943,,0.9873681453602638
shiftleft,area,hard,,1.9102134686740575,,,,0.9466461680123697
shiftleft,lpower,hard,,2.277088275290811,,,,0.9624044038708768
shiftleft,denergy,hard,,1.4931073444617051,,,,0.9454881696599784
comparator,delay,easy,0.0,,,4.706704191403,,0.5450694752498024
comparator,area,easy,,0.3425601443761704,,,,0.978537426983507
comparator,lpower,easy,,0.15525826941742596,,,,0.8078417286943447
comparator,denergy,easy,,0.0008896717814426517,,,,0.9267090446396561
comparator,delay,hard,1.9818442635987938,,,0.9397668550976329,,0.9878932660337799
comparator,area,hard,,0.6003877936704982,,,,0.9672416909621802
comparator,lpower,hard,,0.46756802348373877,,,,0.8609362596824635
comparator,denergy,hard,,0.3089180049610159,,,,0.8267293340232036
flop,delay,easy,3.3270503187614153,,,,,0.0
flop,area,easy,,0.34478305655859876,,,,0.9433629202566682
flop,lpower,easy,,0.3707856336608904,,,,0.9170347531086821
flop,denergy,easy,,0.0011765517257429892,,,,0.688648230209356
flop,delay,hard,3.3270503187614153,,,,,0.0
flop,area,hard,,0.34478305655859876,,,,0.9433629202566682
flop,lpower,hard,,0.3707856336608904,,,,0.9170347531086821
flop,denergy,hard,,0.0011765517257429892,,,,0.688648230209356
mux2,delay,easy,0.0,,,3.276654474184255,,0.7130397298335213
mux2,area,easy,,0.15083561354737726,,,,0.976644158286422
mux2,lpower,easy,,0.12067626255418841,,,,0.9344813545348312
mux2,denergy,easy,,0.0011206170933885473,,,,0.5565267433319017
mux2,delay,hard,2.539326242287272,,,0.4143154417811283,,0.550540330173853
mux2,area,hard,,0.2261714754439734,,,,0.9792793539936671
mux2,lpower,hard,,0.27830936017352714,,,,0.9042661125086188
mux2,denergy,hard,,0.19333666277894856,,,,0.9525189311701613
mux4,delay,easy,0.0,,,3.934462222278399,,0.5700865267151127
mux4,area,easy,,0.2839183647889992,,,,0.9755791039549218
mux4,lpower,easy,,0.11187597259171647,,,,0.5559362399863286
mux4,denergy,easy,,0.0021483489610266407,,,,0.5348528431713737
mux4,delay,hard,3.1369448764435073,,,0.5081925137582488,,0.8316415055210026
mux4,area,hard,,0.35778033738856435,,,,0.9880049722019894
mux4,lpower,hard,,0.32236674794207065,,,,0.8279138454959137
mux4,denergy,hard,,0.28073375091037084,,,,0.9943662618662574
mux8,delay,easy,0.0,,,4.439779694843578,,0.6601108415004824
mux8,area,easy,,0.5486426664163658,,,,0.9756934275959698
mux8,lpower,easy,,0.2380559585648822,,,,0.5468728724048277
mux8,denergy,easy,,0.0029053391137917966,,,,0.5231276299250225
mux8,delay,hard,4.9490961359025585,,,0.45254210999717775,,0.8846872287553096
mux8,area,hard,,0.7645051946159651,,,,0.9834986761377894
mux8,lpower,hard,,0.6697504633436362,,,,0.9599639296705227
mux8,denergy,hard,,0.4542825975429124,,,,0.9698068520398291
mult,delay,easy,0.0,,,61.777235436483835,,0.539191885251039
mult,area,easy,,1.1322969325198273,7.852086512913157,,,0.997120506119588
mult,lpower,easy,,1.0320514230056876,3.411671281132248,,,0.7465081509218953
mult,denergy,easy,,0.0,2.146924193738219,,,0.8988257654810033
mult,delay,hard,0.0,,,5.971649009143529,,0.9704083045351518
mult,area,hard,,13.296909763669026,10.2773763878058,,,0.9969630019304513
mult,lpower,hard,,26.397088944265164,7.310137358727654,,,0.9703772416232848
mult,denergy,hard,,41.5120348723692,25.39500777044283,,,0.9849195751440497

1 Module Metric Target 1 N N^2 log2(N) Nlog2(N) R^2
2 priorityencoder delay easy 4.865032478368464 0.0 1.0346781590203091 6.815655848737334 0.990533246983837 0.5471505976585844
3 priorityencoder area easy 0.3296349181169891 0.14996313076366272 0.9718942704677337 0.9751246139683207
4 priorityencoder lpower easy 0.2508481588069769 0.04628232776780845 0.9418329012771585 0.6646449382421588
5 priorityencoder denergy easy 0.09327161156406552 0.00046338953826781273 0.8065924672945542 0.8843605490100168
6 add priorityencoder delay hard 8.961254531683414 0.0 1.4310340215065527 0.9775747670327015 0.9564367595740637 0.987274834491306
7 add priorityencoder area hard 1.0710989265923485 0.3296349181169891 0.988580182173048 0.9718942704677337
8 add priorityencoder lpower hard 0.9470245397661955 0.2508481588069769 0.9951383820581323 0.9418329012771585
9 add priorityencoder denergy hard 0.9954952282287014 0.09327161156406552 0.9928308616130285 0.8065924672945542
10 csa add delay easy 3.590384717869601 0.0 27.820556626526365 0.0 0.5330240516496716
11 csa add area easy 0.9312877569527923 0.33740563909904386 0.999393942859829 0.9968636684818916
12 csa add lpower easy 1.5320774877598933 0.1559461482654009 0.9400384192534433 0.7403794995975848
13 csa add denergy easy 1.1454135769936609 0.025219887569037786 0.9735205275004183 0.6462978476180771
14 shiftleft add delay hard 8.66019468793489 1.8060844241506506 1.6351711913499432 1.4310340215065525 0.9873681453602638 0.9564367595740637
15 shiftleft add area hard 1.9102134686740575 1.0710989265923485 0.9466461680123697 0.988580182173048
16 shiftleft add lpower hard 2.277088275290811 0.9470245397661955 0.9624044038708768 0.9951383820581323
17 shiftleft add denergy hard 1.4931073444617051 0.9954952282287014 0.9454881696599784 0.9928308616130285
18 comparator csa delay easy 6.680678539086959 5.827386725865409 0.9397668550976327 0.98789326603378 0.0
19 comparator csa area easy 0.6003877936704982 0.3404841239399024 0.9672416909621802 0.9966821820865757
20 comparator csa lpower easy 0.46756802348373877 0.1555300133584381 0.8609362596824635 0.7410756093594764
21 comparator csa denergy easy 0.3089180049610159 0.0005478126632729184 0.8267293340232036 0.5798854696439455
22 flop csa delay hard 3.3270503187614153 3.590384717869601 0.0
23 flop csa area hard 0.34478305655859876 0.9312877569527923 0.9433629202566682 0.9993939428598292
24 flop csa lpower hard 0.3707856336608904 1.5320774877598933 0.9170347531086821 0.9400384192534433
25 flop csa denergy hard 0.0011765517257429892 1.1454135769936609 0.688648230209356 0.9735205275004183
26 mux2 shiftleft delay easy 4.732514086885074 0.0 0.38138175938205005 5.744537363106859 0.5638177354804589 0.7778961884907117
27 mux2 shiftleft area easy 0.19794547955000782 0.8000093911038876 0.9753613114571431 0.9576897492378456
28 mux2 shiftleft lpower easy 0.1881638557015794 0.28913702096331206 0.7572248871637561 0.623995386847899
29 mux2 shiftleft denergy easy 0.16278100836605952 0.005924456057944899 0.9811112115671446 0.7200057939838627
30 mux4 shiftleft delay hard 5.67790744523475 0.48433873118517795 0.5081925137582493 1.635171191349943 0.8316415055210026 0.9873681453602638
31 mux4 shiftleft area hard 0.35778033738856435 1.9102134686740575 0.9880049722019894 0.9466461680123697
32 mux4 shiftleft lpower hard 0.32236674794207065 2.277088275290811 0.8279138454959137 0.9624044038708768
33 mux4 shiftleft denergy hard 0.28073375091037084 1.4931073444617051 0.9943662618662574 0.9454881696599784
34 mux8 comparator delay easy 7.252700330388384 0.0 0.45254210999717837 4.706704191403 0.8464368692304263 0.5450694752498024
35 mux8 comparator area easy 0.7614128432326613 0.3425601443761704 0.9863118376555963 0.978537426983507
36 mux8 comparator lpower easy 0.6570734849206145 0.15525826941742596 0.9855956038468652 0.8078417286943447
37 mux8 comparator denergy easy 0.4496346388149245 0.0008896717814426517 0.9785597135426944 0.9267090446396561
38 mult comparator delay hard 29.562138166420393 1.9818442635987938 6.711916207386673 0.9397668550976329 0.9833266087176287 0.9878932660337799
39 mult comparator area hard 0.6003877936704982 13.838943348894976 0.9875861886135875 0.9672416909621802
40 mult comparator lpower hard 0.46756802348373877 14.380577146903335 0.9349609233308782 0.8609362596824635
41 mult comparator denergy hard 0.3089180049610159 36.51397409545879 0.9719012952478829 0.8267293340232036
42 flop delay easy 3.3270503187614153 0.0
43 flop area easy 0.34478305655859876 0.9433629202566682
44 flop lpower easy 0.3707856336608904 0.9170347531086821
45 flop denergy easy 0.0011765517257429892 0.688648230209356
46 flop delay hard 3.3270503187614153 0.0
47 flop area hard 0.34478305655859876 0.9433629202566682
48 flop lpower hard 0.3707856336608904 0.9170347531086821
49 flop denergy hard 0.0011765517257429892 0.688648230209356
50 mux2 delay easy 0.0 3.276654474184255 0.7130397298335213
51 mux2 area easy 0.15083561354737726 0.976644158286422
52 mux2 lpower easy 0.12067626255418841 0.9344813545348312
53 mux2 denergy easy 0.0011206170933885473 0.5565267433319017
54 mux2 delay hard 2.539326242287272 0.4143154417811283 0.550540330173853
55 mux2 area hard 0.2261714754439734 0.9792793539936671
56 mux2 lpower hard 0.27830936017352714 0.9042661125086188
57 mux2 denergy hard 0.19333666277894856 0.9525189311701613
58 mux4 delay easy 0.0 3.934462222278399 0.5700865267151127
59 mux4 area easy 0.2839183647889992 0.9755791039549218
60 mux4 lpower easy 0.11187597259171647 0.5559362399863286
61 mux4 denergy easy 0.0021483489610266407 0.5348528431713737
62 mux4 delay hard 3.1369448764435073 0.5081925137582488 0.8316415055210026
63 mux4 area hard 0.35778033738856435 0.9880049722019894
64 mux4 lpower hard 0.32236674794207065 0.8279138454959137
65 mux4 denergy hard 0.28073375091037084 0.9943662618662574
66 mux8 delay easy 0.0 4.439779694843578 0.6601108415004824
67 mux8 area easy 0.5486426664163658 0.9756934275959698
68 mux8 lpower easy 0.2380559585648822 0.5468728724048277
69 mux8 denergy easy 0.0029053391137917966 0.5231276299250225
70 mux8 delay hard 4.9490961359025585 0.45254210999717775 0.8846872287553096
71 mux8 area hard 0.7645051946159651 0.9834986761377894
72 mux8 lpower hard 0.6697504633436362 0.9599639296705227
73 mux8 denergy hard 0.4542825975429124 0.9698068520398291
74 mult delay easy 0.0 61.777235436483835 0.539191885251039
75 mult area easy 1.1322969325198273 7.852086512913157 0.997120506119588
76 mult lpower easy 1.0320514230056876 3.411671281132248 0.7465081509218953
77 mult denergy easy 0.0 2.146924193738219 0.8988257654810033
78 mult delay hard 0.0 5.971649009143529 0.9704083045351518
79 mult area hard 13.296909763669026 10.2773763878058 0.9969630019304513
80 mult lpower hard 26.397088944265164 7.310137358727654 0.9703772416232848
81 mult denergy hard 41.5120348723692 25.39500777044283 0.9849195751440497

View File

@ -1,16 +1,13 @@
#!/usr/bin/python3
# Madeleine Masser-Frye mmasserfrye@hmc.edu 5/22
# Madeleine Masser-Frye mmasserfrye@hmc.edu 6/22
from collections import namedtuple
import csv
import subprocess
import re
from multiprocessing import Pool, cpu_count
from multiprocessing import Pool
from ppaAnalyze import synthsfromcsv
def runCommand(module, width, tech, freq):
command = "make synth DESIGN=ppa_{}_{} TECH={} DRIVE=INV FREQ={} MAXOPT=1".format(module, width, tech, freq)
command = "make synth DESIGN=ppa_{}_{} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq)
subprocess.Popen(command, shell=True)
def deleteRedundant(LoT):
@ -20,58 +17,40 @@ def deleteRedundant(LoT):
bashCommand = synthStr.format(*synth)
outputCPL = subprocess.check_output(['bash','-c', bashCommand])
def getData(filename):
Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy")
with open(filename, newline='') as csvfile:
csvreader = csv.reader(csvfile)
global allSynths
allSynths = list(csvreader)
for i in range(len(allSynths)):
for j in range(len(allSynths[0])):
try: allSynths[i][j] = int(allSynths[i][j])
except:
try: allSynths[i][j] = float(allSynths[i][j])
except: pass
allSynths[i] = Synth(*allSynths[i])
if __name__ == '__main__':
LoT = []
synthsToRun = []
##### Run specific syntheses
# widths = [8]
# modules = ['mult', 'add', 'shiftleft', 'flop', 'comparator', 'priorityencoder', 'add', 'csa', 'mux2', 'mux4', 'mux8']
# techs = ['sky90']
# freqs = [5000]
# for w in widths:
# for module in modules:
# for tech in techs:
# for freq in freqs:
# LoT += [[module, str(w), tech, str(freq)]]
# arr = [-5, -3, -1, 1, 3, 5]
arr2 = [-8, -6, -4, -2, 0, 2, 4, 6, 8]
##### Run a sweep based on best delay found in existing syntheses
arr = [-8, -6, -4, -2, 0, 2, 4, 6, 8]
allSynths = synthsfromcsv('bestSynths.csv')
for synth in allSynths:
f = 1000/synth.delay
for freq in [round(f+f*x/100) for x in arr]:
LoT += [[synth.module, str(synth.width), synth.tech, str(freq)]]
##### Only do syntheses for which a run doesn't already exist
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
output = subprocess.check_output(['bash','-c', bashCommand])
specReg = re.compile('[a-zA-Z0-9]+')
allSynths = output.decode("utf-8").split('\n')[:-1]
allSynths = [specReg.findall(oneSynth)[2:7] for oneSynth in allSynths]
allSynths = [oneSynth[0:2] + [oneSynth[3][:-2]] + [oneSynth[4]] for oneSynth in allSynths]
for synth in LoT:
if (synth not in allSynths):
synthsToRun += [synth]
widths = [128]
modules = ['mux2', 'mux4', 'mux8', 'shiftleft', 'flop', 'comparator', 'mult', 'priorityencoder', 'add', 'csa']
techs = ['tsmc28']
LoT = []
allSynths = synthsfromcsv('ppaData.csv')
for w in widths:
for module in modules:
for tech in techs:
m = 100000 # large number to start
for oneSynth in allSynths:
if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module):
if (oneSynth.delay < m):
m = oneSynth.delay
synth = oneSynth
# f = 1000/synth.delay
for freq in [10]: #[round(f+f*x/100) for x in arr2]:
LoT += [[synth.module, str(synth.width), synth.tech, str(freq)]]
bashCommand = "find . -path '*runs/ppa*rv32e*' -prune"
output = subprocess.check_output(['bash','-c', bashCommand])
specReg = re.compile('[a-zA-Z0-9]+')
allSynths = output.decode("utf-8").split('\n')[:-1]
allSynths = [specReg.findall(oneSynth)[2:7] for oneSynth in allSynths]
allSynths = [oneSynth[0:2] + [oneSynth[3][:-2]] + [oneSynth[4]] for oneSynth in allSynths]
synthsToRun = []
for synth in LoT:
if synth not in allSynths:
synthsToRun += [synth]
pool = Pool(processes=25)
pool.starmap(runCommand, synthsToRun)
pool.close()
pool = Pool(processes=25)
pool.starmap(runCommand, synthsToRun)

View File

@ -1,5 +1,5 @@
#!/usr/bin/bash
rm -r runs/*
# rm -r runs/*
make clean
make del
make freqs TECH=$1

View File

@ -1,31 +1,17 @@
arch_dir = ../../addins/riscv-arch-test
work_dir = "./riscof_work"
work_dir = ./riscof_work
current_dir = $(shell pwd)
XLEN ?= 64
all: clone memfile
all: build
clone:
build:
mkdir -p $(work_dir)
mkdir -p work
sed 's,{0},$(current_dir),g;s,{1},32imc,g' config.ini > config32.ini
sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini
riscof run --work-dir=$(work_dir) --config=config64.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env
cp -r $(work_dir)/rv64i_m work/
riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env
cp -r $(work_dir)/rv32i_m work/
# sed >> config64.ini
# (cd $(arch_dir) && riscof validateyaml --config=config.ini)
# (cd $(arch_dir) && riscof --verbose info arch-test --clone)
# (cd $(arch_dir) && riscof testlist --config=config.ini --suite=riscv-arch-test/riscv-test-suite/ --env=riscv-arch-test/riscv-test-suite/env)
# sed -i 's/riscv{.}-unknown-/riscv64-unknown-/g' $(arch_dir)/spike/riscof_spike.py
# sed -i 's/riscv{.}-unknown-/riscv64-unknown-/g' $(arch_dir)/sail_cSim/riscof_sail_cSim.py
memfile:
sleep 1
find work/rv*/*/ -type f -name "*ref.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
find work/rv32*/*/ -type f -name "*ref.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find work/rv64*/*/ -type f -name "*ref.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 64 --input "$$f" --output "$$f.memfile"; done
find work/rv*/*/ -type f -name "*.objdump" | while read f; do extractFunctionRadix.sh $$f; done
sed 's,{0},$(current_dir),g;s,{1},$(XLEN)$(if $(findstring 64,$(XLEN)),gc,imc),g' config.ini > config$(XLEN).ini
riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
rm -rf work/rv$(XLEN)i_m
mv -f $(work_dir)/rv$(XLEN)i_m work/
clean:
rm -f config64.ini

View File

@ -90,7 +90,7 @@ class sail_cSim(pluginTemplate):
test_dir = testentry['work_dir']
test_name = test.rsplit('/',1)[1][:-2]
elf = 'ref.elf'
elf = 'Ref.elf'
execute = "@cd "+testentry['work_dir']+";"
@ -98,7 +98,7 @@ class sail_cSim(pluginTemplate):
compile_cmd = cmd + ' -D' + " -D".join(testentry['macros'])
execute+=compile_cmd+";"
execute += self.objdump_cmd.format(elf, self.xlen, 'ref.disass')
execute += self.objdump_cmd.format(elf, self.xlen, 'Ref.elf.objdump')
sig_file = os.path.join(test_dir, self.name[:-1] + ".signature")
execute += self.sail_exe[self.xlen] + ' --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)

View File

@ -1,11 +1,11 @@
hart_ids: [0]
hart0:
ISA: RV32IMCZicsr_Zifencei
ISA: RV32IMFCZicsr_Zifencei
physical_addr_sz: 32
User_Spec_Version: '2.3'
supported_xlen: [32]
misa:
reset-val: 0x40001104
reset-val: 0x40001124
rv32:
accessible: true
mxl:
@ -23,7 +23,7 @@ hart0:
warl:
dependency_fields: []
legal:
- extensions[25:0] bitmask [0x0001104, 0x0000000]
- extensions[25:0] bitmask [0x0001124, 0x0000000]
wr_illegal:
- Unchanged

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-ADD.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.392776//
// Created 2022-06-17 22:58:09.906970//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLT.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.393471//
// Created 2022-06-17 22:58:09.909889//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLTU.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.393741//
// Created 2022-06-17 22:58:09.911056//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SUB.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.393180//
// Created 2022-06-17 22:58:09.908718//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-XOR.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.394013//
// Created 2022-06-17 22:58:09.913218//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -53,8 +53,8 @@ target_tests_nosim = \
WALLY-status-fp-enabled-01 \
WALLY-status-sie-01 \
WALLY-status-tw-01 \
WALLY-gpio-01 \
# unclear why wfi, status-fp-enabled, status-sie, and status-tw fail
rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests))

View File

@ -0,0 +1,7 @@
00000000
00000000
A5A5A5A5
5A5AFFFF
00000000
5A5A0000
A55A0000

View File

@ -0,0 +1,99 @@
///////////////////////////////////////////
//
// WALLY-gpio
//
// Author: David_Harris@hmc.edu and Nicholas Lucio <nlucio@hmc.edu>
//
// Created 2022-06-16
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
///////////////////////////////////////////
#include "WALLY-TEST-LIB-32.h"
// Table-driven GPIO register test. The macros below are not defined in this file
// (presumably they come from WALLY-TEST-LIB-32.h); the entries after test_cases
// list the register accesses to perform, one access per entry.
INIT_TESTS
TRAP_HANDLER m
j run_test_loop // begin test loop/table tests instead of executing inline code.
INIT_TEST_TABLE
END_TESTS
TEST_STACK_AND_DATA
.align 2
test_cases:
# ---------------------------------------------------------------------------------------------
# Test Contents
#
# Here is where the actual tests are held, or rather, what the actual tests do.
# Each entry consists of three 4-byte values that will be read in as follows:
#
# '.4byte [x28 Value], [x29 Value], [x30 value]'
# or
# '.4byte [address], [value], [test type]'
#
# For write32_test entries the value is the data written to the address.
# For read32_test entries the value is presumably the data expected back - TODO confirm
# against the test handler.
#
# The encoding for x30 test type values can be found in the test handler in the framework file
#
# ---------------------------------------------------------------------------------------------
# GPIO register addresses, each expressed as the GPIO base plus a byte offset.
# Offsets 0x10 and 0x14 are not given names here.
# NOTE(review): the layout looks like the SiFive GPIO register map - confirm.
# Only input_val, input_en, output_en, output_val and out_xor are used by the entries below;
# the remaining names are defined but not referenced in this table.
.equ GPIO, 0x10060000
.equ input_val, (GPIO+0x00)
.equ input_en, (GPIO+0x04)
.equ output_en, (GPIO+0x08)
.equ output_val, (GPIO+0x0C)
.equ rise_ie, (GPIO+0x18)
.equ rise_ip, (GPIO+0x1C)
.equ fall_ie, (GPIO+0x20)
.equ fall_ip, (GPIO+0x24)
.equ high_ie, (GPIO+0x28)
.equ high_ip, (GPIO+0x2C)
.equ low_ie, (GPIO+0x30)
.equ low_ip, (GPIO+0x34)
.equ iof_en, (GPIO+0x38)
.equ iof_sel, (GPIO+0x3C)
.equ out_xor, (GPIO+0x40)
# =========== Verify input_val and input_en reset to zero ===========
# Only these two registers are read before the first write; the other registers are not checked.
.4byte input_val, 0x00000000, read32_test # input_val reset to zero
.4byte input_en, 0x00000000, read32_test # input_en reset to zero
# =========== Test output and input pins ===========
# The reads of input_val below use the same pattern that was just written to output_val,
# so this section assumes the output pins are visible on the input pins - TODO confirm
# where that loopback happens.
.4byte output_en, 0xFFFFFFFF, write32_test # enable all output pins
.4byte output_val, 0xA5A5A5A5, write32_test # write alternating pattern to output pins
.4byte input_en, 0xFFFFFFFF, write32_test # enable all input pins
.4byte input_val, 0xA5A5A5A5, read32_test # read pattern from output pins
.4byte output_val, 0x5A5AFFFF, write32_test # write different pattern to output pins
.4byte input_val, 0x5A5AFFFF, read32_test # read different pattern from output pins
# =========== Test input enables ===========
.4byte input_en, 0x00000000, write32_test # disable all input pins
.4byte input_val, 0x00000000, read32_test # read 0 since input pins are disabled
.4byte input_en, 0xFFFF0000, write32_test # enable only the upper 16 input pins
.4byte input_val, 0x5A5A0000, read32_test # 0x5A5AFFFF masked by 0xFFFF0000: upper half of the pattern set above
# =========== Output enables (only ever written with all ones in this table) ===========
.4byte output_en, 0xFFFFFFFF, write32_test # write all ones again; same value as the first output_en write, no entry above changed it
# =========== Test XOR functionality ===========
.4byte out_xor, 0xFF00FF00, write32_test # invert certain pin values
.4byte input_val, 0xA55A0000, read32_test # (0x5A5AFFFF xor 0xFF00FF00) masked by input_en 0xFFFF0000; checks inversion and that the input enable still applies
.4byte 0x0, 0x0, terminate_test # terminate tests

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-ADD.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.394307//
// Created 2022-06-17 22:58:09.914370//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.394785//
// Created 2022-06-17 22:58:09.916813//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLTU.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.395005//
// Created 2022-06-17 22:58:09.917963//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SUB.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.394545//
// Created 2022-06-17 22:58:09.915580//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-XOR.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.395231//
// Created 2022-06-17 22:58:09.919138//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation