Merge branch 'main' of github.com:davidharrishmc/riscv-wally into main

This commit is contained in:
slmnemo 2022-06-21 02:16:26 -07:00
commit 80a57d0469
44 changed files with 2480 additions and 917 deletions

2
.gitignore vendored
View File

@ -32,7 +32,7 @@ testsBP/*/*/*.elf*
testsBP/*/OBJ/*
testsBP/*/*.a
tests/wally-riscv-arch-test/riscv-test-suite/*/I/*/*
tests/riscof/riscof_work*/*
tests/riscof/riscof_work/
tests/riscof/config32.ini
tests/riscof/config64.ini
tests/linux-testgen/linux-testvectors/*

@ -1 +1 @@
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230

View File

@ -4,20 +4,29 @@
embench_dir = ../../addins/embench-iot
all: sim size
all: build sim size
allClean: clean all
build: buildspeed buildsize
buildspeed: build_speedopt_speed build_sizeopt_speed
buildsize: build_speedopt_size build_sizeopt_size
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed
buildspeed:
$(embench_dir)/build_all.py --builddir=bd_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
find $(embench_dir)/bd_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
build_speedopt_speed:
$(embench_dir)/build_all.py --builddir=bd_speedopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-O2 -nostartfiles"
find $(embench_dir)/bd_speedopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for size
buildsize:
$(embench_dir)/build_all.py --builddir=bd_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
build_sizeopt_speed:
$(embench_dir)/build_all.py --builddir=bd_sizeopt_speed --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/crt0.S" --cflags="-Os -nostartfiles"
find $(embench_dir)/bd_sizeopt_speed/ -type f ! -name "*.*" | while read f; do cp "$$f" "$$f.elf"; done
# uses the build_all.py python file to build the tests in addins/embench-iot/bd_speed/ optimized for speed and size
build_speedopt_size:
$(embench_dir)/build_all.py --builddir=bd_speedopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-O2 -msave-restore" --dummy-libs="libgcc libm libc crt0"
build_sizeopt_size:
$(embench_dir)/build_all.py --builddir=bd_sizeopt_size --arch riscv32 --chip generic --board rv32wallyverilog --ldflags="-nostdlib -nostartfiles ../../../config/riscv32/boards/rv32wallyverilog/startup/dummy.S" --cflags="-Os -msave-restore" --dummy-libs="libgcc libm libc crt0"
# builds dependencies, then launches modelsim and finally runs python wrapper script to present results
sim: modelsim_build_memfile modelsim_run speed
@ -28,35 +37,37 @@ modelsim_run:
cd ../../benchmarks/embench/
# builds the objdump based on the compiled c elf files
objdump: buildspeed
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
objdump:
find $(embench_dir)/bd_*_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
# build memfiles, objdump.lab and objdump.addr files
modelsim_build_memfile: objdump
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find $(embench_dir)/bd_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
find $(embench_dir)/bd_*_speed/ -type f -name "*.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find $(embench_dir)/bd_*_speed/ -type f -name "*.elf.objdump" | while read f; do extractFunctionRadix.sh $$f; done
# builds the tests for speed, runs them on spike and then launches python script to present results
# note that the speed python script benchmark_speed.py can get confused if there's both a .output file created from spike and modelsim
# you'll need to manually remove one of the two .output files, or run make clean
spike: buildspeed objdump spike_run speed
spike: buildspeed spike_run speed
# command to run spike on all of the benchmarks
spike_run:
find $(embench_dir)/bd_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
find $(embench_dir)/bd_*opt_speed/ -type f -name "*.elf" | while read f; do spike --isa=rv32imac +signature=$$f.spike.output +signature-granularity=4 $$f; done
# python wrapper to present results of embench size benchmark
size: buildsize
$(embench_dir)/benchmark_size.py --builddir=bd_size --json-output > wallySize.json
$(embench_dir)/benchmark_size.py --builddir=bd_speedopt_size --json-output > wallySpeedOpt_size.json
$(embench_dir)/benchmark_size.py --builddir=bd_sizeopt_size --json-output > wallySizeOpt_size.json
# python wrapper to present results of embench speed benchmark
speed:
$(embench_dir)/benchmark_speed.py --builddir=bd_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeed.json
$(embench_dir)/benchmark_speed.py --builddir=bd_sizeopt_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySizeOpt_speed.json
$(embench_dir)/benchmark_speed.py --builddir=bd_speedopt_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeedOpt_speed.json
# deletes all files
clean:
rm -rf $(embench_dir)/bd_speed/
rm -rf $(embench_dir)/bd_size/
rm -rf $(embench_dir)/bd_*_speed/
rm -rf $(embench_dir)/bd_*_size/
allclean: clean
rm -rf $(embench_dir)/logs/

View File

@ -3,9 +3,8 @@ import subprocess
import sys
import json
import plotly.graph_objects as go
from plotly.subplots import make_subplots
coremarkData = {}
embenchData = {}
debug = True
def loadCoremark():
@ -21,61 +20,85 @@ def loadCoremark():
if (debug): print(coremarkData)
return coremarkData
def loadEmbench():
def loadEmbench(embenchPath, embenchData):
"""loads the embench data dictionary"""
embenchPath = "embench/wallySpeed.json"
f = open(embenchPath)
embenchData = json.load(f)
if (debug): print(embenchData)
return embenchData
def graphEmbench(embenchData):
ydata = list(embenchData["speed results"]["detailed speed results"].keys()) + ["speed geometric mean","speed geometric sd","speed geometric range"]
xdata = list(embenchData["speed results"]["detailed speed results"].values()) + [embenchData["speed results"]["speed geometric mean"],embenchData["speed results"]["speed geometric sd"],embenchData["speed results"]["speed geometric range"]]
fig = go.Figure(go.Bar(
def graphEmbench(embenchSpeedOpt_SpeedData, embenchSizeOpt_SpeedData, embenchSpeedOpt_SizeData, embenchSizeOpt_SizeData):
fig = make_subplots(rows=2, cols=4,
# subplot_titles( "Wally's Embench Cycles and Instret (with -O2)","Wally's Embench Cycles Per Instruction (with -O2)"))
subplot_titles=( "Wally's Embench Cycles and Instret (with -O2)","Wally's Embench Cycles Per Instruction (with -O2)","Wally's Embench Speed Score (with -O2)","Wally's Embench Size Score (with -O2)",
"Wally's Embench Cycles and Instret (with -Os)","Wally's Embench Cycles Per Instruction (with -Os)","Wally's Embench Speed Score (with -Os)","Wally's Embench Size Score (with -Os)"))
ydata = list(embenchSpeedOpt_SpeedData["speed results"]["detailed speed results"].keys()) + ["speed geometric mean","speed geometric sd","speed geometric range"]
xdata = list(embenchSpeedOpt_SpeedData["speed results"]["detailed speed results"].values()) + [embenchSpeedOpt_SpeedData["speed results"]["speed geometric mean"],embenchSpeedOpt_SpeedData["speed results"]["speed geometric sd"],embenchSpeedOpt_SpeedData["speed results"]["speed geometric range"]]
fig.add_trace( go.Bar(
y=ydata,
x=xdata,
orientation='h'))
textposition='outside', text=xdata,
orientation='h'),
row=1,col=3)
fig.show()
ydata = list(embenchSizeOpt_SpeedData["speed results"]["detailed speed results"].keys()) + ["speed geometric mean","speed geometric sd","speed geometric range"]
xdata = list(embenchSizeOpt_SpeedData["speed results"]["detailed speed results"].values()) + [embenchSizeOpt_SpeedData["speed results"]["speed geometric mean"],embenchSizeOpt_SpeedData["speed results"]["speed geometric sd"],embenchSizeOpt_SpeedData["speed results"]["speed geometric range"]]
fig.add_trace( go.Bar(
y=ydata,
x=xdata,
textposition='outside', text=xdata,
orientation='h'),
row=2,col=3)
ydata = list(embenchSpeedOpt_SizeData["size results"]["detailed size results"].keys()) + ["size geometric mean","size geometric sd","size geometric range"]
xdata = list(embenchSpeedOpt_SizeData["size results"]["detailed size results"].values()) + [embenchSpeedOpt_SizeData["size results"]["size geometric mean"],embenchSpeedOpt_SizeData["size results"]["size geometric sd"],embenchSpeedOpt_SizeData["size results"]["size geometric range"]]
fig.add_trace( go.Bar(
y=ydata,
x=xdata,
textposition='outside', text=xdata,
orientation='h'),
row=1,col=4)
ydata = list(embenchSizeOpt_SizeData["size results"]["detailed size results"].keys()) + ["size geometric mean","size geometric sd","size geometric range"]
xdata = list(embenchSizeOpt_SizeData["size results"]["detailed size results"].values()) + [embenchSizeOpt_SizeData["size results"]["size geometric mean"],embenchSizeOpt_SizeData["size results"]["size geometric sd"],embenchSizeOpt_SizeData["size results"]["size geometric range"]]
fig.add_trace( go.Bar(
y=ydata,
x=xdata,
textposition='outside', text=xdata,
orientation='h'),
row=2,col=4)
# facet_row="Score", facet_col="Optimization Flag",
# category_orders={"Score": ["Cycles & Instr", "CPI", "SpeedScore", "SizeScore"],
# "Optimization Flag": ["O2", "Os"]}),
# orientation='h')
fig.update_layout(height=1500,width=4000, title_text="Wally Embench Scores", showlegend=False)
fig.write_image("figure.png", engine="kaleido")
# fig.show()
def main():
coremarkData = loadCoremark()
embenchData = loadEmbench()
graphEmbench(embenchData)
coremarkData = {}
embenchSizeOpt_SpeedData = {}
embenchSpeedOpt_SpeedData = {}
embenchSizeOpt_SizeData = {}
embenchSpeedOpt_SizeData = {}
# coremarkData = loadCoremark()
embenchSpeedOpt_SpeedData = loadEmbench("embench/wallySpeedOpt_speed.json", embenchSpeedOpt_SpeedData)
embenchSizeOpt_SpeedData = loadEmbench("embench/wallySizeOpt_speed.json", embenchSizeOpt_SpeedData)
embenchSpeedOpt_SizeData = loadEmbench("embench/wallySpeedOpt_size.json", embenchSpeedOpt_SizeData)
embenchSizeOpt_SizeData = loadEmbench("embench/wallySizeOpt_size.json", embenchSizeOpt_SizeData)
graphEmbench(embenchSpeedOpt_SpeedData, embenchSizeOpt_SpeedData, embenchSpeedOpt_SizeData, embenchSizeOpt_SizeData)
if __name__ == '__main__':
sys.exit(main())
# x =
# y =
# df = px.data.tips()
# fig = px.bar(df, x="total_bill", y="day", orientation='h')
# fig.show()
# import plotly.express as px
# result = sp.run(['ls', '-l'], stdout=sp.PIPE)
# result.stdout
# fig = go.Figure( go.Bar(
# x=[],
# y=[],
# color="species",
# facet_col="species",
# title="Using update_traces() With Plotly Express Figures"),
# orientation='h')
# fig.show()
#
# "ls -Art ../addins/embench-iot/logs/*speed* | tail -n 1 " # gets most recent embench speed log
# "ls -Art ../addins/embench-iot/logs/*size* | tail -n 1 " # gets most recent embench speed log
## get coremark score
# cat coremarkPath | grep "CoreMark 1.0" | cut -d ':' -f 2 | cut -d " " -f 2
# cat coremarkPath | grep "MTIME" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
# cat coremarkPath | grep "MINSTRET" | cut -d ':' -f 2 | cut -d " " -f 2 | tail -1
# "ls -Art ../addins/embench-iot/logs/*speed* | tail -n 1 " # gets most recent embench speed log

View File

@ -95,6 +95,7 @@
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))

View File

@ -3,7 +3,7 @@ make allclean:
make all
make clean:
make clean -C ../../addins/riscv-arch-test
make clean -C ../../tests/riscof
make clean -C ../../tests/wally-riscv-arch-test
# make allclean -C ../../tests/imperas-riscv-tests
@ -15,8 +15,8 @@ make all:
#make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs
# Build riscv-arch-test 64 and 32-bit versions
make -C ../../addins/riscv-arch-test --jobs
make -C ../../addins/riscv-arch-test XLEN=32 --jobs
make -C ../../tests/riscof/ --jobs
make -C ../../tests/riscof/ XLEN=32 --jobs
# Build wally-riscv-arch-test
make -C ../../tests/wally-riscv-arch-test/ --jobs

View File

@ -1,6 +1,6 @@
ROOT := ../..
SUFFIX := work
ARCHDIR := $(ROOT)/addins/riscv-arch-test
ARCHDIR := $(ROOT)/tests/riscof
WALLYDIR:= $(ROOT)/tests/wally-riscv-arch-test
# IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
# ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) $(IMPERASDIR)/$(SUFFIX)

View File

@ -121,11 +121,11 @@ module fctrl (
assign FmtD = 0;
else if (`FPSIZES == 2)begin
logic [1:0] FmtTmp;
assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = (`FMT == FmtTmp);
end
else if (`FPSIZES == 3|`FPSIZES == 4)
assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
// Final Res Sel:
// fp int

View File

@ -34,13 +34,14 @@ module fpu (
input logic reset,
input logic [2:0] FRM_REGW, // Rounding mode from CSR
input logic [31:0] InstrD, // instruction from IFU
input logic [`XLEN-1:0] ReadDataW,// Read data from memory
input logic [`FLEN-1:0] ReadDataW,// Read data from memory
input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU)
input logic StallE, StallM, StallW, // stall signals from HZU
input logic FlushE, FlushM, FlushW, // flush signals from HZU
input logic [4:0] RdM, RdW, // which FP register to write to (from IEU)
input logic [1:0] STATUS_FS, // Is floating-point enabled?
output logic FRegWriteM, // FP register write enable
output logic FpLoadM, // Fp load instruction?
output logic FStallD, // Stall the decode stage
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
@ -348,6 +349,8 @@ module fpu (
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
assign FpLoadM = FResSelM[1];
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
@ -378,21 +381,7 @@ module fpu (
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// - for load instruction
generate
if(`FPSIZES == 1) assign ReadResW = {{`FLEN-`XLEN{1'b1}}, ReadDataW};
else if(`FPSIZES == 2)
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ReadDataW[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ReadDataW[`S_LEN-1:0]},
{{`FLEN-`D_LEN{1'b1}}, ReadDataW[`D_LEN-1:0]},
{{`FLEN-`H_LEN{1'b1}}, ReadDataW[`H_LEN-1:0]},
{{`FLEN-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); // NaN boxing zeroes
endgenerate
// select the result to be written to the FP register
mux2 #(`FLEN) FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
mux2 #(`FLEN) FPUResultMux (FpResW, ReadDataW, FResSelW[1], FPUResultW);
endmodule // fpu

View File

@ -63,9 +63,9 @@ module datapath (
input logic [2:0] ResultSrcW,
input logic [`XLEN-1:0] FCvtIntResW,
input logic [1:0] FResSelW,
output logic [`XLEN-1:0] ReadDataW,
input logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
// Hazard Unit signals
output logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E,
output logic [4:0] RdE, RdM, RdW
@ -121,7 +121,6 @@ module datapath (
// Writeback stage pipeline register and logic
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
flopen #(`XLEN) ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
// floating point interactions: fcvt, fp stores
if (`F_SUPPORTED) begin:fpmux

View File

@ -60,11 +60,11 @@ module ieu (
output logic InvalidateICacheM, FlushDCacheM,
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
input logic [`XLEN-1:0] CSRReadValW, MDUResultW,
input logic [1:0] FResSelW,
input logic [`XLEN-1:0] FCvtIntResW,
output logic [4:0] RdW,
output logic [`XLEN-1:0] ReadDataW,
input logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
output logic InstrValidM,
// hazards
@ -109,7 +109,7 @@ module ieu (
.FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
.CSRReadValW, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw(
.Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW,

View File

@ -30,12 +30,32 @@
`include "wally-config.vh"
module bigendianswap (
module bigendianswap #(parameter LEN=`XLEN) (
input logic BigEndianM,
input logic [`XLEN-1:0] a,
output logic [`XLEN-1:0] y);
input logic [LEN-1:0] a,
output logic [LEN-1:0] y);
if(`XLEN == 64) begin
if(LEN == 128) begin
always_comb
if (BigEndianM) begin // swap endianness
y[127:120] = a[7:0];
y[119:112] = a[15:8];
y[111:104] = a[23:16];
y[103:96] = a[31:24];
y[95:88] = a[39:32];
y[87:80] = a[47:40];
y[79:72] = a[55:48];
y[71:64] = a[63:56];
y[63:56] = a[71:64];
y[55:48] = a[79:72];
y[47:40] = a[87:80];
y[39:32] = a[95:88];
y[31:24] = a[103:96];
y[23:16] = a[111:104];
y[15:8] = a[119:112];
y[7:0] = a[127:120];
end else y = a;
end else if(LEN == 64) begin
always_comb
if (BigEndianM) begin // swap endianness
y[63:56] = a[7:0];

View File

@ -51,11 +51,13 @@ module lsu (
input logic [`XLEN-1:0] IEUAdrE,
(* mark_debug = "true" *)output logic [`XLEN-1:0] IEUAdrM,
input logic [`XLEN-1:0] WriteDataE,
output logic [`XLEN-1:0] ReadDataM,
output logic [`LLEN-1:0] ReadDataW,
// cpu privilege
input logic [1:0] PrivilegeModeW,
input logic BigEndianM,
input logic sfencevmaM,
// fpu
input logic FpLoadM,
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM,
output logic LoadMisalignedFaultM, LoadAccessFaultM,
@ -110,6 +112,7 @@ module lsu (
logic [`XLEN-1:0] LSUWriteDataM;
logic [(`XLEN-1)/8:0] ByteMaskM;
logic [`XLEN-1:0] WriteDataM;
logic [`LLEN-1:0] ReadDataM;
// *** TO DO: Burst mode
@ -128,7 +131,7 @@ module lsu (
.DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM,
.TrapM, .DCacheStallM, .SATP_REGW, .PCF,
.STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW,
.ReadDataM, .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M,
.ReadDataM(ReadDataM[`XLEN-1:0]), .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M,
.IEUAdrExtM, .PTE, .LSUWriteDataM, .PageType, .PreLSURWM, .LSUAtomicM, .IEUAdrE,
.LSUAdrE, .PreLSUPAdrM, .CPUBusy, .InterlockStall, .SelHPTW,
.IgnoreRequestTLB, .IgnoreRequestTrapM);
@ -187,8 +190,8 @@ module lsu (
// Either Data Cache or Data Tightly Integrated Memory or just bus interface
/////////////////////////////////////////////////////////////////////////////////////////////
logic [`XLEN-1:0] AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM;
logic [`XLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
logic [`XLEN-1:0] ReadDataWordMuxM;
logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM;
logic [`LLEN-1:0] ReadDataWordMuxM;
logic IgnoreRequest;
logic SelUncachedAdr;
assign IgnoreRequest = IgnoreRequestTLB | IgnoreRequestTrapM;
@ -197,7 +200,7 @@ module lsu (
// *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW.
// Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops
dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM,
.ReadDataWordM, .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
.ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM,
.DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM),
.DCacheMiss, .DCacheAccess);
end
@ -222,14 +225,14 @@ module lsu (
.SelUncachedAdr, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM,
.BusStall, .BusCommittedM);
mux2 #(`XLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1(DCacheBusWriteData[`XLEN-1:0]),
mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}),
.s(SelUncachedAdr), .y(ReadDataWordMuxM));
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM), .d1(FinalWriteDataM),
mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM),
.s(SelUncachedAdr), .y(LSUBusHWDATA));
if(CACHE_ENABLED) begin : dcache
cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`XLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache(
.clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM),
.FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM),
.ByteMask(ByteMaskM), .WordCount,
@ -253,7 +256,7 @@ module lsu (
// Atomic operations
/////////////////////////////////////////////////////////////////////////////////////////////
if (`A_SUPPORTED) begin:atomic
atomic atomic(.clk, .reset, .StallW, .ReadDataM, .LSUWriteDataM, .LSUPAdrM,
atomic atomic(.clk, .reset, .StallW, .ReadDataM(ReadDataM[`XLEN-1:0]), .LSUWriteDataM, .LSUPAdrM,
.LSUFunct7M, .LSUFunct3M, .LSUAtomicM, .PreLSURWM, .IgnoreRequest,
.AMOWriteDataM, .SquashSCW, .LSURWM);
end else begin:lrsc
@ -266,7 +269,13 @@ module lsu (
subwordwrite subwordwrite(.LSUPAdrM(LSUPAdrM[2:0]),
.LSUFunct3M, .AMOWriteDataM, .LittleEndianWriteDataM, .ByteMaskM);
subwordread subwordread(.ReadDataWordMuxM, .LSUPAdrM(LSUPAdrM[2:0]),
.Funct3M(LSUFunct3M), .ReadDataM);
.FpLoadM, .Funct3M(LSUFunct3M), .ReadDataM);
/////////////////////////////////////////////////////////////////////////////////////////////
// MW Pipeline Register
/////////////////////////////////////////////////////////////////////////////////////////////
flopen #(`LLEN) ReadDataMWReg(clk, ~StallW, ReadDataM, ReadDataW);
/////////////////////////////////////////////////////////////////////////////////////////////
// Big Endian Byte Swapper
@ -274,8 +283,8 @@ module lsu (
// swap the bytes when read from big-endian memory
/////////////////////////////////////////////////////////////////////////////////////////////
if (`BIGENDIAN_SUPPORTED) begin:endian
bigendianswap storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
bigendianswap loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM));
bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM));
end else begin
assign FinalWriteDataM = LittleEndianWriteDataM;
assign LittleEndianReadDataWordM = ReadDataWordM;

View File

@ -32,10 +32,11 @@
module subwordread
(
input logic [`XLEN-1:0] ReadDataWordMuxM,
input logic [`LLEN-1:0] ReadDataWordMuxM,
input logic [2:0] LSUPAdrM,
input logic [2:0] Funct3M,
output logic [`XLEN-1:0] ReadDataM
input logic FpLoadM,
output logic [`LLEN-1:0] ReadDataM
);
logic [7:0] ByteM;
@ -74,18 +75,31 @@ module subwordread
1'b1: WordM = ReadDataWordMuxM[63:32];
endcase
// sign extension
logic [63:0] DblWordM;
assign DblWordM = ReadDataWordMuxM[63:0];
// sign extension/ NaN boxing
always_comb
case(Funct3M)
3'b000: ReadDataM = {{56{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: ReadDataM = {{32{WordM[31]}}, WordM[31:0]}; // lw
3'b011: ReadDataM = ReadDataWordMuxM; // ld
3'b100: ReadDataM = {56'b0, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {48'b0, HalfwordM[15:0]}; // lhu
3'b110: ReadDataM = {32'b0, WordM[31:0]}; // lwu
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: if(`ZFH_SUPPORTED)
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: if(`F_SUPPORTED)
ReadDataM = {{`LLEN-32{WordM[31]|FpLoadM}}, WordM[31:0]}; // lw/flw
else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw
3'b011: if(`D_SUPPORTED)
ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadM}}, DblWordM[63:0]}; // ld/fld
else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld
3'b100: if(`Q_SUPPORTED)
ReadDataM = FpLoadM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq
else
ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
3'b110: ReadDataM = {{`LLEN-32{1'b0}}, WordM[31:0]}; // lwu
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
endcase
end else begin:swrmux // 32-bit
// byte mux
always_comb
@ -105,13 +119,18 @@ module subwordread
// sign extension
always_comb
case(Funct3M)
3'b000: ReadDataM = {{24{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: ReadDataM = ReadDataWordMuxM; // lw
3'b100: ReadDataM = {24'b0, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {16'b0, HalfwordM[15:0]}; // lhu
default: ReadDataM = ReadDataWordMuxM;
case(Funct3M)
3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: if(`ZFH_SUPPORTED)
ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadM}}, HalfwordM[15:0]}; // lh/flh
else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh
3'b010: if(`F_SUPPORTED)
ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadM}}, ReadDataWordMuxM[31:0]}; // lw/flw
else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw
3'b011: ReadDataM = ReadDataWordMuxM; // fld
3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
endcase
end
endmodule

View File

@ -98,6 +98,7 @@ module wallypipelinedcore (
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FRegWriteM;
logic FPUStallD;
logic FpLoadM;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
@ -128,8 +129,7 @@ module wallypipelinedcore (
logic [`XLEN-1:0] IEUAdrE;
(* mark_debug = "true" *) logic [`XLEN-1:0] WriteDataE;
(* mark_debug = "true" *) logic [`XLEN-1:0] IEUAdrM;
(* mark_debug = "true" *) logic [`XLEN-1:0] ReadDataM;
logic [`XLEN-1:0] ReadDataW;
logic [`LLEN-1:0] ReadDataW;
logic CommittedM;
// AHB ifu interface
@ -229,8 +229,8 @@ module wallypipelinedcore (
.RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM,
// Writeback stage
.CSRReadValW, .ReadDataM, .MDUResultW,
.RdW, .ReadDataW,
.CSRReadValW, .MDUResultW,
.RdW, .ReadDataW(ReadDataW[`XLEN-1:0]),
.InstrValidM,
.FCvtIntResW,
.FResSelW,
@ -253,9 +253,10 @@ module wallypipelinedcore (
.AtomicM, .TrapM,
.CommittedM, .DCacheMiss, .DCacheAccess,
.SquashSCW,
.FpLoadM,
//.DataMisalignedM(DataMisalignedM),
.IEUAdrE, .IEUAdrM, .WriteDataE,
.ReadDataM, .FlushDCacheM,
.ReadDataW, .FlushDCacheM,
// connected to ahb (all stay the same)
.LSUBusAdr, .LSUBusRead, .LSUBusWrite, .LSUBusAck, .LSUBusInit,
.LSUBusHRDATA, .LSUBusHWDATA, .LSUBusSize, .LSUBurstType, .LSUTransType, .LSUTransComplete,
@ -383,13 +384,14 @@ module wallypipelinedcore (
.clk, .reset,
.FRM_REGW, // Rounding mode from CSR
.InstrD, // instruction from IFU
.ReadDataW,// Read data from memory
.ReadDataW(ReadDataW[`FLEN-1:0]),// Read data from memory
.ForwardedSrcAE, // Integer input being processed (from IEU)
.StallE, .StallM, .StallW, // stall signals from HZU
.FlushE, .FlushM, .FlushW, // flush signals from HZU
.RdM, .RdW, // which FP register to write to (from IEU)
.STATUS_FS, // is floating-point enabled?
.FRegWriteM, // FP register write enable
.FpLoadM,
.FStallD, // Stall the decode stage
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory

View File

@ -1 +1,2 @@
verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv
verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv

1048
pipelined/srt/qsel4.sv Normal file

File diff suppressed because it is too large Load Diff

2
pipelined/srt/sim-srt4 Executable file
View File

@ -0,0 +1,2 @@
vsim -do "do srt-radix4.do"

1
pipelined/srt/sim-srt4-batch Executable file
View File

@ -0,0 +1 @@
vsim -c -do "do srt-radix4.do"

View File

@ -0,0 +1,31 @@
# srt.do
#
# David_Harris@hmc.edu 19 October 2021
# Use this wally-pipelined.do file to run this example.
# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
# do wally-pipelined.do
# or, to run from a shell, type the following at the shell prompt:
# vsim -do wally-pipelined.do -c
# (omit the "-c" to see the GUI while running from the shell)
onbreak {resume}
# create library
if [file exists work] {
vdel -all
}
vlib work
vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
vopt +acc work.testbenchradix4 -o workopt
vsim workopt
-- display input and output signals as hexidecimal values
add wave /testbenchradix4/*
add wave /testbenchradix4/srtradix4/*
add wave /testbenchradix4/srtradix4/qsel4/*
add wave /testbenchradix4/srtradix4/otfc4/*
-- Run the Simulation
run -all

323
pipelined/srt/srt-radix4.sv Normal file
View File

@ -0,0 +1,323 @@
///////////////////////////////////////////
// srt.sv
//
// Written: David_Harris@hmc.edu 13 January 2022
// Modified:
//
// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF)
module srtradix4 (
input logic clk,
input logic Start,
input logic Stall, // *** multiple pipe stages
input logic Flush, // *** multiple pipe stages
// Floating Point Inputs
// later add exponents, signs, special cases
input logic XSign, YSign,
input logic [`NE-1:0] XExp, YExp,
input logic [`NF-1:0] XFrac, YFrac,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic rsign,
output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers
output logic [`NE-1:0] rExp,
output logic [3:0] Flags
);
// logic qp, qz, qm; // quotient is +1, 0, or -1
logic [3:0] q;
logic [`NE-1:0] calcExp;
logic calcSign;
logic [`DIVLEN-1:0] X, Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN;
logic [`DIVLEN+3:0] WC, WCA, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
// Otherwise, the divisor is retained and the partial remainder
// is fed back for the next iteration.
// - assumed one is added here since all numbers are normlaized
// *** wait what about zero? is that specal case? can the divider handle it?
// - when the start signal is asserted X and 0 are loaded into WS and WC
// - otherwise load WSA into the flipflop
// *** what does N and A stand for?
// *** change shift amount for radix4
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D);
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
// *** change this for radix 4 - generate w/ stine code
// q encoding:
// 1000 = +2
// 0100 = +1
// 0000 = 0
// 0010 = -1
// 0001 = -2
qsel4 qsel4(.D, .WS, .WC, .q);
// Store the expoenent and sign until division is done
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign);
// Divisor Selection logic
// *** radix 4 change to choose -2 to 2
// - choose the negitive version of what's being selected
assign DBar = ~D;
assign DBar2 = {~D[`DIVLEN+2:0], 1'b1};
assign D2 = {D[`DIVLEN+2:0], 1'b0};
always_comb
case (q)
4'b1000: Dsel = DBar2;
4'b0100: Dsel = DBar;
4'b0000: Dsel = {(`DIVLEN+4){1'b0}};
4'b0010: Dsel = D;
4'b0001: Dsel = D2;
default: Dsel = {`DIVLEN+4{1'bx}};
endcase
// Partial Product Generation
// WSA, WCA = WS + WC - qD
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
//*** change for radix 4
otfc4 #(`DIVLEN) otfc4(clk, Start, q, Quot);
expcalc expcalc(.XExp, .YExp, .calcExp);
signcalc signcalc(.XSign, .YSign, .calcSign);
endmodule
////////////////
// Submodules //
////////////////
///////////////////
// Preprocessing //
///////////////////
module srtpreproc (
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [`NF-1:0] XFrac, YFrac,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN-1:0] X, D,
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
output logic intSign // Quotient integer sign
);
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
logic [`XLEN-1:0] PosA, PosB;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
assign PreprocA = ExtraA << zeroCntA;
assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}};
assign X = Int ? PreprocA : PreprocX;
assign D = Int ? PreprocB : PreprocY;
assign intExp = zeroCntB - zeroCntA + 1;
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
endmodule
/////////////////////////////////
// Quotient Selection, Radix 2 //
/////////////////////////////////
module qsel2 ( // *** eventually just change to 4 bits
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz, qm
);
logic [`DIVLEN+3:`DIVLEN] p, g;
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Quotient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign #1 sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
(ps[52]&pc[52]))))); */
// Produce quotient = +1, 0, or -1
assign #1 qp = magnitude & ~sign;
assign #1 qz = ~magnitude;
assign #1 qm = magnitude & sign;
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc4 #(parameter N=65) (
input logic clk,
input logic Start,
input logic [3:0] q,
output logic [N-1:0] r
);
// The on-the-fly converter transfers the quotient
// bits to the quotient as they come.
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-2 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N:0] QR, QMR;
// if starting a new divison set Q to 0 and QM to -1
mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux);
mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux);
flop #(N+3) Qreg(clk, QMux, Q);
flop #(N+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
// else if q = 1 Q = {Q, 01} QM = {Q, 00}
// else if q = 0 Q = {Q, 00} QM = {QM, 11}
// else if q = -1 Q = {QM, 11} QM = {QM, 10}
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how does the 0 concatination numbers work?
always_comb begin
QR = Q[N:0];
QMR = QM[N:0]; // Shift Q and QM
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
end else if (q[2]) begin // +1
QNext = {QR, 2'b01};
QMNext = {QR, 2'b00};
end else if (q[1]) begin // -1
QNext = {QMR, 2'b11};
QMNext = {QMR, 2'b10};
end else if (q[0]) begin // -2
QNext = {QMR, 2'b10};
QMNext = {QMR, 2'b01};
end else begin // 0
QNext = {QR, 2'b00};
QMNext = {QMR, 2'b11};
end
end
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
endmodule
/////////
// csa //
/////////
module csa #(parameter N=69) (
input logic [N-1:0] in1, in2, in3,
input logic cin,
output logic [N-1:0] out1, out2
);
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is required to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
assign #1 out1 = in1 ^ in2 ^ in3;
assign #1 out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) |
(in2[N-2:0] & in3[N-2:0]), cin};
endmodule
//////////////
// expcalc //
//////////////
module expcalc(
input logic [`NE-1:0] XExp, YExp,
output logic [`NE-1:0] calcExp
);
assign calcExp = XExp - YExp + (`NE)'(`BIAS);
endmodule
//////////////
// signcalc //
//////////////
module signcalc(
input logic XSign, YSign,
output logic calcSign
);
assign calcSign = XSign ^ YSign;
endmodule

View File

@ -1 +1,3 @@
add wave -noupdate /testbench/clk
add wave -noupdate /testbench/*
add wave -noupdate /testbench/srt/*
add wave -noupdate /testbench/srt/otfc2/*

13
pipelined/srt/stine/notes Normal file
View File

@ -0,0 +1,13 @@
Dividend x --(0.10101111), divisord --(0.11000101)(i -- 16(0.1100)2- 12)
X = 175 (xAF)
D = 197 (xC5)
X = 175/256 = 0.68359375
D = 197/256 = 0.76953125
Note: Add lg(r) extra iterations due to shifting of computed q
q_{computed} = q / radix
./srt4div 0.68359375 0.76953125 4 10

Binary file not shown.

View File

@ -1,83 +1,45 @@
#include "disp.h"
#include <math.h>
// QSLC is for division by recuerrence for
// r=4 using a CPA - See Table 5.9 EL
int qslc (double prem, double d) {
int q;
// For Debugging
printf("d --> %lg\n", d);
printf("rw --> %lg\n", prem);
if ((d>=0.0)&&(d<1.0)) {
if (prem>=1.0)
q = 2;
else if (prem>=0.25)
q = 1;
else if (prem>=-0.25)
q = 0;
else if (prem >= -1)
q = -1;
else
q = -2;
return q;
}
if ((d>=1.0)&&(d<2.0)) {
if (prem>=2.0)
q = 2;
else if (prem>=0.66667)
q = 1;
else if (prem>=-0.6667)
q = 0;
else if (prem >= -2)
q = -1;
else
q = -2;
return q;
}
if ((d>=2.0)&&(d<3.0)) {
if (prem>=4.0)
q = 2;
else if (prem>=1.25)
q = 1;
else if (prem>=-1.25)
q = 0;
else if (prem >= -4)
q = -1;
else
q = -2;
return q;
}
if ((d>=3.0)&&(d<4.0)) {
if (prem>=5.0)
if ((d>=8.0)&&(d<9.0)) {
if (prem>=6.0)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -5)
else if (prem >= -6)
q = -1;
else
q = -2;
return q;
}
if ((d>=4.0)&&(d<5.0)) {
if (prem>=6.66667)
if ((d>=9.0)&&(d<10.0)) {
if (prem>=7)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -6.66667)
else if (prem >= 7.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=5.0)&&(d<6.0)) {
if ((d>=10.0)&&(d<11.0)) {
if (prem>=8.0)
q = 2;
else if (prem>=2.0)
@ -91,7 +53,21 @@ int qslc (double prem, double d) {
return q;
}
if ((d>=6.0)&&(d<7.0)) {
if ((d>=11.0)&&(d<12.0)) {
if (prem>=8.0)
q = 2;
else if (prem>=2.0)
q = 1;
else if (prem>=-2.0)
q = 0;
else if (prem >= -8.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=12.0)&&(d<13.0)) {
if (prem>=10.0)
q = 2;
else if (prem>=4.0)
@ -105,21 +81,35 @@ int qslc (double prem, double d) {
return q;
}
if ((d>=7.0)&&(d<8.0)) {
if (prem>=11.0)
if ((d>=13.0)&&(d<14.0)) {
if (prem>=10.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -11.0)
else if (prem >= -10.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=8.0)&&(d<9.0)) {
if ((d>=14.0)&&(d<15.0)) {
if (prem>=10.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -10.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=15.0)&&(d<16.0)) {
if (prem>=12.0)
q = 2;
else if (prem>=4.0)
@ -133,106 +123,9 @@ int qslc (double prem, double d) {
return q;
}
if ((d>=9.0)&&(d<10.0)) {
if (prem>=15.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -15.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=10.0)&&(d<11.0)) {
if (prem>=15.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -15.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=11.0)&&(d<12.0)) {
if (prem>=16.0)
q = 2;
else if (prem>=4.0)
q = 1;
else if (prem>=-4.0)
q = 0;
else if (prem >= -16.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=12.0)&&(d<13.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=13.0)&&(d<14.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=14.0)&&(d<15.0)) {
if (prem>=20.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -20.0)
q = -1;
else
q = -2;
return q;
}
if ((d>=15.0)&&(d<16.0)) {
if (prem>=24.0)
q = 2;
else if (prem>=8.0)
q = 1;
else if (prem>=-8.0)
q = 0;
else if (prem >= -24.0)
q = -1;
else
q = -2;
return q;
}
}
/*
This routine performs a radix-4 SRT division
algorithm. The user inputs the numerator, the denominator,
@ -246,6 +139,8 @@ int main(int argc, char* argv[]) {
int q;
int num_iter, i;
int prec;
int radix = 4;
if (argc < 5) {
fprintf(stderr,
"Usage: %s numerator denominator num_iterations prec\n",
@ -267,27 +162,29 @@ int main(int argc, char* argv[]) {
printf("\n");
Q = 0;
P = N*0.25;
P = N * pow(2.0, -log2(radix));
printf("N = %lg, D = %lg, N/D = %lg, num_iter = %d \n\n",
N, D, N/D, num_iter);
for (scale = 1, i = 0; i < num_iter; i++) {
// Shift by r
scale = scale*0.25;
q = qslc(flr((4*P)*16,3), D*16);
//q = -q;
scale = scale * pow(2.0, -log2(radix));
// (4*P)*8 because of footnote in Table 5.9, page 296 EL
// i.e., real value = shown value / 8
// D*16 since we use 4 bits of D (1 bit known)
q = qslc(flr((radix * P) * 8, 3), D*16);
printf("4*W[n] = ");
disp_bin(4*P,3,prec,stdout);
disp_bin(radix*P, 3, prec, stdout);
printf("\n");
printf("q*D = ");
disp_bin(q*D,3,prec,stdout);
disp_bin(q*D, 3, prec, stdout);
printf("\n");
printf("W[n+1] = ");
disp_bin(P ,3,prec,stdout);
disp_bin(P ,3, prec, stdout);
printf("\n");
// Recurrence
P = 4*P - q*D;
P = radix * P - q * D;
// OTFC
Q = Q + q*scale;
Q = Q + q * scale;
printf("i = %d, q = %d, Q = %1.18lf, W = %1.18lf\n", i, q, Q, P);
printf("i = %d, q = %d", i, q);
printf(", Q = ");
@ -296,8 +193,9 @@ int main(int argc, char* argv[]) {
disp_bin(P, 3, prec, stdout);
printf("\n\n");
}
// Is shifted partial remainder negative?
if (P < 0) {
Q = Q - scale;
Q = Q - pow(2.0, -prec);
P = P + D;
printf("\nCorrecting Negative Remainder\n");
printf("Q = %1.18lf, W = %1.18lf\n", Q, P);
@ -306,9 +204,12 @@ int main(int argc, char* argv[]) {
printf(", W = ");
disp_bin(P, 3, prec, stdout);
printf("\n");
}
RQ = flr(N/D, (double) prec);
RD = Q*4;
}
// Output Results
RQ = N/D;
// Since q_{computed} = q / radix, multiply by radix
RD = Q * radix;
printf("true = %1.18lf, computed = %1.18lf, \n", RQ, RD);
printf("true = ");
disp_bin(RQ, 3, prec, stdout);

View File

@ -0,0 +1,148 @@
`include "wally-config.vh"
`define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF)
/////////////
// counter //
/////////////
module counter(input logic clk,
input logic req,
output logic done);
logic [5:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
// build a divider which completes in fewer iterations.
// You are not responsible for the (trivial) circuit
// design of the block.
always @(posedge clk)
begin
if (count == `DIVLEN/2+1) done <= #1 1;
else if (done | req) done <= #1 0;
if (req) count <= #1 0;
else count <= #1 count+1;
end
endmodule
///////////
// clock //
///////////
module clock(clk);
output clk;
// Internal clk signal
logic clk;
endmodule
//////////
// testbench //
//////////
module testbenchradix4;
logic clk;
logic req;
logic done;
logic [63:0] a, b;
logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp;
logic asign, bsign;
logic [51:0] r, rOTFC;
logic [`DIVLEN-1:0] Quot, QuotOTFC;
logic [54:0] rp, rm; // positive quotient digits
// Test parameters
parameter MEM_SIZE = 40000;
parameter MEM_WIDTH = 64+64+64;
`define memr 63:0
`define memb 127:64
`define mema 191:128
// Test logicisters
logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file
logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a
// bit field of an array
logic [63:0] correctr, nextr, diffn, diffp;
logic [10:0] rExp;
logic rsign;
integer testnum, errors;
// Divider
srtradix4 srtradix4(.clk, .Start(req),
.Stall(1'b0), .Flush(1'b0),
.XExp(aExp), .YExp(bExp), .rExp,
.XSign(asign), .YSign(bsign), .rsign,
.XFrac(afrac), .YFrac(bfrac),
.SrcA('0), .SrcB('0), .Fmt(2'b00),
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
.Quot, .Rem(), .Flags());
// Counter
counter counter(clk, req, done);
initial
forever
begin
clk = 1; #17;
clk = 0; #17;
end
// Read test vectors from disk
initial
begin
testnum = 0;
errors = 0;
$readmemh ("testvectors", Tests);
Vec = Tests[testnum];
a = Vec[`mema];
{asign, aExp, afrac} = a;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
r = Quot[`DIVLEN-1:`DIVLEN - 52];
req <= 1;
end
// Apply directed test vectors read from file.
always @(posedge clk)
begin
r = Quot[`DIVLEN-1:`DIVLEN - 52];
if (done) begin
req <= 1;
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin
errors = errors+1;
$display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
$display("failed\n");
$stop;
end
if (afrac === 52'hxxxxxxxxxxxxx)
begin
$display("%d Tests completed successfully", testnum);
$stop;
end
end
if (req)
begin
req <= 0;
correctr = nextr;
testnum = testnum+1;
Vec = Tests[testnum];
$display("a = %h b = %h",a,b);
a = Vec[`mema];
{asign, aExp, afrac} = a;
b = Vec[`memb];
{bsign, bExp, bfrac} = b;
nextr = Vec[`memr];
end
end
endmodule

View File

@ -7,7 +7,7 @@ module counter(input logic clk,
input logic req,
output logic done);
logic [5:0] count;
logic [7:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
@ -17,7 +17,7 @@ module counter(input logic clk,
always @(posedge clk)
begin
if (count == 54) done <= #1 1;
if (count == `DIVLEN+1) done <= #1 1;
else if (done | req) done <= #1 0;
if (req) count <= #1 0;
else count <= #1 count+1;
@ -110,12 +110,14 @@ module testbench;
always @(posedge clk)
begin
r = Quot[`DIVLEN:`DIVLEN - 52];
rOTFC = QuotOTFC[`DIVLEN:`DIVLEN - 52];
if (done)
begin
req <= #5 1;
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1)) // check if accurate to 1 ulp
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin
errors = errors+1;
$display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);

View File

@ -559,11 +559,11 @@ module testbench;
if ((dut.core.lsu.LSUPAdrM == 'h10000002) | (dut.core.lsu.LSUPAdrM == 'h10000005) | (dut.core.lsu.LSUPAdrM == 'h10000006)) begin \
if(!NO_SPOOFING) begin \
$display("%tns, %d instrs: Overwrite UART's Register in memory stage.", $time, AttemptedInstructionCount); \
force dut.core.ieu.dp.ReadDataM = ExpectedMemReadDataM; \
force dut.core.lsu.ReadDataM = ExpectedMemReadDataM; \
end \
end else \
if(!NO_SPOOFING) \
release dut.core.ieu.dp.ReadDataM; \
release dut.core.lsu.ReadDataM; \
if(textM.substr(0,5) == "rdtime") begin \
//$display("%tns, %d instrs: Overwrite MTIME_CLINT on read of MTIME in memory stage.", $time, InstrCountW-1); \
if(!NO_SPOOFING) \

View File

@ -51,7 +51,6 @@ module testbench;
string tests[];
logic [3:0] dummy;
string ProgramAddrMapFile, ProgramLabelMapFile;
logic [`AHBW-1:0] HRDATAEXT;
logic HREADYEXT, HRESPEXT;
logic [31:0] HADDR;
@ -65,6 +64,9 @@ logic [3:0] dummy;
logic HCLK, HRESETn;
logic [`XLEN-1:0] PCW;
string ProgramAddrMapFile, ProgramLabelMapFile;
integer ProgramAddrLabelArray [string] = '{ "begin_signature" : 0, "tohost" : 0 };
logic DCacheFlushDone, DCacheFlushStart;
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
@ -128,7 +130,7 @@ logic [3:0] dummy;
end
string signame, memfilename, pathname, objdumpfilename, adrstr, outputfile;
integer outputFilePointer, ProgramLabelMap, ProgramAddrMap;
integer outputFilePointer;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout;
@ -194,6 +196,9 @@ logic [3:0] dummy;
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
// declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array
// to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test)
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
$display("Read memfile %s", memfilename);
reset_ext = 1; # 42; reset_ext = 0;
end
@ -218,30 +223,12 @@ logic [3:0] dummy;
end
// Termination condition (i.e. we finished running current test)
if (DCacheFlushDone) begin
// Gets the memory location of begin_signature
adrstr = "0";
ProgramLabelMap = $fopen(ProgramLabelMapFile, "r");
ProgramAddrMap = $fopen(ProgramAddrMapFile, "r");
if (ProgramLabelMap & ProgramAddrMap) begin // check we found both files
while (!$feof(ProgramLabelMap)) begin
string label;
integer returncode;
returncode = $fgets(label, ProgramLabelMap);
returncode = $fgets(adrstr, ProgramAddrMap);
if (label == "begin_signature\n") begin
if (DEBUG) $display("%s begin_signature adrstr: %s", TEST, adrstr);
break;
end
end
end
if (adrstr == "0") begin
integer begin_signature_addr;
begin_signature_addr = ProgramAddrLabelArray["begin_signature"];
if (!begin_signature_addr)
$display("begin_signature addr not found in %s", ProgramLabelMapFile);
end
$fclose(ProgramLabelMap);
$fclose(ProgramAddrMap);
testadr = ($unsigned(adrstr.atohex()))/(`XLEN/8);
testadrNoBase = (adrstr.atohex() - `RAM_BASE)/(`XLEN/8);
testadr = ($unsigned(begin_signature_addr))/(`XLEN/8);
testadrNoBase = (begin_signature_addr - `RAM_BASE)/(`XLEN/8);
#600; // give time for instructions in pipeline to finish
if (TEST == "embench") begin
// Writes contents of begin_signature to .sim.output file
@ -263,8 +250,10 @@ logic [3:0] dummy;
for(i=0; i<SIGNATURESIZE; i=i+1) begin
sig32[i] = 'bx;
end
// riscof tests have a different signature, tests[0] == "1" refers to RISCVARCHTESTs
if (tests[0] == "1") signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
else signame = {pathname, tests[test], ".signature.output"};
// read signature, reformat in 64 bits if necessary
signame = {pathname, tests[test], ".signature.output"};
$readmemh(signame, sig32);
i = 0;
while (i < SIGNATURESIZE) begin
@ -319,7 +308,6 @@ logic [3:0] dummy;
end
end
// move onto the next test, check to see if we're done
// test = test + 2;
test = test + 1;
if (test == tests.size()) begin
if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
@ -337,6 +325,8 @@ logic [3:0] dummy;
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
ProgramAddrLabelArray = '{ "begin_signature" : 0, "tohost" : 0 };
updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
$display("Read memfile %s", memfilename);
reset_ext = 1; # 47; reset_ext = 0;
end
@ -364,7 +354,8 @@ logic [3:0] dummy;
(dut.core.ieu.dp.regf.we3 &
dut.core.ieu.dp.regf.a3 == 3 &
dut.core.ieu.dp.regf.wd3 == 1)) |
(dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM;
((dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM ) |
((dut.core.lsu.IEUAdrM == ProgramAddrLabelArray["tohost"]) & InstrMName == "SW" );
DCacheFlushFSM DCacheFlushFSM(.clk(clk),
.reset(reset),
@ -396,7 +387,7 @@ module riscvassertions;
assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
// assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported");
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
@ -524,5 +515,26 @@ module copyShadow
end
end
endmodule
endmodule
task automatic updateProgramAddrLabelArray;
input string ProgramAddrMapFile, ProgramLabelMapFile;
inout integer ProgramAddrLabelArray [string];
// Gets the memory location of begin_signature
integer ProgramLabelMapFP, ProgramAddrMapFP;
ProgramLabelMapFP = $fopen(ProgramLabelMapFile, "r");
ProgramAddrMapFP = $fopen(ProgramAddrMapFile, "r");
if (ProgramLabelMapFP & ProgramAddrMapFP) begin // check we found both files
while (!$feof(ProgramLabelMapFP)) begin
string label, adrstr;
integer returncode;
returncode = $fscanf(ProgramLabelMapFP, "%s\n", label);
returncode = $fscanf(ProgramAddrMapFP, "%s\n", adrstr);
if (ProgramAddrLabelArray.exists(label))
ProgramAddrLabelArray[label] = adrstr.atohex();
end
end
$fclose(ProgramLabelMapFP);
$fclose(ProgramAddrMapFP);
endtask

File diff suppressed because it is too large Load Diff

1
synthDC/secondtest/test2 Normal file
View File

@ -0,0 +1 @@
test 123

View File

@ -1,31 +1,17 @@
arch_dir = ../../addins/riscv-arch-test
work_dir = "./riscof_work"
work_dir = ./riscof_work
current_dir = $(shell pwd)
XLEN ?= 64
all: clone memfile
all: build
clone:
build:
mkdir -p $(work_dir)
mkdir -p work
sed 's,{0},$(current_dir),g;s,{1},32imc,g' config.ini > config32.ini
sed 's,{0},$(current_dir),g;s,{1},64gc,g' config.ini > config64.ini
riscof run --work-dir=$(work_dir) --config=config64.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env
cp -r $(work_dir)/rv64i_m work/
riscof run --work-dir=$(work_dir) --config=config32.ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env
cp -r $(work_dir)/rv32i_m work/
# sed >> config64.ini
# (cd $(arch_dir) && riscof validateyaml --config=config.ini)
# (cd $(arch_dir) && riscof --verbose info arch-test --clone)
# (cd $(arch_dir) && riscof testlist --config=config.ini --suite=riscv-arch-test/riscv-test-suite/ --env=riscv-arch-test/riscv-test-suite/env)
# sed -i 's/riscv{.}-unknown-/riscv64-unknown-/g' $(arch_dir)/spike/riscof_spike.py
# sed -i 's/riscv{.}-unknown-/riscv64-unknown-/g' $(arch_dir)/sail_cSim/riscof_sail_cSim.py
memfile:
sleep 1
find work/rv*/*/ -type f -name "*ref.elf" | while read f; do riscv64-unknown-elf-objdump -S -D "$$f" > "$$f.objdump"; done
find work/rv32*/*/ -type f -name "*ref.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 32 --input "$$f" --output "$$f.memfile"; done
find work/rv64*/*/ -type f -name "*ref.elf" | while read f; do riscv64-unknown-elf-elf2hex --bit-width 64 --input "$$f" --output "$$f.memfile"; done
find work/rv*/*/ -type f -name "*.objdump" | while read f; do extractFunctionRadix.sh $$f; done
sed 's,{0},$(current_dir),g;s,{1},$(XLEN)$(if $(findstring 64,$(XLEN)),gc,imc),g' config.ini > config$(XLEN).ini
riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
rm -rf work/rv$(XLEN)i_m
mv -f $(work_dir)/rv$(XLEN)i_m work/
clean:
rm -f config64.ini

View File

@ -90,7 +90,7 @@ class sail_cSim(pluginTemplate):
test_dir = testentry['work_dir']
test_name = test.rsplit('/',1)[1][:-2]
elf = 'ref.elf'
elf = 'Ref.elf'
execute = "@cd "+testentry['work_dir']+";"
@ -98,7 +98,7 @@ class sail_cSim(pluginTemplate):
compile_cmd = cmd + ' -D' + " -D".join(testentry['macros'])
execute+=compile_cmd+";"
execute += self.objdump_cmd.format(elf, self.xlen, 'ref.disass')
execute += self.objdump_cmd.format(elf, self.xlen, 'Ref.elf.objdump')
sig_file = os.path.join(test_dir, self.name[:-1] + ".signature")
execute += self.sail_exe[self.xlen] + ' --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)

View File

@ -1,11 +1,11 @@
hart_ids: [0]
hart0:
ISA: RV32IMCZicsr_Zifencei
ISA: RV32IMFCZicsr_Zifencei
physical_addr_sz: 32
User_Spec_Version: '2.3'
supported_xlen: [32]
misa:
reset-val: 0x40001104
reset-val: 0x40001124
rv32:
accessible: true
mxl:
@ -23,7 +23,7 @@ hart0:
warl:
dependency_fields: []
legal:
- extensions[25:0] bitmask [0x0001104, 0x0000000]
- extensions[25:0] bitmask [0x0001124, 0x0000000]
wr_illegal:
- Unchanged

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-ADD.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.392776//
// Created 2022-06-17 22:58:09.906970//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLT.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.393471//
// Created 2022-06-17 22:58:09.909889//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SLTU.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.393741//
// Created 2022-06-17 22:58:09.911056//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-SUB.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.393180//
// Created 2022-06-17 22:58:09.908718//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/src/WALLY-XOR.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.394013//
// Created 2022-06-17 22:58:09.913218//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-ADD.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.394307//
// Created 2022-06-17 22:58:09.914370//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.394785//
// Created 2022-06-17 22:58:09.916813//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLTU.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.395005//
// Created 2022-06-17 22:58:09.917963//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SUB.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.394545//
// Created 2022-06-17 22:58:09.915580//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation

View File

@ -1,7 +1,7 @@
///////////////////////////////////////////
// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-XOR.S
// David_Harris@hmc.edu & Katherine Parry
// Created 2022-01-27 08:08:42.395231//
// Created 2022-06-17 22:58:09.919138//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation