This commit is contained in:
Madeleine Masser-Frye 2022-06-24 00:23:53 +00:00
commit 77a46ead39
44 changed files with 2750 additions and 831 deletions

View File

@ -124,8 +124,6 @@
`define PLIC_NUM_SRC 53
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/buildroot/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/buildroot/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -130,8 +130,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
`define BPRED_ENABLED 0
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -128,8 +128,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -130,8 +130,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/rv32i/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv32i/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -128,8 +128,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -130,8 +130,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt"
`define BPRED_ENABLED 1
//`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
`define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE

View File

@ -32,7 +32,7 @@
`define DESIGN_COMPILER 0
// RV32 or RV64: XLEN = 32 or 64
`define XLEN 64
`define XLEN 32
// IEEE 754 compliance
`define IEEE754 0
@ -132,8 +132,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -131,8 +131,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -131,8 +131,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -131,8 +131,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/rv64i/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64i/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -131,8 +131,6 @@
`define PLIC_GPIO_ID 3
`define PLIC_UART_ID 10
`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
`define BPRED_ENABLED 1
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
`define TESTSBP 0

View File

@ -94,11 +94,12 @@
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6))
`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
// Disable spurious Verilator warnings

View File

@ -1,30 +1,9 @@
make allclean:
make clean
make all
make clean:
make clean -C ../../tests/riscof
make clean -C ../../tests/wally-riscv-arch-test
# make allclean -C ../../tests/imperas-riscv-tests
make all:
all: archtests wallytests memfiles
# *** Build old tests/imperas-riscv-tests for now;
# Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test
# DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired
#make -C ../../tests/imperas-riscv-tests --jobs
#make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs
# Build riscv-arch-test 64 and 32-bit versions
make -C ../../tests/riscof/ --jobs
make -C ../../tests/riscof/ XLEN=32 --jobs
# Build wally-riscv-arch-test
make -C ../../tests/wally-riscv-arch-test/ --jobs
make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs
# build the memfiles and address files.
make -f makefile-memfile wally-sim-files --jobs
# Only compile Imperas tests if they are installed locally.
# They are usually a symlink to $RISCV/imperas-riscv-tests and only
# get compiled there manually during installation
@ -36,4 +15,22 @@ make all:
# Link Linux test vectors (fix this later***)
#cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh
allclean: clean all
clean:
make clean -C ../../tests/riscof
make clean -C ../../tests/wally-riscv-arch-test
# make allclean -C ../../tests/imperas-riscv-tests
archtests:
# Build riscv-arch-test 64 and 32-bit versions
make -C ../../tests/riscof/ --jobs
make -C ../../tests/riscof/ XLEN=32 --jobs
wallytests:
# Build wally-riscv-arch-test
make -C ../../tests/wally-riscv-arch-test/ --jobs
make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs
memfiles:
make -f makefile-memfile wally-sim-files --jobs

View File

@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/
verilator=`which verilator`
basepath=$(dirname $0)/..
for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do
for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do
echo "$config linting..."
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then
echo "Exiting after $config lint due to errors or warnings"

View File

@ -8,8 +8,9 @@ IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX)
ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf")
OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump")
MEMFILES ?= $(ELFFILES:.elf=.elf.memfile)
ADDRFILES ?= $(ELFFILES:.elf=.elf.objdump.addr)
ADDRFILES ?= $(OBJDUMPFILES:.objdump=.objdump.addr)
print:
echo "files in $(ALLDIRS) are $(ELFFILES)."

View File

@ -71,7 +71,7 @@ for test in tests64gc:
grepstr="All tests ran without failures")
configs.append(tc)
tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv"] #, "imperas32mmu""wally32i",
tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i",
for test in tests32gc:
tc = TestCase(
name=test,

View File

@ -32,7 +32,7 @@ vlib work
# start and run simulation
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
# $num = the added words after the call
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697
vsim -voptargs=+acc work.testbenchfp -G TEST=$2

View File

@ -7,3 +7,22 @@ add wave -noupdate /testbenchfp/Y
add wave -noupdate /testbenchfp/Z
add wave -noupdate /testbenchfp/Res
add wave -noupdate /testbenchfp/Ans
add wave -noupdate /testbenchfp/DivStart
add wave -noupdate /testbenchfp/DivDone
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
add wave -group {Testbench} -noupdate /testbenchfp/*
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*

View File

@ -7,10 +7,10 @@ module cvtshiftcalc(
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NF:0] XManM, // input mantissas
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic CvtResDenormUfM,
output logic CvtResUf,
output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted
output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted
);
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
@ -31,8 +31,8 @@ module cvtshiftcalc(
// | `NF-1 zeros | Mantissa | 0's if nessisary |
// - otherwise:
// | LzcInM | 0's if nessisary |
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} :
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} :
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} :
{CvtLzcInM, {`NF+1{1'b0}}};

View File

@ -0,0 +1,15 @@
`include "wally-config.vh"
module divshiftcalc(
input logic [`DIVLEN+2:0] Quot,
input logic [`NE:0] DivCalcExpM,
output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,
output logic [`NE:0] CorrDivExp
);
assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]};
// the quotent is in the range [.5,2)
// if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift
assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]};
endmodule

View File

@ -12,11 +12,11 @@ module fcvt (
input logic XDenormE, // is the input denormalized
input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [`NE:0] CvtCalcExpE, // the calculated expoent
output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by
output logic [`LOGCVTLEN-1:0] CvtShiftAmtE, // how much to shift by
output logic CvtResDenormUfE,// does the result underflow or is denormalized
output logic CvtResSgnE, // the result's sign
output logic IntZeroE, // is the integer zero?
output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
output logic [`CVTLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
);
// OpCtrls:
@ -43,7 +43,7 @@ module fcvt (
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC
logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC
// seperate OpCtrl for code readability
@ -78,10 +78,10 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
assign CvtLzcInE = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
{XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
///////////////////////////////////////////////////////////////////////////
// shifter
@ -99,9 +99,9 @@ module fcvt (
// - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] :
(ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} :
CvtResDenormUfE&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] :
(ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}};
///////////////////////////////////////////////////////////////////////////
// exp calculations
@ -180,7 +180,7 @@ module fcvt (
// - shift left to normilize (-1-ZeroCnt)
// - newBias to make the biased exponent
//
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})};
// find if the result is dnormal or underflows
// - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0)
// - can't underflow an integer to Fp conversion

View File

@ -82,7 +82,7 @@ module fpu (
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM; // input's sign - memory stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
@ -104,23 +104,27 @@ module fpu (
logic FOpCtrlQ;
// Fma Signals
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvZE, InvZM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvZE, InvZM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
// Cvt Signals
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
//divide signals
logic [`DIVLEN+2:0] Quot;
logic [`NE:0] DivCalcExpM;
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
@ -317,7 +321,7 @@ module fpu (
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
@ -333,7 +337,7 @@ module fpu (
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
{AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
@ -351,9 +355,9 @@ module fpu (
assign FpLoadM = FResSelM[1];
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);

View File

@ -30,7 +30,7 @@
`include "wally-config.vh"
module postprocess(
input logic XSgnM, // input signs
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
@ -51,13 +51,15 @@ module postprocess(
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NE:0] DivCalcExpM, // the calculated expoent
input logic CvtResDenormUfM,
input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by
input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by
input logic CvtResSgnM, // the result's sign
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic IntZeroM, // is the input zero
input logic [1:0] PostProcSelM, // select result to be written to fp register
input logic [`DIVLEN+2:0] Quot,
output logic [`FLEN-1:0] PostProcResM, // FMA final result
output logic [4:0] PostProcFlgM,
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
@ -75,13 +77,14 @@ module postprocess(
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
logic UfPlus1; // do you add one (for determining underflow flag)
logic Round; // bits needed to determine rounding
logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted
logic Mult; // multiply opperation
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
@ -91,6 +94,7 @@ module postprocess(
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
logic [`NE+1:0] RoundExp;
logic [`NE:0] CorrDivExp;
logic [1:0] NegResMSBS;
logic CvtOp;
logic FmaOp;
@ -135,6 +139,7 @@ module postprocess(
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt);
always_comb
case(PostProcSelM)
@ -143,12 +148,12 @@ module postprocess(
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
end
2'b00: begin // cvt
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM};
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
end
2'b01: begin //div
ShiftAmt = 0;//{DivShiftAmt};
ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
2'b01: begin //div ***prob can take out
ShiftAmt = DivShiftAmt;
ShiftIn = {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}};
end
default: begin
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}};
@ -171,9 +176,9 @@ module postprocess(
// round to infinity
// round to nearest max magnitude
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
.InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
.UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
.DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
@ -181,7 +186,7 @@ module postprocess(
resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
.FmaOp, .DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult,
.CvtResSgnM, .RoundSgn, .ResSgn);
.XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn);
///////////////////////////////////////////////////////////////////////////////
// Flags

View File

@ -4,6 +4,8 @@ module resultsign(
input logic [2:0] FrmM,
input logic PSgnM, ZSgnEffM,
input logic InvZM,
input logic XSgnM,
input logic YSgnM,
input logic ZInfM,
input logic InfIn,
input logic NegSumM,
@ -25,6 +27,7 @@ module resultsign(
logic FmaResSgn;
logic FmaResSgnTmp;
logic Underflow;
logic DivSgn;
// logic ResultSgnTmp;
// Determine the sign if the sum is zero
@ -43,9 +46,10 @@ module resultsign(
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp;
// Sign for rounding calulation
assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
assign DivSgn = XSgnM^YSgnM;
assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
// Sign for rounding calulation
assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
endmodule

View File

@ -11,6 +11,7 @@ module round(
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic FmaOp,
input logic DivOp,
input logic [1:0] PostProcSelM,
input logic CvtResDenormUfM,
input logic ToInt,
@ -23,6 +24,7 @@ module round(
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic RoundSgn, // the result's sign
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic [`NE:0] CorrDivExp, // the calculated expoent
output logic UfPlus1, // do you add or subtract on from the result
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
output logic [`NF-1:0] ResFrac, // Result fraction
@ -303,7 +305,7 @@ module round(
case(PostProcSelM)
2'b10: RoundExp = SumExp; // fma
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
2'b01: RoundExp = 0; // divide
2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide
default: RoundExp = 0;
endcase

View File

@ -54,10 +54,6 @@ module bram1p1rw
logic [DATA_WIDTH-1:0] RAM [(2**ADDR_WIDTH)-1:0];
integer i;
initial begin
$readmemh("big64.txt", RAM);
end
always @ (posedge clk) begin
dout <= RAM[addr];
if(we) begin

View File

@ -60,7 +60,7 @@ module clint (
flopr #(16) entrydflop(HCLK, ~HRESETn, entry, entryd);
assign HRESPCLINT = 0; // OK
assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during accesses
assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during asynchronous MTIME accesses
// word aligned reads
if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000};
@ -87,8 +87,7 @@ module clint (
always_ff @(posedge HCLK or negedge HRESETn)
if (~HRESETn) begin
MSIP <= 0;
MTIMECMP <= 0;
// MTIMECMP is not reset
MTIMECMP <= 64'hFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts
end else if (memwrite) begin
if (entryd == 16'h0000) MSIP <= HWDATA[0];
if (entryd == 16'h4000) begin
@ -104,7 +103,6 @@ module clint (
always_ff @(posedge HCLK or negedge HRESETn)
if (~HRESETn) begin
MTIME <= 0;
// MTIMECMP is not reset
end else if (memwrite & entryd == 16'hBFF8) begin
// MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed
for(j=0;j<`XLEN/8;j++)

View File

@ -1,4 +1,4 @@
all: exptestgen testgen qslc_r4a2 qslc_r4a2b
all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2
sqrttestgen: sqrttestgen.c
gcc sqrttestgen.c -o sqrttestgen -lm
@ -19,5 +19,9 @@ qslc_r4a2b: qslc_r4a2b.c
gcc qslc_r4a2b.c -o qslc_r4a2b -lm
./qslc_r4a2b > qslc_r4a2b.tv
qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm
./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv
clean:
rm -f testgen exptestgen qslc_r4a2
rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2

1024
pipelined/srt/qsel4.dat Normal file

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,7 @@ module qsel4 (
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...

BIN
pipelined/srt/qslc_sqrt_r4a2 Executable file

Binary file not shown.

View File

@ -0,0 +1,198 @@
/*
Program: qslc_r4a2.c
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
User: James E. Stine
*/
#include <stdio.h>
#include <math.h>
#define DIVISOR_SIZE 3
#define CARRY_SIZE 7
#define SUM_SIZE 7
#define TOT_SIZE 7
void disp_binary(double, int, int);
struct bits {
unsigned int divisor : DIVISOR_SIZE;
int tot : TOT_SIZE;
} pla;
/*
Function: disp_binary
Description: This function displays a Double-Precision number into
four 16 bit integers using the global union variable
dp_number
Argument List: double x The value to be converted
int bits_to_left Number of bits left of radix point
int bits_to_right Number of bits right of radix point
Return value: none
*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
int i;
double diff;
if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) {
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
printf("0");
}
if (i == bits_to_right+1)
;
return;
}
if (x < 0.0)
x = pow(2.0, ((double) bits_to_left)) + x;
for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
diff = pow(2.0, ((double) -i) );
if (x < diff)
printf("0");
else {
printf("1");
x -= diff;
}
if (i == 0)
;
}
}
int main() {
int m;
int n;
int o;
pla.divisor = 0;
pla.tot = 0;
printf("\tcase({D[5:3],Wmsbs})\n");
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
printf("\t\t11'b");
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
printf("_");
disp_binary((double) pla.tot, TOT_SIZE, 0);
printf(": q = 4'b");
/*
4 bits for Radix 4 (a=2)
1000 = +2
0100 = +1
0000 = 0
0010 = -1
0001 = -2
*/
switch (pla.divisor) {
case 0:
if ((pla.tot) >= 24)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -8)
printf("0000");
else if ((pla.tot) >= -26)
printf("0010");
else
printf("0001");
break;
case 1:
if ((pla.tot) >= 28)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -10)
printf("0000");
else if ((pla.tot) >= -28)
printf("0010");
else
printf("0001");
break;
case 2:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -32)
printf("0010");
else
printf("0001");
break;
case 3:
if ((pla.tot) >= 32)
printf("1000");
else if ((pla.tot) >= 8)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -34)
printf("0010");
else
printf("0001");
break;
case 4:
if ((pla.tot) >= 36)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -12)
printf("0000");
else if ((pla.tot) >= -36)
printf("0010");
else
printf("0001");
break;
case 5:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 12)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -40)
printf("0010");
else
printf("0001");
break;
case 6:
if ((pla.tot) >= 40)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -44)
printf("0010");
else
printf("0001");
break;
case 7:
if ((pla.tot) >= 44)
printf("1000");
else if ((pla.tot) >= 16)
printf("0100");
else if ((pla.tot) >= -16)
printf("0000");
else if ((pla.tot) >= -46)
printf("0010");
else
printf("0001");
break;
default: printf ("XXX");
}
printf(";\n");
(pla.tot)++;
}
(pla.divisor)++;
}
printf("\tendcase\n");
}

File diff suppressed because it is too large Load Diff

View File

@ -17,7 +17,7 @@ if [file exists work] {
}
vlib work
vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
vopt +acc work.testbenchradix4 -o workopt
vsim workopt

View File

@ -30,42 +30,35 @@
`include "wally-config.vh"
`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF)
module srtradix4 (
input logic clk,
input logic Start,
input logic Stall, // *** multiple pipe stages
input logic Flush, // *** multiple pipe stages
// Floating Point Inputs
// later add exponents, signs, special cases
input logic XSign, YSign,
input logic [`NE-1:0] XExp, YExp,
input logic [`NF-1:0] XFrac, YFrac,
input logic DivStart,
input logic [`NE-1:0] XExpE, YExpE,
input logic [`NF:0] XManE, YManE,
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic XZeroE,
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic rsign,
output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers
output logic [`NE-1:0] rExp,
output logic [3:0] Flags
output logic DivDone,
output logic [`DIVLEN+2:0] Quot,
output logic [`XLEN-1:0] Rem, // *** later handle integers
output logic [`NE:0] DivCalcExpE
);
// logic qp, qz, qm; // quotient is +1, 0, or -1
logic [3:0] q;
logic [`NE-1:0] calcExp;
logic calcSign;
logic [`DIVLEN-1:0] X, Dpreproc;
logic [`NE:0] DivCalcExp;
logic [`DIVLEN:0] X;
logic [`DIVLEN-1:0] Dpreproc;
logic [`DIVLEN+3:0] WS, WSA, WSN;
logic [`DIVLEN+3:0] WC, WCA, WCN;
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
logic [$clog2(`XLEN+1)-1:0] intExp;
logic intSign;
srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
// Top Muxes and Registers
// When start is asserted, the inputs are loaded into the divider.
@ -77,11 +70,11 @@ module srtradix4 (
// - otherwise load WSA into the flipflop
// *** what does N and A stand for?
// *** change shift amount for radix4
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN);
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN);
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D);
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
// Quotient Selection logic
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
@ -94,9 +87,8 @@ module srtradix4 (
// 0001 = -2
qsel4 qsel4(.D, .WS, .WC, .q);
// Store the expoenent and sign until division is done
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
flopen #(1) signflop(clk, Start, calcSign, rsign);
// Store the expoenent and sign until division is DivDone
flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
// Divisor Selection logic
// *** radix 4 change to choose -2 to 2
@ -120,11 +112,11 @@ module srtradix4 (
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
//*** change for radix 4
otfc4 #(`DIVLEN) otfc4(clk, Start, q, Quot);
otfc4 otfc4(.clk, .DivStart, .q, .Quot);
expcalc expcalc(.XExp, .YExp, .calcExp);
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp);
signcalc signcalc(.XSign, .YSign, .calcSign);
divcounter divcounter(clk, DivStart, DivDone);
endmodule
@ -132,91 +124,154 @@ endmodule
// Submodules //
////////////////
/////////////
// counter //
/////////////
module divcounter(input logic clk,
input logic DivStart,
output logic DivDone);
logic [5:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
// build a divider which completes in fewer iterations.
// You are not responsible for the (trivial) circuit
// design of the block.
always @(posedge clk)
begin
DivDone = 0;
if (count == `DIVLEN/2+1) DivDone <= #1 1;
else if (DivDone | DivStart) DivDone <= #1 0;
if (DivStart) count <= #1 0;
else count <= #1 count+1;
end
endmodule
module qsel4 (
input logic [`DIVLEN+3:0] D,
input logic [`DIVLEN+3:0] WS, WC,
output logic [3:0] q
);
logic [6:0] Wmsbs;
logic [7:0] PreWmsbs;
logic [2:0] Dmsbs;
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
assign Wmsbs = PreWmsbs[7:1];
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
// D = 0001.xxx...
// Dmsbs = | |
// W = xxxx.xxx...
// Wmsbs = | |
logic [3:0] QSel4[1023:0];
initial begin
integer d, w, i, w2;
for(d=0; d<8; d++)
for(w=0; w<128; w++)begin
i = d*128+w;
w2 = w-128*(w>=64); // convert to two's complement
case(d)
0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-4) QSel4[i] = 4'b0000;
else if(w2>=-13) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
1: if(w2>=14) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-15) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
2: if(w2>=15) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-16) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
3: if(w2>=16) QSel4[i] = 4'b1000;
else if(w2>=4) QSel4[i] = 4'b0100;
else if(w2>=-6) QSel4[i] = 4'b0000;
else if(w2>=-18) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
4: if(w2>=18) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
5: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=6) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-20) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
6: if(w2>=20) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-22) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
7: if(w2>=24) QSel4[i] = 4'b1000;
else if(w2>=8) QSel4[i] = 4'b0100;
else if(w2>=-8) QSel4[i] = 4'b0000;
else if(w2>=-24) QSel4[i] = 4'b0010;
else QSel4[i] = 4'b0001;
endcase
end
end
assign q = QSel4[{Dmsbs,Wmsbs}];
endmodule
///////////////////
// Preprocessing //
///////////////////
module srtpreproc (
input logic [`XLEN-1:0] SrcA, SrcB,
input logic [`NF-1:0] XFrac, YFrac,
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
input logic [`NF:0] XManE, YManE,
input logic W64, // 32-bit ints on XLEN=64
input logic Signed, // Interpret integers as signed 2's complement
input logic Int, // Choose integer inputs
input logic Sqrt, // perform square root, not divide
output logic [`DIVLEN-1:0] X, D,
output logic [`DIVLEN:0] X,
output logic [`DIVLEN-1:0] Dpreproc,
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
output logic intSign // Quotient integer sign
);
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
logic [`XLEN-1:0] PosA, PosB;
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
// logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
// logic [`XLEN-1:0] PosA, PosB;
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
logic [`DIVLEN:0] PreprocA, PreprocX;
logic [`DIVLEN-1:0] PreprocB, PreprocY;
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
lzc #(`XLEN) lzcA (PosA, zeroCntA);
lzc #(`XLEN) lzcB (PosB, zeroCntB);
// lzc #(`XLEN) lzcA (PosA, zeroCntA);
// lzc #(`XLEN) lzcB (PosB, zeroCntB);
assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
assign PreprocA = ExtraA << zeroCntA;
assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}};
// assign PreprocA = ExtraA << zeroCntA;
// assign PreprocB = ExtraB << (zeroCntB + 1);
assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}};
assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}};
assign X = Int ? PreprocA : PreprocX;
assign D = Int ? PreprocB : PreprocY;
assign intExp = zeroCntB - zeroCntA + 1;
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
assign Dpreproc = Int ? PreprocB : PreprocY;
// assign intExp = zeroCntB - zeroCntA + 1;
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
endmodule
/////////////////////////////////
// Quotient Selection, Radix 2 //
/////////////////////////////////
module qsel2 ( // *** eventually just change to 4 bits
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
output logic qp, qz, qm
);
logic [`DIVLEN+3:`DIVLEN] p, g;
logic magnitude, sign, cout;
// The quotient selection logic is presented for simplicity, not
// for efficiency. You can probably optimize your logic to
// select the proper divisor with less delay.
// Quotient equations from EE371 lecture notes 13-20
assign p = ps ^ pc;
assign g = ps & pc;
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
assign #1 sign = p[`DIVLEN+3] ^ cout;
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
(ps[52]^pc[52]));
assign #1 sign = (ps[55]^pc[55])^
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
(ps[52]&pc[52]))))); */
// Produce quotient = +1, 0, or -1
assign #1 qp = magnitude & ~sign;
assign #1 qz = ~magnitude;
assign #1 qm = magnitude & sign;
endmodule
///////////////////////////////////
// On-The-Fly Converter, Radix 2 //
///////////////////////////////////
module otfc4 #(parameter N=65) (
module otfc4 (
input logic clk,
input logic Start,
input logic DivStart,
input logic [3:0] q,
output logic [N-1:0] r
output logic [`DIVLEN+2:0] Quot
);
// The on-the-fly converter transfers the quotient
@ -224,20 +279,20 @@ module otfc4 #(parameter N=65) (
//
// This code follows the psuedocode presented in the
// floating point chapter of the book. Right now,
// it is written for Radix-2 division.
// it is written for Radix-4 division.
//
// QM is Q-1. It allows us to write negative bits
// without using a costly CPA.
logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
// QR and QMR are the shifted versions of Q and QM.
// They are treated as [N-1:r] size signals, and
// discard the r most significant bits of Q and QM.
logic [N:0] QR, QMR;
logic [`DIVLEN:0] QR, QMR;
// if starting a new divison set Q to 0 and QM to -1
mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux);
mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux);
flop #(N+3) Qreg(clk, QMux, Q);
flop #(N+3) QMreg(clk, QMMux, QM);
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
// shift Q (quotent) and QM (quotent-1)
// if q = 2 Q = {Q, 10} QM = {Q, 01}
@ -247,11 +302,9 @@ module otfc4 #(parameter N=65) (
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
// *** how does the 0 concatination numbers work?
always_comb begin
QR = Q[N:0];
QMR = QM[N:0]; // Shift Q and QM
QR = Quot[`DIVLEN:0];
QMR = QM[`DIVLEN:0]; // Shift Q and QM
if (q[3]) begin // +2
QNext = {QR, 2'b10};
QMNext = {QR, 2'b01};
@ -269,7 +322,8 @@ module otfc4 #(parameter N=65) (
QMNext = {QMR, 2'b11};
end
end
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
// Quot is in the range [.5, 2) so normalize the result if nesissary
// assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
endmodule
@ -287,7 +341,7 @@ module csa #(parameter N=69) (
// This block adds in1, in2, in3, and cin to produce
// a result out1 / out2 in carry-save redundant form.
// cin is just added to the least significant bit and
// is required to handle adding a negative divisor.
// is Startuired to handle adding a negative divisor.
// Fortunately, the carry (out2) is shifted left by one
// bit, leaving room in the least significant bit to
// insert cin.
@ -302,22 +356,11 @@ endmodule
// expcalc //
//////////////
module expcalc(
input logic [`NE-1:0] XExp, YExp,
output logic [`NE-1:0] calcExp
input logic [`NE-1:0] XExpE, YExpE,
input logic XZeroE,
output logic [`NE:0] DivCalcExp
);
assign calcExp = XExp - YExp + (`NE)'(`BIAS);
assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
endmodule
//////////////
// signcalc //
//////////////
module signcalc(
input logic XSign, YSign,
output logic calcSign
);
assign calcSign = XSign ^ YSign;
endmodule

View File

@ -2,30 +2,6 @@
`include "wally-config.vh"
`define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF)
/////////////
// counter //
/////////////
module counter(input logic clk,
input logic req,
output logic done);
logic [5:0] count;
// This block of control logic sequences the divider
// through its iterations. You may modify it if you
// build a divider which completes in fewer iterations.
// You are not responsible for the (trivial) circuit
// design of the block.
always @(posedge clk)
begin
if (count == `DIVLEN/2+1) done <= #1 1;
else if (done | req) done <= #1 0;
if (req) count <= #1 0;
else count <= #1 count+1;
end
endmodule
///////////
// clock //
///////////
@ -43,7 +19,7 @@ endmodule
module testbenchradix4;
logic clk;
logic req;
logic done;
logic DivDone;
logic [63:0] a, b;
logic [51:0] afrac, bfrac;
logic [10:0] aExp, bExp;
@ -65,22 +41,20 @@ module testbenchradix4;
logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a
// bit field of an array
logic [63:0] correctr, nextr, diffn, diffp;
logic [10:0] rExp;
logic rsign;
logic [10:0] DivExp;
logic DivSgn;
integer testnum, errors;
// Divider
srtradix4 srtradix4(.clk, .Start(req),
.Stall(1'b0), .Flush(1'b0),
.XExp(aExp), .YExp(bExp), .rExp,
.XSign(asign), .YSign(bsign), .rsign,
srtradix4 srtradix4(.clk, .DivStart(req),
.XExpE(aExp), .YExpE(bExp), .DivExp,
.XSgnE(asign), .YSgnE(bsign), .DivSgn,
.XFrac(afrac), .YFrac(bfrac),
.SrcA('0), .SrcB('0), .Fmt(2'b00),
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
.Quot, .Rem(), .Flags());
.SrcA('0), .SrcB('0),
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone,
.Quot, .Rem());
// Counter
counter counter(clk, req, done);
initial
@ -112,14 +86,14 @@ module testbenchradix4;
always @(posedge clk)
begin
r = Quot[`DIVLEN-1:`DIVLEN - 52];
if (done) begin
if (DivDone) begin
req <= 1;
diffp = correctr[51:0] - r;
diffn = r - correctr[51:0];
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
if ((DivSgn !== correctr[63]) | (DivExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
begin
errors = errors+1;
$display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
$display("result was %h_%h, should be %h %h %h\n", DivExp, r, correctr, diffn, diffp);
$display("failed\n");
$stop;
end

View File

@ -48,13 +48,14 @@ module testbenchfp;
logic XInf, YInf, ZInf; // is the input infinity
logic XZero, YZero, ZZero; // is the input zero
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
logic [`CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
logic IntZeroE;
logic CvtResSgnE;
logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
logic [`NE:0] CvtCalcExpE; // the calculated expoent
logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
logic [`DIVLEN+2:0] Quot;
logic CvtResDenormUfE;
logic DivStart, DivDone;
// in-between FMA signals
@ -68,6 +69,8 @@ module testbenchfp;
logic NegSumE;
logic ZSgnEffE;
logic PSgnE;
logic DivSgn;
logic [`NE:0] DivCalcExp;
///////////////////////////////////////////////////////////////////////////////////////////////
@ -205,16 +208,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b11};
end
end
// if (TEST === "div" | TEST === "all") begin // if division is being tested
// // add the divide tests/op-ctrls/unit/fmt
// Tests = {Tests, f128div};
// OpCtrl = {OpCtrl, `DIV_OPCTRL};
// WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b11};
// end
// end
if (TEST === "div" | TEST === "all") begin // if division is being tested
// add the divide tests/op-ctrls/unit/fmt
Tests = {Tests, f128div};
OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b11};
end
end
// if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
// // add the square-root tests/op-ctrls/unit/fmt
// Tests = {Tests, f128sqrt};
@ -332,16 +335,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b01};
end
end
// if (TEST === "div" | TEST === "all") begin // if division is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f64div};
// OpCtrl = {OpCtrl, `DIV_OPCTRL};
// WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b01};
// end
// end
if (TEST === "div" | TEST === "all") begin // if division is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f64div};
OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b01};
end
end
// if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f64sqrt};
@ -443,16 +446,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b00};
end
end
// if (TEST === "div" | TEST === "all") begin // if division is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f32div};
// OpCtrl = {OpCtrl, `DIV_OPCTRL};
// WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b00};
// end
// end
if (TEST === "div" | TEST === "all") begin // if division is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f32div};
OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b00};
end
end
// if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f32sqrt};
@ -536,16 +539,16 @@ module testbenchfp;
Fmt = {Fmt, 2'b10};
end
end
// if (TEST === "div" | TEST === "all") begin // if division is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f16div};
// OpCtrl = {OpCtrl, `DIV_OPCTRL};
// WriteInt = {WriteInt, 1'b0};
// for(int i = 0; i<5; i++) begin
// Unit = {Unit, `DIVUNIT};
// Fmt = {Fmt, 2'b10};
// end
// end
if (TEST === "div" | TEST === "all") begin // if division is being tested
// add the correct tests/op-ctrls/unit/fmt to their lists
Tests = {Tests, f16div};
OpCtrl = {OpCtrl, `DIV_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `DIVUNIT};
Fmt = {Fmt, 2'b10};
end
end
// if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
// // add the correct tests/op-ctrls/unit/fmt to their lists
// Tests = {Tests, f16sqrt};
@ -611,7 +614,7 @@ module testbenchfp;
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart,
.XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
.XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN),
.XDenormE(XDenorm), .ZDenormE(ZDenorm),
@ -639,8 +642,8 @@ module testbenchfp;
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
@ -650,21 +653,16 @@ module testbenchfp;
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
.FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
// fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf),
// .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
// .CvtRes, .CvtFlgE);
// *** integrade divide and squareroot
// fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ),
// .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
// .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
// .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivRes), .Flg(FDivFlg));
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero),
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]),
.DivDone, .Quot, .Rem());
assign CmpFlg[3:0] = 0;
// produce clock
@ -817,7 +815,7 @@ end
///////////////////////////////////////////////////////////////////////////////////////////////
// check if the non-fma test is correct
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone&(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
errors += 1;
$display("There is an error in %s", Tests[TestNum]);
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@ -840,8 +838,7 @@ end
$stop;
end
VectorNum += 1; // increment the vector
if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
@ -895,15 +892,17 @@ module readvectors (
output logic XDenormE, ZDenormE, // is XYZ denormalized
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic XExpMaxE,
output logic XExpMaxE,
output logic DivStart,
output logic [`FLEN-1:0] X, Y, Z
);
// apply test vectors on rising edge of clk
// Format of vectors Inputs(1/2/3)_AnsFlg
always @(posedge clk) begin
always @(VectorNum) begin
#1;
AnsFlg = TestVector[4:0];
DivStart = 1'b0;
case (Unit)
`FMAUNIT:
case (Fmt)
@ -972,21 +971,33 @@ module readvectors (
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
Ans = TestVector[8+(`Q_LEN-1):8];
if (~clk) #5;
DivStart = 1'b1; #10 // one clk cycle
DivStart = 1'b0;
end
2'b01: begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b00: begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
if (~clk) #5;
DivStart = 1'b1; #10
DivStart = 1'b0;
end
endcase
`CMPUNIT:

View File

@ -1,473 +0,0 @@
///////////////////////////////////////////
// testbench.sv
//
// Written: David_Harris@hmc.edu 9 January 2021
// Modified:
//
// Purpose: Wally Testbench and helper modules
// Applies test programs from the riscv-arch-test and Imperas suites
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
`include "tests.vh"
module testbench;
parameter TESTSPERIPH = 0; // set to 0 for regression
parameter TESTSPRIV = 0; // set to 0 for regression
parameter DEBUG=0;
parameter TEST="none";
logic clk;
logic reset_ext, reset;
parameter SIGNATURESIZE = 5000000;
int test, i, errors, totalerrors;
logic [31:0] sig32[0:SIGNATURESIZE];
logic [`XLEN-1:0] signature[0:SIGNATURESIZE];
logic [`XLEN-1:0] testadr;
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
logic [31:0] InstrW;
string tests[];
logic [3:0] dummy;
string ProgramAddrMapFile, ProgramLabelMapFile;
logic [`AHBW-1:0] HRDATAEXT;
logic HREADYEXT, HRESPEXT;
logic [31:0] HADDR;
logic [`AHBW-1:0] HWDATA;
logic HWRITE;
logic [2:0] HSIZE;
logic [2:0] HBURST;
logic [3:0] HPROT;
logic [1:0] HTRANS;
logic HMASTLOCK;
logic HCLK, HRESETn;
logic [`XLEN-1:0] PCW;
logic DCacheFlushDone, DCacheFlushStart;
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
// check assertions for a legal configuration
riscvassertions riscvassertions();
// pick tests based on modes supported
initial begin
$display("TEST is %s", TEST);
//tests = '{};
if (`XLEN == 64) begin // RV64
case (TEST)
"arch64i": tests = arch64i;
"arch64priv": tests = arch64priv;
"arch64c": if (`C_SUPPORTED)
if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv};
else tests = {arch64c};
"arch64m": if (`M_SUPPORTED) tests = arch64m;
"arch64d": if (`D_SUPPORTED) tests = arch64d;
"imperas64i": tests = imperas64i;
"imperas64p": tests = imperas64p;
// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
"imperas64f": if (`F_SUPPORTED) tests = imperas64f;
"imperas64d": if (`D_SUPPORTED) tests = imperas64d;
"imperas64m": if (`M_SUPPORTED) tests = imperas64m;
"imperas64a": if (`A_SUPPORTED) tests = imperas64a;
"imperas64c": if (`C_SUPPORTED) tests = imperas64c;
else tests = imperas64iNOc;
"testsBP64": tests = testsBP64;
"wally64i": tests = wally64i; // *** redo
"wally64priv": tests = wally64priv;// *** redo
"imperas64periph": tests = imperas64periph;
"coremark": tests = coremark;
endcase
end else begin // RV32
case (TEST)
"arch32i": tests = arch32i;
"arch32priv": tests = arch32priv;
"arch32c": if (`C_SUPPORTED)
if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv};
else tests = {arch32c};
"arch32m": if (`M_SUPPORTED) tests = arch32m;
"arch32f": if (`F_SUPPORTED) tests = arch32f;
"imperas32i": tests = imperas32i;
"imperas32p": tests = imperas32p;
// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
"imperas32a": if (`A_SUPPORTED) tests = imperas32a;
"imperas32c": if (`C_SUPPORTED) tests = imperas32c;
else tests = imperas32iNOc;
"wally32i": tests = wally32i; // *** redo
"wally32e": tests = wally32e;
"wally32priv": tests = wally32priv; // *** redo
"imperas32periph": tests = imperas32periph;
endcase
end
if (tests.size() == 0) begin
$display("TEST %s not supported in this configuration", TEST);
$stop;
end
end
string signame, memfilename, pathname;
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
logic UARTSin, UARTSout;
logic SDCCLK;
logic SDCCmdIn;
logic SDCCmdOut;
logic SDCCmdOE;
logic [3:0] SDCDatIn;
logic HREADY;
logic HSELEXT;
// instantiate device to be tested
assign GPIOPinsIn = 0;
assign UARTSin = 1;
assign HREADYEXT = 1;
assign HRESPEXT = 0;
assign HRDATAEXT = 0;
wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
.HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT,
.HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
.UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK);
// Track names of instructions
instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
dut.core.ifu.FinalInstrRawF[31:0],
dut.core.ifu.InstrD, dut.core.ifu.InstrE,
dut.core.ifu.InstrM, InstrW,
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
// initialize tests
localparam integer MemStartAddr = `RAM_BASE>>(1+`XLEN/32);
localparam integer MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32);
initial
begin
test = 1;
totalerrors = 0;
testadr = 0;
// fill memory with defined values to reduce Xs in simulation
// Quick note the memory will need to be initialized. The C library does not
// guarantee the initialized reads. For example a strcmp can read 6 byte
// strings, but uses a load double to read them in. If the last 2 bytes are
// not initialized the compare results in an 'x' which propagates through
// the design.
if (TEST == "coremark")
for (i=MemStartAddr; i<MemEndAddr; i = i+1)
dut.uncore.ram.ram.RAM[i] = 64'h0;
// read test vectors into memory
pathname = tvpaths[tests[0].atoi()];
/* if (tests[0] == `IMPERASTEST)
pathname = tvpaths[0];
else pathname = tvpaths[1]; */
memfilename = {pathname, tests[test], ".elf.memfile"};
if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.ram.RAM);
else $readmemh(memfilename, dut.uncore.ram.RAM);
if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.ram.RAM);
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
$display("Read memfile %s", memfilename);
reset_ext = 1; # 42; reset_ext = 0;
end
// generate clock to sequence tests
always
begin
clk = 1; # 5; clk = 0; # 5;
// if ($time % 100000 == 0) $display("Time is %0t", $time);
end
// check results
always @(negedge clk)
begin
if (TEST == "coremark")
if (dut.core.priv.priv.ecallM) begin
$display("Benchmark: coremark is done.");
$stop;
end
if (DCacheFlushDone) begin
#600; // give time for instructions in pipeline to finish
// clear signature to prevent contamination from previous tests
for(i=0; i<SIGNATURESIZE; i=i+1) begin
sig32[i] = 'bx;
end
// read signature, reformat in 64 bits if necessary
signame = {pathname, tests[test], ".signature.output"};
$readmemh(signame, sig32);
i = 0;
while (i < SIGNATURESIZE) begin
if (`XLEN == 32) begin
signature[i] = sig32[i];
i = i+1;
end else begin
signature[i/2] = {sig32[i+1], sig32[i]};
i = i + 2;
end
if (i >= 4 & sig32[i-4] === 'bx) begin
if (i == 4) begin
i = SIGNATURESIZE+1; // flag empty file
$display(" Error: empty test file");
end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
end
end
// Check errors
errors = (i == SIGNATURESIZE+1); // error if file is empty
i = 0;
testadr = (`RAM_BASE+tests[test+1].atohex())/(`XLEN/8);
/* verilator lint_off INFINITELOOP */
while (signature[i] !== 'bx) begin
logic [`XLEN-1:0] sig;
if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.ram.RAM[testadr+i];
else sig = dut.uncore.ram.RAM[testadr+i];
// $display("signature[%h] = %h sig = %h", i, signature[i], sig);
if (signature[i] !== sig &
//if (signature[i] !== dut.core.lsu.dtim.ram.RAM[testadr+i] &
(signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1?
if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
// report errors unless they are garbage at the end of the sim
// kind of hacky test for garbage right now
$display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
errors = errors+1;
$display(" Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h",
tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
// tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.RAM[testadr+i], signature[i]);
$stop;//***debug
end
end
i = i + 1;
end
/* verilator lint_on INFINITELOOP */
if (errors == 0) begin
$display("%s succeeded. Brilliant!!!", tests[test]);
end
else begin
$display("%s failed with %d errors. :(", tests[test], errors);
totalerrors = totalerrors+1;
end
test = test + 2;
if (test == tests.size()) begin
if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
else $display("FAIL: %d test programs had errors", totalerrors);
$stop;
end
else begin
//pathname = tvpaths[tests[0]];
memfilename = {pathname, tests[test], ".elf.memfile"};
//$readmemh(memfilename, dut.uncore.ram.ram.RAM);
if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.ram.RAM);
else $readmemh(memfilename, dut.uncore.ram.RAM);
if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.ram.RAM);
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
$display("Read memfile %s", memfilename);
reset_ext = 1; # 47; //reset_ext = 0;
end
end
end // always @ (negedge clk)
// track the current function or global label
if (DEBUG == 1) begin : FunctionName
FunctionName FunctionName(.reset(reset),
.clk(clk),
.ProgramAddrMapFile(ProgramAddrMapFile),
.ProgramLabelMapFile(ProgramLabelMapFile));
end
// Termination condition
// terminate on a specific ECALL after li x3,1 for old Imperas tests, *** remove this when old imperas tests are removed
// or sw gp,-56(t0) for new Imperas tests
// or sd gp, -56(t0)
// or on a jump to self infinite loop (6f) for RISC-V Arch tests
logic ecf; // remove this once we don't rely on old Imperas tests with Ecalls
if (`ZICSR_SUPPORTED) assign ecf = dut.core.priv.priv.EcallFaultM;
else assign ecf = 0;
assign DCacheFlushStart = ecf &
(dut.core.ieu.dp.regf.rf[3] == 1 |
(dut.core.ieu.dp.regf.we3 &
dut.core.ieu.dp.regf.a3 == 3 &
dut.core.ieu.dp.regf.wd3 == 1)) |
(dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM;
DCacheFlushFSM DCacheFlushFSM(.clk(clk),
.reset(reset),
.start(DCacheFlushStart),
.done(DCacheFlushDone));
// initialize the branch predictor
if (`BPRED_ENABLED == 1)
initial begin
$readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem);
$readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem);
end
endmodule
module riscvassertions;
initial begin
assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support");
assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (`IMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`ICACHE_LINELENINBITS >= 32 | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled");
assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size");
assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2");
assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (`DMEM != `MEM_CACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2");
assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (`IMEM != `MEM_CACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");
assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2");
assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2");
assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF");
assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported");
assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
end
endmodule
/* verilator lint_on STMTDLY */
/* verilator lint_on WIDTH */
module DCacheFlushFSM
(input logic clk,
input logic reset,
input logic start,
output logic done);
genvar adr;
logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)];
if(`DMEM == `MEM_CACHE) begin
localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN;
localparam integer lognumlines = $clog2(numlines);
localparam integer loglinebytelen = $clog2(linebytelen);
localparam integer lognumways = $clog2(numways);
localparam integer tagstart = lognumlines + loglinebytelen;
genvar index, way, cacheWord;
logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0];
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0];
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
for(index = 0; index < numlines; index++) begin
for(way = 0; way < numways; way++) begin
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
copyShadow #(.tagstart(tagstart),
.loglinebytelen(loglinebytelen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),
.CacheAdr(CacheAdr[way][index][cacheWord]),
.CacheTag(CacheTag[way][index][cacheWord]),
.CacheValid(CacheValid[way][index][cacheWord]),
.CacheDirty(CacheDirty[way][index][cacheWord]));
end
end
end
integer i, j, k;
always @(posedge clk) begin
if (start) begin #1
#1
for(i = 0; i < numlines; i++) begin
for(j = 0; j < numways; j++) begin
for(k = 0; k < numwords; k++) begin
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
end
end
end
end
end
end
end
flop #(1) doneReg(.clk, .d(start), .q(done));
endmodule
module copyShadow
#(parameter tagstart, loglinebytelen)
(input logic clk,
input logic start,
input logic [`PA_BITS-1:tagstart] tag,
input logic valid, dirty,
input logic [`XLEN-1:0] data,
input logic [32-1:0] index,
input logic [32-1:0] cacheWord,
output logic [`XLEN-1:0] CacheData,
output logic [`PA_BITS-1:0] CacheAdr,
output logic [`XLEN-1:0] CacheTag,
output logic CacheValid,
output logic CacheDirty);
always_ff @(posedge clk) begin
if(start) begin
CacheTag = tag;
CacheValid = valid;
CacheDirty = dirty;
CacheData = data;
CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8));
end
end
endmodule

View File

@ -15,6 +15,7 @@ export MAXCORES ?= 4
# MAXOPT turns on flattening, boundary optimization, and retiming
# The output netlist is hard to interpret, but significantly better PPA
export MAXOPT ?= 0
export DRIVE ?= FLOP
time := $(shell date +%F-%H-%M)
hash := $(shell git rev-parse --short HEAD)

View File

@ -1,7 +1,23 @@
00000000 # test reset to zero
00000000
00000000
A5A5A5A5
A5A5A5A5 # test output pins
5A5AFFFF
00000000
00000000 # test input enables
5A5A0000
A55A0000
A55A0000 # test XOR
A55A0000 # Test interrupt pending bits: high_ip
5AA5FFFF # low_ip
00000000 # rise_ip
00000000 # fall_ip
A4AA0000 # input_val
A5FA0000 # high_ip
5BF5FFFF # low_ip
00A00000 # rise_ip
01500000 # fall_ip
00000000 # MEIP
00000000 # Test interrupts can be enabled without being triggered: MIP = 0
00000000 # MIP = 0
00000000 # MIP = 0
00000000 # MIP = 0
00000800 # Test interrupts can be enabled and triggered: MEIP set
00000000 # MEIP = 0

View File

@ -827,6 +827,28 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
addi a6, a6, 4
.endm
// Place this macro in peripheral tests to setup all the PLIC registers to generate external interrupts
.macro SETUP_PLIC
# Setup PLIC with a series of register writes
.equ PLIC_INTPRI_GPIO, 0x0C00000C # GPIO is interrupt 3
.equ PLIC_INTPRI_UART, 0x0C000028 # UART is interrupt 10
.equ PLIC_INTPENDING0, 0x0C001000 # intPending0 register
.equ PLIC_INTEN00, 0x0C002000 # interrupt enables for context 0 (machine mode) sources 31:1
.equ PLIC_INTEN10, 0x0C002080 # interrupt enables for context 1 (supervisor mode) sources 31:1
.equ PLIC_THRESH0, 0x0C200000 # Priority threshold for context 0 (machine mode)
.equ PLIC_CLAIM0, 0x0C200004 # Claim/Complete register for context 0
.equ PLIC_THRESH1, 0x0C201000 # Priority threshold for context 1 (supervisor mode)
.equ PLIC_CLAIM1, 0x0C201004 # Claim/Complete register for context 1
.4byte PLIC_THRESH0, 0, write32_test # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts
.4byte PLIC_THRESH1, 7, write32_test # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts
.4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority
.4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to high priority
.4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode
.4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode
.endm
.macro END_TESTS
// invokes one final ecall to return to machine mode then terminates this program, so the output is
// 0x8: termination called from U mode
@ -937,6 +959,20 @@ read08_test:
addi a6, a6, 4
j test_loop // go to next test case
readmip_test: // read the MIP into the signature
csrr t2, mip
sw t2, 0(t1)
addi t1, t1, 4
addi a6, a6, 4
j test_loop // go to next test case
readsip_test: // read the MIP into the signature
csrr t2, sip
sw t2, 0(t1)
addi t1, t1, 4
addi a6, a6, 4
j test_loop // go to next test case
goto_s_mode:
// return to address in t3,
li a0, 3 // Trap handler behavior (go to supervisor mode)

View File

@ -72,6 +72,7 @@ test_cases:
.4byte input_val, 0x00000000, read32_test # input_val reset to zero
.4byte input_en, 0x00000000, read32_test # input_en reset to zero
# *** add more
# =========== Test output and input pins ===========
@ -86,14 +87,49 @@ test_cases:
.4byte input_en, 0x00000000, write32_test # disable all input pins
.4byte input_val, 0x00000000, read32_test # read 0 since input pins are disabled
.4byte input_en, 0xFFFF0000, write32_test # enable a few input pins
.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above.
.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above.
# =========== Test output enables(?) ===========
.4byte output_en, 0xFFFFFFFF, write32_test # undo changes made to output enable
# =========== Test XOR functionality ===========
.4byte out_xor, 0xFF00FF00, write32_test # invert certain pin values
.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working
.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working
# =========== Test Interrupt Pending bits ===========
SETUP_PLIC
.4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts
.4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts
.4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts
.4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts
.4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts
.4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts
.4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts
.4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts
.4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts
.4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output
.4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?)
.4byte low_ip, 0x5BF5FFFF, read32_test # low interrupt pending should be opposite high for enabled pins
.4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising)
.4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling)
.4byte 0x0, 0x00000000, readmip_test # Check no external interrupt has been generated
# =========== Test interrupts can be enabled without being triggered ===========
.4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt
.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending
.4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt
.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending
.4byte rise_ie, 0x00010000, write32_test # enable rise interrupt on bit 16, no pending interrupt
.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending
.4byte fall_ie, 0x00010000, write32_test # enable fall interrupt on bit 16, no pending interrupt
.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending
# =========== Test interrupts can be enabled and triggered
.4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending
.4byte 0x0, 0x00000800, readmip_test # MEIP should be raised
.4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending
.4byte 0x0, 0x00000000, readmip_test # MEIP should be released
.4byte 0x0, 0x0, terminate_test # terminate tests

View File

@ -254,12 +254,12 @@ FFFFEE00
FFFFEE00
00000000
00000000
02BEEF10
02BEEF10 # Something here is failing
0000000B
80000000
00000003
000000FF
FFFFFFFF
00000000
000000FF
00000000
00000000
@ -270,20 +270,20 @@ FFFFFFFF
FFFFFF00
00000000
00000000
02BEEF11
02BEEF11 # this might be wrong
0000000B
80000000
00000003
000000CC
CCCCCCCC
00000000
00000000
00000033
00000000
000000FF
000000CC
FFFFFF33
FFFFFF33
00000003
00000033 # input
00000000 # output
00000000 # rise ip
00000000 # serviced rise ip
000000CC # fall ip
00000000
000000FF # high ip
00000033 # why is this 0x33?
FFFFFFCC # low ip
FFFFFFCC # serviced low ip
00000000
00000000
03BEEF12
@ -454,9 +454,9 @@ FFFFFF33
00080000
00080000
00000000
00000000 # is it this one that's failing?
00000000
00000000
00080000
00080000 # failing
00080000
FFFFFFFF
FFF7FFFF
@ -478,7 +478,7 @@ FFFFFFFF
FFFFFFFE
00000000
00000000
04BEEF1E
04BEEF1E # this might also be wrong
00000009
80000000
0000000A

View File

@ -271,7 +271,7 @@ main_code: #####
sw t1, 0x04(t0)
# raise all output_en
sw t1, 0x08(t0)
# raise all input_en
# raise all rise_en
sw t1, 0x18(t0)
# ========== Execute Test ==========
# set MEIE
@ -616,6 +616,9 @@ Intr02BEEF11:
sw t1, 0x08(t0)
# set initial output state
sw x0, 0x0C(t0)
# clear XOR
li t1, 0x00000000
sw t1, 0x40(t0)
# clear all pending interrupts
li t1, 0xFFFFFFFF
sw t1, 0x1C(t0)
@ -843,7 +846,7 @@ Intr03BEEF1A:
sw t1, 0x04(t0)
# raise all output_en
sw t1, 0x08(t0)
# raise all input_en
# raise all rise_en
sw t1, 0x18(t0)
# ========== Execute Test ==========
# set MEIE and SEIE