mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-03 10:15:19 +00:00
Merge branch 'main' of https://github.com/davidharrishmc/riscv-wally
This commit is contained in:
commit
77a46ead39
@ -124,8 +124,6 @@
|
||||
`define PLIC_NUM_SRC 53
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/buildroot/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/buildroot/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -130,8 +130,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 0
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -128,8 +128,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -130,8 +130,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv32i/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv32i/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -128,8 +128,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -130,8 +130,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
//`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE
|
||||
|
@ -32,7 +32,7 @@
|
||||
`define DESIGN_COMPILER 0
|
||||
|
||||
// RV32 or RV64: XLEN = 32 or 64
|
||||
`define XLEN 64
|
||||
`define XLEN 32
|
||||
|
||||
// IEEE 754 compliance
|
||||
`define IEEE754 0
|
||||
@ -132,8 +132,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -131,8 +131,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -131,8 +131,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/shared/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -131,8 +131,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv64i/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv64i/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -131,8 +131,6 @@
|
||||
`define PLIC_GPIO_ID 3
|
||||
`define PLIC_UART_ID 10
|
||||
|
||||
`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
|
||||
`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
|
||||
`define BPRED_ENABLED 1
|
||||
`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
|
||||
`define TESTSBP 0
|
||||
|
@ -94,11 +94,12 @@
|
||||
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
|
||||
|
||||
// largest length in IEU/FPU
|
||||
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
`define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN)
|
||||
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
|
||||
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
|
||||
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
|
||||
`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1))
|
||||
`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9))
|
||||
`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6))
|
||||
`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF)
|
||||
|
||||
// Disable spurious Verilator warnings
|
||||
|
||||
|
@ -1,30 +1,9 @@
|
||||
make allclean:
|
||||
make clean
|
||||
make all
|
||||
|
||||
make clean:
|
||||
make clean -C ../../tests/riscof
|
||||
make clean -C ../../tests/wally-riscv-arch-test
|
||||
# make allclean -C ../../tests/imperas-riscv-tests
|
||||
|
||||
make all:
|
||||
all: archtests wallytests memfiles
|
||||
# *** Build old tests/imperas-riscv-tests for now;
|
||||
# Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test
|
||||
# DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired
|
||||
#make -C ../../tests/imperas-riscv-tests --jobs
|
||||
#make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs
|
||||
|
||||
# Build riscv-arch-test 64 and 32-bit versions
|
||||
make -C ../../tests/riscof/ --jobs
|
||||
make -C ../../tests/riscof/ XLEN=32 --jobs
|
||||
|
||||
# Build wally-riscv-arch-test
|
||||
make -C ../../tests/wally-riscv-arch-test/ --jobs
|
||||
make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs
|
||||
|
||||
# build the memfiles and address files.
|
||||
make -f makefile-memfile wally-sim-files --jobs
|
||||
|
||||
# Only compile Imperas tests if they are installed locally.
|
||||
# They are usually a symlink to $RISCV/imperas-riscv-tests and only
|
||||
# get compiled there manually during installation
|
||||
@ -36,4 +15,22 @@ make all:
|
||||
# Link Linux test vectors (fix this later***)
|
||||
#cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh
|
||||
|
||||
|
||||
allclean: clean all
|
||||
|
||||
clean:
|
||||
make clean -C ../../tests/riscof
|
||||
make clean -C ../../tests/wally-riscv-arch-test
|
||||
# make allclean -C ../../tests/imperas-riscv-tests
|
||||
|
||||
archtests:
|
||||
# Build riscv-arch-test 64 and 32-bit versions
|
||||
make -C ../../tests/riscof/ --jobs
|
||||
make -C ../../tests/riscof/ XLEN=32 --jobs
|
||||
|
||||
wallytests:
|
||||
# Build wally-riscv-arch-test
|
||||
make -C ../../tests/wally-riscv-arch-test/ --jobs
|
||||
make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs
|
||||
|
||||
memfiles:
|
||||
make -f makefile-memfile wally-sim-files --jobs
|
||||
|
@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/
|
||||
verilator=`which verilator`
|
||||
|
||||
basepath=$(dirname $0)/..
|
||||
for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do
|
||||
for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do
|
||||
echo "$config linting..."
|
||||
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then
|
||||
echo "Exiting after $config lint due to errors or warnings"
|
||||
|
@ -8,8 +8,9 @@ IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests
|
||||
ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX)
|
||||
|
||||
ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf")
|
||||
OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump")
|
||||
MEMFILES ?= $(ELFFILES:.elf=.elf.memfile)
|
||||
ADDRFILES ?= $(ELFFILES:.elf=.elf.objdump.addr)
|
||||
ADDRFILES ?= $(OBJDUMPFILES:.objdump=.objdump.addr)
|
||||
|
||||
print:
|
||||
echo "files in $(ALLDIRS) are $(ELFFILES)."
|
||||
|
@ -71,7 +71,7 @@ for test in tests64gc:
|
||||
grepstr="All tests ran without failures")
|
||||
configs.append(tc)
|
||||
|
||||
tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv"] #, "imperas32mmu""wally32i",
|
||||
tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i",
|
||||
for test in tests32gc:
|
||||
tc = TestCase(
|
||||
name=test,
|
||||
|
@ -32,7 +32,7 @@ vlib work
|
||||
# start and run simulation
|
||||
# remove +acc flag for faster sim during regressions if there is no need to access internal signals
|
||||
# $num = the added words after the call
|
||||
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697
|
||||
vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697
|
||||
|
||||
vsim -voptargs=+acc work.testbenchfp -G TEST=$2
|
||||
|
||||
|
@ -7,3 +7,22 @@ add wave -noupdate /testbenchfp/Y
|
||||
add wave -noupdate /testbenchfp/Z
|
||||
add wave -noupdate /testbenchfp/Res
|
||||
add wave -noupdate /testbenchfp/Ans
|
||||
add wave -noupdate /testbenchfp/DivStart
|
||||
add wave -noupdate /testbenchfp/DivDone
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/*
|
||||
add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/*
|
||||
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
|
||||
add wave -group {Testbench} -noupdate /testbenchfp/*
|
||||
add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
|
||||
|
@ -7,10 +7,10 @@ module cvtshiftcalc(
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic [`NF:0] XManM, // input mantissas
|
||||
input logic [`FMTBITS-1:0] OutFmt, // output format
|
||||
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic CvtResDenormUfM,
|
||||
output logic CvtResUf,
|
||||
output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted
|
||||
output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted
|
||||
);
|
||||
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
|
||||
|
||||
@ -31,8 +31,8 @@ module cvtshiftcalc(
|
||||
// | `NF-1 zeros | Mantissa | 0's if necessary |
|
||||
// - otherwise:
|
||||
// | LzcInM | 0's if necessary |
|
||||
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
|
||||
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} :
|
||||
assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} :
|
||||
CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} :
|
||||
{CvtLzcInM, {`NF+1{1'b0}}};
|
||||
|
||||
|
||||
|
15
pipelined/src/fpu/divshiftcalc.sv
Normal file
15
pipelined/src/fpu/divshiftcalc.sv
Normal file
@ -0,0 +1,15 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
// divshiftcalc
// Derives, from the top bit of the divider's quotient, (a) the amount by which
// the normalization shifter must shift the quotient and (b) the exponent
// corrected for that shift. Both outputs are purely combinational.
module divshiftcalc(
    input  logic [`DIVLEN+2:0]              Quot,         // quotient; only bit `DIVLEN+2 is examined here
    input  logic [`NE:0]                    DivCalcExpM,  // exponent calculated before normalization
    output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt,  // normalization shift amount (always 0 or 1)
    output logic [`NE:0]                    CorrDivExp    // DivCalcExpM reduced by the shift amount
);

    // High when the top quotient bit is clear, meaning the quotient has to be
    // shifted left by one position to be normalized.
    logic NeedShift;
    assign NeedShift = ~Quot[`DIVLEN+2];

    // Zero-extend the single shift bit to the width of the shared shifter.
    assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, NeedShift};

    // The quotient is in the range [.5,2).
    // When it is below 1, subtract 1 from the exponent to account for the
    // one-position normalization shift.
    assign CorrDivExp = DivCalcExpM - {(`NE)'(0), NeedShift};

endmodule
|
@ -12,11 +12,11 @@ module fcvt (
|
||||
input logic XDenormE, // is the input denormalized
|
||||
input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
|
||||
output logic [`NE:0] CvtCalcExpE, // the calculated expoent
|
||||
output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by
|
||||
output logic [`LOGCVTLEN-1:0] CvtShiftAmtE, // how much to shift by
|
||||
output logic CvtResDenormUfE,// does the result underflow or is denormalized
|
||||
output logic CvtResSgnE, // the result's sign
|
||||
output logic IntZeroE, // is the integer zero?
|
||||
output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
|
||||
output logic [`CVTLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
|
||||
);
|
||||
|
||||
// OpCtrls:
|
||||
@ -43,7 +43,7 @@ module fcvt (
|
||||
logic Int64; // is the integer 64 bits?
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic ToInt; // is the opperation an fp->int conversion?
|
||||
logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC
|
||||
logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC
|
||||
|
||||
|
||||
// seperate OpCtrl for code readability
|
||||
@ -78,10 +78,10 @@ module fcvt (
|
||||
// choose the input to the leading zero counter i.e. priority encoder
|
||||
// int -> fp : | positive integer | 00000... (if needed) |
|
||||
// fp -> fp : | fraction | 00000... (if needed) |
|
||||
assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
|
||||
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
|
||||
assign CvtLzcInE = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
|
||||
{XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}};
|
||||
|
||||
lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
|
||||
lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// shifter
|
||||
@ -99,9 +99,9 @@ module fcvt (
|
||||
// - only shift fp -> fp if the intital value is denormalized
|
||||
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
|
||||
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
|
||||
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
|
||||
CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] :
|
||||
(ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
|
||||
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} :
|
||||
CvtResDenormUfE&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] :
|
||||
(ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// exp calculations
|
||||
@ -180,7 +180,7 @@ module fcvt (
|
||||
// - shift left to normalize (-1-ZeroCnt)
|
||||
// - newBias to make the biased exponent
|
||||
//
|
||||
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
|
||||
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})};
|
||||
// find if the result is denormal or underflows
|
||||
// - if the calculated exponent is 0 or negative (and the input/result is not exactly 0)
|
||||
// - can't underflow an integer to Fp conversion
|
||||
|
@ -82,7 +82,7 @@ module fpu (
|
||||
|
||||
// unpacking signals
|
||||
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
|
||||
logic XSgnM; // input's sign - memory stage
|
||||
logic XSgnM, YSgnM; // input's sign - memory stage
|
||||
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
|
||||
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
|
||||
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
|
||||
@ -104,23 +104,27 @@ module fpu (
|
||||
logic FOpCtrlQ;
|
||||
|
||||
// Fma Signals
|
||||
logic [3*`NF+5:0] SumE, SumM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic InvZE, InvZM;
|
||||
logic NegSumE, NegSumM;
|
||||
logic ZSgnEffE, ZSgnEffM;
|
||||
logic PSgnE, PSgnM;
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
|
||||
logic [3*`NF+5:0] SumE, SumM;
|
||||
logic [`NE+1:0] ProdExpE, ProdExpM;
|
||||
logic AddendStickyE, AddendStickyM;
|
||||
logic KillProdE, KillProdM;
|
||||
logic InvZE, InvZM;
|
||||
logic NegSumE, NegSumM;
|
||||
logic ZSgnEffE, ZSgnEffM;
|
||||
logic PSgnE, PSgnM;
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
|
||||
|
||||
// Cvt Signals
|
||||
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
|
||||
logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
|
||||
logic CvtResSgnE, CvtResSgnM; // the result's sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
|
||||
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
|
||||
logic CvtResSgnE, CvtResSgnM; // the result's sign
|
||||
logic IntZeroE, IntZeroM; // is the integer zero?
|
||||
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
|
||||
|
||||
//divide signals
|
||||
logic [`DIVLEN+2:0] Quot;
|
||||
logic [`NE:0] DivCalcExpM;
|
||||
|
||||
// result and flag signals
|
||||
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
|
||||
@ -317,7 +321,7 @@ module fpu (
|
||||
|
||||
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
|
||||
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
|
||||
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
|
||||
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
|
||||
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
|
||||
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
|
||||
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
|
||||
@ -333,7 +337,7 @@ module fpu (
|
||||
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
|
||||
{AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
|
||||
{AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
|
||||
flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
|
||||
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
|
||||
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
|
||||
|
||||
@ -351,9 +355,9 @@ module fpu (
|
||||
|
||||
assign FpLoadM = FResSelM[1];
|
||||
|
||||
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
|
||||
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
|
||||
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
|
||||
postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
|
||||
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot,
|
||||
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM,
|
||||
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
|
||||
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
|
||||
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
|
||||
|
@ -30,7 +30,7 @@
|
||||
`include "wally-config.vh"
|
||||
|
||||
module postprocess(
|
||||
input logic XSgnM, // input signs
|
||||
input logic XSgnM, YSgnM, // input signs
|
||||
input logic [`NE-1:0] ZExpM, // input exponents
|
||||
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
|
||||
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
|
||||
@ -51,13 +51,15 @@ module postprocess(
|
||||
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
|
||||
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic [`NE:0] DivCalcExpM, // the calculated expoent
|
||||
input logic CvtResDenormUfM,
|
||||
input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by
|
||||
input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by
|
||||
input logic CvtResSgnM, // the result's sign
|
||||
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
|
||||
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
|
||||
input logic IntZeroM, // is the input zero
|
||||
input logic [1:0] PostProcSelM, // select result to be written to fp register
|
||||
input logic [`DIVLEN+2:0] Quot,
|
||||
output logic [`FLEN-1:0] PostProcResM, // FMA final result
|
||||
output logic [4:0] PostProcFlgM,
|
||||
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
|
||||
@ -75,13 +77,14 @@ module postprocess(
|
||||
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
|
||||
logic UfPlus1; // do you add one (for determining underflow flag)
|
||||
logic Round; // bits needed to determine rounding
|
||||
logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted
|
||||
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted
|
||||
logic Mult; // multiply opperation
|
||||
logic [`FLEN:0] RoundAdd; // how much to add to the result
|
||||
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
|
||||
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
|
||||
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
|
||||
logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt;
|
||||
logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero
|
||||
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
|
||||
logic Plus1; // add one to the final result?
|
||||
@ -91,6 +94,7 @@ module postprocess(
|
||||
logic IntToFp; // is the opperation an int->fp conversion?
|
||||
logic ToInt; // is the opperation an fp->int conversion?
|
||||
logic [`NE+1:0] RoundExp;
|
||||
logic [`NE:0] CorrDivExp;
|
||||
logic [1:0] NegResMSBS;
|
||||
logic CvtOp;
|
||||
logic FmaOp;
|
||||
@ -135,6 +139,7 @@ module postprocess(
|
||||
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
|
||||
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
|
||||
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
|
||||
divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt);
|
||||
|
||||
always_comb
|
||||
case(PostProcSelM)
|
||||
@ -143,12 +148,12 @@ module postprocess(
|
||||
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
|
||||
end
|
||||
2'b00: begin // cvt
|
||||
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
|
||||
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
|
||||
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM};
|
||||
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
|
||||
end
|
||||
2'b01: begin //div
|
||||
ShiftAmt = 0;//{DivShiftAmt};
|
||||
ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
|
||||
2'b01: begin //div ***prob can take out
|
||||
ShiftAmt = DivShiftAmt;
|
||||
ShiftIn = {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}};
|
||||
end
|
||||
default: begin
|
||||
ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}};
|
||||
@ -171,9 +176,9 @@ module postprocess(
|
||||
// round to infinity
|
||||
// round to nearest max magnitude
|
||||
|
||||
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
|
||||
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp,
|
||||
.InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
|
||||
.UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
|
||||
.DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Sign calculation
|
||||
@ -181,7 +186,7 @@ module postprocess(
|
||||
|
||||
resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
|
||||
.FmaOp, .DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult,
|
||||
.CvtResSgnM, .RoundSgn, .ResSgn);
|
||||
.XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Flags
|
||||
|
@ -4,6 +4,8 @@ module resultsign(
|
||||
input logic [2:0] FrmM,
|
||||
input logic PSgnM, ZSgnEffM,
|
||||
input logic InvZM,
|
||||
input logic XSgnM,
|
||||
input logic YSgnM,
|
||||
input logic ZInfM,
|
||||
input logic InfIn,
|
||||
input logic NegSumM,
|
||||
@ -25,6 +27,7 @@ module resultsign(
|
||||
logic FmaResSgn;
|
||||
logic FmaResSgnTmp;
|
||||
logic Underflow;
|
||||
logic DivSgn;
|
||||
// logic ResultSgnTmp;
|
||||
|
||||
// Determine the sign if the sum is zero
|
||||
@ -43,9 +46,10 @@ module resultsign(
|
||||
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
|
||||
assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp;
|
||||
|
||||
// Sign for rounding calulation
|
||||
assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
|
||||
assign DivSgn = XSgnM^YSgnM;
|
||||
|
||||
assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp);
|
||||
// Sign for rounding calulation
|
||||
assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
|
||||
assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp);
|
||||
|
||||
endmodule
|
@ -11,6 +11,7 @@ module round(
|
||||
input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single
|
||||
input logic [2:0] FrmM, // rounding mode
|
||||
input logic FmaOp,
|
||||
input logic DivOp,
|
||||
input logic [1:0] PostProcSelM,
|
||||
input logic CvtResDenormUfM,
|
||||
input logic ToInt,
|
||||
@ -23,6 +24,7 @@ module round(
|
||||
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
|
||||
input logic RoundSgn, // the result's sign
|
||||
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
|
||||
input logic [`NE:0] CorrDivExp, // the calculated expoent
|
||||
output logic UfPlus1, // do you add or subtract on from the result
|
||||
output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow
|
||||
output logic [`NF-1:0] ResFrac, // Result fraction
|
||||
@ -303,7 +305,7 @@ module round(
|
||||
case(PostProcSelM)
|
||||
2'b10: RoundExp = SumExp; // fma
|
||||
2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
|
||||
2'b01: RoundExp = 0; // divide
|
||||
2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide
|
||||
default: RoundExp = 0;
|
||||
endcase
|
||||
|
||||
|
@ -54,10 +54,6 @@ module bram1p1rw
|
||||
logic [DATA_WIDTH-1:0] RAM [(2**ADDR_WIDTH)-1:0];
|
||||
integer i;
|
||||
|
||||
initial begin
|
||||
$readmemh("big64.txt", RAM);
|
||||
end
|
||||
|
||||
always @ (posedge clk) begin
|
||||
dout <= RAM[addr];
|
||||
if(we) begin
|
||||
|
@ -60,7 +60,7 @@ module clint (
|
||||
flopr #(16) entrydflop(HCLK, ~HRESETn, entry, entryd);
|
||||
|
||||
assign HRESPCLINT = 0; // OK
|
||||
assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during accesses
|
||||
assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during asynchronous MTIME accesses
|
||||
|
||||
// word aligned reads
|
||||
if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000};
|
||||
@ -87,8 +87,7 @@ module clint (
|
||||
always_ff @(posedge HCLK or negedge HRESETn)
|
||||
if (~HRESETn) begin
|
||||
MSIP <= 0;
|
||||
MTIMECMP <= 0;
|
||||
// MTIMECMP is not reset
|
||||
MTIMECMP <= 64'hFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts
|
||||
end else if (memwrite) begin
|
||||
if (entryd == 16'h0000) MSIP <= HWDATA[0];
|
||||
if (entryd == 16'h4000) begin
|
||||
@ -104,7 +103,6 @@ module clint (
|
||||
always_ff @(posedge HCLK or negedge HRESETn)
|
||||
if (~HRESETn) begin
|
||||
MTIME <= 0;
|
||||
// MTIMECMP is not reset
|
||||
end else if (memwrite & entryd == 16'hBFF8) begin
|
||||
// MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed
|
||||
for(j=0;j<`XLEN/8;j++)
|
||||
|
@ -1,4 +1,4 @@
|
||||
all: exptestgen testgen qslc_r4a2 qslc_r4a2b
|
||||
all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2
|
||||
|
||||
sqrttestgen: sqrttestgen.c
|
||||
gcc sqrttestgen.c -o sqrttestgen -lm
|
||||
@ -19,5 +19,9 @@ qslc_r4a2b: qslc_r4a2b.c
|
||||
gcc qslc_r4a2b.c -o qslc_r4a2b -lm
|
||||
./qslc_r4a2b > qslc_r4a2b.tv
|
||||
|
||||
qslc_sqrt_r4a2: qslc_sqrt_r4a2.c
|
||||
gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm
|
||||
./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv
|
||||
|
||||
clean:
|
||||
rm -f testgen exptestgen qslc_r4a2
|
||||
rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2
|
||||
|
1024
pipelined/srt/qsel4.dat
Normal file
1024
pipelined/srt/qsel4.dat
Normal file
File diff suppressed because it is too large
Load Diff
@ -11,7 +11,7 @@ module qsel4 (
|
||||
logic [2:0] Dmsbs;
|
||||
assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
|
||||
assign Wmsbs = PreWmsbs[7:1];
|
||||
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
|
||||
assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3];
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
// W = xxxx.xxx...
|
||||
|
BIN
pipelined/srt/qslc_sqrt_r4a2
Executable file
BIN
pipelined/srt/qslc_sqrt_r4a2
Executable file
Binary file not shown.
198
pipelined/srt/qslc_sqrt_r4a2.c
Normal file
198
pipelined/srt/qslc_sqrt_r4a2.c
Normal file
@ -0,0 +1,198 @@
|
||||
/*
|
||||
Program: qslc_sqrt_r4a2.c
|
||||
Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory)
|
||||
User: James E. Stine
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#define DIVISOR_SIZE 3
|
||||
#define CARRY_SIZE 7
|
||||
#define SUM_SIZE 7
|
||||
#define TOT_SIZE 7
|
||||
|
||||
void disp_binary(double, int, int);
|
||||
|
||||
struct bits {
|
||||
unsigned int divisor : DIVISOR_SIZE;
|
||||
int tot : TOT_SIZE;
|
||||
} pla;
|
||||
|
||||
/*

  Function:      disp_binary
  Description:   Prints x to stdout as a string of '0'/'1' characters, most
                 significant bit first, with no radix point and no newline.
                 bits_to_left + bits_to_right digits are always produced.
                 A negative x is printed in two's complement form by adding
                 2^bits_to_left before conversion.  If |x| is smaller than
                 2^-bits_to_right the output is all zeros.
  Argument List: double x            The value to be converted
                 int bits_to_left    Number of bits left of radix point
                 int bits_to_right   Number of bits right of radix point
  Return value:  none

*/
void disp_binary(double x, int bits_to_left, int bits_to_right) {
  int i;
  double weight;

  /* Magnitude below one LSB: nothing representable, emit all zeros. */
  if (fabs(x) < pow(2.0, ((double) -bits_to_right))) {
    for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
      printf("0");
    }
    return;
  }

  /* Map a negative value onto its two's complement bit pattern. */
  if (x < 0.0)
    x = pow(2.0, ((double) bits_to_left)) + x;

  /* Peel off one bit per iteration, starting at weight 2^(bits_to_left-1). */
  for (i = -bits_to_left + 1; i <= bits_to_right; i++) {
    weight = pow(2.0, ((double) -i));
    if (x < weight)
      printf("0");
    else {
      printf("1");
      x -= weight;
    }
  }
}
|
||||
|
||||
int main() {
|
||||
int m;
|
||||
int n;
|
||||
int o;
|
||||
pla.divisor = 0;
|
||||
pla.tot = 0;
|
||||
printf("\tcase({D[5:3],Wmsbs})\n");
|
||||
for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) {
|
||||
for (m=0; m < pow(2.0, TOT_SIZE); m++) {
|
||||
printf("\t\t11'b");
|
||||
disp_binary((double) pla.divisor, DIVISOR_SIZE, 0);
|
||||
printf("_");
|
||||
disp_binary((double) pla.tot, TOT_SIZE, 0);
|
||||
printf(": q = 4'b");
|
||||
|
||||
/*
|
||||
4 bits for Radix 4 (a=2)
|
||||
1000 = +2
|
||||
0100 = +1
|
||||
0000 = 0
|
||||
0010 = -1
|
||||
0001 = -2
|
||||
*/
|
||||
switch (pla.divisor) {
|
||||
case 0:
|
||||
if ((pla.tot) >= 24)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -8)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -26)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 1:
|
||||
if ((pla.tot) >= 28)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -10)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -28)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 2:
|
||||
if ((pla.tot) >= 32)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -12)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -32)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 3:
|
||||
if ((pla.tot) >= 32)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 8)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -12)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -34)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 4:
|
||||
if ((pla.tot) >= 36)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 12)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -12)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -36)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 5:
|
||||
if ((pla.tot) >= 40)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 12)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -16)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -40)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 6:
|
||||
if ((pla.tot) >= 40)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 16)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -16)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -44)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
case 7:
|
||||
if ((pla.tot) >= 44)
|
||||
printf("1000");
|
||||
else if ((pla.tot) >= 16)
|
||||
printf("0100");
|
||||
else if ((pla.tot) >= -16)
|
||||
printf("0000");
|
||||
else if ((pla.tot) >= -46)
|
||||
printf("0010");
|
||||
else
|
||||
printf("0001");
|
||||
break;
|
||||
default: printf ("XXX");
|
||||
|
||||
}
|
||||
|
||||
printf(";\n");
|
||||
(pla.tot)++;
|
||||
}
|
||||
(pla.divisor)++;
|
||||
}
|
||||
printf("\tendcase\n");
|
||||
|
||||
}
|
1026
pipelined/srt/qslc_sqrt_r4a2.sv
Normal file
1026
pipelined/srt/qslc_sqrt_r4a2.sv
Normal file
File diff suppressed because it is too large
Load Diff
@ -17,7 +17,7 @@ if [file exists work] {
|
||||
}
|
||||
vlib work
|
||||
|
||||
vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
|
||||
vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv
|
||||
vopt +acc work.testbenchradix4 -o workopt
|
||||
vsim workopt
|
||||
|
||||
|
@ -30,42 +30,35 @@
|
||||
|
||||
`include "wally-config.vh"
|
||||
|
||||
`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF)
|
||||
|
||||
module srtradix4 (
|
||||
input logic clk,
|
||||
input logic Start,
|
||||
input logic Stall, // *** multiple pipe stages
|
||||
input logic Flush, // *** multiple pipe stages
|
||||
// Floating Point Inputs
|
||||
// later add exponents, signs, special cases
|
||||
input logic XSign, YSign,
|
||||
input logic [`NE-1:0] XExp, YExp,
|
||||
input logic [`NF-1:0] XFrac, YFrac,
|
||||
input logic DivStart,
|
||||
input logic [`NE-1:0] XExpE, YExpE,
|
||||
input logic [`NF:0] XManE, YManE,
|
||||
input logic [`XLEN-1:0] SrcA, SrcB,
|
||||
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
|
||||
input logic XZeroE,
|
||||
input logic W64, // 32-bit ints on XLEN=64
|
||||
input logic Signed, // Interpret integers as signed 2's complement
|
||||
input logic Int, // Choose integer inputs
|
||||
input logic Sqrt, // perform square root, not divide
|
||||
output logic rsign,
|
||||
output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers
|
||||
output logic [`NE-1:0] rExp,
|
||||
output logic [3:0] Flags
|
||||
output logic DivDone,
|
||||
output logic [`DIVLEN+2:0] Quot,
|
||||
output logic [`XLEN-1:0] Rem, // *** later handle integers
|
||||
output logic [`NE:0] DivCalcExpE
|
||||
);
|
||||
|
||||
// logic qp, qz, qm; // quotient is +1, 0, or -1
|
||||
logic [3:0] q;
|
||||
logic [`NE-1:0] calcExp;
|
||||
logic calcSign;
|
||||
logic [`DIVLEN-1:0] X, Dpreproc;
|
||||
logic [`NE:0] DivCalcExp;
|
||||
logic [`DIVLEN:0] X;
|
||||
logic [`DIVLEN-1:0] Dpreproc;
|
||||
logic [`DIVLEN+3:0] WS, WSA, WSN;
|
||||
logic [`DIVLEN+3:0] WC, WCA, WCN;
|
||||
logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel;
|
||||
logic [$clog2(`XLEN+1)-1:0] intExp;
|
||||
logic intSign;
|
||||
|
||||
srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
|
||||
srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign);
|
||||
|
||||
// Top Muxes and Registers
|
||||
// When start is asserted, the inputs are loaded into the divider.
|
||||
@ -77,11 +70,11 @@ module srtradix4 (
|
||||
// - otherwise load WSA into the flipflop
|
||||
// *** what does N and A stand for?
|
||||
// *** change shift amount for radix4
|
||||
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN);
|
||||
mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN);
|
||||
flop #(`DIVLEN+4) wsflop(clk, WSN, WS);
|
||||
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN);
|
||||
mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
|
||||
flop #(`DIVLEN+4) wcflop(clk, WCN, WC);
|
||||
flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D);
|
||||
flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
|
||||
|
||||
// Quotient Selection logic
|
||||
// Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
|
||||
@ -94,9 +87,8 @@ module srtradix4 (
|
||||
// 0001 = -2
|
||||
qsel4 qsel4(.D, .WS, .WC, .q);
|
||||
|
||||
// Store the expoenent and sign until division is done
|
||||
flopen #(`NE) expflop(clk, Start, calcExp, rExp);
|
||||
flopen #(1) signflop(clk, Start, calcSign, rsign);
|
||||
// Store the exponent until division is done
|
||||
flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE);
|
||||
|
||||
// Divisor Selection logic
|
||||
// *** radix 4 change to choose -2 to 2
|
||||
@ -120,11 +112,11 @@ module srtradix4 (
|
||||
csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
|
||||
|
||||
//*** change for radix 4
|
||||
otfc4 #(`DIVLEN) otfc4(clk, Start, q, Quot);
|
||||
otfc4 otfc4(.clk, .DivStart, .q, .Quot);
|
||||
|
||||
expcalc expcalc(.XExp, .YExp, .calcExp);
|
||||
expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp);
|
||||
|
||||
signcalc signcalc(.XSign, .YSign, .calcSign);
|
||||
divcounter divcounter(clk, DivStart, DivDone);
|
||||
|
||||
endmodule
|
||||
|
||||
@ -132,91 +124,154 @@ endmodule
|
||||
// Submodules //
|
||||
////////////////
|
||||
|
||||
/////////////
|
||||
// counter //
|
||||
/////////////
|
||||
module divcounter(input  logic clk,
                  input  logic DivStart,
                  output logic DivDone);

  // Cycle count: cleared when DivStart is sampled high, otherwise incremented
  // every clock (6 bits, so it wraps after 64 cycles).
  logic [5:0] count;

  // Sequences the divider through its iterations.
  // DivDone is registered: it is driven high (after the #1 clk-to-q delay)
  // on the edge where count equals `DIVLEN/2+1, and low on every other edge.
  //
  // DivDone is written only with nonblocking assignments.  Mixing a blocking
  // "DivDone = 0" with nonblocking updates in the same clocked block makes
  // the value seen by other @(posedge clk) processes depend on simulator
  // scheduling order.
  always @(posedge clk)
    begin
      // An unknown (X) count takes the else branch, so DivDone still
      // resolves to 0 before the first DivStart.
      if (count == `DIVLEN/2+1) DivDone <= #1 1;
      else                      DivDone <= #1 0;

      if (DivStart) count <= #1 0;
      else          count <= #1 count+1;
    end
endmodule
|
||||
|
||||
module qsel4 (
  input  logic [`DIVLEN+3:0] D,
  input  logic [`DIVLEN+3:0] WS, WC,
  output logic [3:0]         q
);
  // Radix-4 quotient digit selection by table lookup.
  //   index  = {Dmsbs, Wmsbs}  (3 + 7 = 10 bits -> 1024 entries)
  //   Dmsbs  = D[`DIVLEN-1:`DIVLEN-3]
  //   Wmsbs  = upper 7 bits of the 8-bit sum of the top 8 bits of WC and WS
  //            (carry out of the 8-bit add is discarded)
  // Encoding of q as written into the table below:
  //   1000, 0100, 0000, 0010, 0001 in descending order of Wmsbs threshold.
  logic [6:0] Wmsbs;
  logic [7:0] PreWmsbs;
  logic [2:0] Dmsbs;

  assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4];
  assign Wmsbs    = PreWmsbs[7:1];
  assign Dmsbs    = D[`DIVLEN-1:`DIVLEN-3];

  logic [3:0] QSel4[1023:0];

  // Populate the lookup table once at time zero.
  initial begin
    integer d, w, i, w2;
    integer ge2, ge1, ge0, gem1; // smallest signed Wmsbs selecting 1000 / 0100 / 0000 / 0010

    for (d = 0; d < 8; d++) begin
      // Thresholds depend only on the divisor estimate.
      case (d)
        0: begin ge2 = 12; ge1 = 4; ge0 = -4; gem1 = -13; end
        1: begin ge2 = 14; ge1 = 4; ge0 = -6; gem1 = -15; end
        2: begin ge2 = 15; ge1 = 4; ge0 = -6; gem1 = -16; end
        3: begin ge2 = 16; ge1 = 4; ge0 = -6; gem1 = -18; end
        4: begin ge2 = 18; ge1 = 6; ge0 = -8; gem1 = -20; end
        5: begin ge2 = 20; ge1 = 6; ge0 = -8; gem1 = -20; end
        6: begin ge2 = 20; ge1 = 8; ge0 = -8; gem1 = -22; end
        7: begin ge2 = 24; ge1 = 8; ge0 = -8; gem1 = -24; end
      endcase

      for (w = 0; w < 128; w++) begin
        i  = d*128+w;
        w2 = w-128*(w>=64); // convert to two's complement

        if      (w2 >= ge2)  QSel4[i] = 4'b1000;
        else if (w2 >= ge1)  QSel4[i] = 4'b0100;
        else if (w2 >= ge0)  QSel4[i] = 4'b0000;
        else if (w2 >= gem1) QSel4[i] = 4'b0010;
        else                 QSel4[i] = 4'b0001;
      end
    end
  end

  assign q = QSel4[{Dmsbs,Wmsbs}];

endmodule
|
||||
|
||||
///////////////////
|
||||
// Preprocessing //
|
||||
///////////////////
|
||||
module srtpreproc (
|
||||
input logic [`XLEN-1:0] SrcA, SrcB,
|
||||
input logic [`NF-1:0] XFrac, YFrac,
|
||||
input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
|
||||
input logic [`NF:0] XManE, YManE,
|
||||
input logic W64, // 32-bit ints on XLEN=64
|
||||
input logic Signed, // Interpret integers as signed 2's complement
|
||||
input logic Int, // Choose integer inputs
|
||||
input logic Sqrt, // perform square root, not divide
|
||||
output logic [`DIVLEN-1:0] X, D,
|
||||
output logic [`DIVLEN:0] X,
|
||||
output logic [`DIVLEN-1:0] Dpreproc,
|
||||
output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent
|
||||
output logic intSign // Quotient integer sign
|
||||
);
|
||||
|
||||
logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
|
||||
logic [`XLEN-1:0] PosA, PosB;
|
||||
logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
|
||||
// logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
|
||||
// logic [`XLEN-1:0] PosA, PosB;
|
||||
// logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
|
||||
logic [`DIVLEN:0] PreprocA, PreprocX;
|
||||
logic [`DIVLEN-1:0] PreprocB, PreprocY;
|
||||
|
||||
assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
|
||||
assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
|
||||
// assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
|
||||
// assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
|
||||
|
||||
lzc #(`XLEN) lzcA (PosA, zeroCntA);
|
||||
lzc #(`XLEN) lzcB (PosB, zeroCntB);
|
||||
// lzc #(`XLEN) lzcA (PosA, zeroCntA);
|
||||
// lzc #(`XLEN) lzcB (PosB, zeroCntB);
|
||||
|
||||
assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
|
||||
assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
|
||||
// assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
|
||||
// assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
|
||||
|
||||
assign PreprocA = ExtraA << zeroCntA;
|
||||
assign PreprocB = ExtraB << (zeroCntB + 1);
|
||||
assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}};
|
||||
assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}};
|
||||
// assign PreprocA = ExtraA << zeroCntA;
|
||||
// assign PreprocB = ExtraB << (zeroCntB + 1);
|
||||
assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}};
|
||||
assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}};
|
||||
|
||||
|
||||
assign X = Int ? PreprocA : PreprocX;
|
||||
assign D = Int ? PreprocB : PreprocY;
|
||||
assign intExp = zeroCntB - zeroCntA + 1;
|
||||
assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
|
||||
assign Dpreproc = Int ? PreprocB : PreprocY;
|
||||
// assign intExp = zeroCntB - zeroCntA + 1;
|
||||
// assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
|
||||
endmodule
|
||||
|
||||
/////////////////////////////////
|
||||
// Quotient Selection, Radix 2 //
|
||||
/////////////////////////////////
|
||||
module qsel2 ( // *** eventually just change to 4 bits
|
||||
input logic [`DIVLEN+3:`DIVLEN] ps, pc,
|
||||
output logic qp, qz, qm
|
||||
);
|
||||
|
||||
logic [`DIVLEN+3:`DIVLEN] p, g;
|
||||
logic magnitude, sign, cout;
|
||||
|
||||
// The quotient selection logic is presented for simplicity, not
|
||||
// for efficiency. You can probably optimize your logic to
|
||||
// select the proper divisor with less delay.
|
||||
|
||||
// Quotient equations from EE371 lecture notes 13-20
|
||||
assign p = ps ^ pc;
|
||||
assign g = ps & pc;
|
||||
|
||||
assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
|
||||
assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN]));
|
||||
assign #1 sign = p[`DIVLEN+3] ^ cout;
|
||||
/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) &
|
||||
(ps[52]^pc[52]));
|
||||
assign #1 sign = (ps[55]^pc[55])^
|
||||
(ps[54] & pc[54] | ((ps[54]^pc[54]) &
|
||||
(ps[53]&pc[53] | ((ps[53]^pc[53]) &
|
||||
(ps[52]&pc[52]))))); */
|
||||
|
||||
// Produce quotient = +1, 0, or -1
|
||||
assign #1 qp = magnitude & ~sign;
|
||||
assign #1 qz = ~magnitude;
|
||||
assign #1 qm = magnitude & sign;
|
||||
endmodule
|
||||
|
||||
|
||||
///////////////////////////////////
|
||||
// On-The-Fly Converter, Radix 4 //
|
||||
///////////////////////////////////
|
||||
module otfc4 #(parameter N=65) (
|
||||
module otfc4 (
|
||||
input logic clk,
|
||||
input logic Start,
|
||||
input logic DivStart,
|
||||
input logic [3:0] q,
|
||||
output logic [N-1:0] r
|
||||
output logic [`DIVLEN+2:0] Quot
|
||||
);
|
||||
|
||||
// The on-the-fly converter transfers the quotient
|
||||
@ -224,20 +279,20 @@ module otfc4 #(parameter N=65) (
|
||||
//
|
||||
// This code follows the pseudocode presented in the
|
||||
// floating point chapter of the book. Right now,
|
||||
// it is written for Radix-2 division.
|
||||
// it is written for Radix-4 division.
|
||||
//
|
||||
// QM is Q-1. It allows us to write negative bits
|
||||
// without using a costly CPA.
|
||||
logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux;
|
||||
logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux;
|
||||
// QR and QMR are the shifted versions of Q and QM.
|
||||
// They are treated as [N-1:r] size signals, and
|
||||
// discard the r most significant bits of Q and QM.
|
||||
logic [N:0] QR, QMR;
|
||||
logic [`DIVLEN:0] QR, QMR;
|
||||
// if starting a new division set Q to 0 and QM to -1
|
||||
mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux);
|
||||
mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux);
|
||||
flop #(N+3) Qreg(clk, QMux, Q);
|
||||
flop #(N+3) QMreg(clk, QMMux, QM);
|
||||
mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux);
|
||||
mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux);
|
||||
flop #(`DIVLEN+3) Qreg(clk, QMux, Quot);
|
||||
flop #(`DIVLEN+3) QMreg(clk, QMMux, QM);
|
||||
|
||||
// shift Q (quotient) and QM (quotient-1)
|
||||
// if q = 2 Q = {Q, 10} QM = {Q, 01}
|
||||
@ -247,11 +302,9 @@ module otfc4 #(parameter N=65) (
|
||||
// else if q = -2 Q = {QM, 10} QM = {QM, 01}
|
||||
// *** how does the 0 concatination numbers work?
|
||||
|
||||
|
||||
|
||||
always_comb begin
|
||||
QR = Q[N:0];
|
||||
QMR = QM[N:0]; // Shift Q and QM
|
||||
QR = Quot[`DIVLEN:0];
|
||||
QMR = QM[`DIVLEN:0]; // Shift Q and QM
|
||||
if (q[3]) begin // +2
|
||||
QNext = {QR, 2'b10};
|
||||
QMNext = {QR, 2'b01};
|
||||
@ -269,7 +322,8 @@ module otfc4 #(parameter N=65) (
|
||||
QMNext = {QMR, 2'b11};
|
||||
end
|
||||
end
|
||||
assign r = Q[N+2] ? Q[N+1:2] : Q[N:1];
|
||||
// Quot is in the range [.5, 2) so normalize the result if necessary
|
||||
// assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1];
|
||||
|
||||
endmodule
|
||||
|
||||
@ -287,7 +341,7 @@ module csa #(parameter N=69) (
|
||||
// This block adds in1, in2, in3, and cin to produce
|
||||
// a result out1 / out2 in carry-save redundant form.
|
||||
// cin is just added to the least significant bit and
|
||||
// is required to handle adding a negative divisor.
|
||||
// is required to handle adding a negative divisor.
|
||||
// Fortunately, the carry (out2) is shifted left by one
|
||||
// bit, leaving room in the least significant bit to
|
||||
// insert cin.
|
||||
@ -302,22 +356,11 @@ endmodule
|
||||
// expcalc //
|
||||
//////////////
|
||||
module expcalc(
|
||||
input logic [`NE-1:0] XExp, YExp,
|
||||
output logic [`NE-1:0] calcExp
|
||||
input logic [`NE-1:0] XExpE, YExpE,
|
||||
input logic XZeroE,
|
||||
output logic [`NE:0] DivCalcExp
|
||||
);
|
||||
|
||||
assign calcExp = XExp - YExp + (`NE)'(`BIAS);
|
||||
assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}};
|
||||
|
||||
endmodule
|
||||
|
||||
//////////////
|
||||
// signcalc //
|
||||
//////////////
|
||||
module signcalc(
|
||||
input logic XSign, YSign,
|
||||
output logic calcSign
|
||||
);
|
||||
|
||||
assign calcSign = XSign ^ YSign;
|
||||
|
||||
endmodule
|
@ -2,30 +2,6 @@
|
||||
`include "wally-config.vh"
|
||||
`define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF)
|
||||
|
||||
/////////////
|
||||
// counter //
|
||||
/////////////
|
||||
module counter(input logic clk,
|
||||
input logic req,
|
||||
output logic done);
|
||||
|
||||
logic [5:0] count;
|
||||
|
||||
// This block of control logic sequences the divider
|
||||
// through its iterations. You may modify it if you
|
||||
// build a divider which completes in fewer iterations.
|
||||
// You are not responsible for the (trivial) circuit
|
||||
// design of the block.
|
||||
|
||||
always @(posedge clk)
|
||||
begin
|
||||
if (count == `DIVLEN/2+1) done <= #1 1;
|
||||
else if (done | req) done <= #1 0;
|
||||
if (req) count <= #1 0;
|
||||
else count <= #1 count+1;
|
||||
end
|
||||
endmodule
|
||||
|
||||
///////////
|
||||
// clock //
|
||||
///////////
|
||||
@ -43,7 +19,7 @@ endmodule
|
||||
module testbenchradix4;
|
||||
logic clk;
|
||||
logic req;
|
||||
logic done;
|
||||
logic DivDone;
|
||||
logic [63:0] a, b;
|
||||
logic [51:0] afrac, bfrac;
|
||||
logic [10:0] aExp, bExp;
|
||||
@ -65,22 +41,20 @@ module testbenchradix4;
|
||||
logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a
|
||||
// bit field of an array
|
||||
logic [63:0] correctr, nextr, diffn, diffp;
|
||||
logic [10:0] rExp;
|
||||
logic rsign;
|
||||
logic [10:0] DivExp;
|
||||
logic DivSgn;
|
||||
integer testnum, errors;
|
||||
|
||||
// Divider
|
||||
srtradix4 srtradix4(.clk, .Start(req),
|
||||
.Stall(1'b0), .Flush(1'b0),
|
||||
.XExp(aExp), .YExp(bExp), .rExp,
|
||||
.XSign(asign), .YSign(bsign), .rsign,
|
||||
srtradix4 srtradix4(.clk, .DivStart(req),
|
||||
.XExpE(aExp), .YExpE(bExp), .DivExp,
|
||||
.XSgnE(asign), .YSgnE(bsign), .DivSgn,
|
||||
.XFrac(afrac), .YFrac(bfrac),
|
||||
.SrcA('0), .SrcB('0), .Fmt(2'b00),
|
||||
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0),
|
||||
.Quot, .Rem(), .Flags());
|
||||
.SrcA('0), .SrcB('0),
|
||||
.W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone,
|
||||
.Quot, .Rem());
|
||||
|
||||
// Counter
|
||||
counter counter(clk, req, done);
|
||||
|
||||
|
||||
initial
|
||||
@ -112,14 +86,14 @@ module testbenchradix4;
|
||||
always @(posedge clk)
|
||||
begin
|
||||
r = Quot[`DIVLEN-1:`DIVLEN - 52];
|
||||
if (done) begin
|
||||
if (DivDone) begin
|
||||
req <= 1;
|
||||
diffp = correctr[51:0] - r;
|
||||
diffn = r - correctr[51:0];
|
||||
if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
|
||||
if ((DivSgn !== correctr[63]) | (DivExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
|
||||
begin
|
||||
errors = errors+1;
|
||||
$display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp);
|
||||
$display("result was %h_%h, should be %h %h %h\n", DivExp, r, correctr, diffn, diffp);
|
||||
$display("failed\n");
|
||||
$stop;
|
||||
end
|
||||
|
@ -48,13 +48,14 @@ module testbenchfp;
|
||||
logic XInf, YInf, ZInf; // is the input infinity
|
||||
logic XZero, YZero, ZZero; // is the input zero
|
||||
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
|
||||
logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
|
||||
logic [`CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
|
||||
logic IntZeroE;
|
||||
logic CvtResSgnE;
|
||||
logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
|
||||
logic [`NE:0] CvtCalcExpE; // the calculated expoent
|
||||
logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by
|
||||
logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by
|
||||
logic [`DIVLEN+2:0] Quot;
|
||||
logic CvtResDenormUfE;
|
||||
logic DivStart, DivDone;
|
||||
|
||||
|
||||
// in-between FMA signals
|
||||
@ -68,6 +69,8 @@ module testbenchfp;
|
||||
logic NegSumE;
|
||||
logic ZSgnEffE;
|
||||
logic PSgnE;
|
||||
logic DivSgn;
|
||||
logic [`NE:0] DivCalcExp;
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -205,16 +208,16 @@ module testbenchfp;
|
||||
Fmt = {Fmt, 2'b11};
|
||||
end
|
||||
end
|
||||
// if (TEST === "div" | TEST === "all") begin // if division is being tested
|
||||
// // add the divide tests/op-ctrls/unit/fmt
|
||||
// Tests = {Tests, f128div};
|
||||
// OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
// WriteInt = {WriteInt, 1'b0};
|
||||
// for(int i = 0; i<5; i++) begin
|
||||
// Unit = {Unit, `DIVUNIT};
|
||||
// Fmt = {Fmt, 2'b11};
|
||||
// end
|
||||
// end
|
||||
if (TEST === "div" | TEST === "all") begin // if division is being tested
|
||||
// add the divide tests/op-ctrls/unit/fmt
|
||||
Tests = {Tests, f128div};
|
||||
OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b11};
|
||||
end
|
||||
end
|
||||
// if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested
|
||||
// // add the square-root tests/op-ctrls/unit/fmt
|
||||
// Tests = {Tests, f128sqrt};
|
||||
@ -332,16 +335,16 @@ module testbenchfp;
|
||||
Fmt = {Fmt, 2'b01};
|
||||
end
|
||||
end
|
||||
// if (TEST === "div" | TEST === "all") begin // if division is being tested
|
||||
// // add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
// Tests = {Tests, f64div};
|
||||
// OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
// WriteInt = {WriteInt, 1'b0};
|
||||
// for(int i = 0; i<5; i++) begin
|
||||
// Unit = {Unit, `DIVUNIT};
|
||||
// Fmt = {Fmt, 2'b01};
|
||||
// end
|
||||
// end
|
||||
if (TEST === "div" | TEST === "all") begin // if division is being tested
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f64div};
|
||||
OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b01};
|
||||
end
|
||||
end
|
||||
// if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted
|
||||
// // add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
// Tests = {Tests, f64sqrt};
|
||||
@ -443,16 +446,16 @@ module testbenchfp;
|
||||
Fmt = {Fmt, 2'b00};
|
||||
end
|
||||
end
|
||||
// if (TEST === "div" | TEST === "all") begin // if division is being tested
|
||||
// // add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
// Tests = {Tests, f32div};
|
||||
// OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
// WriteInt = {WriteInt, 1'b0};
|
||||
// for(int i = 0; i<5; i++) begin
|
||||
// Unit = {Unit, `DIVUNIT};
|
||||
// Fmt = {Fmt, 2'b00};
|
||||
// end
|
||||
// end
|
||||
if (TEST === "div" | TEST === "all") begin // if division is being tested
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f32div};
|
||||
OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b00};
|
||||
end
|
||||
end
|
||||
// if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
|
||||
// // add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
// Tests = {Tests, f32sqrt};
|
||||
@ -536,16 +539,16 @@ module testbenchfp;
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
end
|
||||
// if (TEST === "div" | TEST === "all") begin // if division is being tested
|
||||
// // add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
// Tests = {Tests, f16div};
|
||||
// OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
// WriteInt = {WriteInt, 1'b0};
|
||||
// for(int i = 0; i<5; i++) begin
|
||||
// Unit = {Unit, `DIVUNIT};
|
||||
// Fmt = {Fmt, 2'b10};
|
||||
// end
|
||||
// end
|
||||
if (TEST === "div" | TEST === "all") begin // if division is being tested
|
||||
// add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
Tests = {Tests, f16div};
|
||||
OpCtrl = {OpCtrl, `DIV_OPCTRL};
|
||||
WriteInt = {WriteInt, 1'b0};
|
||||
for(int i = 0; i<5; i++) begin
|
||||
Unit = {Unit, `DIVUNIT};
|
||||
Fmt = {Fmt, 2'b10};
|
||||
end
|
||||
end
|
||||
// if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested
|
||||
// // add the correct tests/op-ctrls/unit/fmt to their lists
|
||||
// Tests = {Tests, f16sqrt};
|
||||
@ -611,7 +614,7 @@ module testbenchfp;
|
||||
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
|
||||
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
|
||||
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
|
||||
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
|
||||
.XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart,
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
|
||||
.XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN),
|
||||
.XDenormE(XDenorm), .ZDenormE(ZDenorm),
|
||||
@ -639,8 +642,8 @@ module testbenchfp;
|
||||
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
|
||||
.ProdExpE, .AddendStickyE, .KillProdE);
|
||||
|
||||
postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
|
||||
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
|
||||
postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]),
|
||||
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp),
|
||||
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
|
||||
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
|
||||
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
|
||||
@ -650,21 +653,16 @@ module testbenchfp;
|
||||
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
|
||||
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
|
||||
|
||||
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
|
||||
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
|
||||
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
|
||||
.FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
|
||||
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
|
||||
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
|
||||
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
|
||||
// fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf),
|
||||
// .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
|
||||
// .CvtRes, .CvtFlgE);
|
||||
// *** integrade divide and squareroot
|
||||
// fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ),
|
||||
// .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
|
||||
// .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
|
||||
// .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivRes), .Flg(FDivFlg));
|
||||
|
||||
srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero),
|
||||
.XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]),
|
||||
.DivDone, .Quot, .Rem());
|
||||
|
||||
assign CmpFlg[3:0] = 0;
|
||||
|
||||
// produce clock
|
||||
@ -817,7 +815,7 @@ end
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// check if the non-fma test is correct
|
||||
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
|
||||
// Report a mismatch for every unit except CVTINT and CMP. For the divide/sqrt unit the
// comparison is held off until DivDone, so a still-running divide is not flagged; all other
// units are checked on every vector (DivDone is OR-ed with "not the divide unit").
if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone|(UnitVal !== `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin
|
||||
errors += 1;
|
||||
$display("There is an error in %s", Tests[TestNum]);
|
||||
$display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
|
||||
@ -840,8 +838,7 @@ end
|
||||
$stop;
|
||||
end
|
||||
|
||||
|
||||
VectorNum += 1; // increment the vector
|
||||
if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector
|
||||
|
||||
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
|
||||
|
||||
@ -895,15 +892,17 @@ module readvectors (
|
||||
output logic XDenormE, ZDenormE, // is XYZ denormalized
|
||||
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
|
||||
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
|
||||
output logic XExpMaxE,
|
||||
output logic XExpMaxE,
|
||||
output logic DivStart,
|
||||
output logic [`FLEN-1:0] X, Y, Z
|
||||
);
|
||||
|
||||
// apply test vectors on rising edge of clk
|
||||
// Format of vectors Inputs(1/2/3)_AnsFlg
|
||||
always @(posedge clk) begin
|
||||
always @(VectorNum) begin
|
||||
#1;
|
||||
AnsFlg = TestVector[4:0];
|
||||
DivStart = 1'b0;
|
||||
case (Unit)
|
||||
`FMAUNIT:
|
||||
case (Fmt)
|
||||
@ -972,21 +971,33 @@ module readvectors (
|
||||
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
|
||||
Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
|
||||
Ans = TestVector[8+(`Q_LEN-1):8];
|
||||
if (~clk) #5;
|
||||
DivStart = 1'b1; #10 // one clk cycle
|
||||
DivStart = 1'b0;
|
||||
end
|
||||
2'b01: begin // double
|
||||
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
|
||||
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
|
||||
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
|
||||
if (~clk) #5;
|
||||
DivStart = 1'b1; #10
|
||||
DivStart = 1'b0;
|
||||
end
|
||||
2'b00: begin // single
|
||||
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
|
||||
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]};
|
||||
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
|
||||
if (~clk) #5;
|
||||
DivStart = 1'b1; #10
|
||||
DivStart = 1'b0;
|
||||
end
|
||||
2'b10: begin // half
|
||||
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
|
||||
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
|
||||
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
|
||||
if (~clk) #5;
|
||||
DivStart = 1'b1; #10
|
||||
DivStart = 1'b0;
|
||||
end
|
||||
endcase
|
||||
`CMPUNIT:
|
||||
|
@ -1,473 +0,0 @@
|
||||
///////////////////////////////////////////
|
||||
// testbench.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu 9 January 2021
|
||||
// Modified:
|
||||
//
|
||||
// Purpose: Wally Testbench and helper modules
|
||||
// Applies test programs from the riscv-arch-test and Imperas suites
|
||||
//
|
||||
// A component of the Wally configurable RISC-V project.
|
||||
//
|
||||
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
|
||||
//
|
||||
// MIT LICENSE
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
||||
// software and associated documentation files (the "Software"), to deal in the Software
|
||||
// without restriction, including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
|
||||
// to whom the Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all copies or
|
||||
// substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
// OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "wally-config.vh"
|
||||
`include "tests.vh"
|
||||
|
||||
// testbench: picks a list of test programs from the TEST parameter, loads each program's
// memfile into the DUT memories, runs the DUT, and once the D-cache flush helper reports done,
// compares the reference signature file against simulated memory (and the flushed-cache shadow copy).
module testbench;
|
||||
parameter TESTSPERIPH = 0; // set to 0 for regression
|
||||
parameter TESTSPRIV = 0; // set to 0 for regression
|
||||
parameter DEBUG=0;
|
||||
parameter TEST="none";
|
||||
|
||||
logic clk;
|
||||
logic reset_ext, reset;
|
||||
|
||||
parameter SIGNATURESIZE = 5000000;
|
||||
|
||||
int test, i, errors, totalerrors;
|
||||
// sig32 holds the signature file as 32-bit words; signature holds it repacked to XLEN-wide words
logic [31:0] sig32[0:SIGNATURESIZE];
|
||||
logic [`XLEN-1:0] signature[0:SIGNATURESIZE];
|
||||
logic [`XLEN-1:0] testadr;
|
||||
string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName;
|
||||
logic [31:0] InstrW;
|
||||
|
||||
// tests[] layout as used below: tests[0] selects the tvpaths entry, and the remaining entries
// come in pairs (test name, signature start address as a hex string)
string tests[];
|
||||
logic [3:0] dummy;
|
||||
|
||||
string ProgramAddrMapFile, ProgramLabelMapFile;
|
||||
logic [`AHBW-1:0] HRDATAEXT;
|
||||
logic HREADYEXT, HRESPEXT;
|
||||
logic [31:0] HADDR;
|
||||
logic [`AHBW-1:0] HWDATA;
|
||||
logic HWRITE;
|
||||
logic [2:0] HSIZE;
|
||||
logic [2:0] HBURST;
|
||||
logic [3:0] HPROT;
|
||||
logic [1:0] HTRANS;
|
||||
logic HMASTLOCK;
|
||||
logic HCLK, HRESETn;
|
||||
logic [`XLEN-1:0] PCW;
|
||||
|
||||
logic DCacheFlushDone, DCacheFlushStart;
|
||||
|
||||
// Testbench-side copies of the memory-stage PC and instruction, enabled by ~StallW.
// flopenr is defined elsewhere (presumably an enabled, resettable flop — confirm).
flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW);
|
||||
flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW);
|
||||
|
||||
// check assertions for a legal configuration
|
||||
riscvassertions riscvassertions();
|
||||
|
||||
// pick tests based on modes supported
|
||||
// A TEST name that matches no case item, or whose extension is not supported, leaves tests
// empty and stops the simulation in the size check at the end of this block.
initial begin
|
||||
$display("TEST is %s", TEST);
|
||||
//tests = '{};
|
||||
if (`XLEN == 64) begin // RV64
|
||||
case (TEST)
|
||||
"arch64i": tests = arch64i;
|
||||
"arch64priv": tests = arch64priv;
|
||||
"arch64c": if (`C_SUPPORTED)
|
||||
if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv};
|
||||
else tests = {arch64c};
|
||||
"arch64m": if (`M_SUPPORTED) tests = arch64m;
|
||||
"arch64d": if (`D_SUPPORTED) tests = arch64d;
|
||||
"imperas64i": tests = imperas64i;
|
||||
"imperas64p": tests = imperas64p;
|
||||
// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu;
|
||||
"imperas64f": if (`F_SUPPORTED) tests = imperas64f;
|
||||
"imperas64d": if (`D_SUPPORTED) tests = imperas64d;
|
||||
"imperas64m": if (`M_SUPPORTED) tests = imperas64m;
|
||||
"imperas64a": if (`A_SUPPORTED) tests = imperas64a;
|
||||
"imperas64c": if (`C_SUPPORTED) tests = imperas64c;
|
||||
else tests = imperas64iNOc;
|
||||
"testsBP64": tests = testsBP64;
|
||||
"wally64i": tests = wally64i; // *** redo
|
||||
"wally64priv": tests = wally64priv;// *** redo
|
||||
"imperas64periph": tests = imperas64periph;
|
||||
"coremark": tests = coremark;
|
||||
endcase
|
||||
end else begin // RV32
|
||||
case (TEST)
|
||||
"arch32i": tests = arch32i;
|
||||
"arch32priv": tests = arch32priv;
|
||||
"arch32c": if (`C_SUPPORTED)
|
||||
if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv};
|
||||
else tests = {arch32c};
|
||||
"arch32m": if (`M_SUPPORTED) tests = arch32m;
|
||||
"arch32f": if (`F_SUPPORTED) tests = arch32f;
|
||||
"imperas32i": tests = imperas32i;
|
||||
"imperas32p": tests = imperas32p;
|
||||
// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu;
|
||||
"imperas32f": if (`F_SUPPORTED) tests = imperas32f;
|
||||
"imperas32m": if (`M_SUPPORTED) tests = imperas32m;
|
||||
"imperas32a": if (`A_SUPPORTED) tests = imperas32a;
|
||||
"imperas32c": if (`C_SUPPORTED) tests = imperas32c;
|
||||
else tests = imperas32iNOc;
|
||||
"wally32i": tests = wally32i; // *** redo
|
||||
"wally32e": tests = wally32e;
|
||||
"wally32priv": tests = wally32priv; // *** redo
|
||||
"imperas32periph": tests = imperas32periph;
|
||||
endcase
|
||||
end
|
||||
if (tests.size() == 0) begin
|
||||
$display("TEST %s not supported in this configuration", TEST);
|
||||
$stop;
|
||||
end
|
||||
end
|
||||
|
||||
string signame, memfilename, pathname;
|
||||
|
||||
logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn;
|
||||
logic UARTSin, UARTSout;
|
||||
|
||||
logic SDCCLK;
|
||||
logic SDCCmdIn;
|
||||
logic SDCCmdOut;
|
||||
logic SDCCmdOE;
|
||||
logic [3:0] SDCDatIn;
|
||||
|
||||
logic HREADY;
|
||||
logic HSELEXT;
|
||||
|
||||
|
||||
// instantiate device to be tested
|
||||
// The external inputs are tied off to constants here: GPIO inputs 0, UART input 1,
// external AHB ready 1, external AHB response 0, external AHB read data 0.
assign GPIOPinsIn = 0;
|
||||
assign UARTSin = 1;
|
||||
assign HREADYEXT = 1;
|
||||
assign HRESPEXT = 0;
|
||||
assign HRDATAEXT = 0;
|
||||
|
||||
wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT,
|
||||
.HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT,
|
||||
.HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn,
|
||||
.UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK);
|
||||
|
||||
// Track names of instructions
|
||||
instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE,
|
||||
dut.core.ifu.FinalInstrRawF[31:0],
|
||||
dut.core.ifu.InstrD, dut.core.ifu.InstrE,
|
||||
dut.core.ifu.InstrM, InstrW,
|
||||
InstrFName, InstrDName, InstrEName, InstrMName, InstrWName);
|
||||
|
||||
// initialize tests
|
||||
// Both bounds shift right by 1+XLEN/32 (binary + binds tighter than >>), i.e. byte addresses
// are divided by XLEN/8 for XLEN of 32 or 64.
localparam integer MemStartAddr = `RAM_BASE>>(1+`XLEN/32);
|
||||
localparam integer MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32);
|
||||
|
||||
initial
|
||||
begin
|
||||
// test starts at 1 because tests[0] is the tvpaths selector (see the pathname assignment below)
test = 1;
|
||||
totalerrors = 0;
|
||||
testadr = 0;
|
||||
// fill memory with defined values to reduce Xs in simulation
|
||||
// Quick note the memory will need to be initialized. The C library does not
|
||||
// guarantee the initialized reads. For example a strcmp can read 6 byte
|
||||
// strings, but uses a load double to read them in. If the last 2 bytes are
|
||||
// not initialized the compare results in an 'x' which propagates through
|
||||
// the design.
|
||||
// NOTE(review): this loop writes dut.uncore.ram.ram.RAM, while the $readmemh calls below
// use dut.uncore.ram.RAM — confirm which hierarchical path is current.
if (TEST == "coremark")
|
||||
for (i=MemStartAddr; i<MemEndAddr; i = i+1)
|
||||
dut.uncore.ram.ram.RAM[i] = 64'h0;
|
||||
|
||||
// read test vectors into memory
|
||||
pathname = tvpaths[tests[0].atoi()];
|
||||
/* if (tests[0] == `IMPERASTEST)
|
||||
pathname = tvpaths[0];
|
||||
else pathname = tvpaths[1]; */
|
||||
memfilename = {pathname, tests[test], ".elf.memfile"};
|
||||
if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.ram.RAM);
|
||||
else $readmemh(memfilename, dut.uncore.ram.RAM);
|
||||
if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.ram.RAM);
|
||||
|
||||
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
|
||||
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
|
||||
$display("Read memfile %s", memfilename);
|
||||
// hold external reset for 42 time units, then release it
reset_ext = 1; # 42; reset_ext = 0;
|
||||
end
|
||||
|
||||
// generate clock to sequence tests
|
||||
// 10 time unit period: high for 5, low for 5
always
|
||||
begin
|
||||
clk = 1; # 5; clk = 0; # 5;
|
||||
// if ($time % 100000 == 0) $display("Time is %0t", $time);
|
||||
end
|
||||
|
||||
// check results
|
||||
// On each falling clock edge: stop if coremark has executed an ecall; otherwise, once the
// D-cache flush helper reports done, check the signature and load the next test.
always @(negedge clk)
|
||||
begin
|
||||
if (TEST == "coremark")
|
||||
if (dut.core.priv.priv.ecallM) begin
|
||||
$display("Benchmark: coremark is done.");
|
||||
$stop;
|
||||
end
|
||||
if (DCacheFlushDone) begin
|
||||
|
||||
#600; // give time for instructions in pipeline to finish
|
||||
// clear signature to prevent contamination from previous tests
|
||||
for(i=0; i<SIGNATURESIZE; i=i+1) begin
|
||||
sig32[i] = 'bx;
|
||||
end
|
||||
|
||||
// read signature, reformat in 64 bits if necessary
|
||||
signame = {pathname, tests[test], ".signature.output"};
|
||||
$readmemh(signame, sig32);
|
||||
i = 0;
|
||||
while (i < SIGNATURESIZE) begin
|
||||
if (`XLEN == 32) begin
|
||||
signature[i] = sig32[i];
|
||||
i = i+1;
|
||||
end else begin
|
||||
signature[i/2] = {sig32[i+1], sig32[i]};
|
||||
i = i + 2;
|
||||
end
|
||||
// An all-x word four entries back marks the end of the loaded data; if that happens on
// the very first check (i == 4) the signature file is treated as empty.
if (i >= 4 & sig32[i-4] === 'bx) begin
|
||||
if (i == 4) begin
|
||||
i = SIGNATURESIZE+1; // flag empty file
|
||||
$display(" Error: empty test file");
|
||||
end else i = SIGNATURESIZE; // skip over the rest of the x's for efficiency
|
||||
end
|
||||
end
|
||||
|
||||
// Check errors
|
||||
errors = (i == SIGNATURESIZE+1); // error if file is empty
|
||||
i = 0;
|
||||
// word index of the signature region: RAM_BASE plus the hex offset stored after the test name
testadr = (`RAM_BASE+tests[test+1].atohex())/(`XLEN/8);
|
||||
/* verilator lint_off INFINITELOOP */
|
||||
while (signature[i] !== 'bx) begin
|
||||
logic [`XLEN-1:0] sig;
|
||||
if (`DMEM == `MEM_TIM) sig = dut.core.lsu.dtim.ram.RAM[testadr+i];
|
||||
else sig = dut.uncore.ram.RAM[testadr+i];
|
||||
// $display("signature[%h] = %h sig = %h", i, signature[i], sig);
|
||||
// A word is a candidate mismatch only if it differs from both the memory copy (sig)
// and the flushed-cache copy in DCacheFlushFSM.ShadowRAM.
if (signature[i] !== sig &
|
||||
//if (signature[i] !== dut.core.lsu.dtim.ram.RAM[testadr+i] &
|
||||
(signature[i] !== DCacheFlushFSM.ShadowRAM[testadr+i])) begin // ***i+1?
|
||||
if ((signature[i] !== '0 | signature[i+4] !== 'x)) begin
|
||||
// if (signature[i+4] !== 'bx | (signature[i] !== 32'hFFFFFFFF & signature[i] !== 32'h00000000)) begin
|
||||
// report errors unless they are garbage at the end of the sim
|
||||
// kind of hacky test for garbage right now
|
||||
$display("sig4 = %h ne %b", signature[i+4], signature[i+4] !== 'bx);
|
||||
errors = errors+1;
|
||||
$display(" Error on test %s result %d: adr = %h sim (D$) %h sim (DMEM) = %h, signature = %h",
|
||||
tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], sig, signature[i]);
|
||||
// tests[test], i, (testadr+i)*(`XLEN/8), DCacheFlushFSM.ShadowRAM[testadr+i], dut.core.lsu.dtim.ram.RAM[testadr+i], signature[i]);
|
||||
$stop;//***debug
|
||||
end
|
||||
end
|
||||
i = i + 1;
|
||||
end
|
||||
/* verilator lint_on INFINITELOOP */
|
||||
if (errors == 0) begin
|
||||
$display("%s succeeded. Brilliant!!!", tests[test]);
|
||||
end
|
||||
else begin
|
||||
$display("%s failed with %d errors. :(", tests[test], errors);
|
||||
totalerrors = totalerrors+1;
|
||||
end
|
||||
// step over the (name, address) pair to the next test name
test = test + 2;
|
||||
if (test == tests.size()) begin
|
||||
if (totalerrors == 0) $display("SUCCESS! All tests ran without failures.");
|
||||
else $display("FAIL: %d test programs had errors", totalerrors);
|
||||
$stop;
|
||||
end
|
||||
else begin
|
||||
//pathname = tvpaths[tests[0]];
|
||||
memfilename = {pathname, tests[test], ".elf.memfile"};
|
||||
//$readmemh(memfilename, dut.uncore.ram.ram.RAM);
|
||||
if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.ram.RAM);
|
||||
else $readmemh(memfilename, dut.uncore.ram.RAM);
|
||||
if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.ram.RAM);
|
||||
|
||||
ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
|
||||
ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
|
||||
$display("Read memfile %s", memfilename);
|
||||
// NOTE(review): reset_ext is asserted here but the matching deassert is commented out;
// confirm where (or whether) it is released before the next test runs.
reset_ext = 1; # 47; //reset_ext = 0;
|
||||
end
|
||||
end
|
||||
end // always @ (negedge clk)
|
||||
|
||||
// track the current function or global label
|
||||
if (DEBUG == 1) begin : FunctionName
|
||||
FunctionName FunctionName(.reset(reset),
|
||||
.clk(clk),
|
||||
.ProgramAddrMapFile(ProgramAddrMapFile),
|
||||
.ProgramLabelMapFile(ProgramLabelMapFile));
|
||||
end
|
||||
|
||||
// Termination condition
|
||||
// terminate on a specific ECALL after li x3,1 for old Imperas tests, *** remove this when old imperas tests are removed
|
||||
// or sw gp,-56(t0) for new Imperas tests
|
||||
// or sd gp, -56(t0)
|
||||
// or on a jump to self infinite loop (6f) for RISC-V Arch tests
|
||||
logic ecf; // remove this once we don't rely on old Imperas tests with Ecalls
|
||||
if (`ZICSR_SUPPORTED) assign ecf = dut.core.priv.priv.EcallFaultM;
|
||||
else assign ecf = 0;
|
||||
// Start the flush on an ecall fault with x3 == 1 (already in the register file or being
// written this cycle), or on a valid memory-stage instruction equal to one of the three
// encodings compared against below.
assign DCacheFlushStart = ecf &
|
||||
(dut.core.ieu.dp.regf.rf[3] == 1 |
|
||||
(dut.core.ieu.dp.regf.we3 &
|
||||
dut.core.ieu.dp.regf.a3 == 3 &
|
||||
dut.core.ieu.dp.regf.wd3 == 1)) |
|
||||
(dut.core.ifu.InstrM == 32'h6f | dut.core.ifu.InstrM == 32'hfc32a423 | dut.core.ifu.InstrM == 32'hfc32a823) & dut.core.ieu.c.InstrValidM;
|
||||
|
||||
DCacheFlushFSM DCacheFlushFSM(.clk(clk),
|
||||
.reset(reset),
|
||||
.start(DCacheFlushStart),
|
||||
.done(DCacheFlushDone));
|
||||
|
||||
// initialize the branch predictor
|
||||
// The preload file names come from the TWO_BIT_PRELOAD and BTB_PRELOAD macros, which are
// not defined in this file.
if (`BPRED_ENABLED == 1)
|
||||
initial begin
|
||||
$readmemb(`TWO_BIT_PRELOAD, dut.core.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem);
|
||||
$readmemb(`BTB_PRELOAD, dut.core.ifu.bpred.bpred.TargetPredictor.memory.mem);
|
||||
end
|
||||
endmodule
|
||||
|
||||
// riscvassertions: sanity checks on the configuration macros.
// All checks run once, in a single initial block. Each immediate assertion reports through
// $error when a configuration rule is violated; the RAM_RANGE check only raises a $warning.
module riscvassertions;
  initial begin
    // Privileged / extension combinations
    assert (`PMP_ENTRIES == 0 | `PMP_ENTRIES==16 | `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64");
    assert (`S_SUPPORTED | `VIRTMEM_SUPPORTED == 0) else $error("Virtual memory requires S mode support");
    assert (`DIV_BITSPERCYCLE == 1 | `DIV_BITSPERCYCLE==2 | `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4");
    assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
    assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
    assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");

    // Cache geometry: way size, line length, and power-of-two requirements
    assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and virtual memory is enabled (to prevent aliasing)");
    assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
    assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");
    assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (`IMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and virtual memory is enabled (to prevent aliasing)");
    assert (`ICACHE_LINELENINBITS >= 32 | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled");
    assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size");
    assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2");
    assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (`DMEM != `MEM_CACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2");
    assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2");
    assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (`IMEM != `MEM_CACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2");

    // TLB sizes only matter when virtual memory is supported
    assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2");
    assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2");

    // Advisory only: a small RAM still elaborates, but some regression tests will not fit
    assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF");

    // Features that depend on ZICSR, and on each other
    assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported.");
    assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & `U_SUPPORTED == 0)) else $error("S and U modes not supported if ZICSR not supported");
    assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported");
    // assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM");
    assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache");
    assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache");
  end
endmodule
|
||||
|
||||
|
||||
/* verilator lint_on STMTDLY */
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
// DCacheFlushFSM: testbench helper. When DMEM is configured as a cache, a clock edge with
// start high makes one copyShadow instance per (line, way, word) capture that cache word,
// and the valid and dirty words are then written into ShadowRAM.
// done is start passed through a flop (flop is defined elsewhere; presumably one clock of delay — confirm).
module DCacheFlushFSM
|
||||
(input logic clk,
|
||||
input logic reset,
|
||||
input logic start,
|
||||
output logic done);
|
||||
|
||||
genvar adr;
|
||||
|
||||
// Word-indexed copy of RAM. Both bounds shift right by 1+XLEN/32 (binary + binds tighter
// than >>), i.e. byte addresses divided by XLEN/8 for XLEN of 32 or 64.
logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)];
|
||||
|
||||
if(`DMEM == `MEM_CACHE) begin
|
||||
// Cache geometry is read from the DUT's dcache through hierarchical references
localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES;
|
||||
localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS;
|
||||
localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN;
|
||||
localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN;
|
||||
localparam integer lognumlines = $clog2(numlines);
|
||||
localparam integer loglinebytelen = $clog2(linebytelen);
|
||||
localparam integer lognumways = $clog2(numways);
|
||||
localparam integer tagstart = lognumlines + loglinebytelen;
|
||||
|
||||
|
||||
|
||||
genvar index, way, cacheWord;
|
||||
// Snapshot arrays, indexed [way][line][word] and driven by the copyShadow instances below
logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0];
|
||||
logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0];
|
||||
logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0];
|
||||
logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0];
|
||||
logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0];
|
||||
for(index = 0; index < numlines; index++) begin
|
||||
for(way = 0; way < numways; way++) begin
|
||||
for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin
|
||||
copyShadow #(.tagstart(tagstart),
|
||||
.loglinebytelen(loglinebytelen))
|
||||
copyShadow(.clk,
|
||||
.start,
|
||||
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]),
|
||||
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
|
||||
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
|
||||
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]),
|
||||
.index(index),
|
||||
.cacheWord(cacheWord),
|
||||
.CacheData(CacheData[way][index][cacheWord]),
|
||||
.CacheAdr(CacheAdr[way][index][cacheWord]),
|
||||
.CacheTag(CacheTag[way][index][cacheWord]),
|
||||
.CacheValid(CacheValid[way][index][cacheWord]),
|
||||
.CacheDirty(CacheDirty[way][index][cacheWord]));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
integer i, j, k;
|
||||
|
||||
always @(posedge clk) begin
|
||||
// Two #1 delays (one here, one on the next line) pass after the clock edge before the
// snapshot arrays are read, so the copyShadow captures from this edge are in place.
if (start) begin #1
|
||||
#1
|
||||
for(i = 0; i < numlines; i++) begin
|
||||
for(j = 0; j < numways; j++) begin
|
||||
for(k = 0; k < numwords; k++) begin
|
||||
// only valid, dirty words are copied; the byte address is converted to a word index
if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin
|
||||
ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
flop #(1) doneReg(.clk, .d(start), .q(done));
|
||||
endmodule
|
||||
|
||||
// copyShadow: captures one cache word on each rising clock edge where start is high.
//   tagstart       - lowest bit index of the tag port; also the left shift applied to the tag
//   loglinebytelen - left shift applied to the line index when forming the address
// Inputs:  tag / valid / dirty / data of the word, plus its line index and word position.
// Outputs: registered copies of those values, and CacheAdr, the byte address rebuilt from
//          tag, line index and word position.
// The outputs hold their previous values on edges where start is low.
module copyShadow
  #(parameter tagstart, loglinebytelen)
  (input  logic                       clk,
   input  logic                       start,
   input  logic [`PA_BITS-1:tagstart] tag,
   input  logic                       valid, dirty,
   input  logic [`XLEN-1:0]           data,
   input  logic [32-1:0]              index,
   input  logic [32-1:0]              cacheWord,
   output logic [`XLEN-1:0]           CacheData,
   output logic [`PA_BITS-1:0]        CacheAdr,
   output logic [`XLEN-1:0]           CacheTag,
   output logic                       CacheValid,
   output logic                       CacheDirty);

  // Nonblocking assignments: these are clocked registers, so every output updates together
  // after the edge and other processes triggered by the same edge cannot race with the writes.
  always_ff @(posedge clk) begin
    if (start) begin
      CacheTag   <= tag;
      CacheValid <= valid;
      CacheDirty <= dirty;
      CacheData  <= data;
      // address = {tag, line index, word offset}; the word position is scaled to bytes
      CacheAdr   <= (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8));
    end
  end

endmodule
|
||||
|
@ -15,6 +15,7 @@ export MAXCORES ?= 4
|
||||
# MAXOPT turns on flattening, boundary optimization, and retiming
|
||||
# The output netlist is hard to interpret, but significantly better PPA
|
||||
export MAXOPT ?= 0
|
||||
export DRIVE ?= FLOP
|
||||
|
||||
time := $(shell date +%F-%H-%M)
|
||||
hash := $(shell git rev-parse --short HEAD)
|
||||
|
@ -1,7 +1,23 @@
|
||||
00000000 # test reset to zero
|
||||
00000000
|
||||
00000000
|
||||
A5A5A5A5
|
||||
A5A5A5A5 # test output pins
|
||||
5A5AFFFF
|
||||
00000000
|
||||
00000000 # test input enables
|
||||
5A5A0000
|
||||
A55A0000
|
||||
A55A0000 # test XOR
|
||||
A55A0000 # Test interrupt pending bits: high_ip
|
||||
5AA5FFFF # low_ip
|
||||
00000000 # rise_ip
|
||||
00000000 # fall_ip
|
||||
A4AA0000 # input_val
|
||||
A5FA0000 # high_ip
|
||||
5BF5FFFF # low_ip
|
||||
00A00000 # rise_ip
|
||||
01500000 # fall_ip
|
||||
00000000 # MEIP
|
||||
00000000 # Test interrupts can be enabled without being triggered: MIP = 0
|
||||
00000000 # MIP = 0
|
||||
00000000 # MIP = 0
|
||||
00000000 # MIP = 0
|
||||
00000800 # Test interrupts can be enabled and triggered: MEIP set
|
||||
00000000 # MEIP = 0
|
||||
|
@ -827,6 +827,28 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a
|
||||
addi a6, a6, 4
|
||||
.endm
|
||||
|
||||
// SETUP_PLIC: expand inside a peripheral test's test-case table to program the PLIC
// registers so that external interrupts can be generated.
.macro SETUP_PLIC
    // PLIC register addresses
    .equ PLIC_INTPRI_GPIO, 0x0C00000C       # priority register: GPIO is interrupt 3
    .equ PLIC_INTPRI_UART, 0x0C000028       # priority register: UART is interrupt 10
    .equ PLIC_INTPENDING0, 0x0C001000       # intPending0 register
    .equ PLIC_INTEN00,     0x0C002000       # context 0 (machine mode) enables, sources 31:1
    .equ PLIC_INTEN10,     0x0C002080       # context 1 (supervisor mode) enables, sources 31:1
    .equ PLIC_THRESH0,     0x0C200000       # context 0 (machine mode) priority threshold
    .equ PLIC_CLAIM0,      0x0C200004       # context 0 claim/complete register
    .equ PLIC_THRESH1,     0x0C201000       # context 1 (supervisor mode) priority threshold
    .equ PLIC_CLAIM1,      0x0C201004       # context 1 claim/complete register

    // Register writes, one (address, value, operation) row each, in program order
    .4byte PLIC_THRESH0,     0,          write32_test   # machine-mode threshold 0: accept all interrupts
    .4byte PLIC_THRESH1,     7,          write32_test   # supervisor-mode threshold 7: accept no interrupts
    .4byte PLIC_INTPRI_GPIO, 7,          write32_test   # GPIO gets high priority
    .4byte PLIC_INTPRI_UART, 7,          write32_test   # UART gets high priority
    .4byte PLIC_INTEN00,     0xFFFFFFFF, write32_test   # machine mode: every source enabled
    .4byte PLIC_INTEN10,     0x00000000, write32_test   # supervisor mode: every source disabled
.endm
|
||||
|
||||
.macro END_TESTS
|
||||
// invokes one final ecall to return to machine mode then terminates this program, so the output is
|
||||
// 0x8: termination called from U mode
|
||||
@ -937,6 +959,20 @@ read08_test:
|
||||
addi a6, a6, 4
|
||||
j test_loop // go to next test case
|
||||
|
||||
readmip_test:                           // store the current MIP value in the signature
    csrr    t2, mip                     // t2 = mip
    sw      t2, 0(t1)                   // write it at the signature pointer t1
    addi    a6, a6, 4                   // a6 += 4 (presumably the test library's running word count — confirm)
    addi    t1, t1, 4                   // move the signature pointer to the next word
    j       test_loop                   // go to next test case
|
||||
|
||||
readsip_test:                           // store the current SIP value in the signature
    csrr    t2, sip                     // t2 = sip
    sw      t2, 0(t1)                   // write it at the signature pointer t1
    addi    a6, a6, 4                   // a6 += 4 (presumably the test library's running word count — confirm)
    addi    t1, t1, 4                   // move the signature pointer to the next word
    j       test_loop                   // go to next test case
|
||||
|
||||
goto_s_mode:
|
||||
// return to address in t3,
|
||||
li a0, 3 // Trap handler behavior (go to supervisor mode)
|
||||
|
@ -72,6 +72,7 @@ test_cases:
|
||||
|
||||
.4byte input_val, 0x00000000, read32_test # input_val reset to zero
|
||||
.4byte input_en, 0x00000000, read32_test # input_en reset to zero
|
||||
# *** add more
|
||||
|
||||
# =========== Test output and input pins ===========
|
||||
|
||||
@ -86,14 +87,49 @@ test_cases:
|
||||
.4byte input_en, 0x00000000, write32_test # disable all input pins
|
||||
.4byte input_val, 0x00000000, read32_test # read 0 since input pins are disabled
|
||||
.4byte input_en, 0xFFFF0000, write32_test # enable a few input pins
|
||||
.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above.
|
||||
.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above.
|
||||
|
||||
# =========== Test output enables(?) ===========
|
||||
|
||||
.4byte output_en, 0xFFFFFFFF, write32_test # undo changes made to output enable
|
||||
|
||||
# =========== Test XOR functionality ===========
|
||||
.4byte out_xor, 0xFF00FF00, write32_test # invert certain pin values
|
||||
.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working
|
||||
.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working
|
||||
|
||||
# =========== Test Interrupt Pending bits ===========
|
||||
|
||||
SETUP_PLIC
|
||||
|
||||
.4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts
|
||||
.4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts
|
||||
.4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts
|
||||
.4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts
|
||||
.4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts
|
||||
.4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts
|
||||
.4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts
|
||||
.4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts
|
||||
.4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts
|
||||
.4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output
|
||||
.4byte high_ip, 0xA5FA0000, read32_test # high interrupt pending: earlier pending bits plus pins now high (0xA55A0000 | 0xA4AA0000); value fits in 32 bits
|
||||
.4byte low_ip, 0x5BF5FFFF, read32_test # low interrupt pending should be opposite high for enabled pins
|
||||
.4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising)
|
||||
.4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling)
|
||||
.4byte 0x0, 0x00000000, readmip_test # Check no external interrupt has been generated
|
||||
|
||||
# =========== Test interrupts can be enabled without being triggered ===========
|
||||
|
||||
.4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt
|
||||
.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending
|
||||
.4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt
|
||||
.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending
|
||||
.4byte rise_ie, 0x00010000, write32_test # enable rise interrupt on bit 16, no pending interrupt
|
||||
.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending
|
||||
.4byte fall_ie, 0x00010000, write32_test # enable fall interrupt on bit 16, no pending interrupt
|
||||
.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending
|
||||
|
||||
# =========== Test interrupts can be enabled and triggered
|
||||
|
||||
.4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending
|
||||
.4byte 0x0, 0x00000800, readmip_test # MEIP should be raised
|
||||
.4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending
|
||||
.4byte 0x0, 0x00000000, readmip_test # MEIP should be released
|
||||
|
||||
.4byte 0x0, 0x0, terminate_test # terminate tests
|
||||
|
@ -254,12 +254,12 @@ FFFFEE00
|
||||
FFFFEE00
|
||||
00000000
|
||||
00000000
|
||||
02BEEF10
|
||||
02BEEF10 # Something here is failing
|
||||
0000000B
|
||||
80000000
|
||||
00000003
|
||||
000000FF
|
||||
FFFFFFFF
|
||||
00000000
|
||||
000000FF
|
||||
00000000
|
||||
00000000
|
||||
@ -270,20 +270,20 @@ FFFFFFFF
|
||||
FFFFFF00
|
||||
00000000
|
||||
00000000
|
||||
02BEEF11
|
||||
02BEEF11 # this might be wrong
|
||||
0000000B
|
||||
80000000
|
||||
00000003
|
||||
000000CC
|
||||
CCCCCCCC
|
||||
00000000
|
||||
00000000
|
||||
00000033
|
||||
00000000
|
||||
000000FF
|
||||
000000CC
|
||||
FFFFFF33
|
||||
FFFFFF33
|
||||
00000003
|
||||
00000033 # input
|
||||
00000000 # output
|
||||
00000000 # rise ip
|
||||
00000000 # serviced rise ip
|
||||
000000CC # fall ip
|
||||
00000000
|
||||
000000FF # high ip
|
||||
00000033 # why is this 0x33?
|
||||
FFFFFFCC # low ip
|
||||
FFFFFFCC # serviced low ip
|
||||
00000000
|
||||
00000000
|
||||
03BEEF12
|
||||
@ -454,9 +454,9 @@ FFFFFF33
|
||||
00080000
|
||||
00080000
|
||||
00000000
|
||||
00000000 # is it this one that's failing?
|
||||
00000000
|
||||
00000000
|
||||
00080000
|
||||
00080000 # failing
|
||||
00080000
|
||||
FFFFFFFF
|
||||
FFF7FFFF
|
||||
@ -478,7 +478,7 @@ FFFFFFFF
|
||||
FFFFFFFE
|
||||
00000000
|
||||
00000000
|
||||
04BEEF1E
|
||||
04BEEF1E # this might also be wrong
|
||||
00000009
|
||||
80000000
|
||||
0000000A
|
||||
|
@ -271,7 +271,7 @@ main_code: #####
|
||||
sw t1, 0x04(t0)
|
||||
# raise all output_en
|
||||
sw t1, 0x08(t0)
|
||||
# raise all input_en
|
||||
# raise all rise_en
|
||||
sw t1, 0x18(t0)
|
||||
# ========== Execute Test ==========
|
||||
# set MEIE
|
||||
@ -616,6 +616,9 @@ Intr02BEEF11:
|
||||
sw t1, 0x08(t0)
|
||||
# set initial output state
|
||||
sw x0, 0x0C(t0)
|
||||
# clear XOR
|
||||
li t1, 0x00000000
|
||||
sw t1, 0x40(t0)
|
||||
# clear all pending interrupts
|
||||
li t1, 0xFFFFFFFF
|
||||
sw t1, 0x1C(t0)
|
||||
@ -843,7 +846,7 @@ Intr03BEEF1A:
|
||||
sw t1, 0x04(t0)
|
||||
# raise all output_en
|
||||
sw t1, 0x08(t0)
|
||||
# raise all input_en
|
||||
# raise all rise_en
|
||||
sw t1, 0x18(t0)
|
||||
# ========== Execute Test ==========
|
||||
# set MEIE and SEIE
|
||||
|
Loading…
Reference in New Issue
Block a user