diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh index dc6c9bb00..f11b71c0a 100644 --- a/pipelined/config/buildroot/wally-config.vh +++ b/pipelined/config/buildroot/wally-config.vh @@ -124,8 +124,6 @@ `define PLIC_NUM_SRC 53 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/buildroot/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/buildroot/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh index 15b2e08e7..7d083f3b5 100644 --- a/pipelined/config/rv32e/wally-config.vh +++ b/pipelined/config/rv32e/wally-config.vh @@ -130,8 +130,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 0 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh index 3522fd1e6..70124d551 100644 --- a/pipelined/config/rv32gc/wally-config.vh +++ b/pipelined/config/rv32gc/wally-config.vh @@ -128,8 +128,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh index 80d167a3d..d44072d6a 100644 --- a/pipelined/config/rv32i/wally-config.vh +++ b/pipelined/config/rv32i/wally-config.vh @@ -130,8 +130,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32i/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32i/BTBPredictor.txt" 
`define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index 13b2eb747..e42fd3100 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -128,8 +128,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh index 82f8446bb..3bc745eb1 100644 --- a/pipelined/config/rv64BP/wally-config.vh +++ b/pipelined/config/rv64BP/wally-config.vh @@ -130,8 +130,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt" `define BPRED_ENABLED 1 //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE `define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index bcc791338..cc8d1b2b8 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 +`define XLEN 32 // IEEE 754 compliance `define IEEE754 0 @@ -132,8 +132,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64fpquad/wally-config.vh 
b/pipelined/config/rv64fpquad/wally-config.vh index 08e8006ce..0dee000e2 100644 --- a/pipelined/config/rv64fpquad/wally-config.vh +++ b/pipelined/config/rv64fpquad/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 042364aca..9afa1a679 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh index 402c3b364..67ca51a7a 100644 --- a/pipelined/config/rv64i/wally-config.vh +++ b/pipelined/config/rv64i/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64i/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64i/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh index 491759359..fca1f2609 100644 --- a/pipelined/config/rv64ic/wally-config.vh +++ b/pipelined/config/rv64ic/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // 
BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index afe822f46..3c2699da0 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -94,11 +94,12 @@ `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS) // largest length in IEU/FPU -`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF) +`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF) `define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN) -`define LOGLGLEN $unsigned($clog2(`LGLEN+1)) -`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9)) -`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6)) +`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) +`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9)) +`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6)) +`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF) // Disable spurious Verilator warnings diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index 0a9e7d993..5ad721722 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -1,30 +1,9 @@ -make allclean: - make clean - make all - -make clean: - make clean -C ../../tests/riscof - make clean -C ../../tests/wally-riscv-arch-test -# make allclean -C ../../tests/imperas-riscv-tests - -make all: +all: archtests wallytests memfiles # *** Build old tests/imperas-riscv-tests for now; # Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test # DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired #make -C ../../tests/imperas-riscv-tests --jobs #make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs - - # Build riscv-arch-test 64 and 32-bit versions - make -C ../../tests/riscof/ --jobs - make -C ../../tests/riscof/ XLEN=32 --jobs - - # Build wally-riscv-arch-test - make -C 
../../tests/wally-riscv-arch-test/ --jobs - make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs - -# build the memfiles and address files. - make -f makefile-memfile wally-sim-files --jobs - # Only compile Imperas tests if they are installed locally. # They are usually a symlink to $RISCV/imperas-riscv-tests and only # get compiled there manually during installation @@ -36,4 +15,22 @@ make all: # Link Linux test vectors (fix this later***) #cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh - +allclean: clean all + +clean: + make clean -C ../../tests/riscof + make clean -C ../../tests/wally-riscv-arch-test +# make allclean -C ../../tests/imperas-riscv-tests + +archtests: + # Build riscv-arch-test 64 and 32-bit versions + make -C ../../tests/riscof/ --jobs + make -C ../../tests/riscof/ XLEN=32 --jobs + +wallytests: + # Build wally-riscv-arch-test + make -C ../../tests/wally-riscv-arch-test/ --jobs + make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs + +memfiles: + make -f makefile-memfile wally-sim-files --jobs diff --git a/pipelined/regression/lint-wally b/pipelined/regression/lint-wally index 2b5288d51..750486c4e 100755 --- a/pipelined/regression/lint-wally +++ b/pipelined/regression/lint-wally @@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/ verilator=`which verilator` basepath=$(dirname $0)/.. -for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do +for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do echo "$config linting..." 
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then echo "Exiting after $config lint due to errors or warnings" diff --git a/pipelined/regression/makefile-memfile b/pipelined/regression/makefile-memfile index 892e6db9b..c41963864 100644 --- a/pipelined/regression/makefile-memfile +++ b/pipelined/regression/makefile-memfile @@ -8,8 +8,9 @@ IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf") +OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump") MEMFILES ?= $(ELFFILES:.elf=.elf.memfile) -ADDRFILES ?= $(ELFFILES:.elf=.elf.objdump.addr) +ADDRFILES ?= $(OBJDUMPFILES:.objdump=.objdump.addr) print: echo "files in $(ALLDIRS) are $(ELFFILES)." diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 664f99648..07058241d 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -71,7 +71,7 @@ for test in tests64gc: grepstr="All tests ran without failures") configs.append(tc) -tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv"] #, "imperas32mmu""wally32i", +tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i", for test in tests32gc: tc = TestCase( name=test, diff --git a/pipelined/regression/testfloat.do b/pipelined/regression/testfloat.do index 68c240c8a..db6948699 100644 --- a/pipelined/regression/testfloat.do +++ b/pipelined/regression/testfloat.do @@ -32,7 +32,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access 
internal signals # $num = the added words after the call -vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 vsim -voptargs=+acc work.testbenchfp -G TEST=$2 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 61b35a51b..a58400cca 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -7,3 +7,22 @@ add wave -noupdate /testbenchfp/Y add wave -noupdate /testbenchfp/Z add wave -noupdate /testbenchfp/Res add wave -noupdate /testbenchfp/Ans +add wave -noupdate /testbenchfp/DivStart +add wave -noupdate /testbenchfp/DivDone +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* +add wave -group {Testbench} -noupdate 
/testbenchfp/* +add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv index 899dffb77..ab054342f 100644 --- a/pipelined/src/fpu/cvtshiftcalc.sv +++ b/pipelined/src/fpu/cvtshiftcalc.sv @@ -7,10 +7,10 @@ module cvtshiftcalc( input logic [`NE:0] CvtCalcExpM, // the calculated expoent input logic [`NF:0] XManM, // input mantissas input logic [`FMTBITS-1:0] OutFmt, // output format - input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) + input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic CvtResDenormUfM, output logic CvtResUf, - output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted + output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted ); logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF) @@ -31,8 +31,8 @@ module cvtshiftcalc( // | `NF-1 zeros | Mantissa | 0's if nessisary | // - otherwise: // | LzcInM | 0's if nessisary | - assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : - CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} : + assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : + CvtResDenormUfM ? 
{{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : {CvtLzcInM, {`NF+1{1'b0}}}; diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv new file mode 100644 index 000000000..57022e5ae --- /dev/null +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -0,0 +1,15 @@ +`include "wally-config.vh" + +module divshiftcalc( + input logic [`DIVLEN+2:0] Quot, + input logic [`NE:0] DivCalcExpM, + output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, + output logic [`NE:0] CorrDivExp +); + + assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]}; + // the quotent is in the range [.5,2) + // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift + assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]}; + +endmodule diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index a76122804..26ca7dd83 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -12,11 +12,11 @@ module fcvt ( input logic XDenormE, // is the input denormalized input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half) output logic [`NE:0] CvtCalcExpE, // the calculated expoent - output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by + output logic [`LOGCVTLEN-1:0] CvtShiftAmtE, // how much to shift by output logic CvtResDenormUfE,// does the result underflow or is denormalized output logic CvtResSgnE, // the result's sign output logic IntZeroE, // is the integer zero? - output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder) + output logic [`CVTLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder) ); // OpCtrls: @@ -43,7 +43,7 @@ module fcvt ( logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? 
- logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC + logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC // seperate OpCtrl for code readability @@ -78,10 +78,10 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} : - {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; + assign CvtLzcInE = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + {XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}}; - lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt); + lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt); /////////////////////////////////////////////////////////////////////////// // shifter @@ -99,9 +99,9 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} : - CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] : - (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}}; + assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} : + CvtResDenormUfE&~IntToFp ? 
(`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] : + (ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}}; /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -180,7 +180,7 @@ module fcvt ( // - shift left to normilize (-1-ZeroCnt) // - newBias to make the biased exponent // - assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})}; + assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index be73e9e7a..ff83079a8 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -82,7 +82,7 @@ module fpu ( // unpacking signals logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM; // input's sign - memory stage + logic XSgnM, YSgnM; // input's sign - memory stage logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage logic [`NE-1:0] ZExpM; // input's exponent - memory stage logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage @@ -104,23 +104,27 @@ module fpu ( logic FOpCtrlQ; // Fma Signals - logic [3*`NF+5:0] SumE, SumM; - logic [`NE+1:0] ProdExpE, ProdExpM; - logic AddendStickyE, AddendStickyM; - logic KillProdE, KillProdM; - logic InvZE, InvZM; - logic NegSumE, NegSumM; - logic ZSgnEffE, ZSgnEffM; - logic PSgnE, PSgnM; - logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; + logic [3*`NF+5:0] SumE, SumM; + logic [`NE+1:0] ProdExpE, ProdExpM; + logic AddendStickyE, AddendStickyM; + logic KillProdE, KillProdM; + logic InvZE, InvZM; + logic NegSumE, NegSumM; + logic ZSgnEffE, ZSgnEffM; 
+ logic PSgnE, PSgnM; + logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; // Cvt Signals - logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent - logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by - logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized - logic CvtResSgnE, CvtResSgnM; // the result's sign - logic IntZeroE, IntZeroM; // is the integer zero? - logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized + logic CvtResSgnE, CvtResSgnM; // the result's sign + logic IntZeroE, IntZeroM; // is the integer zero? + logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + + //divide signals + logic [`DIVLEN+2:0] Quot; + logic [`NE:0] DivCalcExpM; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -317,7 +321,7 @@ module fpu ( // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM}); - flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM); + flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM}); flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); @@ -333,7 +337,7 @@ module fpu ( flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE}, {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM}); - 
flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, + flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -351,9 +355,9 @@ module fpu ( assign FpLoadM = FResSelM[1]; - postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, - .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, - .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, + postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, + .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot, + .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM); diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 267647346..d970fdbce 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" module postprocess( - input logic XSgnM, // input signs + input logic XSgnM, YSgnM, // input signs input logic [`NE-1:0] ZExpM, // input exponents input logic [`NF:0] XManM, YManM, ZManM, // input mantissas input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude @@ -51,13 +51,15 @@ module postprocess( input logic [2:0] FOpCtrlM, // choose which opperation (look below for values) input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count input logic [`NE:0] CvtCalcExpM, // the calculated expoent + input logic [`NE:0] 
DivCalcExpM, // the calculated expoent input logic CvtResDenormUfM, - input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by + input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by input logic CvtResSgnM, // the result's sign input logic FWriteIntM, // is fp->int (since it's writting to the integer register) - input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) + input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic IntZeroM, // is the input zero input logic [1:0] PostProcSelM, // select result to be written to fp register + input logic [`DIVLEN+2:0] Quot, output logic [`FLEN-1:0] PostProcResM, // FMA final result output logic [4:0] PostProcFlgM, output logic [`XLEN-1:0] FCvtIntResM // the int conversion result @@ -75,13 +77,14 @@ module postprocess( logic [3*`NF+8:0] FmaShiftIn; // is the sum zero logic UfPlus1; // do you add one (for determining underflow flag) logic Round; // bits needed to determine rounding - logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted + logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted logic Mult; // multiply opperation logic [`FLEN:0] RoundAdd; // how much to add to the result logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic PreResultDenorm; // is the result denormalized - calculated before LZA corection logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count + logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? @@ -91,6 +94,7 @@ module postprocess( logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? 
logic [`NE+1:0] RoundExp; + logic [`NE:0] CorrDivExp; logic [1:0] NegResMSBS; logic CvtOp; logic FmaOp; @@ -135,6 +139,7 @@ module postprocess( .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt); always_comb case(PostProcSelM) @@ -143,12 +148,12 @@ module postprocess( ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}}; end 2'b00: begin // cvt - ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM}; - ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}}; + ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM}; + ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end - 2'b01: begin //div - ShiftAmt = 0;//{DivShiftAmt}; - ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn}; + 2'b01: begin //div ***prob can take out + ShiftAmt = DivShiftAmt; + ShiftIn = {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}}; end default: begin ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; @@ -171,9 +176,9 @@ module postprocess( // round to infinity // round to nearest max magnitude - round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, + round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp, .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, - .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); + .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation @@ -181,7 +186,7 @@ module postprocess( resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky, .FmaOp, 
.DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, - .CvtResSgnM, .RoundSgn, .ResSgn); + .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn); /////////////////////////////////////////////////////////////////////////////// // Flags diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index c8862ff94..9a76cf8f3 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -4,6 +4,8 @@ module resultsign( input logic [2:0] FrmM, input logic PSgnM, ZSgnEffM, input logic InvZM, + input logic XSgnM, + input logic YSgnM, input logic ZInfM, input logic InfIn, input logic NegSumM, @@ -25,6 +27,7 @@ module resultsign( logic FmaResSgn; logic FmaResSgnTmp; logic Underflow; + logic DivSgn; // logic ResultSgnTmp; // Determine the sign if the sum is zero @@ -43,9 +46,10 @@ module resultsign( assign InfSgn = ZInfM ? ZSgnEffM : PSgnM; assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp; - // Sign for rounding calulation - assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp); + assign DivSgn = XSgnM^YSgnM; - assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp); + // Sign for rounding calulation + assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); + assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 92f1d4c27..73395caed 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -11,6 +11,7 @@ module round( input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single input logic [2:0] FrmM, // rounding mode input logic FmaOp, + input logic DivOp, input logic [1:0] PostProcSelM, input logic CvtResDenormUfM, input logic ToInt, @@ -23,6 +24,7 @@ module round( input logic [`NE+1:0] SumExp, // exponent of the normalized sum input logic RoundSgn, // the result's sign input logic [`NE:0] 
CvtCalcExpM, // the calculated expoent + input logic [`NE:0] CorrDivExp, // the calculated expoent output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow output logic [`NF-1:0] ResFrac, // Result fraction @@ -303,7 +305,7 @@ module round( case(PostProcSelM) 2'b10: RoundExp = SumExp; // fma 2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt - 2'b01: RoundExp = 0; // divide + 2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide default: RoundExp = 0; endcase diff --git a/pipelined/src/generic/flop/bram1p1rw.sv b/pipelined/src/generic/flop/bram1p1rw.sv index d0d3c40a8..51fe54214 100644 --- a/pipelined/src/generic/flop/bram1p1rw.sv +++ b/pipelined/src/generic/flop/bram1p1rw.sv @@ -54,10 +54,6 @@ module bram1p1rw logic [DATA_WIDTH-1:0] RAM [(2**ADDR_WIDTH)-1:0]; integer i; - initial begin - $readmemh("big64.txt", RAM); - end - always @ (posedge clk) begin dout <= RAM[addr]; if(we) begin diff --git a/pipelined/src/uncore/clint.sv b/pipelined/src/uncore/clint.sv index 47acfddc2..4781360e5 100644 --- a/pipelined/src/uncore/clint.sv +++ b/pipelined/src/uncore/clint.sv @@ -60,7 +60,7 @@ module clint ( flopr #(16) entrydflop(HCLK, ~HRESETn, entry, entryd); assign HRESPCLINT = 0; // OK - assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during accesses + assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during asynchronous MTIME accesses // word aligned reads if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000}; @@ -87,8 +87,7 @@ module clint ( always_ff @(posedge HCLK or negedge HRESETn) if (~HRESETn) begin MSIP <= 0; - MTIMECMP <= 0; - // MTIMECMP is not reset + MTIMECMP <= 64'hFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts end else if (memwrite) begin if (entryd == 16'h0000) MSIP <= HWDATA[0]; if (entryd == 16'h4000) begin @@ -104,7 
+103,6 @@ module clint ( always_ff @(posedge HCLK or negedge HRESETn) if (~HRESETn) begin MTIME <= 0; - // MTIMECMP is not reset end else if (memwrite & entryd == 16'hBFF8) begin // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed for(j=0;j<`XLEN/8;j++) diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index 63146339c..49b21be7a 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,4 +1,4 @@ -all: exptestgen testgen qslc_r4a2 qslc_r4a2b +all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen: sqrttestgen.c gcc sqrttestgen.c -o sqrttestgen -lm @@ -19,5 +19,9 @@ qslc_r4a2b: qslc_r4a2b.c gcc qslc_r4a2b.c -o qslc_r4a2b -lm ./qslc_r4a2b > qslc_r4a2b.tv +qslc_sqrt_r4a2: qslc_sqrt_r4a2.c + gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm + ./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv + clean: - rm -f testgen exptestgen qslc_r4a2 + rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 diff --git a/pipelined/srt/qsel4.dat b/pipelined/srt/qsel4.dat new file mode 100644 index 000000000..b92d81e8e --- /dev/null +++ b/pipelined/srt/qsel4.dat @@ -0,0 +1,1024 @@ +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 
+8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 
+8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/pipelined/srt/qsel4.sv b/pipelined/srt/qsel4.sv index 069f4268c..70b8b92d2 100644 --- a/pipelined/srt/qsel4.sv +++ b/pipelined/srt/qsel4.sv @@ -11,7 +11,7 @@ module qsel4 ( logic [2:0] Dmsbs; assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 new file mode 100755 index 000000000..5cff70cdf Binary files /dev/null and b/pipelined/srt/qslc_sqrt_r4a2 differ diff --git a/pipelined/srt/qslc_sqrt_r4a2.c b/pipelined/srt/qslc_sqrt_r4a2.c new file mode 100644 index 000000000..252293cc0 --- /dev/null +++ b/pipelined/srt/qslc_sqrt_r4a2.c @@ -0,0 +1,198 @@ +/* + Program: qslc_r4a2.c + Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) + User: James E. 
Stine + +*/ + +#include +#include + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + ; + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + if (i == 0) + ; + + } + +} + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + printf("\tcase({D[5:3],Wmsbs})\n"); + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + printf("\t\t11'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); + + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -26) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 28) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -10) + printf("0000"); + else if 
((pla.tot) >= -28) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 32) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -32) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 32) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -34) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 36) + printf("1000"); + else if ((pla.tot) >= 12) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -36) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 40) + printf("1000"); + else if ((pla.tot) >= 12) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -40) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 40) + printf("1000"); + else if ((pla.tot) >= 16) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -44) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 44) + printf("1000"); + else if ((pla.tot) >= 16) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -46) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); + + } + + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + printf("\tendcase\n"); + +} diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv new file mode 100644 index 000000000..805dbbaeb --- /dev/null +++ b/pipelined/srt/qslc_sqrt_r4a2.sv @@ -0,0 +1,1026 @@ + case({D[5:3],Wmsbs}) + 11'b000_0000000: q = 4'b0000; + 11'b000_0000001: q = 4'b0000; + 11'b000_0000010: q = 4'b0000; + 11'b000_0000011: q = 4'b0000; + 11'b000_0000100: q = 4'b0000; + 11'b000_0000101: q = 4'b0000; + 11'b000_0000110: q = 4'b0000; + 
11'b000_0000111: q = 4'b0000; + 11'b000_0001000: q = 4'b0100; + 11'b000_0001001: q = 4'b0100; + 11'b000_0001010: q = 4'b0100; + 11'b000_0001011: q = 4'b0100; + 11'b000_0001100: q = 4'b0100; + 11'b000_0001101: q = 4'b0100; + 11'b000_0001110: q = 4'b0100; + 11'b000_0001111: q = 4'b0100; + 11'b000_0010000: q = 4'b0100; + 11'b000_0010001: q = 4'b0100; + 11'b000_0010010: q = 4'b0100; + 11'b000_0010011: q = 4'b0100; + 11'b000_0010100: q = 4'b0100; + 11'b000_0010101: q = 4'b0100; + 11'b000_0010110: q = 4'b0100; + 11'b000_0010111: q = 4'b0100; + 11'b000_0011000: q = 4'b1000; + 11'b000_0011001: q = 4'b1000; + 11'b000_0011010: q = 4'b1000; + 11'b000_0011011: q = 4'b1000; + 11'b000_0011100: q = 4'b1000; + 11'b000_0011101: q = 4'b1000; + 11'b000_0011110: q = 4'b1000; + 11'b000_0011111: q = 4'b1000; + 11'b000_0100000: q = 4'b1000; + 11'b000_0100001: q = 4'b1000; + 11'b000_0100010: q = 4'b1000; + 11'b000_0100011: q = 4'b1000; + 11'b000_0100100: q = 4'b1000; + 11'b000_0100101: q = 4'b1000; + 11'b000_0100110: q = 4'b1000; + 11'b000_0100111: q = 4'b1000; + 11'b000_0101000: q = 4'b1000; + 11'b000_0101001: q = 4'b1000; + 11'b000_0101010: q = 4'b1000; + 11'b000_0101011: q = 4'b1000; + 11'b000_0101100: q = 4'b1000; + 11'b000_0101101: q = 4'b1000; + 11'b000_0101110: q = 4'b1000; + 11'b000_0101111: q = 4'b1000; + 11'b000_0110000: q = 4'b1000; + 11'b000_0110001: q = 4'b1000; + 11'b000_0110010: q = 4'b1000; + 11'b000_0110011: q = 4'b1000; + 11'b000_0110100: q = 4'b1000; + 11'b000_0110101: q = 4'b1000; + 11'b000_0110110: q = 4'b1000; + 11'b000_0110111: q = 4'b1000; + 11'b000_0111000: q = 4'b1000; + 11'b000_0111001: q = 4'b1000; + 11'b000_0111010: q = 4'b1000; + 11'b000_0111011: q = 4'b1000; + 11'b000_0111100: q = 4'b1000; + 11'b000_0111101: q = 4'b1000; + 11'b000_0111110: q = 4'b1000; + 11'b000_0111111: q = 4'b1000; + 11'b000_1000000: q = 4'b0001; + 11'b000_1000001: q = 4'b0001; + 11'b000_1000010: q = 4'b0001; + 11'b000_1000011: q = 4'b0001; + 11'b000_1000100: q = 4'b0001; + 
11'b000_1000101: q = 4'b0001; + 11'b000_1000110: q = 4'b0001; + 11'b000_1000111: q = 4'b0001; + 11'b000_1001000: q = 4'b0001; + 11'b000_1001001: q = 4'b0001; + 11'b000_1001010: q = 4'b0001; + 11'b000_1001011: q = 4'b0001; + 11'b000_1001100: q = 4'b0001; + 11'b000_1001101: q = 4'b0001; + 11'b000_1001110: q = 4'b0001; + 11'b000_1001111: q = 4'b0001; + 11'b000_1010000: q = 4'b0001; + 11'b000_1010001: q = 4'b0001; + 11'b000_1010010: q = 4'b0001; + 11'b000_1010011: q = 4'b0001; + 11'b000_1010100: q = 4'b0001; + 11'b000_1010101: q = 4'b0001; + 11'b000_1010110: q = 4'b0001; + 11'b000_1010111: q = 4'b0001; + 11'b000_1011000: q = 4'b0001; + 11'b000_1011001: q = 4'b0001; + 11'b000_1011010: q = 4'b0001; + 11'b000_1011011: q = 4'b0001; + 11'b000_1011100: q = 4'b0001; + 11'b000_1011101: q = 4'b0001; + 11'b000_1011110: q = 4'b0001; + 11'b000_1011111: q = 4'b0001; + 11'b000_1100000: q = 4'b0001; + 11'b000_1100001: q = 4'b0001; + 11'b000_1100010: q = 4'b0001; + 11'b000_1100011: q = 4'b0001; + 11'b000_1100100: q = 4'b0001; + 11'b000_1100101: q = 4'b0001; + 11'b000_1100110: q = 4'b0010; + 11'b000_1100111: q = 4'b0010; + 11'b000_1101000: q = 4'b0010; + 11'b000_1101001: q = 4'b0010; + 11'b000_1101010: q = 4'b0010; + 11'b000_1101011: q = 4'b0010; + 11'b000_1101100: q = 4'b0010; + 11'b000_1101101: q = 4'b0010; + 11'b000_1101110: q = 4'b0010; + 11'b000_1101111: q = 4'b0010; + 11'b000_1110000: q = 4'b0010; + 11'b000_1110001: q = 4'b0010; + 11'b000_1110010: q = 4'b0010; + 11'b000_1110011: q = 4'b0010; + 11'b000_1110100: q = 4'b0010; + 11'b000_1110101: q = 4'b0010; + 11'b000_1110110: q = 4'b0010; + 11'b000_1110111: q = 4'b0010; + 11'b000_1111000: q = 4'b0000; + 11'b000_1111001: q = 4'b0000; + 11'b000_1111010: q = 4'b0000; + 11'b000_1111011: q = 4'b0000; + 11'b000_1111100: q = 4'b0000; + 11'b000_1111101: q = 4'b0000; + 11'b000_1111110: q = 4'b0000; + 11'b000_1111111: q = 4'b0000; + 11'b001_0000000: q = 4'b0000; + 11'b001_0000001: q = 4'b0000; + 11'b001_0000010: q = 4'b0000; + 
11'b001_0000011: q = 4'b0000; + 11'b001_0000100: q = 4'b0000; + 11'b001_0000101: q = 4'b0000; + 11'b001_0000110: q = 4'b0000; + 11'b001_0000111: q = 4'b0000; + 11'b001_0001000: q = 4'b0100; + 11'b001_0001001: q = 4'b0100; + 11'b001_0001010: q = 4'b0100; + 11'b001_0001011: q = 4'b0100; + 11'b001_0001100: q = 4'b0100; + 11'b001_0001101: q = 4'b0100; + 11'b001_0001110: q = 4'b0100; + 11'b001_0001111: q = 4'b0100; + 11'b001_0010000: q = 4'b0100; + 11'b001_0010001: q = 4'b0100; + 11'b001_0010010: q = 4'b0100; + 11'b001_0010011: q = 4'b0100; + 11'b001_0010100: q = 4'b0100; + 11'b001_0010101: q = 4'b0100; + 11'b001_0010110: q = 4'b0100; + 11'b001_0010111: q = 4'b0100; + 11'b001_0011000: q = 4'b0100; + 11'b001_0011001: q = 4'b0100; + 11'b001_0011010: q = 4'b0100; + 11'b001_0011011: q = 4'b0100; + 11'b001_0011100: q = 4'b1000; + 11'b001_0011101: q = 4'b1000; + 11'b001_0011110: q = 4'b1000; + 11'b001_0011111: q = 4'b1000; + 11'b001_0100000: q = 4'b1000; + 11'b001_0100001: q = 4'b1000; + 11'b001_0100010: q = 4'b1000; + 11'b001_0100011: q = 4'b1000; + 11'b001_0100100: q = 4'b1000; + 11'b001_0100101: q = 4'b1000; + 11'b001_0100110: q = 4'b1000; + 11'b001_0100111: q = 4'b1000; + 11'b001_0101000: q = 4'b1000; + 11'b001_0101001: q = 4'b1000; + 11'b001_0101010: q = 4'b1000; + 11'b001_0101011: q = 4'b1000; + 11'b001_0101100: q = 4'b1000; + 11'b001_0101101: q = 4'b1000; + 11'b001_0101110: q = 4'b1000; + 11'b001_0101111: q = 4'b1000; + 11'b001_0110000: q = 4'b1000; + 11'b001_0110001: q = 4'b1000; + 11'b001_0110010: q = 4'b1000; + 11'b001_0110011: q = 4'b1000; + 11'b001_0110100: q = 4'b1000; + 11'b001_0110101: q = 4'b1000; + 11'b001_0110110: q = 4'b1000; + 11'b001_0110111: q = 4'b1000; + 11'b001_0111000: q = 4'b1000; + 11'b001_0111001: q = 4'b1000; + 11'b001_0111010: q = 4'b1000; + 11'b001_0111011: q = 4'b1000; + 11'b001_0111100: q = 4'b1000; + 11'b001_0111101: q = 4'b1000; + 11'b001_0111110: q = 4'b1000; + 11'b001_0111111: q = 4'b1000; + 11'b001_1000000: q = 4'b0001; + 
11'b001_1000001: q = 4'b0001; + 11'b001_1000010: q = 4'b0001; + 11'b001_1000011: q = 4'b0001; + 11'b001_1000100: q = 4'b0001; + 11'b001_1000101: q = 4'b0001; + 11'b001_1000110: q = 4'b0001; + 11'b001_1000111: q = 4'b0001; + 11'b001_1001000: q = 4'b0001; + 11'b001_1001001: q = 4'b0001; + 11'b001_1001010: q = 4'b0001; + 11'b001_1001011: q = 4'b0001; + 11'b001_1001100: q = 4'b0001; + 11'b001_1001101: q = 4'b0001; + 11'b001_1001110: q = 4'b0001; + 11'b001_1001111: q = 4'b0001; + 11'b001_1010000: q = 4'b0001; + 11'b001_1010001: q = 4'b0001; + 11'b001_1010010: q = 4'b0001; + 11'b001_1010011: q = 4'b0001; + 11'b001_1010100: q = 4'b0001; + 11'b001_1010101: q = 4'b0001; + 11'b001_1010110: q = 4'b0001; + 11'b001_1010111: q = 4'b0001; + 11'b001_1011000: q = 4'b0001; + 11'b001_1011001: q = 4'b0001; + 11'b001_1011010: q = 4'b0001; + 11'b001_1011011: q = 4'b0001; + 11'b001_1011100: q = 4'b0001; + 11'b001_1011101: q = 4'b0001; + 11'b001_1011110: q = 4'b0001; + 11'b001_1011111: q = 4'b0001; + 11'b001_1100000: q = 4'b0001; + 11'b001_1100001: q = 4'b0001; + 11'b001_1100010: q = 4'b0001; + 11'b001_1100011: q = 4'b0001; + 11'b001_1100100: q = 4'b0010; + 11'b001_1100101: q = 4'b0010; + 11'b001_1100110: q = 4'b0010; + 11'b001_1100111: q = 4'b0010; + 11'b001_1101000: q = 4'b0010; + 11'b001_1101001: q = 4'b0010; + 11'b001_1101010: q = 4'b0010; + 11'b001_1101011: q = 4'b0010; + 11'b001_1101100: q = 4'b0010; + 11'b001_1101101: q = 4'b0010; + 11'b001_1101110: q = 4'b0010; + 11'b001_1101111: q = 4'b0010; + 11'b001_1110000: q = 4'b0010; + 11'b001_1110001: q = 4'b0010; + 11'b001_1110010: q = 4'b0010; + 11'b001_1110011: q = 4'b0010; + 11'b001_1110100: q = 4'b0010; + 11'b001_1110101: q = 4'b0010; + 11'b001_1110110: q = 4'b0000; + 11'b001_1110111: q = 4'b0000; + 11'b001_1111000: q = 4'b0000; + 11'b001_1111001: q = 4'b0000; + 11'b001_1111010: q = 4'b0000; + 11'b001_1111011: q = 4'b0000; + 11'b001_1111100: q = 4'b0000; + 11'b001_1111101: q = 4'b0000; + 11'b001_1111110: q = 4'b0000; + 
11'b001_1111111: q = 4'b0000; + 11'b010_0000000: q = 4'b0000; + 11'b010_0000001: q = 4'b0000; + 11'b010_0000010: q = 4'b0000; + 11'b010_0000011: q = 4'b0000; + 11'b010_0000100: q = 4'b0000; + 11'b010_0000101: q = 4'b0000; + 11'b010_0000110: q = 4'b0000; + 11'b010_0000111: q = 4'b0000; + 11'b010_0001000: q = 4'b0100; + 11'b010_0001001: q = 4'b0100; + 11'b010_0001010: q = 4'b0100; + 11'b010_0001011: q = 4'b0100; + 11'b010_0001100: q = 4'b0100; + 11'b010_0001101: q = 4'b0100; + 11'b010_0001110: q = 4'b0100; + 11'b010_0001111: q = 4'b0100; + 11'b010_0010000: q = 4'b0100; + 11'b010_0010001: q = 4'b0100; + 11'b010_0010010: q = 4'b0100; + 11'b010_0010011: q = 4'b0100; + 11'b010_0010100: q = 4'b0100; + 11'b010_0010101: q = 4'b0100; + 11'b010_0010110: q = 4'b0100; + 11'b010_0010111: q = 4'b0100; + 11'b010_0011000: q = 4'b0100; + 11'b010_0011001: q = 4'b0100; + 11'b010_0011010: q = 4'b0100; + 11'b010_0011011: q = 4'b0100; + 11'b010_0011100: q = 4'b0100; + 11'b010_0011101: q = 4'b0100; + 11'b010_0011110: q = 4'b0100; + 11'b010_0011111: q = 4'b0100; + 11'b010_0100000: q = 4'b1000; + 11'b010_0100001: q = 4'b1000; + 11'b010_0100010: q = 4'b1000; + 11'b010_0100011: q = 4'b1000; + 11'b010_0100100: q = 4'b1000; + 11'b010_0100101: q = 4'b1000; + 11'b010_0100110: q = 4'b1000; + 11'b010_0100111: q = 4'b1000; + 11'b010_0101000: q = 4'b1000; + 11'b010_0101001: q = 4'b1000; + 11'b010_0101010: q = 4'b1000; + 11'b010_0101011: q = 4'b1000; + 11'b010_0101100: q = 4'b1000; + 11'b010_0101101: q = 4'b1000; + 11'b010_0101110: q = 4'b1000; + 11'b010_0101111: q = 4'b1000; + 11'b010_0110000: q = 4'b1000; + 11'b010_0110001: q = 4'b1000; + 11'b010_0110010: q = 4'b1000; + 11'b010_0110011: q = 4'b1000; + 11'b010_0110100: q = 4'b1000; + 11'b010_0110101: q = 4'b1000; + 11'b010_0110110: q = 4'b1000; + 11'b010_0110111: q = 4'b1000; + 11'b010_0111000: q = 4'b1000; + 11'b010_0111001: q = 4'b1000; + 11'b010_0111010: q = 4'b1000; + 11'b010_0111011: q = 4'b1000; + 11'b010_0111100: q = 4'b1000; + 
11'b010_0111101: q = 4'b1000; + 11'b010_0111110: q = 4'b1000; + 11'b010_0111111: q = 4'b1000; + 11'b010_1000000: q = 4'b0001; + 11'b010_1000001: q = 4'b0001; + 11'b010_1000010: q = 4'b0001; + 11'b010_1000011: q = 4'b0001; + 11'b010_1000100: q = 4'b0001; + 11'b010_1000101: q = 4'b0001; + 11'b010_1000110: q = 4'b0001; + 11'b010_1000111: q = 4'b0001; + 11'b010_1001000: q = 4'b0001; + 11'b010_1001001: q = 4'b0001; + 11'b010_1001010: q = 4'b0001; + 11'b010_1001011: q = 4'b0001; + 11'b010_1001100: q = 4'b0001; + 11'b010_1001101: q = 4'b0001; + 11'b010_1001110: q = 4'b0001; + 11'b010_1001111: q = 4'b0001; + 11'b010_1010000: q = 4'b0001; + 11'b010_1010001: q = 4'b0001; + 11'b010_1010010: q = 4'b0001; + 11'b010_1010011: q = 4'b0001; + 11'b010_1010100: q = 4'b0001; + 11'b010_1010101: q = 4'b0001; + 11'b010_1010110: q = 4'b0001; + 11'b010_1010111: q = 4'b0001; + 11'b010_1011000: q = 4'b0001; + 11'b010_1011001: q = 4'b0001; + 11'b010_1011010: q = 4'b0001; + 11'b010_1011011: q = 4'b0001; + 11'b010_1011100: q = 4'b0001; + 11'b010_1011101: q = 4'b0001; + 11'b010_1011110: q = 4'b0001; + 11'b010_1011111: q = 4'b0001; + 11'b010_1100000: q = 4'b0010; + 11'b010_1100001: q = 4'b0010; + 11'b010_1100010: q = 4'b0010; + 11'b010_1100011: q = 4'b0010; + 11'b010_1100100: q = 4'b0010; + 11'b010_1100101: q = 4'b0010; + 11'b010_1100110: q = 4'b0010; + 11'b010_1100111: q = 4'b0010; + 11'b010_1101000: q = 4'b0010; + 11'b010_1101001: q = 4'b0010; + 11'b010_1101010: q = 4'b0010; + 11'b010_1101011: q = 4'b0010; + 11'b010_1101100: q = 4'b0010; + 11'b010_1101101: q = 4'b0010; + 11'b010_1101110: q = 4'b0010; + 11'b010_1101111: q = 4'b0010; + 11'b010_1110000: q = 4'b0010; + 11'b010_1110001: q = 4'b0010; + 11'b010_1110010: q = 4'b0010; + 11'b010_1110011: q = 4'b0010; + 11'b010_1110100: q = 4'b0000; + 11'b010_1110101: q = 4'b0000; + 11'b010_1110110: q = 4'b0000; + 11'b010_1110111: q = 4'b0000; + 11'b010_1111000: q = 4'b0000; + 11'b010_1111001: q = 4'b0000; + 11'b010_1111010: q = 4'b0000; + 
11'b010_1111011: q = 4'b0000; + 11'b010_1111100: q = 4'b0000; + 11'b010_1111101: q = 4'b0000; + 11'b010_1111110: q = 4'b0000; + 11'b010_1111111: q = 4'b0000; + 11'b011_0000000: q = 4'b0000; + 11'b011_0000001: q = 4'b0000; + 11'b011_0000010: q = 4'b0000; + 11'b011_0000011: q = 4'b0000; + 11'b011_0000100: q = 4'b0000; + 11'b011_0000101: q = 4'b0000; + 11'b011_0000110: q = 4'b0000; + 11'b011_0000111: q = 4'b0000; + 11'b011_0001000: q = 4'b0100; + 11'b011_0001001: q = 4'b0100; + 11'b011_0001010: q = 4'b0100; + 11'b011_0001011: q = 4'b0100; + 11'b011_0001100: q = 4'b0100; + 11'b011_0001101: q = 4'b0100; + 11'b011_0001110: q = 4'b0100; + 11'b011_0001111: q = 4'b0100; + 11'b011_0010000: q = 4'b0100; + 11'b011_0010001: q = 4'b0100; + 11'b011_0010010: q = 4'b0100; + 11'b011_0010011: q = 4'b0100; + 11'b011_0010100: q = 4'b0100; + 11'b011_0010101: q = 4'b0100; + 11'b011_0010110: q = 4'b0100; + 11'b011_0010111: q = 4'b0100; + 11'b011_0011000: q = 4'b0100; + 11'b011_0011001: q = 4'b0100; + 11'b011_0011010: q = 4'b0100; + 11'b011_0011011: q = 4'b0100; + 11'b011_0011100: q = 4'b0100; + 11'b011_0011101: q = 4'b0100; + 11'b011_0011110: q = 4'b0100; + 11'b011_0011111: q = 4'b0100; + 11'b011_0100000: q = 4'b1000; + 11'b011_0100001: q = 4'b1000; + 11'b011_0100010: q = 4'b1000; + 11'b011_0100011: q = 4'b1000; + 11'b011_0100100: q = 4'b1000; + 11'b011_0100101: q = 4'b1000; + 11'b011_0100110: q = 4'b1000; + 11'b011_0100111: q = 4'b1000; + 11'b011_0101000: q = 4'b1000; + 11'b011_0101001: q = 4'b1000; + 11'b011_0101010: q = 4'b1000; + 11'b011_0101011: q = 4'b1000; + 11'b011_0101100: q = 4'b1000; + 11'b011_0101101: q = 4'b1000; + 11'b011_0101110: q = 4'b1000; + 11'b011_0101111: q = 4'b1000; + 11'b011_0110000: q = 4'b1000; + 11'b011_0110001: q = 4'b1000; + 11'b011_0110010: q = 4'b1000; + 11'b011_0110011: q = 4'b1000; + 11'b011_0110100: q = 4'b1000; + 11'b011_0110101: q = 4'b1000; + 11'b011_0110110: q = 4'b1000; + 11'b011_0110111: q = 4'b1000; + 11'b011_0111000: q = 4'b1000; + 
11'b011_0111001: q = 4'b1000; + 11'b011_0111010: q = 4'b1000; + 11'b011_0111011: q = 4'b1000; + 11'b011_0111100: q = 4'b1000; + 11'b011_0111101: q = 4'b1000; + 11'b011_0111110: q = 4'b1000; + 11'b011_0111111: q = 4'b1000; + 11'b011_1000000: q = 4'b0001; + 11'b011_1000001: q = 4'b0001; + 11'b011_1000010: q = 4'b0001; + 11'b011_1000011: q = 4'b0001; + 11'b011_1000100: q = 4'b0001; + 11'b011_1000101: q = 4'b0001; + 11'b011_1000110: q = 4'b0001; + 11'b011_1000111: q = 4'b0001; + 11'b011_1001000: q = 4'b0001; + 11'b011_1001001: q = 4'b0001; + 11'b011_1001010: q = 4'b0001; + 11'b011_1001011: q = 4'b0001; + 11'b011_1001100: q = 4'b0001; + 11'b011_1001101: q = 4'b0001; + 11'b011_1001110: q = 4'b0001; + 11'b011_1001111: q = 4'b0001; + 11'b011_1010000: q = 4'b0001; + 11'b011_1010001: q = 4'b0001; + 11'b011_1010010: q = 4'b0001; + 11'b011_1010011: q = 4'b0001; + 11'b011_1010100: q = 4'b0001; + 11'b011_1010101: q = 4'b0001; + 11'b011_1010110: q = 4'b0001; + 11'b011_1010111: q = 4'b0001; + 11'b011_1011000: q = 4'b0001; + 11'b011_1011001: q = 4'b0001; + 11'b011_1011010: q = 4'b0001; + 11'b011_1011011: q = 4'b0001; + 11'b011_1011100: q = 4'b0001; + 11'b011_1011101: q = 4'b0001; + 11'b011_1011110: q = 4'b0010; + 11'b011_1011111: q = 4'b0010; + 11'b011_1100000: q = 4'b0010; + 11'b011_1100001: q = 4'b0010; + 11'b011_1100010: q = 4'b0010; + 11'b011_1100011: q = 4'b0010; + 11'b011_1100100: q = 4'b0010; + 11'b011_1100101: q = 4'b0010; + 11'b011_1100110: q = 4'b0010; + 11'b011_1100111: q = 4'b0010; + 11'b011_1101000: q = 4'b0010; + 11'b011_1101001: q = 4'b0010; + 11'b011_1101010: q = 4'b0010; + 11'b011_1101011: q = 4'b0010; + 11'b011_1101100: q = 4'b0010; + 11'b011_1101101: q = 4'b0010; + 11'b011_1101110: q = 4'b0010; + 11'b011_1101111: q = 4'b0010; + 11'b011_1110000: q = 4'b0010; + 11'b011_1110001: q = 4'b0010; + 11'b011_1110010: q = 4'b0010; + 11'b011_1110011: q = 4'b0010; + 11'b011_1110100: q = 4'b0000; + 11'b011_1110101: q = 4'b0000; + 11'b011_1110110: q = 4'b0000; + 
11'b011_1110111: q = 4'b0000; + 11'b011_1111000: q = 4'b0000; + 11'b011_1111001: q = 4'b0000; + 11'b011_1111010: q = 4'b0000; + 11'b011_1111011: q = 4'b0000; + 11'b011_1111100: q = 4'b0000; + 11'b011_1111101: q = 4'b0000; + 11'b011_1111110: q = 4'b0000; + 11'b011_1111111: q = 4'b0000; + 11'b100_0000000: q = 4'b0000; + 11'b100_0000001: q = 4'b0000; + 11'b100_0000010: q = 4'b0000; + 11'b100_0000011: q = 4'b0000; + 11'b100_0000100: q = 4'b0000; + 11'b100_0000101: q = 4'b0000; + 11'b100_0000110: q = 4'b0000; + 11'b100_0000111: q = 4'b0000; + 11'b100_0001000: q = 4'b0000; + 11'b100_0001001: q = 4'b0000; + 11'b100_0001010: q = 4'b0000; + 11'b100_0001011: q = 4'b0000; + 11'b100_0001100: q = 4'b0100; + 11'b100_0001101: q = 4'b0100; + 11'b100_0001110: q = 4'b0100; + 11'b100_0001111: q = 4'b0100; + 11'b100_0010000: q = 4'b0100; + 11'b100_0010001: q = 4'b0100; + 11'b100_0010010: q = 4'b0100; + 11'b100_0010011: q = 4'b0100; + 11'b100_0010100: q = 4'b0100; + 11'b100_0010101: q = 4'b0100; + 11'b100_0010110: q = 4'b0100; + 11'b100_0010111: q = 4'b0100; + 11'b100_0011000: q = 4'b0100; + 11'b100_0011001: q = 4'b0100; + 11'b100_0011010: q = 4'b0100; + 11'b100_0011011: q = 4'b0100; + 11'b100_0011100: q = 4'b0100; + 11'b100_0011101: q = 4'b0100; + 11'b100_0011110: q = 4'b0100; + 11'b100_0011111: q = 4'b0100; + 11'b100_0100000: q = 4'b0100; + 11'b100_0100001: q = 4'b0100; + 11'b100_0100010: q = 4'b0100; + 11'b100_0100011: q = 4'b0100; + 11'b100_0100100: q = 4'b1000; + 11'b100_0100101: q = 4'b1000; + 11'b100_0100110: q = 4'b1000; + 11'b100_0100111: q = 4'b1000; + 11'b100_0101000: q = 4'b1000; + 11'b100_0101001: q = 4'b1000; + 11'b100_0101010: q = 4'b1000; + 11'b100_0101011: q = 4'b1000; + 11'b100_0101100: q = 4'b1000; + 11'b100_0101101: q = 4'b1000; + 11'b100_0101110: q = 4'b1000; + 11'b100_0101111: q = 4'b1000; + 11'b100_0110000: q = 4'b1000; + 11'b100_0110001: q = 4'b1000; + 11'b100_0110010: q = 4'b1000; + 11'b100_0110011: q = 4'b1000; + 11'b100_0110100: q = 4'b1000; + 
11'b100_0110101: q = 4'b1000; + 11'b100_0110110: q = 4'b1000; + 11'b100_0110111: q = 4'b1000; + 11'b100_0111000: q = 4'b1000; + 11'b100_0111001: q = 4'b1000; + 11'b100_0111010: q = 4'b1000; + 11'b100_0111011: q = 4'b1000; + 11'b100_0111100: q = 4'b1000; + 11'b100_0111101: q = 4'b1000; + 11'b100_0111110: q = 4'b1000; + 11'b100_0111111: q = 4'b1000; + 11'b100_1000000: q = 4'b0001; + 11'b100_1000001: q = 4'b0001; + 11'b100_1000010: q = 4'b0001; + 11'b100_1000011: q = 4'b0001; + 11'b100_1000100: q = 4'b0001; + 11'b100_1000101: q = 4'b0001; + 11'b100_1000110: q = 4'b0001; + 11'b100_1000111: q = 4'b0001; + 11'b100_1001000: q = 4'b0001; + 11'b100_1001001: q = 4'b0001; + 11'b100_1001010: q = 4'b0001; + 11'b100_1001011: q = 4'b0001; + 11'b100_1001100: q = 4'b0001; + 11'b100_1001101: q = 4'b0001; + 11'b100_1001110: q = 4'b0001; + 11'b100_1001111: q = 4'b0001; + 11'b100_1010000: q = 4'b0001; + 11'b100_1010001: q = 4'b0001; + 11'b100_1010010: q = 4'b0001; + 11'b100_1010011: q = 4'b0001; + 11'b100_1010100: q = 4'b0001; + 11'b100_1010101: q = 4'b0001; + 11'b100_1010110: q = 4'b0001; + 11'b100_1010111: q = 4'b0001; + 11'b100_1011000: q = 4'b0001; + 11'b100_1011001: q = 4'b0001; + 11'b100_1011010: q = 4'b0001; + 11'b100_1011011: q = 4'b0001; + 11'b100_1011100: q = 4'b0010; + 11'b100_1011101: q = 4'b0010; + 11'b100_1011110: q = 4'b0010; + 11'b100_1011111: q = 4'b0010; + 11'b100_1100000: q = 4'b0010; + 11'b100_1100001: q = 4'b0010; + 11'b100_1100010: q = 4'b0010; + 11'b100_1100011: q = 4'b0010; + 11'b100_1100100: q = 4'b0010; + 11'b100_1100101: q = 4'b0010; + 11'b100_1100110: q = 4'b0010; + 11'b100_1100111: q = 4'b0010; + 11'b100_1101000: q = 4'b0010; + 11'b100_1101001: q = 4'b0010; + 11'b100_1101010: q = 4'b0010; + 11'b100_1101011: q = 4'b0010; + 11'b100_1101100: q = 4'b0010; + 11'b100_1101101: q = 4'b0010; + 11'b100_1101110: q = 4'b0010; + 11'b100_1101111: q = 4'b0010; + 11'b100_1110000: q = 4'b0010; + 11'b100_1110001: q = 4'b0010; + 11'b100_1110010: q = 4'b0010; + 
11'b100_1110011: q = 4'b0010; + 11'b100_1110100: q = 4'b0000; + 11'b100_1110101: q = 4'b0000; + 11'b100_1110110: q = 4'b0000; + 11'b100_1110111: q = 4'b0000; + 11'b100_1111000: q = 4'b0000; + 11'b100_1111001: q = 4'b0000; + 11'b100_1111010: q = 4'b0000; + 11'b100_1111011: q = 4'b0000; + 11'b100_1111100: q = 4'b0000; + 11'b100_1111101: q = 4'b0000; + 11'b100_1111110: q = 4'b0000; + 11'b100_1111111: q = 4'b0000; + 11'b101_0000000: q = 4'b0000; + 11'b101_0000001: q = 4'b0000; + 11'b101_0000010: q = 4'b0000; + 11'b101_0000011: q = 4'b0000; + 11'b101_0000100: q = 4'b0000; + 11'b101_0000101: q = 4'b0000; + 11'b101_0000110: q = 4'b0000; + 11'b101_0000111: q = 4'b0000; + 11'b101_0001000: q = 4'b0000; + 11'b101_0001001: q = 4'b0000; + 11'b101_0001010: q = 4'b0000; + 11'b101_0001011: q = 4'b0000; + 11'b101_0001100: q = 4'b0100; + 11'b101_0001101: q = 4'b0100; + 11'b101_0001110: q = 4'b0100; + 11'b101_0001111: q = 4'b0100; + 11'b101_0010000: q = 4'b0100; + 11'b101_0010001: q = 4'b0100; + 11'b101_0010010: q = 4'b0100; + 11'b101_0010011: q = 4'b0100; + 11'b101_0010100: q = 4'b0100; + 11'b101_0010101: q = 4'b0100; + 11'b101_0010110: q = 4'b0100; + 11'b101_0010111: q = 4'b0100; + 11'b101_0011000: q = 4'b0100; + 11'b101_0011001: q = 4'b0100; + 11'b101_0011010: q = 4'b0100; + 11'b101_0011011: q = 4'b0100; + 11'b101_0011100: q = 4'b0100; + 11'b101_0011101: q = 4'b0100; + 11'b101_0011110: q = 4'b0100; + 11'b101_0011111: q = 4'b0100; + 11'b101_0100000: q = 4'b0100; + 11'b101_0100001: q = 4'b0100; + 11'b101_0100010: q = 4'b0100; + 11'b101_0100011: q = 4'b0100; + 11'b101_0100100: q = 4'b0100; + 11'b101_0100101: q = 4'b0100; + 11'b101_0100110: q = 4'b0100; + 11'b101_0100111: q = 4'b0100; + 11'b101_0101000: q = 4'b1000; + 11'b101_0101001: q = 4'b1000; + 11'b101_0101010: q = 4'b1000; + 11'b101_0101011: q = 4'b1000; + 11'b101_0101100: q = 4'b1000; + 11'b101_0101101: q = 4'b1000; + 11'b101_0101110: q = 4'b1000; + 11'b101_0101111: q = 4'b1000; + 11'b101_0110000: q = 4'b1000; + 
11'b101_0110001: q = 4'b1000; + 11'b101_0110010: q = 4'b1000; + 11'b101_0110011: q = 4'b1000; + 11'b101_0110100: q = 4'b1000; + 11'b101_0110101: q = 4'b1000; + 11'b101_0110110: q = 4'b1000; + 11'b101_0110111: q = 4'b1000; + 11'b101_0111000: q = 4'b1000; + 11'b101_0111001: q = 4'b1000; + 11'b101_0111010: q = 4'b1000; + 11'b101_0111011: q = 4'b1000; + 11'b101_0111100: q = 4'b1000; + 11'b101_0111101: q = 4'b1000; + 11'b101_0111110: q = 4'b1000; + 11'b101_0111111: q = 4'b1000; + 11'b101_1000000: q = 4'b0001; + 11'b101_1000001: q = 4'b0001; + 11'b101_1000010: q = 4'b0001; + 11'b101_1000011: q = 4'b0001; + 11'b101_1000100: q = 4'b0001; + 11'b101_1000101: q = 4'b0001; + 11'b101_1000110: q = 4'b0001; + 11'b101_1000111: q = 4'b0001; + 11'b101_1001000: q = 4'b0001; + 11'b101_1001001: q = 4'b0001; + 11'b101_1001010: q = 4'b0001; + 11'b101_1001011: q = 4'b0001; + 11'b101_1001100: q = 4'b0001; + 11'b101_1001101: q = 4'b0001; + 11'b101_1001110: q = 4'b0001; + 11'b101_1001111: q = 4'b0001; + 11'b101_1010000: q = 4'b0001; + 11'b101_1010001: q = 4'b0001; + 11'b101_1010010: q = 4'b0001; + 11'b101_1010011: q = 4'b0001; + 11'b101_1010100: q = 4'b0001; + 11'b101_1010101: q = 4'b0001; + 11'b101_1010110: q = 4'b0001; + 11'b101_1010111: q = 4'b0001; + 11'b101_1011000: q = 4'b0010; + 11'b101_1011001: q = 4'b0010; + 11'b101_1011010: q = 4'b0010; + 11'b101_1011011: q = 4'b0010; + 11'b101_1011100: q = 4'b0010; + 11'b101_1011101: q = 4'b0010; + 11'b101_1011110: q = 4'b0010; + 11'b101_1011111: q = 4'b0010; + 11'b101_1100000: q = 4'b0010; + 11'b101_1100001: q = 4'b0010; + 11'b101_1100010: q = 4'b0010; + 11'b101_1100011: q = 4'b0010; + 11'b101_1100100: q = 4'b0010; + 11'b101_1100101: q = 4'b0010; + 11'b101_1100110: q = 4'b0010; + 11'b101_1100111: q = 4'b0010; + 11'b101_1101000: q = 4'b0010; + 11'b101_1101001: q = 4'b0010; + 11'b101_1101010: q = 4'b0010; + 11'b101_1101011: q = 4'b0010; + 11'b101_1101100: q = 4'b0010; + 11'b101_1101101: q = 4'b0010; + 11'b101_1101110: q = 4'b0010; + 
11'b101_1101111: q = 4'b0010; + 11'b101_1110000: q = 4'b0000; + 11'b101_1110001: q = 4'b0000; + 11'b101_1110010: q = 4'b0000; + 11'b101_1110011: q = 4'b0000; + 11'b101_1110100: q = 4'b0000; + 11'b101_1110101: q = 4'b0000; + 11'b101_1110110: q = 4'b0000; + 11'b101_1110111: q = 4'b0000; + 11'b101_1111000: q = 4'b0000; + 11'b101_1111001: q = 4'b0000; + 11'b101_1111010: q = 4'b0000; + 11'b101_1111011: q = 4'b0000; + 11'b101_1111100: q = 4'b0000; + 11'b101_1111101: q = 4'b0000; + 11'b101_1111110: q = 4'b0000; + 11'b101_1111111: q = 4'b0000; + 11'b110_0000000: q = 4'b0000; + 11'b110_0000001: q = 4'b0000; + 11'b110_0000010: q = 4'b0000; + 11'b110_0000011: q = 4'b0000; + 11'b110_0000100: q = 4'b0000; + 11'b110_0000101: q = 4'b0000; + 11'b110_0000110: q = 4'b0000; + 11'b110_0000111: q = 4'b0000; + 11'b110_0001000: q = 4'b0000; + 11'b110_0001001: q = 4'b0000; + 11'b110_0001010: q = 4'b0000; + 11'b110_0001011: q = 4'b0000; + 11'b110_0001100: q = 4'b0000; + 11'b110_0001101: q = 4'b0000; + 11'b110_0001110: q = 4'b0000; + 11'b110_0001111: q = 4'b0000; + 11'b110_0010000: q = 4'b0100; + 11'b110_0010001: q = 4'b0100; + 11'b110_0010010: q = 4'b0100; + 11'b110_0010011: q = 4'b0100; + 11'b110_0010100: q = 4'b0100; + 11'b110_0010101: q = 4'b0100; + 11'b110_0010110: q = 4'b0100; + 11'b110_0010111: q = 4'b0100; + 11'b110_0011000: q = 4'b0100; + 11'b110_0011001: q = 4'b0100; + 11'b110_0011010: q = 4'b0100; + 11'b110_0011011: q = 4'b0100; + 11'b110_0011100: q = 4'b0100; + 11'b110_0011101: q = 4'b0100; + 11'b110_0011110: q = 4'b0100; + 11'b110_0011111: q = 4'b0100; + 11'b110_0100000: q = 4'b0100; + 11'b110_0100001: q = 4'b0100; + 11'b110_0100010: q = 4'b0100; + 11'b110_0100011: q = 4'b0100; + 11'b110_0100100: q = 4'b0100; + 11'b110_0100101: q = 4'b0100; + 11'b110_0100110: q = 4'b0100; + 11'b110_0100111: q = 4'b0100; + 11'b110_0101000: q = 4'b1000; + 11'b110_0101001: q = 4'b1000; + 11'b110_0101010: q = 4'b1000; + 11'b110_0101011: q = 4'b1000; + 11'b110_0101100: q = 4'b1000; + 
11'b110_0101101: q = 4'b1000; + 11'b110_0101110: q = 4'b1000; + 11'b110_0101111: q = 4'b1000; + 11'b110_0110000: q = 4'b1000; + 11'b110_0110001: q = 4'b1000; + 11'b110_0110010: q = 4'b1000; + 11'b110_0110011: q = 4'b1000; + 11'b110_0110100: q = 4'b1000; + 11'b110_0110101: q = 4'b1000; + 11'b110_0110110: q = 4'b1000; + 11'b110_0110111: q = 4'b1000; + 11'b110_0111000: q = 4'b1000; + 11'b110_0111001: q = 4'b1000; + 11'b110_0111010: q = 4'b1000; + 11'b110_0111011: q = 4'b1000; + 11'b110_0111100: q = 4'b1000; + 11'b110_0111101: q = 4'b1000; + 11'b110_0111110: q = 4'b1000; + 11'b110_0111111: q = 4'b1000; + 11'b110_1000000: q = 4'b0001; + 11'b110_1000001: q = 4'b0001; + 11'b110_1000010: q = 4'b0001; + 11'b110_1000011: q = 4'b0001; + 11'b110_1000100: q = 4'b0001; + 11'b110_1000101: q = 4'b0001; + 11'b110_1000110: q = 4'b0001; + 11'b110_1000111: q = 4'b0001; + 11'b110_1001000: q = 4'b0001; + 11'b110_1001001: q = 4'b0001; + 11'b110_1001010: q = 4'b0001; + 11'b110_1001011: q = 4'b0001; + 11'b110_1001100: q = 4'b0001; + 11'b110_1001101: q = 4'b0001; + 11'b110_1001110: q = 4'b0001; + 11'b110_1001111: q = 4'b0001; + 11'b110_1010000: q = 4'b0001; + 11'b110_1010001: q = 4'b0001; + 11'b110_1010010: q = 4'b0001; + 11'b110_1010011: q = 4'b0001; + 11'b110_1010100: q = 4'b0010; + 11'b110_1010101: q = 4'b0010; + 11'b110_1010110: q = 4'b0010; + 11'b110_1010111: q = 4'b0010; + 11'b110_1011000: q = 4'b0010; + 11'b110_1011001: q = 4'b0010; + 11'b110_1011010: q = 4'b0010; + 11'b110_1011011: q = 4'b0010; + 11'b110_1011100: q = 4'b0010; + 11'b110_1011101: q = 4'b0010; + 11'b110_1011110: q = 4'b0010; + 11'b110_1011111: q = 4'b0010; + 11'b110_1100000: q = 4'b0010; + 11'b110_1100001: q = 4'b0010; + 11'b110_1100010: q = 4'b0010; + 11'b110_1100011: q = 4'b0010; + 11'b110_1100100: q = 4'b0010; + 11'b110_1100101: q = 4'b0010; + 11'b110_1100110: q = 4'b0010; + 11'b110_1100111: q = 4'b0010; + 11'b110_1101000: q = 4'b0010; + 11'b110_1101001: q = 4'b0010; + 11'b110_1101010: q = 4'b0010; + 
11'b110_1101011: q = 4'b0010; + 11'b110_1101100: q = 4'b0010; + 11'b110_1101101: q = 4'b0010; + 11'b110_1101110: q = 4'b0010; + 11'b110_1101111: q = 4'b0010; + 11'b110_1110000: q = 4'b0000; + 11'b110_1110001: q = 4'b0000; + 11'b110_1110010: q = 4'b0000; + 11'b110_1110011: q = 4'b0000; + 11'b110_1110100: q = 4'b0000; + 11'b110_1110101: q = 4'b0000; + 11'b110_1110110: q = 4'b0000; + 11'b110_1110111: q = 4'b0000; + 11'b110_1111000: q = 4'b0000; + 11'b110_1111001: q = 4'b0000; + 11'b110_1111010: q = 4'b0000; + 11'b110_1111011: q = 4'b0000; + 11'b110_1111100: q = 4'b0000; + 11'b110_1111101: q = 4'b0000; + 11'b110_1111110: q = 4'b0000; + 11'b110_1111111: q = 4'b0000; + 11'b111_0000000: q = 4'b0000; + 11'b111_0000001: q = 4'b0000; + 11'b111_0000010: q = 4'b0000; + 11'b111_0000011: q = 4'b0000; + 11'b111_0000100: q = 4'b0000; + 11'b111_0000101: q = 4'b0000; + 11'b111_0000110: q = 4'b0000; + 11'b111_0000111: q = 4'b0000; + 11'b111_0001000: q = 4'b0000; + 11'b111_0001001: q = 4'b0000; + 11'b111_0001010: q = 4'b0000; + 11'b111_0001011: q = 4'b0000; + 11'b111_0001100: q = 4'b0000; + 11'b111_0001101: q = 4'b0000; + 11'b111_0001110: q = 4'b0000; + 11'b111_0001111: q = 4'b0000; + 11'b111_0010000: q = 4'b0100; + 11'b111_0010001: q = 4'b0100; + 11'b111_0010010: q = 4'b0100; + 11'b111_0010011: q = 4'b0100; + 11'b111_0010100: q = 4'b0100; + 11'b111_0010101: q = 4'b0100; + 11'b111_0010110: q = 4'b0100; + 11'b111_0010111: q = 4'b0100; + 11'b111_0011000: q = 4'b0100; + 11'b111_0011001: q = 4'b0100; + 11'b111_0011010: q = 4'b0100; + 11'b111_0011011: q = 4'b0100; + 11'b111_0011100: q = 4'b0100; + 11'b111_0011101: q = 4'b0100; + 11'b111_0011110: q = 4'b0100; + 11'b111_0011111: q = 4'b0100; + 11'b111_0100000: q = 4'b0100; + 11'b111_0100001: q = 4'b0100; + 11'b111_0100010: q = 4'b0100; + 11'b111_0100011: q = 4'b0100; + 11'b111_0100100: q = 4'b0100; + 11'b111_0100101: q = 4'b0100; + 11'b111_0100110: q = 4'b0100; + 11'b111_0100111: q = 4'b0100; + 11'b111_0101000: q = 4'b0100; + 
11'b111_0101001: q = 4'b0100; + 11'b111_0101010: q = 4'b0100; + 11'b111_0101011: q = 4'b0100; + 11'b111_0101100: q = 4'b1000; + 11'b111_0101101: q = 4'b1000; + 11'b111_0101110: q = 4'b1000; + 11'b111_0101111: q = 4'b1000; + 11'b111_0110000: q = 4'b1000; + 11'b111_0110001: q = 4'b1000; + 11'b111_0110010: q = 4'b1000; + 11'b111_0110011: q = 4'b1000; + 11'b111_0110100: q = 4'b1000; + 11'b111_0110101: q = 4'b1000; + 11'b111_0110110: q = 4'b1000; + 11'b111_0110111: q = 4'b1000; + 11'b111_0111000: q = 4'b1000; + 11'b111_0111001: q = 4'b1000; + 11'b111_0111010: q = 4'b1000; + 11'b111_0111011: q = 4'b1000; + 11'b111_0111100: q = 4'b1000; + 11'b111_0111101: q = 4'b1000; + 11'b111_0111110: q = 4'b1000; + 11'b111_0111111: q = 4'b1000; + 11'b111_1000000: q = 4'b0001; + 11'b111_1000001: q = 4'b0001; + 11'b111_1000010: q = 4'b0001; + 11'b111_1000011: q = 4'b0001; + 11'b111_1000100: q = 4'b0001; + 11'b111_1000101: q = 4'b0001; + 11'b111_1000110: q = 4'b0001; + 11'b111_1000111: q = 4'b0001; + 11'b111_1001000: q = 4'b0001; + 11'b111_1001001: q = 4'b0001; + 11'b111_1001010: q = 4'b0001; + 11'b111_1001011: q = 4'b0001; + 11'b111_1001100: q = 4'b0001; + 11'b111_1001101: q = 4'b0001; + 11'b111_1001110: q = 4'b0001; + 11'b111_1001111: q = 4'b0001; + 11'b111_1010000: q = 4'b0001; + 11'b111_1010001: q = 4'b0001; + 11'b111_1010010: q = 4'b0010; + 11'b111_1010011: q = 4'b0010; + 11'b111_1010100: q = 4'b0010; + 11'b111_1010101: q = 4'b0010; + 11'b111_1010110: q = 4'b0010; + 11'b111_1010111: q = 4'b0010; + 11'b111_1011000: q = 4'b0010; + 11'b111_1011001: q = 4'b0010; + 11'b111_1011010: q = 4'b0010; + 11'b111_1011011: q = 4'b0010; + 11'b111_1011100: q = 4'b0010; + 11'b111_1011101: q = 4'b0010; + 11'b111_1011110: q = 4'b0010; + 11'b111_1011111: q = 4'b0010; + 11'b111_1100000: q = 4'b0010; + 11'b111_1100001: q = 4'b0010; + 11'b111_1100010: q = 4'b0010; + 11'b111_1100011: q = 4'b0010; + 11'b111_1100100: q = 4'b0010; + 11'b111_1100101: q = 4'b0010; + 11'b111_1100110: q = 4'b0010; + 
11'b111_1100111: q = 4'b0010; + 11'b111_1101000: q = 4'b0010; + 11'b111_1101001: q = 4'b0010; + 11'b111_1101010: q = 4'b0010; + 11'b111_1101011: q = 4'b0010; + 11'b111_1101100: q = 4'b0010; + 11'b111_1101101: q = 4'b0010; + 11'b111_1101110: q = 4'b0010; + 11'b111_1101111: q = 4'b0010; + 11'b111_1110000: q = 4'b0000; + 11'b111_1110001: q = 4'b0000; + 11'b111_1110010: q = 4'b0000; + 11'b111_1110011: q = 4'b0000; + 11'b111_1110100: q = 4'b0000; + 11'b111_1110101: q = 4'b0000; + 11'b111_1110110: q = 4'b0000; + 11'b111_1110111: q = 4'b0000; + 11'b111_1111000: q = 4'b0000; + 11'b111_1111001: q = 4'b0000; + 11'b111_1111010: q = 4'b0000; + 11'b111_1111011: q = 4'b0000; + 11'b111_1111100: q = 4'b0000; + 11'b111_1111101: q = 4'b0000; + 11'b111_1111110: q = 4'b0000; + 11'b111_1111111: q = 4'b0000; + endcase diff --git a/pipelined/srt/srt-radix4.do b/pipelined/srt/srt-radix4.do index b213aa994..07dedfbfe 100644 --- a/pipelined/srt/srt-radix4.do +++ b/pipelined/srt/srt-radix4.do @@ -17,7 +17,7 @@ if [file exists work] { } vlib work -vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv +vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv vopt +acc work.testbenchradix4 -o workopt vsim workopt diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index ccb6453c0..8fd8d5419 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -30,42 +30,35 @@ `include "wally-config.vh" -`define DIVLEN ((`NF<(`XLEN)) ? 
(`XLEN) : `NF) - module srtradix4 ( input logic clk, - input logic Start, - input logic Stall, // *** multiple pipe stages - input logic Flush, // *** multiple pipe stages - // Floating Point Inputs - // later add exponents, signs, special cases - input logic XSign, YSign, - input logic [`NE-1:0] XExp, YExp, - input logic [`NF-1:0] XFrac, YFrac, + input logic DivStart, + input logic [`NE-1:0] XExpE, YExpE, + input logic [`NF:0] XManE, YManE, input logic [`XLEN-1:0] SrcA, SrcB, - input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit + input logic XZeroE, input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide - output logic rsign, - output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers - output logic [`NE-1:0] rExp, - output logic [3:0] Flags + output logic DivDone, + output logic [`DIVLEN+2:0] Quot, + output logic [`XLEN-1:0] Rem, // *** later handle integers + output logic [`NE:0] DivCalcExpE ); // logic qp, qz, qm; // quotient is +1, 0, or -1 logic [3:0] q; - logic [`NE-1:0] calcExp; - logic calcSign; - logic [`DIVLEN-1:0] X, Dpreproc; + logic [`NE:0] DivCalcExp; + logic [`DIVLEN:0] X; + logic [`DIVLEN-1:0] Dpreproc; logic [`DIVLEN+3:0] WS, WSA, WSN; logic [`DIVLEN+3:0] WC, WCA, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; - srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); + srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -77,11 +70,11 @@ module srtradix4 ( // - otherwise load WSA into the flipflop // *** what does N and A stand for? 
// *** change shift amount for radix4 - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN); + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN); + mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC); - flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D); + flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -94,9 +87,8 @@ module srtradix4 ( // 0001 = -2 qsel4 qsel4(.D, .WS, .WC, .q); - // Store the expoenent and sign until division is done - flopen #(`NE) expflop(clk, Start, calcExp, rExp); - flopen #(1) signflop(clk, Start, calcSign, rsign); + // Store the exponent (loaded on DivStart) until division is done + flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE); // Divisor Selection logic // *** radix 4 change to choose -2 to 2 @@ -120,11 +112,11 @@ module srtradix4 ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); //*** change for radix 4 - otfc4 #(`DIVLEN) otfc4(clk, Start, q, Quot); + otfc4 otfc4(.clk, .DivStart, .q, .Quot); - expcalc expcalc(.XExp, .YExp, .calcExp); + expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp); - signcalc signcalc(.XSign, .YSign, .calcSign); + divcounter divcounter(clk, DivStart, DivDone); endmodule @@ -132,91 +124,154 @@ endmodule // Submodules // //////////////// +///////////// +// counter // +///////////// +module divcounter(input logic clk, + input logic DivStart, + output logic DivDone); + + logic [5:0] count; + + // This block of control logic sequences the divider + // through its iterations. You may modify it if you + // build a divider which completes in fewer iterations. 
+ // You are not responsible for the (trivial) circuit + // design of the block. + + always @(posedge clk) + begin + // DivDone: nonblocking writes only, so other posedge-clk readers see the registered value (no race) + if (count == `DIVLEN/2+1) DivDone <= #1 1; + else DivDone <= #1 0; + if (DivStart) count <= #1 0; + else count <= #1 count+1; + end +endmodule + +module qsel4 ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] WS, WC, + output logic [3:0] q +); + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] Dmsbs; + assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; + assign Wmsbs = PreWmsbs[7:1]; + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + // D = 0001.xxx... + // Dmsbs = | | + // W = xxxx.xxx... + // Wmsbs = | | + + logic [3:0] QSel4[1023:0]; + + initial begin + integer d, w, i, w2; + for(d=0; d<8; d++) + for(w=0; w<128; w++)begin + i = d*128+w; + w2 = w-128*(w>=64); // convert to two's complement + case(d) + 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-4) QSel4[i] = 4'b0000; + else if(w2>=-13) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 1: if(w2>=14) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-15) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 2: if(w2>=15) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-16) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 3: if(w2>=16) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-18) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 4: if(w2>=18) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 5: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 6: if(w2>=20) 
QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-22) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 7: if(w2>=24) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-24) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + endcase + end + end + assign q = QSel4[{Dmsbs,Wmsbs}]; + +endmodule + /////////////////// // Preprocessing // /////////////////// module srtpreproc ( input logic [`XLEN-1:0] SrcA, SrcB, - input logic [`NF-1:0] XFrac, YFrac, - input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit + input logic [`NF:0] XManE, YManE, input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide - output logic [`DIVLEN-1:0] X, D, + output logic [`DIVLEN:0] X, + output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent output logic intSign // Quotient integer sign ); - logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; - logic [`XLEN-1:0] PosA, PosB; - logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; + // logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; + // logic [`XLEN-1:0] PosA, PosB; + // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; + logic [`DIVLEN:0] PreprocA, PreprocX; + logic [`DIVLEN-1:0] PreprocB, PreprocY; - assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; - assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; + // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; + // assign PosB = (Signed & SrcB[`XLEN - 1]) ? 
-SrcB : SrcB; - lzc #(`XLEN) lzcA (PosA, zeroCntA); - lzc #(`XLEN) lzcB (PosB, zeroCntB); + // lzc #(`XLEN) lzcA (PosA, zeroCntA); + // lzc #(`XLEN) lzcB (PosB, zeroCntB); - assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; - assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; + // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; + // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; - assign PreprocA = ExtraA << zeroCntA; - assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}}; - assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}}; + // assign PreprocA = ExtraA << zeroCntA; + // assign PreprocB = ExtraB << (zeroCntB + 1); + assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}}; + assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}}; assign X = Int ? PreprocA : PreprocX; - assign D = Int ? PreprocB : PreprocY; - assign intExp = zeroCntB - zeroCntA + 1; - assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + assign Dpreproc = Int ? PreprocB : PreprocY; + // assign intExp = zeroCntB - zeroCntA + 1; + // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); endmodule -///////////////////////////////// -// Quotient Selection, Radix 2 // -///////////////////////////////// -module qsel2 ( // *** eventually just change to 4 bits - input logic [`DIVLEN+3:`DIVLEN] ps, pc, - output logic qp, qz, qm -); - - logic [`DIVLEN+3:`DIVLEN] p, g; - logic magnitude, sign, cout; - - // The quotient selection logic is presented for simplicity, not - // for efficiency. You can probably optimize your logic to - // select the proper divisor with less delay. 
- - // Quotient equations from EE371 lecture notes 13-20 - assign p = ps ^ pc; - assign g = ps & pc; - - assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); - assign #1 sign = p[`DIVLEN+3] ^ cout; -/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & - (ps[52]^pc[52])); - assign #1 sign = (ps[55]^pc[55])^ - (ps[54] & pc[54] | ((ps[54]^pc[54]) & - (ps[53]&pc[53] | ((ps[53]^pc[53]) & - (ps[52]&pc[52]))))); */ - - // Produce quotient = +1, 0, or -1 - assign #1 qp = magnitude & ~sign; - assign #1 qz = ~magnitude; - assign #1 qm = magnitude & sign; -endmodule - - /////////////////////////////////// // On-The-Fly Converter, Radix 2 // /////////////////////////////////// -module otfc4 #(parameter N=65) ( +module otfc4 ( input logic clk, - input logic Start, + input logic DivStart, input logic [3:0] q, - output logic [N-1:0] r + output logic [`DIVLEN+2:0] Quot ); // The on-the-fly converter transfers the quotient @@ -224,20 +279,20 @@ module otfc4 #(parameter N=65) ( // // This code follows the psuedocode presented in the // floating point chapter of the book. Right now, - // it is written for Radix-2 division. + // it is written for Radix-4 division. // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. - logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux; + logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux; // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. 
- logic [N:0] QR, QMR; + logic [`DIVLEN:0] QR, QMR; // if starting a new divison set Q to 0 and QM to -1 - mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux); - mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux); - flop #(N+3) Qreg(clk, QMux, Q); - flop #(N+3) QMreg(clk, QMMux, QM); + mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux); + mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux); + flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); + flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); // shift Q (quotent) and QM (quotent-1) // if q = 2 Q = {Q, 10} QM = {Q, 01} @@ -247,11 +302,9 @@ module otfc4 #(parameter N=65) ( // else if q = -2 Q = {QM, 10} QM = {QM, 01} // *** how does the 0 concatination numbers work? - - always_comb begin - QR = Q[N:0]; - QMR = QM[N:0]; // Shift Q and QM + QR = Quot[`DIVLEN:0]; + QMR = QM[`DIVLEN:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; @@ -269,7 +322,8 @@ module otfc4 #(parameter N=65) ( QMNext = {QMR, 2'b11}; end end - assign r = Q[N+2] ? Q[N+1:2] : Q[N:1]; + // Quot is in the range [.5, 2) so normalize the result if necessary + // assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1]; endmodule @@ -287,7 +341,7 @@ module csa #(parameter N=69) ( // This block adds in1, in2, in3, and cin to produce // a result out1 / out2 in carry-save redundant form. // cin is just added to the least significant bit and - // is required to handle adding a negative divisor. + // is required to handle adding a negative divisor. // Fortunately, the carry (out2) is shifted left by one // bit, leaving room in the least significant bit to // insert cin. 
@@ -302,22 +356,11 @@ endmodule // expcalc // ////////////// module expcalc( - input logic [`NE-1:0] XExp, YExp, - output logic [`NE-1:0] calcExp + input logic [`NE-1:0] XExpE, YExpE, + input logic XZeroE, + output logic [`NE:0] DivCalcExp ); - assign calcExp = XExp - YExp + (`NE)'(`BIAS); + assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}}; endmodule - -////////////// -// signcalc // -////////////// -module signcalc( - input logic XSign, YSign, - output logic calcSign -); - - assign calcSign = XSign ^ YSign; - -endmodule \ No newline at end of file diff --git a/pipelined/srt/testbench-radix4.sv b/pipelined/srt/testbench-radix4.sv index 6ac616ed6..434ef74b0 100644 --- a/pipelined/srt/testbench-radix4.sv +++ b/pipelined/srt/testbench-radix4.sv @@ -2,30 +2,6 @@ `include "wally-config.vh" `define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF) -///////////// -// counter // -///////////// -module counter(input logic clk, - input logic req, - output logic done); - - logic [5:0] count; - - // This block of control logic sequences the divider - // through its iterations. You may modify it if you - // build a divider which completes in fewer iterations. - // You are not responsible for the (trivial) circuit - // design of the block. 
- - always @(posedge clk) - begin - if (count == `DIVLEN/2+1) done <= #1 1; - else if (done | req) done <= #1 0; - if (req) count <= #1 0; - else count <= #1 count+1; - end -endmodule - /////////// // clock // /////////// @@ -43,7 +19,7 @@ endmodule module testbenchradix4; logic clk; logic req; - logic done; + logic DivDone; logic [63:0] a, b; logic [51:0] afrac, bfrac; logic [10:0] aExp, bExp; @@ -65,22 +41,20 @@ module testbenchradix4; logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a // bit field of an array logic [63:0] correctr, nextr, diffn, diffp; - logic [10:0] rExp; - logic rsign; + logic [10:0] DivExp; + logic DivSgn; integer testnum, errors; // Divider - srtradix4 srtradix4(.clk, .Start(req), - .Stall(1'b0), .Flush(1'b0), - .XExp(aExp), .YExp(bExp), .rExp, - .XSign(asign), .YSign(bsign), .rsign, + srtradix4 srtradix4(.clk, .DivStart(req), + .XExpE(aExp), .YExpE(bExp), .DivExp, + .XSgnE(asign), .YSgnE(bsign), .DivSgn, .XFrac(afrac), .YFrac(bfrac), - .SrcA('0), .SrcB('0), .Fmt(2'b00), - .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), - .Quot, .Rem(), .Flags()); + .SrcA('0), .SrcB('0), + .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone, + .Quot, .Rem()); // Counter - counter counter(clk, req, done); initial @@ -112,14 +86,14 @@ module testbenchradix4; always @(posedge clk) begin r = Quot[`DIVLEN-1:`DIVLEN - 52]; - if (done) begin + if (DivDone) begin req <= 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; - if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + if ((DivSgn !== correctr[63]) | (DivExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp begin errors = errors+1; - $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp); + $display("result was %h_%h, should be %h %h 
%h\n", DivExp, r, correctr, diffn, diffp); $display("failed\n"); $stop; end diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 4bae7d106..e8afb299b 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -48,13 +48,14 @@ module testbenchfp; logic XInf, YInf, ZInf; // is the input infinity logic XZero, YZero, ZZero; // is the input zero logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones - logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) + logic [`CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) logic IntZeroE; logic CvtResSgnE; - logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5; logic [`NE:0] CvtCalcExpE; // the calculated expoent - logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by + logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by + logic [`DIVLEN+2:0] Quot; logic CvtResDenormUfE; + logic DivStart, DivDone; // in-between FMA signals @@ -68,6 +69,8 @@ module testbenchfp; logic NegSumE; logic ZSgnEffE; logic PSgnE; + logic DivSgn; + logic [`NE:0] DivCalcExp; /////////////////////////////////////////////////////////////////////////////////////////////// @@ -205,16 +208,16 @@ module testbenchfp; Fmt = {Fmt, 2'b11}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the divide tests/op-ctrls/unit/fmt - // Tests = {Tests, f128div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b11}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end // if (TEST === "sqrt" | 
TEST === "all") begin // if square-root is being tested // // add the square-root tests/op-ctrls/unit/fmt // Tests = {Tests, f128sqrt}; @@ -332,16 +335,16 @@ module testbenchfp; Fmt = {Fmt, 2'b01}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f64div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b01}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f64sqrt}; @@ -443,16 +446,16 @@ module testbenchfp; Fmt = {Fmt, 2'b00}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f32div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b00}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f32sqrt}; @@ -536,16 +539,16 @@ module 
testbenchfp; Fmt = {Fmt, 2'b10}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f16div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b10}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f16sqrt}; @@ -611,7 +614,7 @@ module testbenchfp; readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal), .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal), - .XManE(XMan), .YManE(YMan), .ZManE(ZMan), + .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart, .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), .XDenormE(XDenorm), .ZDenormE(ZDenorm), @@ -639,8 +642,8 @@ module testbenchfp; .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); - postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]), - .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), + postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]), + .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp), .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .XNaNM(XNaN), .YNaNM(YNaN), 
.ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE), @@ -650,21 +653,16 @@ module testbenchfp; .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes)); -fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), + fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE, .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE); fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), - // .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal), - // .CvtRes, .CvtFlgE); - // *** integrade divide and squareroot - // fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ), - // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivRes), .Flg(FDivFlg)); - + srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), + .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), + .DivDone, .Quot, .Rem()); + assign CmpFlg[3:0] = 0; // produce clock @@ -817,7 +815,7 @@ end 
/////////////////////////////////////////////////////////////////////////////////////////////// // check if the non-fma test is correct - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone|(UnitVal !== `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); @@ -840,8 +838,7 @@ end $stop; end - - VectorNum += 1; // increment the vector + if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file @@ -895,15 +892,17 @@ module readvectors ( output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XExpMaxE, + output logic XExpMaxE, + output logic DivStart, output logic [`FLEN-1:0] X, Y, Z ); // apply test vectors on rising edge of clk // Format of vectors Inputs(1/2/3)_AnsFlg - always @(posedge clk) begin + always @(VectorNum) begin #1; AnsFlg = TestVector[4:0]; + DivStart = 1'b0; case (Unit) `FMAUNIT: case (Fmt) @@ -972,21 +971,33 @@ module readvectors ( X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; Ans = TestVector[8+(`Q_LEN-1):8]; + if (~clk) #5; + DivStart = 1'b1; #10 // one clk cycle + DivStart = 1'b0; end 2'b01: begin // double X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; end 2'b00: begin // single X = 
{{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; end 2'b10: begin // half X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + if (~clk) #5; + DivStart = 1'b1; #10 + DivStart = 1'b0; end endcase `CMPUNIT: diff --git a/pipelined/testbench/testbench.sv.bak b/pipelined/testbench/testbench.sv.bak deleted file mode 100644 index 8fdde9326..000000000 --- a/pipelined/testbench/testbench.sv.bak +++ /dev/null @@ -1,473 +0,0 @@ -/////////////////////////////////////////// -// testbench.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: Wally Testbench and helper modules -// Applies test programs from the riscv-arch-test and Imperas suites -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" -`include "tests.vh" - -module testbench; - parameter TESTSPERIPH = 0; // set to 0 for regression - parameter TESTSPRIV = 0; // set to 0 for regression - parameter DEBUG=0; - parameter TEST="none"; - - logic clk; - logic reset_ext, reset; - - parameter SIGNATURESIZE = 5000000; - - int test, i, errors, totalerrors; - logic [31:0] sig32[0:SIGNATURESIZE]; - logic [`XLEN-1:0] signature[0:SIGNATURESIZE]; - logic [`XLEN-1:0] testadr; - string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; - logic [31:0] InstrW; - -string tests[]; -logic [3:0] dummy; - - string ProgramAddrMapFile, ProgramLabelMapFile; - logic [`AHBW-1:0] HRDATAEXT; - logic HREADYEXT, HRESPEXT; - logic [31:0] HADDR; - logic [`AHBW-1:0] HWDATA; - logic HWRITE; - logic [2:0] HSIZE; - logic [2:0] HBURST; - logic [3:0] HPROT; - logic [1:0] HTRANS; - logic HMASTLOCK; - logic HCLK, HRESETn; - logic [`XLEN-1:0] PCW; - - logic DCacheFlushDone, DCacheFlushStart; - - flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW); - flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW); - - // check assertions for a legal configuration - riscvassertions riscvassertions(); - - // pick tests based on modes supported - initial begin - $display("TEST is %s", TEST); - //tests = '{}; - if (`XLEN == 64) begin // RV64 - case (TEST) - "arch64i": tests = arch64i; - "arch64priv": tests = arch64priv; - "arch64c": if (`C_SUPPORTED) - if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; - else tests = {arch64c}; - "arch64m": if (`M_SUPPORTED) tests = arch64m; - 
"arch64d": if (`D_SUPPORTED) tests = arch64d; - "imperas64i": tests = imperas64i; - "imperas64p": tests = imperas64p; -// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu; - "imperas64f": if (`F_SUPPORTED) tests = imperas64f; - "imperas64d": if (`D_SUPPORTED) tests = imperas64d; - "imperas64m": if (`M_SUPPORTED) tests = imperas64m; - "imperas64a": if (`A_SUPPORTED) tests = imperas64a; - "imperas64c": if (`C_SUPPORTED) tests = imperas64c; - else tests = imperas64iNOc; - "testsBP64": tests = testsBP64; - "wally64i": tests = wally64i; // *** redo - "wally64priv": tests = wally64priv;// *** redo - "imperas64periph": tests = imperas64periph; - "coremark": tests = coremark; - endcase - end else begin // RV32 - case (TEST) - "arch32i": tests = arch32i; - "arch32priv": tests = arch32priv; - "arch32c": if (`C_SUPPORTED) - if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv}; - else tests = {arch32c}; - "arch32m": if (`M_SUPPORTED) tests = arch32m; - "arch32f": if (`F_SUPPORTED) tests = arch32f; - "imperas32i": tests = imperas32i; - "imperas32p": tests = imperas32p; -// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu; - "imperas32f": if (`F_SUPPORTED) tests = imperas32f; - "imperas32m": if (`M_SUPPORTED) tests = imperas32m; - "imperas32a": if (`A_SUPPORTED) tests = imperas32a; - "imperas32c": if (`C_SUPPORTED) tests = imperas32c; - else tests = imperas32iNOc; - "wally32i": tests = wally32i; // *** redo - "wally32e": tests = wally32e; - "wally32priv": tests = wally32priv; // *** redo - "imperas32periph": tests = imperas32periph; - endcase - end - if (tests.size() == 0) begin - $display("TEST %s not supported in this configuration", TEST); - $stop; - end - end - - string signame, memfilename, pathname; - - logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn; - logic UARTSin, UARTSout; - - logic SDCCLK; - logic SDCCmdIn; - logic SDCCmdOut; - logic SDCCmdOE; - logic [3:0] SDCDatIn; - - logic HREADY; - logic HSELEXT; - - - // instantiate device to be tested 
- assign GPIOPinsIn = 0; - assign UARTSin = 1; - assign HREADYEXT = 1; - assign HRESPEXT = 0; - assign HRDATAEXT = 0; - - wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT, - .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT, - .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, - .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); - - // Track names of instructions - instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE, - dut.core.ifu.FinalInstrRawF[31:0], - dut.core.ifu.InstrD, dut.core.ifu.InstrE, - dut.core.ifu.InstrM, InstrW, - InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); - - // initialize tests - localparam integer MemStartAddr = `RAM_BASE>>(1+`XLEN/32); - localparam integer MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32); - - initial - begin - test = 1; - totalerrors = 0; - testadr = 0; - // fill memory with defined values to reduce Xs in simulation - // Quick note the memory will need to be initialized. The C library does not - // guarantee the initialized reads. For example a strcmp can read 6 byte - // strings, but uses a load double to read them in. If the last 2 bytes are - // not initialized the compare results in an 'x' which propagates through - // the design. 
- if (TEST == "coremark") - for (i=MemStartAddr; i= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); - assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); - assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (`IMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_LINELENINBITS >= 32 | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); - assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (`DMEM != `MEM_CACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (`IMEM != `MEM_CACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); - assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); - assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF"); - assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); - assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & 
`U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); - assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); -// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM"); - assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); - assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); - end -endmodule - - -/* verilator lint_on STMTDLY */ -/* verilator lint_on WIDTH */ - -module DCacheFlushFSM - (input logic clk, - input logic reset, - input logic start, - output logic done); - - genvar adr; - - logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)]; - - if(`DMEM == `MEM_CACHE) begin - localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; - localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; - localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; - localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; - localparam integer lognumlines = $clog2(numlines); - localparam integer loglinebytelen = $clog2(linebytelen); - localparam integer lognumways = $clog2(numways); - localparam integer tagstart = lognumlines + loglinebytelen; - - - - genvar index, way, cacheWord; - logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; - for(index = 0; index < numlines; index++) begin - for(way = 0; way < numways; way++) begin - for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin - copyShadow #(.tagstart(tagstart), - 
.loglinebytelen(loglinebytelen)) - copyShadow(.clk, - .start, - .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), - .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), - .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), - .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), - .index(index), - .cacheWord(cacheWord), - .CacheData(CacheData[way][index][cacheWord]), - .CacheAdr(CacheAdr[way][index][cacheWord]), - .CacheTag(CacheTag[way][index][cacheWord]), - .CacheValid(CacheValid[way][index][cacheWord]), - .CacheDirty(CacheDirty[way][index][cacheWord])); - end - end - end - - integer i, j, k; - - always @(posedge clk) begin - if (start) begin #1 - #1 - for(i = 0; i < numlines; i++) begin - for(j = 0; j < numways; j++) begin - for(k = 0; k < numwords; k++) begin - if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin - ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; - end - end - end - end - end - end - - - end - flop #(1) doneReg(.clk, .d(start), .q(done)); -endmodule - -module copyShadow - #(parameter tagstart, loglinebytelen) - (input logic clk, - input logic start, - input logic [`PA_BITS-1:tagstart] tag, - input logic valid, dirty, - input logic [`XLEN-1:0] data, - input logic [32-1:0] index, - input logic [32-1:0] cacheWord, - output logic [`XLEN-1:0] CacheData, - output logic [`PA_BITS-1:0] CacheAdr, - output logic [`XLEN-1:0] CacheTag, - output logic CacheValid, - output logic CacheDirty); - - - always_ff @(posedge clk) begin - if(start) begin - CacheTag = tag; - CacheValid = valid; - CacheDirty = dirty; - CacheData = data; - CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); - end - end - -endmodule - diff --git a/synthDC/Makefile b/synthDC/Makefile index 193153cac..3de666659 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -15,6 
+15,7 @@ export MAXCORES ?= 4 # MAXOPT turns on flattening, boundary optimization, and retiming # The output netlist is hard to interpret, but significantly better PPA export MAXOPT ?= 0 +export DRIVE ?= FLOP time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index 278e0aa70..3cbf56ae5 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -1,7 +1,23 @@ +00000000 # test reset to zero 00000000 -00000000 -A5A5A5A5 +A5A5A5A5 # test output pins 5A5AFFFF -00000000 +00000000 # test input enables 5A5A0000 -A55A0000 +A55A0000 # test XOR +A55A0000 # Test interrupt pending bits: high_ip +5AA5FFFF # low_ip +00000000 # rise_ip +00000000 # fall_ip +A4AA0000 # input_val +A5FA0000 # high_ip +5BF5FFFF # low_ip +00A00000 # rise_ip +01500000 # fall_ip +00000000 # MEIP +00000000 # Test interrupts can be enabled without being triggered: MIP = 0 +00000000 # MIP = 0 +00000000 # MIP = 0 +00000000 # MIP = 0 +00000800 # Test interrupts can be enabled and triggered: MEIP set +00000000 # MEIP = 0 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index a72ae385a..0caad5d0b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -827,6 +827,28 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a addi a6, a6, 4 .endm +// Place this macro in peripheral tests to setup all 
the PLIC registers to generate external interrupts +.macro SETUP_PLIC + # Setup PLIC with a series of register writes + + .equ PLIC_INTPRI_GPIO, 0x0C00000C # GPIO is interrupt 3 + .equ PLIC_INTPRI_UART, 0x0C000028 # UART is interrupt 10 + .equ PLIC_INTPENDING0, 0x0C001000 # intPending0 register + .equ PLIC_INTEN00, 0x0C002000 # interrupt enables for context 0 (machine mode) sources 31:1 + .equ PLIC_INTEN10, 0x0C002080 # interrupt enables for context 1 (supervisor mode) sources 31:1 + .equ PLIC_THRESH0, 0x0C200000 # Priority threshold for context 0 (machine mode) + .equ PLIC_CLAIM0, 0x0C200004 # Claim/Complete register for context 0 + .equ PLIC_THRESH1, 0x0C201000 # Priority threshold for context 1 (supervisor mode) + .equ PLIC_CLAIM1, 0x0C201004 # Claim/Complete register for context 1 + + .4byte PLIC_THRESH0, 0, write32_test # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts + .4byte PLIC_THRESH1, 7, write32_test # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts + .4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority + .4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to high priority + .4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode + .4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode +.endm + .macro END_TESTS // invokes one final ecall to return to machine mode then terminates this program, so the output is // 0x8: termination called from U mode @@ -937,6 +959,20 @@ read08_test: addi a6, a6, 4 j test_loop // go to next test case +readmip_test: // read the MIP into the signature + csrr t2, mip + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + +readsip_test: // read the SIP into the signature + csrr t2, sip + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + goto_s_mode: // return to address in t3, li a0, 3 // Trap handler behavior 
(go to supervisor mode) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index e4792a78c..be40c0e26 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -72,6 +72,7 @@ test_cases: .4byte input_val, 0x00000000, read32_test # input_val reset to zero .4byte input_en, 0x00000000, read32_test # input_en reset to zero +# *** add more # =========== Test output and input pins =========== @@ -86,14 +87,49 @@ test_cases: .4byte input_en, 0x00000000, write32_test # disable all input pins .4byte input_val, 0x00000000, read32_test # read 0 since input pins are disabled .4byte input_en, 0xFFFF0000, write32_test # enable a few input pins -.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above. +.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above. -# =========== Test output enables(?) 
=========== - -.4byte output_en, 0xFFFFFFFF, write32_test # undo changes made to output enable # =========== Test XOR functionality =========== .4byte out_xor, 0xFF00FF00, write32_test # invert certain pin values -.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working +.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working + +# =========== Test Interrupt Pending bits =========== + +SETUP_PLIC + +.4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts +.4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts +.4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts +.4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts +.4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts +.4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts +.4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts +.4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts +.4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts +.4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output +.4byte high_ip, 0xA5FA0000, read32_test # high interrupt pending *** (is this correct?) 
+.4byte low_ip, 0x5BF5FFFF, read32_test # low interrupt pending should be opposite high for enabled pins +.4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) +.4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) +.4byte 0x0, 0x00000000, readmip_test # Check no external interrupt has been generated + +# =========== Test interrupts can be enabled without being triggered =========== + +.4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte rise_ie, 0x00010000, write32_test # enable rise interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte fall_ie, 0x00010000, write32_test # enable fall interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending + +# =========== Test interrupts can be enabled and triggered + +.4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending +.4byte 0x0, 0x00000800, readmip_test # MEIP should be raised +.4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending +.4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, terminate_test # terminate tests diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output index 7b23883c6..fd88590e3 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output +++ 
b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output @@ -254,12 +254,12 @@ FFFFEE00 FFFFEE00 00000000 00000000 -02BEEF10 +02BEEF10 # Something here is failing 0000000B 80000000 00000003 000000FF -FFFFFFFF +00000000 000000FF 00000000 00000000 @@ -270,20 +270,20 @@ FFFFFFFF FFFFFF00 00000000 00000000 -02BEEF11 +02BEEF11 # this might be wrong 0000000B 80000000 -00000003 -000000CC -CCCCCCCC -00000000 -00000000 -00000033 -00000000 -000000FF -000000CC -FFFFFF33 -FFFFFF33 +00000003 +00000033 # input +00000000 # output +00000000 # rise ip +00000000 # serviced rise ip +000000CC # fall ip +00000000 +000000FF # high ip +00000033 # why is this 0x33? +FFFFFFCC # low ip +FFFFFFCC # serviced low ip 00000000 00000000 03BEEF12 @@ -454,9 +454,9 @@ FFFFFF33 00080000 00080000 00000000 +00000000 # is it this one that's failing? 00000000 -00000000 -00080000 +00080000 # failing 00080000 FFFFFFFF FFF7FFFF @@ -478,7 +478,7 @@ FFFFFFFF FFFFFFFE 00000000 00000000 -04BEEF1E +04BEEF1E # this might also be wrong 00000009 80000000 0000000A diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S index c44d7a681..705875146 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S @@ -271,7 +271,7 @@ main_code: ##### sw t1, 0x04(t0) # raise all output_en sw t1, 0x08(t0) - # raise all input_en + # raise all rise_en sw t1, 0x18(t0) # ========== Execute Test ========== # set MEIE @@ -616,6 +616,9 @@ Intr02BEEF11: sw t1, 0x08(t0) # set initial output state sw x0, 0x0C(t0) + # clear XOR + li t1, 0x00000000 + sw t1, 0x40(t0) # clear all pending interrupts li t1, 0xFFFFFFFF sw t1, 0x1C(t0) @@ -843,7 +846,7 @@ Intr03BEEF1A: sw t1, 0x04(t0) # raise all output_en sw t1, 0x08(t0) - # raise all input_en + # raise 
all rise_en sw t1, 0x18(t0) # ========== Execute Test ========== # set MEIE and SEIE