From ecd733942a2d5c88c80fb19b3ffb351b3f7ff269 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 14 Jun 2022 22:04:38 +0000 Subject: [PATCH 01/26] Removed testbench.sv.bak --- pipelined/testbench/testbench.sv.bak | 473 --------------------------- 1 file changed, 473 deletions(-) delete mode 100644 pipelined/testbench/testbench.sv.bak diff --git a/pipelined/testbench/testbench.sv.bak b/pipelined/testbench/testbench.sv.bak deleted file mode 100644 index 8fdde932..00000000 --- a/pipelined/testbench/testbench.sv.bak +++ /dev/null @@ -1,473 +0,0 @@ -/////////////////////////////////////////// -// testbench.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: Wally Testbench and helper modules -// Applies test programs from the riscv-arch-test and Imperas suites -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" -`include "tests.vh" - -module testbench; - parameter TESTSPERIPH = 0; // set to 0 for regression - parameter TESTSPRIV = 0; // set to 0 for regression - parameter DEBUG=0; - parameter TEST="none"; - - logic clk; - logic reset_ext, reset; - - parameter SIGNATURESIZE = 5000000; - - int test, i, errors, totalerrors; - logic [31:0] sig32[0:SIGNATURESIZE]; - logic [`XLEN-1:0] signature[0:SIGNATURESIZE]; - logic [`XLEN-1:0] testadr; - string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; - logic [31:0] InstrW; - -string tests[]; -logic [3:0] dummy; - - string ProgramAddrMapFile, ProgramLabelMapFile; - logic [`AHBW-1:0] HRDATAEXT; - logic HREADYEXT, HRESPEXT; - logic [31:0] HADDR; - logic [`AHBW-1:0] HWDATA; - logic HWRITE; - logic [2:0] HSIZE; - logic [2:0] HBURST; - logic [3:0] HPROT; - logic [1:0] HTRANS; - logic HMASTLOCK; - logic HCLK, HRESETn; - logic [`XLEN-1:0] PCW; - - logic DCacheFlushDone, DCacheFlushStart; - - flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW); - flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW); - - // check assertions for a legal configuration - riscvassertions riscvassertions(); - - // pick tests based on modes supported - initial begin - $display("TEST is %s", TEST); - //tests = '{}; - if (`XLEN == 64) begin // RV64 - case (TEST) - "arch64i": tests = arch64i; - "arch64priv": tests = arch64priv; - "arch64c": if (`C_SUPPORTED) - if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; - else tests = {arch64c}; - "arch64m": if (`M_SUPPORTED) tests = arch64m; - 
"arch64d": if (`D_SUPPORTED) tests = arch64d; - "imperas64i": tests = imperas64i; - "imperas64p": tests = imperas64p; -// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu; - "imperas64f": if (`F_SUPPORTED) tests = imperas64f; - "imperas64d": if (`D_SUPPORTED) tests = imperas64d; - "imperas64m": if (`M_SUPPORTED) tests = imperas64m; - "imperas64a": if (`A_SUPPORTED) tests = imperas64a; - "imperas64c": if (`C_SUPPORTED) tests = imperas64c; - else tests = imperas64iNOc; - "testsBP64": tests = testsBP64; - "wally64i": tests = wally64i; // *** redo - "wally64priv": tests = wally64priv;// *** redo - "imperas64periph": tests = imperas64periph; - "coremark": tests = coremark; - endcase - end else begin // RV32 - case (TEST) - "arch32i": tests = arch32i; - "arch32priv": tests = arch32priv; - "arch32c": if (`C_SUPPORTED) - if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv}; - else tests = {arch32c}; - "arch32m": if (`M_SUPPORTED) tests = arch32m; - "arch32f": if (`F_SUPPORTED) tests = arch32f; - "imperas32i": tests = imperas32i; - "imperas32p": tests = imperas32p; -// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu; - "imperas32f": if (`F_SUPPORTED) tests = imperas32f; - "imperas32m": if (`M_SUPPORTED) tests = imperas32m; - "imperas32a": if (`A_SUPPORTED) tests = imperas32a; - "imperas32c": if (`C_SUPPORTED) tests = imperas32c; - else tests = imperas32iNOc; - "wally32i": tests = wally32i; // *** redo - "wally32e": tests = wally32e; - "wally32priv": tests = wally32priv; // *** redo - "imperas32periph": tests = imperas32periph; - endcase - end - if (tests.size() == 0) begin - $display("TEST %s not supported in this configuration", TEST); - $stop; - end - end - - string signame, memfilename, pathname; - - logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn; - logic UARTSin, UARTSout; - - logic SDCCLK; - logic SDCCmdIn; - logic SDCCmdOut; - logic SDCCmdOE; - logic [3:0] SDCDatIn; - - logic HREADY; - logic HSELEXT; - - - // instantiate device to be tested 
- assign GPIOPinsIn = 0; - assign UARTSin = 1; - assign HREADYEXT = 1; - assign HRESPEXT = 0; - assign HRDATAEXT = 0; - - wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT, - .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT, - .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, - .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); - - // Track names of instructions - instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE, - dut.core.ifu.FinalInstrRawF[31:0], - dut.core.ifu.InstrD, dut.core.ifu.InstrE, - dut.core.ifu.InstrM, InstrW, - InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); - - // initialize tests - localparam integer MemStartAddr = `RAM_BASE>>(1+`XLEN/32); - localparam integer MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32); - - initial - begin - test = 1; - totalerrors = 0; - testadr = 0; - // fill memory with defined values to reduce Xs in simulation - // Quick note the memory will need to be initialized. The C library does not - // guarantee the initialized reads. For example a strcmp can read 6 byte - // strings, but uses a load double to read them in. If the last 2 bytes are - // not initialized the compare results in an 'x' which propagates through - // the design. 
- if (TEST == "coremark") - for (i=MemStartAddr; i= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); - assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); - assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (`IMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_LINELENINBITS >= 32 | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); - assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (`DMEM != `MEM_CACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (`IMEM != `MEM_CACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); - assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); - assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF"); - assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); - assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & 
`U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); - assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); -// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM"); - assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); - assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); - end -endmodule - - -/* verilator lint_on STMTDLY */ -/* verilator lint_on WIDTH */ - -module DCacheFlushFSM - (input logic clk, - input logic reset, - input logic start, - output logic done); - - genvar adr; - - logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)]; - - if(`DMEM == `MEM_CACHE) begin - localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; - localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; - localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; - localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; - localparam integer lognumlines = $clog2(numlines); - localparam integer loglinebytelen = $clog2(linebytelen); - localparam integer lognumways = $clog2(numways); - localparam integer tagstart = lognumlines + loglinebytelen; - - - - genvar index, way, cacheWord; - logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; - for(index = 0; index < numlines; index++) begin - for(way = 0; way < numways; way++) begin - for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin - copyShadow #(.tagstart(tagstart), - 
.loglinebytelen(loglinebytelen)) - copyShadow(.clk, - .start, - .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), - .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), - .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), - .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), - .index(index), - .cacheWord(cacheWord), - .CacheData(CacheData[way][index][cacheWord]), - .CacheAdr(CacheAdr[way][index][cacheWord]), - .CacheTag(CacheTag[way][index][cacheWord]), - .CacheValid(CacheValid[way][index][cacheWord]), - .CacheDirty(CacheDirty[way][index][cacheWord])); - end - end - end - - integer i, j, k; - - always @(posedge clk) begin - if (start) begin #1 - #1 - for(i = 0; i < numlines; i++) begin - for(j = 0; j < numways; j++) begin - for(k = 0; k < numwords; k++) begin - if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin - ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; - end - end - end - end - end - end - - - end - flop #(1) doneReg(.clk, .d(start), .q(done)); -endmodule - -module copyShadow - #(parameter tagstart, loglinebytelen) - (input logic clk, - input logic start, - input logic [`PA_BITS-1:tagstart] tag, - input logic valid, dirty, - input logic [`XLEN-1:0] data, - input logic [32-1:0] index, - input logic [32-1:0] cacheWord, - output logic [`XLEN-1:0] CacheData, - output logic [`PA_BITS-1:0] CacheAdr, - output logic [`XLEN-1:0] CacheTag, - output logic CacheValid, - output logic CacheDirty); - - - always_ff @(posedge clk) begin - if(start) begin - CacheTag = tag; - CacheValid = valid; - CacheDirty = dirty; - CacheData = data; - CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); - end - end - -endmodule - From cf56a0d76a47c7e6c2c8930ef314e016421fb08f Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Tue, 21 Jun 2022 15:39:04 -0700 Subject: 
[PATCH 02/26] fixed issue where the unused spike elf files were being used to find objdump files that didn't exist causing makefile-memfile to fail prematurely --- pipelined/regression/makefile-memfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/regression/makefile-memfile b/pipelined/regression/makefile-memfile index 892e6db9..c4196386 100644 --- a/pipelined/regression/makefile-memfile +++ b/pipelined/regression/makefile-memfile @@ -8,8 +8,9 @@ IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf") +OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump") MEMFILES ?= $(ELFFILES:.elf=.elf.memfile) -ADDRFILES ?= $(ELFFILES:.elf=.elf.objdump.addr) +ADDRFILES ?= $(OBJDUMPFILES:.objdump=.objdump.addr) print: echo "files in $(ALLDIRS) are $(ELFFILES)." From c41391e228136849ee605b9327f5d9b5e543add3 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 21 Jun 2022 15:48:47 -0700 Subject: [PATCH 03/26] removed rv64fp from lint --- pipelined/regression/lint-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/regression/lint-wally b/pipelined/regression/lint-wally index 2b5288d5..750486c4 100755 --- a/pipelined/regression/lint-wally +++ b/pipelined/regression/lint-wally @@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/ verilator=`which verilator` basepath=$(dirname $0)/.. -for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do +for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do echo "$config linting..." 
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then echo "Exiting after $config lint due to errors or warnings" From e9f5778e2a5a55ec2e2e7034b0b105a16037edd2 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 21 Jun 2022 15:49:52 -0700 Subject: [PATCH 04/26] using memread for quotent select --- pipelined/srt/srt-radix4.do | 2 +- pipelined/srt/srt-radix4.sv | 142 ++++++++++++++++-------------- pipelined/srt/testbench-radix4.sv | 48 +++------- 3 files changed, 86 insertions(+), 106 deletions(-) diff --git a/pipelined/srt/srt-radix4.do b/pipelined/srt/srt-radix4.do index b213aa99..07dedfbf 100644 --- a/pipelined/srt/srt-radix4.do +++ b/pipelined/srt/srt-radix4.do @@ -17,7 +17,7 @@ if [file exists work] { } vlib work -vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv +vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv vopt +acc work.testbenchradix4 -o workopt vsim workopt diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index ccb6453c..6c9cd0fa 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -34,29 +34,24 @@ module srtradix4 ( input logic clk, - input logic Start, - input logic Stall, // *** multiple pipe stages - input logic Flush, // *** multiple pipe stages - // Floating Point Inputs - // later add exponents, signs, special cases - input logic XSign, YSign, - input logic [`NE-1:0] XExp, YExp, + input logic DivStart, + input logic XSgnE, YSgnE, + input logic [`NE-1:0] XExpE, YExpE, input logic [`NF-1:0] XFrac, YFrac, input logic [`XLEN-1:0] SrcA, SrcB, - input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input 
logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide - output logic rsign, + output logic DivDone, + output logic DivSgn, output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers - output logic [`NE-1:0] rExp, - output logic [3:0] Flags + output logic [`NE-1:0] DivExp ); // logic qp, qz, qm; // quotient is +1, 0, or -1 logic [3:0] q; - logic [`NE-1:0] calcExp; + logic [`NE-1:0] DivCalcExp; logic calcSign; logic [`DIVLEN-1:0] X, Dpreproc; logic [`DIVLEN+3:0] WS, WSA, WSN; @@ -65,7 +60,7 @@ module srtradix4 ( logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; - srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); + srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -77,11 +72,11 @@ module srtradix4 ( // - otherwise load WSA into the flipflop // *** what does N and A stand for? 
// *** change shift amount for radix4 - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN); + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN); + mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC); - flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D); + flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -94,9 +89,9 @@ module srtradix4 ( // 0001 = -2 qsel4 qsel4(.D, .WS, .WC, .q); - // Store the expoenent and sign until division is done - flopen #(`NE) expflop(clk, Start, calcExp, rExp); - flopen #(1) signflop(clk, Start, calcSign, rsign); + // Store the expoenent and sign until division is DivDone + flopen #(`NE) expflop(clk, DivStart, DivCalcExp, DivExp); + flopen #(1) signflop(clk, DivStart, calcSign, DivSgn); // Divisor Selection logic // *** radix 4 change to choose -2 to 2 @@ -120,11 +115,13 @@ module srtradix4 ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); //*** change for radix 4 - otfc4 #(`DIVLEN) otfc4(clk, Start, q, Quot); + otfc4 #(`DIVLEN) otfc4(clk, DivStart, q, Quot); - expcalc expcalc(.XExp, .YExp, .calcExp); + expcalc expcalc(.XExpE, .YExpE, .DivCalcExp); - signcalc signcalc(.XSign, .YSign, .calcSign); + signcalc signcalc(.XSgnE, .YSgnE, .calcSign); + + counter counter(clk, DivStart, DivDone); endmodule @@ -132,13 +129,58 @@ endmodule // Submodules // //////////////// +///////////// +// counter // +///////////// +module counter(input logic clk, + input logic DivStart, + output logic DivDone); + + logic [5:0] count; + + // This block of control logic sequences the divider + // through its iterations. 
You may modify it if you + // build a divider which completes in fewer iterations. + // You are not responsible for the (trivial) circuit + // design of the block. + + always @(posedge clk) + begin + if (count == `DIVLEN/2+1) DivDone <= #1 1; + else if (DivDone | DivStart) DivDone <= #1 0; + if (DivStart) count <= #1 0; + else count <= #1 count+1; + end +endmodule + +module qsel4 ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] WS, WC, + output logic [3:0] q +); + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] Dmsbs; + assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; + assign Wmsbs = PreWmsbs[7:1]; + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + // D = 0001.xxx... + // Dmsbs = | | + // W = xxxx.xxx... + // Wmsbs = | | + + logic [3:0] QSel4[1023:0]; + initial $readmemh("qslc_r4a2b.tv", QSel4); + assign q = QSel4[{Dmsbs,Wmsbs}]; + +endmodule + /////////////////// // Preprocessing // /////////////////// module srtpreproc ( input logic [`XLEN-1:0] SrcA, SrcB, input logic [`NF-1:0] XFrac, YFrac, - input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs @@ -173,48 +215,12 @@ module srtpreproc ( assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); endmodule -///////////////////////////////// -// Quotient Selection, Radix 2 // -///////////////////////////////// -module qsel2 ( // *** eventually just change to 4 bits - input logic [`DIVLEN+3:`DIVLEN] ps, pc, - output logic qp, qz, qm -); - - logic [`DIVLEN+3:`DIVLEN] p, g; - logic magnitude, sign, cout; - - // The quotient selection logic is presented for simplicity, not - // for efficiency. You can probably optimize your logic to - // select the proper divisor with less delay. 
- - // Quotient equations from EE371 lecture notes 13-20 - assign p = ps ^ pc; - assign g = ps & pc; - - assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); - assign #1 sign = p[`DIVLEN+3] ^ cout; -/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & - (ps[52]^pc[52])); - assign #1 sign = (ps[55]^pc[55])^ - (ps[54] & pc[54] | ((ps[54]^pc[54]) & - (ps[53]&pc[53] | ((ps[53]^pc[53]) & - (ps[52]&pc[52]))))); */ - - // Produce quotient = +1, 0, or -1 - assign #1 qp = magnitude & ~sign; - assign #1 qz = ~magnitude; - assign #1 qm = magnitude & sign; -endmodule - - /////////////////////////////////// // On-The-Fly Converter, Radix 2 // /////////////////////////////////// module otfc4 #(parameter N=65) ( input logic clk, - input logic Start, + input logic DivStart, input logic [3:0] q, output logic [N-1:0] r ); @@ -234,8 +240,8 @@ module otfc4 #(parameter N=65) ( // discard the r most significant bits of Q and QM. logic [N:0] QR, QMR; // if starting a new divison set Q to 0 and QM to -1 - mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux); - mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux); + mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, DivStart, QMux); + mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, DivStart, QMMux); flop #(N+3) Qreg(clk, QMux, Q); flop #(N+3) QMreg(clk, QMMux, QM); @@ -287,7 +293,7 @@ module csa #(parameter N=69) ( // This block adds in1, in2, in3, and cin to produce // a result out1 / out2 in carry-save redundant form. // cin is just added to the least significant bit and - // is required to handle adding a negative divisor. + // is Startuired to handle adding a negative divisor. // Fortunately, the carry (out2) is shifted left by one // bit, leaving room in the least significant bit to // insert cin. 
@@ -302,11 +308,11 @@ endmodule // expcalc // ////////////// module expcalc( - input logic [`NE-1:0] XExp, YExp, - output logic [`NE-1:0] calcExp + input logic [`NE-1:0] XExpE, YExpE, + output logic [`NE-1:0] DivCalcExp ); - assign calcExp = XExp - YExp + (`NE)'(`BIAS); + assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS); endmodule @@ -314,10 +320,10 @@ endmodule // signcalc // ////////////// module signcalc( - input logic XSign, YSign, + input logic XSgnE, YSgnE, output logic calcSign ); - assign calcSign = XSign ^ YSign; + assign calcSign = XSgnE ^ YSgnE; endmodule \ No newline at end of file diff --git a/pipelined/srt/testbench-radix4.sv b/pipelined/srt/testbench-radix4.sv index 6ac616ed..0cea8059 100644 --- a/pipelined/srt/testbench-radix4.sv +++ b/pipelined/srt/testbench-radix4.sv @@ -2,30 +2,6 @@ `include "wally-config.vh" `define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF) -///////////// -// counter // -///////////// -module counter(input logic clk, - input logic req, - output logic done); - - logic [5:0] count; - - // This block of control logic sequences the divider - // through its iterations. You may modify it if you - // build a divider which completes in fewer iterations. - // You are not responsible for the (trivial) circuit - // design of the block. 
- - always @(posedge clk) - begin - if (count == `DIVLEN/2+1) done <= #1 1; - else if (done | req) done <= #1 0; - if (req) count <= #1 0; - else count <= #1 count+1; - end -endmodule - /////////// // clock // /////////// @@ -43,7 +19,7 @@ endmodule module testbenchradix4; logic clk; logic req; - logic done; + logic DivDone; logic [63:0] a, b; logic [51:0] afrac, bfrac; logic [10:0] aExp, bExp; @@ -65,22 +41,20 @@ module testbenchradix4; logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a // bit field of an array logic [63:0] correctr, nextr, diffn, diffp; - logic [10:0] rExp; - logic rsign; + logic [10:0] DivExp; + logic DivSgn; integer testnum, errors; // Divider - srtradix4 srtradix4(.clk, .Start(req), - .Stall(1'b0), .Flush(1'b0), - .XExp(aExp), .YExp(bExp), .rExp, - .XSign(asign), .YSign(bsign), .rsign, + srtradix4 srtradix4(.clk, .DivStart(req), + .XExpE(aExp), .YExpE(bExp), .DivExp, + .XSgnE(asign), .YSgnE(bsign), .DivSgn, .XFrac(afrac), .YFrac(bfrac), .SrcA('0), .SrcB('0), .Fmt(2'b00), - .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), - .Quot, .Rem(), .Flags()); + .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone, + .Quot, .Rem()); // Counter - counter counter(clk, req, done); initial @@ -112,14 +86,14 @@ module testbenchradix4; always @(posedge clk) begin r = Quot[`DIVLEN-1:`DIVLEN - 52]; - if (done) begin + if (DivDone) begin req <= 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; - if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + if ((DivSgn !== correctr[63]) | (DivExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp begin errors = errors+1; - $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp); + $display("result was %h_%h, should be %h %h %h\n", DivExp, r, 
correctr, diffn, diffp); $display("failed\n"); $stop; end From d291387b81e9a414f04a05e85bd6c1be62446702 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Tue, 21 Jun 2022 15:54:24 -0700 Subject: [PATCH 05/26] added individual makes for arch and wally tests as well as memfiles to Makefile. run using make archtests/wallytests/memfiles --- pipelined/regression/Makefile | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index 0a9e7d99..5cf4d408 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -8,22 +8,16 @@ make clean: # make allclean -C ../../tests/imperas-riscv-tests make all: + make archtests + make wallytests + make memfiles # *** Build old tests/imperas-riscv-tests for now; # Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test # DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired #make -C ../../tests/imperas-riscv-tests --jobs #make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs - - # Build riscv-arch-test 64 and 32-bit versions - make -C ../../tests/riscof/ --jobs - make -C ../../tests/riscof/ XLEN=32 --jobs - # Build wally-riscv-arch-test - make -C ../../tests/wally-riscv-arch-test/ --jobs - make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs -# build the memfiles and address files. - make -f makefile-memfile wally-sim-files --jobs # Only compile Imperas tests if they are installed locally. 
# They are usually a symlink to $RISCV/imperas-riscv-tests and only @@ -36,4 +30,15 @@ make all: # Link Linux test vectors (fix this later***) #cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh - +make archtests: + # Build riscv-arch-test 64 and 32-bit versions + make -C ../../tests/riscof/ --jobs + make -C ../../tests/riscof/ XLEN=32 --jobs + +make wallytests: + # Build wally-riscv-arch-test + make -C ../../tests/wally-riscv-arch-test/ --jobs + make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs + +make memfiles: + make -f makefile-memfile wally-sim-files --jobs From 3d5645d6832dd642f59478b8b3911d326b874c41 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 21 Jun 2022 22:56:01 +0000 Subject: [PATCH 06/26] Trimmed lint-wally --- pipelined/regression/lint-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/regression/lint-wally b/pipelined/regression/lint-wally index 2b5288d5..750486c4 100755 --- a/pipelined/regression/lint-wally +++ b/pipelined/regression/lint-wally @@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/ verilator=`which verilator` basepath=$(dirname $0)/.. -for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do +for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do echo "$config linting..." 
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then echo "Exiting after $config lint due to errors or warnings" From 10b6ff39a820b1aff272ddd046e42004cf13201a Mon Sep 17 00:00:00 2001 From: slmnemo Date: Tue, 21 Jun 2022 16:10:18 -0700 Subject: [PATCH 07/26] changed order of makefiles and fixed warnings when running makes --- pipelined/regression/Makefile | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index 5cf4d408..5ad72172 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -1,24 +1,9 @@ -make allclean: - make clean - make all - -make clean: - make clean -C ../../tests/riscof - make clean -C ../../tests/wally-riscv-arch-test -# make allclean -C ../../tests/imperas-riscv-tests - -make all: - make archtests - make wallytests - make memfiles +all: archtests wallytests memfiles # *** Build old tests/imperas-riscv-tests for now; # Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test # DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired #make -C ../../tests/imperas-riscv-tests --jobs #make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs - - - # Only compile Imperas tests if they are installed locally. 
# They are usually a symlink to $RISCV/imperas-riscv-tests and only # get compiled there manually during installation @@ -30,15 +15,22 @@ make all: # Link Linux test vectors (fix this later***) #cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh -make archtests: +allclean: clean all + +clean: + make clean -C ../../tests/riscof + make clean -C ../../tests/wally-riscv-arch-test +# make allclean -C ../../tests/imperas-riscv-tests + +archtests: # Build riscv-arch-test 64 and 32-bit versions make -C ../../tests/riscof/ --jobs make -C ../../tests/riscof/ XLEN=32 --jobs -make wallytests: +wallytests: # Build wally-riscv-arch-test make -C ../../tests/wally-riscv-arch-test/ --jobs make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs -make memfiles: +memfiles: make -f makefile-memfile wally-sim-files --jobs From 4a6dee59262308d5f3f54f989b48f3d27760b738 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 00:07:34 +0000 Subject: [PATCH 08/26] Testfloat running division - not passing --- pipelined/config/shared/wally-shared.vh | 9 +- pipelined/regression/testfloat.do | 2 +- pipelined/regression/wave-fpu.do | 15 + pipelined/src/fpu/cvtshiftcalc.sv | 8 +- pipelined/src/fpu/fcvt.sv | 20 +- pipelined/src/fpu/fpu.sv | 12 +- pipelined/src/fpu/postprocess.sv | 21 +- pipelined/src/fpu/resultsign.sv | 10 +- pipelined/srt/qsel4.dat | 1024 +++++++++++++++++++++++ pipelined/srt/qsel4.sv | 2 +- pipelined/srt/srt-radix4.sv | 58 +- pipelined/srt/testbench-radix4.sv | 2 +- pipelined/testbench/testbench-fp.sv | 129 +-- 13 files changed, 1173 insertions(+), 139 deletions(-) create mode 100644 pipelined/srt/qsel4.dat diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index afe822f4..3c2699da 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -94,11 +94,12 @@ `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 
`S_BIAS : `H_BIAS) // largest length in IEU/FPU -`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF) +`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF) `define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN) -`define LOGLGLEN $unsigned($clog2(`LGLEN+1)) -`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9)) -`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6)) +`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) +`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9)) +`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6)) +`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF) // Disable spurious Verilator warnings diff --git a/pipelined/regression/testfloat.do b/pipelined/regression/testfloat.do index 68c240c8..db694869 100644 --- a/pipelined/regression/testfloat.do +++ b/pipelined/regression/testfloat.do @@ -32,7 +32,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals # $num = the added words after the call -vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 vsim -voptargs=+acc work.testbenchfp -G TEST=$2 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 61b35a51..906eb256 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -7,3 +7,18 @@ add wave -noupdate /testbenchfp/Y add wave -noupdate /testbenchfp/Z add wave -noupdate /testbenchfp/Res add wave -noupdate /testbenchfp/Ans +add wave -noupdate /testbenchfp/DivStart +add wave -noupdate /testbenchfp/DivDone +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* +add wave -group {PostProc} -noupdate 
/testbenchfp/postprocess/resultselect/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv index 899dffb7..ab054342 100644 --- a/pipelined/src/fpu/cvtshiftcalc.sv +++ b/pipelined/src/fpu/cvtshiftcalc.sv @@ -7,10 +7,10 @@ module cvtshiftcalc( input logic [`NE:0] CvtCalcExpM, // the calculated expoent input logic [`NF:0] XManM, // input mantissas input logic [`FMTBITS-1:0] OutFmt, // output format - input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) + input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic CvtResDenormUfM, output logic CvtResUf, - output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted + output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted ); logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF) @@ -31,8 +31,8 @@ module cvtshiftcalc( // | `NF-1 zeros | Mantissa | 0's if nessisary | // - otherwise: // | LzcInM | 0's if nessisary | - assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : - CvtResDenormUfM ? 
{{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} : + assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : + CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : {CvtLzcInM, {`NF+1{1'b0}}}; diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index a7612280..26ca7dd8 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -12,11 +12,11 @@ module fcvt ( input logic XDenormE, // is the input denormalized input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half) output logic [`NE:0] CvtCalcExpE, // the calculated expoent - output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by + output logic [`LOGCVTLEN-1:0] CvtShiftAmtE, // how much to shift by output logic CvtResDenormUfE,// does the result underflow or is denormalized output logic CvtResSgnE, // the result's sign output logic IntZeroE, // is the integer zero? - output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder) + output logic [`CVTLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder) ); // OpCtrls: @@ -43,7 +43,7 @@ module fcvt ( logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? - logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC + logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC // seperate OpCtrl for code readability @@ -78,10 +78,10 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} : - {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; + assign CvtLzcInE = IntToFp ? 
{TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + {XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}}; - lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt); + lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt); /////////////////////////////////////////////////////////////////////////// // shifter @@ -99,9 +99,9 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} : - CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] : - (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}}; + assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} : + CvtResDenormUfE&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] : + (ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}}; /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -180,7 +180,7 @@ module fcvt ( // - shift left to normilize (-1-ZeroCnt) // - newBias to make the biased exponent // - assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})}; + assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index be73e9e7..b8a2e191 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -82,7 +82,7 @@ module fpu ( // unpacking signals logic XSgnE, YSgnE, ZSgnE; // input's sign - 
execute stage - logic XSgnM; // input's sign - memory stage + logic XSgnM, YSgnM; // input's sign - memory stage logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage logic [`NE-1:0] ZExpM; // input's exponent - memory stage logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage @@ -116,11 +116,11 @@ module fpu ( // Cvt Signals logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent - logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized logic CvtResSgnE, CvtResSgnM; // the result's sign logic IntZeroE, IntZeroM; // is the integer zero? - logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -317,7 +317,7 @@ module fpu ( // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM}); - flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM); + flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM}); flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); @@ -333,7 +333,7 @@ module fpu ( flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE}, {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM}); - flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, + flopenrc 
#(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -351,7 +351,7 @@ module fpu ( assign FpLoadM = FResSelM[1]; - postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, + postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 26764734..c5392055 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" module postprocess( - input logic XSgnM, // input signs + input logic XSgnM, YSgnM, // input signs input logic [`NE-1:0] ZExpM, // input exponents input logic [`NF:0] XManM, YManM, ZManM, // input mantissas input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude @@ -52,12 +52,13 @@ module postprocess( input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count input logic [`NE:0] CvtCalcExpM, // the calculated expoent input logic CvtResDenormUfM, - input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by + input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by input logic CvtResSgnM, // the result's sign input logic FWriteIntM, // is fp->int (since it's writting to the integer register) - input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) + input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic IntZeroM, // 
is the input zero input logic [1:0] PostProcSelM, // select result to be written to fp register + input logic [`DIVLEN-1:0] Quot, output logic [`FLEN-1:0] PostProcResM, // FMA final result output logic [4:0] PostProcFlgM, output logic [`XLEN-1:0] FCvtIntResM // the int conversion result @@ -75,7 +76,7 @@ module postprocess( logic [3*`NF+8:0] FmaShiftIn; // is the sum zero logic UfPlus1; // do you add one (for determining underflow flag) logic Round; // bits needed to determine rounding - logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted + logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted logic Mult; // multiply opperation logic [`FLEN:0] RoundAdd; // how much to add to the result logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results @@ -143,12 +144,12 @@ module postprocess( ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}}; end 2'b00: begin // cvt - ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM}; - ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}}; + ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM}; + ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end - 2'b01: begin //div - ShiftAmt = 0;//{DivShiftAmt}; - ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn}; + 2'b01: begin //div ***prob can take out + ShiftAmt = 1'b0;//{DivShiftAmt}; + ShiftIn = {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}}; end default: begin ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; @@ -181,7 +182,7 @@ module postprocess( resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky, .FmaOp, .DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, - .CvtResSgnM, .RoundSgn, .ResSgn); + .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn); /////////////////////////////////////////////////////////////////////////////// // Flags diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index c8862ff9..9a76cf8f 100644 
--- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -4,6 +4,8 @@ module resultsign( input logic [2:0] FrmM, input logic PSgnM, ZSgnEffM, input logic InvZM, + input logic XSgnM, + input logic YSgnM, input logic ZInfM, input logic InfIn, input logic NegSumM, @@ -25,6 +27,7 @@ module resultsign( logic FmaResSgn; logic FmaResSgnTmp; logic Underflow; + logic DivSgn; // logic ResultSgnTmp; // Determine the sign if the sum is zero @@ -43,9 +46,10 @@ module resultsign( assign InfSgn = ZInfM ? ZSgnEffM : PSgnM; assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp; - // Sign for rounding calulation - assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp); + assign DivSgn = XSgnM^YSgnM; - assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp); + // Sign for rounding calulation + assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); + assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/srt/qsel4.dat b/pipelined/srt/qsel4.dat new file mode 100644 index 00000000..b92d81e8 --- /dev/null +++ b/pipelined/srt/qsel4.dat @@ -0,0 +1,1024 @@ +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 
+4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 
+8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/pipelined/srt/qsel4.sv b/pipelined/srt/qsel4.sv index 069f4268..70b8b92d 100644 --- a/pipelined/srt/qsel4.sv +++ b/pipelined/srt/qsel4.sv @@ -11,7 +11,7 @@ module qsel4 ( logic [2:0] Dmsbs; assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 6c9cd0fa..671c6350 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -30,12 +30,9 @@ `include "wally-config.vh" -`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF) - module srtradix4 ( input logic clk, input logic DivStart, - input logic XSgnE, YSgnE, input logic [`NE-1:0] XExpE, YExpE, input logic [`NF-1:0] XFrac, YFrac, input logic [`XLEN-1:0] SrcA, SrcB, @@ -44,8 +41,8 @@ module srtradix4 ( input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic DivDone, - output logic DivSgn, - output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers + output logic [`DIVLEN-1:0] Quot, + output logic [`XLEN-1:0] Rem, // *** later handle integers output logic [`NE-1:0] DivExp ); @@ -91,7 +88,6 @@ module srtradix4 ( // Store the expoenent and sign until division is DivDone flopen #(`NE) expflop(clk, DivStart, DivCalcExp, DivExp); - flopen #(1) signflop(clk, DivStart, calcSign, DivSgn); // Divisor Selection logic // *** radix 4 change to choose -2 to 2 @@ -115,13 +111,11 @@ module srtradix4 ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); //*** change for radix 4 - otfc4 #(`DIVLEN) otfc4(clk, 
DivStart, q, Quot); + otfc4 otfc4(clk, DivStart, q, Quot); expcalc expcalc(.XExpE, .YExpE, .DivCalcExp); - signcalc signcalc(.XSgnE, .YSgnE, .calcSign); - - counter counter(clk, DivStart, DivDone); + divcounter divcounter(clk, DivStart, DivDone); endmodule @@ -132,7 +126,7 @@ endmodule ///////////// // counter // ///////////// -module counter(input logic clk, +module divcounter(input logic clk, input logic DivStart, output logic DivDone); @@ -146,6 +140,7 @@ module counter(input logic clk, always @(posedge clk) begin + DivDone = 0; if (count == `DIVLEN/2+1) DivDone <= #1 1; else if (DivDone | DivStart) DivDone <= #1 0; if (DivStart) count <= #1 0; @@ -170,7 +165,7 @@ module qsel4 ( // Wmsbs = | | logic [3:0] QSel4[1023:0]; - initial $readmemh("qslc_r4a2b.tv", QSel4); + initial $readmemh("../srt/qsel4.dat", QSel4); assign q = QSel4[{Dmsbs,Wmsbs}]; endmodule @@ -218,11 +213,11 @@ endmodule /////////////////////////////////// // On-The-Fly Converter, Radix 2 // /////////////////////////////////// -module otfc4 #(parameter N=65) ( +module otfc4 ( input logic clk, input logic DivStart, input logic [3:0] q, - output logic [N-1:0] r + output logic [`DIVLEN-1:0] Quot ); // The on-the-fly converter transfers the quotient @@ -230,20 +225,20 @@ module otfc4 #(parameter N=65) ( // // This code follows the psuedocode presented in the // floating point chapter of the book. Right now, - // it is written for Radix-2 division. + // it is written for Radix-4 division. // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. - logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux; + logic [`DIVLEN+2:0] Q, QM, QNext, QMNext, QMux, QMMux; // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. 
- logic [N:0] QR, QMR; + logic [`DIVLEN:0] QR, QMR; // if starting a new divison set Q to 0 and QM to -1 - mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, DivStart, QMux); - mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, DivStart, QMMux); - flop #(N+3) Qreg(clk, QMux, Q); - flop #(N+3) QMreg(clk, QMMux, QM); + mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux); + mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux); + flop #(`DIVLEN+3) Qreg(clk, QMux, Q); + flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); // shift Q (quotent) and QM (quotent-1) // if q = 2 Q = {Q, 10} QM = {Q, 01} @@ -253,11 +248,9 @@ module otfc4 #(parameter N=65) ( // else if q = -2 Q = {QM, 10} QM = {QM, 01} // *** how does the 0 concatination numbers work? - - always_comb begin - QR = Q[N:0]; - QMR = QM[N:0]; // Shift Q and QM + QR = Q[`DIVLEN:0]; + QMR = QM[`DIVLEN:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; @@ -275,7 +268,8 @@ module otfc4 #(parameter N=65) ( QMNext = {QMR, 2'b11}; end end - assign r = Q[N+2] ? Q[N+1:2] : Q[N:1]; + // Quot is in the range [.5, 2) so normalize the result if nesissary + assign Quot = Q[`DIVLEN+2] ? 
Q[`DIVLEN+1:2] : Q[`DIVLEN:1]; endmodule @@ -315,15 +309,3 @@ module expcalc( assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS); endmodule - -////////////// -// signcalc // -////////////// -module signcalc( - input logic XSgnE, YSgnE, - output logic calcSign -); - - assign calcSign = XSgnE ^ YSgnE; - -endmodule \ No newline at end of file diff --git a/pipelined/srt/testbench-radix4.sv b/pipelined/srt/testbench-radix4.sv index 0cea8059..434ef74b 100644 --- a/pipelined/srt/testbench-radix4.sv +++ b/pipelined/srt/testbench-radix4.sv @@ -50,7 +50,7 @@ module testbenchradix4; .XExpE(aExp), .YExpE(bExp), .DivExp, .XSgnE(asign), .YSgnE(bsign), .DivSgn, .XFrac(afrac), .YFrac(bfrac), - .SrcA('0), .SrcB('0), .Fmt(2'b00), + .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone, .Quot, .Rem()); diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 4bae7d10..748670b4 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -48,13 +48,13 @@ module testbenchfp; logic XInf, YInf, ZInf; // is the input infinity logic XZero, YZero, ZZero; // is the input zero logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones - logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) + logic [`CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) logic IntZeroE; logic CvtResSgnE; - logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5; logic [`NE:0] CvtCalcExpE; // the calculated expoent - logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by + logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by logic CvtResDenormUfE; + logic DivStart, DivDone; // in-between FMA signals @@ -68,6 +68,9 @@ module testbenchfp; logic NegSumE; logic ZSgnEffE; logic PSgnE; + logic DivSgn; + logic [`DIVLEN-1:0] Quot; + logic [`NE-1:0] DivExp; 
/////////////////////////////////////////////////////////////////////////////////////////////// @@ -205,16 +208,16 @@ module testbenchfp; Fmt = {Fmt, 2'b11}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the divide tests/op-ctrls/unit/fmt - // Tests = {Tests, f128div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b11}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested // // add the square-root tests/op-ctrls/unit/fmt // Tests = {Tests, f128sqrt}; @@ -332,16 +335,16 @@ module testbenchfp; Fmt = {Fmt, 2'b01}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f64div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b01}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f64sqrt}; @@ -443,16 +446,16 @@ module testbenchfp; Fmt = {Fmt, 2'b00}; end end - // if (TEST === "div" | TEST === "all") 
begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f32div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b00}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f32sqrt}; @@ -536,16 +539,16 @@ module testbenchfp; Fmt = {Fmt, 2'b10}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f16div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b10}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f16sqrt}; @@ -611,7 +614,7 @@ module testbenchfp; readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal), .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, 
.OpCtrl(OpCtrlVal), - .XManE(XMan), .YManE(YMan), .ZManE(ZMan), + .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart, .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), .XDenormE(XDenorm), .ZDenormE(ZDenorm), @@ -639,8 +642,8 @@ module testbenchfp; .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); - postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]), - .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), + postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]), + .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE), @@ -650,21 +653,16 @@ module testbenchfp; .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes)); -fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), + fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE, .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE); fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), - // .XDenormE(XDenorm), 
.ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal), - // .CvtRes, .CvtFlgE); - // *** integrade divide and squareroot - // fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ), - // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivRes), .Flg(FDivFlg)); - + srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivExp, + .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), + .DivDone, .Quot, .Rem()); + assign CmpFlg[3:0] = 0; // produce clock @@ -817,7 +815,7 @@ end /////////////////////////////////////////////////////////////////////////////////////////////// // check if the non-fma test is correct - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone&(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); @@ -840,8 +838,7 @@ end $stop; end - - VectorNum += 1; // increment the vector + if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file @@ -895,15 +892,17 @@ module readvectors ( output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XExpMaxE, + output logic XExpMaxE, + output logic DivStart, output logic [`FLEN-1:0] X, Y, Z ); // apply test vectors on rising edge 
of clk // Format of vectors Inputs(1/2/3)_AnsFlg - always @(posedge clk) begin + always @(TestNum) begin #1; AnsFlg = TestVector[4:0]; + DivStart = 1'b0; case (Unit) `FMAUNIT: case (Fmt) @@ -972,21 +971,29 @@ module readvectors ( X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; Ans = TestVector[8+(`Q_LEN-1):8]; + DivStart = 1'b1; #10 // one clk cycle + DivStart = 1'b0; end 2'b01: begin // double X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + DivStart = 1'b1; #10 + DivStart = 1'b0; end 2'b00: begin // single X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + DivStart = 1'b1; #10 + DivStart = 1'b0; end 2'b10: begin // half X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + DivStart = 1'b1; #10 + DivStart = 1'b0; end endcase `CMPUNIT: From 49067792dcf2a07aa177aa9639f0417b1e7edfdd Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 16:11:50 +0000 Subject: [PATCH 09/26] fixt lint error --- pipelined/src/fpu/fpu.sv | 38 ++++++++++++++++------------- pipelined/src/fpu/postprocess.sv | 5 ++-- pipelined/src/fpu/round.sv | 3 ++- pipelined/srt/srt-radix4.sv | 9 +++---- pipelined/testbench/testbench-fp.sv | 6 ++--- 5 files changed, 33 insertions(+), 28 deletions(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index b8a2e191..da46d73e 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -104,23 +104,27 @@ module fpu ( logic FOpCtrlQ; // Fma Signals - logic [3*`NF+5:0] SumE, SumM; - logic [`NE+1:0] ProdExpE, ProdExpM; - logic AddendStickyE, 
AddendStickyM; - logic KillProdE, KillProdM; - logic InvZE, InvZM; - logic NegSumE, NegSumM; - logic ZSgnEffE, ZSgnEffM; - logic PSgnE, PSgnM; - logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; + logic [3*`NF+5:0] SumE, SumM; + logic [`NE+1:0] ProdExpE, ProdExpM; + logic AddendStickyE, AddendStickyM; + logic KillProdE, KillProdM; + logic InvZE, InvZM; + logic NegSumE, NegSumM; + logic ZSgnEffE, ZSgnEffM; + logic PSgnE, PSgnM; + logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; // Cvt Signals - logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent - logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by - logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized - logic CvtResSgnE, CvtResSgnM; // the result's sign - logic IntZeroE, IntZeroM; // is the integer zero? - logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized + logic CvtResSgnE, CvtResSgnM; // the result's sign + logic IntZeroE, IntZeroM; // is the integer zero? 
+ logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + + //divide signals + logic [`DIVLEN-1:0] Quot; + logic [`NE:0] DivCalcExpM; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -352,8 +356,8 @@ module fpu ( assign FpLoadM = FResSelM[1]; postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, - .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, - .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, + .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot, + .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM); diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index c5392055..4b2870da 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -51,6 +51,7 @@ module postprocess( input logic [2:0] FOpCtrlM, // choose which opperation (look below for values) input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count input logic [`NE:0] CvtCalcExpM, // the calculated expoent + input logic [`NE:0] DivCalcExpM, // the calculated expoent input logic CvtResDenormUfM, input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by input logic CvtResSgnM, // the result's sign @@ -148,7 +149,7 @@ module postprocess( ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end 2'b01: begin //div ***prob can take out - ShiftAmt = 1'b0;//{DivShiftAmt}; + ShiftAmt = {$clog2(`NORMSHIFTSZ){1'b0}};//{DivShiftAmt}; ShiftIn = {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}}; end default: begin @@ -172,7 +173,7 @@ module postprocess( // round to infinity // round to nearest max 
magnitude - round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, + round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM, .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 92f1d4c2..8e3b9fe4 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -23,6 +23,7 @@ module round( input logic [`NE+1:0] SumExp, // exponent of the normalized sum input logic RoundSgn, // the result's sign input logic [`NE:0] CvtCalcExpM, // the calculated expoent + input logic [`NE:0] DivCalcExpM, // the calculated expoent output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow output logic [`NF-1:0] ResFrac, // Result fraction @@ -303,7 +304,7 @@ module round( case(PostProcSelM) 2'b10: RoundExp = SumExp; // fma 2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt - 2'b01: RoundExp = 0; // divide + 2'b01: RoundExp = {DivCalcExpM[`NE], DivCalcExpM[`NE:0]}; // divide default: RoundExp = 0; endcase diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 671c6350..6894a0f9 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -43,13 +43,12 @@ module srtradix4 ( output logic DivDone, output logic [`DIVLEN-1:0] Quot, output logic [`XLEN-1:0] Rem, // *** later handle integers - output logic [`NE-1:0] DivExp + output logic [`NE:0] DivCalcExpE ); // logic qp, qz, qm; // quotient is +1, 0, or -1 logic [3:0] q; - logic [`NE-1:0] DivCalcExp; - logic calcSign; + logic [`NE:0] DivCalcExp; logic [`DIVLEN-1:0] X, Dpreproc; logic [`DIVLEN+3:0] WS, WSA, WSN; logic [`DIVLEN+3:0] WC, WCA, WCN; @@ -87,7 +86,7 
@@ module srtradix4 ( qsel4 qsel4(.D, .WS, .WC, .q); // Store the expoenent and sign until division is DivDone - flopen #(`NE) expflop(clk, DivStart, DivCalcExp, DivExp); + flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE); // Divisor Selection logic // *** radix 4 change to choose -2 to 2 @@ -303,7 +302,7 @@ endmodule ////////////// module expcalc( input logic [`NE-1:0] XExpE, YExpE, - output logic [`NE-1:0] DivCalcExp + output logic [`NE:0] DivCalcExp ); assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS); diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 748670b4..70787b3c 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -70,7 +70,7 @@ module testbenchfp; logic PSgnE; logic DivSgn; logic [`DIVLEN-1:0] Quot; - logic [`NE-1:0] DivExp; + logic [`NE:0] DivCalcExp; /////////////////////////////////////////////////////////////////////////////////////////////// @@ -643,7 +643,7 @@ module testbenchfp; .ProdExpE, .AddendStickyE, .KillProdE); postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]), - .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, + .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp), .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE), @@ -659,7 +659,7 @@ module testbenchfp; fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivExp, + srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), 
.YExpE(YExp), .DivCalcExpE(DivCalcExp), .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .DivDone, .Quot, .Rem()); From 001e8e077d7921e7f4fce11e6e896410c79564b8 Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 23 Jun 2022 11:46:44 -0500 Subject: [PATCH 10/26] Add sqrt qlsc table generator --- pipelined/srt/Makefile | 8 +- pipelined/srt/qslc_sqrt_r4a2 | Bin 0 -> 16152 bytes pipelined/srt/qslc_sqrt_r4a2.c | 198 ++++++ pipelined/srt/qslc_sqrt_r4a2.sv | 1026 +++++++++++++++++++++++++++++++ 4 files changed, 1230 insertions(+), 2 deletions(-) create mode 100755 pipelined/srt/qslc_sqrt_r4a2 create mode 100644 pipelined/srt/qslc_sqrt_r4a2.c create mode 100644 pipelined/srt/qslc_sqrt_r4a2.sv diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index 63146339..49b21be7 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,4 +1,4 @@ -all: exptestgen testgen qslc_r4a2 qslc_r4a2b +all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen: sqrttestgen.c gcc sqrttestgen.c -o sqrttestgen -lm @@ -19,5 +19,9 @@ qslc_r4a2b: qslc_r4a2b.c gcc qslc_r4a2b.c -o qslc_r4a2b -lm ./qslc_r4a2b > qslc_r4a2b.tv +qslc_sqrt_r4a2: qslc_sqrt_r4a2.c + gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm + ./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv + clean: - rm -f testgen exptestgen qslc_r4a2 + rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 new file mode 100755 index 0000000000000000000000000000000000000000..047de1ba3bf6e4421c0a8c69adbcce1eb2e9b293 GIT binary patch literal 16152 zcmeHOdvsLA8J`V=BnWIkP(X$CfdY~xfrKIw36F)12b4lM;`3&c-6VTVvg_`Khf0V> zl$;n#ZL6{NwAACX=&4n!^yraAlt)inqqR1D8WlYotr7xiM0ET6X6D;WZWG(n_OEv5 zoO{2S@B96}nfdPAduMj%ZuP8Op5br^PQ%0{f=cmRr76*d_21ALh!QbhjK+DYm?B1? 
zJY4FOy+jFERqD(bD6_SG7$or~Qf3f&rcw)*+(MGX8!BaIDGN)X`$4=+GU>8izCBX$ zSh77;pG(<=O+H8fB$g(RVzVPpn#GRdfwD=*WxJNlqayKkYTi!GV<{M~CC8IuLdSd^ zPrJ;AjS@>cuSWCia+#tDmQ)9&weQ9J|4x0YHLu+z-3Yr}sMLZb*Y{2EC@%kM;>)~7 z*EiI42 z;-u4I6d6sb%>KwWUfT&@R#aN{!kY(16mP!!(^-?3JpY`3-+aPQyNN>>O5~3(Ih84& zgM%+qrZ^k+fu!eLLdpm5oh zC|`=>oI&)$DlJ=O`2&$abubzWM9Nkztqs)&%6t{I0byCyb)kAI8uLYBmZhMB0>hz= z09S7ESyjP$Uv2P4;$kFJYHpn`STDj2u_&D?YkUzAjs)vtRRWkg8XWT|SE+IZyCK=)}0weMBhwuBHW0k&LKVMj1uJzd^%FcQ&cP>(T8hhrr>@xIx z4-hh8==GynI_NR(F}D!MtK811x1kZ}U|aDd zaJ_Ar<49c87n^{tzX=d^Jtx`aoZ3XVeOdzFO~$sOcSy|m%G=W8-TUz(@7}&)UdMj# zYhT62!@)z^L3XmM$~hJNVm`&EshG^7;lg#^=Hgo^>uq^2Hp<&td^-w>?yr)`gdf4# zpScBP$F<-N;CF9?ksPf94;L%H*E!acXxVbOrJ<|Wnd?a$b2g5CdW;ZN&a%B^>umLO zeb!m!JQDA9&WxjZ4@=|pgH>Ka83i+G{uaEs^ohhz5S zqmU>EuGw?k(ds#Vv8Su?gYL(nC9j}(5*ude3Ot8V!?FQ2EC&-UTb1Z_u9q!JnC{Er za6t3229R^OdEORJAN8+K)i+AD>X%<8lOImnQ$Q_z5qTMbUgtH0MOcgmw;$SPL@n3( zZ00%mRi(MBZ+qq>WSq}sj)SP0mUXhKB{K_IjEa+^=;|Cu#NhdW=Oii--K|)39PsqX z{H4eP_Mw99CS9TiSU7(LYu4tjEa$U`&yhojJcYaQ;EROWL$`p6{#3);x;)85pQNX4d|}Nm=`fRPD`uS#R5%i}D+GarCm(V9n=tzRz1}J>oXBv)v0mJpm zN2%fC4B4`)Oz3`smMG{H1FAYvXhQEN=mZ5lHnYFKpURevF`;3Cet=ac`g_oT9+uF= zNFy{Jg6;tnop{4E%p$|X%ExVnkEyZ~-!q{Og5IQ{R~k^&iI53>^8}!)71V7&kIR;o zn$TSYou;6}3}}~xo@+vz2zmk|8J%#NhH5gr4FdwPXGVX_R3|#Hj!zAzg#>+8LBDH2 zRVRL8LdO!cRY8LWRCVGu6Z-z=fR-!hB?k1bvJ=;t(B}zS1SmRjscCqC3}-4IC#Lsz z;#_$pPBEby33?Q-7xed41A2;tzK7p4QvF>?(ESSfpaE5#*keM^Cg}YNy4isKS$5(+ z6Z&Z%p!Aa^ICMgwur967+rbi2VXj$y6ZLlHu1d zAP~C@9~a7&y+ala$n8sPE` z-0Kpz+QdCUxS3$n`yy4#y`&uvt=rVz0xfh?lrFIbMO5(Wv{ZbWyS6sR9XLDU9esEW zBG)$l74(Z@@Cb19s?F)Z%l7*yG<*6S&0G4mI`4WM*BkH8ZKUkVk}k54X?Y3CuvGTt zbZpNINYY7=PG&jpx*jC7=I%_{?!?QVB$M)mb!eZQQ_4AQY8FdvNT+lJ(h*2UARPfS z0`xzUnQ|(9(ZKYZmR()6VD7cEuBeMvM7PWkIXMOS7gUG^t{Ys%u0rGr^7HdWKF$Ie zIu*!VfmpwOy|`H91nT|dNQfUDOD6rtlgSx)K7WkkX}mQy;eA(($9D!MJUVSP+kkwD zd-5HR;FXE*~@TQs*ffvzx@C$qQ!A zJ?|V+6aN|<^MR8@QX-K)h9e*P5=c%|*PS>PqpkB$zwnI7y**>;=n=#69B7hk%i~aA zfj%6^#G>E+VHVlbDII}y1kw>mM<5-6bOh27NJk(Yfpi4^cO$_2@^~K}Eg2~B0*T8l 
zN-gkLjY@hkGFlhkn5;6BbeZ@6ouSLTvd}QW`v7UZLW%94e3cAQ`APgRP6ncjLG@^9665>%^G3yh$ zVchPx#^GmsnYjKby39EIry~1@xJniNoRj5t?GNiyS>|?O87Iq)+KwfxEOS4!`~R9q z?fbTQ%-7euM9bA$mTPH)|I=c2iGr+Hx^#hS`nrmS`dEXjpuj!Hou5~5QG+ZO%+Q*z z@kZlwxHy`$L+Cmy&}rtv$B>5RddOy-yWIndah5dcJS8T9hK= z5R*l2iPG@>L%XXT;1Vvyc{Y3K zmxKA)FY%`f`#5-2(oRk7pJy-cNc_aq{lxk{WX2*6<4crA-@f7`ii7P3&qbgH)8|3& zQZ5FZTcsXPo00))Q96X5tDw&nlTz1(S-v6gVbW)e75rRd{DvX$E!w}mJ@-KGVzMD0 zf_`kqB*FIqo1P*)MM9G}ZhN86MLZ{#FjC2HK|cN%Sr*_)?9D&}&P7RrZ2iG}9nX~6XKQ04r58M4E)>cMh1?~{` zz56PHR?Jr|poqpAs;bJUjDE9=77EuNMWU?fZrEGC83S|>WhSog8qu2 zUoR^rxccq@tr++LgXNEgtQudvpFVCVy&Nciu-tpy;cKK|h5U_!-2&rqSgP!X4dQ7xPs>#v)4Aa1P%@1R`PKt`Efm?&|sm zcQ_IX2O_b}hN_|=h_7pceyv)(q%@C4n}pZ+qBX+p-&~I>l#E4`WkVnm4TkCm6fD>x zfm$CCbS_*Q6K>f|H?r>P5Q>zE1}cR+7TAO$4L_hm@~XH4HG06+_~DI9%7+?xiotnQ z<*N(AnW7`y!i^zchmp*;*ncExo`zc*?;@4#-^uYjO`9c_^CZv3L31PK5A{75VJY!^ zNR@<`hm6U$e-}o-mz#;7i>#yfuF2>3K1&w^wY2lEfF6Gpgt+}Y|76Ma^Ze7^ejoTW z_hdfLPg(ZpdZ~>RXXf)fF$f(^RhZB7U6wq*1w_7FM(eE2xhiEslt(+(M0~{vbrFks5|IE?mR5g#-_+brT$@ro8PuloRn!vJziCVHh zmcOv^@^_qeJeOG>1;=Udd41cS zN8b9!t19)Eng1mWG`C_i-+wu16N4uY>ehp{0g8ApCG%7<_N~Ywoqb%w? 
yUpwUIFn#|?Q(tZmKYwVw$Zc0lb +#include + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + ; + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + if (i == 0) + ; + + } + +} + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + printf("\tcase({D[5:3],Wmsbs})\n"); + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + printf("\t\t10'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); + + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -26) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 28) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -10) + printf("0000"); + else 
if ((pla.tot) >= -28) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 32) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -32) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 32) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -34) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 36) + printf("1000"); + else if ((pla.tot) >= 12) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -36) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 40) + printf("1000"); + else if ((pla.tot) >= 12) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -40) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 40) + printf("1000"); + else if ((pla.tot) >= 16) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -44) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 44) + printf("1000"); + else if ((pla.tot) >= 16) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -46) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); + + } + + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + printf("\tendcase\n"); + +} diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv new file mode 100644 index 00000000..be4e3e39 --- /dev/null +++ b/pipelined/srt/qslc_sqrt_r4a2.sv @@ -0,0 +1,1026 @@ + case({D[5:3],Wmsbs}) + 10'b000_0000000: q = 4'b0000; + 10'b000_0000001: q = 4'b0000; + 10'b000_0000010: q = 4'b0000; + 10'b000_0000011: q = 4'b0000; + 10'b000_0000100: q = 4'b0000; + 10'b000_0000101: q = 4'b0000; + 10'b000_0000110: q = 4'b0000; + 
10'b000_0000111: q = 4'b0000; + 10'b000_0001000: q = 4'b0100; + 10'b000_0001001: q = 4'b0100; + 10'b000_0001010: q = 4'b0100; + 10'b000_0001011: q = 4'b0100; + 10'b000_0001100: q = 4'b0100; + 10'b000_0001101: q = 4'b0100; + 10'b000_0001110: q = 4'b0100; + 10'b000_0001111: q = 4'b0100; + 10'b000_0010000: q = 4'b0100; + 10'b000_0010001: q = 4'b0100; + 10'b000_0010010: q = 4'b0100; + 10'b000_0010011: q = 4'b0100; + 10'b000_0010100: q = 4'b0100; + 10'b000_0010101: q = 4'b0100; + 10'b000_0010110: q = 4'b0100; + 10'b000_0010111: q = 4'b0100; + 10'b000_0011000: q = 4'b1000; + 10'b000_0011001: q = 4'b1000; + 10'b000_0011010: q = 4'b1000; + 10'b000_0011011: q = 4'b1000; + 10'b000_0011100: q = 4'b1000; + 10'b000_0011101: q = 4'b1000; + 10'b000_0011110: q = 4'b1000; + 10'b000_0011111: q = 4'b1000; + 10'b000_0100000: q = 4'b1000; + 10'b000_0100001: q = 4'b1000; + 10'b000_0100010: q = 4'b1000; + 10'b000_0100011: q = 4'b1000; + 10'b000_0100100: q = 4'b1000; + 10'b000_0100101: q = 4'b1000; + 10'b000_0100110: q = 4'b1000; + 10'b000_0100111: q = 4'b1000; + 10'b000_0101000: q = 4'b1000; + 10'b000_0101001: q = 4'b1000; + 10'b000_0101010: q = 4'b1000; + 10'b000_0101011: q = 4'b1000; + 10'b000_0101100: q = 4'b1000; + 10'b000_0101101: q = 4'b1000; + 10'b000_0101110: q = 4'b1000; + 10'b000_0101111: q = 4'b1000; + 10'b000_0110000: q = 4'b1000; + 10'b000_0110001: q = 4'b1000; + 10'b000_0110010: q = 4'b1000; + 10'b000_0110011: q = 4'b1000; + 10'b000_0110100: q = 4'b1000; + 10'b000_0110101: q = 4'b1000; + 10'b000_0110110: q = 4'b1000; + 10'b000_0110111: q = 4'b1000; + 10'b000_0111000: q = 4'b1000; + 10'b000_0111001: q = 4'b1000; + 10'b000_0111010: q = 4'b1000; + 10'b000_0111011: q = 4'b1000; + 10'b000_0111100: q = 4'b1000; + 10'b000_0111101: q = 4'b1000; + 10'b000_0111110: q = 4'b1000; + 10'b000_0111111: q = 4'b1000; + 10'b000_1000000: q = 4'b0001; + 10'b000_1000001: q = 4'b0001; + 10'b000_1000010: q = 4'b0001; + 10'b000_1000011: q = 4'b0001; + 10'b000_1000100: q = 4'b0001; + 
10'b000_1000101: q = 4'b0001; + 10'b000_1000110: q = 4'b0001; + 10'b000_1000111: q = 4'b0001; + 10'b000_1001000: q = 4'b0001; + 10'b000_1001001: q = 4'b0001; + 10'b000_1001010: q = 4'b0001; + 10'b000_1001011: q = 4'b0001; + 10'b000_1001100: q = 4'b0001; + 10'b000_1001101: q = 4'b0001; + 10'b000_1001110: q = 4'b0001; + 10'b000_1001111: q = 4'b0001; + 10'b000_1010000: q = 4'b0001; + 10'b000_1010001: q = 4'b0001; + 10'b000_1010010: q = 4'b0001; + 10'b000_1010011: q = 4'b0001; + 10'b000_1010100: q = 4'b0001; + 10'b000_1010101: q = 4'b0001; + 10'b000_1010110: q = 4'b0001; + 10'b000_1010111: q = 4'b0001; + 10'b000_1011000: q = 4'b0001; + 10'b000_1011001: q = 4'b0001; + 10'b000_1011010: q = 4'b0001; + 10'b000_1011011: q = 4'b0001; + 10'b000_1011100: q = 4'b0001; + 10'b000_1011101: q = 4'b0001; + 10'b000_1011110: q = 4'b0001; + 10'b000_1011111: q = 4'b0001; + 10'b000_1100000: q = 4'b0001; + 10'b000_1100001: q = 4'b0001; + 10'b000_1100010: q = 4'b0001; + 10'b000_1100011: q = 4'b0001; + 10'b000_1100100: q = 4'b0001; + 10'b000_1100101: q = 4'b0001; + 10'b000_1100110: q = 4'b0010; + 10'b000_1100111: q = 4'b0010; + 10'b000_1101000: q = 4'b0010; + 10'b000_1101001: q = 4'b0010; + 10'b000_1101010: q = 4'b0010; + 10'b000_1101011: q = 4'b0010; + 10'b000_1101100: q = 4'b0010; + 10'b000_1101101: q = 4'b0010; + 10'b000_1101110: q = 4'b0010; + 10'b000_1101111: q = 4'b0010; + 10'b000_1110000: q = 4'b0010; + 10'b000_1110001: q = 4'b0010; + 10'b000_1110010: q = 4'b0010; + 10'b000_1110011: q = 4'b0010; + 10'b000_1110100: q = 4'b0010; + 10'b000_1110101: q = 4'b0010; + 10'b000_1110110: q = 4'b0010; + 10'b000_1110111: q = 4'b0010; + 10'b000_1111000: q = 4'b0000; + 10'b000_1111001: q = 4'b0000; + 10'b000_1111010: q = 4'b0000; + 10'b000_1111011: q = 4'b0000; + 10'b000_1111100: q = 4'b0000; + 10'b000_1111101: q = 4'b0000; + 10'b000_1111110: q = 4'b0000; + 10'b000_1111111: q = 4'b0000; + 10'b001_0000000: q = 4'b0000; + 10'b001_0000001: q = 4'b0000; + 10'b001_0000010: q = 4'b0000; + 
10'b001_0000011: q = 4'b0000; + 10'b001_0000100: q = 4'b0000; + 10'b001_0000101: q = 4'b0000; + 10'b001_0000110: q = 4'b0000; + 10'b001_0000111: q = 4'b0000; + 10'b001_0001000: q = 4'b0100; + 10'b001_0001001: q = 4'b0100; + 10'b001_0001010: q = 4'b0100; + 10'b001_0001011: q = 4'b0100; + 10'b001_0001100: q = 4'b0100; + 10'b001_0001101: q = 4'b0100; + 10'b001_0001110: q = 4'b0100; + 10'b001_0001111: q = 4'b0100; + 10'b001_0010000: q = 4'b0100; + 10'b001_0010001: q = 4'b0100; + 10'b001_0010010: q = 4'b0100; + 10'b001_0010011: q = 4'b0100; + 10'b001_0010100: q = 4'b0100; + 10'b001_0010101: q = 4'b0100; + 10'b001_0010110: q = 4'b0100; + 10'b001_0010111: q = 4'b0100; + 10'b001_0011000: q = 4'b0100; + 10'b001_0011001: q = 4'b0100; + 10'b001_0011010: q = 4'b0100; + 10'b001_0011011: q = 4'b0100; + 10'b001_0011100: q = 4'b1000; + 10'b001_0011101: q = 4'b1000; + 10'b001_0011110: q = 4'b1000; + 10'b001_0011111: q = 4'b1000; + 10'b001_0100000: q = 4'b1000; + 10'b001_0100001: q = 4'b1000; + 10'b001_0100010: q = 4'b1000; + 10'b001_0100011: q = 4'b1000; + 10'b001_0100100: q = 4'b1000; + 10'b001_0100101: q = 4'b1000; + 10'b001_0100110: q = 4'b1000; + 10'b001_0100111: q = 4'b1000; + 10'b001_0101000: q = 4'b1000; + 10'b001_0101001: q = 4'b1000; + 10'b001_0101010: q = 4'b1000; + 10'b001_0101011: q = 4'b1000; + 10'b001_0101100: q = 4'b1000; + 10'b001_0101101: q = 4'b1000; + 10'b001_0101110: q = 4'b1000; + 10'b001_0101111: q = 4'b1000; + 10'b001_0110000: q = 4'b1000; + 10'b001_0110001: q = 4'b1000; + 10'b001_0110010: q = 4'b1000; + 10'b001_0110011: q = 4'b1000; + 10'b001_0110100: q = 4'b1000; + 10'b001_0110101: q = 4'b1000; + 10'b001_0110110: q = 4'b1000; + 10'b001_0110111: q = 4'b1000; + 10'b001_0111000: q = 4'b1000; + 10'b001_0111001: q = 4'b1000; + 10'b001_0111010: q = 4'b1000; + 10'b001_0111011: q = 4'b1000; + 10'b001_0111100: q = 4'b1000; + 10'b001_0111101: q = 4'b1000; + 10'b001_0111110: q = 4'b1000; + 10'b001_0111111: q = 4'b1000; + 10'b001_1000000: q = 4'b0001; + 
10'b001_1000001: q = 4'b0001; + 10'b001_1000010: q = 4'b0001; + 10'b001_1000011: q = 4'b0001; + 10'b001_1000100: q = 4'b0001; + 10'b001_1000101: q = 4'b0001; + 10'b001_1000110: q = 4'b0001; + 10'b001_1000111: q = 4'b0001; + 10'b001_1001000: q = 4'b0001; + 10'b001_1001001: q = 4'b0001; + 10'b001_1001010: q = 4'b0001; + 10'b001_1001011: q = 4'b0001; + 10'b001_1001100: q = 4'b0001; + 10'b001_1001101: q = 4'b0001; + 10'b001_1001110: q = 4'b0001; + 10'b001_1001111: q = 4'b0001; + 10'b001_1010000: q = 4'b0001; + 10'b001_1010001: q = 4'b0001; + 10'b001_1010010: q = 4'b0001; + 10'b001_1010011: q = 4'b0001; + 10'b001_1010100: q = 4'b0001; + 10'b001_1010101: q = 4'b0001; + 10'b001_1010110: q = 4'b0001; + 10'b001_1010111: q = 4'b0001; + 10'b001_1011000: q = 4'b0001; + 10'b001_1011001: q = 4'b0001; + 10'b001_1011010: q = 4'b0001; + 10'b001_1011011: q = 4'b0001; + 10'b001_1011100: q = 4'b0001; + 10'b001_1011101: q = 4'b0001; + 10'b001_1011110: q = 4'b0001; + 10'b001_1011111: q = 4'b0001; + 10'b001_1100000: q = 4'b0001; + 10'b001_1100001: q = 4'b0001; + 10'b001_1100010: q = 4'b0001; + 10'b001_1100011: q = 4'b0001; + 10'b001_1100100: q = 4'b0010; + 10'b001_1100101: q = 4'b0010; + 10'b001_1100110: q = 4'b0010; + 10'b001_1100111: q = 4'b0010; + 10'b001_1101000: q = 4'b0010; + 10'b001_1101001: q = 4'b0010; + 10'b001_1101010: q = 4'b0010; + 10'b001_1101011: q = 4'b0010; + 10'b001_1101100: q = 4'b0010; + 10'b001_1101101: q = 4'b0010; + 10'b001_1101110: q = 4'b0010; + 10'b001_1101111: q = 4'b0010; + 10'b001_1110000: q = 4'b0010; + 10'b001_1110001: q = 4'b0010; + 10'b001_1110010: q = 4'b0010; + 10'b001_1110011: q = 4'b0010; + 10'b001_1110100: q = 4'b0010; + 10'b001_1110101: q = 4'b0010; + 10'b001_1110110: q = 4'b0000; + 10'b001_1110111: q = 4'b0000; + 10'b001_1111000: q = 4'b0000; + 10'b001_1111001: q = 4'b0000; + 10'b001_1111010: q = 4'b0000; + 10'b001_1111011: q = 4'b0000; + 10'b001_1111100: q = 4'b0000; + 10'b001_1111101: q = 4'b0000; + 10'b001_1111110: q = 4'b0000; + 
10'b001_1111111: q = 4'b0000; + 10'b010_0000000: q = 4'b0000; + 10'b010_0000001: q = 4'b0000; + 10'b010_0000010: q = 4'b0000; + 10'b010_0000011: q = 4'b0000; + 10'b010_0000100: q = 4'b0000; + 10'b010_0000101: q = 4'b0000; + 10'b010_0000110: q = 4'b0000; + 10'b010_0000111: q = 4'b0000; + 10'b010_0001000: q = 4'b0100; + 10'b010_0001001: q = 4'b0100; + 10'b010_0001010: q = 4'b0100; + 10'b010_0001011: q = 4'b0100; + 10'b010_0001100: q = 4'b0100; + 10'b010_0001101: q = 4'b0100; + 10'b010_0001110: q = 4'b0100; + 10'b010_0001111: q = 4'b0100; + 10'b010_0010000: q = 4'b0100; + 10'b010_0010001: q = 4'b0100; + 10'b010_0010010: q = 4'b0100; + 10'b010_0010011: q = 4'b0100; + 10'b010_0010100: q = 4'b0100; + 10'b010_0010101: q = 4'b0100; + 10'b010_0010110: q = 4'b0100; + 10'b010_0010111: q = 4'b0100; + 10'b010_0011000: q = 4'b0100; + 10'b010_0011001: q = 4'b0100; + 10'b010_0011010: q = 4'b0100; + 10'b010_0011011: q = 4'b0100; + 10'b010_0011100: q = 4'b0100; + 10'b010_0011101: q = 4'b0100; + 10'b010_0011110: q = 4'b0100; + 10'b010_0011111: q = 4'b0100; + 10'b010_0100000: q = 4'b1000; + 10'b010_0100001: q = 4'b1000; + 10'b010_0100010: q = 4'b1000; + 10'b010_0100011: q = 4'b1000; + 10'b010_0100100: q = 4'b1000; + 10'b010_0100101: q = 4'b1000; + 10'b010_0100110: q = 4'b1000; + 10'b010_0100111: q = 4'b1000; + 10'b010_0101000: q = 4'b1000; + 10'b010_0101001: q = 4'b1000; + 10'b010_0101010: q = 4'b1000; + 10'b010_0101011: q = 4'b1000; + 10'b010_0101100: q = 4'b1000; + 10'b010_0101101: q = 4'b1000; + 10'b010_0101110: q = 4'b1000; + 10'b010_0101111: q = 4'b1000; + 10'b010_0110000: q = 4'b1000; + 10'b010_0110001: q = 4'b1000; + 10'b010_0110010: q = 4'b1000; + 10'b010_0110011: q = 4'b1000; + 10'b010_0110100: q = 4'b1000; + 10'b010_0110101: q = 4'b1000; + 10'b010_0110110: q = 4'b1000; + 10'b010_0110111: q = 4'b1000; + 10'b010_0111000: q = 4'b1000; + 10'b010_0111001: q = 4'b1000; + 10'b010_0111010: q = 4'b1000; + 10'b010_0111011: q = 4'b1000; + 10'b010_0111100: q = 4'b1000; + 
10'b010_0111101: q = 4'b1000; + 10'b010_0111110: q = 4'b1000; + 10'b010_0111111: q = 4'b1000; + 10'b010_1000000: q = 4'b0001; + 10'b010_1000001: q = 4'b0001; + 10'b010_1000010: q = 4'b0001; + 10'b010_1000011: q = 4'b0001; + 10'b010_1000100: q = 4'b0001; + 10'b010_1000101: q = 4'b0001; + 10'b010_1000110: q = 4'b0001; + 10'b010_1000111: q = 4'b0001; + 10'b010_1001000: q = 4'b0001; + 10'b010_1001001: q = 4'b0001; + 10'b010_1001010: q = 4'b0001; + 10'b010_1001011: q = 4'b0001; + 10'b010_1001100: q = 4'b0001; + 10'b010_1001101: q = 4'b0001; + 10'b010_1001110: q = 4'b0001; + 10'b010_1001111: q = 4'b0001; + 10'b010_1010000: q = 4'b0001; + 10'b010_1010001: q = 4'b0001; + 10'b010_1010010: q = 4'b0001; + 10'b010_1010011: q = 4'b0001; + 10'b010_1010100: q = 4'b0001; + 10'b010_1010101: q = 4'b0001; + 10'b010_1010110: q = 4'b0001; + 10'b010_1010111: q = 4'b0001; + 10'b010_1011000: q = 4'b0001; + 10'b010_1011001: q = 4'b0001; + 10'b010_1011010: q = 4'b0001; + 10'b010_1011011: q = 4'b0001; + 10'b010_1011100: q = 4'b0001; + 10'b010_1011101: q = 4'b0001; + 10'b010_1011110: q = 4'b0001; + 10'b010_1011111: q = 4'b0001; + 10'b010_1100000: q = 4'b0010; + 10'b010_1100001: q = 4'b0010; + 10'b010_1100010: q = 4'b0010; + 10'b010_1100011: q = 4'b0010; + 10'b010_1100100: q = 4'b0010; + 10'b010_1100101: q = 4'b0010; + 10'b010_1100110: q = 4'b0010; + 10'b010_1100111: q = 4'b0010; + 10'b010_1101000: q = 4'b0010; + 10'b010_1101001: q = 4'b0010; + 10'b010_1101010: q = 4'b0010; + 10'b010_1101011: q = 4'b0010; + 10'b010_1101100: q = 4'b0010; + 10'b010_1101101: q = 4'b0010; + 10'b010_1101110: q = 4'b0010; + 10'b010_1101111: q = 4'b0010; + 10'b010_1110000: q = 4'b0010; + 10'b010_1110001: q = 4'b0010; + 10'b010_1110010: q = 4'b0010; + 10'b010_1110011: q = 4'b0010; + 10'b010_1110100: q = 4'b0000; + 10'b010_1110101: q = 4'b0000; + 10'b010_1110110: q = 4'b0000; + 10'b010_1110111: q = 4'b0000; + 10'b010_1111000: q = 4'b0000; + 10'b010_1111001: q = 4'b0000; + 10'b010_1111010: q = 4'b0000; + 
10'b010_1111011: q = 4'b0000; + 10'b010_1111100: q = 4'b0000; + 10'b010_1111101: q = 4'b0000; + 10'b010_1111110: q = 4'b0000; + 10'b010_1111111: q = 4'b0000; + 10'b011_0000000: q = 4'b0000; + 10'b011_0000001: q = 4'b0000; + 10'b011_0000010: q = 4'b0000; + 10'b011_0000011: q = 4'b0000; + 10'b011_0000100: q = 4'b0000; + 10'b011_0000101: q = 4'b0000; + 10'b011_0000110: q = 4'b0000; + 10'b011_0000111: q = 4'b0000; + 10'b011_0001000: q = 4'b0100; + 10'b011_0001001: q = 4'b0100; + 10'b011_0001010: q = 4'b0100; + 10'b011_0001011: q = 4'b0100; + 10'b011_0001100: q = 4'b0100; + 10'b011_0001101: q = 4'b0100; + 10'b011_0001110: q = 4'b0100; + 10'b011_0001111: q = 4'b0100; + 10'b011_0010000: q = 4'b0100; + 10'b011_0010001: q = 4'b0100; + 10'b011_0010010: q = 4'b0100; + 10'b011_0010011: q = 4'b0100; + 10'b011_0010100: q = 4'b0100; + 10'b011_0010101: q = 4'b0100; + 10'b011_0010110: q = 4'b0100; + 10'b011_0010111: q = 4'b0100; + 10'b011_0011000: q = 4'b0100; + 10'b011_0011001: q = 4'b0100; + 10'b011_0011010: q = 4'b0100; + 10'b011_0011011: q = 4'b0100; + 10'b011_0011100: q = 4'b0100; + 10'b011_0011101: q = 4'b0100; + 10'b011_0011110: q = 4'b0100; + 10'b011_0011111: q = 4'b0100; + 10'b011_0100000: q = 4'b1000; + 10'b011_0100001: q = 4'b1000; + 10'b011_0100010: q = 4'b1000; + 10'b011_0100011: q = 4'b1000; + 10'b011_0100100: q = 4'b1000; + 10'b011_0100101: q = 4'b1000; + 10'b011_0100110: q = 4'b1000; + 10'b011_0100111: q = 4'b1000; + 10'b011_0101000: q = 4'b1000; + 10'b011_0101001: q = 4'b1000; + 10'b011_0101010: q = 4'b1000; + 10'b011_0101011: q = 4'b1000; + 10'b011_0101100: q = 4'b1000; + 10'b011_0101101: q = 4'b1000; + 10'b011_0101110: q = 4'b1000; + 10'b011_0101111: q = 4'b1000; + 10'b011_0110000: q = 4'b1000; + 10'b011_0110001: q = 4'b1000; + 10'b011_0110010: q = 4'b1000; + 10'b011_0110011: q = 4'b1000; + 10'b011_0110100: q = 4'b1000; + 10'b011_0110101: q = 4'b1000; + 10'b011_0110110: q = 4'b1000; + 10'b011_0110111: q = 4'b1000; + 10'b011_0111000: q = 4'b1000; + 
10'b011_0111001: q = 4'b1000; + 10'b011_0111010: q = 4'b1000; + 10'b011_0111011: q = 4'b1000; + 10'b011_0111100: q = 4'b1000; + 10'b011_0111101: q = 4'b1000; + 10'b011_0111110: q = 4'b1000; + 10'b011_0111111: q = 4'b1000; + 10'b011_1000000: q = 4'b0001; + 10'b011_1000001: q = 4'b0001; + 10'b011_1000010: q = 4'b0001; + 10'b011_1000011: q = 4'b0001; + 10'b011_1000100: q = 4'b0001; + 10'b011_1000101: q = 4'b0001; + 10'b011_1000110: q = 4'b0001; + 10'b011_1000111: q = 4'b0001; + 10'b011_1001000: q = 4'b0001; + 10'b011_1001001: q = 4'b0001; + 10'b011_1001010: q = 4'b0001; + 10'b011_1001011: q = 4'b0001; + 10'b011_1001100: q = 4'b0001; + 10'b011_1001101: q = 4'b0001; + 10'b011_1001110: q = 4'b0001; + 10'b011_1001111: q = 4'b0001; + 10'b011_1010000: q = 4'b0001; + 10'b011_1010001: q = 4'b0001; + 10'b011_1010010: q = 4'b0001; + 10'b011_1010011: q = 4'b0001; + 10'b011_1010100: q = 4'b0001; + 10'b011_1010101: q = 4'b0001; + 10'b011_1010110: q = 4'b0001; + 10'b011_1010111: q = 4'b0001; + 10'b011_1011000: q = 4'b0001; + 10'b011_1011001: q = 4'b0001; + 10'b011_1011010: q = 4'b0001; + 10'b011_1011011: q = 4'b0001; + 10'b011_1011100: q = 4'b0001; + 10'b011_1011101: q = 4'b0001; + 10'b011_1011110: q = 4'b0010; + 10'b011_1011111: q = 4'b0010; + 10'b011_1100000: q = 4'b0010; + 10'b011_1100001: q = 4'b0010; + 10'b011_1100010: q = 4'b0010; + 10'b011_1100011: q = 4'b0010; + 10'b011_1100100: q = 4'b0010; + 10'b011_1100101: q = 4'b0010; + 10'b011_1100110: q = 4'b0010; + 10'b011_1100111: q = 4'b0010; + 10'b011_1101000: q = 4'b0010; + 10'b011_1101001: q = 4'b0010; + 10'b011_1101010: q = 4'b0010; + 10'b011_1101011: q = 4'b0010; + 10'b011_1101100: q = 4'b0010; + 10'b011_1101101: q = 4'b0010; + 10'b011_1101110: q = 4'b0010; + 10'b011_1101111: q = 4'b0010; + 10'b011_1110000: q = 4'b0010; + 10'b011_1110001: q = 4'b0010; + 10'b011_1110010: q = 4'b0010; + 10'b011_1110011: q = 4'b0010; + 10'b011_1110100: q = 4'b0000; + 10'b011_1110101: q = 4'b0000; + 10'b011_1110110: q = 4'b0000; + 
10'b011_1110111: q = 4'b0000; + 10'b011_1111000: q = 4'b0000; + 10'b011_1111001: q = 4'b0000; + 10'b011_1111010: q = 4'b0000; + 10'b011_1111011: q = 4'b0000; + 10'b011_1111100: q = 4'b0000; + 10'b011_1111101: q = 4'b0000; + 10'b011_1111110: q = 4'b0000; + 10'b011_1111111: q = 4'b0000; + 10'b100_0000000: q = 4'b0000; + 10'b100_0000001: q = 4'b0000; + 10'b100_0000010: q = 4'b0000; + 10'b100_0000011: q = 4'b0000; + 10'b100_0000100: q = 4'b0000; + 10'b100_0000101: q = 4'b0000; + 10'b100_0000110: q = 4'b0000; + 10'b100_0000111: q = 4'b0000; + 10'b100_0001000: q = 4'b0000; + 10'b100_0001001: q = 4'b0000; + 10'b100_0001010: q = 4'b0000; + 10'b100_0001011: q = 4'b0000; + 10'b100_0001100: q = 4'b0100; + 10'b100_0001101: q = 4'b0100; + 10'b100_0001110: q = 4'b0100; + 10'b100_0001111: q = 4'b0100; + 10'b100_0010000: q = 4'b0100; + 10'b100_0010001: q = 4'b0100; + 10'b100_0010010: q = 4'b0100; + 10'b100_0010011: q = 4'b0100; + 10'b100_0010100: q = 4'b0100; + 10'b100_0010101: q = 4'b0100; + 10'b100_0010110: q = 4'b0100; + 10'b100_0010111: q = 4'b0100; + 10'b100_0011000: q = 4'b0100; + 10'b100_0011001: q = 4'b0100; + 10'b100_0011010: q = 4'b0100; + 10'b100_0011011: q = 4'b0100; + 10'b100_0011100: q = 4'b0100; + 10'b100_0011101: q = 4'b0100; + 10'b100_0011110: q = 4'b0100; + 10'b100_0011111: q = 4'b0100; + 10'b100_0100000: q = 4'b0100; + 10'b100_0100001: q = 4'b0100; + 10'b100_0100010: q = 4'b0100; + 10'b100_0100011: q = 4'b0100; + 10'b100_0100100: q = 4'b1000; + 10'b100_0100101: q = 4'b1000; + 10'b100_0100110: q = 4'b1000; + 10'b100_0100111: q = 4'b1000; + 10'b100_0101000: q = 4'b1000; + 10'b100_0101001: q = 4'b1000; + 10'b100_0101010: q = 4'b1000; + 10'b100_0101011: q = 4'b1000; + 10'b100_0101100: q = 4'b1000; + 10'b100_0101101: q = 4'b1000; + 10'b100_0101110: q = 4'b1000; + 10'b100_0101111: q = 4'b1000; + 10'b100_0110000: q = 4'b1000; + 10'b100_0110001: q = 4'b1000; + 10'b100_0110010: q = 4'b1000; + 10'b100_0110011: q = 4'b1000; + 10'b100_0110100: q = 4'b1000; + 
10'b100_0110101: q = 4'b1000; + 10'b100_0110110: q = 4'b1000; + 10'b100_0110111: q = 4'b1000; + 10'b100_0111000: q = 4'b1000; + 10'b100_0111001: q = 4'b1000; + 10'b100_0111010: q = 4'b1000; + 10'b100_0111011: q = 4'b1000; + 10'b100_0111100: q = 4'b1000; + 10'b100_0111101: q = 4'b1000; + 10'b100_0111110: q = 4'b1000; + 10'b100_0111111: q = 4'b1000; + 10'b100_1000000: q = 4'b0001; + 10'b100_1000001: q = 4'b0001; + 10'b100_1000010: q = 4'b0001; + 10'b100_1000011: q = 4'b0001; + 10'b100_1000100: q = 4'b0001; + 10'b100_1000101: q = 4'b0001; + 10'b100_1000110: q = 4'b0001; + 10'b100_1000111: q = 4'b0001; + 10'b100_1001000: q = 4'b0001; + 10'b100_1001001: q = 4'b0001; + 10'b100_1001010: q = 4'b0001; + 10'b100_1001011: q = 4'b0001; + 10'b100_1001100: q = 4'b0001; + 10'b100_1001101: q = 4'b0001; + 10'b100_1001110: q = 4'b0001; + 10'b100_1001111: q = 4'b0001; + 10'b100_1010000: q = 4'b0001; + 10'b100_1010001: q = 4'b0001; + 10'b100_1010010: q = 4'b0001; + 10'b100_1010011: q = 4'b0001; + 10'b100_1010100: q = 4'b0001; + 10'b100_1010101: q = 4'b0001; + 10'b100_1010110: q = 4'b0001; + 10'b100_1010111: q = 4'b0001; + 10'b100_1011000: q = 4'b0001; + 10'b100_1011001: q = 4'b0001; + 10'b100_1011010: q = 4'b0001; + 10'b100_1011011: q = 4'b0001; + 10'b100_1011100: q = 4'b0010; + 10'b100_1011101: q = 4'b0010; + 10'b100_1011110: q = 4'b0010; + 10'b100_1011111: q = 4'b0010; + 10'b100_1100000: q = 4'b0010; + 10'b100_1100001: q = 4'b0010; + 10'b100_1100010: q = 4'b0010; + 10'b100_1100011: q = 4'b0010; + 10'b100_1100100: q = 4'b0010; + 10'b100_1100101: q = 4'b0010; + 10'b100_1100110: q = 4'b0010; + 10'b100_1100111: q = 4'b0010; + 10'b100_1101000: q = 4'b0010; + 10'b100_1101001: q = 4'b0010; + 10'b100_1101010: q = 4'b0010; + 10'b100_1101011: q = 4'b0010; + 10'b100_1101100: q = 4'b0010; + 10'b100_1101101: q = 4'b0010; + 10'b100_1101110: q = 4'b0010; + 10'b100_1101111: q = 4'b0010; + 10'b100_1110000: q = 4'b0010; + 10'b100_1110001: q = 4'b0010; + 10'b100_1110010: q = 4'b0010; + 
10'b100_1110011: q = 4'b0010; + 10'b100_1110100: q = 4'b0000; + 10'b100_1110101: q = 4'b0000; + 10'b100_1110110: q = 4'b0000; + 10'b100_1110111: q = 4'b0000; + 10'b100_1111000: q = 4'b0000; + 10'b100_1111001: q = 4'b0000; + 10'b100_1111010: q = 4'b0000; + 10'b100_1111011: q = 4'b0000; + 10'b100_1111100: q = 4'b0000; + 10'b100_1111101: q = 4'b0000; + 10'b100_1111110: q = 4'b0000; + 10'b100_1111111: q = 4'b0000; + 10'b101_0000000: q = 4'b0000; + 10'b101_0000001: q = 4'b0000; + 10'b101_0000010: q = 4'b0000; + 10'b101_0000011: q = 4'b0000; + 10'b101_0000100: q = 4'b0000; + 10'b101_0000101: q = 4'b0000; + 10'b101_0000110: q = 4'b0000; + 10'b101_0000111: q = 4'b0000; + 10'b101_0001000: q = 4'b0000; + 10'b101_0001001: q = 4'b0000; + 10'b101_0001010: q = 4'b0000; + 10'b101_0001011: q = 4'b0000; + 10'b101_0001100: q = 4'b0100; + 10'b101_0001101: q = 4'b0100; + 10'b101_0001110: q = 4'b0100; + 10'b101_0001111: q = 4'b0100; + 10'b101_0010000: q = 4'b0100; + 10'b101_0010001: q = 4'b0100; + 10'b101_0010010: q = 4'b0100; + 10'b101_0010011: q = 4'b0100; + 10'b101_0010100: q = 4'b0100; + 10'b101_0010101: q = 4'b0100; + 10'b101_0010110: q = 4'b0100; + 10'b101_0010111: q = 4'b0100; + 10'b101_0011000: q = 4'b0100; + 10'b101_0011001: q = 4'b0100; + 10'b101_0011010: q = 4'b0100; + 10'b101_0011011: q = 4'b0100; + 10'b101_0011100: q = 4'b0100; + 10'b101_0011101: q = 4'b0100; + 10'b101_0011110: q = 4'b0100; + 10'b101_0011111: q = 4'b0100; + 10'b101_0100000: q = 4'b0100; + 10'b101_0100001: q = 4'b0100; + 10'b101_0100010: q = 4'b0100; + 10'b101_0100011: q = 4'b0100; + 10'b101_0100100: q = 4'b0100; + 10'b101_0100101: q = 4'b0100; + 10'b101_0100110: q = 4'b0100; + 10'b101_0100111: q = 4'b0100; + 10'b101_0101000: q = 4'b1000; + 10'b101_0101001: q = 4'b1000; + 10'b101_0101010: q = 4'b1000; + 10'b101_0101011: q = 4'b1000; + 10'b101_0101100: q = 4'b1000; + 10'b101_0101101: q = 4'b1000; + 10'b101_0101110: q = 4'b1000; + 10'b101_0101111: q = 4'b1000; + 10'b101_0110000: q = 4'b1000; + 
10'b101_0110001: q = 4'b1000; + 10'b101_0110010: q = 4'b1000; + 10'b101_0110011: q = 4'b1000; + 10'b101_0110100: q = 4'b1000; + 10'b101_0110101: q = 4'b1000; + 10'b101_0110110: q = 4'b1000; + 10'b101_0110111: q = 4'b1000; + 10'b101_0111000: q = 4'b1000; + 10'b101_0111001: q = 4'b1000; + 10'b101_0111010: q = 4'b1000; + 10'b101_0111011: q = 4'b1000; + 10'b101_0111100: q = 4'b1000; + 10'b101_0111101: q = 4'b1000; + 10'b101_0111110: q = 4'b1000; + 10'b101_0111111: q = 4'b1000; + 10'b101_1000000: q = 4'b0001; + 10'b101_1000001: q = 4'b0001; + 10'b101_1000010: q = 4'b0001; + 10'b101_1000011: q = 4'b0001; + 10'b101_1000100: q = 4'b0001; + 10'b101_1000101: q = 4'b0001; + 10'b101_1000110: q = 4'b0001; + 10'b101_1000111: q = 4'b0001; + 10'b101_1001000: q = 4'b0001; + 10'b101_1001001: q = 4'b0001; + 10'b101_1001010: q = 4'b0001; + 10'b101_1001011: q = 4'b0001; + 10'b101_1001100: q = 4'b0001; + 10'b101_1001101: q = 4'b0001; + 10'b101_1001110: q = 4'b0001; + 10'b101_1001111: q = 4'b0001; + 10'b101_1010000: q = 4'b0001; + 10'b101_1010001: q = 4'b0001; + 10'b101_1010010: q = 4'b0001; + 10'b101_1010011: q = 4'b0001; + 10'b101_1010100: q = 4'b0001; + 10'b101_1010101: q = 4'b0001; + 10'b101_1010110: q = 4'b0001; + 10'b101_1010111: q = 4'b0001; + 10'b101_1011000: q = 4'b0010; + 10'b101_1011001: q = 4'b0010; + 10'b101_1011010: q = 4'b0010; + 10'b101_1011011: q = 4'b0010; + 10'b101_1011100: q = 4'b0010; + 10'b101_1011101: q = 4'b0010; + 10'b101_1011110: q = 4'b0010; + 10'b101_1011111: q = 4'b0010; + 10'b101_1100000: q = 4'b0010; + 10'b101_1100001: q = 4'b0010; + 10'b101_1100010: q = 4'b0010; + 10'b101_1100011: q = 4'b0010; + 10'b101_1100100: q = 4'b0010; + 10'b101_1100101: q = 4'b0010; + 10'b101_1100110: q = 4'b0010; + 10'b101_1100111: q = 4'b0010; + 10'b101_1101000: q = 4'b0010; + 10'b101_1101001: q = 4'b0010; + 10'b101_1101010: q = 4'b0010; + 10'b101_1101011: q = 4'b0010; + 10'b101_1101100: q = 4'b0010; + 10'b101_1101101: q = 4'b0010; + 10'b101_1101110: q = 4'b0010; + 
10'b101_1101111: q = 4'b0010; + 10'b101_1110000: q = 4'b0000; + 10'b101_1110001: q = 4'b0000; + 10'b101_1110010: q = 4'b0000; + 10'b101_1110011: q = 4'b0000; + 10'b101_1110100: q = 4'b0000; + 10'b101_1110101: q = 4'b0000; + 10'b101_1110110: q = 4'b0000; + 10'b101_1110111: q = 4'b0000; + 10'b101_1111000: q = 4'b0000; + 10'b101_1111001: q = 4'b0000; + 10'b101_1111010: q = 4'b0000; + 10'b101_1111011: q = 4'b0000; + 10'b101_1111100: q = 4'b0000; + 10'b101_1111101: q = 4'b0000; + 10'b101_1111110: q = 4'b0000; + 10'b101_1111111: q = 4'b0000; + 10'b110_0000000: q = 4'b0000; + 10'b110_0000001: q = 4'b0000; + 10'b110_0000010: q = 4'b0000; + 10'b110_0000011: q = 4'b0000; + 10'b110_0000100: q = 4'b0000; + 10'b110_0000101: q = 4'b0000; + 10'b110_0000110: q = 4'b0000; + 10'b110_0000111: q = 4'b0000; + 10'b110_0001000: q = 4'b0000; + 10'b110_0001001: q = 4'b0000; + 10'b110_0001010: q = 4'b0000; + 10'b110_0001011: q = 4'b0000; + 10'b110_0001100: q = 4'b0000; + 10'b110_0001101: q = 4'b0000; + 10'b110_0001110: q = 4'b0000; + 10'b110_0001111: q = 4'b0000; + 10'b110_0010000: q = 4'b0100; + 10'b110_0010001: q = 4'b0100; + 10'b110_0010010: q = 4'b0100; + 10'b110_0010011: q = 4'b0100; + 10'b110_0010100: q = 4'b0100; + 10'b110_0010101: q = 4'b0100; + 10'b110_0010110: q = 4'b0100; + 10'b110_0010111: q = 4'b0100; + 10'b110_0011000: q = 4'b0100; + 10'b110_0011001: q = 4'b0100; + 10'b110_0011010: q = 4'b0100; + 10'b110_0011011: q = 4'b0100; + 10'b110_0011100: q = 4'b0100; + 10'b110_0011101: q = 4'b0100; + 10'b110_0011110: q = 4'b0100; + 10'b110_0011111: q = 4'b0100; + 10'b110_0100000: q = 4'b0100; + 10'b110_0100001: q = 4'b0100; + 10'b110_0100010: q = 4'b0100; + 10'b110_0100011: q = 4'b0100; + 10'b110_0100100: q = 4'b0100; + 10'b110_0100101: q = 4'b0100; + 10'b110_0100110: q = 4'b0100; + 10'b110_0100111: q = 4'b0100; + 10'b110_0101000: q = 4'b1000; + 10'b110_0101001: q = 4'b1000; + 10'b110_0101010: q = 4'b1000; + 10'b110_0101011: q = 4'b1000; + 10'b110_0101100: q = 4'b1000; + 
10'b110_0101101: q = 4'b1000; + 10'b110_0101110: q = 4'b1000; + 10'b110_0101111: q = 4'b1000; + 10'b110_0110000: q = 4'b1000; + 10'b110_0110001: q = 4'b1000; + 10'b110_0110010: q = 4'b1000; + 10'b110_0110011: q = 4'b1000; + 10'b110_0110100: q = 4'b1000; + 10'b110_0110101: q = 4'b1000; + 10'b110_0110110: q = 4'b1000; + 10'b110_0110111: q = 4'b1000; + 10'b110_0111000: q = 4'b1000; + 10'b110_0111001: q = 4'b1000; + 10'b110_0111010: q = 4'b1000; + 10'b110_0111011: q = 4'b1000; + 10'b110_0111100: q = 4'b1000; + 10'b110_0111101: q = 4'b1000; + 10'b110_0111110: q = 4'b1000; + 10'b110_0111111: q = 4'b1000; + 10'b110_1000000: q = 4'b0001; + 10'b110_1000001: q = 4'b0001; + 10'b110_1000010: q = 4'b0001; + 10'b110_1000011: q = 4'b0001; + 10'b110_1000100: q = 4'b0001; + 10'b110_1000101: q = 4'b0001; + 10'b110_1000110: q = 4'b0001; + 10'b110_1000111: q = 4'b0001; + 10'b110_1001000: q = 4'b0001; + 10'b110_1001001: q = 4'b0001; + 10'b110_1001010: q = 4'b0001; + 10'b110_1001011: q = 4'b0001; + 10'b110_1001100: q = 4'b0001; + 10'b110_1001101: q = 4'b0001; + 10'b110_1001110: q = 4'b0001; + 10'b110_1001111: q = 4'b0001; + 10'b110_1010000: q = 4'b0001; + 10'b110_1010001: q = 4'b0001; + 10'b110_1010010: q = 4'b0001; + 10'b110_1010011: q = 4'b0001; + 10'b110_1010100: q = 4'b0010; + 10'b110_1010101: q = 4'b0010; + 10'b110_1010110: q = 4'b0010; + 10'b110_1010111: q = 4'b0010; + 10'b110_1011000: q = 4'b0010; + 10'b110_1011001: q = 4'b0010; + 10'b110_1011010: q = 4'b0010; + 10'b110_1011011: q = 4'b0010; + 10'b110_1011100: q = 4'b0010; + 10'b110_1011101: q = 4'b0010; + 10'b110_1011110: q = 4'b0010; + 10'b110_1011111: q = 4'b0010; + 10'b110_1100000: q = 4'b0010; + 10'b110_1100001: q = 4'b0010; + 10'b110_1100010: q = 4'b0010; + 10'b110_1100011: q = 4'b0010; + 10'b110_1100100: q = 4'b0010; + 10'b110_1100101: q = 4'b0010; + 10'b110_1100110: q = 4'b0010; + 10'b110_1100111: q = 4'b0010; + 10'b110_1101000: q = 4'b0010; + 10'b110_1101001: q = 4'b0010; + 10'b110_1101010: q = 4'b0010; + 
10'b110_1101011: q = 4'b0010; + 10'b110_1101100: q = 4'b0010; + 10'b110_1101101: q = 4'b0010; + 10'b110_1101110: q = 4'b0010; + 10'b110_1101111: q = 4'b0010; + 10'b110_1110000: q = 4'b0000; + 10'b110_1110001: q = 4'b0000; + 10'b110_1110010: q = 4'b0000; + 10'b110_1110011: q = 4'b0000; + 10'b110_1110100: q = 4'b0000; + 10'b110_1110101: q = 4'b0000; + 10'b110_1110110: q = 4'b0000; + 10'b110_1110111: q = 4'b0000; + 10'b110_1111000: q = 4'b0000; + 10'b110_1111001: q = 4'b0000; + 10'b110_1111010: q = 4'b0000; + 10'b110_1111011: q = 4'b0000; + 10'b110_1111100: q = 4'b0000; + 10'b110_1111101: q = 4'b0000; + 10'b110_1111110: q = 4'b0000; + 10'b110_1111111: q = 4'b0000; + 10'b111_0000000: q = 4'b0000; + 10'b111_0000001: q = 4'b0000; + 10'b111_0000010: q = 4'b0000; + 10'b111_0000011: q = 4'b0000; + 10'b111_0000100: q = 4'b0000; + 10'b111_0000101: q = 4'b0000; + 10'b111_0000110: q = 4'b0000; + 10'b111_0000111: q = 4'b0000; + 10'b111_0001000: q = 4'b0000; + 10'b111_0001001: q = 4'b0000; + 10'b111_0001010: q = 4'b0000; + 10'b111_0001011: q = 4'b0000; + 10'b111_0001100: q = 4'b0000; + 10'b111_0001101: q = 4'b0000; + 10'b111_0001110: q = 4'b0000; + 10'b111_0001111: q = 4'b0000; + 10'b111_0010000: q = 4'b0100; + 10'b111_0010001: q = 4'b0100; + 10'b111_0010010: q = 4'b0100; + 10'b111_0010011: q = 4'b0100; + 10'b111_0010100: q = 4'b0100; + 10'b111_0010101: q = 4'b0100; + 10'b111_0010110: q = 4'b0100; + 10'b111_0010111: q = 4'b0100; + 10'b111_0011000: q = 4'b0100; + 10'b111_0011001: q = 4'b0100; + 10'b111_0011010: q = 4'b0100; + 10'b111_0011011: q = 4'b0100; + 10'b111_0011100: q = 4'b0100; + 10'b111_0011101: q = 4'b0100; + 10'b111_0011110: q = 4'b0100; + 10'b111_0011111: q = 4'b0100; + 10'b111_0100000: q = 4'b0100; + 10'b111_0100001: q = 4'b0100; + 10'b111_0100010: q = 4'b0100; + 10'b111_0100011: q = 4'b0100; + 10'b111_0100100: q = 4'b0100; + 10'b111_0100101: q = 4'b0100; + 10'b111_0100110: q = 4'b0100; + 10'b111_0100111: q = 4'b0100; + 10'b111_0101000: q = 4'b0100; + 
10'b111_0101001: q = 4'b0100; + 10'b111_0101010: q = 4'b0100; + 10'b111_0101011: q = 4'b0100; + 10'b111_0101100: q = 4'b1000; + 10'b111_0101101: q = 4'b1000; + 10'b111_0101110: q = 4'b1000; + 10'b111_0101111: q = 4'b1000; + 10'b111_0110000: q = 4'b1000; + 10'b111_0110001: q = 4'b1000; + 10'b111_0110010: q = 4'b1000; + 10'b111_0110011: q = 4'b1000; + 10'b111_0110100: q = 4'b1000; + 10'b111_0110101: q = 4'b1000; + 10'b111_0110110: q = 4'b1000; + 10'b111_0110111: q = 4'b1000; + 10'b111_0111000: q = 4'b1000; + 10'b111_0111001: q = 4'b1000; + 10'b111_0111010: q = 4'b1000; + 10'b111_0111011: q = 4'b1000; + 10'b111_0111100: q = 4'b1000; + 10'b111_0111101: q = 4'b1000; + 10'b111_0111110: q = 4'b1000; + 10'b111_0111111: q = 4'b1000; + 10'b111_1000000: q = 4'b0001; + 10'b111_1000001: q = 4'b0001; + 10'b111_1000010: q = 4'b0001; + 10'b111_1000011: q = 4'b0001; + 10'b111_1000100: q = 4'b0001; + 10'b111_1000101: q = 4'b0001; + 10'b111_1000110: q = 4'b0001; + 10'b111_1000111: q = 4'b0001; + 10'b111_1001000: q = 4'b0001; + 10'b111_1001001: q = 4'b0001; + 10'b111_1001010: q = 4'b0001; + 10'b111_1001011: q = 4'b0001; + 10'b111_1001100: q = 4'b0001; + 10'b111_1001101: q = 4'b0001; + 10'b111_1001110: q = 4'b0001; + 10'b111_1001111: q = 4'b0001; + 10'b111_1010000: q = 4'b0001; + 10'b111_1010001: q = 4'b0001; + 10'b111_1010010: q = 4'b0010; + 10'b111_1010011: q = 4'b0010; + 10'b111_1010100: q = 4'b0010; + 10'b111_1010101: q = 4'b0010; + 10'b111_1010110: q = 4'b0010; + 10'b111_1010111: q = 4'b0010; + 10'b111_1011000: q = 4'b0010; + 10'b111_1011001: q = 4'b0010; + 10'b111_1011010: q = 4'b0010; + 10'b111_1011011: q = 4'b0010; + 10'b111_1011100: q = 4'b0010; + 10'b111_1011101: q = 4'b0010; + 10'b111_1011110: q = 4'b0010; + 10'b111_1011111: q = 4'b0010; + 10'b111_1100000: q = 4'b0010; + 10'b111_1100001: q = 4'b0010; + 10'b111_1100010: q = 4'b0010; + 10'b111_1100011: q = 4'b0010; + 10'b111_1100100: q = 4'b0010; + 10'b111_1100101: q = 4'b0010; + 10'b111_1100110: q = 4'b0010; + 
10'b111_1100111: q = 4'b0010; + 10'b111_1101000: q = 4'b0010; + 10'b111_1101001: q = 4'b0010; + 10'b111_1101010: q = 4'b0010; + 10'b111_1101011: q = 4'b0010; + 10'b111_1101100: q = 4'b0010; + 10'b111_1101101: q = 4'b0010; + 10'b111_1101110: q = 4'b0010; + 10'b111_1101111: q = 4'b0010; + 10'b111_1110000: q = 4'b0000; + 10'b111_1110001: q = 4'b0000; + 10'b111_1110010: q = 4'b0000; + 10'b111_1110011: q = 4'b0000; + 10'b111_1110100: q = 4'b0000; + 10'b111_1110101: q = 4'b0000; + 10'b111_1110110: q = 4'b0000; + 10'b111_1110111: q = 4'b0000; + 10'b111_1111000: q = 4'b0000; + 10'b111_1111001: q = 4'b0000; + 10'b111_1111010: q = 4'b0000; + 10'b111_1111011: q = 4'b0000; + 10'b111_1111100: q = 4'b0000; + 10'b111_1111101: q = 4'b0000; + 10'b111_1111110: q = 4'b0000; + 10'b111_1111111: q = 4'b0000; + endcase From 79bf543ba951d31ff6db64e5052348ec24a1229c Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 23 Jun 2022 11:59:05 -0500 Subject: [PATCH 11/26] Update --- pipelined/srt/qslc_sqrt_r4a2 | Bin 16152 -> 16152 bytes pipelined/srt/qslc_sqrt_r4a2.c | 2 +- pipelined/srt/qslc_sqrt_r4a2.sv | 2048 +++++++++++++++---------------- 3 files changed, 1025 insertions(+), 1025 deletions(-) diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 index 047de1ba3bf6e4421c0a8c69adbcce1eb2e9b293..5cff70cdf9d63dd415b92ba2ce9092b7da87695f 100755 GIT binary patch delta 40 wcmbPHH=}Mt1+$2Xoo>~Yl(XU7lN# Date: Thu, 23 Jun 2022 13:08:15 -0700 Subject: [PATCH 12/26] Fixed wally-periph, regression is now working --- .../references/WALLY-periph.reference_output | 34 +++++++++---------- .../rv64i_m/privilege/src/WALLY-periph.S | 7 ++-- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output index 7b23883c..fd88590e 100644 --- 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output @@ -254,12 +254,12 @@ FFFFEE00 FFFFEE00 00000000 00000000 -02BEEF10 +02BEEF10 # Something here is failing 0000000B 80000000 00000003 000000FF -FFFFFFFF +00000000 000000FF 00000000 00000000 @@ -270,20 +270,20 @@ FFFFFFFF FFFFFF00 00000000 00000000 -02BEEF11 +02BEEF11 # this might be wrong 0000000B 80000000 -00000003 -000000CC -CCCCCCCC -00000000 -00000000 -00000033 -00000000 -000000FF -000000CC -FFFFFF33 -FFFFFF33 +00000003 +00000033 # input +00000000 # output +00000000 # rise ip +00000000 # serviced rise ip +000000CC # fall ip +00000000 +000000FF # high ip +00000033 # why is this 0x33? +FFFFFFCC # low ip +FFFFFFCC # serviced low ip 00000000 00000000 03BEEF12 @@ -454,9 +454,9 @@ FFFFFF33 00080000 00080000 00000000 +00000000 # is it this one that's failing? 00000000 -00000000 -00080000 +00080000 # failing 00080000 FFFFFFFF FFF7FFFF @@ -478,7 +478,7 @@ FFFFFFFF FFFFFFFE 00000000 00000000 -04BEEF1E +04BEEF1E # this might also be wrong 00000009 80000000 0000000A diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S index c44d7a68..70587514 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S @@ -271,7 +271,7 @@ main_code: ##### sw t1, 0x04(t0) # raise all output_en sw t1, 0x08(t0) - # raise all input_en + # raise all rise_en sw t1, 0x18(t0) # ========== Execute Test ========== # set MEIE @@ -616,6 +616,9 @@ Intr02BEEF11: sw t1, 0x08(t0) # set initial output state sw x0, 0x0C(t0) + # clear XOR + li t1, 0x00000000 + sw t1, 0x40(t0) # clear all pending interrupts li t1, 0xFFFFFFFF sw t1, 0x1C(t0) @@ -843,7 
+846,7 @@ Intr03BEEF1A: sw t1, 0x04(t0) # raise all output_en sw t1, 0x08(t0) - # raise all input_en + # raise all rise_en sw t1, 0x18(t0) # ========== Execute Test ========== # set MEIE and SEIE From 3d794742e9ed55d1031960083a8d2f64bd40f0a2 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 23 Jun 2022 13:22:00 -0700 Subject: [PATCH 13/26] Updating new GPIO tests --- .../references/WALLY-gpio-01.reference_output | 12 +++++--- .../rv32i_m/privilege/src/WALLY-gpio-01.S | 28 +++++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index 278e0aa7..e6fd4d7f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -1,7 +1,11 @@ +00000000 # test reset to zero 00000000 -00000000 -A5A5A5A5 +A5A5A5A5 # test output pins 5A5AFFFF -00000000 +00000000 # test input enables 5A5A0000 -A55A0000 +A55A0000 # test XOR +# A55A0000 # test interrupt pins +# 5AA5FFFF +# 00000000 +# 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index e4792a78..38bc533b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -88,12 +88,34 @@ test_cases: .4byte input_en, 0xFFFF0000, write32_test # enable a few input pins .4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above. -# =========== Test output enables(?) 
=========== - -.4byte output_en, 0xFFFFFFFF, write32_test # undo changes made to output enable # =========== Test XOR functionality =========== .4byte out_xor, 0xFF00FF00, write32_test # invert certain pin values .4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working +# =========== End of functioning tests =========== +# # =========== Test Interrupt Pending bits =========== + +# .4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts +# .4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts +# .4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts +# .4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts +# .4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts +# .4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts +# .4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts +# .4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts +# .4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts +# .4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output +# .4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
+# .4byte low_ip, 0x5BF50000, read32_test # low interrupt pending should be opposite high for enabled pins +# .4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) +# .4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) + +# # =========== Test Interrupt Enable without interrupts =========== + +# .4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt +# .4byte high_ip, 0xA5FA0000, read32_test # read to show no interrupt has happened +# .4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt +# .4byte low_ip, 5BF50000, read32_test # read to show no interrupt has happened + .4byte 0x0, 0x0, terminate_test # terminate tests From 66b148b76e4d38a9864ea05e5db8f4f1feebf77b Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 23 Jun 2022 21:06:11 +0000 Subject: [PATCH 14/26] GPIO tests --- .../references/WALLY-gpio-01.reference_output | 20 +++++-- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 36 +++++++++++ .../rv32i_m/privilege/src/WALLY-gpio-01.S | 60 ++++++++++++------- 3 files changed, 89 insertions(+), 27 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index e6fd4d7f..73f898ca 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -5,7 +5,19 @@ A5A5A5A5 # test output pins 00000000 # test input enables 5A5A0000 A55A0000 # test XOR -# A55A0000 # test interrupt pins -# 5AA5FFFF -# 00000000 -# 00000000 +A55A0000 # Test interrupt pending bits: high_ip +5AA5FFFF # low_ip +00000000 # rise_ip +00000000 # fall_ip +A4AA0000 # input_val +A5FA0000 # high_ip +5BF50000 # low_ip +00A00000 # rise_ip 
+01500000 # fall_ip +00000000 # MEIP +00000000 # Test interrupts can be enabled without being triggered: MIP = 0 +00000000 # MIP = 0 +00000000 # MIP = 0 +00000000 # MIP = 0 +00000800 # Test interrupts can be enabled and triggered: MEIP set +00000000 # MEIP = 0 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index a72ae385..0caad5d0 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -827,6 +827,28 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a addi a6, a6, 4 .endm +// Place this macro in peripheral tests to setup all the PLIC registers to generate external interrupts +.macro SETUP_PLIC + # Setup PLIC with a series of register writes + + .equ PLIC_INTPRI_GPIO, 0x0C00000C # GPIO is interrupt 3 + .equ PLIC_INTPRI_UART, 0x0C000028 # UART is interrupt 10 + .equ PLIC_INTPENDING0, 0x0C001000 # intPending0 register + .equ PLIC_INTEN00, 0x0C002000 # interrupt enables for context 0 (machine mode) sources 31:1 + .equ PLIC_INTEN10, 0x0C002080 # interrupt enables for context 1 (supervisor mode) sources 31:1 + .equ PLIC_THRESH0, 0x0C200000 # Priority threshold for context 0 (machine mode) + .equ PLIC_CLAIM0, 0x0C200004 # Claim/Complete register for context 0 + .equ PLIC_THRESH1, 0x0C201000 # Priority threshold for context 1 (supervisor mode) + .equ PLIC_CLAIM1, 0x0C201004 # Claim/Complete register for context 1 + + .4byte PLIC_THRESH0, 0, write32_test # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts + .4byte PLIC_THRESH1, 7, write32_test # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts + .4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority + .4byte PLIC_INTPRI_UART, 7, write32_test # Set UART to 
high priority + .4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode + .4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode +.endm + .macro END_TESTS // invokes one final ecall to return to machine mode then terminates this program, so the output is // 0x8: termination called from U mode @@ -937,6 +959,20 @@ read08_test: addi a6, a6, 4 j test_loop // go to next test case +readmip_test: // read the MIP into the signature + csrr t2, mip + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + +readsip_test: // read the SIP into the signature + csrr t2, sip + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + goto_s_mode: // return to address in t3, li a0, 3 // Trap handler behavior (go to supervisor mode) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index 38bc533b..b8a751c5 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -72,6 +72,7 @@ test_cases: .4byte input_val, 0x00000000, read32_test # input_val reset to zero .4byte input_en, 0x00000000, read32_test # input_en reset to zero +# *** add more # =========== Test output and input pins =========== @@ -86,36 +87,49 @@ test_cases: .4byte input_en, 0x00000000, write32_test # disable all input pins .4byte input_val, 0x00000000, read32_test # read 0 since input pins are disabled .4byte input_en, 0xFFFF0000, write32_test # enable a few input pins -.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above. +.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above.
# =========== Test XOR functionality =========== .4byte out_xor, 0xFF00FF00, write32_test # invert certain pin values -.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working +.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working -# =========== End of functioning tests =========== -# # =========== Test Interrupt Pending bits =========== +# =========== Test Interrupt Pending bits =========== -# .4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts -# .4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts -# .4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts -# .4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts -# .4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts -# .4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts -# .4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts -# .4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts -# .4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts -# .4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output -# .4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
-# .4byte low_ip, 0x5BF50000, read32_test # low interrupt pending should be opposite high for enabled pins -# .4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) -# .4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) +SETUP_PLIC -# # =========== Test Interrupt Enable without interrupts =========== +.4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts +.4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts +.4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts +.4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts +.4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts +.4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts +.4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts +.4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts +.4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts +.4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output +.4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
+.4byte low_ip, 0x5BF50000, read32_test # low interrupt pending should be opposite high for enabled pins +.4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) +.4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) +.4byte 0x0, 0x00000000, readmip_test # Check no external interrupt has been generated -# .4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt -# .4byte high_ip, 0xA5FA0000, read32_test # read to show no interrupt has happened -# .4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt -# .4byte low_ip, 5BF50000, read32_test # read to show no interrupt has happened +# =========== Test interrupts can be enabled without being triggered =========== + +.4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte rise_ie, 0x00010000, write32_test # enable rise interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte fall_ie, 0x00010000, write32_test # enable fall interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending + +# =========== Test interrupts can be enabled and triggered + +.4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending +.4byte 0x0, 0x00000800, readmip_test # MEIP should be raised +.4byte low_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending +.4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, terminate_test # terminate tests From 4c8f5fbd8939569ff25c2a621a03a319c77fd602 Mon Sep 17 00:00:00 2001 
From: slmnemo Date: Thu, 23 Jun 2022 14:12:28 -0700 Subject: [PATCH 15/26] Fixed error in GPIO signature --- .../privilege/references/WALLY-gpio-01.reference_output | 2 +- .../riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index 73f898ca..3cbf56ae 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -11,7 +11,7 @@ A55A0000 # Test interrupt pending bits: high_ip 00000000 # fall_ip A4AA0000 # input_val A5FA0000 # high_ip -5BF50000 # low_ip +5BF5FFFF # low_ip 00A00000 # rise_ip 01500000 # fall_ip 00000000 # MEIP diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index b8a751c5..be40c0e2 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -109,7 +109,7 @@ SETUP_PLIC .4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts .4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output .4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
-.4byte low_ip, 0x5BF50000, read32_test # low interrupt pending should be opposite high for enabled pins +.4byte low_ip, 0x5BF5FFFF, read32_test # low interrupt pending should be opposite high for enabled pins .4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) .4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) .4byte 0x0, 0x00000000, readmip_test # Check no external interrupt has been generated @@ -129,7 +129,7 @@ SETUP_PLIC .4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised -.4byte low_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending +.4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, terminate_test # terminate tests From ceddc99ac94cfc9c7bd536a8e41ffd93da21980b Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 23 Jun 2022 21:20:55 +0000 Subject: [PATCH 16/26] Reset mtimecmp in clint --- pipelined/src/uncore/clint.sv | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pipelined/src/uncore/clint.sv b/pipelined/src/uncore/clint.sv index 47acfddc..3f6210ff 100644 --- a/pipelined/src/uncore/clint.sv +++ b/pipelined/src/uncore/clint.sv @@ -60,7 +60,7 @@ module clint ( flopr #(16) entrydflop(HCLK, ~HRESETn, entry, entryd); assign HRESPCLINT = 0; // OK - assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during accesses + assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during asynchronous MTIME accesses // word aligned reads if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000}; @@ -87,8 +87,7 @@ module clint ( always_ff @(posedge HCLK or negedge HRESETn) if (~HRESETn) begin MSIP <= 0; - MTIMECMP <= 0; - // MTIMECMP is not reset + MTIMECMP <= 0xFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value 
to prevent spurious timer interrupts end else if (memwrite) begin if (entryd == 16'h0000) MSIP <= HWDATA[0]; if (entryd == 16'h4000) begin @@ -104,7 +103,6 @@ module clint ( always_ff @(posedge HCLK or negedge HRESETn) if (~HRESETn) begin MTIME <= 0; - // MTIMECMP is not reset end else if (memwrite & entryd == 16'hBFF8) begin // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed for(j=0;j<`XLEN/8;j++) From 2c4b86c7033c1c268cf4f289ec4e0c9ef553d07f Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 23 Jun 2022 21:27:46 +0000 Subject: [PATCH 17/26] Fixed typo in clint --- pipelined/src/uncore/clint.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/uncore/clint.sv b/pipelined/src/uncore/clint.sv index 3f6210ff..4781360e 100644 --- a/pipelined/src/uncore/clint.sv +++ b/pipelined/src/uncore/clint.sv @@ -87,7 +87,7 @@ module clint ( always_ff @(posedge HCLK or negedge HRESETn) if (~HRESETn) begin MSIP <= 0; - MTIMECMP <= 0xFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts + MTIMECMP <= 64'hFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts end else if (memwrite) begin if (entryd == 16'h0000) MSIP <= HWDATA[0]; if (entryd == 16'h4000) begin From a77fb485dbdc1e7afe72fbf88d5e6daa95c68f55 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 23 Jun 2022 14:37:18 -0700 Subject: [PATCH 18/26] Added wally32periph to regression --- pipelined/regression/regression-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 664f9964..07058241 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -71,7 +71,7 @@ for test in tests64gc: grepstr="All tests ran without failures") configs.append(tc) -tests32gc = ["arch32i", "arch32priv", 
"arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv"] #, "imperas32mmu""wally32i", +tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i", for test in tests32gc: tc = TestCase( name=test, From 5133b08161834738cb555762d6325fd8785400f0 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 21:38:04 +0000 Subject: [PATCH 19/26] generate qsel4 in verilog --- pipelined/regression/wave-fpu.do | 3 ++ pipelined/srt/srt-radix4.sv | 52 ++++++++++++++++++++++++++++- pipelined/testbench/testbench-fp.sv | 6 +++- 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 906eb256..60835ef6 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -22,3 +22,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/* +add wave -group {Testbench} -noupdate /testbenchfp/* +add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 6894a0f9..52bd4c20 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -164,7 +164,57 @@ module qsel4 ( // Wmsbs = | | logic [3:0] QSel4[1023:0]; - initial $readmemh("../srt/qsel4.dat", QSel4); + + initial begin + integer d, w, i, w2; + for(d=0; d<8; d++) + for(w=0; w<128; w++)begin + i = d*128+w; + w2 = w-128*(w>=64); // convert to two's complement + case(d) + 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + 
else if(w2>=-4) QSel4[i] = 4'b0000; + else if(w2>=-13) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 1: if(w2>=14) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-15) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 2: if(w2>=15) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-16) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 3: if(w2>=16) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-18) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 4: if(w2>=18) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 5: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 6: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-22) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 7: if(w2>=24) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-24) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + endcase + end + end assign q = QSel4[{Dmsbs,Wmsbs}]; endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 70787b3c..7a551490 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -899,7 +899,7 @@ module readvectors ( // apply test vectors on rising edge of clk // Format of vectors Inputs(1/2/3)_AnsFlg - always @(TestNum) begin + always @(VectorNum) begin #1; AnsFlg = TestVector[4:0]; DivStart = 1'b0; @@ -971,6 +971,7 @@ module readvectors ( X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; Ans = TestVector[8+(`Q_LEN-1):8]; 
+ if (~clk) #5; DivStart = 1'b1; #10 // one clk cycle DivStart = 1'b0; end @@ -978,6 +979,7 @@ module readvectors ( X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + if (~clk) #5; DivStart = 1'b1; #10 DivStart = 1'b0; end @@ -985,6 +987,7 @@ module readvectors ( X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + if (~clk) #5; DivStart = 1'b1; #10 DivStart = 1'b0; end @@ -992,6 +995,7 @@ module readvectors ( X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + if (~clk) #5; DivStart = 1'b1; #10 DivStart = 1'b0; end From ded2631567773884c2339699225e28344b2b79eb Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 23 Jun 2022 14:39:53 -0700 Subject: [PATCH 20/26] Removed big64.txt reference, fixing a warning --- pipelined/src/generic/flop/bram1p1rw.sv | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pipelined/src/generic/flop/bram1p1rw.sv b/pipelined/src/generic/flop/bram1p1rw.sv index d0d3c40a..51fe5421 100644 --- a/pipelined/src/generic/flop/bram1p1rw.sv +++ b/pipelined/src/generic/flop/bram1p1rw.sv @@ -54,10 +54,6 @@ module bram1p1rw logic [DATA_WIDTH-1:0] RAM [(2**ADDR_WIDTH)-1:0]; integer i; - initial begin - $readmemh("big64.txt", RAM); - end - always @ (posedge clk) begin dout <= RAM[addr]; if(we) begin From b54d84195f868103408fafc88ea9b5b3b083b246 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 22:36:19 +0000 Subject: [PATCH 21/26] added radix-4 0/d handling --- pipelined/config/rv64fp/wally-config.vh | 2 +- pipelined/src/fpu/postprocess.sv | 10 ++-- pipelined/src/fpu/round.sv | 1 + pipelined/srt/srt-radix4.sv 
| 68 ++++++++++++++----------- pipelined/testbench/testbench-fp.sv | 6 +-- 5 files changed, 48 insertions(+), 39 deletions(-) diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index bcc79133..68b3b84c 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 +`define XLEN 32 // IEEE 754 compliance `define IEEE754 0 diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 4b2870da..9138f9df 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -59,7 +59,7 @@ module postprocess( input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic IntZeroM, // is the input zero input logic [1:0] PostProcSelM, // select result to be written to fp register - input logic [`DIVLEN-1:0] Quot, + input logic [`DIVLEN+2:0] Quot, output logic [`FLEN-1:0] PostProcResM, // FMA final result output logic [4:0] PostProcFlgM, output logic [`XLEN-1:0] FCvtIntResM // the int conversion result @@ -84,6 +84,7 @@ module postprocess( logic PreResultDenorm; // is the result denormalized - calculated before LZA corection logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count + logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? 
@@ -137,6 +138,7 @@ module postprocess( .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + divshiftcalc divshiftcalc(.Quot, .DivShiftAmt); always_comb case(PostProcSelM) @@ -149,8 +151,8 @@ module postprocess( ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end 2'b01: begin //div ***prob can take out - ShiftAmt = {$clog2(`NORMSHIFTSZ){1'b0}};//{DivShiftAmt}; - ShiftIn = {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}}; + ShiftAmt = DivShiftAmt; + ShiftIn = {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}}; end default: begin ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; @@ -175,7 +177,7 @@ module postprocess( round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM, .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, - .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); + .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 8e3b9fe4..1fd471e9 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -11,6 +11,7 @@ module round( input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single input logic [2:0] FrmM, // rounding mode input logic FmaOp, + input logic DivOp, input logic [1:0] PostProcSelM, input logic CvtResDenormUfM, input logic ToInt, diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 52bd4c20..8fd8d541 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -34,14 +34,15 @@ module srtradix4 ( input logic clk, input logic DivStart, input logic [`NE-1:0] XExpE, YExpE, - input 
logic [`NF-1:0] XFrac, YFrac, + input logic [`NF:0] XManE, YManE, input logic [`XLEN-1:0] SrcA, SrcB, + input logic XZeroE, input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic DivDone, - output logic [`DIVLEN-1:0] Quot, + output logic [`DIVLEN+2:0] Quot, output logic [`XLEN-1:0] Rem, // *** later handle integers output logic [`NE:0] DivCalcExpE ); @@ -49,14 +50,15 @@ module srtradix4 ( // logic qp, qz, qm; // quotient is +1, 0, or -1 logic [3:0] q; logic [`NE:0] DivCalcExp; - logic [`DIVLEN-1:0] X, Dpreproc; + logic [`DIVLEN:0] X; + logic [`DIVLEN-1:0] Dpreproc; logic [`DIVLEN+3:0] WS, WSA, WSN; logic [`DIVLEN+3:0] WC, WCA, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; - srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); + srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -68,7 +70,7 @@ module srtradix4 ( // - otherwise load WSA into the flipflop // *** what does N and A stand for? 
// *** change shift amount for radix4 - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, DivStart, WSN); + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS); mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC); @@ -110,9 +112,9 @@ module srtradix4 ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); //*** change for radix 4 - otfc4 otfc4(clk, DivStart, q, Quot); + otfc4 otfc4(.clk, .DivStart, .q, .Quot); - expcalc expcalc(.XExpE, .YExpE, .DivCalcExp); + expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp); divcounter divcounter(clk, DivStart, DivDone); @@ -224,39 +226,42 @@ endmodule /////////////////// module srtpreproc ( input logic [`XLEN-1:0] SrcA, SrcB, - input logic [`NF-1:0] XFrac, YFrac, + input logic [`NF:0] XManE, YManE, input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide - output logic [`DIVLEN-1:0] X, D, + output logic [`DIVLEN:0] X, + output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent output logic intSign // Quotient integer sign ); - logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; - logic [`XLEN-1:0] PosA, PosB; - logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; + // logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; + // logic [`XLEN-1:0] PosA, PosB; + // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; + logic [`DIVLEN:0] PreprocA, PreprocX; + logic [`DIVLEN-1:0] PreprocB, PreprocY; - assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; - assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; + // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; + // assign PosB = (Signed & SrcB[`XLEN - 1]) ? 
-SrcB : SrcB; - lzc #(`XLEN) lzcA (PosA, zeroCntA); - lzc #(`XLEN) lzcB (PosB, zeroCntB); + // lzc #(`XLEN) lzcA (PosA, zeroCntA); + // lzc #(`XLEN) lzcB (PosB, zeroCntB); - assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; - assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; + // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; + // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; - assign PreprocA = ExtraA << zeroCntA; - assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}}; - assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}}; + // assign PreprocA = ExtraA << zeroCntA; + // assign PreprocB = ExtraB << (zeroCntB + 1); + assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}}; + assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}}; assign X = Int ? PreprocA : PreprocX; - assign D = Int ? PreprocB : PreprocY; - assign intExp = zeroCntB - zeroCntA + 1; - assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + assign Dpreproc = Int ? PreprocB : PreprocY; + // assign intExp = zeroCntB - zeroCntA + 1; + // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); endmodule /////////////////////////////////// @@ -266,7 +271,7 @@ module otfc4 ( input logic clk, input logic DivStart, input logic [3:0] q, - output logic [`DIVLEN-1:0] Quot + output logic [`DIVLEN+2:0] Quot ); // The on-the-fly converter transfers the quotient @@ -278,7 +283,7 @@ module otfc4 ( // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. - logic [`DIVLEN+2:0] Q, QM, QNext, QMNext, QMux, QMMux; + logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux; // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. 
@@ -286,7 +291,7 @@ module otfc4 ( // if starting a new divison set Q to 0 and QM to -1 mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux); mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux); - flop #(`DIVLEN+3) Qreg(clk, QMux, Q); + flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); // shift Q (quotent) and QM (quotent-1) @@ -298,7 +303,7 @@ module otfc4 ( // *** how does the 0 concatination numbers work? always_comb begin - QR = Q[`DIVLEN:0]; + QR = Quot[`DIVLEN:0]; QMR = QM[`DIVLEN:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; @@ -318,7 +323,7 @@ module otfc4 ( end end // Quot is in the range [.5, 2) so normalize the result if nesissary - assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1]; + // assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1]; endmodule @@ -352,9 +357,10 @@ endmodule ////////////// module expcalc( input logic [`NE-1:0] XExpE, YExpE, + input logic XZeroE, output logic [`NE:0] DivCalcExp ); - assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS); + assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}}; endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 7a551490..e8afb299 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -53,6 +53,7 @@ module testbenchfp; logic CvtResSgnE; logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by + logic [`DIVLEN+2:0] Quot; logic CvtResDenormUfE; logic DivStart, DivDone; @@ -69,7 +70,6 @@ module testbenchfp; logic ZSgnEffE; logic PSgnE; logic DivSgn; - logic [`DIVLEN-1:0] Quot; logic [`NE:0] DivCalcExp; @@ -659,8 +659,8 @@ module testbenchfp; fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), 
.XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), - .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), + srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), + .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .DivDone, .Quot, .Rem()); assign CmpFlg[3:0] = 0; From d17596353b744db0b2025051d4ef5517b1f7491f Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 22:37:44 +0000 Subject: [PATCH 22/26] lint warning fix --- pipelined/src/fpu/fpu.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index da46d73e..ff83079a 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -123,7 +123,7 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`DIVLEN-1:0] Quot; + logic [`DIVLEN+2:0] Quot; logic [`NE:0] DivCalcExpM; // result and flag signals From 97ded2cdd96e8e09083a6995cf29cc72dad02b4c Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 22:59:43 +0000 Subject: [PATCH 23/26] div debug - accounted for 1 bit normalization in exponent calculation --- pipelined/regression/wave-fpu.do | 1 + pipelined/src/fpu/postprocess.sv | 5 +++-- pipelined/src/fpu/round.sv | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 60835ef6..a58400cc 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -23,5 +23,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* 
add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 9138f9df..d970fdbc 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -94,6 +94,7 @@ module postprocess( logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? logic [`NE+1:0] RoundExp; + logic [`NE:0] CorrDivExp; logic [1:0] NegResMSBS; logic CvtOp; logic FmaOp; @@ -138,7 +139,7 @@ module postprocess( .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Quot, .DivShiftAmt); + divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt); always_comb case(PostProcSelM) @@ -175,7 +176,7 @@ module postprocess( // round to infinity // round to nearest max magnitude - round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM, + round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp, .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 1fd471e9..73395cae 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -24,7 +24,7 @@ module round( input logic [`NE+1:0] SumExp, // exponent of the normalized sum input logic RoundSgn, // the 
result's sign input logic [`NE:0] CvtCalcExpM, // the calculated expoent - input logic [`NE:0] DivCalcExpM, // the calculated expoent + input logic [`NE:0] CorrDivExp, // the calculated expoent output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow output logic [`NF-1:0] ResFrac, // Result fraction @@ -305,7 +305,7 @@ module round( case(PostProcSelM) 2'b10: RoundExp = SumExp; // fma 2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt - 2'b01: RoundExp = {DivCalcExpM[`NE], DivCalcExpM[`NE:0]}; // divide + 2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide default: RoundExp = 0; endcase From 86cdbd90e67176690fb39dcbc2bc1b8b08ad494f Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 23:01:30 +0000 Subject: [PATCH 24/26] forgot a file --- pipelined/src/fpu/divshiftcalc.sv | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 pipelined/src/fpu/divshiftcalc.sv diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv new file mode 100644 index 00000000..57022e5a --- /dev/null +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -0,0 +1,15 @@ +`include "wally-config.vh" + +module divshiftcalc( + input logic [`DIVLEN+2:0] Quot, + input logic [`NE:0] DivCalcExpM, + output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, + output logic [`NE:0] CorrDivExp +); + + assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]}; + // the quotent is in the range [.5,2) + // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift + assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]}; + +endmodule From dc5f80a3ca7c35e1a772c9c833571ce25c278e32 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 23 Jun 2022 23:16:43 +0000 Subject: [PATCH 25/26] Default value of Drive in Makefile --- synthDC/Makefile | 1 + 1 file changed, 1 
insertion(+) diff --git a/synthDC/Makefile b/synthDC/Makefile index 193153ca..3de66665 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -15,6 +15,7 @@ export MAXCORES ?= 4 # MAXOPT turns on flattening, boundary optimization, and retiming # The output netlist is hard to interpret, but significantly better PPA export MAXOPT ?= 0 +export DRIVE ?= FLOP time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) From 7c019ea074dfc0ca2c43b31e35b77b927fba3602 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 23 Jun 2022 16:50:27 -0700 Subject: [PATCH 26/26] Removed references to initialization files --- pipelined/config/buildroot/wally-config.vh | 2 -- pipelined/config/rv32e/wally-config.vh | 2 -- pipelined/config/rv32gc/wally-config.vh | 2 -- pipelined/config/rv32i/wally-config.vh | 2 -- pipelined/config/rv32ic/wally-config.vh | 2 -- pipelined/config/rv64BP/wally-config.vh | 2 -- pipelined/config/rv64fp/wally-config.vh | 2 -- pipelined/config/rv64fpquad/wally-config.vh | 2 -- pipelined/config/rv64gc/wally-config.vh | 2 -- pipelined/config/rv64i/wally-config.vh | 2 -- pipelined/config/rv64ic/wally-config.vh | 2 -- 11 files changed, 22 deletions(-) diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh index dc6c9bb0..f11b71c0 100644 --- a/pipelined/config/buildroot/wally-config.vh +++ b/pipelined/config/buildroot/wally-config.vh @@ -124,8 +124,6 @@ `define PLIC_NUM_SRC 53 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/buildroot/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/buildroot/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh index 15b2e08e..7d083f3b 100644 --- a/pipelined/config/rv32e/wally-config.vh +++ b/pipelined/config/rv32e/wally-config.vh @@ -130,8 +130,6 @@ `define PLIC_GPIO_ID 3 `define 
PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 0 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh index 3522fd1e..70124d55 100644 --- a/pipelined/config/rv32gc/wally-config.vh +++ b/pipelined/config/rv32gc/wally-config.vh @@ -128,8 +128,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh index 80d167a3..d44072d6 100644 --- a/pipelined/config/rv32i/wally-config.vh +++ b/pipelined/config/rv32i/wally-config.vh @@ -130,8 +130,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32i/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32i/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index 13b2eb74..e42fd310 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -128,8 +128,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh index 82f8446b..3bc745eb 100644 --- a/pipelined/config/rv64BP/wally-config.vh 
+++ b/pipelined/config/rv64BP/wally-config.vh @@ -130,8 +130,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt" `define BPRED_ENABLED 1 //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE `define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index 68b3b84c..cc8d1b2b 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -132,8 +132,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64fpquad/wally-config.vh b/pipelined/config/rv64fpquad/wally-config.vh index 08e8006c..0dee000e 100644 --- a/pipelined/config/rv64fpquad/wally-config.vh +++ b/pipelined/config/rv64fpquad/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 042364ac..9afa1a67 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff 
--git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh index 402c3b36..67ca51a7 100644 --- a/pipelined/config/rv64i/wally-config.vh +++ b/pipelined/config/rv64i/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64i/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64i/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh index 49175935..fca1f260 100644 --- a/pipelined/config/rv64ic/wally-config.vh +++ b/pipelined/config/rv64ic/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0