From cd53ae67d9a2ba5f5ec8ace9cacaf3de56e3ccb2 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 8 Jul 2022 23:56:57 +0000 Subject: [PATCH 01/36] moved fpu ieu write data mux to lsu --- pipelined/regression/sim-wally-batch | 2 +- pipelined/src/cache/cache.sv | 14 ++++---------- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/lsu.sv | 17 +++++++++++------ pipelined/testbench/testbench.sv | 1 + pipelined/testbench/tests.vh | 2 +- tests/riscof/Makefile | 2 +- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch index 91f11697..7e821e58 100755 --- a/pipelined/regression/sim-wally-batch +++ b/pipelined/regression/sim-wally-batch @@ -1 +1 @@ -vsim -c -do "do wally-pipelined-batch.do rv64gc imperas64f" +vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d" diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index f6aad78e..b80df13a 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -42,10 +42,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(`XLEN-1)/8:0] ByteMask, - input logic [`XLEN-1:0] FinalWriteData, - input logic [`FLEN-1:0] FWriteDataM, + input logic [WORDLEN-1:0] FinalWriteData, input logic FLoad2, - input logic FpLoadStoreM, output logic CacheCommitted, output logic CacheStall, // to performance counters to cpu @@ -72,7 +70,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER localparam SETLEN = $clog2(NUMLINES); localparam SETTOP = SETLEN+OFFSETLEN; localparam TAGLEN = `PA_BITS - SETTOP; - localparam WORDSPERLINE = LINELEN/`XLEN; + localparam WORDSPERLINE = LINELEN/WORDLEN; localparam FlushAdrThreshold = NUMLINES - 1; logic SelAdr; @@ -162,12 +160,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write data and address. Muxes between writes from bus and writes from CPU. ///////////////////////////////////////////////////////////////////////////////////////////// - if (`LLEN>`XLEN) - mux3 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1({WORDSPERLINE/2{FWriteDataM}}), .d2(CacheBusWriteData), .s({SetValid,FpLoadStoreM&~SetValid}), .y(CacheWriteData)); - else - mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); + mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), + .d1(CacheBusWriteData), .s(SetValid), .y(CacheWriteData)); mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), .d2({VictimTag, FlushAdr, {OFFSETLEN{1'b0}}}), diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 0c43c736..c96396ab 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -226,7 +226,7 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .CacheFetchLine(ICacheFetchLine), .FWriteDataM(), .FpLoadStoreM(), .FLoad2(), + .CacheFetchLine(ICacheFetchLine), .FLoad2(), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .Cacheable(CacheableF), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 4b200f70..a63f813e 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -192,7 +192,8 @@ module lsu ( // Memory System // Either Data Cache or Data Tightly Integrated Memory or just bus interface ///////////////////////////////////////////////////////////////////////////////////////////// - logic [`XLEN-1:0] AMOWriteDataM, FinalWriteDataM, LittleEndianWriteDataM; + logic [`XLEN-1:0] AMOWriteDataM, IEUWriteDataM, LittleEndianWriteDataM; + logic [`LLEN-1:0] FinalWriteDataM; logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; logic [`LLEN-1:0] ReadDataWordMuxM; logic IgnoreRequest; @@ -202,7 +203,7 @@ module lsu ( if (`DMEM == `MEM_TIM) begin : dtim // *** directly instantiate RAM or ROM here. Instantiate SRAM1P1RW. // Merge SimpleRAM and SRAM1p1rw into one that is good for synthesis and RAM libraries and flops - dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, + dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM(IEUWriteDataM), //*** fix the dtim FinalWriteData .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM, .DCacheStallM, .DCacheCommittedM, .ByteMaskM, .Cacheable(CacheableM), .DCacheMiss, .DCacheAccess); @@ -230,15 +231,19 @@ module lsu ( mux2 #(`LLEN) UnCachedDataMux(.d0(LittleEndianReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, DCacheBusWriteData[`XLEN-1:0]}), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(FinalWriteDataM), + mux2 #(`XLEN) LsuBushwdataMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(IEUWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); if(CACHE_ENABLED) begin : dcache + if (`LLEN>`FLEN) + mux2 #(`LLEN) datamux({(`LLEN-`XLEN)'(0), IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); + else + assign FinalWriteDataM[`XLEN-1:0] = IEUWriteDataM; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(ByteMaskM), .WordCount, .FpLoadStoreM, .FWriteDataM, .FLoad2, + .ByteMask(ByteMaskM), .WordCount, .FLoad2, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), @@ -286,10 +291,10 @@ module lsu ( // swap the bytes when read from big-endian memory ///////////////////////////////////////////////////////////////////////////////////////////// if (`BIGENDIAN_SUPPORTED) begin:endian - bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(FinalWriteDataM)); + bigendianswap #(`XLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(IEUWriteDataM)); bigendianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordM), .y(LittleEndianReadDataWordM)); end else begin - assign FinalWriteDataM = LittleEndianWriteDataM; + assign IEUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordM; end diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index c248a750..40ea9a58 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -114,6 +114,7 @@ logic [3:0] dummy; "arch32f": if (`F_SUPPORTED) tests = arch32f; "imperas32i": tests = imperas32i; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; + "wally32d": if (`D_SUPPORTED) tests = wally32d; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "wally32a": if (`A_SUPPORTED) tests = wally32a; "imperas32c": if (`C_SUPPORTED) tests = imperas32c; diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 0d57dbdd..c6ebf08c 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -34,7 +34,7 @@ string tvpaths[] = '{ "../../addins/imperas-riscv-tests/work/", "../../tests/riscof/work/riscv-arch-test/", - "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", + "../../tests/riscof/work/wally-riscv-arch-test/", //"../../tests/wally-riscv-arch-test/work/", // "../../tests/imperas-riscv-tests/work/", "../../benchmarks/coremark/work/", "../../addins/embench-iot/" diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 830b9eef..621a5b54 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) XLEN ?= 64 -all: root build_arch # build_wally memfile +all: root build_arch build_wally memfile root: mkdir -p $(work_dir) From ca4fe08fd9bbb8b9b38862c31ee8583f02a5c873 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Sat, 9 Jul 2022 00:26:45 +0000 Subject: [PATCH 02/36] renamed FLoad2 to FStore2 --- pipelined/src/cache/cache.sv | 4 ++-- pipelined/src/cache/cacheway.sv | 4 ++-- pipelined/src/fpu/fpu.sv | 6 +++--- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/lsu.sv | 8 ++++---- pipelined/src/wally/wallypipelinedcore.sv | 6 +++--- pipelined/testbench/tests.vh | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index b80df13a..ca6a5c9c 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -43,7 +43,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER input logic [`PA_BITS-1:0] PAdr, // physical address input logic [(`XLEN-1)/8:0] ByteMask, input logic [WORDLEN-1:0] FinalWriteData, - input logic FLoad2, + input logic FStore2, output logic CacheCommitted, output logic CacheStall, // to performance counters to cpu @@ -121,7 +121,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGWPL, WORDLEN, MUXINTER // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) - CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FLoad2, + CacheWays[NUMWAYS-1:0](.clk, .reset, .RAdr, .PAdr, .CacheWriteData, .ByteMask, .FStore2, .SetValidWay, .ClearValidWay, .SetDirtyWay, .ClearDirtyWay, .SelEvict, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .VictimDirtyWay, .VictimTagWay, .Invalidate(InvalidateCacheM)); diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index ac1e26e8..d1c85675 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -38,7 +38,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, input logic [LINELEN-1:0] CacheWriteData, - input logic FLoad2, + input logic FStore2, input logic SetValidWay, input logic ClearValidWay, input logic SetDirtyWay, @@ -79,7 +79,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic [2**LOGWPL-1:0] MemPAdrDecodedtmp; onehotdecoder #(LOGWPL) adrdec( .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecodedtmp)); - assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FLoad2}}, 1'b0}; + assign MemPAdrDecoded = MemPAdrDecodedtmp|{MemPAdrDecodedtmp[2**LOGWPL-2:0]&{2**LOGWPL-1{FStore2}}, 1'b0}; end else onehotdecoder #(LOGWPL) adrdec( .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 7cf10901..255feb44 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -42,7 +42,7 @@ module fpu ( input logic [1:0] STATUS_FS, // Is floating-point enabled? output logic FRegWriteM, // FP register write enable output logic FpLoadStoreM, // Fp load instruction? - output logic FLoad2, + output logic FStore2, output logic FStallD, // Stall the decode stage output logic FWriteIntE, // integer register write enables output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory @@ -298,8 +298,8 @@ module fpu ( assign FWriteDataE = FSrcYE[`XLEN-1:0]; end else begin logic [`FLEN-1:0] FWriteDataE; - if(`FMTBITS == 2) assign FLoad2 = FmtM == `FMT; - else assign FLoad2 = FmtM; + if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; + else assign FStore2 = FmtM; if (`FPSIZES==1) assign FWriteDataE = FSrcYE; else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index c96396ab..5c2f799d 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -226,7 +226,7 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequestTLB(ITLBMissF), .TrapM(TrapM), .IgnoreRequestTrapM('0), .CacheBusWriteData(ICacheBusWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .CacheFetchLine(ICacheFetchLine), .FLoad2(), + .CacheFetchLine(ICacheFetchLine), .FStore2(), .CacheWriteLine(), .ReadDataWord(FinalInstrRawF), .Cacheable(CacheableF), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index a63f813e..50ecdb18 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -58,7 +58,7 @@ module lsu ( input logic sfencevmaM, // fpu input logic [`FLEN-1:0] FWriteDataM, - input logic FLoad2, + input logic FStore2, input logic FpLoadStoreM, // faults output logic LoadPageFaultM, StoreAmoPageFaultM, @@ -236,14 +236,14 @@ module lsu ( if(CACHE_ENABLED) begin : dcache if (`LLEN>`FLEN) - mux2 #(`LLEN) datamux({(`LLEN-`XLEN)'(0), IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); + mux2 #(`LLEN) datamux({{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); else - assign FinalWriteDataM[`XLEN-1:0] = IEUWriteDataM; + assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(`DCACHE_NUMWAYS), .LOGWPL(LOGWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .LSUBusWriteCrit, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .ByteMask(ByteMaskM), .WordCount, .FLoad2, + .ByteMask(ByteMaskM), .WordCount, .FStore2, .FinalWriteData(FinalWriteDataM), .Cacheable(CacheableM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .IgnoreRequestTrapM, .TrapM(1'b0), .CacheCommitted(DCacheCommittedM), diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 7538a541..372f4aba 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -93,7 +93,7 @@ module wallypipelinedcore ( logic FStallD; logic FWriteIntE; logic [`XLEN-1:0] FWriteDataE; - logic FLoad2; + logic FStore2; logic [`FLEN-1:0] FWriteDataM; logic [`XLEN-1:0] FIntResM; logic [`XLEN-1:0] FCvtIntResW; @@ -259,7 +259,7 @@ module wallypipelinedcore ( .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, .FpLoadStoreM, - .FWriteDataM, .FLoad2, + .FWriteDataM, .FStore2, //.DataMisalignedM(DataMisalignedM), .IEUAdrE, .IEUAdrM, .WriteDataE, .ReadDataW, .FlushDCacheM, @@ -400,7 +400,7 @@ module wallypipelinedcore ( .STATUS_FS, // is floating-point enabled? .FRegWriteM, // FP register write enable .FpLoadStoreM, - .FLoad2, + .FStore2, .FStallD, // Stall the decode stage .FWriteIntE, // integer register write enable .FWriteDataE, // Data to be written to memory diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index c6ebf08c..e1b69cd2 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -34,7 +34,7 @@ string tvpaths[] = '{ "../../addins/imperas-riscv-tests/work/", "../../tests/riscof/work/riscv-arch-test/", - "../../tests/riscof/work/wally-riscv-arch-test/", //"../../tests/wally-riscv-arch-test/work/", // + "../../tests/wally-riscv-arch-test/work/", //"../../tests/riscof/work/wally-riscv-arch-test/", // "../../tests/imperas-riscv-tests/work/", "../../benchmarks/coremark/work/", "../../addins/embench-iot/" From b728e5054d2fb581321985668efabfca439b61b6 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 11 Jul 2022 18:30:21 -0700 Subject: [PATCH 03/36] variable interations implemented in radix-4 divider --- pipelined/config/rv64fp/wally-config.vh | 4 +- pipelined/config/shared/wally-shared.vh | 12 +- pipelined/regression/wave-fpu.do | 10 +- pipelined/src/fpu/divshiftcalc.sv | 9 +- pipelined/src/fpu/divsqrt.sv | 10 +- pipelined/src/fpu/flags.sv | 38 ++-- pipelined/src/fpu/fmashiftcalc.sv | 24 +- pipelined/src/fpu/fpu.sv | 10 +- pipelined/src/fpu/lzacorrection.sv | 4 +- pipelined/src/fpu/postprocess.sv | 26 +-- pipelined/src/fpu/resultselect.sv | 290 ------------------------ pipelined/src/fpu/resultsign.sv | 4 +- pipelined/src/fpu/round.sv | 6 +- pipelined/src/fpu/srt-radix4.sv | 140 ++++++++---- pipelined/src/fpu/srtfsm.sv | 10 +- pipelined/src/fpu/srtpreproc.sv | 16 +- pipelined/testbench/testbench-fp.sv | 14 +- 17 files changed, 203 insertions(+), 424 deletions(-) delete mode 100644 pipelined/src/fpu/resultselect.sv diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index b92bc07a..cc8d1b2b 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,14 +32,14 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 +`define XLEN 32 // IEEE 754 compliance `define IEEE754 0 // MISA RISC-V configuration per specification // ZYXWVUTSRQPONMLKJIHGFEDCBA -`define MISA 32'b0000000000101000001000100100101 +`define MISA 32'b0000000000101000001000100101101 `define ZICSR_SUPPORTED 1 `define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 671f7343..c064783c 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -95,12 +95,22 @@ // largest length in IEU/FPU `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) `define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9)) `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) +// division constants +`define RADIX 4 +`define DIVCOPIES 4 +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) +`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) +`define LOGR ((`RADIX==2) ? 1 : 2) +`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES)) +`define DURLEN ($clog2($rtoi(`FPDUR)+1)) +`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES) + + `define USE_SRAM 0 // Disable spurious Verilator warnings diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 9e7ba49b..58f782bd 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -11,7 +11,7 @@ add wave -noupdate /testbenchfp/DivStart add wave -noupdate /testbenchfp/DivBusy add wave -noupdate /testbenchfp/srtfsm/state add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* -add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultselect/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* @@ -21,8 +21,12 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* +add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/* +add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/* +add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/* add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index 935ed3c1..a4f3feff 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,9 +1,9 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`DIVLEN+2:0] Quot, + input logic [`QLEN-1:0] Quot, input logic [`FMTBITS-1:0] Fmt, - input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2, + input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`NE+1:0] DivCalcExp, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, @@ -32,9 +32,10 @@ module divshiftcalc( // inital Left shift amount = NF assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) - assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-$clog2(`DIVLEN/2+3)-1{1'b0}}, DivEarlyTermShiftDiv2&{$clog2(`DIVLEN/2+3){~DivDenormShift[`NE+1]}}, 1'b0}; + // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)}; // *** may be able to reduce shifter size - assign DivShiftIn = {{`NF{1'b0}}, Quot[`DIVLEN+2:0], {`NORMSHIFTSZ-`DIVLEN-3-`NF{1'b0}}}; + assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 086b97d8..c4f09aea 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -47,8 +47,8 @@ module divsqrt( output logic DivBusy, output logic DivDone, output logic [`NE+1:0] DivCalcExpM, - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2M, - output logic [`DIVLEN+2:0] QuotM + output logic [`DURLEN-1:0] EarlyTermShiftM, + output logic [`QLEN-1:0] QuotM // output logic [`XLEN-1:0] RemM, ); @@ -57,12 +57,12 @@ module divsqrt( logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; - logic [$clog2(`DIVLEN/2+3)-1:0] Dur; + logic [`DURLEN-1:0] Dur; srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2M)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 98250a45..4e16bc96 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -34,20 +34,20 @@ module flags( input logic XInf, YInf, ZInf, // inputs are infinity input logic Plus1, input logic InfIn, // is a Inf input being used + input logic NaNIn, // is a NaN input being used + input logic [`FMTBITS-1:0] OutFmt, // output format input logic XZero, YZero, // inputs are zero input logic XNaN, YNaN, // inputs are NaN - input logic NaNIn, // is a NaN input being used input logic Sqrt, // Sqrt? input logic ToInt, // convert to integer input logic IntToFp, // convert integer to floating point input logic Int64, // convert to 64 bit integer input logic Signed, // convert to a signed integer - input logic [`FMTBITS-1:0] OutFmt, // output format input logic [`NE:0] CvtCe, // the calculated expoent - Cvt input logic CvtOp, // conversion opperation? input logic DivOp, // conversion opperation? input logic FmaOp, // Fma opperation? - input logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow + input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow input logic [`NE+1:0] Nexp, // exponent of the normalized sum input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic FmaAs, FmaPs, // the product and modified Z signs @@ -73,30 +73,30 @@ module flags( if (`FPSIZES == 1) begin - assign ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 2) begin - assign ResExpGteMax = OutFmt ? &FullResExp[`NE-1:0] | FullResExp[`NE] : &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); + assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 3) begin always_comb case (OutFmt) - `FMT: ResExpGteMax = &FullResExp[`NE-1:0] | FullResExp[`NE]; - `FMT1: ResExpGteMax = &FullResExp[`NE1-1:0] | (|FullResExp[`NE:`NE1]); - `FMT2: ResExpGteMax = &FullResExp[`NE2-1:0] | (|FullResExp[`NE:`NE2]); + `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); + `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]); default: ResExpGteMax = 1'bx; endcase - assign ShiftGtIntSz = (|FullResExp[`NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end else if (`FPSIZES == 4) begin always_comb case (OutFmt) - `Q_FMT: ResExpGteMax = &FullResExp[`Q_NE-1:0] | FullResExp[`Q_NE]; - `D_FMT: ResExpGteMax = &FullResExp[`D_NE-1:0] | (|FullResExp[`Q_NE:`D_NE]); - `S_FMT: ResExpGteMax = &FullResExp[`S_NE-1:0] | (|FullResExp[`Q_NE:`S_NE]); - `H_FMT: ResExpGteMax = &FullResExp[`H_NE-1:0] | (|FullResExp[`Q_NE:`H_NE]); + `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE]; + `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]); + `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]); + `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]); endcase // a left shift of intlen+1 is still in range but any more than that is an overflow // inital: | 64 0's | XLEN | @@ -110,14 +110,14 @@ module flags( // - any of the bits after the most significan 1 is one // - the most signifcant in 65 or 33 is still a one in the number and // one of the later bits is one - assign ShiftGtIntSz = (|FullResExp[`Q_NE:7]|(FullResExp[6]&~Int64)) | ((|FullResExp[4:0]|(FullResExp[5]&Int64))&((FullResExp[5]&~Int64) | FullResExp[6]&Int64)); + assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); end // if the result is greater than or equal to the max exponent(not taking into account sign) // | and the exponent isn't negitive // | | if the input isnt infinity or NaN // | | | - assign Overflow = ResExpGteMax & ~FullResExp[`NE+1]&~(InfIn|NaNIn|DivByZero); + assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero); // detecting tininess after rounding // the exponent is negitive @@ -127,7 +127,7 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed @@ -153,7 +153,7 @@ module flags( // | | | | or the res rounds up out of bounds // | | | | and the res didn't underflow // | | | | | - assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullResExp[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); + assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); // | // or when the positive res rounds up out of range assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 5f55e17b..d4898e80 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -37,7 +37,7 @@ module fmashiftcalc( input logic FmaKillProd, // is the product set to zero input logic ZDenorm, output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results - output logic FmaSmZero, // is the result denormalized - calculated before LZA corection + output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero @@ -50,7 +50,7 @@ module fmashiftcalc( /////////////////////////////////////////////////////////////////////////////// //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero - assign FmaSmZero = ~(|FmaSm); + assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4); @@ -90,7 +90,7 @@ module fmashiftcalc( logic Sum0LEZ, Sum0GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); - assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; + assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; @@ -98,7 +98,7 @@ module fmashiftcalc( assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; - assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSmZero; + assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; @@ -110,9 +110,9 @@ module fmashiftcalc( assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp; always_comb begin case (Fmt) - `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; - `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero; - `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero; + `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + `FMT1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + `FMT2: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; default: FmaPreResultDenorm = 1'bx; endcase end @@ -129,10 +129,10 @@ module fmashiftcalc( assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp; always_comb begin case (Fmt) - 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSmZero; - 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSmZero; - 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSmZero; - 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSmZero; + 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking end @@ -144,7 +144,7 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSmZero; + // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 8336c39c..e1c9e5fa 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -125,12 +125,12 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`DIVLEN+2:0] QuotE, QuotM; + logic [`QLEN-1:0] QuotM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM; logic DivNegStickyE, DivNegStickyM; logic DivStickyE, DivStickyM; logic DivDoneM; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, EarlyTermShiftDiv2M; + logic [`DURLEN-1:0] EarlyTermShiftM; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -289,7 +289,7 @@ module fpu ( divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal - .EarlyTermShiftDiv2M, .QuotM, .DivDone(DivDoneM)); + .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE); @@ -381,12 +381,12 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2M), + postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), - .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .W(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); + .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv index 03b36f4f..17db0c0b 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/lzacorrection.sv @@ -38,7 +38,7 @@ module lzacorrection( input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaKillProd, // is the product set to zero - input logic FmaSmZero, + input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction output logic [`NE+1:0] DivCorrExp, output logic [`NE+1:0] FmaSe // exponent of the normalized sum @@ -59,7 +59,7 @@ module lzacorrection( assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSmZero|ResDenorm)}}; + assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index e165e7e1..18452abd 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -54,12 +54,12 @@ module postprocess( input logic FmaInvA, // do you invert Z input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals - input logic [$clog2(`DIVLEN/2+3)-1:0] DivEarlyTermShiftDiv2, + input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivSticky, input logic DivNegSticky, input logic DivDone, input logic [`NE+1:0] DivCalcExp, - input logic [`DIVLEN+2:0] Quot, + input logic [`QLEN-1:0] Quot, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -69,7 +69,7 @@ module postprocess( input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (priority encoder) input logic IntZero, // is the input zero // final results - output logic [`FLEN-1:0] W, // FMA final result + output logic [`FLEN-1:0] PostProcRes, // FMA final result output logic [4:0] PostProcFlg, output logic [`XLEN-1:0] FCvtIntRes // the int conversion result ); @@ -81,7 +81,7 @@ module postprocess( logic Nsgn; logic [`NE+1:0] Nexp; logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction - logic [`NE+1:0] FullResExp; // Re with bits to determine sign and overflow + logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) logic R; // bits needed to determine rounding @@ -95,7 +95,7 @@ module postprocess( logic [`FMTBITS-1:0] OutFmt; // fma signals logic [`NE+1:0] FmaSe; // exponent of the normalized sum - logic FmaSmZero; // is the sum zero + logic FmaSZero; // is the sum zero logic [3*`NF+8:0] FmaShiftIn; // shift input logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection @@ -153,8 +153,8 @@ module postprocess( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, - .ZDenorm, .FmaSmZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShiftDiv2, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -185,7 +185,7 @@ module postprocess( lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, - .DivCorrExp, .FmaSmZero, .Shifted, .FmaSe, .Nfrac); + .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -204,14 +204,14 @@ module postprocess( round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, .DivSticky, .DivNegSticky, .DivDone, - .DivOp, .UfPlus1, .FullResExp, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, - .FmaOp, .ZInf, .InfIn, .FmaSmZero, .Mult, .Nsgn, .Ws); + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws); /////////////////////////////////////////////////////////////////////////////// // Flags @@ -220,7 +220,7 @@ module postprocess( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, - .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1, + .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// @@ -228,10 +228,10 @@ module postprocess( /////////////////////////////////////////////////////////////////////////////// negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); - resultselect resultselect(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, .XInf, .YInf, .DivOp, - .DivByZero, .FullResExp, .CvtCe, .Ws, .Re, .Rf, .W, .FCvtIntRes); + .DivByZero, .FullRe, .CvtCe, .Ws, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/pipelined/src/fpu/resultselect.sv b/pipelined/src/fpu/resultselect.sv deleted file mode 100644 index 4389056f..00000000 --- a/pipelined/src/fpu/resultselect.sv +++ /dev/null @@ -1,290 +0,0 @@ -/////////////////////////////////////////// -// -// Written: me@KatherineParry.com -// Modified: 7/5/2022 -// -// Purpose: special case selection -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module resultselect( - input logic Xs, // input signs - input logic [`NF:0] Xm, Ym, Zm, // input mantissas - input logic XNaN, YNaN, ZNaN, // inputs are NaN - input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic InfIn, - input logic XInf, YInf, - input logic XZero, - input logic IntZero, - input logic NaNIn, - input logic IntToFp, - input logic Int64, - input logic Signed, - input logic CvtOp, - input logic DivOp, - input logic FmaOp, - input logic Plus1, - input logic DivByZero, - input logic [`NE:0] CvtCe, // the calculated expoent - input logic Ws, // the res's sign - input logic IntInvalid, Invalid, Overflow, // flags - input logic CvtResUf, - input logic [`NE-1:0] Re, // Res exponent - input logic [`NE+1:0] FullResExp, // Res exponent - input logic [`NF-1:0] Rf, // Res fraction - input logic [`XLEN+1:0] CvtNegRes, // the negation of the result - output logic [`FLEN-1:0] W, // final res - output logic [`XLEN-1:0] FCvtIntRes // final res -); - logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results - logic OfResMax; - logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output - logic KillRes; - logic SelOfRes; - - - // does the overflow result output the maximum normalized floating point number - // output infinity if the input is infinity - assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws)); - - if (`FPSIZES == 1) begin - - //NaN res selection depending on standard - if(`IEEE754) begin - assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - assign OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = {Ws, Re, Rf}; - - end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? - if(`IEEE754) begin - assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - - assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : - OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; - assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; - - end else if (`FPSIZES == 3) begin - always_comb - case (OutFmt) - `FMT: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Ws, Re, Rf}; - end - `FMT1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; - UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; - end - `FMT2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; - YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; - ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)}; - UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; - end - default: begin - if(`IEEE754) begin - XNaNRes = (`FLEN)'(0); - YNaNRes = (`FLEN)'(0); - ZNaNRes = (`FLEN)'(0); - InvalidRes = (`FLEN)'(0); - end else begin - InvalidRes = (`FLEN)'(0); - end - OfRes = (`FLEN)'(0); - UfRes = (`FLEN)'(0); - NormRes = (`FLEN)'(0); - end - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (OutFmt) - 2'h3: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Ws, Re, Rf}; - end - 2'h1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; - YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; - ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end - OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)}; - UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; - end - 2'h0: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; - YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; - ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)}; - UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; - end - 2'h2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; - YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; - ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)}; - // zero is exact fi dividing by infinity so don't add 1 - UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; - end - endcase - - end - - - - - - // determine if you shoould kill the res - Cvt - // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 - // - dont set to zero if fp input is zero but not using the fp input - // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullResExp[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); - assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); - // output infinity with result sign if divide by zero - if(`IEEE754) begin - assign W = XNaN&~(IntToFp&CvtOp) ? XNaNRes : - YNaN&~CvtOp ? YNaNRes : - ZNaN&FmaOp ? ZNaNRes : - Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end else begin - assign W = NaNIn|Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end - - /////////////////////////////////////////////////////////////////////////////////////// - // - // ||||||||||| ||| ||| ||||||||||||| - // ||| |||||| ||| ||| - // ||| ||| ||| ||| ||| - // ||| ||| |||||| ||| - // ||||||||||| ||| ||| ||| - // - /////////////////////////////////////////////////////////////////////////////////////// - - // *** probably can optimize the negation - // select the overflow integer res - // - negitive infinity and out of range negitive input - // | int | long | - // signed | -2^31 | -2^63 | - // unsigned | 0 | 0 | - // - // - positive infinity and out of range positive input and NaNs - // | int | long | - // signed | 2^31-1 | 2^63-1 | - // unsigned | 2^32-1 | 2^64-1 | - // - // other: 32 bit unsinged res should be sign extended as if it were a signed number - assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive - Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive - Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive - {`XLEN{1'b1}};// unsigned positive - - - // select the integer output - // - if the input is invalid (out of bounds NaN or Inf) then output overflow res - // - if the input underflows - // - if rounding and signed opperation and negitive input, output -1 - // - otherwise output a rounded 0 - // - otherwise output the normal res (trmined and sign extended if nessisary) - assign FCvtIntRes = IntInvalid ? OfIntRes : - CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? - Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; -endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index 05c3b461..e6de0c18 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -35,7 +35,7 @@ module resultsign( input logic InfIn, input logic FmaOp, input logic [`NE+1:0] FmaSe, - input logic FmaSmZero, + input logic FmaSZero, input logic Mult, input logic R, input logic S, @@ -61,6 +61,6 @@ module resultsign( // if -p + z is the Sum positive // if -p - z then the Sum is negitive assign InfSgn = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? InfSgn : FmaSmZero&FmaOp ? ZeroSgn : Nsgn; + assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 4c185ff3..c73edc08 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -57,7 +57,7 @@ module round( input logic DivSticky, // sticky bit input logic DivNegSticky, output logic UfPlus1, // do you add or subtract on from the result - output logic [`NE+1:0] FullResExp, // Re with bits to determine sign and overflow + output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction output logic [`NE-1:0] Re, // Result exponent output logic S, // sticky bit @@ -344,8 +344,8 @@ module round( // round the result // - if the fraction overflows one should be added to the exponent - assign {FullResExp, Rf} = {Nexp, RoundFrac} + RoundAdd; - assign Re = FullResExp[`NE-1:0]; + assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd; + assign Re = FullRe[`NE-1:0]; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 741d4e83..1c7b9648 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" -module srtradix4 ( +module srtradix4( input logic clk, input logic DivStart, input logic DivBusy, @@ -40,20 +40,29 @@ module srtradix4 ( input logic [`DIVLEN-1:0] X, input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`DIVLEN+2:0] Quot, + output logic [`QLEN-1:0] Quot, output logic [`DIVLEN+3:0] WSN, WCN, - output logic [`DIVLEN+3:0] WS, WC, + output logic [`DIVLEN+3:0] FirstWS, FirstWC, output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem ); - logic [3:0] q; - logic [`DIVLEN+3:0] WSA; - logic [`DIVLEN+3:0] WCA; - logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; + + /* verilator lint_off UNOPTFLAT */ + logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] D, DBar, D2, DBar2; logic [`NE+1:0] DivCalcExp; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; + logic [`QLEN-1:0] QMux, QMMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -63,47 +72,43 @@ module srtradix4 ( // - otherwise load WSA into the flipflop // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); - flop #(`DIVLEN+4) wsflop(clk, WSN, WS); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); - flop #(`DIVLEN+4) wcflop(clk, WCN, WC); + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); + flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); + mux2 #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); + flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); - // Quotient Selection logic - // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - // *** change this for radix 4 - generate w/ stine code - // q encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 - qsel4 qsel4(.D, .WS, .WC, .q); - // Divisor Selection logic - // *** radix 4 change to choose -2 to 2 + // Divisor Selections // - choose the negitive version of what's being selected assign DBar = ~D; assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; assign D2 = {D[`DIVLEN+2:0], 1'b0}; - always_comb - case (q) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = {(`DIVLEN+4){1'b0}}; - 4'b0010: Dsel = D; - 4'b0001: Dsel = D2; - default: Dsel = {`DIVLEN+4{1'bx}}; - endcase + genvar i; + generate + for(i=0; i<`DIVCOPIES; i++) begin + divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + if(i<3) begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + assign Q[i+1] = QNext[i]; + assign QM[i+1] = QMNext[i]; + end + end + endgenerate - // Partial Product Generation - // WSA, WCA = WS + WC - qD - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); - - //*** change for radix 4 - otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Quot); + // if starting a new divison set Q to 0 and QM to -1 + mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux); + mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); + flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage + flop #(`QLEN) QMreg(clk, QMMux, QM[0]); + + assign Quot = Q[0]; + assign FirstWS = WS[0]; + assign FirstWC = WC[0]; expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); @@ -113,7 +118,50 @@ endmodule // Submodules // //////////////// + /* verilator lint_off UNOPTFLAT */ +module divinteration ( + input logic clk, + input logic DivStart, + input logic DivBusy, + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] DBar, D2, DBar2, + input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] WS, WC, + output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] WSA, WCA +); + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] Dsel; + logic [3:0] q; + + // Quotient Selection logic + // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) + // q encoding: + // 1000 = +2 + // 0100 = +1 + // 0000 = 0 + // 0010 = -1 + // 0001 = -2 + qsel4 qsel4(.D, .WS, .WC, .q); + + always_comb + case (q) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = {`DIVLEN+4{1'b0}}; + 4'b0010: Dsel = D; + 4'b0001: Dsel = D2; + default: Dsel = {`DIVLEN+4{1'bx}}; + endcase + + // Partial Product Generation + // WSA, WCA = WS + WC - qD + csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + + otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext); + +endmodule module qsel4 ( input logic [`DIVLEN+3:0] D, @@ -195,7 +243,8 @@ module otfc4 ( input logic DivStart, input logic DivBusy, input logic [3:0] q, - output logic [`DIVLEN+2:0] Quot + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext ); // The on-the-fly converter transfers the quotient @@ -207,16 +256,11 @@ module otfc4 ( // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. - logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux; + // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. - logic [`DIVLEN:0] QR, QMR; - // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux); - mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux); - flopen #(`DIVLEN+3) Qreg(clk, DivBusy|DivStart, QMux, Quot); // *** have to connect Quot directly to M stage - flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); + logic [`QLEN-3:0] QR, QMR; // shift Q (quotent) and QM (quotent-1) // if q = 2 Q = {Q, 10} QM = {Q, 01} @@ -227,8 +271,8 @@ module otfc4 ( // *** how does the 0 concatination numbers work? always_comb begin - QR = Quot[`DIVLEN:0]; - QMR = QM[`DIVLEN:0]; // Shift Q and QM + QR = Q[`QLEN-3:0]; + QMR = QM[`QLEN-3:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 008b234d..fc73cf71 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -40,8 +40,8 @@ module srtfsm( input logic DivStart, input logic StallE, input logic StallM, - input logic [$clog2(`DIVLEN/2+3)-1:0] Dur, - output logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2E, + input logic [`DURLEN-1:0] Dur, + output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivStickyE, output logic DivDone, output logic DivNegStickyE, @@ -51,7 +51,7 @@ module srtfsm( typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; statetype state; - logic [$clog2(`DIVLEN/2+3)-1:0] step; + logic [`DURLEN-1:0] step; logic WZero; //logic [$clog2(`DIVLEN/2+3)-1:0] Dur; logic [`DIVLEN+3:0] W; @@ -63,7 +63,7 @@ module srtfsm( assign DivDone = (state == DONE); assign W = WC+WS; assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? - assign EarlyTermShiftDiv2E = step; + assign EarlyTermShiftE = step; always_ff @(posedge clk) begin if (reset) begin @@ -73,7 +73,7 @@ module srtfsm( if (XZeroE|YZeroE|XInfE|YInfE|XNaNE|YNaNE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin - if ((~|step[$clog2(`DIVLEN/2+3)-1:1]&step[0])|WZero) begin + if ((~|step[`DURLEN-1:1]&step[0])|WZero) begin state <= #1 DONE; end step <= step - 1; diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index d17d2abd..fa76c051 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -35,7 +35,7 @@ module srtpreproc ( output logic [`DIVLEN-1:0] X, output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [$clog2(`DIVLEN/2+3)-1:0] Dur + output logic [`DURLEN-1:0] Dur ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; @@ -63,10 +63,20 @@ module srtpreproc ( assign X = PreprocX; assign Dpreproc = PreprocY; - - assign Dur = ($clog2(`DIVLEN/2+3))'(`DIVLEN/2+2); + + assign Dur = (`DURLEN)'($rtoi(`FPDUR)); // assign intExp = zeroCntB - zeroCntA + 1; // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + // radix 2 radix 4 + // 1 copies DIVLEN+2 DIVLEN+2/2 + // 2 copies DIVLEN+2/2 DIVLEN+2/2*2 + // 4 copies DIVLEN+2/4 DIVLEN+2/2*4 + // 8 copies DIVLEN+2/8 DIVLEN+2/2*8 + + // DIVRESLEN = DIVLEN or DIVLEN+2 + // r = 1 or 2 + // DIVRESLEN/(r*`DIVCOPIES) + endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index fa46a060..ba14499e 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -80,9 +80,9 @@ module testbenchfp; logic CvtResSgnE; logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by - logic [`DIVLEN+2:0] Quot; + logic [`QLEN-1:0] Quot; logic CvtResDenormUfE; - logic [$clog2(`DIVLEN/2+3)-1:0] EarlyTermShiftDiv2; + logic [`DURLEN-1:0] EarlyTermShift; logic DivStart, DivBusy; logic reset = 1'b0; logic [`DIVLEN-1:0] DivX; @@ -90,7 +90,7 @@ module testbenchfp; logic [`DIVLEN+3:0] WSN, WS; logic [`DIVLEN+3:0] WCN, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [$clog2(`DIVLEN/2+3)-1:0] Dur; + logic [`DURLEN-1:0] Dur; // in-between FMA signals logic Mult; @@ -686,8 +686,8 @@ module testbenchfp; .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone, - .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShiftDiv2(EarlyTermShiftDiv2), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), - .PostProcFlg(Flg), .W(FpRes), .FCvtIntRes(IntRes)); + .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero, @@ -697,8 +697,8 @@ module testbenchfp; .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftDiv2E(EarlyTermShiftDiv2)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .WS, .WC, .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0; From 5c0ecfa4334c4c8c53800813625b52620e4d7c3c Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 11 Jul 2022 18:31:51 -0700 Subject: [PATCH 04/36] forgot a file --- pipelined/src/fpu/specialcase.sv | 290 +++++++++++++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 pipelined/src/fpu/specialcase.sv diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv new file mode 100644 index 00000000..3c28eae2 --- /dev/null +++ b/pipelined/src/fpu/specialcase.sv @@ -0,0 +1,290 @@ +/////////////////////////////////////////// +// +// Written: me@KatherineParry.com +// Modified: 7/5/2022 +// +// Purpose: special case selection +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module specialcase( + input logic Xs, // input signs + input logic [`NF:0] Xm, Ym, Zm, // input mantissas + input logic XNaN, YNaN, ZNaN, // inputs are NaN + input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [`FMTBITS-1:0] OutFmt, // output format + input logic InfIn, + input logic NaNIn, + input logic XInf, YInf, + input logic XZero, + input logic IntZero, + input logic IntToFp, + input logic Int64, + input logic Signed, + input logic CvtOp, + input logic DivOp, + input logic FmaOp, + input logic Plus1, + input logic DivByZero, + input logic [`NE:0] CvtCe, // the calculated expoent + input logic Ws, // the res's sign + input logic IntInvalid, Invalid, Overflow, // flags + input logic CvtResUf, + input logic [`NE-1:0] Re, // Res exponent + input logic [`NE+1:0] FullRe, // Res exponent + input logic [`NF-1:0] Rf, // Res fraction + input logic [`XLEN+1:0] CvtNegRes, // the negation of the result + output logic [`FLEN-1:0] PostProcRes, // final res + output logic [`XLEN-1:0] FCvtIntRes // final res +); + logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, UfRes, NormRes; // possible results + logic OfResMax; + logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output + logic KillRes; + logic SelOfRes; + + + // does the overflow result output the maximum normalized floating point number + // output infinity if the input is infinity + assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Ws) | (Frm[1:0]==2'b11&Ws)); + + if (`FPSIZES == 1) begin + + //NaN res selection depending on standard + if(`IEEE754) begin + assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + assign OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + assign UfRes = {Ws, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = {Ws, Re, Rf}; + + end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions? + if(`IEEE754) begin + assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; + assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else begin + assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end + + assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : + OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + + end else if (`FPSIZES == 3) begin + always_comb + case (OutFmt) + `FMT: begin + if(`IEEE754) begin + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Ws, Re, Rf}; + end + `FMT1: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; + InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end + OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + UfRes = {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + end + `FMT2: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; + YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; + ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; + InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Ws, {`NE2{1'b1}}, (`NF2)'(0)}; + UfRes = {{`FLEN-`LEN2{1'b1}}, Ws, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN2{1'b1}}, Ws, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; + end + default: begin + if(`IEEE754) begin + XNaNRes = (`FLEN)'(0); + YNaNRes = (`FLEN)'(0); + ZNaNRes = (`FLEN)'(0); + InvalidRes = (`FLEN)'(0); + end else begin + InvalidRes = (`FLEN)'(0); + end + OfRes = (`FLEN)'(0); + UfRes = (`FLEN)'(0); + NormRes = (`FLEN)'(0); + end + endcase + + end else if (`FPSIZES == 4) begin + always_comb + case (OutFmt) + 2'h3: begin + if(`IEEE754) begin + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + OfRes = OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Ws, Re, Rf}; + end + 2'h1: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; + YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; + ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; + InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + end + OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Ws, {`D_NE{1'b1}}, (`D_NF)'(0)}; + UfRes = {{`FLEN-`D_LEN{1'b1}}, Ws, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`D_LEN{1'b1}}, Ws, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; + end + 2'h0: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; + YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; + ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; + InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Ws, {`S_NE{1'b1}}, (`S_NF)'(0)}; + UfRes = {{`FLEN-`S_LEN{1'b1}}, Ws, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`S_LEN{1'b1}}, Ws, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; + end + 2'h2: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; + YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; + ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; + InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Ws, {`H_NE{1'b1}}, (`H_NF)'(0)}; + // zero is exact fi dividing by infinity so don't add 1 + UfRes = {{`FLEN-`H_LEN{1'b1}}, Ws, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`H_LEN{1'b1}}, Ws, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; + end + endcase + + end + + + + + + // determine if you shoould kill the res - Cvt + // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 + // - dont set to zero if fp input is zero but not using the fp input + // - dont set to zero if int input is zero but not using the int input + assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); + assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); + // output infinity with result sign if divide by zero + if(`IEEE754) begin + assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes : + YNaN&~CvtOp ? YNaNRes : + ZNaN&FmaOp ? ZNaNRes : + Invalid ? InvalidRes : + SelOfRes ? OfRes : + KillRes ? UfRes : + NormRes; + end else begin + assign PostProcRes = NaNIn|Invalid ? InvalidRes : + SelOfRes ? OfRes : + KillRes ? UfRes : + NormRes; + end + + /////////////////////////////////////////////////////////////////////////////////////// + // + // ||||||||||| ||| ||| ||||||||||||| + // ||| |||||| ||| ||| + // ||| ||| ||| ||| ||| + // ||| ||| |||||| ||| + // ||||||||||| ||| ||| ||| + // + /////////////////////////////////////////////////////////////////////////////////////// + + // *** probably can optimize the negation + // select the overflow integer res + // - negitive infinity and out of range negitive input + // | int | long | + // signed | -2^31 | -2^63 | + // unsigned | 0 | 0 | + // + // - positive infinity and out of range positive input and NaNs + // | int | long | + // signed | 2^31-1 | 2^63-1 | + // unsigned | 2^32-1 | 2^64-1 | + // + // other: 32 bit unsinged res should be sign extended as if it were a signed number + assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive + Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive + Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive + {`XLEN{1'b1}};// unsigned positive + + + // select the integer output + // - if the input is invalid (out of bounds NaN or Inf) then output overflow res + // - if the input underflows + // - if rounding and signed opperation and negitive input, output -1 + // - otherwise output a rounded 0 + // - otherwise output the normal res (trmined and sign extended if nessisary) + assign FCvtIntRes = IntInvalid ? OfIntRes : + CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? + Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; +endmodule \ No newline at end of file From 3483b92480a32d52a1e3d88137aee17a1d82d18e Mon Sep 17 00:00:00 2001 From: cturek Date: Tue, 12 Jul 2022 02:21:38 +0000 Subject: [PATCH 05/36] On the fly conversion for square root --- pipelined/srt/srt.sv | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 6e8cd560..7886af47 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -229,7 +229,7 @@ module otfc2 #(parameter N=64) ( logic [N+1:0] QR, QMR; flopr #(N+3) Qreg(clk, Start, QNext, Q); - mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); + mux2 #(`DIVLEN+3) Qmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); always_comb begin @@ -254,12 +254,36 @@ endmodule // Square Root OTFC, Radix 2 // /////////////////////////////// module softc2( - input logic clk, - input logic Start, - input logic sp, sn, - output logic S, + input logic clk, + input logic Start, + input logic sp, sn, + input logic [N+3:0] C, + output logic [N-1:0] Sq, ); + + // The on-the-fly converter transfers the square root + // bits to the quotient as they come. + logic [N+2:0] S, SM, SNext, SMNext, SMMux; + + flopr #(N+3) Sreg(clk, Start, SNext, S); + mux2 #(`DIVLEN+3) Smux(SMNext, {`DIVLEN+3{1'b1}}, Start, SMMux); + flop #(`DIVLEN+3) SMreg(clk, SMMux, SM); + + always_comb begin + if (sp) begin + SNext = S | ((C << 2) & ~(C << 1)); + SMNext = S; + end else if (sn) begin + SNext = SM | ((C << 2) & ~(C << 1)); + SMNext = SM; + end else begin // If sp and sn are not true, then sz is + SNext = S; + SMNext = SM | ((C << 2) & ~(C << 1)); + end + end + assign Sq = S[N+2] ? S[N+1:2] : S[N:1]; + endmodule ///////////// // counter // From 9d4acc9ddbb16e3471e6c59baf573c113b790cc5 Mon Sep 17 00:00:00 2001 From: cturek Date: Tue, 12 Jul 2022 22:18:56 +0000 Subject: [PATCH 06/36] C register and other various fixes --- pipelined/srt/srt.sv | 54 +++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 7886af47..7e6d22a4 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -54,13 +54,13 @@ module srt ( output logic [3:0] Flags ); - logic qp, qz, qm; // quotient is +1, 0, or -1 - logic [`NE-1:0] calcExp; - logic calcSign; - logic [`DIVLEN+3:0] X, Dpreproc; - logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; + logic qp, qz, qm; // quotient is +1, 0, or -1 + logic [`NE-1:0] calcExp; + logic calcSign; + logic [`DIVLEN+3:0] X, Dpreproc, C; + logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; - logic intSign; + logic intSign; srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign); @@ -91,7 +91,11 @@ module srt ( // Partial Product Generation csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); - otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + // If only implementing division, use divide otfc + // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + // otherwise use sotfc + creg sotfcC(clk, Start, C); + sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); @@ -138,9 +142,9 @@ module srtpreproc ( assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}}; assign DivX = Int ? PreprocA : PreprocX; - assign SqrtX = {XExp[0] ? 4'b0000 : 4'b1111, SrcXFrac}; + assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac}; - assign X = Sqrt ? SqrtX : {4'b0001, DivX}; + assign X = Sqrt ? {SqrtX, {(`EXTRAINTBITS-1){1'b0}}} : {4'b0001, DivX}; assign D = {4'b0001, Int ? PreprocB : PreprocY}; assign intExp = zeroCntB - zeroCntA + 1; assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); @@ -253,22 +257,22 @@ endmodule /////////////////////////////// // Square Root OTFC, Radix 2 // /////////////////////////////// -module softc2( +module sotfc2( input logic clk, input logic Start, input logic sp, sn, - input logic [N+3:0] C, - output logic [N-1:0] Sq, + input logic [`DIVLEN+3:0] C, + output logic [`DIVLEN-1:0] Sq, ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. - logic [N+2:0] S, SM, SNext, SMNext, SMMux; + logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; - flopr #(N+3) Sreg(clk, Start, SNext, S); - mux2 #(`DIVLEN+3) Smux(SMNext, {`DIVLEN+3{1'b1}}, Start, SMMux); - flop #(`DIVLEN+3) SMreg(clk, SMMux, SM); + flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); + mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux); + flop #(`DIVLEN+4) SMreg(clk, SMux, M); always_comb begin if (sp) begin @@ -282,9 +286,23 @@ module softc2( SMNext = SM | ((C << 2) & ~(C << 1)); end end - assign Sq = S[N+2] ? S[N+1:2] : S[N:1]; + assign Sq = S[`DIVLEN-1:0]; endmodule + +////////////////////////// +// C Register for SOTFC // +////////////////////////// +module creg(input logic clk, + input logic Start, + output logic [`DIVLEN+3:0] C +); + logic [`DIVLEN+3:0] CMux; + + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, (`DIVLEN-2){1'b0}}, Start, CMux); + flop #(`DIVLEN+4) cflop(clk, CMux, C); +endmodule + ///////////// // counter // ///////////// @@ -293,7 +311,7 @@ module counter(input logic clk, input logic [$clog2(`XLEN+1)-1:0] dur, output logic done); - logic [$clog2(`XLEN+1)-1:0] count; + logic [$clog2(`XLEN+1)-1:0] count; // This block of control logic sequences the divider // through its iterations. You may modify it if you From 452b017f9a87751624b0d584ca62686771dd857e Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 12 Jul 2022 22:42:19 +0000 Subject: [PATCH 07/36] found the bug in the store modification --- pipelined/src/lsu/lsu.sv | 4 ++-- tests/riscof/spike/spike_rv32imc_isa.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 50ecdb18..e9f41e65 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -235,8 +235,8 @@ module lsu ( .s(SelUncachedAdr), .y(LSUBusHWDATA)); if(CACHE_ENABLED) begin : dcache - if (`LLEN>`FLEN) - mux2 #(`LLEN) datamux({{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); + if (`LLEN>`XLEN) + mux2 #(`LLEN) datamux({IEUWriteDataM, IEUWriteDataM}, FWriteDataM, FpLoadStoreM, FinalWriteDataM); else assign FinalWriteDataM = {{`LLEN-`XLEN{1'b0}}, IEUWriteDataM}; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), diff --git a/tests/riscof/spike/spike_rv32imc_isa.yaml b/tests/riscof/spike/spike_rv32imc_isa.yaml index 5a76fd97..04a5da18 100644 --- a/tests/riscof/spike/spike_rv32imc_isa.yaml +++ b/tests/riscof/spike/spike_rv32imc_isa.yaml @@ -1,11 +1,11 @@ hart_ids: [0] hart0: - ISA: RV32IMAFCZicsr_Zifencei + ISA: RV32IMAFDCZicsr_Zifencei physical_addr_sz: 32 User_Spec_Version: '2.3' supported_xlen: [32] misa: - reset-val: 0x40001125 + reset-val: 0x4000112D rv32: accessible: true mxl: @@ -23,6 +23,6 @@ hart0: warl: dependency_fields: [] legal: - - extensions[25:0] bitmask [0x0001125, 0x0000000] + - extensions[25:0] bitmask [0x000112D, 0x0000000] wr_illegal: - Unchanged \ No newline at end of file From ed9106128fe828be813b7f3d2b81e4ab86a869ba Mon Sep 17 00:00:00 2001 From: cturek Date: Tue, 12 Jul 2022 22:45:54 +0000 Subject: [PATCH 08/36] Square root implemented --- pipelined/srt/srt.sv | 8 ++++---- pipelined/srt/testbench.sv | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 7e6d22a4..1037c9e2 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -92,10 +92,10 @@ module srt ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); // If only implementing division, use divide otfc - // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); // otherwise use sotfc - creg sotfcC(clk, Start, C); - sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot); + // creg sotfcC(clk, Start, C); + // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); @@ -272,7 +272,7 @@ module sotfc2( flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux); - flop #(`DIVLEN+4) SMreg(clk, SMux, M); + flop #(`DIVLEN+4) SMreg(clk, SMux, S); always_comb begin if (sp) begin diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 83f33707..1f6f1561 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -82,7 +82,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b1; + assign Sqrt = 1'b0; // Divider srt srt(.clk, .Start(req), @@ -111,7 +111,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("sqrttestvectors", Tests); + $readmemh ("testvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; From 5c9f0115610bf4f37c5282a8e733f1f66a0b2917 Mon Sep 17 00:00:00 2001 From: cturek Date: Tue, 12 Jul 2022 23:04:33 +0000 Subject: [PATCH 09/36] little fix --- pipelined/srt/srt.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 1037c9e2..b3de448f 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -144,7 +144,7 @@ module srtpreproc ( assign DivX = Int ? PreprocA : PreprocX; assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac}; - assign X = Sqrt ? {SqrtX, {(`EXTRAINTBITS-1){1'b0}}} : {4'b0001, DivX}; + assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX}; assign D = {4'b0001, Int ? PreprocB : PreprocY}; assign intExp = zeroCntB - zeroCntA + 1; assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); From e05b2a07d2dada0135a6f33c62962fa65576cefd Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 12 Jul 2022 18:32:17 -0700 Subject: [PATCH 10/36] removed warnings and took a mux out of the critical path --- pipelined/config/rv64fp/wally-config.vh | 2 +- pipelined/config/shared/wally-shared.vh | 13 +++++++------ pipelined/regression/wave-fpu.do | 3 --- pipelined/src/fpu/divsqrt.sv | 6 +++--- pipelined/src/fpu/fmashiftcalc.sv | 3 ++- pipelined/src/fpu/postprocess.sv | 2 +- pipelined/src/fpu/srt-radix4.sv | 26 +++++++++++++------------ pipelined/src/fpu/srtfsm.sv | 6 +++--- pipelined/src/fpu/srtpreproc.sv | 3 +-- pipelined/src/generic/lzc.sv | 2 +- pipelined/testbench/testbench-fp.sv | 8 ++++---- synthDC/scripts/synth.tcl | 2 +- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index cc8d1b2b..8f13b2e3 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 32 +`define XLEN 64 // IEEE 754 compliance `define IEEE754 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index c064783c..54fa7a9b 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,14 +101,15 @@ `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) // division constants -`define RADIX 4 -`define DIVCOPIES 4 +`define RADIX 32'h4 +`define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) -`define LOGR ((`RADIX==2) ? 1 : 2) -`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES)) -`define DURLEN ($clog2($rtoi(`FPDUR)+1)) -`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES) +`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) +// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) +`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)) +`define DURLEN ($clog2(`FPDUR+1)) +`define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define USE_SRAM 0 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 58f782bd..9a3d7e06 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -24,9 +24,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* -add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/* -add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/* -add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/* add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index c4f09aea..8420baa1 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -52,7 +52,7 @@ module divsqrt( // output logic [`XLEN-1:0] RemM, ); - logic [`DIVLEN+3:0] WSN, WCN; + logic [`DIVLEN+3:0] NextWSN, NextWCN; logic [`DIVLEN+3:0] WS, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVLEN-1:0] X; @@ -61,8 +61,8 @@ module divsqrt( srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index d4898e80..3c286b50 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -53,7 +53,8 @@ module fmashiftcalc( assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4); + // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 + assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 18452abd..30945532 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -29,7 +29,7 @@ `include "wally-config.vh" -module postprocess( +module postprocess ( // general signals input logic Xs, Ys, // input signs input logic [`NE-1:0] Ze, // input exponents diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 1c7b9648..5a7e96e2 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -41,7 +41,7 @@ module srtradix4( input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [`QLEN-1:0] Quot, - output logic [`DIVLEN+3:0] WSN, WCN, + output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] FirstWS, FirstWC, output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem @@ -58,11 +58,12 @@ module srtradix4( logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; logic [`NE+1:0] DivCalcExp; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; - logic [`QLEN-1:0] QMux, QMMux; + logic [`QLEN-1:0] QMMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -72,9 +73,11 @@ module srtradix4( // - otherwise load WSA into the flipflop // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); + mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); @@ -88,10 +91,10 @@ module srtradix4( genvar i; generate - for(i=0; i<`DIVCOPIES; i++) begin + for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); - if(i<3) begin + if(i<(`DIVCOPIES-1)) begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; assign Q[i+1] = QNext[i]; @@ -101,9 +104,8 @@ module srtradix4( endgenerate // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux); mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); - flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage + flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flop #(`QLEN) QMreg(clk, QMMux, QM[0]); assign Quot = Q[0]; @@ -181,7 +183,7 @@ module qsel4 ( logic [3:0] QSel4[1023:0]; - initial begin + always_comb begin integer d, w, i, w2; for(d=0; d<8; d++) for(w=0; w<128; w++)begin @@ -270,9 +272,9 @@ module otfc4 ( // else if q = -2 Q = {QM, 10} QM = {QM, 01} // *** how does the 0 concatination numbers work? + assign QR = Q[`QLEN-3:0]; + assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM always_comb begin - QR = Q[`QLEN-3:0]; - QMR = QM[`QLEN-3:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; @@ -352,5 +354,5 @@ module expcalc( endcase end // correct exponent for denormalized input's normalization shifts - assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index fc73cf71..21e35c36 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -33,7 +33,7 @@ module srtfsm( input logic clk, input logic reset, - input logic [`DIVLEN+3:0] WSN, WCN, WS, WC, + input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, @@ -58,8 +58,8 @@ module srtfsm( //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); - assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0}); - assign DivStickyE = ~WZero; + assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); + assign DivStickyE = |W; assign DivDone = (state == DONE); assign W = WC+WS; assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index fa76c051..7386332f 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -63,8 +63,7 @@ module srtpreproc ( assign X = PreprocX; assign Dpreproc = PreprocY; - - assign Dur = (`DURLEN)'($rtoi(`FPDUR)); + assign Dur = (`DURLEN)'(`FPDUR); // assign intExp = zeroCntB - zeroCntA + 1; // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv index 9f6e5981..71aabbc6 100644 --- a/pipelined/src/generic/lzc.sv +++ b/pipelined/src/generic/lzc.sv @@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) ( /* verilator lint_off CMPCONST */ /* verilator lint_off WIDTH */ - int i; + logic [31:0] i; always_comb begin i = 0; while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index ba14499e..2aec1ab1 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -87,8 +87,8 @@ module testbenchfp; logic reset = 1'b0; logic [`DIVLEN-1:0] DivX; logic [`DIVLEN-1:0] Dpreproc; - logic [`DIVLEN+3:0] WSN, WS; - logic [`DIVLEN+3:0] WCN, WC; + logic [`DIVLEN+3:0] NextWSN, WS; + logic [`DIVLEN+3:0] NextWCN, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DURLEN-1:0] Dur; @@ -696,9 +696,9 @@ module testbenchfp; .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0; diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 9f2b4647..251522dc 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets - redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" } -redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 } +redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 } From 26e39dd32584b163dbc2d2cfea1acc9d04fe8658 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 13 Jul 2022 09:41:35 -0700 Subject: [PATCH 11/36] removed the +1 in the cvt --- pipelined/src/fpu/fcvt.sv | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 2d9fc21c..4820cf28 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -68,7 +68,8 @@ module fcvt ( logic Signed; // is the opperation with a signed integer? logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? - logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC + logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder) + logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC // seperate OpCtrl for code readability @@ -102,10 +103,11 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcIn = IntToFp ? {TrimInt, {`CVTLEN-`XLEN{1'b0}}} : - {Xm[`NF-1:0], {`CVTLEN-`NF{1'b0}}}; + assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + {Xm, {`CVTLEN-`NF{1'b0}}}; + assign LzcIn = LzcInFull[`CVTLEN-1:0]; - lzc #(`CVTLEN) lzc (.num(LzcIn), .ZeroCnt(LeadingZeros)); + lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros)); /////////////////////////////////////////////////////////////////////////// // shifter @@ -119,13 +121,13 @@ module fcvt ( // denormalized/undeflowed result fp -> fp: // - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0 // ??? -> fp: - // - shift left by LeadingZeros+1 - to shift till the result is normalized + // - shift left by LeadingZeros - to shift till the result is normalized // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : - (LeadingZeros+1)&{`LOGCVTLEN{XDenorm|IntToFp}}; + (LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}}; /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -197,14 +199,14 @@ module fcvt ( // | 0's | Mantissa | 0's if nessisary | // | keep | // - // - if the input is denormalized then we dont shift... so the "- (LeadingZeros+1)" is just leftovers from other options - // int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - LeadingZeros = XLEN + NewBias - 1 - LeadingZeros + // - if the input is denormalized then we dont shift... so the "- LeadingZeros" is just leftovers from other options + // int -> fp : largest bias + XLEN - Largest bias + new bias - LeadingZeros = XLEN + NewBias - LeadingZeros // Process: // - shifted right by XLEN (XLEN) - // - shift left to normilize (-1-LeadingZeros) + // - shift left to normilize (-LeadingZeros) // - newBias to make the biased exponent - // oldexp - biasold +newbias - (LeadingZeros+1)&(XDenorm|IntToFp) - assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenorm|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})}; + // oldexp - biasold +newbias - LeadingZeros&(XDenorm|IntToFp) + assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros&{`LOGCVTLEN{XDenorm|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion From 31db938e7eaa4ca3dfc369f60431a1494f6fdf6d Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 17:47:27 +0000 Subject: [PATCH 12/36] Added adder input selection to on the fly converter --- pipelined/srt/srt.sv | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index b3de448f..f04aa718 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -87,6 +87,7 @@ module srt ( // Divisor Selection logic assign Db = ~D; mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel); + fsel2 fsel(qp, qn, ) // Partial Product Generation csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); @@ -95,7 +96,7 @@ module srt ( otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); // otherwise use sotfc // creg sotfcC(clk, Start, C); - // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot); + // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); @@ -263,6 +264,7 @@ module sotfc2( input logic sp, sn, input logic [`DIVLEN+3:0] C, output logic [`DIVLEN-1:0] Sq, + output logic [`DIVLEN+3:0] F, ); @@ -288,6 +290,8 @@ module sotfc2( end assign Sq = S[`DIVLEN-1:0]; + fsel2 fsel(sp, sn, C, S, SM, F); + endmodule ////////////////////////// From 110b762b554adfe5a2964404a776d73383d04b49 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 17:56:23 +0000 Subject: [PATCH 13/36] Finalized sqrt, ready for debugging --- pipelined/srt/srt.sv | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index f04aa718..23869225 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -57,7 +57,7 @@ module srt ( logic qp, qz, qm; // quotient is +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; - logic [`DIVLEN+3:0] X, Dpreproc, C; + logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; logic intSign; @@ -87,17 +87,19 @@ module srt ( // Divisor Selection logic assign Db = ~D; mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel); - fsel2 fsel(qp, qn, ) + + // If only implementing division, use divide otfc + // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + // otherwise use sotfc + creg sotfcC(clk, Start, C); + sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F); + + // Adder input selection + assign AddIn = Sqrt ? F : Dsel; // Partial Product Generation - csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); - // If only implementing division, use divide otfc - otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); - // otherwise use sotfc - // creg sotfcC(clk, Start, C); - // sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F); - expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); signcalc signcalc(.XSign, .YSign, .calcSign); From 11bb3f0a3eee60dc8329e9f28c1840a537c1e80a Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 18:11:13 +0000 Subject: [PATCH 14/36] Test generation files in common format --- pipelined/srt/exptestgen.c | 4 ++++ pipelined/srt/sqrttestgen | Bin 22792 -> 22792 bytes pipelined/srt/sqrttestgen.c | 17 +++++++++++------ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c index 61fe74aa..d6bebb77 100644 --- a/pipelined/srt/exptestgen.c +++ b/pipelined/srt/exptestgen.c @@ -96,6 +96,10 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, // Print r in standard double format fprintf(fptr, "%03x", rExp|(rSign<<11)); printhex(fptr, rFrac); + fprintf(fptr, "_"); + + // Spacing for testbench, value doesn't matter + fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); } diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen index dadc5dc5c8d36055ee1c68c8296279c302aa4efb..06615165395cb6abb37c9d60152d5fe70f15e11a 100755 GIT binary patch delta 1496 zcmZwFUu+ab7y$5@y+8D>dwYBQyH=zeO}P{r=pj&o!QPT~+YM1tlqL57$b-gGeJCc{ zKAL_2l*X%rF5edal=QY zZ}9UV%d5MF@h%wydfw>2HvS^M=PLiUZPD70h+A4~H!Mqoqq|MHAFeT6moMdDNNru-Uz9)6K`SB*ZJ=lh4Y z-J{RDd)}Jg^7esL*q|9-9t_6DLn3EhFRuv?8V(|C8Is9$(?{e7e7_mbRP9xeqw z^TjsTb+UDxjKKlr_40q>W}*BUKQkoC!}ycwqxhSL-;~p96LYpbh-)d!+jhIy4Av*G zUch<+>oxF6FzhSA&#iIBI^mmO&4#l$rWX5H;}TWHDu(|n*oNP_P^$3Ri2t!!SgJ;O z9qdroEZdHUJSDn!DQd6O5b#PlcvXFlPr#RIGtXH(^sx-ns}0Fetfwp9Tj4YCKtySQ zTU#3Z*?=Qmu`0IS;EUd-z(~yox!Ao82SInus-Pr>3W=gSLH`kh+AD8c2ezN^`t)KD z=2`W)pt`=w*Fya!+=?%QNxs}VR=<;T1G&N#WC^UpA80GxAr?_BN!#S&S=ihVmN4n= zhC7J0jp3(?sSC@&2giHOq`=||i|r9=$8EMXdO3dyj_Mse0}u3CNmJm7ragQLjx@#b zlwhPOReqQjR~#SpG8STDRmYpdn0u)CRQYXLD|Fwapbm^#$^z`1waXZm`OENNU#-7* m+Bk2eSriQRngYvTg delta 1453 zcmYk+Uu;uV7y$5dZr9m%_x9fFI@?ZRby*g0VMAt|@z2`1b-kG)CUVsn6nz2~6E7xa zJWN(Ige}Bv_&FRt5O`5p)DWs>zH}H8I(@*A5Y&*EzEryeO+!*CI(Pg|@7J-D_I~$w z&Uel|=iJjyT;LNI_{_53Fj=)2Cw^Tt9o>baC#p9NoZ~k_W#<3x+OhUvHu24e9~XOu z%Aqe$XXcdGyuOl_U{>z?G_2k8^Vl&jW4Cnk{e%$WmTq{*$QF5o{5<(B^2_9XFCSHo}5L3uOI~ zv7>eFau?Rz<*t!0lK&yEwlDvgHMc$6iCDn@XFqVx36Kt^=@_{XUJNO%3<%eNk!!~8f11B5a z;Wnu1rk%U+ELq2s6>IYH;$FNjw*D)`al0hzvNjH2P39Klc`HV-0e3i=chhsl(1xx7 znehi)QQzeA@Pszh_R7YtJN^-3Jh@so{_bk`;g4p1xMW+Ch2kij)7t%=ddm?xK7(Z~ z65egZm~DNqO~x0V-CD55Z~BU z&fgYd4hM58LO2-DV3fnZct?nf7)Mrx$YWIh6M}KZZeWaIH1ItoFs7l&=-?ITGaBy; zVP8C2)zXI70sI=bVc2NrQ*gp)X`IF#nU&3bLF1^>uhlCvaLL%om%ubr-V#2_n|Z^W zR>3j%KxvbuRJ6?Fo2Q!m75r&d3ta8)52Q(DEwlNNANjfq8q9Y*q}G}XmyGt!qs>q51S`9{0<{(6xUKf1rjN$;1>H{j;eg%cuPxkx Pvvv>6*+!;l$2|W7ZMD|( diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index b4ece147..710fc32f 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -33,11 +33,7 @@ void main(void) double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, 1.75, 1.875, 1.99999, 1.1, 1.2, 1.01, 1.001, 1.0001, -<<<<<<< Updated upstream - 1/1.1, 1/1.5, 1/1.25, 1/1.125}; -======= 2/1.1, 2/1.5, 2/1.25, 2/1.125}; ->>>>>>> Stashed changes double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; int i; @@ -69,14 +65,23 @@ void main(void) void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac) { + // Print a in standard double format fprintf(fptr, "%03x", aExp); printhex(fptr, aFrac); fprintf(fptr, "_"); + + // Spacing for testbench, value doesn't matter + fprintf(fptr, "%016x", 0); + fprintf(fptr, "_"); + + // Print r in standard double format fprintf(fptr, "%03x", rExp); printhex(fptr, rFrac); + fprintf(fptr, "_"); + + // Spacing for testbench, value doesn't matter + fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); - - } void printhex(FILE *fptr, double m) From 81f396f885d4f9ec62f7d4b6e2bc27f43e6df5a7 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 18:30:18 +0000 Subject: [PATCH 15/36] Testbench accepts standard test vector files --- pipelined/srt/testbench.sv | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 1f6f1561..537fbb3e 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -55,21 +55,11 @@ module testbench; parameter MEM_SIZE = 40000; parameter MEM_WIDTH = 64+64+64+64; - // INT TEST SIZES - // `define memrem 63:0 - // `define memr 127:64 - // `define memb 191:128 - // `define mema 255:192 - - // FLOAT TEST SIZES - // `define memr 63:0 - // `define memb 127:64 - // `define mema 191:128 - - // SQRT TEST SIZES - `define memr 63:0 - `define mema 127:64 + // Test sizes + `define memrem 63:0 + `define memr 127:64 `define memb 191:128 + `define mema 255:192 // Test logicisters logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file From 9b7e63f48227fa5b28228b50d3b7653dc99448b9 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 19:34:04 +0000 Subject: [PATCH 16/36] Lint error fixed and added comments to preprocessing --- pipelined/srt/srt.sv | 62 +++++++++++++++++++++++--------------- pipelined/srt/testbench.sv | 2 +- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 23869225..3e41c16c 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -54,13 +54,14 @@ module srt ( output logic [3:0] Flags ); - logic qp, qz, qm; // quotient is +1, 0, or -1 + logic qp, qz, qn; // quotient is +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; logic intSign; + logic cin; srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign); @@ -76,29 +77,30 @@ module srt ( // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qm); + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qn); flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(1) signflop(clk, Start, calcSign, rsign); flopen #(7) durflop(clk, Start, calcDur, dur); - counter divcounter(clk, Start, dur, done); + srtcounter divcounter(clk, Start, dur, done); // Divisor Selection logic assign Db = ~D; - mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qm, Dsel); + mux3onehot #(`DIVLEN) divisorsel(Db, {(`DIVLEN+4){1'b0}}, D, qp, qz, qn, Dsel); // If only implementing division, use divide otfc - // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qm, Quot); + // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot); // otherwise use sotfc - creg sotfcC(clk, Start, C); - sotfc2 #(`DIVLEN) sotfc2(clk, Start, qp, qn, C, Quot, F); + creg sotfcC(clk, Start, C); + sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F); // Adder input selection assign AddIn = Sqrt ? F : Dsel; // Partial Product Generation - csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); + assign cin = ~Sqrt & qp; + csa #(`DIVLEN+4) csa(WS, WC, AddIn, cin, WSA, WCA); expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); @@ -128,30 +130,40 @@ module srtpreproc ( logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; logic [`XLEN-1:0] PosA, PosB; - logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX, SqrtX; + logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX; + logic [`NF+4:0] SqrtX; + // Generate positive integer inputs if they are signed assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; + // Calculate leading zeros of integer inputs lzc #(`XLEN) lzcA (PosA, zeroCntA); lzc #(`XLEN) lzcB (PosB, zeroCntB); + // Make integers have DIVLEN bits assign ExtraA = {PosA, {`EXTRAINTBITS{1'b0}}}; assign ExtraB = {PosB, {`EXTRAINTBITS{1'b0}}}; + // Shift integers to have leading ones assign PreprocA = ExtraA << (zeroCntA + 1); assign PreprocB = ExtraB << (zeroCntB + 1); + + // Make mantissas have DIVLEN bits assign PreprocX = {SrcXFrac, {`EXTRAFRACBITS{1'b0}}}; assign PreprocY = {SrcYFrac, {`EXTRAFRACBITS{1'b0}}}; + // Selecting correct divider inputs assign DivX = Int ? PreprocA : PreprocX; assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac}; - assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX}; assign D = {4'b0001, Int ? PreprocB : PreprocY}; + + // Integer exponent and sign calculations assign intExp = zeroCntB - zeroCntA + 1; assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + // Number of cycles of divider assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2); endmodule @@ -160,7 +172,7 @@ endmodule ///////////////////////////////// module qsel2 ( // *** eventually just change to 4 bits input logic [`DIVLEN+3:`DIVLEN] ps, pc, - output logic qp, qz, qm + output logic qp, qz, qn ); logic [`DIVLEN+3:`DIVLEN] p, g; @@ -187,7 +199,7 @@ module qsel2 ( // *** eventually just change to 4 bits // Produce quotient = +1, 0, or -1 assign #1 qp = magnitude & ~sign; assign #1 qz = ~magnitude; - assign #1 qm = magnitude & sign; + assign #1 qn = magnitude & sign; endmodule //////////////////////////////////// @@ -198,15 +210,16 @@ module fsel2 ( input logic [`DIVLEN+3:0] C, S, SM, output logic [`DIVLEN+3:0] F ); - logic [`DIVLEN+3:0] FP, FN; + logic [`DIVLEN+3:0] FP, FN, FZ; // Generate for both positive and negative bits assign FP = ~S & C; assign FN = SM | (C & (~C << 2)); + assign FZ = {(`DIVLEN+4){1'B0}}; // Choose which adder input will be used - assign F = sp ? FP : (sn ? FN : (`DIVLEN+4){1'b0}); + assign F = sp ? FP : (sn ? FN : FZ); endmodule @@ -216,7 +229,7 @@ endmodule module otfc2 #(parameter N=64) ( input logic clk, input logic Start, - input logic qp, qz, qm, + input logic qp, qz, qn, output logic [N-1:0] r ); @@ -236,7 +249,7 @@ module otfc2 #(parameter N=64) ( logic [N+1:0] QR, QMR; flopr #(N+3) Qreg(clk, Start, QNext, Q); - mux2 #(`DIVLEN+3) Qmux(QMNext, {`DIVLEN+3{1'b1}}, Start, QMMux); + mux2 #(`DIVLEN+3) Qmux(QMNext, {(`DIVLEN+3){1'b1}}, Start, QMMux); flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); always_comb begin @@ -248,7 +261,7 @@ module otfc2 #(parameter N=64) ( end else if (qz) begin QNext = {QR, 1'b0}; QMNext = {QMR, 1'b1}; - end else begin // If qp and qz are not true, then qm is + end else begin // If qp and qz are not true, then qn is QNext = {QMR, 1'b1}; QMNext = {QMR, 1'b0}; end @@ -266,7 +279,7 @@ module sotfc2( input logic sp, sn, input logic [`DIVLEN+3:0] C, output logic [`DIVLEN-1:0] Sq, - output logic [`DIVLEN+3:0] F, + output logic [`DIVLEN+3:0] F ); @@ -275,7 +288,7 @@ module sotfc2( logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, (`DIVLEN){1'b0}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) SMreg(clk, SMux, S); always_comb begin @@ -305,17 +318,18 @@ module creg(input logic clk, ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, (`DIVLEN-2){1'b0}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule ///////////// // counter // ///////////// -module counter(input logic clk, - input logic req, - input logic [$clog2(`XLEN+1)-1:0] dur, - output logic done); +module srtcounter(input logic clk, + input logic req, + input logic [$clog2(`XLEN+1)-1:0] dur, + output logic done +); logic [$clog2(`XLEN+1)-1:0] count; diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 537fbb3e..b83e6b00 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("testvectors", Tests); + $readmemh ("sqrttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; From d57fb6f98a364e1323371cc8a05776c034ebf63b Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 19:46:48 +0000 Subject: [PATCH 17/36] radix 4 files removed from srt and divlen modified for sqrt --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/srt/lint-srt | 1 - pipelined/srt/qslc_r4a2.c | 198 ----- pipelined/srt/qslc_r4a2b | Bin 16064 -> 0 bytes pipelined/srt/qslc_r4a2b.c | 190 ----- pipelined/srt/qslc_r4a2b.tv | 1024 ---------------------- pipelined/srt/qslc_sqrt_r4a2 | Bin 16152 -> 0 bytes pipelined/srt/qslc_sqrt_r4a2.c | 198 ----- pipelined/srt/qslc_sqrt_r4a2.sv | 1026 ----------------------- pipelined/srt/srt.sv | 4 +- 10 files changed, 3 insertions(+), 2640 deletions(-) delete mode 100644 pipelined/srt/qslc_r4a2.c delete mode 100755 pipelined/srt/qslc_r4a2b delete mode 100644 pipelined/srt/qslc_r4a2b.c delete mode 100644 pipelined/srt/qslc_r4a2b.tv delete mode 100755 pipelined/srt/qslc_sqrt_r4a2 delete mode 100644 pipelined/srt/qslc_sqrt_r4a2.c delete mode 100644 pipelined/srt/qslc_sqrt_r4a2.sv diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 54fa7a9b..ad52be2e 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -103,7 +103,7 @@ // division constants `define RADIX 32'h4 `define DIVCOPIES 32'h4 -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 1)) `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) diff --git a/pipelined/srt/lint-srt b/pipelined/srt/lint-srt index fd42df88..399201be 100755 --- a/pipelined/srt/lint-srt +++ b/pipelined/srt/lint-srt @@ -1,2 +1 @@ verilator --lint-only --top-module srt srt.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv -verilator --lint-only --top-module srtradix4 srt-radix4.sv qsel4.sv -I../config/rv64gc -I../config/shared ../src/generic/*.sv ../src/generic/flop/*.sv diff --git a/pipelined/srt/qslc_r4a2.c b/pipelined/srt/qslc_r4a2.c deleted file mode 100644 index 8e68f998..00000000 --- a/pipelined/srt/qslc_r4a2.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - Program: qslc_r4a2.c - Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) - User: James E. Stine - -*/ - -#include -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - printf("\tcase({D[5:3],Wmsbs})\n"); - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - printf("\t\t10'b"); - disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); - printf("_"); - disp_binary((double) pla.tot, TOT_SIZE, 0); - printf(": q = 4'b"); - - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 12) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -4) - printf("0000"); - else if ((pla.tot) >= -13) - printf("0010"); - else - printf("0001"); - break; - case 1: - if ((pla.tot) >= 14) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -15) - printf("0010"); - else - printf("0001"); - break; - case 2: - if ((pla.tot) >= 15) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -16) - printf("0010"); - else - printf("0001"); - break; - case 3: - if ((pla.tot) >= 16) - printf("1000"); - else if ((pla.tot) >= 4) - printf("0100"); - else if ((pla.tot) >= -6) - printf("0000"); - else if ((pla.tot) >= -18) - printf("0010"); - else - printf("0001"); - break; - case 4: - if ((pla.tot) >= 18) - printf("1000"); - else if ((pla.tot) >= 6) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -20) - printf("0010"); - else - printf("0001"); - break; - case 5: - if ((pla.tot) >= 20) - printf("1000"); - else if ((pla.tot) >= 6) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -20) - printf("0010"); - else - printf("0001"); - break; - case 6: - if ((pla.tot) >= 20) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -22) - printf("0010"); - else - printf("0001"); - break; - case 7: - if ((pla.tot) >= 24) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -24) - printf("0010"); - else - printf("0001"); - break; - default: printf ("XXX"); - - } - - printf(";\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - printf("\tendcase\n"); - -} diff --git a/pipelined/srt/qslc_r4a2b b/pipelined/srt/qslc_r4a2b deleted file mode 100755 index f719bbf471bfc1094ffe5bdd0d6441d7c2426e9d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16064 zcmeHOeQ;FO6~CLXB*BmkpF;d#p-TBMO9COq4>SZ87AFYApwem|o9rgpHrb848wm~{ zv8k05VXVy9I8*$n(-B*(`GfIeoJx=?GPM@Qahlp0jSkFg@B;`~SB<>>&b{}%ygUa;WcV^8M2bI&=ybMJZQ-uH6vzWa9gt5#;YT!JZE+$czDFI1WeHvE7KLR5$)Vm!_> z#Fb(!)MIc|XtR^|W+?;nT#d^HC%>v#9Zj>AU`)5wCv>T zbzscdZnftnXOLH(EnY(a5Ocd9h4MtPG>a#e4(Ba8FKwE$ABp7Gt^K;SA9KNY%{iZx z6FQdYd^&jvY(&hRevR7C$yX>h!JOJ4y7go-^j+Fpr~Lw6S&eY=8xWG`kIZ#H`ajpNc_jH168`dK)> zMaJryHAW~L4>v>-$#A@8&GM#LG+Yy`Z3+v+XlRZ_jYKjSPa1}h&=G+xvCRP2wFQm( zNHo|KxtDy=vvu0HIT(qGmeyolV=ykDZ>A0|r&66Nt*TmGx!hRjTX?PD;-WIExX`zl z;?6=Zxp4Qn@W*{c1muYy{Yv)c3FSSrl;|rp-cuc8OUTHlYvYIG8xEZAd`XrZ3fPHuKX&{@kwrAm-`tmdssGXE`Av50rTp;V4fL*@Qq{{=>O0(&4;8+i-dvx<17QcPoh@{ ziHCfbL*8JQOFv!RdHlzL&W{2eC(f*^@t5|L9u4eXO7l~5$|4}o*ZVv(elFvIej_x2 z-R^2)*QS$G5c*UqBlJA8&oiT)fQK~)ey+qXo<(f#g+S+-z@g7>3LHvj2V8Fij$cSl zMg=E*s)Agzuii5QakD>NU;9!ri`MxY0v$_Vqhg@*WHLXnYv~&hQhB)UR0yXx+<$`X zx(mL;{QEbACg@%;F;i0b#6P++1fYYDfFk#dA5&#sz8W(Pt75+_3rZbebHO* zIo&hhnb(8x9+Ad5Z`KDq(Vo;x7fchS?L8zOtM@F}(0k9Em#7ow`%iTG&wpHSumnP? z9k>qvdDkxg`Ro0C+fVgB3oX?O7k}!WYjgwt6KJ7qSPQqo6OpY+4S4R55vA!H-s#?mg6BndHCX-CIfM0`?o}ur@eiU_(SJKuDo6ZjS)PhAJm*Ny9M20q z9qAm;1J7cu(b*H&J^6kDVKj{yFT}1fS%I#=?kVYfs9eWx#9(^SeHts*Lw#fG52f>N zxMaKM;1b|dvk`ZH9CZNjwhfRCKp6qt1X!;C&)EQjTRkcZCk6p1QGf?+fO7!QFAS-} z1n>Z$GIfAN`h^EA-f6>+=aBXi-~%k(0Jz=;utJIx;8g`EvH`y0kk%7mp8|Y2f9R^u za!4fv*b0COX(oxy3V+mwFW`^_0jd;WzYSo8^ugBv%vXR1Yyc~yR|zmq0qSjlGaS-B z0`%iSp+ee2;(HL0gmN3+!yz>j;AI7vVgpzqRS}>|0nX1GN`e*Ad;)A%fOl+w&p4!U z1h@?V71GyVL0qNqPucJjIHdOoFk1nBW&^kZkYnj(0=N|*Xal6VcHRbjkB<#7k^Kp?5-Q9d}1O20!1-ql0i9eq1GdR#cWdXA;(G1s?a z_vF`srG(4_j-Hcw$MB@Q0z!vB?dsT;-r?EPhn~XoF_)BmIkJiz6gppn^4$03<2LWu zPIrom{y{hm=6Loz4yTu>?xTlsqKCAcLs+v0GG!=}flLN68OUTHlYvYI{$DdN^u|g> zSa8lXpTtpi-ZYQk_~GZK`6-U}f0^c_FR+ePw8L>d6|T#&ucvpm^e(9Pv}v}0=bkam zF7RpK-QY{WX&y84tZBXpe)|9x&frA!Zj{J%Z?$l>6}m3V&&hoV(>>B(j)Ur)O@I|S z@|D?DDAGJ@6ZEv$6$OPW3#Q%dxny&0ySS<7x_OJPnnh~zzY|9zaMF>KAkrsskiQq4 ziQ0M)$9lwCj`oGWpzs%2%g2w+E_NX#8OyU!H{cq6i>2j|{ZTjBGnC0dCIguaWHOM+ zKqdp33}iBp$v`FpnGAf_4Ddcb-q%M<5+d4LNW?PjB_zsM%-UFp*IJTROjCTimU%zn zWm@KyOOcj&Um>lBh}i!51v5r+H(tsR(GrNL3$Lb#crReDw$rcGM9(NDF40_vBc^DX znRXN1dS4)3dq|RNJd|kNM#OsyFIA#w)D5$KzYdJ!?$NkB&AI(6wahrYyph+3yh;_` zWlH%DT_2XeQs%g@Oq6n^w&PcSDf4yc>i=uZ+6V5au|zL!E9{G02j*g8ZiTY0TE6@` z@0<;_t^m$qNLS0yF`&FtWYxFFSNVbC8i1<=gKO?RurgyzVBGSQ{tys z`*2zRGzufdKiYL2ko8O!>t=EdLwuxqUX%Dsg|pxPBHbog`{%iq_a%Osb^oybJPH$# zhshO6qiKMR zP%u5F@r4fj?=}8$eR1+PBH*9b`iS12Eq_~r{uSscPG>zwp!dS=qz|E=mhZ-I+VGJs4D zgWjEJsx#tc!G*Pm$}m<|FI(d`{I{&2-Tf;z-m+}X>g7=5qX@$v(AL0;YGJIZT3flS z%2>N{<*oi2qh?uUmERC8O+nt&U%|WfX{-LXw*FgI?CHPg_XgomFo`;2n=KD~5FjrY zm6A<;PXJ-~hY3a~5i=Tt(GY#KuzD>}p-9wdO@u=NIl{@PO(e9h{1zb;NwgTX_*Mbw zGYow6z)7I5Ax8YfVfg0_L-En|QksSECAKstgSFtvxZ;gmibU~gMT_u7W67|uA=>I| ziN{*P@#GepsvhK!%%L<2p!=r`p;D~f5NSY{~3y$-?Q=jNt-2>%f!EjgXS#kKiYR&geBs6 zijssVN5Ssz{C!Cw=4}73Fi`vK zAMh$9^Ba_Cot^Ff6BfJwPOpM8f6MYz1s*@FK8XUhj>>zGbiA(Q?=78pmYJW0kH_ZE z^YQ(~RNK&aRnm~m{@=hrW0%)kJWuBDXE`z4jqJyH`~!UH`YnIG?(ALwC1{;u)|vg8 zkA<_{UsPyliQ?Q2SHONO(>$AW+&<3(3$?!!@9e(`x_u4jY6bhxwlu0hu@c!ij~!)E z=Owzr{W`EKpr$D;$HUK`Jm@%f<*Cknv>*dzIV$INm6hh=Bl~|OzpNCZ$l>4+#lHca C!tRp* diff --git a/pipelined/srt/qslc_r4a2b.c b/pipelined/srt/qslc_r4a2b.c deleted file mode 100644 index 94a3a4cd..00000000 --- a/pipelined/srt/qslc_r4a2b.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - Program: qslc_r4a2.c - Description: Prints out Quotient Selection Table (assumes CPA is utilized to reduce memory) - User: James E. Stine - -*/ - -#include -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 12) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -4) - printf("0"); - else if ((pla.tot) >= -13) - printf("2"); - else - printf("1"); - break; - case 1: - if ((pla.tot) >= 14) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -15) - printf("2"); - else - printf("1"); - break; - case 2: - if ((pla.tot) >= 15) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -16) - printf("2"); - else - printf("1"); - break; - case 3: - if ((pla.tot) >= 16) - printf("8"); - else if ((pla.tot) >= 4) - printf("4"); - else if ((pla.tot) >= -6) - printf("0"); - else if ((pla.tot) >= -18) - printf("2"); - else - printf("1"); - break; - case 4: - if ((pla.tot) >= 18) - printf("8"); - else if ((pla.tot) >= 6) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -20) - printf("2"); - else - printf("1"); - break; - case 5: - if ((pla.tot) >= 20) - printf("8"); - else if ((pla.tot) >= 6) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -20) - printf("2"); - else - printf("1"); - break; - case 6: - if ((pla.tot) >= 20) - printf("8"); - else if ((pla.tot) >= 8) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -22) - printf("2"); - else - printf("1"); - break; - case 7: - if ((pla.tot) >= 24) - printf("8"); - else if ((pla.tot) >= 8) - printf("4"); - else if ((pla.tot) >= -8) - printf("0"); - else if ((pla.tot) >= -24) - printf("2"); - else - printf("1"); - break; - default: printf ("X"); - - } - - printf("\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - -} diff --git a/pipelined/srt/qslc_r4a2b.tv b/pipelined/srt/qslc_r4a2b.tv deleted file mode 100644 index b92d81e8..00000000 --- a/pipelined/srt/qslc_r4a2b.tv +++ /dev/null @@ -1,1024 +0,0 @@ -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -0 -0 -0 -0 -0 -0 -0 -0 diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 deleted file mode 100755 index 5cff70cdf9d63dd415b92ba2ce9092b7da87695f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16152 zcmeHOdvsLA8J`WrB!X;$pnwYN1Lc(^;Z;B$0kUD^0i_5BeBNxbn`DnkcHQ0ZQbIJM zMEPpw*|r#-UxKii_>%XBh5Jh6HI0NSiV!SvF z<)Koi>_tkzs!~Trf0?cILm-JahB5=lbCp`K(#OMN-T^qOG#oLSRr{P{J(y82D|o952RpEJuH4Y_B?CQ!RvIA}~QzkDrS zmN@Bj7)3^-DziVbjna0)U$iKH%R&F^>xSLk_;$f#_cYy_^Vu}QP`imk7)s=iFFBPd zpM!%iDjv5Wr*tF_1b$mfaql zCCZoKxL^Riuu9jgwETfcpeh)R1tM!!E~^RE1=je=YXZWus%k@ZRy5{|#4Jle2Ly&g z8v(A^_4u{OC&QX3FvNRrCIGVqG4=(pm@g3wT5vLiF z$N96;K1BRb>*~h^F-$VVV+2Ox=MUfayYiI2O+Q~)U#9ihMas^4E_W6a zd=C&ZVd(XvSvu%8^jrrSdJR2|b56Upo_si12^lx^bpLSLZ|LQ7UmMyDy}Eyxk(TKQ zq$7}yKso~H2&5yBj==v%1dch!pYS&QE!*3Y`N!!(c(=!68Oe5U)63Z}$p9p0-wJh4 za$*yNbAljzHCcMP-c2TxJ7t+BWIbI6Qe~PL_H^w{m1#oQ)Achh!`oKrtyoBZDCgVC z6K&=5Y~|5rx$xuC<^$Jzo8R*`9qL}S##0zCe9haskmje!(Z}3E9ItdcC)|ccpo6Uo z$Aar^%^XQ$UT)Rw1ezqXQguj`o(;TPvb%|i~6Z+y-f>mrL4F4aBR4@W#R28B)U!|lLL%9CJ3D@yrMzDxGWglC7h~ z)A@NvrSnL<$2ldA<~=NpllE77optfVqo@$MhBy`5E1lEVc5Ilmn=ay1&!J||@sCFA zDL^4n23(WpxTD2${8CS6!-ri@Kucah@gz1((-n9Qp@!mqHI#sfmaRS-B~15a zaM-VTS^da4+&piyr8vB)@|&m0L+IWg;GWpid0vKSR7htt)0H4%g7S3M_DiRfy`SFmPn>dbOJhxi?(MONCy4ia zW-EAmJ5Q_J+neKjfw*si+fLke#iccm;(iVWYLA(XzC2ZXQ*V~@j>TBqG{?QI zqqc)gEslcIGBm^6=4~BaMSMs5B8>LuGe>}^t6y+-!&pEkkh%F))wve9@5+QXK;NI* z2hEbuFNPV=GJ^gIt1Uox8_?dH$?NY-=xl=iR6$z}=m!$|qzOHpptk`EANQGtqhG>s zgYr>o_&7(l>}nIbkDx^gI^KY)PRuf)_Y-uqf*zaF*Wb@%%SM>cFhM`WsuTU)Z$J-8 zXySAuG#-NP0Ti7$Xd0%G;bGguZnG&{YcRHlW94 z%Suh?;{=_kphFC3r-WW;LK_Ks0wWonaGHiHGQ0}|0eequuN2->2cK?ABfahnPK;0r*@6m*dR{j2Q6^(OR}1f2~iI&qn4cz_J&Djz2% z^>yMxc_oIK(2WE=iq{MJ`-TA>CZUJ%dq%3iD+szzK_4`rsuO!m==lV_UqLq;(6?nL z?lYmE^#V#iS)vo$O~YTvuul1yZTPrEwyea2K26YR3VON$Rh@8|&^rj~P|$ZS=?hJ_ zY}pBVB=Y0pT7tfXcL_rCtO4zi(1Rwlkf4tPicZ8$!x%E$t$c(HAF30#n9z?u2ee8- zJqFY%`+JQEeUYFG6m*;cRh^h^LhmJLo`N2^xUatlWha~_w1%J`s7LIVcuJ-MaU~gE zhXH|j-0(3~w(M6XbP_=yP|!vL>TEa*p`qXXQeMCM3@^VeaJk;Vb>B?5sEPYM;Z_4z zVBp@AxK$?ZNy1G5o8A|xTJ9z7C}`cL_7-TN8?SVU)hMEZHzuay)6}`GDel185pVCs zYY@5CQLmt16oW^AqgQQCJ6^WmN1@5n>uB2AyUlsm6S&@Ze{LsbXO?u4g-r8{P==+l zH>Z6^W11Rg)bVIbW`y)v**veZrZxqXnAz&WRa6oSa?afnD4sLwa_&Sxx#{i0#Sgo zK!#3*GFK?puU{`N6*+-AKRFWON5_&$|M6sUGM>+$;CKdajZJvpEyUwH0}~#dwwY}} zzQ}RTkW1-5V7zR^jw8t={cqU;*#?<_dcQUyiGVf?IxgT?|C)(61u>h}@D9V=i+J+nC)b7LS`h zW#&Z}kec|{;+PAZB$5(|^f4R-&=)~+qPp(Hu>@_MgZhPMMDFbw%g#7$NWKG2l5Kec z>UHSDaZD`w>>p*3J)P1KNJk(Yfpi4Y5lBZM9f5QN(h*2U;D0v)yf2UU;n9+T5-*Ur z+^p0Bf7Pg@7b9or;=yq$Ggg;*|KB;f%qt5G6TAWW#P z&<*2u$2AT=+snlDkJn|!;Xf7GKg3n4@aLQ?w`qS^pUN_~3(GiJZqRltVP%>7q22%2 zL~7r+&0~SS-bGrj(y~lT8~mRZvx^jD`Lbp6U6a<9*Vo1BU4@128SaAo!g=+wTsT>4 zzQ!Ak&!OUI(hi~P;DF^$9VnLuoC!G)o~}~{cDHuh`a?X@yYxO)6lI!;GwJ!h(P~kO zj6;kQxkXCD_Ydu^c8If5<6c>XxEUGp!}lTUcS!u`)IM6)KZ49a?H{aPyQQB|V%PcX zp-&FvXP?BME$rjq4M{sQwSS(yyeIKvQuh<jlO-wNfZa#51xxa4W!S9 z-lbghJGV$Zo;F4O)S`3{KUYJaE5@d-3$uKK;KQWP$P@frWBi6e@Xgx4y*>9p?_#nc zAA&wFW31r&fK5-6o+6=19JjsD=OUhCiWsTnx1b*hOI(jz`5O~V`!t@%A+z4p_;+-$ z)8kzJb_D(V&{I3@evUxzVyA}8!Gr4pqs-ZhpdXomw}MJYV6{2r{y%npqDrkp&6i9!_s;UW<`)VwIEEI`azWPm~B2*i$ z3B&?^cfr62+F~EHe36K6vlXa|MK+7dh_5za`Ri+IH-lo7EFfe3pf^TquqXdUZ0C>6 z^36cYEURSol9e9Ib9piC@GrjV@+B)vmqCpWDJ+jyTfN1rg|&Re6-$?_u&yX6xze-7 zTC-&73XdhiH9p?cU&OohX^Z|pZ2wPLv6uhrzB!<;1X5V(Dd6|TP)TTGpL(8OYCpVjN%>GCPcb-; zs(iIUI8$_lTevagYcZ1f7Wz_i{7wbCGrQ-ZlCB-e>7zpq6(2I_U9NL5SPW^G}vsKhHny?e~FC zb5G{;{FG(4u9wzYA7;7{1)4iDpXZA#d0xxyXB^80 z@M$i}WuA|+T%{}H7<2o%4XwyfOqtK~W0r9Z1ulps~yAI-Y;?cexxGy8kFKkK_0_bkzS8U$1X>O((PvW%As2Vm`~$ zKsNcJNMj3>I7&h;{urp*KK=hKS~X`GE -#include - -#define DIVISOR_SIZE 3 -#define CARRY_SIZE 7 -#define SUM_SIZE 7 -#define TOT_SIZE 7 - -void disp_binary(double, int, int); - -struct bits { - unsigned int divisor : DIVISOR_SIZE; - int tot : TOT_SIZE; -} pla; - -/* - - Function: disp_binary - Description: This function displays a Double-Precision number into - four 16 bit integers using the global union variable - dp_number - Argument List: double x The value to be converted - int bits_to_left Number of bits left of radix point - int bits_to_right Number of bits right of radix point - Return value: none - -*/ -void disp_binary(double x, int bits_to_left, int bits_to_right) { - int i; - double diff; - - if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - printf("0"); - } - if (i == bits_to_right+1) - ; - - return; - } - - if (x < 0.0) - x = pow(2.0, ((double) bits_to_left)) + x; - - for (i = -bits_to_left + 1; i <= bits_to_right; i++) { - diff = pow(2.0, ((double) -i) ); - if (x < diff) - printf("0"); - else { - printf("1"); - x -= diff; - } - if (i == 0) - ; - - } - -} - -int main() { - int m; - int n; - int o; - pla.divisor = 0; - pla.tot = 0; - printf("\tcase({D[5:3],Wmsbs})\n"); - for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { - for (m=0; m < pow(2.0, TOT_SIZE); m++) { - printf("\t\t11'b"); - disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); - printf("_"); - disp_binary((double) pla.tot, TOT_SIZE, 0); - printf(": q = 4'b"); - - /* - 4 bits for Radix 4 (a=2) - 1000 = +2 - 0100 = +1 - 0000 = 0 - 0010 = -1 - 0001 = -2 - */ - switch (pla.divisor) { - case 0: - if ((pla.tot) >= 24) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -8) - printf("0000"); - else if ((pla.tot) >= -26) - printf("0010"); - else - printf("0001"); - break; - case 1: - if ((pla.tot) >= 28) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -10) - printf("0000"); - else if ((pla.tot) >= -28) - printf("0010"); - else - printf("0001"); - break; - case 2: - if ((pla.tot) >= 32) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -32) - printf("0010"); - else - printf("0001"); - break; - case 3: - if ((pla.tot) >= 32) - printf("1000"); - else if ((pla.tot) >= 8) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -34) - printf("0010"); - else - printf("0001"); - break; - case 4: - if ((pla.tot) >= 36) - printf("1000"); - else if ((pla.tot) >= 12) - printf("0100"); - else if ((pla.tot) >= -12) - printf("0000"); - else if ((pla.tot) >= -36) - printf("0010"); - else - printf("0001"); - break; - case 5: - if ((pla.tot) >= 40) - printf("1000"); - else if ((pla.tot) >= 12) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -40) - printf("0010"); - else - printf("0001"); - break; - case 6: - if ((pla.tot) >= 40) - printf("1000"); - else if ((pla.tot) >= 16) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -44) - printf("0010"); - else - printf("0001"); - break; - case 7: - if ((pla.tot) >= 44) - printf("1000"); - else if ((pla.tot) >= 16) - printf("0100"); - else if ((pla.tot) >= -16) - printf("0000"); - else if ((pla.tot) >= -46) - printf("0010"); - else - printf("0001"); - break; - default: printf ("XXX"); - - } - - printf(";\n"); - (pla.tot)++; - } - (pla.divisor)++; - } - printf("\tendcase\n"); - -} diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv deleted file mode 100644 index 805dbbae..00000000 --- a/pipelined/srt/qslc_sqrt_r4a2.sv +++ /dev/null @@ -1,1026 +0,0 @@ - case({D[5:3],Wmsbs}) - 11'b000_0000000: q = 4'b0000; - 11'b000_0000001: q = 4'b0000; - 11'b000_0000010: q = 4'b0000; - 11'b000_0000011: q = 4'b0000; - 11'b000_0000100: q = 4'b0000; - 11'b000_0000101: q = 4'b0000; - 11'b000_0000110: q = 4'b0000; - 11'b000_0000111: q = 4'b0000; - 11'b000_0001000: q = 4'b0100; - 11'b000_0001001: q = 4'b0100; - 11'b000_0001010: q = 4'b0100; - 11'b000_0001011: q = 4'b0100; - 11'b000_0001100: q = 4'b0100; - 11'b000_0001101: q = 4'b0100; - 11'b000_0001110: q = 4'b0100; - 11'b000_0001111: q = 4'b0100; - 11'b000_0010000: q = 4'b0100; - 11'b000_0010001: q = 4'b0100; - 11'b000_0010010: q = 4'b0100; - 11'b000_0010011: q = 4'b0100; - 11'b000_0010100: q = 4'b0100; - 11'b000_0010101: q = 4'b0100; - 11'b000_0010110: q = 4'b0100; - 11'b000_0010111: q = 4'b0100; - 11'b000_0011000: q = 4'b1000; - 11'b000_0011001: q = 4'b1000; - 11'b000_0011010: q = 4'b1000; - 11'b000_0011011: q = 4'b1000; - 11'b000_0011100: q = 4'b1000; - 11'b000_0011101: q = 4'b1000; - 11'b000_0011110: q = 4'b1000; - 11'b000_0011111: q = 4'b1000; - 11'b000_0100000: q = 4'b1000; - 11'b000_0100001: q = 4'b1000; - 11'b000_0100010: q = 4'b1000; - 11'b000_0100011: q = 4'b1000; - 11'b000_0100100: q = 4'b1000; - 11'b000_0100101: q = 4'b1000; - 11'b000_0100110: q = 4'b1000; - 11'b000_0100111: q = 4'b1000; - 11'b000_0101000: q = 4'b1000; - 11'b000_0101001: q = 4'b1000; - 11'b000_0101010: q = 4'b1000; - 11'b000_0101011: q = 4'b1000; - 11'b000_0101100: q = 4'b1000; - 11'b000_0101101: q = 4'b1000; - 11'b000_0101110: q = 4'b1000; - 11'b000_0101111: q = 4'b1000; - 11'b000_0110000: q = 4'b1000; - 11'b000_0110001: q = 4'b1000; - 11'b000_0110010: q = 4'b1000; - 11'b000_0110011: q = 4'b1000; - 11'b000_0110100: q = 4'b1000; - 11'b000_0110101: q = 4'b1000; - 11'b000_0110110: q = 4'b1000; - 11'b000_0110111: q = 4'b1000; - 11'b000_0111000: q = 4'b1000; - 11'b000_0111001: q = 4'b1000; - 11'b000_0111010: q = 4'b1000; - 11'b000_0111011: q = 4'b1000; - 11'b000_0111100: q = 4'b1000; - 11'b000_0111101: q = 4'b1000; - 11'b000_0111110: q = 4'b1000; - 11'b000_0111111: q = 4'b1000; - 11'b000_1000000: q = 4'b0001; - 11'b000_1000001: q = 4'b0001; - 11'b000_1000010: q = 4'b0001; - 11'b000_1000011: q = 4'b0001; - 11'b000_1000100: q = 4'b0001; - 11'b000_1000101: q = 4'b0001; - 11'b000_1000110: q = 4'b0001; - 11'b000_1000111: q = 4'b0001; - 11'b000_1001000: q = 4'b0001; - 11'b000_1001001: q = 4'b0001; - 11'b000_1001010: q = 4'b0001; - 11'b000_1001011: q = 4'b0001; - 11'b000_1001100: q = 4'b0001; - 11'b000_1001101: q = 4'b0001; - 11'b000_1001110: q = 4'b0001; - 11'b000_1001111: q = 4'b0001; - 11'b000_1010000: q = 4'b0001; - 11'b000_1010001: q = 4'b0001; - 11'b000_1010010: q = 4'b0001; - 11'b000_1010011: q = 4'b0001; - 11'b000_1010100: q = 4'b0001; - 11'b000_1010101: q = 4'b0001; - 11'b000_1010110: q = 4'b0001; - 11'b000_1010111: q = 4'b0001; - 11'b000_1011000: q = 4'b0001; - 11'b000_1011001: q = 4'b0001; - 11'b000_1011010: q = 4'b0001; - 11'b000_1011011: q = 4'b0001; - 11'b000_1011100: q = 4'b0001; - 11'b000_1011101: q = 4'b0001; - 11'b000_1011110: q = 4'b0001; - 11'b000_1011111: q = 4'b0001; - 11'b000_1100000: q = 4'b0001; - 11'b000_1100001: q = 4'b0001; - 11'b000_1100010: q = 4'b0001; - 11'b000_1100011: q = 4'b0001; - 11'b000_1100100: q = 4'b0001; - 11'b000_1100101: q = 4'b0001; - 11'b000_1100110: q = 4'b0010; - 11'b000_1100111: q = 4'b0010; - 11'b000_1101000: q = 4'b0010; - 11'b000_1101001: q = 4'b0010; - 11'b000_1101010: q = 4'b0010; - 11'b000_1101011: q = 4'b0010; - 11'b000_1101100: q = 4'b0010; - 11'b000_1101101: q = 4'b0010; - 11'b000_1101110: q = 4'b0010; - 11'b000_1101111: q = 4'b0010; - 11'b000_1110000: q = 4'b0010; - 11'b000_1110001: q = 4'b0010; - 11'b000_1110010: q = 4'b0010; - 11'b000_1110011: q = 4'b0010; - 11'b000_1110100: q = 4'b0010; - 11'b000_1110101: q = 4'b0010; - 11'b000_1110110: q = 4'b0010; - 11'b000_1110111: q = 4'b0010; - 11'b000_1111000: q = 4'b0000; - 11'b000_1111001: q = 4'b0000; - 11'b000_1111010: q = 4'b0000; - 11'b000_1111011: q = 4'b0000; - 11'b000_1111100: q = 4'b0000; - 11'b000_1111101: q = 4'b0000; - 11'b000_1111110: q = 4'b0000; - 11'b000_1111111: q = 4'b0000; - 11'b001_0000000: q = 4'b0000; - 11'b001_0000001: q = 4'b0000; - 11'b001_0000010: q = 4'b0000; - 11'b001_0000011: q = 4'b0000; - 11'b001_0000100: q = 4'b0000; - 11'b001_0000101: q = 4'b0000; - 11'b001_0000110: q = 4'b0000; - 11'b001_0000111: q = 4'b0000; - 11'b001_0001000: q = 4'b0100; - 11'b001_0001001: q = 4'b0100; - 11'b001_0001010: q = 4'b0100; - 11'b001_0001011: q = 4'b0100; - 11'b001_0001100: q = 4'b0100; - 11'b001_0001101: q = 4'b0100; - 11'b001_0001110: q = 4'b0100; - 11'b001_0001111: q = 4'b0100; - 11'b001_0010000: q = 4'b0100; - 11'b001_0010001: q = 4'b0100; - 11'b001_0010010: q = 4'b0100; - 11'b001_0010011: q = 4'b0100; - 11'b001_0010100: q = 4'b0100; - 11'b001_0010101: q = 4'b0100; - 11'b001_0010110: q = 4'b0100; - 11'b001_0010111: q = 4'b0100; - 11'b001_0011000: q = 4'b0100; - 11'b001_0011001: q = 4'b0100; - 11'b001_0011010: q = 4'b0100; - 11'b001_0011011: q = 4'b0100; - 11'b001_0011100: q = 4'b1000; - 11'b001_0011101: q = 4'b1000; - 11'b001_0011110: q = 4'b1000; - 11'b001_0011111: q = 4'b1000; - 11'b001_0100000: q = 4'b1000; - 11'b001_0100001: q = 4'b1000; - 11'b001_0100010: q = 4'b1000; - 11'b001_0100011: q = 4'b1000; - 11'b001_0100100: q = 4'b1000; - 11'b001_0100101: q = 4'b1000; - 11'b001_0100110: q = 4'b1000; - 11'b001_0100111: q = 4'b1000; - 11'b001_0101000: q = 4'b1000; - 11'b001_0101001: q = 4'b1000; - 11'b001_0101010: q = 4'b1000; - 11'b001_0101011: q = 4'b1000; - 11'b001_0101100: q = 4'b1000; - 11'b001_0101101: q = 4'b1000; - 11'b001_0101110: q = 4'b1000; - 11'b001_0101111: q = 4'b1000; - 11'b001_0110000: q = 4'b1000; - 11'b001_0110001: q = 4'b1000; - 11'b001_0110010: q = 4'b1000; - 11'b001_0110011: q = 4'b1000; - 11'b001_0110100: q = 4'b1000; - 11'b001_0110101: q = 4'b1000; - 11'b001_0110110: q = 4'b1000; - 11'b001_0110111: q = 4'b1000; - 11'b001_0111000: q = 4'b1000; - 11'b001_0111001: q = 4'b1000; - 11'b001_0111010: q = 4'b1000; - 11'b001_0111011: q = 4'b1000; - 11'b001_0111100: q = 4'b1000; - 11'b001_0111101: q = 4'b1000; - 11'b001_0111110: q = 4'b1000; - 11'b001_0111111: q = 4'b1000; - 11'b001_1000000: q = 4'b0001; - 11'b001_1000001: q = 4'b0001; - 11'b001_1000010: q = 4'b0001; - 11'b001_1000011: q = 4'b0001; - 11'b001_1000100: q = 4'b0001; - 11'b001_1000101: q = 4'b0001; - 11'b001_1000110: q = 4'b0001; - 11'b001_1000111: q = 4'b0001; - 11'b001_1001000: q = 4'b0001; - 11'b001_1001001: q = 4'b0001; - 11'b001_1001010: q = 4'b0001; - 11'b001_1001011: q = 4'b0001; - 11'b001_1001100: q = 4'b0001; - 11'b001_1001101: q = 4'b0001; - 11'b001_1001110: q = 4'b0001; - 11'b001_1001111: q = 4'b0001; - 11'b001_1010000: q = 4'b0001; - 11'b001_1010001: q = 4'b0001; - 11'b001_1010010: q = 4'b0001; - 11'b001_1010011: q = 4'b0001; - 11'b001_1010100: q = 4'b0001; - 11'b001_1010101: q = 4'b0001; - 11'b001_1010110: q = 4'b0001; - 11'b001_1010111: q = 4'b0001; - 11'b001_1011000: q = 4'b0001; - 11'b001_1011001: q = 4'b0001; - 11'b001_1011010: q = 4'b0001; - 11'b001_1011011: q = 4'b0001; - 11'b001_1011100: q = 4'b0001; - 11'b001_1011101: q = 4'b0001; - 11'b001_1011110: q = 4'b0001; - 11'b001_1011111: q = 4'b0001; - 11'b001_1100000: q = 4'b0001; - 11'b001_1100001: q = 4'b0001; - 11'b001_1100010: q = 4'b0001; - 11'b001_1100011: q = 4'b0001; - 11'b001_1100100: q = 4'b0010; - 11'b001_1100101: q = 4'b0010; - 11'b001_1100110: q = 4'b0010; - 11'b001_1100111: q = 4'b0010; - 11'b001_1101000: q = 4'b0010; - 11'b001_1101001: q = 4'b0010; - 11'b001_1101010: q = 4'b0010; - 11'b001_1101011: q = 4'b0010; - 11'b001_1101100: q = 4'b0010; - 11'b001_1101101: q = 4'b0010; - 11'b001_1101110: q = 4'b0010; - 11'b001_1101111: q = 4'b0010; - 11'b001_1110000: q = 4'b0010; - 11'b001_1110001: q = 4'b0010; - 11'b001_1110010: q = 4'b0010; - 11'b001_1110011: q = 4'b0010; - 11'b001_1110100: q = 4'b0010; - 11'b001_1110101: q = 4'b0010; - 11'b001_1110110: q = 4'b0000; - 11'b001_1110111: q = 4'b0000; - 11'b001_1111000: q = 4'b0000; - 11'b001_1111001: q = 4'b0000; - 11'b001_1111010: q = 4'b0000; - 11'b001_1111011: q = 4'b0000; - 11'b001_1111100: q = 4'b0000; - 11'b001_1111101: q = 4'b0000; - 11'b001_1111110: q = 4'b0000; - 11'b001_1111111: q = 4'b0000; - 11'b010_0000000: q = 4'b0000; - 11'b010_0000001: q = 4'b0000; - 11'b010_0000010: q = 4'b0000; - 11'b010_0000011: q = 4'b0000; - 11'b010_0000100: q = 4'b0000; - 11'b010_0000101: q = 4'b0000; - 11'b010_0000110: q = 4'b0000; - 11'b010_0000111: q = 4'b0000; - 11'b010_0001000: q = 4'b0100; - 11'b010_0001001: q = 4'b0100; - 11'b010_0001010: q = 4'b0100; - 11'b010_0001011: q = 4'b0100; - 11'b010_0001100: q = 4'b0100; - 11'b010_0001101: q = 4'b0100; - 11'b010_0001110: q = 4'b0100; - 11'b010_0001111: q = 4'b0100; - 11'b010_0010000: q = 4'b0100; - 11'b010_0010001: q = 4'b0100; - 11'b010_0010010: q = 4'b0100; - 11'b010_0010011: q = 4'b0100; - 11'b010_0010100: q = 4'b0100; - 11'b010_0010101: q = 4'b0100; - 11'b010_0010110: q = 4'b0100; - 11'b010_0010111: q = 4'b0100; - 11'b010_0011000: q = 4'b0100; - 11'b010_0011001: q = 4'b0100; - 11'b010_0011010: q = 4'b0100; - 11'b010_0011011: q = 4'b0100; - 11'b010_0011100: q = 4'b0100; - 11'b010_0011101: q = 4'b0100; - 11'b010_0011110: q = 4'b0100; - 11'b010_0011111: q = 4'b0100; - 11'b010_0100000: q = 4'b1000; - 11'b010_0100001: q = 4'b1000; - 11'b010_0100010: q = 4'b1000; - 11'b010_0100011: q = 4'b1000; - 11'b010_0100100: q = 4'b1000; - 11'b010_0100101: q = 4'b1000; - 11'b010_0100110: q = 4'b1000; - 11'b010_0100111: q = 4'b1000; - 11'b010_0101000: q = 4'b1000; - 11'b010_0101001: q = 4'b1000; - 11'b010_0101010: q = 4'b1000; - 11'b010_0101011: q = 4'b1000; - 11'b010_0101100: q = 4'b1000; - 11'b010_0101101: q = 4'b1000; - 11'b010_0101110: q = 4'b1000; - 11'b010_0101111: q = 4'b1000; - 11'b010_0110000: q = 4'b1000; - 11'b010_0110001: q = 4'b1000; - 11'b010_0110010: q = 4'b1000; - 11'b010_0110011: q = 4'b1000; - 11'b010_0110100: q = 4'b1000; - 11'b010_0110101: q = 4'b1000; - 11'b010_0110110: q = 4'b1000; - 11'b010_0110111: q = 4'b1000; - 11'b010_0111000: q = 4'b1000; - 11'b010_0111001: q = 4'b1000; - 11'b010_0111010: q = 4'b1000; - 11'b010_0111011: q = 4'b1000; - 11'b010_0111100: q = 4'b1000; - 11'b010_0111101: q = 4'b1000; - 11'b010_0111110: q = 4'b1000; - 11'b010_0111111: q = 4'b1000; - 11'b010_1000000: q = 4'b0001; - 11'b010_1000001: q = 4'b0001; - 11'b010_1000010: q = 4'b0001; - 11'b010_1000011: q = 4'b0001; - 11'b010_1000100: q = 4'b0001; - 11'b010_1000101: q = 4'b0001; - 11'b010_1000110: q = 4'b0001; - 11'b010_1000111: q = 4'b0001; - 11'b010_1001000: q = 4'b0001; - 11'b010_1001001: q = 4'b0001; - 11'b010_1001010: q = 4'b0001; - 11'b010_1001011: q = 4'b0001; - 11'b010_1001100: q = 4'b0001; - 11'b010_1001101: q = 4'b0001; - 11'b010_1001110: q = 4'b0001; - 11'b010_1001111: q = 4'b0001; - 11'b010_1010000: q = 4'b0001; - 11'b010_1010001: q = 4'b0001; - 11'b010_1010010: q = 4'b0001; - 11'b010_1010011: q = 4'b0001; - 11'b010_1010100: q = 4'b0001; - 11'b010_1010101: q = 4'b0001; - 11'b010_1010110: q = 4'b0001; - 11'b010_1010111: q = 4'b0001; - 11'b010_1011000: q = 4'b0001; - 11'b010_1011001: q = 4'b0001; - 11'b010_1011010: q = 4'b0001; - 11'b010_1011011: q = 4'b0001; - 11'b010_1011100: q = 4'b0001; - 11'b010_1011101: q = 4'b0001; - 11'b010_1011110: q = 4'b0001; - 11'b010_1011111: q = 4'b0001; - 11'b010_1100000: q = 4'b0010; - 11'b010_1100001: q = 4'b0010; - 11'b010_1100010: q = 4'b0010; - 11'b010_1100011: q = 4'b0010; - 11'b010_1100100: q = 4'b0010; - 11'b010_1100101: q = 4'b0010; - 11'b010_1100110: q = 4'b0010; - 11'b010_1100111: q = 4'b0010; - 11'b010_1101000: q = 4'b0010; - 11'b010_1101001: q = 4'b0010; - 11'b010_1101010: q = 4'b0010; - 11'b010_1101011: q = 4'b0010; - 11'b010_1101100: q = 4'b0010; - 11'b010_1101101: q = 4'b0010; - 11'b010_1101110: q = 4'b0010; - 11'b010_1101111: q = 4'b0010; - 11'b010_1110000: q = 4'b0010; - 11'b010_1110001: q = 4'b0010; - 11'b010_1110010: q = 4'b0010; - 11'b010_1110011: q = 4'b0010; - 11'b010_1110100: q = 4'b0000; - 11'b010_1110101: q = 4'b0000; - 11'b010_1110110: q = 4'b0000; - 11'b010_1110111: q = 4'b0000; - 11'b010_1111000: q = 4'b0000; - 11'b010_1111001: q = 4'b0000; - 11'b010_1111010: q = 4'b0000; - 11'b010_1111011: q = 4'b0000; - 11'b010_1111100: q = 4'b0000; - 11'b010_1111101: q = 4'b0000; - 11'b010_1111110: q = 4'b0000; - 11'b010_1111111: q = 4'b0000; - 11'b011_0000000: q = 4'b0000; - 11'b011_0000001: q = 4'b0000; - 11'b011_0000010: q = 4'b0000; - 11'b011_0000011: q = 4'b0000; - 11'b011_0000100: q = 4'b0000; - 11'b011_0000101: q = 4'b0000; - 11'b011_0000110: q = 4'b0000; - 11'b011_0000111: q = 4'b0000; - 11'b011_0001000: q = 4'b0100; - 11'b011_0001001: q = 4'b0100; - 11'b011_0001010: q = 4'b0100; - 11'b011_0001011: q = 4'b0100; - 11'b011_0001100: q = 4'b0100; - 11'b011_0001101: q = 4'b0100; - 11'b011_0001110: q = 4'b0100; - 11'b011_0001111: q = 4'b0100; - 11'b011_0010000: q = 4'b0100; - 11'b011_0010001: q = 4'b0100; - 11'b011_0010010: q = 4'b0100; - 11'b011_0010011: q = 4'b0100; - 11'b011_0010100: q = 4'b0100; - 11'b011_0010101: q = 4'b0100; - 11'b011_0010110: q = 4'b0100; - 11'b011_0010111: q = 4'b0100; - 11'b011_0011000: q = 4'b0100; - 11'b011_0011001: q = 4'b0100; - 11'b011_0011010: q = 4'b0100; - 11'b011_0011011: q = 4'b0100; - 11'b011_0011100: q = 4'b0100; - 11'b011_0011101: q = 4'b0100; - 11'b011_0011110: q = 4'b0100; - 11'b011_0011111: q = 4'b0100; - 11'b011_0100000: q = 4'b1000; - 11'b011_0100001: q = 4'b1000; - 11'b011_0100010: q = 4'b1000; - 11'b011_0100011: q = 4'b1000; - 11'b011_0100100: q = 4'b1000; - 11'b011_0100101: q = 4'b1000; - 11'b011_0100110: q = 4'b1000; - 11'b011_0100111: q = 4'b1000; - 11'b011_0101000: q = 4'b1000; - 11'b011_0101001: q = 4'b1000; - 11'b011_0101010: q = 4'b1000; - 11'b011_0101011: q = 4'b1000; - 11'b011_0101100: q = 4'b1000; - 11'b011_0101101: q = 4'b1000; - 11'b011_0101110: q = 4'b1000; - 11'b011_0101111: q = 4'b1000; - 11'b011_0110000: q = 4'b1000; - 11'b011_0110001: q = 4'b1000; - 11'b011_0110010: q = 4'b1000; - 11'b011_0110011: q = 4'b1000; - 11'b011_0110100: q = 4'b1000; - 11'b011_0110101: q = 4'b1000; - 11'b011_0110110: q = 4'b1000; - 11'b011_0110111: q = 4'b1000; - 11'b011_0111000: q = 4'b1000; - 11'b011_0111001: q = 4'b1000; - 11'b011_0111010: q = 4'b1000; - 11'b011_0111011: q = 4'b1000; - 11'b011_0111100: q = 4'b1000; - 11'b011_0111101: q = 4'b1000; - 11'b011_0111110: q = 4'b1000; - 11'b011_0111111: q = 4'b1000; - 11'b011_1000000: q = 4'b0001; - 11'b011_1000001: q = 4'b0001; - 11'b011_1000010: q = 4'b0001; - 11'b011_1000011: q = 4'b0001; - 11'b011_1000100: q = 4'b0001; - 11'b011_1000101: q = 4'b0001; - 11'b011_1000110: q = 4'b0001; - 11'b011_1000111: q = 4'b0001; - 11'b011_1001000: q = 4'b0001; - 11'b011_1001001: q = 4'b0001; - 11'b011_1001010: q = 4'b0001; - 11'b011_1001011: q = 4'b0001; - 11'b011_1001100: q = 4'b0001; - 11'b011_1001101: q = 4'b0001; - 11'b011_1001110: q = 4'b0001; - 11'b011_1001111: q = 4'b0001; - 11'b011_1010000: q = 4'b0001; - 11'b011_1010001: q = 4'b0001; - 11'b011_1010010: q = 4'b0001; - 11'b011_1010011: q = 4'b0001; - 11'b011_1010100: q = 4'b0001; - 11'b011_1010101: q = 4'b0001; - 11'b011_1010110: q = 4'b0001; - 11'b011_1010111: q = 4'b0001; - 11'b011_1011000: q = 4'b0001; - 11'b011_1011001: q = 4'b0001; - 11'b011_1011010: q = 4'b0001; - 11'b011_1011011: q = 4'b0001; - 11'b011_1011100: q = 4'b0001; - 11'b011_1011101: q = 4'b0001; - 11'b011_1011110: q = 4'b0010; - 11'b011_1011111: q = 4'b0010; - 11'b011_1100000: q = 4'b0010; - 11'b011_1100001: q = 4'b0010; - 11'b011_1100010: q = 4'b0010; - 11'b011_1100011: q = 4'b0010; - 11'b011_1100100: q = 4'b0010; - 11'b011_1100101: q = 4'b0010; - 11'b011_1100110: q = 4'b0010; - 11'b011_1100111: q = 4'b0010; - 11'b011_1101000: q = 4'b0010; - 11'b011_1101001: q = 4'b0010; - 11'b011_1101010: q = 4'b0010; - 11'b011_1101011: q = 4'b0010; - 11'b011_1101100: q = 4'b0010; - 11'b011_1101101: q = 4'b0010; - 11'b011_1101110: q = 4'b0010; - 11'b011_1101111: q = 4'b0010; - 11'b011_1110000: q = 4'b0010; - 11'b011_1110001: q = 4'b0010; - 11'b011_1110010: q = 4'b0010; - 11'b011_1110011: q = 4'b0010; - 11'b011_1110100: q = 4'b0000; - 11'b011_1110101: q = 4'b0000; - 11'b011_1110110: q = 4'b0000; - 11'b011_1110111: q = 4'b0000; - 11'b011_1111000: q = 4'b0000; - 11'b011_1111001: q = 4'b0000; - 11'b011_1111010: q = 4'b0000; - 11'b011_1111011: q = 4'b0000; - 11'b011_1111100: q = 4'b0000; - 11'b011_1111101: q = 4'b0000; - 11'b011_1111110: q = 4'b0000; - 11'b011_1111111: q = 4'b0000; - 11'b100_0000000: q = 4'b0000; - 11'b100_0000001: q = 4'b0000; - 11'b100_0000010: q = 4'b0000; - 11'b100_0000011: q = 4'b0000; - 11'b100_0000100: q = 4'b0000; - 11'b100_0000101: q = 4'b0000; - 11'b100_0000110: q = 4'b0000; - 11'b100_0000111: q = 4'b0000; - 11'b100_0001000: q = 4'b0000; - 11'b100_0001001: q = 4'b0000; - 11'b100_0001010: q = 4'b0000; - 11'b100_0001011: q = 4'b0000; - 11'b100_0001100: q = 4'b0100; - 11'b100_0001101: q = 4'b0100; - 11'b100_0001110: q = 4'b0100; - 11'b100_0001111: q = 4'b0100; - 11'b100_0010000: q = 4'b0100; - 11'b100_0010001: q = 4'b0100; - 11'b100_0010010: q = 4'b0100; - 11'b100_0010011: q = 4'b0100; - 11'b100_0010100: q = 4'b0100; - 11'b100_0010101: q = 4'b0100; - 11'b100_0010110: q = 4'b0100; - 11'b100_0010111: q = 4'b0100; - 11'b100_0011000: q = 4'b0100; - 11'b100_0011001: q = 4'b0100; - 11'b100_0011010: q = 4'b0100; - 11'b100_0011011: q = 4'b0100; - 11'b100_0011100: q = 4'b0100; - 11'b100_0011101: q = 4'b0100; - 11'b100_0011110: q = 4'b0100; - 11'b100_0011111: q = 4'b0100; - 11'b100_0100000: q = 4'b0100; - 11'b100_0100001: q = 4'b0100; - 11'b100_0100010: q = 4'b0100; - 11'b100_0100011: q = 4'b0100; - 11'b100_0100100: q = 4'b1000; - 11'b100_0100101: q = 4'b1000; - 11'b100_0100110: q = 4'b1000; - 11'b100_0100111: q = 4'b1000; - 11'b100_0101000: q = 4'b1000; - 11'b100_0101001: q = 4'b1000; - 11'b100_0101010: q = 4'b1000; - 11'b100_0101011: q = 4'b1000; - 11'b100_0101100: q = 4'b1000; - 11'b100_0101101: q = 4'b1000; - 11'b100_0101110: q = 4'b1000; - 11'b100_0101111: q = 4'b1000; - 11'b100_0110000: q = 4'b1000; - 11'b100_0110001: q = 4'b1000; - 11'b100_0110010: q = 4'b1000; - 11'b100_0110011: q = 4'b1000; - 11'b100_0110100: q = 4'b1000; - 11'b100_0110101: q = 4'b1000; - 11'b100_0110110: q = 4'b1000; - 11'b100_0110111: q = 4'b1000; - 11'b100_0111000: q = 4'b1000; - 11'b100_0111001: q = 4'b1000; - 11'b100_0111010: q = 4'b1000; - 11'b100_0111011: q = 4'b1000; - 11'b100_0111100: q = 4'b1000; - 11'b100_0111101: q = 4'b1000; - 11'b100_0111110: q = 4'b1000; - 11'b100_0111111: q = 4'b1000; - 11'b100_1000000: q = 4'b0001; - 11'b100_1000001: q = 4'b0001; - 11'b100_1000010: q = 4'b0001; - 11'b100_1000011: q = 4'b0001; - 11'b100_1000100: q = 4'b0001; - 11'b100_1000101: q = 4'b0001; - 11'b100_1000110: q = 4'b0001; - 11'b100_1000111: q = 4'b0001; - 11'b100_1001000: q = 4'b0001; - 11'b100_1001001: q = 4'b0001; - 11'b100_1001010: q = 4'b0001; - 11'b100_1001011: q = 4'b0001; - 11'b100_1001100: q = 4'b0001; - 11'b100_1001101: q = 4'b0001; - 11'b100_1001110: q = 4'b0001; - 11'b100_1001111: q = 4'b0001; - 11'b100_1010000: q = 4'b0001; - 11'b100_1010001: q = 4'b0001; - 11'b100_1010010: q = 4'b0001; - 11'b100_1010011: q = 4'b0001; - 11'b100_1010100: q = 4'b0001; - 11'b100_1010101: q = 4'b0001; - 11'b100_1010110: q = 4'b0001; - 11'b100_1010111: q = 4'b0001; - 11'b100_1011000: q = 4'b0001; - 11'b100_1011001: q = 4'b0001; - 11'b100_1011010: q = 4'b0001; - 11'b100_1011011: q = 4'b0001; - 11'b100_1011100: q = 4'b0010; - 11'b100_1011101: q = 4'b0010; - 11'b100_1011110: q = 4'b0010; - 11'b100_1011111: q = 4'b0010; - 11'b100_1100000: q = 4'b0010; - 11'b100_1100001: q = 4'b0010; - 11'b100_1100010: q = 4'b0010; - 11'b100_1100011: q = 4'b0010; - 11'b100_1100100: q = 4'b0010; - 11'b100_1100101: q = 4'b0010; - 11'b100_1100110: q = 4'b0010; - 11'b100_1100111: q = 4'b0010; - 11'b100_1101000: q = 4'b0010; - 11'b100_1101001: q = 4'b0010; - 11'b100_1101010: q = 4'b0010; - 11'b100_1101011: q = 4'b0010; - 11'b100_1101100: q = 4'b0010; - 11'b100_1101101: q = 4'b0010; - 11'b100_1101110: q = 4'b0010; - 11'b100_1101111: q = 4'b0010; - 11'b100_1110000: q = 4'b0010; - 11'b100_1110001: q = 4'b0010; - 11'b100_1110010: q = 4'b0010; - 11'b100_1110011: q = 4'b0010; - 11'b100_1110100: q = 4'b0000; - 11'b100_1110101: q = 4'b0000; - 11'b100_1110110: q = 4'b0000; - 11'b100_1110111: q = 4'b0000; - 11'b100_1111000: q = 4'b0000; - 11'b100_1111001: q = 4'b0000; - 11'b100_1111010: q = 4'b0000; - 11'b100_1111011: q = 4'b0000; - 11'b100_1111100: q = 4'b0000; - 11'b100_1111101: q = 4'b0000; - 11'b100_1111110: q = 4'b0000; - 11'b100_1111111: q = 4'b0000; - 11'b101_0000000: q = 4'b0000; - 11'b101_0000001: q = 4'b0000; - 11'b101_0000010: q = 4'b0000; - 11'b101_0000011: q = 4'b0000; - 11'b101_0000100: q = 4'b0000; - 11'b101_0000101: q = 4'b0000; - 11'b101_0000110: q = 4'b0000; - 11'b101_0000111: q = 4'b0000; - 11'b101_0001000: q = 4'b0000; - 11'b101_0001001: q = 4'b0000; - 11'b101_0001010: q = 4'b0000; - 11'b101_0001011: q = 4'b0000; - 11'b101_0001100: q = 4'b0100; - 11'b101_0001101: q = 4'b0100; - 11'b101_0001110: q = 4'b0100; - 11'b101_0001111: q = 4'b0100; - 11'b101_0010000: q = 4'b0100; - 11'b101_0010001: q = 4'b0100; - 11'b101_0010010: q = 4'b0100; - 11'b101_0010011: q = 4'b0100; - 11'b101_0010100: q = 4'b0100; - 11'b101_0010101: q = 4'b0100; - 11'b101_0010110: q = 4'b0100; - 11'b101_0010111: q = 4'b0100; - 11'b101_0011000: q = 4'b0100; - 11'b101_0011001: q = 4'b0100; - 11'b101_0011010: q = 4'b0100; - 11'b101_0011011: q = 4'b0100; - 11'b101_0011100: q = 4'b0100; - 11'b101_0011101: q = 4'b0100; - 11'b101_0011110: q = 4'b0100; - 11'b101_0011111: q = 4'b0100; - 11'b101_0100000: q = 4'b0100; - 11'b101_0100001: q = 4'b0100; - 11'b101_0100010: q = 4'b0100; - 11'b101_0100011: q = 4'b0100; - 11'b101_0100100: q = 4'b0100; - 11'b101_0100101: q = 4'b0100; - 11'b101_0100110: q = 4'b0100; - 11'b101_0100111: q = 4'b0100; - 11'b101_0101000: q = 4'b1000; - 11'b101_0101001: q = 4'b1000; - 11'b101_0101010: q = 4'b1000; - 11'b101_0101011: q = 4'b1000; - 11'b101_0101100: q = 4'b1000; - 11'b101_0101101: q = 4'b1000; - 11'b101_0101110: q = 4'b1000; - 11'b101_0101111: q = 4'b1000; - 11'b101_0110000: q = 4'b1000; - 11'b101_0110001: q = 4'b1000; - 11'b101_0110010: q = 4'b1000; - 11'b101_0110011: q = 4'b1000; - 11'b101_0110100: q = 4'b1000; - 11'b101_0110101: q = 4'b1000; - 11'b101_0110110: q = 4'b1000; - 11'b101_0110111: q = 4'b1000; - 11'b101_0111000: q = 4'b1000; - 11'b101_0111001: q = 4'b1000; - 11'b101_0111010: q = 4'b1000; - 11'b101_0111011: q = 4'b1000; - 11'b101_0111100: q = 4'b1000; - 11'b101_0111101: q = 4'b1000; - 11'b101_0111110: q = 4'b1000; - 11'b101_0111111: q = 4'b1000; - 11'b101_1000000: q = 4'b0001; - 11'b101_1000001: q = 4'b0001; - 11'b101_1000010: q = 4'b0001; - 11'b101_1000011: q = 4'b0001; - 11'b101_1000100: q = 4'b0001; - 11'b101_1000101: q = 4'b0001; - 11'b101_1000110: q = 4'b0001; - 11'b101_1000111: q = 4'b0001; - 11'b101_1001000: q = 4'b0001; - 11'b101_1001001: q = 4'b0001; - 11'b101_1001010: q = 4'b0001; - 11'b101_1001011: q = 4'b0001; - 11'b101_1001100: q = 4'b0001; - 11'b101_1001101: q = 4'b0001; - 11'b101_1001110: q = 4'b0001; - 11'b101_1001111: q = 4'b0001; - 11'b101_1010000: q = 4'b0001; - 11'b101_1010001: q = 4'b0001; - 11'b101_1010010: q = 4'b0001; - 11'b101_1010011: q = 4'b0001; - 11'b101_1010100: q = 4'b0001; - 11'b101_1010101: q = 4'b0001; - 11'b101_1010110: q = 4'b0001; - 11'b101_1010111: q = 4'b0001; - 11'b101_1011000: q = 4'b0010; - 11'b101_1011001: q = 4'b0010; - 11'b101_1011010: q = 4'b0010; - 11'b101_1011011: q = 4'b0010; - 11'b101_1011100: q = 4'b0010; - 11'b101_1011101: q = 4'b0010; - 11'b101_1011110: q = 4'b0010; - 11'b101_1011111: q = 4'b0010; - 11'b101_1100000: q = 4'b0010; - 11'b101_1100001: q = 4'b0010; - 11'b101_1100010: q = 4'b0010; - 11'b101_1100011: q = 4'b0010; - 11'b101_1100100: q = 4'b0010; - 11'b101_1100101: q = 4'b0010; - 11'b101_1100110: q = 4'b0010; - 11'b101_1100111: q = 4'b0010; - 11'b101_1101000: q = 4'b0010; - 11'b101_1101001: q = 4'b0010; - 11'b101_1101010: q = 4'b0010; - 11'b101_1101011: q = 4'b0010; - 11'b101_1101100: q = 4'b0010; - 11'b101_1101101: q = 4'b0010; - 11'b101_1101110: q = 4'b0010; - 11'b101_1101111: q = 4'b0010; - 11'b101_1110000: q = 4'b0000; - 11'b101_1110001: q = 4'b0000; - 11'b101_1110010: q = 4'b0000; - 11'b101_1110011: q = 4'b0000; - 11'b101_1110100: q = 4'b0000; - 11'b101_1110101: q = 4'b0000; - 11'b101_1110110: q = 4'b0000; - 11'b101_1110111: q = 4'b0000; - 11'b101_1111000: q = 4'b0000; - 11'b101_1111001: q = 4'b0000; - 11'b101_1111010: q = 4'b0000; - 11'b101_1111011: q = 4'b0000; - 11'b101_1111100: q = 4'b0000; - 11'b101_1111101: q = 4'b0000; - 11'b101_1111110: q = 4'b0000; - 11'b101_1111111: q = 4'b0000; - 11'b110_0000000: q = 4'b0000; - 11'b110_0000001: q = 4'b0000; - 11'b110_0000010: q = 4'b0000; - 11'b110_0000011: q = 4'b0000; - 11'b110_0000100: q = 4'b0000; - 11'b110_0000101: q = 4'b0000; - 11'b110_0000110: q = 4'b0000; - 11'b110_0000111: q = 4'b0000; - 11'b110_0001000: q = 4'b0000; - 11'b110_0001001: q = 4'b0000; - 11'b110_0001010: q = 4'b0000; - 11'b110_0001011: q = 4'b0000; - 11'b110_0001100: q = 4'b0000; - 11'b110_0001101: q = 4'b0000; - 11'b110_0001110: q = 4'b0000; - 11'b110_0001111: q = 4'b0000; - 11'b110_0010000: q = 4'b0100; - 11'b110_0010001: q = 4'b0100; - 11'b110_0010010: q = 4'b0100; - 11'b110_0010011: q = 4'b0100; - 11'b110_0010100: q = 4'b0100; - 11'b110_0010101: q = 4'b0100; - 11'b110_0010110: q = 4'b0100; - 11'b110_0010111: q = 4'b0100; - 11'b110_0011000: q = 4'b0100; - 11'b110_0011001: q = 4'b0100; - 11'b110_0011010: q = 4'b0100; - 11'b110_0011011: q = 4'b0100; - 11'b110_0011100: q = 4'b0100; - 11'b110_0011101: q = 4'b0100; - 11'b110_0011110: q = 4'b0100; - 11'b110_0011111: q = 4'b0100; - 11'b110_0100000: q = 4'b0100; - 11'b110_0100001: q = 4'b0100; - 11'b110_0100010: q = 4'b0100; - 11'b110_0100011: q = 4'b0100; - 11'b110_0100100: q = 4'b0100; - 11'b110_0100101: q = 4'b0100; - 11'b110_0100110: q = 4'b0100; - 11'b110_0100111: q = 4'b0100; - 11'b110_0101000: q = 4'b1000; - 11'b110_0101001: q = 4'b1000; - 11'b110_0101010: q = 4'b1000; - 11'b110_0101011: q = 4'b1000; - 11'b110_0101100: q = 4'b1000; - 11'b110_0101101: q = 4'b1000; - 11'b110_0101110: q = 4'b1000; - 11'b110_0101111: q = 4'b1000; - 11'b110_0110000: q = 4'b1000; - 11'b110_0110001: q = 4'b1000; - 11'b110_0110010: q = 4'b1000; - 11'b110_0110011: q = 4'b1000; - 11'b110_0110100: q = 4'b1000; - 11'b110_0110101: q = 4'b1000; - 11'b110_0110110: q = 4'b1000; - 11'b110_0110111: q = 4'b1000; - 11'b110_0111000: q = 4'b1000; - 11'b110_0111001: q = 4'b1000; - 11'b110_0111010: q = 4'b1000; - 11'b110_0111011: q = 4'b1000; - 11'b110_0111100: q = 4'b1000; - 11'b110_0111101: q = 4'b1000; - 11'b110_0111110: q = 4'b1000; - 11'b110_0111111: q = 4'b1000; - 11'b110_1000000: q = 4'b0001; - 11'b110_1000001: q = 4'b0001; - 11'b110_1000010: q = 4'b0001; - 11'b110_1000011: q = 4'b0001; - 11'b110_1000100: q = 4'b0001; - 11'b110_1000101: q = 4'b0001; - 11'b110_1000110: q = 4'b0001; - 11'b110_1000111: q = 4'b0001; - 11'b110_1001000: q = 4'b0001; - 11'b110_1001001: q = 4'b0001; - 11'b110_1001010: q = 4'b0001; - 11'b110_1001011: q = 4'b0001; - 11'b110_1001100: q = 4'b0001; - 11'b110_1001101: q = 4'b0001; - 11'b110_1001110: q = 4'b0001; - 11'b110_1001111: q = 4'b0001; - 11'b110_1010000: q = 4'b0001; - 11'b110_1010001: q = 4'b0001; - 11'b110_1010010: q = 4'b0001; - 11'b110_1010011: q = 4'b0001; - 11'b110_1010100: q = 4'b0010; - 11'b110_1010101: q = 4'b0010; - 11'b110_1010110: q = 4'b0010; - 11'b110_1010111: q = 4'b0010; - 11'b110_1011000: q = 4'b0010; - 11'b110_1011001: q = 4'b0010; - 11'b110_1011010: q = 4'b0010; - 11'b110_1011011: q = 4'b0010; - 11'b110_1011100: q = 4'b0010; - 11'b110_1011101: q = 4'b0010; - 11'b110_1011110: q = 4'b0010; - 11'b110_1011111: q = 4'b0010; - 11'b110_1100000: q = 4'b0010; - 11'b110_1100001: q = 4'b0010; - 11'b110_1100010: q = 4'b0010; - 11'b110_1100011: q = 4'b0010; - 11'b110_1100100: q = 4'b0010; - 11'b110_1100101: q = 4'b0010; - 11'b110_1100110: q = 4'b0010; - 11'b110_1100111: q = 4'b0010; - 11'b110_1101000: q = 4'b0010; - 11'b110_1101001: q = 4'b0010; - 11'b110_1101010: q = 4'b0010; - 11'b110_1101011: q = 4'b0010; - 11'b110_1101100: q = 4'b0010; - 11'b110_1101101: q = 4'b0010; - 11'b110_1101110: q = 4'b0010; - 11'b110_1101111: q = 4'b0010; - 11'b110_1110000: q = 4'b0000; - 11'b110_1110001: q = 4'b0000; - 11'b110_1110010: q = 4'b0000; - 11'b110_1110011: q = 4'b0000; - 11'b110_1110100: q = 4'b0000; - 11'b110_1110101: q = 4'b0000; - 11'b110_1110110: q = 4'b0000; - 11'b110_1110111: q = 4'b0000; - 11'b110_1111000: q = 4'b0000; - 11'b110_1111001: q = 4'b0000; - 11'b110_1111010: q = 4'b0000; - 11'b110_1111011: q = 4'b0000; - 11'b110_1111100: q = 4'b0000; - 11'b110_1111101: q = 4'b0000; - 11'b110_1111110: q = 4'b0000; - 11'b110_1111111: q = 4'b0000; - 11'b111_0000000: q = 4'b0000; - 11'b111_0000001: q = 4'b0000; - 11'b111_0000010: q = 4'b0000; - 11'b111_0000011: q = 4'b0000; - 11'b111_0000100: q = 4'b0000; - 11'b111_0000101: q = 4'b0000; - 11'b111_0000110: q = 4'b0000; - 11'b111_0000111: q = 4'b0000; - 11'b111_0001000: q = 4'b0000; - 11'b111_0001001: q = 4'b0000; - 11'b111_0001010: q = 4'b0000; - 11'b111_0001011: q = 4'b0000; - 11'b111_0001100: q = 4'b0000; - 11'b111_0001101: q = 4'b0000; - 11'b111_0001110: q = 4'b0000; - 11'b111_0001111: q = 4'b0000; - 11'b111_0010000: q = 4'b0100; - 11'b111_0010001: q = 4'b0100; - 11'b111_0010010: q = 4'b0100; - 11'b111_0010011: q = 4'b0100; - 11'b111_0010100: q = 4'b0100; - 11'b111_0010101: q = 4'b0100; - 11'b111_0010110: q = 4'b0100; - 11'b111_0010111: q = 4'b0100; - 11'b111_0011000: q = 4'b0100; - 11'b111_0011001: q = 4'b0100; - 11'b111_0011010: q = 4'b0100; - 11'b111_0011011: q = 4'b0100; - 11'b111_0011100: q = 4'b0100; - 11'b111_0011101: q = 4'b0100; - 11'b111_0011110: q = 4'b0100; - 11'b111_0011111: q = 4'b0100; - 11'b111_0100000: q = 4'b0100; - 11'b111_0100001: q = 4'b0100; - 11'b111_0100010: q = 4'b0100; - 11'b111_0100011: q = 4'b0100; - 11'b111_0100100: q = 4'b0100; - 11'b111_0100101: q = 4'b0100; - 11'b111_0100110: q = 4'b0100; - 11'b111_0100111: q = 4'b0100; - 11'b111_0101000: q = 4'b0100; - 11'b111_0101001: q = 4'b0100; - 11'b111_0101010: q = 4'b0100; - 11'b111_0101011: q = 4'b0100; - 11'b111_0101100: q = 4'b1000; - 11'b111_0101101: q = 4'b1000; - 11'b111_0101110: q = 4'b1000; - 11'b111_0101111: q = 4'b1000; - 11'b111_0110000: q = 4'b1000; - 11'b111_0110001: q = 4'b1000; - 11'b111_0110010: q = 4'b1000; - 11'b111_0110011: q = 4'b1000; - 11'b111_0110100: q = 4'b1000; - 11'b111_0110101: q = 4'b1000; - 11'b111_0110110: q = 4'b1000; - 11'b111_0110111: q = 4'b1000; - 11'b111_0111000: q = 4'b1000; - 11'b111_0111001: q = 4'b1000; - 11'b111_0111010: q = 4'b1000; - 11'b111_0111011: q = 4'b1000; - 11'b111_0111100: q = 4'b1000; - 11'b111_0111101: q = 4'b1000; - 11'b111_0111110: q = 4'b1000; - 11'b111_0111111: q = 4'b1000; - 11'b111_1000000: q = 4'b0001; - 11'b111_1000001: q = 4'b0001; - 11'b111_1000010: q = 4'b0001; - 11'b111_1000011: q = 4'b0001; - 11'b111_1000100: q = 4'b0001; - 11'b111_1000101: q = 4'b0001; - 11'b111_1000110: q = 4'b0001; - 11'b111_1000111: q = 4'b0001; - 11'b111_1001000: q = 4'b0001; - 11'b111_1001001: q = 4'b0001; - 11'b111_1001010: q = 4'b0001; - 11'b111_1001011: q = 4'b0001; - 11'b111_1001100: q = 4'b0001; - 11'b111_1001101: q = 4'b0001; - 11'b111_1001110: q = 4'b0001; - 11'b111_1001111: q = 4'b0001; - 11'b111_1010000: q = 4'b0001; - 11'b111_1010001: q = 4'b0001; - 11'b111_1010010: q = 4'b0010; - 11'b111_1010011: q = 4'b0010; - 11'b111_1010100: q = 4'b0010; - 11'b111_1010101: q = 4'b0010; - 11'b111_1010110: q = 4'b0010; - 11'b111_1010111: q = 4'b0010; - 11'b111_1011000: q = 4'b0010; - 11'b111_1011001: q = 4'b0010; - 11'b111_1011010: q = 4'b0010; - 11'b111_1011011: q = 4'b0010; - 11'b111_1011100: q = 4'b0010; - 11'b111_1011101: q = 4'b0010; - 11'b111_1011110: q = 4'b0010; - 11'b111_1011111: q = 4'b0010; - 11'b111_1100000: q = 4'b0010; - 11'b111_1100001: q = 4'b0010; - 11'b111_1100010: q = 4'b0010; - 11'b111_1100011: q = 4'b0010; - 11'b111_1100100: q = 4'b0010; - 11'b111_1100101: q = 4'b0010; - 11'b111_1100110: q = 4'b0010; - 11'b111_1100111: q = 4'b0010; - 11'b111_1101000: q = 4'b0010; - 11'b111_1101001: q = 4'b0010; - 11'b111_1101010: q = 4'b0010; - 11'b111_1101011: q = 4'b0010; - 11'b111_1101100: q = 4'b0010; - 11'b111_1101101: q = 4'b0010; - 11'b111_1101110: q = 4'b0010; - 11'b111_1101111: q = 4'b0010; - 11'b111_1110000: q = 4'b0000; - 11'b111_1110001: q = 4'b0000; - 11'b111_1110010: q = 4'b0000; - 11'b111_1110011: q = 4'b0000; - 11'b111_1110100: q = 4'b0000; - 11'b111_1110101: q = 4'b0000; - 11'b111_1110110: q = 4'b0000; - 11'b111_1110111: q = 4'b0000; - 11'b111_1111000: q = 4'b0000; - 11'b111_1111001: q = 4'b0000; - 11'b111_1111010: q = 4'b0000; - 11'b111_1111011: q = 4'b0000; - 11'b111_1111100: q = 4'b0000; - 11'b111_1111101: q = 4'b0000; - 11'b111_1111110: q = 4'b0000; - 11'b111_1111111: q = 4'b0000; - endcase diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 3e41c16c..4cd23488 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0) -`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN)) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 1)) module srt ( input logic clk, @@ -164,7 +164,7 @@ module srtpreproc ( assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // Number of cycles of divider - assign dur = Int ? (intExp & {7{~intExp[6]}}) : (`DIVLEN + 2); + assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN + 2); endmodule ///////////////////////////////// From 77ea4e47cb7425186a98646684baa6d4db48cd7a Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 13 Jul 2022 15:01:38 -0700 Subject: [PATCH 18/36] removed minus 1 case in rounding --- pipelined/src/fpu/divsqrt.sv | 6 ++-- pipelined/src/fpu/fma.sv | 41 +++++++++++----------- pipelined/src/fpu/fmashiftcalc.sv | 7 ++-- pipelined/src/fpu/fpu.sv | 5 ++- pipelined/src/fpu/lzacorrection.sv | 3 +- pipelined/src/fpu/postprocess.sv | 7 ++-- pipelined/src/fpu/round.sv | 54 ++++++++++------------------- pipelined/src/fpu/srt-radix4.sv | 5 +-- pipelined/src/fpu/srtfsm.sv | 4 +-- pipelined/testbench/testbench-fp.sv | 8 ++--- 10 files changed, 60 insertions(+), 80 deletions(-) diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 8420baa1..7e240420 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -43,7 +43,6 @@ module divsqrt( input logic StallM, input logic StallE, output logic DivStickyM, - output logic DivNegStickyM, output logic DivBusy, output logic DivDone, output logic [`NE+1:0] DivCalcExpM, @@ -58,11 +57,12 @@ module divsqrt( logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; logic [`DURLEN-1:0] Dur; + logic NegSticky; srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 57b053da..039876e9 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -70,20 +70,21 @@ module fma( /////////////////////////////////////////////////////////////////////////////// // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - - align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, - .Am, .ZmSticky, .KillProd); - // calculate the signs and take the opperation into account sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, + .Ps, .As, .Am, .ZmSticky, .KillProd); + + + // /////////////////////////////////////////////////////////////////////////////// // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA}), .P(PmKilled), .NCnt); + loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -142,6 +143,7 @@ endmodule module align( + input logic As, Ps, input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [`NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero @@ -172,7 +174,7 @@ module align( // the 1'b0 before the added is because the product's mantissa has two bits before the binary point (xx.xxxxxxxxxx...) assign ZmPreshifted = {Zm,(3*`NF+5)'(0)}; - assign KillProd = ACnt[`NE+1]|XZero|YZero; + assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)); always_comb @@ -183,7 +185,7 @@ module align( // | 54'b0 | 106'b(product) | 2'b0 | // | addnend | if (KillProd) begin - ZmShifted = ZmPreshifted; + ZmShifted = {(`NF+3)'(0), Zm, (2*`NF+2)'(0)}; ZmSticky = ~(XZero|YZero); // If the addend is too small to effect the addition @@ -221,6 +223,7 @@ module add( input logic [2*`NF+1:0] Pm, // the product's mantissa input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic KillProd, // should the product be set to 0 + input logic ZmSticky, input logic XZero, YZero, // is the input zero output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed @@ -243,13 +246,14 @@ module add( assign AmInv = InvA ? {1'b1, ~Am} : {1'b0, Am}; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign PmKilled = Pm&{2*`NF+2{~KillProd}}; - - - // Do the addition // - calculate a positive and negitive sum in parallel - assign PreSum = {{`NF+3{1'b0}}, PmKilled, 2'b0} + AmInv + {{3*`NF+6{1'b0}}, InvA}; - assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b0} + {(3*`NF+7)'(4)}; + // Zsticky Psticky + // PreSum -1 = don't add 1 +1 = add 2 + // NegPreSum +1 = add 2 -1 = don't add 1 + // for NegPreSum the product is set to -1 whenever the product is killed, therefore add 1, 2 or 0 + assign PreSum = {{`NF+3{1'b0}}, PmKilled, 1'b0, InvA&ZmSticky&KillProd} + AmInv + {{3*`NF+6{1'b0}}, InvA&~((ZmSticky&~KillProd))}; + assign NegPreSum = {1'b0, Am} + {{`NF+3{1'b1}}, ~PmKilled, 2'b11} + {(3*`NF+5)'(0), ZmSticky&~KillProd, ~(ZmSticky)}; // Is the sum negitive assign NegSum = PreSum[3*`NF+6]; @@ -261,7 +265,7 @@ endmodule module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend - input logic [2*`NF+1:0] P, // product + input logic [2*`NF+3:0] P, // product output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result ); @@ -273,12 +277,9 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; assign G[3*`NF+6:2*`NF+4] = 0; assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; - assign T[2*`NF+3:2] = A[2*`NF+3:2]^P; - assign G[2*`NF+3:2] = A[2*`NF+3:2]&P; - assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P; - assign T[1:0] = A[1:0]; - assign G[1:0] = 0; - assign Z[1:0] = ~A[1:0]; + assign T[2*`NF+3:0] = A[2*`NF+3:0]^P; + assign G[2*`NF+3:0] = A[2*`NF+3:0]&P; + assign Z[2*`NF+3:0] = ~A[2*`NF+3:0]&~P; // Apply function to determine Leading pattern diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 3c286b50..ae974eb0 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -35,7 +35,6 @@ module fmashiftcalc( input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero - input logic ZDenorm, output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection @@ -54,7 +53,7 @@ module fmashiftcalc( // calculate the sum's exponent // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 - assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin @@ -149,9 +148,9 @@ module fmashiftcalc( // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm&~FmaKillProd ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; - assign FmaShiftAmt = (FmaNCnt&{$clog2(3*`NF+7){~FmaKillProd}})+DenormShift; + assign FmaShiftAmt = FmaNCnt+DenormShift; endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index bd018253..5428481d 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -127,7 +127,6 @@ module fpu ( //divide signals logic [`QLEN-1:0] QuotM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM; - logic DivNegStickyE, DivNegStickyM; logic DivStickyE, DivStickyM; logic DivDoneM; logic [`DURLEN-1:0] EarlyTermShiftM; @@ -288,7 +287,7 @@ module fpu ( // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), - .StallE, .StallM, .DivStickyM, .DivNegStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal + .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, @@ -384,7 +383,7 @@ module fpu ( postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), - .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .DivNegSticky(DivNegStickyM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/lzacorrection.sv index 17db0c0b..eb9d3559 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/lzacorrection.sv @@ -37,7 +37,6 @@ module lzacorrection( input logic [`NE+1:0] DivDenormShift, input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection - input logic FmaKillProd, // is the product set to zero input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction output logic [`NE+1:0] DivCorrExp, @@ -59,7 +58,7 @@ module lzacorrection( assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1&~FmaKillProd}+{{`NE{1'b0}}, LZAPlus2&~FmaKillProd, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm&~FmaKillProd}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]&~FmaKillProd}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 30945532..d7fcb2a0 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -56,7 +56,6 @@ module postprocess ( //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivSticky, - input logic DivNegSticky, input logic DivDone, input logic [`NE+1:0] DivCalcExp, input logic [`QLEN-1:0] Quot, @@ -153,7 +152,7 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, - .ZDenorm, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb @@ -183,7 +182,7 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - lzacorrection lzacorrection(.FmaOp, .FmaKillProd, .FmaPreResultDenorm, .FmaConvNormSumExp, + lzacorrection lzacorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); @@ -203,7 +202,7 @@ module postprocess ( round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, - .DivSticky, .DivNegSticky, .DivDone, + .DivSticky, .DivDone, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); /////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index c73edc08..e2b9cb3e 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -55,7 +55,6 @@ module round( input logic [`NE:0] CvtCe, // the calculated expoent input logic [`NE+1:0] DivCorrExp, // the calculated expoent input logic DivSticky, // sticky bit - input logic DivNegSticky, output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction @@ -67,7 +66,6 @@ module round( output logic R, UfLSBRes // bits needed to calculate rounding ); logic LSBRes; // bit used for rounding - least significant bit of the normalized sum - logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result logic NormSumSticky; // normalized sum's sticky bit logic UfSticky; // sticky bit for underlow calculation @@ -254,40 +252,25 @@ module round( assign S = UfSticky | UfRound; - // Deterimine if a small number was supposed to be subtrated - // - for FMA or if division has a negitive sticky bit - assign SubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~(NormSumSticky|UfRound); - assign UfSubBySmallNum = ((FmaZmSticky&FmaOp&~ZZero&FmaInvA) | (DivNegSticky&DivOp)) & ~NormSumSticky; - - always_comb begin // Determine if you add 1 case (Frm) - 3'b000: CalcPlus1 = R & ((S| LSBRes)&~SubBySmallNum);//round to nearest even + 3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = Nsgn & ~(SubBySmallNum & ~R);//round down - 3'b011: CalcPlus1 = ~Nsgn & ~(SubBySmallNum & ~R);//round up - 3'b100: CalcPlus1 = R & ~SubBySmallNum;//round to nearest max magnitude + 3'b010: CalcPlus1 = Nsgn;//round down + 3'b011: CalcPlus1 = ~Nsgn;//round up + 3'b100: CalcPlus1 = R;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (Frm) - 3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even + 3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = Nsgn & ~(UfSubBySmallNum & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~Nsgn & ~(UfSubBySmallNum & ~UfRound);//round up - 3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude + 3'b010: UfCalcPlus1 = Nsgn;//round down + 3'b011: UfCalcPlus1 = ~Nsgn;//round up + 3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase - // Determine if you subtract 1 - case (Frm) - 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~R;//round to zero - 3'b010: CalcMinus1 = ~Nsgn & ~R & SubBySmallNum;//round down - 3'b011: CalcMinus1 = Nsgn & ~R & SubBySmallNum;//round up - 3'b100: CalcMinus1 = 0;//round to nearest max magnitude - default: CalcMinus1 = 1'bx; - endcase end @@ -295,26 +278,25 @@ module round( assign Plus1 = CalcPlus1 & (S | R); assign FpPlus1 = Plus1&~(ToInt&CvtOp); assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky - assign Minus1 = CalcMinus1 & (S | R); // Compute rounded result if (`FPSIZES == 1) begin - assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1}; + assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; end else if (`FPSIZES == 2) begin // \/FLEN+1 // | NE+2 | NF | // '-NE+2-^----NF1----^ // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 - assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} : - Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + assign RoundAdd = OutFmt ? {{{`FLEN{1'b0}}}, FpPlus1} : + {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; end else if (`FPSIZES == 3) begin always_comb begin case (OutFmt) - `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; - `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; - `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; + `FMT: RoundAdd = {{{`FLEN{1'b0}}}, FpPlus1}; + `FMT1: RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + `FMT2: RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; default: RoundAdd = (`FLEN+1)'(0); endcase end @@ -322,10 +304,10 @@ module round( end else if (`FPSIZES == 4) begin always_comb begin case (OutFmt) - 2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1}; - 2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; - 2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; - 2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; + 2'h3: RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; + 2'h1: RoundAdd = {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; + 2'h0: RoundAdd = {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; + 2'h2: RoundAdd = {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; endcase end diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 5a7e96e2..b1bf6f56 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -40,6 +40,7 @@ module srtradix4( input logic [`DIVLEN-1:0] X, input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1:0] Quot, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] FirstWS, FirstWC, @@ -106,9 +107,9 @@ module srtradix4( // if starting a new divison set Q to 0 and QM to -1 mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); - flop #(`QLEN) QMreg(clk, QMMux, QM[0]); + flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Quot = Q[0]; + assign Quot = NegSticky ? QM[0] : Q[0]; assign FirstWS = WS[0]; assign FirstWC = WC[0]; diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 21e35c36..481b1b22 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -44,7 +44,7 @@ module srtfsm( output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivStickyE, output logic DivDone, - output logic DivNegStickyE, + output logic NegSticky, output logic DivBusy ); @@ -62,7 +62,7 @@ module srtfsm( assign DivStickyE = |W; assign DivDone = (state == DONE); assign W = WC+WS; - assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? + assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? assign EarlyTermShiftE = step; always_ff @(posedge clk) begin diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 2aec1ab1..033045e7 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -681,7 +681,7 @@ module testbenchfp; postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp), .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), - .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .DivNegSticky, + .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, @@ -697,8 +697,8 @@ module testbenchfp; .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + .XInfE(XInf), .YInfE(YInf), .NegSticky(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .NegSticky(DivNegSticky), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0; @@ -854,7 +854,7 @@ end // check if result is correct // - wait till the division result is done or one extra cylcle for early termination (to simulate the EM pipline stage) - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~(DivBusy|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&~((DivBusy===1'b1)|DivStart)&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); From 7e163e22a3eb17a876f574089ec8722cc2e1140e Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 13 Jul 2022 15:28:22 -0700 Subject: [PATCH 19/36] some code cleanup --- pipelined/src/fpu/fma.sv | 11 ++++------- pipelined/src/fpu/postprocess.sv | 7 +++---- pipelined/src/fpu/round.sv | 31 ++++++------------------------- 3 files changed, 13 insertions(+), 36 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 039876e9..44cd3616 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -51,7 +51,6 @@ module fma( logic [3*`NF+5:0] Am; // addend aligned's mantissa for addition in U(NF+5.2NF+1) logic [3*`NF+6:0] AmInv; // aligned addend's mantissa possibly inverted logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed - logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum /////////////////////////////////////////////////////////////////////////////// // Calculate the product // - When multipliying two fp numbers, add the exponents @@ -74,7 +73,7 @@ module fma( sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, - .Ps, .As, .Am, .ZmSticky, .KillProd); + .Am, .ZmSticky, .KillProd); @@ -82,7 +81,7 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .PreSum, .NegPreSum, .InvA, .XZero, .YZero, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm); loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -143,7 +142,6 @@ endmodule module align( - input logic As, Ps, input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format input logic [`NF:0] Zm, // significand in U(0.NF) format] input logic XZero, YZero, ZZero, // is the input zero @@ -224,14 +222,13 @@ module add( input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic KillProd, // should the product be set to 0 input logic ZmSticky, - input logic XZero, YZero, // is the input zero output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend - output logic [3*`NF+5:0] Sm, // the positive sum - output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum + output logic [3*`NF+5:0] Sm // the positive sum ); + logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum /////////////////////////////////////////////////////////////////////////////// // Addition diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index d7fcb2a0..3060e51d 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -84,7 +84,6 @@ module postprocess ( logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) logic R; // bits needed to determine rounding - logic [`FLEN:0] RoundAdd; // how much to add to the result logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result @@ -200,10 +199,10 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, .Xs, .Ys, .CvtCs, .Nsgn); - round round(.OutFmt, .Frm, .S, .FmaZmSticky, .ZZero, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, - .FmaInvA, .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, + round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, + .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, .DivSticky, .DivDone, - .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .RoundAdd, .UfLSBRes, .Nexp); + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index e2b9cb3e..38bacce0 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -48,8 +48,6 @@ module round( input logic CvtResUf, input logic [`CORRSHIFTSZ-1:0] Nfrac, input logic FmaZmSticky, // addend's sticky bit - input logic ZZero, // is Z zero - input logic FmaInvA, // invert Z input logic [`NE+1:0] FmaSe, // exponent of the normalized sum input logic Nsgn, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -62,11 +60,10 @@ module round( output logic S, // sticky bit output logic [`NE+1:0] Nexp, output logic Plus1, - output logic [`FLEN:0] RoundAdd, // how much to add to the result output logic R, UfLSBRes // bits needed to calculate rounding ); logic LSBRes; // bit used for rounding - least significant bit of the normalized sum - logic UfCalcPlus1, CalcMinus1, Minus1; // do you add or subtract on from the result + logic UfCalcPlus1; logic NormSumSticky; // normalized sum's sticky bit logic UfSticky; // sticky bit for underlow calculation logic [`NF-1:0] RoundFrac; @@ -74,6 +71,7 @@ module round( logic UfRound; logic FpRound, FpLSBRes, FpUfRound; logic CalcPlus1, FpPlus1; + logic [`FLEN:0] RoundAdd; // how much to add to the result /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -288,30 +286,13 @@ module round( // | NE+2 | NF | // '-NE+2-^----NF1----^ // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 - assign RoundAdd = OutFmt ? {{{`FLEN{1'b0}}}, FpPlus1} : - {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; + assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt}; end else if (`FPSIZES == 3) begin - always_comb begin - case (OutFmt) - `FMT: RoundAdd = {{{`FLEN{1'b0}}}, FpPlus1}; - `FMT1: RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)}; - `FMT2: RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)}; - default: RoundAdd = (`FLEN+1)'(0); - endcase - end + assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)}; - end else if (`FPSIZES == 4) begin - always_comb begin - case (OutFmt) - 2'h3: RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; - 2'h1: RoundAdd = {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)}; - 2'h0: RoundAdd = {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)}; - 2'h2: RoundAdd = {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)}; - endcase - end - - end + end else if (`FPSIZES == 4) + assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; // determine the result to be roundned assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; From 7629173b152f6ab6d12c7a6070c5db09bbb61d3d Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 13 Jul 2022 22:42:39 +0000 Subject: [PATCH 20/36] DIVLEN and counter updated for sqrt computation and rounding --- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/srt/srt-waves.do | 2 +- pipelined/srt/srt.sv | 35 +++++++++---------------- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index ad52be2e..73810c68 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -103,7 +103,7 @@ // division constants `define RADIX 32'h4 `define DIVCOPIES 32'h4 -`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 1)) +`define DIVLEN ((`NF < `XLEN) ? (`XLEN + 2) : (`NF + 2)) `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do index 340c5b1f..1e0c3f28 100644 --- a/pipelined/srt/srt-waves.do +++ b/pipelined/srt/srt-waves.do @@ -1,5 +1,5 @@ add wave -noupdate /testbench/* add wave -noupdate /testbench/srt/* -add wave -noupdate /testbench/srt/otfc2/* +add wave -noupdate /testbench/srt/sotfc2/* add wave -noupdate /testbench/srt/preproc/* add wave -noupdate /testbench/srt/divcounter/* diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 4cd23488..74ce48cd 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -29,8 +29,8 @@ //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" -`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 0) -`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 1)) +`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF + 2) : 2) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 2 : (`NF - `XLEN + 2)) module srt ( input logic clk, @@ -49,7 +49,7 @@ module srt ( input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic rsign, done, - output logic [`DIVLEN-1:0] Rem, Quot, // *** later handle integers + output logic [`DIVLEN-3:0] Rem, Quot, // *** later handle integers output logic [`NE-1:0] rExp, output logic [3:0] Flags ); @@ -164,7 +164,7 @@ module srtpreproc ( assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // Number of cycles of divider - assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN + 2); + assign dur = Int ? (intExp & {7{~intExp[6]}}) : (7)'(`DIVLEN); endmodule ///////////////////////////////// @@ -226,26 +226,16 @@ endmodule /////////////////////////////////// // On-The-Fly Converter, Radix 2 // /////////////////////////////////// -module otfc2 #(parameter N=64) ( +module otfc2 #(parameter N=66) ( input logic clk, input logic Start, input logic qp, qz, qn, - output logic [N-1:0] r + output logic [N-3:0] r ); - // The on-the-fly converter transfers the quotient - // bits to the quotient as they come. - // - // This code follows the psuedocode presented in the - // floating point chapter of the book. Right now, - // it is written for Radix-2 division. - // - // QM is Q-1. It allows us to write negative bits - // without using a costly CPA. + // bits to the quotient as they come. + // Use this otfc for division only. logic [N+2:0] Q, QM, QNext, QMNext, QMMux; - // QR and QMR are the shifted versions of Q and QM. - // They are treated as [N-1:r] size signals, and - // discard the r most significant bits of Q and QM. logic [N+1:0] QR, QMR; flopr #(N+3) Qreg(clk, Start, QNext, Q); @@ -266,7 +256,7 @@ module otfc2 #(parameter N=64) ( QMNext = {QMR, 1'b0}; end end - assign r = Q[N+2] ? Q[N+1:2] : Q[N:1]; + assign r = Q[N] ? Q[N-1:2] : Q[N-2:1]; endmodule @@ -278,13 +268,12 @@ module sotfc2( input logic Start, input logic sp, sn, input logic [`DIVLEN+3:0] C, - output logic [`DIVLEN-1:0] Sq, + output logic [`DIVLEN-3:0] Sq, output logic [`DIVLEN+3:0] F ); - - // The on-the-fly converter transfers the square root // bits to the quotient as they come. + // Use this otfc for division and square root. logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); @@ -303,7 +292,7 @@ module sotfc2( SMNext = SM | ((C << 2) & ~(C << 1)); end end - assign Sq = S[`DIVLEN-1:0]; + assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:2] : S[`DIVLEN-2:1]; fsel2 fsel(sp, sn, C, S, SM, F); From e5a8ac2a442c1d2afea46a56ddf9eea8b9d43029 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 13 Jul 2022 23:44:54 +0000 Subject: [PATCH 21/36] renamed a file to fit diagram --- pipelined/src/fpu/postprocess.sv | 2 +- pipelined/src/fpu/{lzacorrection.sv => shiftcorrection.sv} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename pipelined/src/fpu/{lzacorrection.sv => shiftcorrection.sv} (99%) diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 3060e51d..bc9c46a2 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -181,7 +181,7 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - lzacorrection lzacorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); diff --git a/pipelined/src/fpu/lzacorrection.sv b/pipelined/src/fpu/shiftcorrection.sv similarity index 99% rename from pipelined/src/fpu/lzacorrection.sv rename to pipelined/src/fpu/shiftcorrection.sv index eb9d3559..f12cb831 100644 --- a/pipelined/src/fpu/lzacorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -28,7 +28,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" -module lzacorrection( +module shiftcorrection( input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction input logic FmaOp, input logic DivOp, From 8506d2be4cb66eec64e177bcb7ca98ac485a3cbd Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 14 Jul 2022 00:01:07 +0000 Subject: [PATCH 22/36] fixed uncommented line in makefile --- tests/riscof/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 621a5b54..af67a535 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -8,7 +8,7 @@ wally_workdir = $(work)/wally-riscv-arch-test current_dir = $(shell pwd) XLEN ?= 64 -all: root build_arch build_wally memfile +all: root build_arch #build_wally memfile root: mkdir -p $(work_dir) From f49c2a969f3b7537a3fe189f2ab3f1941ae05987 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 14 Jul 2022 00:39:30 +0000 Subject: [PATCH 23/36] S and SM are updating but are not correct yet --- pipelined/srt/srt.sv | 12 ++++++------ pipelined/srt/testbench.sv | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 74ce48cd..b87fabfe 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -215,7 +215,7 @@ module fsel2 ( // Generate for both positive and negative bits assign FP = ~S & C; assign FN = SM | (C & (~C << 2)); - assign FZ = {(`DIVLEN+4){1'B0}}; + assign FZ = {(`DIVLEN+4){1'b0}}; // Choose which adder input will be used @@ -276,20 +276,20 @@ module sotfc2( // Use this otfc for division and square root. logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; - flopr #(`DIVLEN+4) Sreg(clk, Start, SMNext, SM); + flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux); - flop #(`DIVLEN+4) SMreg(clk, SMux, S); + flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin if (sp) begin - SNext = S | ((C << 2) & ~(C << 1)); + SNext = S | ((C << 1) & ~(C << 2)); SMNext = S; end else if (sn) begin - SNext = SM | ((C << 2) & ~(C << 1)); + SNext = SM | ((C << 1) & ~(C << 2)); SMNext = SM; end else begin // If sp and sn are not true, then sz is SNext = S; - SMNext = SM | ((C << 2) & ~(C << 1)); + SMNext = SM | ((C << 1) & ~(C << 2)); end end assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:2] : S[`DIVLEN-2:1]; diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index b83e6b00..02cd0bca 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -72,7 +72,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b0; + assign Sqrt = 1'b1; // Divider srt srt(.clk, .Start(req), @@ -155,7 +155,7 @@ module testbench; req <= #5 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; - if (rExp !== correctr[62:52]) // check if accurate to 1 ulp + if ((rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp begin errors = errors + 1; $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp); From b069cfbec2201566fbb02bd0c532362e05e12101 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 14 Jul 2022 18:16:00 +0000 Subject: [PATCH 24/36] fixed error in divsqrt --- pipelined/config/shared/wally-shared.vh | 8 ++++---- pipelined/regression/sim-testfloat | 4 ++-- pipelined/regression/sim-testfloat-batch | 2 ++ pipelined/regression/wave-fpu.do | 18 +++++++++--------- pipelined/src/fpu/divshiftcalc.sv | 2 +- pipelined/src/fpu/divsqrt.sv | 4 ++-- pipelined/testbench/testbench-fp.sv | 9 ++++----- 7 files changed, 24 insertions(+), 23 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 73810c68..5dc008bb 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -97,14 +97,14 @@ `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) -`define NORMSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+9)) -`define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) +`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+3) : (3*`NF+9)) +`define CORRSHIFTSZ ((`DIVRESLEN+`NF+3) > (3*`NF+8) ? (`DIVRESLEN+`NF+3) : (3*`NF+6)) // division constants `define RADIX 32'h4 `define DIVCOPIES 32'h4 -`define DIVLEN ((`NF < `XLEN) ? (`XLEN + 2) : (`NF + 2)) -`define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) +`define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) +`define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) `define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)) diff --git a/pipelined/regression/sim-testfloat b/pipelined/regression/sim-testfloat index 18f174a9..25fe09a1 100755 --- a/pipelined/regression/sim-testfloat +++ b/pipelined/regression/sim-testfloat @@ -6,7 +6,7 @@ # fma - test fma # sub - test subtraction # div - test division -# sqrt - test square ro +# sqrt - test square root # all - test everything -vsim -do "do testfloat.do rv64fp mul" +vsim -do "do testfloat.do rv64fp $1" diff --git a/pipelined/regression/sim-testfloat-batch b/pipelined/regression/sim-testfloat-batch index f1178f1d..c7f28a55 100755 --- a/pipelined/regression/sim-testfloat-batch +++ b/pipelined/regression/sim-testfloat-batch @@ -1,7 +1,9 @@ + # cvtint - test integer conversion unit (fcvtint) # cvtfp - test floating-point conversion unit (fcvtfp) # cmp - test comparison unit's LT, LE, EQ opperations (fcmp) # add - test addition +# fma - test fma # sub - test subtraction # div - test division # sqrt - test square root diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 9a3d7e06..9caf75de 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -9,23 +9,23 @@ add wave -noupdate /testbenchfp/Res add wave -noupdate /testbenchfp/Ans add wave -noupdate /testbenchfp/DivStart add wave -noupdate /testbenchfp/DivBusy -add wave -noupdate /testbenchfp/srtfsm/state +add wave -noupdate /testbenchfp/divsqrt/srtfsm/state add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/specialcase/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* -add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/shiftcorrection/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* -add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* -add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* -add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/qsel4/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/otfc4/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/expcalc/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index a4f3feff..3d31d863 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -35,7 +35,7 @@ module divshiftcalc( // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)}; - // *** may be able to reduce shifter size + // *** QLEN can be changed to DIVLEN if we figure out what divLEN is - chenge normshiftsize definifion assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 7e240420..91e07b08 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -59,10 +59,10 @@ module divsqrt( logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.XManE, .Dur, .YManE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); + .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 033045e7..1493903e 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -695,11 +695,10 @@ module testbenchfp; fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), - .XInfE(XInf), .YInfE(YInf), .NegSticky(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .NegSticky(DivNegSticky), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), - .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); + divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp), + .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), + .StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp), + .EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone); assign CmpFlg[3:0] = 0; From 38bbd19abfaa7cda7393c8dc6d3bc727fc2872b8 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 14 Jul 2022 19:38:27 +0000 Subject: [PATCH 25/36] Six tests passing and a bunch of sizizing issues fixed --- pipelined/config/shared/wally-shared.vh | 2 ++ pipelined/srt/sqrttestgen.c | 15 +++++++++--- pipelined/srt/srt.sv | 8 +++---- pipelined/srt/testbench.sv | 32 ++++++++++++------------- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 5dc008bb..1237ef18 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -104,6 +104,8 @@ `define RADIX 32'h4 `define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) +`define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) +`define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index 710fc32f..07f34c3c 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -30,11 +30,11 @@ void main(void) FILE *fptr; double aFrac, rFrac; int aExp, rExp; - double mans[ENTRIES] = {1, 1.5, 1.25, 1.125, 1.0625, + double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625, 1.75, 1.875, 1.99999, 1.1, 1.2, 1.01, 1.001, 1.0001, 2/1.1, 2/1.5, 2/1.25, 2/1.125}; - double exps[ENTRIES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; int i; int bias = 1023; @@ -47,10 +47,19 @@ void main(void) for (i=0; i Date: Thu, 14 Jul 2022 21:19:45 +0000 Subject: [PATCH 26/36] Square root --- pipelined/srt/sqrttestgen.c | 4 ++-- pipelined/srt/srt.sv | 10 +++++----- pipelined/srt/testbench.sv | 3 +-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index 07f34c3c..76c6a664 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -32,9 +32,9 @@ void main(void) int aExp, rExp; double mans[ENTRIES] = {1, 1849.0/1024, 1.25, 1.125, 1.0625, 1.75, 1.875, 1.99999, - 1.1, 1.2, 1.01, 1.001, 1.0001, + 1.1, 1.5, 1.01, 1.001, 1.0001, 2/1.1, 2/1.5, 2/1.25, 2/1.125}; - double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, + double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16}; int i; int bias = 1023; diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 8e143efb..5dcf7e96 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -75,7 +75,7 @@ module srt ( // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz, qn); + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], qp, qz, qn); flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(1) signflop(clk, Start, calcSign, rsign); @@ -169,11 +169,11 @@ endmodule // Quotient Selection, Radix 2 // ///////////////////////////////// module qsel2 ( // *** eventually just change to 4 bits - input logic [`DIVLEN+3:`DIVLEN] ps, pc, + input logic [`DIVLEN+3:`DIVLEN-1] ps, pc, output logic qp, qz, qn ); - logic [`DIVLEN+3:`DIVLEN] p, g; + logic [`DIVLEN+3:`DIVLEN-1] p, g; logic magnitude, sign, cout; // The quotient selection logic is presented for simplicity, not @@ -184,8 +184,8 @@ module qsel2 ( // *** eventually just change to 4 bits assign p = ps ^ pc; assign g = ps & pc; - assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); + assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]); + assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (p[`DIVLEN] & g[`DIVLEN-1])))); assign #1 sign = p[`DIVLEN+3] ^ cout; /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & (ps[52]^pc[52])); diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index bbb6dee2..39696af4 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -160,10 +160,9 @@ module testbench; errors = errors + 1; $display("result was %h, should be %h %h %h\n", r, correctr, diffn, diffp); $display("failed\n"); - $stop; end if (afrac === 52'hxxxxxxxxxxxxx) begin - $display("%d Tests completed successfully", testnum); + $display("%d Tests completed successfully", testnum-errors); $stop; end end end From ec9536f983e5229b5e2d48ce5df4de9a46903a58 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 14 Jul 2022 22:52:09 +0000 Subject: [PATCH 27/36] Square root radix 2 working, does not work with division --- pipelined/srt/srt.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 5dcf7e96..949335bf 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -75,7 +75,7 @@ module srt ( // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], qp, qz, qn); + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn); flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(1) signflop(clk, Start, calcSign, rsign); @@ -170,6 +170,7 @@ endmodule ///////////////////////////////// module qsel2 ( // *** eventually just change to 4 bits input logic [`DIVLEN+3:`DIVLEN-1] ps, pc, + input logic Sqrt, output logic qp, qz, qn ); @@ -185,7 +186,7 @@ module qsel2 ( // *** eventually just change to 4 bits assign g = ps & pc; assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (p[`DIVLEN] & g[`DIVLEN-1])))); + assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1]))))); assign #1 sign = p[`DIVLEN+3] ^ cout; /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & (ps[52]^pc[52])); From e2510222696dbb3c43b26bcc42365218861db961 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 15 Jul 2022 20:16:59 +0000 Subject: [PATCH 28/36] merged floating-point radix-2 divider with radix-4 --- pipelined/config/shared/wally-shared.vh | 11 +- pipelined/regression/sim-wally | 2 +- pipelined/regression/wave-fpu.do | 18 +- pipelined/src/fpu/divshiftcalc.sv | 8 +- pipelined/src/fpu/divsqrt.sv | 11 +- pipelined/src/fpu/fpu.sv | 2 +- pipelined/src/fpu/postprocess.sv | 2 +- pipelined/src/fpu/shiftcorrection.sv | 6 +- pipelined/src/fpu/srt-radix4.sv | 359 ------------------------ pipelined/src/fpu/srtfsm.sv | 14 +- pipelined/src/fpu/srtpreproc.sv | 10 +- pipelined/testbench/testbench-fp.sv | 2 +- 12 files changed, 52 insertions(+), 393 deletions(-) delete mode 100644 pipelined/src/fpu/srt-radix4.sv diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 1237ef18..015ef261 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -97,19 +97,20 @@ `define CVTLEN ((`NF<`XLEN) ? (`XLEN) : (`NF)) `define LLEN ((`FLEN<`XLEN) ? (`XLEN) : (`FLEN)) `define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) -`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+3) : (3*`NF+9)) -`define CORRSHIFTSZ ((`DIVRESLEN+`NF+3) > (3*`NF+8) ? (`DIVRESLEN+`NF+3) : (3*`NF+6)) +`define NORMSHIFTSZ ((`QLEN+`NF+3) > (3*`NF+8) ? (`QLEN+`NF+1) : (3*`NF+9)) +`define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h4 -`define DIVCOPIES 32'h4 +`define RADIX 32'h2 +`define DIVCOPIES 32'h1 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) `define EXTRAINTBITS ((`NF<(`XLEN)) ? 0 : (`NF - `XLEN + 3)) `define DIVRESLEN ((`NF>`XLEN) ? `NF+4 : `XLEN) `define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) -`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)) +// one interation is required for the integer bit for minimally redundent radix-4 +`define FPDUR ((`DIVLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally index 06985148..51c8b3ed 100755 --- a/pipelined/regression/sim-wally +++ b/pipelined/regression/sim-wally @@ -1,2 +1,2 @@ -vsim -do "do wally-pipelined.do rv32gc arch32i" +vsim -do "do wally-pipelined.do rv64gc arch64d" diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 9caf75de..98c72f17 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -20,12 +20,20 @@ add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/divshiftcalc/* add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* -add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/qsel4/* -add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srtradix4/genblk1[0]/divinteration/otfc4/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WC +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WS +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WCA +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/WSA +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/Q +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QM +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QNext +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/QMNext +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/* +add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/* +# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* +# add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* -add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtradix4/expcalc/* +add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index 3d31d863..af321b25 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,7 +1,7 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`QLEN-1:0] Quot, + input logic [`QLEN-1-(`RADIX/4):0] Quot, input logic [`FMTBITS-1:0] Fmt, input logic [`DURLEN-1:0] DivEarlyTermShift, input logic [`NE+1:0] DivCalcExp, @@ -30,12 +30,12 @@ module divshiftcalc( // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) // inital Left shift amount = NF + // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit assign NormShift = (`NE+2)'(`NF); // if the shift amount is negitive then dont shift (keep sticky bit) // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) - assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, ($clog2(`LOGR*`DIVCOPIES))'(0)}; + assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; - // *** QLEN can be changed to DIVLEN if we figure out what divLEN is - chenge normshiftsize definifion - assign DivShiftIn = {{`NF-1{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+1-`NF{1'b0}}}; + assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index 91e07b08..cbf7f95f 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -47,22 +47,23 @@ module divsqrt( output logic DivDone, output logic [`NE+1:0] DivCalcExpM, output logic [`DURLEN-1:0] EarlyTermShiftM, - output logic [`QLEN-1:0] QuotM + output logic [`QLEN-1-(`RADIX/4):0] QuotM // output logic [`XLEN-1:0] RemM, ); logic [`DIVLEN+3:0] NextWSN, NextWCN; logic [`DIVLEN+3:0] WS, WC; + logic [`DIVLEN+3:0] StickyWSA; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVLEN-1:0] X; logic [`DIVLEN-1:0] Dpreproc; logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.XManE, .Dur, .YManE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, - .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, - .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); + .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); + srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 5428481d..1bbd0aea 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -125,7 +125,7 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`QLEN-1:0] QuotM; + logic [`QLEN-1-(`RADIX/4):0] QuotM; logic [`NE+1:0] DivCalcExpE, DivCalcExpM; logic DivStickyE, DivStickyM; logic DivDoneM; diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index bc9c46a2..e0eb50ac 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -58,7 +58,7 @@ module postprocess ( input logic DivSticky, input logic DivDone, input logic [`NE+1:0] DivCalcExp, - input logic [`QLEN-1:0] Quot, + input logic [`QLEN-1-(`RADIX/4):0] Quot, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index f12cb831..ecfd9ba0 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -43,7 +43,7 @@ module shiftcorrection( output logic [`NE+1:0] FmaSe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction - logic [`CORRSHIFTSZ:0] CorrQuotShifted; + logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; logic ResDenorm; // is the result denormalized logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction @@ -53,9 +53,9 @@ module shiftcorrection( // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = {LZAPlus2|(DivCalcExp==1&~LZAPlus2) ? Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ] : {Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ], 1'b0}, 1'b0}; + assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted[`CORRSHIFTSZ-1:0] : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv deleted file mode 100644 index b1bf6f56..00000000 --- a/pipelined/src/fpu/srt-radix4.sv +++ /dev/null @@ -1,359 +0,0 @@ -/////////////////////////////////////////// -// srt.sv -// -// Written: David_Harris@hmc.edu, me@KatherineParry.com, Cedar Turek -// Modified:13 January 2022 -// -// Purpose: Combined Divide and Square Root Floating Point and Integer Unit -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module srtradix4( - input logic clk, - input logic DivStart, - input logic DivBusy, - input logic [`FMTBITS-1:0] FmtE, - input logic [`NE-1:0] XExpE, YExpE, - input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, - output logic [`QLEN-1:0] Quot, - output logic [`DIVLEN+3:0] NextWSN, NextWCN, - output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] DivCalcExpM, - output logic [`XLEN-1:0] Rem -); - - - /* verilator lint_off UNOPTFLAT */ - logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; - logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; - logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; - /* verilator lint_on UNOPTFLAT */ - logic [`DIVLEN+3:0] WSN, WCN; - logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [`NE+1:0] DivCalcExp; - logic [$clog2(`XLEN+1)-1:0] intExp; - logic intSign; - logic [`QLEN-1:0] QMMux; - - // Top Muxes and Registers - // When start is asserted, the inputs are loaded into the divider. - // Otherwise, the divisor is retained and the partial remainder - // is fed back for the next iteration. - // - when the start signal is asserted X and 0 are loaded into WS and WC - // - otherwise load WSA into the flipflop - // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) - // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; - assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; - mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); - flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); - mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); - flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); - flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); - - - // Divisor Selections - // - choose the negitive version of what's being selected - assign DBar = ~D; - assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; - assign D2 = {D[`DIVLEN+2:0], 1'b0}; - - genvar i; - generate - for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin - divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); - if(i<(`DIVCOPIES-1)) begin - assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; - assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; - assign Q[i+1] = QNext[i]; - assign QM[i+1] = QMNext[i]; - end - end - endgenerate - - // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); - flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); - flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - - assign Quot = NegSticky ? QM[0] : Q[0]; - assign FirstWS = WS[0]; - assign FirstWC = WC[0]; - - expcalc expcalc(.FmtE, .XExpE, .YExpE, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); - -endmodule - -//////////////// -// Submodules // -//////////////// - - /* verilator lint_off UNOPTFLAT */ -module divinteration ( - input logic clk, - input logic DivStart, - input logic DivBusy, - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] DBar, D2, DBar2, - input logic [`QLEN-1:0] Q, QM, - input logic [`DIVLEN+3:0] WS, WC, - output logic [`QLEN-1:0] QNext, QMNext, - output logic [`DIVLEN+3:0] WSA, WCA -); - /* verilator lint_on UNOPTFLAT */ - - logic [`DIVLEN+3:0] Dsel; - logic [3:0] q; - - // Quotient Selection logic - // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - // q encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 - qsel4 qsel4(.D, .WS, .WC, .q); - - always_comb - case (q) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = {`DIVLEN+4{1'b0}}; - 4'b0010: Dsel = D; - 4'b0001: Dsel = D2; - default: Dsel = {`DIVLEN+4{1'bx}}; - endcase - - // Partial Product Generation - // WSA, WCA = WS + WC - qD - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); - - otfc4 otfc4(.clk, .DivStart, .DivBusy, .q, .Q, .QM, .QNext, .QMNext); - -endmodule - -module qsel4 ( - input logic [`DIVLEN+3:0] D, - input logic [`DIVLEN+3:0] WS, WC, - output logic [3:0] q -); - logic [6:0] Wmsbs; - logic [7:0] PreWmsbs; - logic [2:0] Dmsbs; - assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; - assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; - // D = 0001.xxx... - // Dmsbs = | | - // W = xxxx.xxx... - // Wmsbs = | | - - logic [3:0] QSel4[1023:0]; - - always_comb begin - integer d, w, i, w2; - for(d=0; d<8; d++) - for(w=0; w<128; w++)begin - i = d*128+w; - w2 = w-128*(w>=64); // convert to two's complement - case(d) - 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-4) QSel4[i] = 4'b0000; - else if(w2>=-13) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 1: if(w2>=14) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-15) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 2: if(w2>=15) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-16) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 3: if(w2>=16) QSel4[i] = 4'b1000; - else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-18) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 4: if(w2>=18) QSel4[i] = 4'b1000; - else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 5: if(w2>=20) QSel4[i] = 4'b1000; - else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 6: if(w2>=20) QSel4[i] = 4'b1000; - else if(w2>=8) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-22) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 7: if(w2>=24) QSel4[i] = 4'b1000; - else if(w2>=8) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-24) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - endcase - end - end - assign q = QSel4[{Dmsbs,Wmsbs}]; - -endmodule - -/////////////////////////////////// -// On-The-Fly Converter, Radix 2 // -/////////////////////////////////// -module otfc4 ( - input logic clk, - input logic DivStart, - input logic DivBusy, - input logic [3:0] q, - input logic [`QLEN-1:0] Q, QM, - output logic [`QLEN-1:0] QNext, QMNext -); - - // The on-the-fly converter transfers the quotient - // bits to the quotient as they come. - // - // This code follows the psuedocode presented in the - // floating point chapter of the book. Right now, - // it is written for Radix-4 division. - // - // QM is Q-1. It allows us to write negative bits - // without using a costly CPA. - - // QR and QMR are the shifted versions of Q and QM. - // They are treated as [N-1:r] size signals, and - // discard the r most significant bits of Q and QM. - logic [`QLEN-3:0] QR, QMR; - - // shift Q (quotent) and QM (quotent-1) - // if q = 2 Q = {Q, 10} QM = {Q, 01} - // else if q = 1 Q = {Q, 01} QM = {Q, 00} - // else if q = 0 Q = {Q, 00} QM = {QM, 11} - // else if q = -1 Q = {QM, 11} QM = {QM, 10} - // else if q = -2 Q = {QM, 10} QM = {QM, 01} - // *** how does the 0 concatination numbers work? - - assign QR = Q[`QLEN-3:0]; - assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM - always_comb begin - if (q[3]) begin // +2 - QNext = {QR, 2'b10}; - QMNext = {QR, 2'b01}; - end else if (q[2]) begin // +1 - QNext = {QR, 2'b01}; - QMNext = {QR, 2'b00}; - end else if (q[1]) begin // -1 - QNext = {QMR, 2'b11}; - QMNext = {QMR, 2'b10}; - end else if (q[0]) begin // -2 - QNext = {QMR, 2'b10}; - QMNext = {QMR, 2'b01}; - end else begin // 0 - QNext = {QR, 2'b00}; - QMNext = {QMR, 2'b11}; - end - end - // Final Quoteint is in the range [.5, 2) - -endmodule - - - -///////// -// csa // -///////// -module csa #(parameter N=69) ( - input logic [N-1:0] in1, in2, in3, - input logic cin, - output logic [N-1:0] out1, out2 -); - - // This block adds in1, in2, in3, and cin to produce - // a result out1 / out2 in carry-save redundant form. - // cin is just added to the least significant bit and - // is Startuired to handle adding a negative divisor. - // Fortunately, the carry (out2) is shifted left by one - // bit, leaving room in the least significant bit to - // insert cin. - - assign out1 = in1 ^ in2 ^ in3; - assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | - (in2[N-2:0] & in3[N-2:0]), cin}; -endmodule - -module expcalc( - input logic [`FMTBITS-1:0] FmtE, - input logic [`NE-1:0] XExpE, YExpE, - input logic XZeroE, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] DivCalcExp - ); - logic [`NE-2:0] Bias; - - if (`FPSIZES == 1) begin - assign Bias = (`NE-1)'(`BIAS); - - end else if (`FPSIZES == 2) begin - assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); - - end else if (`FPSIZES == 3) begin - always_comb - case (FmtE) - `FMT: Bias = (`NE-1)'(`BIAS); - `FMT1: Bias = (`NE-1)'(`BIAS1); - `FMT2: Bias = (`NE-1)'(`BIAS2); - default: Bias = 'x; - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (FmtE) - 2'h3: Bias = (`NE-1)'(`Q_BIAS); - 2'h1: Bias = (`NE-1)'(`D_BIAS); - 2'h0: Bias = (`NE-1)'(`S_BIAS); - 2'h2: Bias = (`NE-1)'(`H_BIAS); - endcase - end - // correct exponent for denormalized input's normalization shifts - assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; - endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 481b1b22..634ecc1d 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -38,8 +38,9 @@ module srtfsm( input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, input logic DivStart, - input logic StallE, - input logic StallM, + input logic StallE, + input logic StallM, + input logic [`DIVLEN+3:0] StickyWSA, input logic [`DURLEN-1:0] Dur, output logic [`DURLEN-1:0] EarlyTermShiftE, output logic DivStickyE, @@ -59,7 +60,14 @@ module srtfsm( //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); - assign DivStickyE = |W; + // calculate sticky bit + // - there is a chance that a value is subtracted infinitly, resulting in an exact QM result + // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant + // radix-4 division can't create a QM that continually adds 0's + if (`RADIX == 2) + assign DivStickyE = |W&~(StickyWSA == WS); + else + assign DivStickyE = |W; assign DivDone = (state == DONE); assign W = WC+WS; assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index 7386332f..b9fb8bb8 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -31,7 +31,7 @@ `include "wally-config.vh" module srtpreproc ( - input logic [`NF:0] XManE, YManE, + input logic [`NF:0] Xm, Ym, output logic [`DIVLEN-1:0] X, output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, @@ -49,16 +49,16 @@ module srtpreproc ( // ***can probably merge X LZC with conversion // cout the number of leading zeros - lzc #(`NF+1) lzcA (XManE, XZeroCnt); - lzc #(`NF+1) lzcB (YManE, YZeroCnt); + lzc #(`NF+1) lzcA (Xm, XZeroCnt); + lzc #(`NF+1) lzcB (Ym, YZeroCnt); // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; // assign PreprocA = ExtraA << zeroCntA; // assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {XManE[`NF-1:0]< Date: Fri, 15 Jul 2022 21:42:45 +0000 Subject: [PATCH 29/36] forgot some files --- pipelined/src/fpu/otfc.sv | 112 +++++++++++++++++ pipelined/src/fpu/qsel.sv | 135 ++++++++++++++++++++ pipelined/src/fpu/srt.sv | 259 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 506 insertions(+) create mode 100644 pipelined/src/fpu/otfc.sv create mode 100644 pipelined/src/fpu/qsel.sv create mode 100644 pipelined/src/fpu/srt.sv diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv new file mode 100644 index 00000000..8d11273a --- /dev/null +++ b/pipelined/src/fpu/otfc.sv @@ -0,0 +1,112 @@ +/////////////////////////////////////////// +// otfc.sv +// +// Written: me@KatherineParry.com, cturek@hmc.edu +// Modified:7/14/2022 +// +// Purpose: On the fly conversion +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module otfc2 ( + input logic qp, qz, + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext +); + // The on-the-fly converter transfers the quotient + // bits to the quotient as they come. + // Use this otfc for division only. + logic [`QLEN-2:0] QR, QMR; + + assign QR = Q[`QLEN-2:0]; + assign QMR = QM[`QLEN-2:0]; // Shifted Q and QM + + always_comb begin + if (qp) begin + QNext = {QR, 1'b1}; + QMNext = {QR, 1'b0}; + end else if (qz) begin + QNext = {QR, 1'b0}; + QMNext = {QMR, 1'b1}; + end else begin // If qp and qz are not true, then qn is + QNext = {QMR, 1'b1}; + QMNext = {QMR, 1'b0}; + end + end + +endmodule + + +module otfc4 ( + input logic [3:0] q, + input logic [`QLEN-1:0] Q, QM, + output logic [`QLEN-1:0] QNext, QMNext +); + + // The on-the-fly converter transfers the quotient + // bits to the quotient as they come. + // + // This code follows the psuedocode presented in the + // floating point chapter of the book. Right now, + // it is written for Radix-4 division. + // + // QM is Q-1. It allows us to write negative bits + // without using a costly CPA. + + // QR and QMR are the shifted versions of Q and QM. + // They are treated as [N-1:r] size signals, and + // discard the r most significant bits of Q and QM. + logic [`QLEN-3:0] QR, QMR; + + // shift Q (quotent) and QM (quotent-1) + // if q = 2 Q = {Q, 10} QM = {Q, 01} + // else if q = 1 Q = {Q, 01} QM = {Q, 00} + // else if q = 0 Q = {Q, 00} QM = {QM, 11} + // else if q = -1 Q = {QM, 11} QM = {QM, 10} + // else if q = -2 Q = {QM, 10} QM = {QM, 01} + + assign QR = Q[`QLEN-3:0]; + assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM + always_comb begin + if (q[3]) begin // +2 + QNext = {QR, 2'b10}; + QMNext = {QR, 2'b01}; + end else if (q[2]) begin // +1 + QNext = {QR, 2'b01}; + QMNext = {QR, 2'b00}; + end else if (q[1]) begin // -1 + QNext = {QMR, 2'b11}; + QMNext = {QMR, 2'b10}; + end else if (q[0]) begin // -2 + QNext = {QMR, 2'b10}; + QMNext = {QMR, 2'b01}; + end else begin // 0 + QNext = {QR, 2'b00}; + QMNext = {QMR, 2'b11}; + end + end + // Final Quoteint is in the range [.5, 2) + +endmodule diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv new file mode 100644 index 00000000..396ca776 --- /dev/null +++ b/pipelined/src/fpu/qsel.sv @@ -0,0 +1,135 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module qsel2 ( // *** eventually just change to 4 bits + input logic [`DIVLEN+3:`DIVLEN] ps, pc, + output logic qp, qz//, qn +); + + logic [`DIVLEN+3:`DIVLEN] p, g; + logic magnitude, sign, cout; + + // The quotient selection logic is presented for simplicity, not + // for efficiency. You can probably optimize your logic to + // select the proper divisor with less delay. + + // Quotient equations from EE371 lecture notes 13-20 + assign p = ps ^ pc; + assign g = ps & pc; + + assign magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); + assign cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); + assign sign = p[`DIVLEN+3] ^ cout; +/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & + (ps[52]^pc[52])); + assign #1 sign = (ps[55]^pc[55])^ + (ps[54] & pc[54] | ((ps[54]^pc[54]) & + (ps[53]&pc[53] | ((ps[53]^pc[53]) & + (ps[52]&pc[52]))))); */ + + // Produce quotient = +1, 0, or -1 + assign qp = magnitude & ~sign; + assign qz = ~magnitude; +// assign #1 qn = magnitude & sign; +endmodule + +module qsel4 ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] WS, WC, + output logic [3:0] q +); + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] Dmsbs; + assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; + assign Wmsbs = PreWmsbs[7:1]; + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + // D = 0001.xxx... + // Dmsbs = | | + // W = xxxx.xxx... + // Wmsbs = | | + + logic [3:0] QSel4[1023:0]; + + always_comb begin + integer d, w, i, w2; + for(d=0; d<8; d++) + for(w=0; w<128; w++)begin + i = d*128+w; + w2 = w-128*(w>=64); // convert to two's complement + case(d) + 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-4) QSel4[i] = 4'b0000; + else if(w2>=-13) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 1: if(w2>=14) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-15) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 2: if(w2>=15) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-16) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 3: if(w2>=16) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-18) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 4: if(w2>=18) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 5: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 6: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-22) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 7: if(w2>=24) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-24) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + endcase + end + end + assign q = QSel4[{Dmsbs,Wmsbs}]; + +endmodule diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv new file mode 100644 index 00000000..9e031511 --- /dev/null +++ b/pipelined/src/fpu/srt.sv @@ -0,0 +1,259 @@ +/////////////////////////////////////////// +// srt.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module srt( + input logic clk, + input logic DivStart, + input logic DivBusy, + input logic [`FMTBITS-1:0] FmtE, + input logic [`NE-1:0] Xe, Ye, + input logic XZeroE, YZeroE, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, + output logic [`QLEN-1-(`RADIX/4):0] Quot, + output logic [`DIVLEN+3:0] NextWSN, NextWCN, + output logic [`DIVLEN+3:0] StickyWSA, + output logic [`DIVLEN+3:0] FirstWS, FirstWC, + output logic [`NE+1:0] DivCalcExpM, + output logic [`XLEN-1:0] Rem +); + + + /* verilator lint_off UNOPTFLAT */ + logic [`DIVLEN+3:0] WSA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WCA[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WS[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] WC[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] Q[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; + logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] WSN, WCN; + logic [`DIVLEN+3:0] D, DBar, D2, DBar2; + logic [`NE+1:0] DivCalcExp; + logic [$clog2(`XLEN+1)-1:0] intExp; + logic intSign; + logic [`QLEN-1:0] QMMux; + + // Top Muxes and Registers + // When start is asserted, the inputs are loaded into the divider. + // Otherwise, the divisor is retained and the partial remainder + // is fed back for the next iteration. + // - when the start signal is asserted X and 0 are loaded into WS and WC + // - otherwise load WSA into the flipflop + // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) + // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized + if (`RADIX == 2) begin : nextw + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+2:0], 1'b0}; + end else begin + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + end + + mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); + flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); + mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); + flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); + flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); + flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); + + + // Divisor Selections + // - choose the negitive version of what's being selected + assign DBar = ~D; + if(`RADIX == 4) begin : d2 + assign DBar2 = {~D[`DIVLEN+2:0], 1'b1}; + assign D2 = {D[`DIVLEN+2:0], 1'b0}; + end + + genvar i; + generate + for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations + divinteration divinteration(.D, .DBar, .D2, .DBar2, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + if(i<(`DIVCOPIES-1)) begin + if (`RADIX==2)begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 1'b0}; + end else begin + assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; + assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + end + assign Q[i+1] = QNext[i]; + assign QM[i+1] = QMNext[i]; + end + end + endgenerate + + // if starting a new divison set Q to 0 and QM to -1 + mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); + flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); + flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); + + assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + assign FirstWS = WS[0]; + assign FirstWC = WC[0]; + if(`RADIX==2) + if (`DIVCOPIES == 1) + assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0}; + else + assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; + + expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); + +endmodule + +//////////////// +// Submodules // +//////////////// + + /* verilator lint_off UNOPTFLAT */ +module divinteration ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] DBar, D2, DBar2, + input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] WS, WC, + output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] WSA, WCA +); + /* verilator lint_on UNOPTFLAT */ + + logic [`DIVLEN+3:0] Dsel; + logic [3:0] q; + logic qp, qz;//, qn; + + // Quotient Selection logic + // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) + // q encoding: + // 1000 = +2 + // 0100 = +1 + // 0000 = 0 + // 0010 = -1 + // 0001 = -2 + if(`RADIX == 2) begin : qsel + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn); + end else begin + qsel4 qsel4(.D, .WS, .WC, .q); + end + + if(`RADIX == 2) begin : dsel + assign Dsel = {`DIVLEN+4{~qz}}&(qp ? DBar : D); + end else begin + always_comb + case (q) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = '0; + 4'b0010: Dsel = D; + 4'b0001: Dsel = D2; + default: Dsel = 'x; + endcase + end + // Partial Product Generation + // WSA, WCA = WS + WC - qD + if (`RADIX == 2) begin : csa + csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + end else begin + csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + end + + if (`RADIX == 2) begin : otfc + otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); + end else begin + otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); + end + +endmodule + + +///////// +// csa // +///////// +module csa #(parameter N=69) ( + input logic [N-1:0] in1, in2, in3, + input logic cin, + output logic [N-1:0] out1, out2 +); + + // This block adds in1, in2, in3, and cin to produce + // a result out1 / out2 in carry-save redundant form. + // cin is just added to the least significant bit and + // is Startuired to handle adding a negative divisor. + // Fortunately, the carry (out2) is shifted left by one + // bit, leaving room in the least significant bit to + // insert cin. + + assign out1 = in1 ^ in2 ^ in3; + assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | + (in2[N-2:0] & in3[N-2:0]), cin}; +endmodule + +module expcalc( + input logic [`FMTBITS-1:0] FmtE, + input logic [`NE-1:0] Xe, Ye, + input logic XZeroE, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + output logic [`NE+1:0] DivCalcExp + ); + logic [`NE-2:0] Bias; + + if (`FPSIZES == 1) begin + assign Bias = (`NE-1)'(`BIAS); + + end else if (`FPSIZES == 2) begin + assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + + end else if (`FPSIZES == 3) begin + always_comb + case (FmtE) + `FMT: Bias = (`NE-1)'(`BIAS); + `FMT1: Bias = (`NE-1)'(`BIAS1); + `FMT2: Bias = (`NE-1)'(`BIAS2); + default: Bias = 'x; + endcase + + end else if (`FPSIZES == 4) begin + always_comb + case (FmtE) + 2'h3: Bias = (`NE-1)'(`Q_BIAS); + 2'h1: Bias = (`NE-1)'(`D_BIAS); + 2'h0: Bias = (`NE-1)'(`S_BIAS); + 2'h2: Bias = (`NE-1)'(`H_BIAS); + endcase + end + // correct exponent for denormalized input's normalization shifts + assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + endmodule \ No newline at end of file From 6e1d4ec4edeccec2d211dee45da04bdce3a5af2d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 16 Jul 2022 17:43:31 -0700 Subject: [PATCH 30/36] restored intPending logic to be sticky for PLIC --- pipelined/src/uncore/plic_apb.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelined/src/uncore/plic_apb.sv b/pipelined/src/uncore/plic_apb.sv index f83033c4..51e94d7f 100644 --- a/pipelined/src/uncore/plic_apb.sv +++ b/pipelined/src/uncore/plic_apb.sv @@ -172,8 +172,8 @@ module plic_apb ( end // pending interrupt requests - //assign nextIntPending = (intPending | requests) & ~intInProgress; // - assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion + assign nextIntPending = (intPending | requests) & ~intInProgress; // dh changed back 7/9/22 see if Buildroot still boots. Confirmed to boot successfully. + //assign nextIntPending = requests; // DH: RT made this change May 2022, but it seems to be a bug to not consider intInProgress; see May 23, 2022 slack discussion flopr #(`N) intPendingFlop(PCLK,~PRESETn,nextIntPending,intPending); // context-dependent signals From 2a965cf634685bee8115063a08612a27de60b9e0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 17 Jul 2022 01:39:57 +0000 Subject: [PATCH 31/36] Don't delete hdl directory at end of run --- synthDC/Makefile | 2 +- synthDC/extractSummary.py | 7 +++++++ synthDC/scripts/synth.tcl | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/synthDC/Makefile b/synthDC/Makefile index 369529e3..98b71942 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -107,7 +107,7 @@ ifeq ($(SAIFPOWER), 1) cp -f ../pipelined/regression/power.saif . endif dc_shell-xg-t -64bit -f scripts/$(NAME).tcl | tee $(OUTPUTDIR)/$(NAME).out - rm -rf $(OUTPUTDIR)/hdl +# rm -rf $(OUTPUTDIR)/hdl rm -rf $(OUTPUTDIR)/WORK rm -rf $(OUTPUTDIR)/alib-52 diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 93363a06..29e1c802 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -78,6 +78,13 @@ def freqPlot(tech, width, config): ''' plots delay, area for syntheses with specified tech, module, width ''' + current_directory = os.getcwd() + final_directory = os.path.join(current_directory, 'plots/wally') +# if not os.path.exists(final_directory): +# os.makedirs(final_directory) +# os.chdir(final_directory) + + freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: if (width == oneSynth.width) & (config == oneSynth.config) & (tech == oneSynth.tech) & ('' == oneSynth.special): diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 251522dc..9b72849f 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -56,7 +56,7 @@ set vhdlout_show_unconnected_pins "true" # Due to parameterized Verilog must use analyze/elaborate and not # read_verilog/vhdl (change to pull in Verilog and/or VHDL) # -set alib_library_analysis_path ./$outputDir +#set alib_library_analysis_path ./$outputDir define_design_lib WORK -path ./$outputDir/WORK analyze -f sverilog -lib WORK $my_verilog_files elaborate $my_toplevel -lib WORK From 3815f197633a140c4237566e460cf8690ec86f88 Mon Sep 17 00:00:00 2001 From: James Stine Date: Sun, 17 Jul 2022 11:06:30 -0500 Subject: [PATCH 32/36] Add import os in extractSummary.py --- synthDC/extractSummary.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index 29e1c802..d4f86fb3 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -11,6 +11,7 @@ import numpy as np from ppa.ppaAnalyze import noOutliers from matplotlib import ticker import argparse +import os def synthsintocsv(): From 2753699fb2737d39027f7801f5686b9722f1a7f2 Mon Sep 17 00:00:00 2001 From: James Stine Date: Sun, 17 Jul 2022 13:00:44 -0500 Subject: [PATCH 33/36] Add back extractSummary mkdir plots --- synthDC/extractSummary.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/synthDC/extractSummary.py b/synthDC/extractSummary.py index d4f86fb3..978365b1 100755 --- a/synthDC/extractSummary.py +++ b/synthDC/extractSummary.py @@ -60,6 +60,7 @@ def synthsintocsv(): writer.writerow([width, config, special, tech, freq, delay, area]) file.close() + def synthsfromcsv(filename): Synth = namedtuple("Synth", "width config special tech freq delay area") with open(filename, newline='') as csvfile: @@ -75,16 +76,15 @@ def synthsfromcsv(filename): allSynths[i] = Synth(*allSynths[i]) return allSynths + def freqPlot(tech, width, config): ''' plots delay, area for syntheses with specified tech, module, width ''' current_directory = os.getcwd() final_directory = os.path.join(current_directory, 'plots/wally') -# if not os.path.exists(final_directory): -# os.makedirs(final_directory) -# os.chdir(final_directory) - + if not os.path.exists(final_directory): + os.makedirs(final_directory) freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: @@ -159,6 +159,7 @@ def areaDelay(tech, delays, areas, labels, fig, ax, norm=False): return fig + def plotFeatures(tech, width, config): delays, areas, labels = ([] for i in range(3)) freq = techdict[tech].targfreq @@ -176,7 +177,8 @@ def plotFeatures(tech, width, config): titlestr = tech+'_'+width+config plt.title(titlestr) plt.savefig('./plots/wally/features_'+titlestr+'.png') - + + def plotConfigs(tech, special=''): delays, areas, labels = ([] for i in range(3)) freq = techdict[tech].targfreq @@ -215,7 +217,8 @@ def normAreaDelay(special=''): ax.set_ylabel('Area (add32)') ax.legend(handles = fullLeg, loc='upper left') plt.savefig('./plots/wally/normAreaDelay.png') - + + def addFO4axis(fig, ax, tech): fo4 = techdict[tech].fo4 From 5bb14788596d9f1cc53803f6d1b1fac06ee1fd93 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 18 Jul 2022 17:31:17 +0000 Subject: [PATCH 34/36] renamed signals in ocde to match book --- pipelined/src/fpu/divshiftcalc.sv | 28 ++-- pipelined/src/fpu/fcvt.sv | 2 +- pipelined/src/fpu/flags.sv | 6 +- pipelined/src/fpu/fmashiftcalc.sv | 26 ++-- pipelined/src/fpu/fpu.sv | 6 +- pipelined/src/fpu/postprocess.sv | 46 +++---- pipelined/src/fpu/resultsign.sv | 15 +-- pipelined/src/fpu/round.sv | 186 +++++++++++++-------------- pipelined/src/fpu/roundsign.sv | 8 +- pipelined/src/fpu/shiftcorrection.sv | 16 +-- pipelined/testbench/testbench-fp.sv | 6 +- 11 files changed, 171 insertions(+), 174 deletions(-) diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv index af321b25..3fbc9419 100644 --- a/pipelined/src/fpu/divshiftcalc.sv +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -1,10 +1,10 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`QLEN-1-(`RADIX/4):0] DivQm, input logic [`FMTBITS-1:0] Fmt, input logic [`DURLEN-1:0] DivEarlyTermShift, - input logic [`NE+1:0] DivCalcExp, + input logic [`NE+1:0] DivQe, output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, output logic [`NORMSHIFTSZ-1:0] DivShiftIn, output logic DivResDenorm, @@ -14,21 +14,21 @@ module divshiftcalc( // is the result denromalized // if the exponent is 1 then the result needs to be normalized then the result is denormalizes - assign DivResDenorm = DivCalcExp[`NE+1]|(~|DivCalcExp[`NE+1:0]); + assign DivResDenorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]); // if the result is denormalized - // 00000000x.xxxxxx... Exp = DivCalcExp - // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // .00xxxxxxxxxxxxx... << DivCalcExp+NF+1 Exp = +1 + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 // .0000xxxxxxxxxxx... >> 1 Exp = 1 - // Left shift amount = DivCalcExp+NF+1-1 - assign DivDenormShift = (`NE+2)'(`NF)+DivCalcExp; + // Left shift amount = DivQe+NF+1-1 + assign DivDenormShift = (`NE+2)'(`NF)+DivQe; // if the result is normalized - // 00000000x.xxxxxx... Exp = DivCalcExp - // .00000000xxxxxxx... >> NF+1 Exp = DivCalcExp+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivCalcExp+1 - // 00000000x.xxxxxx... << NF Exp = DivCalcExp (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivCalcExp-1 (determined after) + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivQe+1 + // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) // inital Left shift amount = NF // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit assign NormShift = (`NE+2)'(`NF); @@ -36,6 +36,6 @@ module divshiftcalc( // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) assign DivShiftAmt = (DivResDenorm ? DivDenormShift[$clog2(`NORMSHIFTSZ)-1:0]&{$clog2(`NORMSHIFTSZ){~DivDenormShift[`NE+1]}} : NormShift[$clog2(`NORMSHIFTSZ)-1:0])+{{$clog2(`NORMSHIFTSZ)-`DURLEN-$clog2(`LOGR*`DIVCOPIES){1'b0}}, DivEarlyTermShift&{`DURLEN{~DivDenormShift[`NE+1]}}, {$clog2(`LOGR*`DIVCOPIES){1'b0}}}; - assign DivShiftIn = {{`NF{1'b0}}, Quot, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; + assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`QLEN+(`RADIX/4)-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 4820cf28..b9932523 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -127,7 +127,7 @@ module fcvt ( // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : - (LeadingZeros)&{`LOGCVTLEN{XDenorm|IntToFp}}; + (LeadingZeros); /////////////////////////////////////////////////////////////////////////// // exp calculations diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 4e16bc96..71f2a919 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -48,10 +48,10 @@ module flags( input logic DivOp, // conversion opperation? input logic FmaOp, // Fma opperation? input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow - input logic [`NE+1:0] Nexp, // exponent of the normalized sum + input logic [`NE+1:0] Me, // exponent of the normalized sum input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits input logic FmaAs, FmaPs, // the product and modified Z signs - input logic R, UfLSBRes, S, UfPlus1, // bits used to determine rounding + input logic R, UfL, S, UfPlus1, // bits used to determine rounding output logic DivByZero, output logic IntInvalid, Invalid, Overflow, // flags used to select the res output logic [4:0] PostProcFlg // flags @@ -127,7 +127,7 @@ module flags( // | | | | and if the result is not exact // | | | | | and if the input isnt infinity or NaN // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Nexp == 0) & ~(UfPlus1&UfLSBRes)))&(R|S))&~(InfIn|NaNIn|DivByZero); + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&UfL)))&(R|S))&~(InfIn|NaNIn|DivByZero); // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index ae974eb0..a6c1a1c6 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -35,7 +35,7 @@ module fmashiftcalc( input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero - output logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results + output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count @@ -57,28 +57,28 @@ module fmashiftcalc( //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin - assign FmaConvNormSumExp = NormSumExp; + assign FmaNe = NormSumExp; end else if (`FPSIZES == 2) begin - assign FmaConvNormSumExp = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; end else if (`FPSIZES == 3) begin always_comb begin case (Fmt) - `FMT: FmaConvNormSumExp = NormSumExp; - `FMT1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; - `FMT2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; - default: FmaConvNormSumExp = {`NE+2{1'bx}}; + `FMT: FmaNe = NormSumExp; + `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; + `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; + default: FmaNe = {`NE+2{1'bx}}; endcase end end else if (`FPSIZES == 4) begin always_comb begin case (Fmt) - 2'h3: FmaConvNormSumExp = NormSumExp; - 2'h1: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; - 2'h0: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; - 2'h2: FmaConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; + 2'h3: FmaNe = NormSumExp; + 2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; + 2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; + 2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; endcase end @@ -144,11 +144,11 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(FmaConvNormSumExp)<=0 & ($signed(FmaConvNormSumExp)>=$signed(-FracLen)) & ~FmaSZero; + // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm ? FmaConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 1bbd0aea..65be2997 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -381,10 +381,10 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), - .FmaZmSticky(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .Quot(QuotM), - .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivCalcExp(DivCalcExpM), .DivDone(DivDoneM), + .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), - .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivSticky(DivStickyM), + .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index e0eb50ac..f9ccd255 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -48,17 +48,17 @@ module postprocess ( input logic FmaPs, // the product's sign input logic [`NE+1:0] FmaPe, // Product exponent input logic [3*`NF+5:0] FmaSm, // the positive sum - input logic FmaZmSticky, // sticky bit that is calculated during alignment + input logic FmaZmS, // sticky bit that is calculated during alignment input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter input logic FmaNegSum, // was the sum negitive input logic FmaInvA, // do you invert Z input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, - input logic DivSticky, + input logic DivS, input logic DivDone, - input logic [`NE+1:0] DivCalcExp, - input logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`NE+1:0] DivQe, + input logic [`QLEN-1-(`RADIX/4):0] DivQm, // conversion signals input logic CvtCs, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent @@ -77,9 +77,9 @@ module postprocess ( logic Ws; logic [`NF-1:0] Rf; // Result fraction logic [`NE-1:0] Re; // Result exponent - logic Nsgn; - logic [`NE+1:0] Nexp; - logic [`CORRSHIFTSZ-1:0] Nfrac; // corectly shifted fraction + logic Ms; + logic [`NE+1:0] Me; + logic [`CORRSHIFTSZ-1:0] Mf; // corectly shifted fraction logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow logic S; // S bit logic UfPlus1; // do you add one (for determining underflow flag) @@ -89,19 +89,19 @@ module postprocess ( logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? logic IntInvalid, Overflow, Invalid; // flags - logic UfLSBRes; + logic UfL; logic [`FMTBITS-1:0] OutFmt; // fma signals logic [`NE+1:0] FmaSe; // exponent of the normalized sum logic FmaSZero; // is the sum zero logic [3*`NF+8:0] FmaShiftIn; // shift input - logic [`NE+1:0] FmaConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results + logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count // division singals logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] DivShiftIn; - logic [`NE+1:0] DivCorrExp; + logic [`NE+1:0] Qe; logic DivByZero; logic DivResDenorm; logic [`NE+1:0] DivDenormShift; @@ -150,9 +150,9 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaConvNormSumExp, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Fmt, .DivCalcExp, .Quot, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); always_comb case(PostProcSel) @@ -181,9 +181,9 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaConvNormSumExp, - .DivResDenorm, .DivDenormShift, .DivOp, .DivCalcExp, - .DivCorrExp, .FmaSZero, .Shifted, .FmaSe, .Nfrac); + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe, + .DivResDenorm, .DivDenormShift, .DivOp, .DivQe, + .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -197,19 +197,19 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, - .Xs, .Ys, .CvtCs, .Nsgn); + .Xs, .Ys, .CvtCs, .Ms); - round round(.OutFmt, .Frm, .S, .FmaZmSticky, .Plus1, .PostProcSel, .CvtCe, .DivCorrExp, - .Nsgn, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Nfrac, .ToInt, .CvtResUf, - .DivSticky, .DivDone, - .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfLSBRes, .Nexp); + round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, + .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, + .DivS, .DivDone, + .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me); /////////////////////////////////////////////////////////////////////////////// // Sign calculation /////////////////////////////////////////////////////////////////////////////// resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, - .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Nsgn, .Ws); + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws); /////////////////////////////////////////////////////////////////////////////// // Flags @@ -218,8 +218,8 @@ module postprocess ( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, - .UfLSBRes, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, - .Nexp, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); + .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, + .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); /////////////////////////////////////////////////////////////////////////////// // Select the result diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index e6de0c18..e1ea5e41 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -39,28 +39,25 @@ module resultsign( input logic Mult, input logic R, input logic S, - input logic Nsgn, + input logic Ms, output logic Ws ); - logic ZeroSgn; - logic InfSgn; - logic Underflow; - // logic ResultSgnTmp; + logic Zeros; + logic Infs; // Determine the sign if the sum is zero // if cancelation then 0 unless round to -infinity // if multiply then Psgn // otherwise psign - assign Underflow = FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)); - assign ZeroSgn = (FmaPs^FmaAs)&~Underflow&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive // if p - z is the Sum negitive // if -p + z is the Sum positive // if -p - z then the Sum is negitive - assign InfSgn = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? InfSgn : FmaSZero&FmaOp ? ZeroSgn : Nsgn; + assign Infs = ZInf ? FmaAs : FmaPs; + assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 38bacce0..6132dba4 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -46,29 +46,29 @@ module round( input logic [1:0] PostProcSel, input logic CvtResDenormUf, input logic CvtResUf, - input logic [`CORRSHIFTSZ-1:0] Nfrac, - input logic FmaZmSticky, // addend's sticky bit + input logic [`CORRSHIFTSZ-1:0] Mf, + input logic FmaZmS, // addend's sticky bit input logic [`NE+1:0] FmaSe, // exponent of the normalized sum - input logic Nsgn, // the result's sign + input logic Ms, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent - input logic [`NE+1:0] DivCorrExp, // the calculated expoent - input logic DivSticky, // sticky bit + input logic [`NE+1:0] Qe, // the calculated expoent + input logic DivS, // sticky bit output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow output logic [`NF-1:0] Rf, // Result fraction output logic [`NE-1:0] Re, // Result exponent output logic S, // sticky bit - output logic [`NE+1:0] Nexp, + output logic [`NE+1:0] Me, output logic Plus1, - output logic R, UfLSBRes // bits needed to calculate rounding + output logic R, UfL // bits needed to calculate rounding ); - logic LSBRes; // bit used for rounding - least significant bit of the normalized sum + logic L; // bit used for rounding - least significant bit of the normalized sum logic UfCalcPlus1; - logic NormSumSticky; // normalized sum's sticky bit - logic UfSticky; // sticky bit for underlow calculation + logic NormS; // normalized sum's sticky bit + logic UfS; // sticky bit for underlow calculation logic [`NF-1:0] RoundFrac; logic FpRes, IntRes; - logic UfRound; + logic UfR; logic FpRound, FpLSBRes, FpUfRound; logic CalcPlus1, FpPlus1; logic [`FLEN:0] RoundAdd; // how much to add to the result @@ -114,61 +114,61 @@ module round( // | NF |1|1| // ^ ^ if floating point result // ^ if not an FMA result - if (`XLENPOS == 1)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1)assign NormS = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN - if (`XLENPOS == 2)assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2)assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 2) begin // XLEN is either 64 or 32 // so half and single are always smaller then XLEN // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 3) begin // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:0]); + if (`XLENPOS == 1) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); end else if (`FPSIZES == 4) begin // Quad precision will always be greater than XLEN // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]); + if (`XLENPOS == 2) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); // 3: NF > NF1 > XLEN // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer - if (`XLENPOS == 3) assign NormSumSticky = (|Nfrac[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Nfrac[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Nfrac[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Nfrac[`CORRSHIFTSZ-`Q_NF-2:0]); + if (`XLENPOS == 3) assign NormS = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); end @@ -176,37 +176,37 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign UfSticky = FmaZmSticky&FmaOp | NormSumSticky | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivSticky&DivOp; + assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag if (`FPSIZES == 1) begin - assign FpRound = Nfrac[`CORRSHIFTSZ-`NF-1]; - assign FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF]; - assign FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2]; + assign FpRound = Mf[`CORRSHIFTSZ-`NF-1]; + assign FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; + assign FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; end else if (`FPSIZES == 2) begin - assign FpRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-1] : Nfrac[`CORRSHIFTSZ-`NF1-1]; - assign FpLSBRes = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF] : Nfrac[`CORRSHIFTSZ-`NF1]; - assign FpUfRound = OutFmt ? Nfrac[`CORRSHIFTSZ-`NF-2] : Nfrac[`CORRSHIFTSZ-`NF1-2]; + assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; + assign FpLSBRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; + assign FpUfRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; end else if (`FPSIZES == 3) begin always_comb case (OutFmt) `FMT: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF-2]; end `FMT1: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF1-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF1]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF1-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF1-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF1]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF1-2]; end `FMT2: begin - FpRound = Nfrac[`CORRSHIFTSZ-`NF2-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`NF2]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`NF2-2]; + FpRound = Mf[`CORRSHIFTSZ-`NF2-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`NF2]; + FpUfRound = Mf[`CORRSHIFTSZ-`NF2-2]; end default: begin FpRound = 1'bx; @@ -218,55 +218,55 @@ module round( always_comb case (OutFmt) 2'h3: begin - FpRound = Nfrac[`CORRSHIFTSZ-`Q_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`Q_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`Q_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`Q_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`Q_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; end 2'h1: begin - FpRound = Nfrac[`CORRSHIFTSZ-`D_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`D_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`D_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`D_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`D_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`D_NF-2]; end 2'h0: begin - FpRound = Nfrac[`CORRSHIFTSZ-`S_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`S_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`S_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`S_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`S_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`S_NF-2]; end 2'h2: begin - FpRound = Nfrac[`CORRSHIFTSZ-`H_NF-1]; - FpLSBRes = Nfrac[`CORRSHIFTSZ-`H_NF]; - FpUfRound = Nfrac[`CORRSHIFTSZ-`H_NF-2]; + FpRound = Mf[`CORRSHIFTSZ-`H_NF-1]; + FpLSBRes = Mf[`CORRSHIFTSZ-`H_NF]; + FpUfRound = Mf[`CORRSHIFTSZ-`H_NF-2]; end endcase end - assign R = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-1] : FpRound; - assign LSBRes = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN] : FpLSBRes; - assign UfRound = ToInt&CvtOp ? Nfrac[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; + assign R = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpRound; + assign L = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLSBRes; + assign UfR = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpUfRound; // used to determine underflow flag - assign UfLSBRes = FpRound; + assign UfL = FpRound; // determine sticky - assign S = UfSticky | UfRound; + assign S = UfS | UfR; always_comb begin // Determine if you add 1 case (Frm) - 3'b000: CalcPlus1 = R & (S| LSBRes);//round to nearest even + 3'b000: CalcPlus1 = R & (S| L);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = Nsgn;//round down - 3'b011: CalcPlus1 = ~Nsgn;//round up + 3'b010: CalcPlus1 = Ms;//round down + 3'b011: CalcPlus1 = ~Ms;//round up 3'b100: CalcPlus1 = R;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (Frm) - 3'b000: UfCalcPlus1 = UfRound & (UfSticky| UfLSBRes);//round to nearest even + 3'b000: UfCalcPlus1 = UfR & (UfS| UfL);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = Nsgn;//round down - 3'b011: UfCalcPlus1 = ~Nsgn;//round up - 3'b100: UfCalcPlus1 = UfRound;//round to nearest max magnitude + 3'b010: UfCalcPlus1 = Ms;//round down + 3'b011: UfCalcPlus1 = ~Ms;//round up + 3'b100: UfCalcPlus1 = UfR;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase @@ -275,7 +275,7 @@ module round( // If an answer is exact don't round assign Plus1 = CalcPlus1 & (S | R); assign FpPlus1 = Plus1&~(ToInt&CvtOp); - assign UfPlus1 = UfCalcPlus1 & S; // UfRound is part of sticky + assign UfPlus1 = UfCalcPlus1 & S; // UfR is part of sticky // Compute rounded result if (`FPSIZES == 1) begin @@ -295,19 +295,19 @@ module round( assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; // determine the result to be roundned - assign RoundFrac = Nfrac[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; + assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; always_comb case(PostProcSel) - 2'b10: Nexp = FmaSe; // fma - 2'b00: Nexp = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt - 2'b01: Nexp = DivDone ? DivCorrExp : '0; // divide - default: Nexp = '0; + 2'b10: Me = FmaSe; // fma + 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt + 2'b01: Me = DivDone ? Qe : '0; // divide + default: Me = '0; endcase // round the result // - if the fraction overflows one should be added to the exponent - assign {FullRe, Rf} = {Nexp, RoundFrac} + RoundAdd; + assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd; assign Re = FullRe[`NE-1:0]; diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv index 22686b24..55e322bc 100644 --- a/pipelined/src/fpu/roundsign.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -38,11 +38,11 @@ module roundsign( input logic DivOp, input logic CvtOp, input logic CvtCs, - output logic Nsgn + output logic Ms ); logic FmaResSgnTmp; - logic DivSgn; + logic Qs; // is the result negitive // if p - z is the Sum negitive @@ -52,9 +52,9 @@ module roundsign( // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs); - assign DivSgn = Xs^Ys; + assign Qs = Xs^Ys; // Sign for rounding calulation - assign Nsgn = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (DivSgn&DivOp); + assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index ecfd9ba0..71a2393a 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -33,13 +33,13 @@ module shiftcorrection( input logic FmaOp, input logic DivOp, input logic DivResDenorm, - input logic [`NE+1:0] DivCalcExp, + input logic [`NE+1:0] DivQe, input logic [`NE+1:0] DivDenormShift, - input logic [`NE+1:0] FmaConvNormSumExp, // exponent of the normalized sum not taking into account denormal or zero results + input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaSZero, - output logic [`CORRSHIFTSZ-1:0] Nfrac, // the shifted sum before LZA correction - output logic [`NE+1:0] DivCorrExp, + output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction + output logic [`NE+1:0] Qe, output logic [`NE+1:0] FmaSe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction @@ -53,16 +53,16 @@ module shiftcorrection( // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = (LZAPlus2|(DivCalcExp==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; + assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign Nfrac = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaConvNormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaConvNormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; // the quotent is in the range [.5,2) if there is no early termination // if the quotent < 1 and not denormal then subtract 1 to account for the normalization shift - assign DivCorrExp = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivCalcExp - {(`NE+1)'(0), ~LZAPlus2}; + assign Qe = ((DivResDenorm)&~DivDenormShift[`NE+1]) ? (`NE+2)'(0) : DivQe - {(`NE+1)'(0), ~LZAPlus2}; endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index a95a6624..b90c3d3d 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -679,13 +679,13 @@ module testbenchfp; .Pe, .ZmSticky, .KillProd); postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), - .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .Quot, .DivCalcExp(DivCalcExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivSticky(DivSticky), + .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, - .FmaKillProd(KillProd), .FmaZmSticky(ZmSticky), .FmaPe(Pe), .DivDone, + .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); From 921debf930d74f7c572ab708886335938cd0449a Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 18 Jul 2022 17:51:18 +0000 Subject: [PATCH 35/36] removed underflow from inexactct calculation --- pipelined/regression/sim-wally | 2 +- pipelined/src/fpu/flags.sv | 2 +- pipelined/testbench/testbench.sv | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelined/regression/sim-wally b/pipelined/regression/sim-wally index 51c8b3ed..6163ab8b 100755 --- a/pipelined/regression/sim-wally +++ b/pipelined/regression/sim-wally @@ -1,2 +1,2 @@ -vsim -do "do wally-pipelined.do rv64gc arch64d" +vsim -do "do wally-pipelined.do rv32gc wally32periph" diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 71f2a919..6b1bc638 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -131,7 +131,7 @@ module flags( // Set Inexact flag if the res is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (S|Overflow|R|Underflow)&~(InfIn|NaNIn|DivByZero); + assign FpInexact = (S|Overflow|R)&~(InfIn|NaNIn|DivByZero); // if the res is too small to be represented and not 0 // | and if the res is not invalid (outside the integer bounds) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 6d537b14..0fb5f5e6 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -114,7 +114,7 @@ logic [3:0] dummy; "arch32f": if (`F_SUPPORTED) tests = arch32f; "imperas32i": tests = imperas32i; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; - "wally32d": if (`D_SUPPORTED) tests = wally32d; + // "wally32d": if (`D_SUPPORTED) tests = wally32d; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; "wally32a": if (`A_SUPPORTED) tests = wally32a; "imperas32c": if (`C_SUPPORTED) tests = imperas32c; From 3f5a5e10936714c1dd7eca07a07269218a0d5e2c Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Mon, 18 Jul 2022 12:13:15 -0700 Subject: [PATCH 36/36] added the sail change to spike to let it all run normally --- tests/riscof/spike/riscof_spike.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index 88a6269e..fd429395 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -108,7 +108,7 @@ class spike(pluginTemplate): #TODO: The following assumes you are using the riscv-gcc toolchain. If # not please change appropriately - self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else 'ilp32 ') + self.compile_cmd = self.compile_cmd+' -mabi='+('lp64 ' if 64 in ispec['supported_xlen'] else ('ilp32e ' if "E" in ispec["ISA"] else 'ilp32 ')) def runTests(self, testList): @@ -158,7 +158,12 @@ class spike(pluginTemplate): # echo statement. if self.target_run: # set up the simulation command. Template is for spike. Please change. - simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf) + if ('NO_SAIL=True' in testentry['macros']): + # if the tests can't run on SAIL we copy the reference output to the src directory + reference_output = re.sub("/src/","/references/", re.sub(".S",".reference_output", test)) + simcmd = 'cut -c-{0:g} {1} > {2}'.format(8, reference_output, sig_file) #use cut to remove comments when copying + else: + simcmd = self.dut_exe + ' --isa={0} +signature={1} +signature-granularity=4 {2}'.format(self.isa, sig_file, elf) else: simcmd = 'echo "NO RUN"'