From 81741925aa996c57a9d3f3a4b52513f684fb9121 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 29 Dec 2021 17:12:20 -0600 Subject: [PATCH 1/4] Moved LSU Bus interface control path into it's own module. --- wally-pipelined/regression/wave.do | 5 +- wally-pipelined/src/cache/dcache.sv | 3 +- wally-pipelined/src/lsu/busfsm.sv | 136 ++++++++++++++++++++++++++++ wally-pipelined/src/lsu/lsu.sv | 109 +++------------------- 4 files changed, 150 insertions(+), 103 deletions(-) create mode 100644 wally-pipelined/src/lsu/busfsm.sv diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 8cb29a8d..663a7dea 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -213,15 +213,13 @@ add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataWordMuxM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/ReadDataM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/WriteDataM add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/SelUncachedAdr -add wave -noupdate -expand -group lsu -expand -group bus -color Gold /testbench/dut/hart/lsu/BusCurrState +add wave -noupdate -expand -group lsu -expand -group bus -color Gold /testbench/dut/hart/lsu/busfsm/BusCurrState add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/hart/lsu/BusStall add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/hart/lsu/LsuBusRead add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/hart/lsu/LsuBusWrite add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/hart/lsu/LsuBusAdr add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/hart/lsu/LsuBusAck add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/hart/lsu/LsuBusHWDATA -add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/hart/lsu/UnCachedLsuBusRead -add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/hart/lsu/UnCachedLsuBusWrite add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/hart/lsu/dcache/dcachefsm/CurrState add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WayHit add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMBlockWriteEnableM @@ -474,7 +472,6 @@ add wave -noupdate -group {pc selection} /testbench/dut/hart/ifu/PrivilegedNextP add wave -noupdate -group {pc selection} /testbench/dut/hart/ifu/PrivilegedChangePCM add wave -noupdate /testbench/dut/hart/priv/priv/csr/MEPC_REGW add wave -noupdate /testbench/dut/hart/lsu/LocalLsuBusAdr -add wave -noupdate /testbench/dut/hart/lsu/BasePAdrMaskedM TreeUpdate [SetDefaultTree] WaveRestoreCursors {{Cursor 7} {36865 ns} 1} {{Cursor 5} {49445 ns} 1} {{Cursor 3} {35522 ns} 0} {{Cursor 4} {49574 ns} 1} quietly wave cursor active 3 diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 0023f97c..92b468ce 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -53,8 +53,6 @@ module dcache output logic [`PA_BITS-1:0] DCacheBusAdr, output logic [`XLEN-1:0] ReadDataBlockSetsM [(`DCACHE_BLOCKLENINBITS/`XLEN)-1:0], - output logic SelFlush, - input logic [`DCACHE_BLOCKLENINBITS-1:0] DCacheMemWriteData, @@ -119,6 +117,7 @@ module dcache logic SelEvict; logic LRUWriteEn; logic [NUMWAYS-1:0] VDWriteEnableWay; + logic SelFlush; // Read Path CPU (IEU) side diff --git a/wally-pipelined/src/lsu/busfsm.sv b/wally-pipelined/src/lsu/busfsm.sv new file mode 100644 index 00000000..8eb70a03 --- /dev/null +++ b/wally-pipelined/src/lsu/busfsm.sv @@ -0,0 +1,136 @@ +/////////////////////////////////////////// +// busfsm.sv +// +// Written: Ross Thompson ross1728@gmail.com +// Modified: +// +// Purpose: Load/Store Unit's interface to BUS +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + + +module busfsm #(parameter integer WordCountThreshold, + parameter integer LOGWPL) + (input logic clk, + input logic reset, + + input logic IgnoreRequest, + input logic [1:0] LsuRWM, + input logic DCacheFetchLine, + input logic DCacheWriteLine, + input logic LsuBusAck, + input logic CPUBusy, + input logic CacheableM, + + output logic BusStall, + output logic LsuBusWrite, + output logic LsuBusRead, + output logic DCacheBusAck, + output logic BusCommittedM, + output logic SelUncachedAdr, + output logic [LOGWPL-1:0] WordCount); + + + + logic UnCachedLsuBusRead; + logic UnCachedLsuBusWrite; + logic CntEn, PreCntEn; + logic CntReset; + logic WordCountFlag; + logic [LOGWPL-1:0] NextWordCount; + + + typedef enum {STATE_BUS_READY, + STATE_BUS_FETCH, + STATE_BUS_WRITE, + STATE_BUS_UNCACHED_WRITE, + STATE_BUS_UNCACHED_WRITE_DONE, + STATE_BUS_UNCACHED_READ, + STATE_BUS_UNCACHED_READ_DONE, + STATE_BUS_CPU_BUSY} busstatetype; + + (* mark_debug = "true" *) busstatetype BusCurrState, BusNextState; + + + flopenr #(LOGWPL) + WordCountReg(.clk(clk), + .reset(reset | CntReset), + .en(CntEn), + .d(NextWordCount), + .q(WordCount)); + + assign NextWordCount = WordCount + 1'b1; + + assign WordCountFlag = (WordCount == WordCountThreshold[LOGWPL-1:0]); + assign CntEn = PreCntEn & LsuBusAck; + + always_ff @(posedge clk) + if (reset) BusCurrState <= #1 STATE_BUS_READY; + else BusCurrState <= #1 BusNextState; + + always_comb begin + case(BusCurrState) + STATE_BUS_READY: if(IgnoreRequest) BusNextState = STATE_BUS_READY; + else if(LsuRWM[0] & ~CacheableM) BusNextState = STATE_BUS_UNCACHED_WRITE; + else if(LsuRWM[1] & ~CacheableM) BusNextState = STATE_BUS_UNCACHED_READ; + else if(DCacheFetchLine) BusNextState = STATE_BUS_FETCH; + else if(DCacheWriteLine) BusNextState = STATE_BUS_WRITE; + STATE_BUS_UNCACHED_WRITE: if(LsuBusAck) BusNextState = STATE_BUS_UNCACHED_WRITE_DONE; + else BusNextState = STATE_BUS_UNCACHED_WRITE; + STATE_BUS_UNCACHED_READ: if(LsuBusAck) BusNextState = STATE_BUS_UNCACHED_READ_DONE; + else BusNextState = STATE_BUS_UNCACHED_READ; + STATE_BUS_UNCACHED_WRITE_DONE: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY; + else BusNextState = STATE_BUS_READY; + STATE_BUS_UNCACHED_READ_DONE: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY; + else BusNextState = STATE_BUS_READY; + STATE_BUS_CPU_BUSY: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY; + else BusNextState = STATE_BUS_READY; + STATE_BUS_FETCH: if (WordCountFlag & LsuBusAck) BusNextState = STATE_BUS_READY; + else BusNextState = STATE_BUS_FETCH; + STATE_BUS_WRITE: if(WordCountFlag & LsuBusAck) BusNextState = STATE_BUS_READY; + else BusNextState = STATE_BUS_WRITE; + endcase + end + + + assign CntReset = BusCurrState == STATE_BUS_READY; + assign BusStall = (BusCurrState == STATE_BUS_READY & ~IgnoreRequest & ((~CacheableM & (|LsuRWM)) | DCacheFetchLine | DCacheWriteLine)) | + (BusCurrState == STATE_BUS_UNCACHED_WRITE) | + (BusCurrState == STATE_BUS_UNCACHED_READ) | + (BusCurrState == STATE_BUS_FETCH) | + (BusCurrState == STATE_BUS_WRITE); + assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE; + assign UnCachedLsuBusWrite = (BusCurrState == STATE_BUS_READY & ~CacheableM & (LsuRWM[0])) | + (BusCurrState == STATE_BUS_UNCACHED_WRITE); + assign LsuBusWrite = UnCachedLsuBusWrite | (BusCurrState == STATE_BUS_WRITE); + + assign UnCachedLsuBusRead = (BusCurrState == STATE_BUS_READY & ~CacheableM & (|LsuRWM[1])) | + (BusCurrState == STATE_BUS_UNCACHED_READ); + assign LsuBusRead = UnCachedLsuBusRead | (BusCurrState == STATE_BUS_FETCH); + + assign DCacheBusAck = (BusCurrState == STATE_BUS_FETCH & WordCountFlag & LsuBusAck) | + (BusCurrState == STATE_BUS_WRITE & WordCountFlag & LsuBusAck); + assign BusCommittedM = BusCurrState != STATE_BUS_READY; + assign SelUncachedAdr = (BusCurrState == STATE_BUS_READY & (|LsuRWM & ~CacheableM)) | + (BusCurrState == STATE_BUS_UNCACHED_READ | + BusCurrState == STATE_BUS_UNCACHED_READ_DONE | + BusCurrState == STATE_BUS_UNCACHED_WRITE | + BusCurrState == STATE_BUS_UNCACHED_WRITE_DONE); +endmodule diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 74cf0d38..d2207be0 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -342,17 +342,15 @@ module lsu localparam integer WORDSPERLINE = `DCACHE_BLOCKLENINBITS/`XLEN; localparam integer LOGWPL = $clog2(WORDSPERLINE); localparam integer BLOCKLEN = `DCACHE_BLOCKLENINBITS; - localparam integer WordCountThreshold = WORDSPERLINE - 1; + localparam integer BLOCKBYTELEN = BLOCKLEN/8; localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); // temp - logic WordCountFlag; logic [`XLEN-1:0] FinalAMOWriteDataM, FinalWriteDataM; (* mark_debug = "true" *) logic [`XLEN-1:0] PreLsuBusHWDATA; - logic SelFlush; logic [`XLEN-1:0] ReadDataWordM; logic [`DCACHE_BLOCKLENINBITS-1:0] DCacheMemWriteData; @@ -360,12 +358,7 @@ module lsu logic [`XLEN-1:0] ReadDataWordMuxM; - logic [LOGWPL-1:0] WordCount, NextWordCount; - logic [`PA_BITS-1:0] BasePAdrMaskedM; - logic [OFFSETLEN-1:0] BasePAdrOffsetM; - logic CntEn, PreCntEn; - logic CntReset; logic [`PA_BITS-1:0] DCacheBusAdr; logic [`XLEN-1:0] ReadDataBlockSetsM [(`DCACHE_BLOCKLENINBITS/`XLEN)-1:0]; @@ -375,8 +368,6 @@ module lsu logic DCacheFetchLine; logic DCacheBusAck; - logic UnCachedLsuBusRead; - logic UnCachedLsuBusWrite; logic SelUncachedAdr; @@ -393,7 +384,6 @@ module lsu .DCacheCommittedM, .DCacheBusAdr, .ReadDataBlockSetsM, - .SelFlush, .DCacheMemWriteData, .DCacheFetchLine, .DCacheWriteLine, @@ -401,6 +391,8 @@ module lsu ); + + // sub word selection for read and writes and optional amo alu. mux2 #(`XLEN) UnCachedDataMux(.d0(ReadDataWordM), .d1(DCacheMemWriteData[`XLEN-1:0]), .s(SelUncachedAdr), @@ -429,15 +421,12 @@ module lsu .HWDATA(FinalWriteDataM)); - generate - if (`XLEN == 32) assign LsuBusSize = SelUncachedAdr ? LsuFunct3M : 3'b010; - else assign LsuBusSize = SelUncachedAdr ? LsuFunct3M : 3'b011; - endgenerate; // Bus Side logic // register the fetch data from the next level of memory. // This register should be necessary for timing. There is no register in the uncore or // ahblite controller between the memories and this cache. + logic [LOGWPL-1:0] WordCount; genvar index; generate @@ -452,93 +441,19 @@ module lsu assign LocalLsuBusAdr = SelUncachedAdr ? LsuPAdrM : DCacheBusAdr ; - assign LsuBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalLsuBusAdr; - assign PreLsuBusHWDATA = ReadDataBlockSetsM[WordCount]; - assign LsuBusHWDATA = SelUncachedAdr ? WriteDataM : PreLsuBusHWDATA; // *** why is this not FinalWriteDataM? which does not work. + generate + if (`XLEN == 32) assign LsuBusSize = SelUncachedAdr ? LsuFunct3M : 3'b010; + else assign LsuBusSize = SelUncachedAdr ? LsuFunct3M : 3'b011; + endgenerate; - - - assign WordCountFlag = (WordCount == WordCountThreshold[LOGWPL-1:0]); - assign CntEn = PreCntEn & LsuBusAck; - - flopenr #(LOGWPL) - WordCountReg(.clk(clk), - .reset(reset | CntReset), - .en(CntEn), - .d(NextWordCount), - .q(WordCount)); - - assign NextWordCount = WordCount + 1'b1; - - typedef enum {STATE_BUS_READY, - STATE_BUS_FETCH, - STATE_BUS_WRITE, - STATE_BUS_UNCACHED_WRITE, - STATE_BUS_UNCACHED_WRITE_DONE, - STATE_BUS_UNCACHED_READ, - STATE_BUS_UNCACHED_READ_DONE, - STATE_BUS_CPU_BUSY} busstatetype; - - (* mark_debug = "true" *) busstatetype BusCurrState, BusNextState; - - always_ff @(posedge clk) - if (reset) BusCurrState <= #1 STATE_BUS_READY; - else BusCurrState <= #1 BusNextState; - - always_comb begin - BusNextState = STATE_BUS_READY; - - case(BusCurrState) - STATE_BUS_READY: if(IgnoreRequest) BusNextState = STATE_BUS_READY; - else if(LsuRWM[0] & ~CacheableM) BusNextState = STATE_BUS_UNCACHED_WRITE; - else if(LsuRWM[1] & ~CacheableM) BusNextState = STATE_BUS_UNCACHED_READ; - else if(DCacheFetchLine) BusNextState = STATE_BUS_FETCH; - else if(DCacheWriteLine) BusNextState = STATE_BUS_WRITE; - STATE_BUS_UNCACHED_WRITE: if(LsuBusAck) BusNextState = STATE_BUS_UNCACHED_WRITE_DONE; - else BusNextState = STATE_BUS_UNCACHED_WRITE; - STATE_BUS_UNCACHED_READ: if(LsuBusAck) BusNextState = STATE_BUS_UNCACHED_READ_DONE; - else BusNextState = STATE_BUS_UNCACHED_READ; - STATE_BUS_UNCACHED_WRITE_DONE: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY; - else BusNextState = STATE_BUS_READY; - STATE_BUS_UNCACHED_READ_DONE: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY; - else BusNextState = STATE_BUS_READY; - STATE_BUS_CPU_BUSY: if(CPUBusy) BusNextState = STATE_BUS_CPU_BUSY; - else BusNextState = STATE_BUS_READY; - STATE_BUS_FETCH: if (WordCountFlag & LsuBusAck) BusNextState = STATE_BUS_READY; - else BusNextState = STATE_BUS_FETCH; - STATE_BUS_WRITE: if(WordCountFlag & LsuBusAck) BusNextState = STATE_BUS_READY; - else BusNextState = STATE_BUS_WRITE; - endcase - end - - - assign CntReset = BusCurrState == STATE_BUS_READY; - assign BusStall = (BusCurrState == STATE_BUS_READY & ~IgnoreRequest & ((~CacheableM & (|LsuRWM)) | DCacheFetchLine | DCacheWriteLine)) | - (BusCurrState == STATE_BUS_UNCACHED_WRITE) | - (BusCurrState == STATE_BUS_UNCACHED_READ) | - (BusCurrState == STATE_BUS_FETCH) | - (BusCurrState == STATE_BUS_WRITE); - assign PreCntEn = BusCurrState == STATE_BUS_FETCH | BusCurrState == STATE_BUS_WRITE; - assign UnCachedLsuBusWrite = (BusCurrState == STATE_BUS_READY & ~CacheableM & (LsuRWM[0])) | - (BusCurrState == STATE_BUS_UNCACHED_WRITE); - assign LsuBusWrite = UnCachedLsuBusWrite | (BusCurrState == STATE_BUS_WRITE); - - assign UnCachedLsuBusRead = (BusCurrState == STATE_BUS_READY & ~CacheableM & (|LsuRWM[1])) | - (BusCurrState == STATE_BUS_UNCACHED_READ); - assign LsuBusRead = UnCachedLsuBusRead | (BusCurrState == STATE_BUS_FETCH); - - assign DCacheBusAck = (BusCurrState == STATE_BUS_FETCH & WordCountFlag & LsuBusAck) | - (BusCurrState == STATE_BUS_WRITE & WordCountFlag & LsuBusAck); - assign BusCommittedM = BusCurrState != STATE_BUS_READY; - assign SelUncachedAdr = (BusCurrState == STATE_BUS_READY & (|LsuRWM & ~CacheableM)) | - (BusCurrState == STATE_BUS_UNCACHED_READ | - BusCurrState == STATE_BUS_UNCACHED_READ_DONE | - BusCurrState == STATE_BUS_UNCACHED_WRITE | - BusCurrState == STATE_BUS_UNCACHED_WRITE_DONE); + busfsm #(WordCountThreshold, LOGWPL) + busfsm(.clk, .reset, .IgnoreRequest, .LsuRWM, .DCacheFetchLine, .DCacheWriteLine, + .LsuBusAck, .CPUBusy, .CacheableM, .BusStall, .LsuBusWrite, .LsuBusRead, + .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount); endmodule From 90ccc94e5e686ed685a30e6cb92741eed816a025 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 29 Dec 2021 17:40:24 -0600 Subject: [PATCH 2/4] Moved lsu interlock fpm to separate module. --- wally-pipelined/regression/wave.do | 2 +- wally-pipelined/src/lsu/busfsm.sv | 2 +- wally-pipelined/src/lsu/interlockfsm.sv | 113 ++++++++++++++++++++++++ wally-pipelined/src/lsu/lsu.sv | 97 ++++---------------- 4 files changed, 131 insertions(+), 83 deletions(-) create mode 100644 wally-pipelined/src/lsu/interlockfsm.sv diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 663a7dea..e68c3239 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -205,7 +205,7 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED -add wave -noupdate -expand -group lsu -color Gold /testbench/dut/hart/lsu/MEM_VIRTMEM/InterlockCurrState +add wave -noupdate -expand -group lsu -color Gold /testbench/dut/hart/lsu/MEM_VIRTMEM/interlockfsm/InterlockCurrState add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/SelHPTW add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/InterlockStall add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/LSUStall diff --git a/wally-pipelined/src/lsu/busfsm.sv b/wally-pipelined/src/lsu/busfsm.sv index 8eb70a03..1f96aa65 100644 --- a/wally-pipelined/src/lsu/busfsm.sv +++ b/wally-pipelined/src/lsu/busfsm.sv @@ -1,7 +1,7 @@ /////////////////////////////////////////// // busfsm.sv // -// Written: Ross Thompson ross1728@gmail.com +// Written: Ross Thompson ross1728@gmail.com December 29, 2021 // Modified: // // Purpose: Load/Store Unit's interface to BUS diff --git a/wally-pipelined/src/lsu/interlockfsm.sv b/wally-pipelined/src/lsu/interlockfsm.sv new file mode 100644 index 00000000..6256a731 --- /dev/null +++ b/wally-pipelined/src/lsu/interlockfsm.sv @@ -0,0 +1,113 @@ +/////////////////////////////////////////// +// interlockfsm.sv +// +// Written: Ross Thompson ross1728@gmail.com December 29, 2021 +// Modified: +// +// Purpose: Allows the HPTW to take control of the dcache to walk page table and then replay the memory operation if +// there was on. +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +`include "wally-config.vh" + +module interlockfsm + (input logic clk, + input logic reset, + input logic AnyCPUReqM, + input logic ITLBMissF, + input logic ITLBWriteF, + input logic DTLBMissM, + input logic DTLBWriteM, + input logic ExceptionM, + input logic PendingInterruptM, + input logic DCacheStall, + + output logic InterlockStall, + output logic SelReplayCPURequest, + output logic SelHPTW, + output logic IgnoreRequest); + + + typedef enum {STATE_T0_READY, + STATE_T0_REPLAY, + STATE_T3_DTLB_MISS, + STATE_T4_ITLB_MISS, + STATE_T5_ITLB_MISS, + STATE_T7_DITLB_MISS} statetype; + + statetype InterlockCurrState, InterlockNextState; + + + always_ff @(posedge clk) + if (reset) InterlockCurrState <= #1 STATE_T0_READY; + else InterlockCurrState <= #1 InterlockNextState; + + always_comb begin + case(InterlockCurrState) + STATE_T0_READY: if(~ITLBMissF & DTLBMissM & AnyCPUReqM) InterlockNextState = STATE_T3_DTLB_MISS; + else if(ITLBMissF & ~DTLBMissM & ~AnyCPUReqM) InterlockNextState = STATE_T4_ITLB_MISS; + else if(ITLBMissF & ~DTLBMissM & AnyCPUReqM) InterlockNextState = STATE_T5_ITLB_MISS; + else if(ITLBMissF & DTLBMissM & AnyCPUReqM) InterlockNextState = STATE_T7_DITLB_MISS; + else InterlockNextState = STATE_T0_READY; + STATE_T0_REPLAY: if(DCacheStall) InterlockNextState = STATE_T0_REPLAY; + else InterlockNextState = STATE_T0_READY; + STATE_T3_DTLB_MISS: if(DTLBWriteM) InterlockNextState = STATE_T0_REPLAY; + else InterlockNextState = STATE_T3_DTLB_MISS; + STATE_T4_ITLB_MISS: if(ITLBWriteF) InterlockNextState = STATE_T0_READY; + else InterlockNextState = STATE_T4_ITLB_MISS; + STATE_T5_ITLB_MISS: if(ITLBWriteF) InterlockNextState = STATE_T0_REPLAY; + else InterlockNextState = STATE_T5_ITLB_MISS; + STATE_T7_DITLB_MISS: if(DTLBWriteM) InterlockNextState = STATE_T5_ITLB_MISS; + else InterlockNextState = STATE_T7_DITLB_MISS; + default: InterlockNextState = STATE_T0_READY; + endcase + end // always_comb + + // signal to CPU it needs to wait on HPTW. + /* -----\/----- EXCLUDED -----\/----- + // this code has a problem with imperas64mmu as it reads in an invalid uninitalized instruction. InterlockStall becomes x and it propagates + // everywhere. The case statement below implements the same logic but any x on the inputs will resolve to 0. + // Note this will cause a problem for post synthesis gate simulation. + assign InterlockStall = (InterlockCurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | + (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | + (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); + + -----/\----- EXCLUDED -----/\----- */ + + always_comb begin + InterlockStall = 1'b0; + case(InterlockCurrState) + STATE_T0_READY: if(DTLBMissM | ITLBMissF) InterlockStall = 1'b1; + STATE_T3_DTLB_MISS: InterlockStall = 1'b1; + STATE_T4_ITLB_MISS: InterlockStall = 1'b1; + STATE_T5_ITLB_MISS: InterlockStall = 1'b1; + STATE_T7_DITLB_MISS: InterlockStall = 1'b1; + default: InterlockStall = 1'b0; + endcase + end + + + assign SelReplayCPURequest = (InterlockNextState == STATE_T0_REPLAY); + assign SelHPTW = (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | + (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); + assign IgnoreRequest = (InterlockCurrState == STATE_T0_READY & (ITLBMissF | DTLBMissM | ExceptionM | PendingInterruptM)) | + ((InterlockCurrState == STATE_T0_REPLAY) + & (ExceptionM | PendingInterruptM)); + +endmodule diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index d2207be0..7a64aeb1 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -128,78 +128,13 @@ module lsu logic [2:0] HPTWSize; logic SelReplayCPURequest; - typedef enum {STATE_T0_READY, - STATE_T0_REPLAY, - STATE_T3_DTLB_MISS, - STATE_T4_ITLB_MISS, - STATE_T5_ITLB_MISS, - STATE_T7_DITLB_MISS} statetype; + assign AnyCPUReqM = (|MemRWM) | (|AtomicM); - statetype InterlockCurrState, InterlockNextState; - - assign AnyCPUReqM = (|MemRWM) | (|AtomicM); - - always_ff @(posedge clk) - if (reset) InterlockCurrState <= #1 STATE_T0_READY; - else InterlockCurrState <= #1 InterlockNextState; - - always_comb begin - case(InterlockCurrState) - STATE_T0_READY: if(~ITLBMissF & DTLBMissM & AnyCPUReqM) InterlockNextState = STATE_T3_DTLB_MISS; - else if(ITLBMissF & ~DTLBMissM & ~AnyCPUReqM) InterlockNextState = STATE_T4_ITLB_MISS; - else if(ITLBMissF & ~DTLBMissM & AnyCPUReqM) InterlockNextState = STATE_T5_ITLB_MISS; - else if(ITLBMissF & DTLBMissM & AnyCPUReqM) InterlockNextState = STATE_T7_DITLB_MISS; - else InterlockNextState = STATE_T0_READY; - STATE_T0_REPLAY: if(DCacheStall) InterlockNextState = STATE_T0_REPLAY; - else InterlockNextState = STATE_T0_READY; - STATE_T3_DTLB_MISS: if(DTLBWriteM) InterlockNextState = STATE_T0_REPLAY; - else InterlockNextState = STATE_T3_DTLB_MISS; - STATE_T4_ITLB_MISS: if(ITLBWriteF) InterlockNextState = STATE_T0_READY; - else InterlockNextState = STATE_T4_ITLB_MISS; - STATE_T5_ITLB_MISS: if(ITLBWriteF) InterlockNextState = STATE_T0_REPLAY; - else InterlockNextState = STATE_T5_ITLB_MISS; - STATE_T7_DITLB_MISS: if(DTLBWriteM) InterlockNextState = STATE_T5_ITLB_MISS; - else InterlockNextState = STATE_T7_DITLB_MISS; - default: InterlockNextState = STATE_T0_READY; - endcase - end // always_comb + interlockfsm interlockfsm (.clk, .reset, .AnyCPUReqM, .ITLBMissF, .ITLBWriteF, + .DTLBMissM, .DTLBWriteM, .ExceptionM, .PendingInterruptM, .DCacheStall, + .InterlockStall, .SelReplayCPURequest, .SelHPTW, + .IgnoreRequest); - // signal to CPU it needs to wait on HPTW. - /* -----\/----- EXCLUDED -----\/----- - // this code has a problem with imperas64mmu as it reads in an invalid uninitalized instruction. InterlockStall becomes x and it propagates - // everywhere. The case statement below implements the same logic but any x on the inputs will resolve to 0. - assign InterlockStall = (InterlockCurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | - (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | - (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); - - -----/\----- EXCLUDED -----/\----- */ - - always_comb begin - InterlockStall = 1'b0; - case(InterlockCurrState) - STATE_T0_READY: if(DTLBMissM | ITLBMissF) InterlockStall = 1'b1; - STATE_T3_DTLB_MISS: InterlockStall = 1'b1; - STATE_T4_ITLB_MISS: InterlockStall = 1'b1; - STATE_T5_ITLB_MISS: InterlockStall = 1'b1; - STATE_T7_DITLB_MISS: InterlockStall = 1'b1; - default: InterlockStall = 1'b0; - endcase - end - - - // When replaying CPU memory request after PTW select the IEUAdrM for correct address. - assign SelReplayCPURequest = (InterlockNextState == STATE_T0_REPLAY); - assign SelHPTW = (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | - (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); - assign IgnoreRequest = (InterlockCurrState == STATE_T0_READY & (ITLBMissF | DTLBMissM | ExceptionM | PendingInterruptM)) | - ((InterlockCurrState == STATE_T0_REPLAY) - & (ExceptionM | PendingInterruptM)); - - - - - // *** add generate to conditionally create hptw, lsuArb, and mmu - // based on `MEM_VIRTMEM hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM, .ITLBMissF(ITLBMissF & ~PendingInterruptM), .DTLBMissM(DTLBMissM & ~PendingInterruptM), @@ -216,25 +151,18 @@ module lsu mux2 #(12) adremux(IEUAdrE[11:0], HPTWAdr[11:0], SelHPTW, LsuAdrE); mux2 #(`PA_BITS) lsupadrmux(IEUAdrExtM[`PA_BITS-1:0], HPTWAdr, SelHPTW, PreLsuPAdrM); + // always block interrupts when using the hardware page table walker. assign CPUBusy = StallW & ~SelHPTW; - // always block interrupts when using the hardware page table walker. - - // this is for the d cache SRAM. - // turns out because we cannot pipeline hptw requests we don't need this register + // It is not possible to pipeline hptw as the following load will depend on the previous load's + // data. Therefore we don't need a pipeline register //flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle - - //assign PreLsuRWM = SelHPTW ? {HPTWRead, 1'b0} : MemRWM; - //assign LsuAdrE = SelHPTW ? HPTWAdr[11:0] : IEUAdrE[11:0]; - //assign LsuAtomicM = SelHPTW ? 2'b00 : AtomicM; - //assign PreLsuPAdrM = SelHPTW ? HPTWAdr : IEUAdrExtM[`PA_BITS-1:0]; - // Specify which type of page fault is occurring - // *** `MEM_VIRTMEM assign DTLBLoadPageFaultM = DTLBPageFaultM & PreLsuRWM[1]; assign DTLBStorePageFaultM = DTLBPageFaultM & PreLsuRWM[0]; + // When replaying CPU memory request after PTW select the IEUAdrM for correct address. assign DCacheAdrE = SelReplayCPURequest ? IEUAdrM[11:0] : LsuAdrE; end // if (`MEM_VIRTMEM) @@ -263,6 +191,13 @@ module lsu endgenerate // **** look into this confusing signal. + // This signal is confusing. CommittedM tells the CPU's trap unit the current instruction + // in the memory stage is a memory operaton and that memory operation is either completed + // or is partially executed. This signal is only low for the first cycle of a memory + // operation. + // **** I think there is also a bug here. Data cache misses and TLB misses both + // set this bit in the first cycle. It is not strickly wrong, but it may be better + // to flush the memory operation at that time. assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; generate From a16b97cfb4f7518040ce3fde6cabebea1cbdaab6 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 29 Dec 2021 17:53:24 -0600 Subject: [PATCH 3/4] Added default to busfsm. --- wally-pipelined/regression/wave.do | 7 +++++-- wally-pipelined/src/lsu/busfsm.sv | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index e68c3239..2ed86bd7 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -472,8 +472,11 @@ add wave -noupdate -group {pc selection} /testbench/dut/hart/ifu/PrivilegedNextP add wave -noupdate -group {pc selection} /testbench/dut/hart/ifu/PrivilegedChangePCM add wave -noupdate /testbench/dut/hart/priv/priv/csr/MEPC_REGW add wave -noupdate /testbench/dut/hart/lsu/LocalLsuBusAdr +add wave -noupdate /testbench/dut/hart/lsu/busfsm/BusNextState +add wave -noupdate /testbench/dut/hart/lsu/busfsm/DCacheFetchLine +add wave -noupdate /testbench/dut/hart/lsu/busfsm/DCacheWriteLine TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 7} {36865 ns} 1} {{Cursor 5} {49445 ns} 1} {{Cursor 3} {35522 ns} 0} {{Cursor 4} {49574 ns} 1} +WaveRestoreCursors {{Cursor 7} {36865 ns} 1} {{Cursor 5} {49445 ns} 1} {{Cursor 3} {9745 ns} 0} {{Cursor 4} {49574 ns} 1} quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 314 @@ -489,4 +492,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {35088 ns} {35954 ns} +WaveRestoreZoom {9530 ns} {9952 ns} diff --git a/wally-pipelined/src/lsu/busfsm.sv b/wally-pipelined/src/lsu/busfsm.sv index 1f96aa65..56313364 100644 --- a/wally-pipelined/src/lsu/busfsm.sv +++ b/wally-pipelined/src/lsu/busfsm.sv @@ -92,6 +92,7 @@ module busfsm #(parameter integer WordCountThreshold, else if(LsuRWM[1] & ~CacheableM) BusNextState = STATE_BUS_UNCACHED_READ; else if(DCacheFetchLine) BusNextState = STATE_BUS_FETCH; else if(DCacheWriteLine) BusNextState = STATE_BUS_WRITE; + else BusNextState = STATE_BUS_READY; STATE_BUS_UNCACHED_WRITE: if(LsuBusAck) BusNextState = STATE_BUS_UNCACHED_WRITE_DONE; else BusNextState = STATE_BUS_UNCACHED_WRITE; STATE_BUS_UNCACHED_READ: if(LsuBusAck) BusNextState = STATE_BUS_UNCACHED_READ_DONE; @@ -106,6 +107,7 @@ module busfsm #(parameter integer WordCountThreshold, else BusNextState = STATE_BUS_FETCH; STATE_BUS_WRITE: if(WordCountFlag & LsuBusAck) BusNextState = STATE_BUS_READY; else BusNextState = STATE_BUS_WRITE; + default: BusNextState = STATE_BUS_READY; endcase end From 30562bcadaa0cf57c59489f834ea8d1a134b4937 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 30 Dec 2021 00:19:40 +0000 Subject: [PATCH 4/4] all FCVT imperas tests pass --- addins/riscv-arch-test | 2 +- wally-pipelined/src/fpu/fcvt.sv | 18 ++-- wally-pipelined/src/fpu/fma.sv | 170 +++++++++++++++----------------- 3 files changed, 90 insertions(+), 100 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b2..be67c99b 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/wally-pipelined/src/fpu/fcvt.sv b/wally-pipelined/src/fpu/fcvt.sv index 087de263..f48b3fd9 100644 --- a/wally-pipelined/src/fpu/fcvt.sv +++ b/wally-pipelined/src/fpu/fcvt.sv @@ -1,6 +1,7 @@ `include "wally-config.vh" // `include "../../config/rv64icfd/wally-config.vh" +// `define XLEN 64 module fcvt ( input logic XSgnE, // X's sign input logic [10:0] XExpE, // X's exponent @@ -59,7 +60,7 @@ module fcvt ( // fcvt.lu.d = 111 // fcvt.d.l = 100 // fcvt.d.lu = 110 - // {long, unsigned, to int, from int} + // {long, unsigned, to int} // calculate signals based off the input and output's size assign Res64 = (FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&~FOpCtrlE[0]); @@ -158,19 +159,24 @@ module fcvt ( // select the integer result assign CvtIntRes = Of ? FOpCtrlE[1] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : - Uf ? FOpCtrlE[1] ? 64'b0 : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} : + Uf ? FOpCtrlE[1] ? {63'b0, Plus1&~XSgnE} : SgnRes ? {32'b0, 1'b1, 31'b0} : {1'b1, 63'b0} : Rounded[64-1:0]; // select the floating point result assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]}; // select the result - assign CvtResE = ~FOpCtrlE[0] ? CvtFPRes : CvtIntRes; + assign CvtResE = FOpCtrlE[0] ? CvtIntRes : CvtFPRes; // calculate the flags - // - to int only sets the invalid flag - // - from int only sets the inexact flag - assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]}; + // - only set invalid flag for out-of-range vales if it isn't be indicated by the inexact + // - don't set inexact flag if converting a really large number (closest __ bit integer value is the max value) + // - don't set inexact flag if converting negitive or tiny number to unsigned (closest integer value is 0 or 1) + logic Invalid, Inexact; + assign Invalid = (Of | Uf)&FOpCtrlE[0]; + assign Inexact = (Guard|Round|Sticky)&~((&FOpCtrlE[1:0]&Uf&~(Plus1&~XSgnE))|(FOpCtrlE[0]&Of)); + assign CvtFlgE = {Invalid&~Inexact, 3'b0, Inexact}; + // assign CvtFlgE = {(Of | Uf)&FOpCtrlE[0], 3'b0, (Guard|Round|Sticky)&~FOpCtrlE[0]}; diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index a90848f5..32130ffe 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -28,7 +28,6 @@ // `define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) // `define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23) // `define XLEN 64 -`define NANPAYLOAD 1 module fma( input logic clk, input logic reset, @@ -117,7 +116,6 @@ module fma1( logic [3*`NF+5:0] AlignedAddendE; // Z aligned for addition in U(NF+5.2NF+1) logic [3*`NF+6:0] AlignedAddendInv; // aligned addend possibly inverted logic [2*`NF+1:0] ProdManKilled; // the product's mantissa possibly killed - logic [3*`NF+4:0] NegProdManKilled; // a negated ProdManKilled logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum logic [`NE-1:0] XExpVal, YExpVal; // exponent value after taking into accound denormals /////////////////////////////////////////////////////////////////////////////// @@ -321,7 +319,6 @@ module add( output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum ); - logic [3*`NF+4:0] NegProdManKilled; // a negated ProdManKilled /////////////////////////////////////////////////////////////////////////////// // Addition /////////////////////////////////////////////////////////////////////////////// @@ -335,15 +332,13 @@ module add( assign AlignedAddendInv = InvZE ? {1'b1, ~AlignedAddendE} : {1'b0, AlignedAddendE}; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign ProdManKilled = ProdManE&{2*`NF+2{~KillProdE}}; - // Negate ProdMan for LZA and the negitive sum calculation - assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, ~ProdManKilled&{2*`NF+2{~(XZeroE|YZeroE|KillProdE)}}}; // Do the addition // - calculate a positive and negitive sum in parallel assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE}; - assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE|KillProdE),2'b0}; + assign NegPreSum = XZeroE|YZeroE|KillProdE ? {1'b0, AlignedAddendE} : {1'b0, AlignedAddendE} + {{`NF+3{1'b1}}, ~ProdManKilled, 2'b0} + {(3*`NF+7)'(4)}; // Is the sum negitive assign NegSumE = PreSum[3*`NF+6]; @@ -360,6 +355,8 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098 logic [3*`NF+6:0] T; logic [3*`NF+6:0] G; logic [3*`NF+6:0] Z; + logic [3*`NF+6:0] f; + assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; assign G[3*`NF+6:2*`NF+4] = 0; assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; @@ -375,7 +372,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098 // - note: the paper linked above uses the numbering system where 0 is the most significant bit //f[n] = ~T[n]&T[n-1] note: n is the MSB //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) - logic [3*`NF+6:0] f; assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); @@ -440,11 +436,12 @@ module fma2( logic SumZero; // is the sum zero logic ResultDenorm; // is the result denormalized logic Sticky, UfSticky; // Sticky bit - logic Plus1, Minus1, CalcPlus1; // do you add or subtract one for rounding + logic CalcPlus1; // do you add or subtract one for rounding logic UfPlus1; // do you add one (for determining underflow flag) logic Invalid,Underflow,Overflow; // flags logic Guard, Round; // bits needed to determine rounding logic UfLSBNormSum; // bits needed to determine rounding for underflow flag + logic [`FLEN:0] RoundAdd; // how much to add to the result @@ -471,7 +468,7 @@ module fma2( // round to nearest max magnitude fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp, - .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfLSBNormSum); + .CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum); @@ -503,8 +500,8 @@ module fma2( /////////////////////////////////////////////////////////////////////////////// resultselect resultselect(.XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, - .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, - .ZSgnEffM, .PSgnM, .ResultSgn, .Minus1, .Plus1, .CalcPlus1, .Invalid, .Overflow, .Underflow, + .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, + .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM); // *** use NF where needed @@ -539,61 +536,6 @@ module resultsign( endmodule -module resultselect( - input logic XSgnM, YSgnM, // input signs - input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents - input logic [`NF:0] XManM, YManM, ZManM, // input mantissas - input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic FmtM, // precision 1 = double 0 = single - input logic AddendStickyM, // sticky bit that is calculated during alignment - input logic KillProdM, // set the product to zero before addition if the product is too small to matter - input logic XInfM, YInfM, ZInfM, // inputs are infinity - input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - input logic ZSgnEffM, // the modified Z sign - depends on instruction - input logic PSgnM, // the product's sign - input logic ResultSgn, // the result's sign - input logic Minus1, Plus1, CalcPlus1, // rounding bits - input logic Invalid, Overflow, Underflow, // flags - input logic ResultDenorm, // is the result denormalized - input logic [`NE-1:0] ResultExp, // Result exponent - input logic [`NF-1:0] ResultFrac, // Result fraction - output logic [`FLEN-1:0] FMAResM // FMA final result -); - logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results - - generate if(`NANPAYLOAD) begin - assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]}; - assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]}; - assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]}; - end else begin - assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, 22'b0}; - assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, 22'b0}; - assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, 22'b0}; - end - endgenerate - - - assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : - {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : - ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} : - {{32{1'b1}}, ResultSgn, 8'hff, 23'b0}; - assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; - assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - {62'b0, (Minus1&AddendStickyM)} + {62'b0, (Plus1&AddendStickyM)}} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; - assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; - assign FMAResM = XNaNM ? XNaNResult : - YNaNM ? YNaNResult : - ZNaNM ? ZNaNResult : - Invalid ? InvalidResult : - XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM, XExpM[7:0], XManM[51:29]} : - YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM, YExpM[7:0], YManM[51:29]} : - ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} : - KillProdM ? KillProdResult : - Overflow ? OverflowResult : - Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult : - FmtM ? {ResultSgn, ResultExp, ResultFrac} : - {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]}; - -endmodule module normalize( input logic [3*`NF+5:0] SumM, // the positive sum @@ -624,19 +566,6 @@ module normalize( // Normalization /////////////////////////////////////////////////////////////////////////////// - - // logic [8:0] supposedNormCnt; - // logic [8:0] i; - // always_comb begin - // i = 0; - // while (~SumM[3*`NF+5-i] && $unsigned(i) <= $unsigned(3*`NF+5)) i = i+1; // search for leading one - // supposedNormCnt = i; // compute shift count - // end - - // always_comb begin - // assert (NormCntM == supposedNormCnt | NormCntM == supposedNormCnt+1 | NormCntM == supposedNormCnt+2) else $fatal ("normcnt not expected"); - // end - // Determine if the sum is zero assign SumZero = ~(|SumM); @@ -644,18 +573,15 @@ module normalize( assign FracLen = FmtM ? `NF+1 : 13'd24; // calculate the sum's exponent - assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage - assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified + assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); + assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL; assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; - assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1))); + assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd2))); assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127); - assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp; - assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good - // always_comb begin - // assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal"); - // end + assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd25+13'd1023-13'd127)) | ~|SumExpTmpTmp; + assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; // 010. when should be 001. // - shift left one @@ -667,9 +593,9 @@ module normalize( // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation + assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; // Normalize the sum - assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified + assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift; // LZA correction assign LZAPlus1 = SumShifted[3*`NF+7]; assign LZAPlus2 = SumShifted[3*`NF+8]; @@ -699,18 +625,18 @@ module fmaround( input logic InvZM, // invert Z input logic [`NE+1:0] SumExp, // exponent of the normalized sum input logic ResultSgnTmp, // the result's sign - output logic CalcPlus1, Plus1, UfPlus1, Minus1, // do you add or subtract on from the result + output logic CalcPlus1, UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow output logic [`NF-1:0] ResultFrac, // Result fraction output logic [`NE-1:0] ResultExp, // Result exponent output logic Sticky, // sticky bit + output logic [`FLEN:0] RoundAdd, // how much to add to the result output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding ); logic LSBNormSum; // bit used for rounding - least significant bit of the normalized sum logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number - logic UfGuard; // gaurd bit used to caluculate underflow - logic UfCalcPlus1, CalcMinus1; // do you add or subtract on from the result - logic [`FLEN:0] RoundAdd; // how much to add to the result + logic UfGuard; // guard bit used to caluculate underflow + logic UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract on from the result logic [`NF-1:0] NormSumTruncated; // the normalized sum trimed to fit the mantissa logic UfRound; @@ -857,4 +783,62 @@ module fmaflags( // - Don't set the underflow flag if the result was rounded up to a normal number assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact}; +endmodule + + +module resultselect( + input logic XSgnM, YSgnM, // input signs + input logic [`NE-1:0] XExpM, YExpM, ZExpM, // input exponents + input logic [`NF:0] XManM, YManM, ZManM, // input mantissas + input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic FmtM, // precision 1 = double 0 = single + input logic AddendStickyM, // sticky bit that is calculated during alignment + input logic KillProdM, // set the product to zero before addition if the product is too small to matter + input logic XInfM, YInfM, ZInfM, // inputs are infinity + input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN + input logic ZSgnEffM, // the modified Z sign - depends on instruction + input logic PSgnM, // the product's sign + input logic ResultSgn, // the result's sign + input logic CalcPlus1, // rounding bits + input logic [`FLEN:0] RoundAdd, // how much to add to the result + input logic Invalid, Overflow, Underflow, // flags + input logic ResultDenorm, // is the result denormalized + input logic [`NE-1:0] ResultExp, // Result exponent + input logic [`NF-1:0] ResultFrac, // Result fraction + output logic [`FLEN-1:0] FMAResM // FMA final result +); + logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results + + generate if(`IEEE754) begin + assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]}; + assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]}; + assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]}; + end else begin + assign XNaNResult = FmtM ? {1'b0, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpM[7:0], 1'b1, 22'b0}; + assign YNaNResult = FmtM ? {1'b0, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, YExpM[7:0], 1'b1, 22'b0}; + assign ZNaNResult = FmtM ? {1'b0, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, ZExpM[7:0], 1'b1, 22'b0}; + end + endgenerate + + + assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : + {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : + ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} : + {{32{1'b1}}, ResultSgn, 8'hff, 23'b0}; + assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; + assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} + (RoundAdd[59:29]&{31{AddendStickyM}})}; + assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; + assign FMAResM = XNaNM ? XNaNResult : + YNaNM ? YNaNResult : + ZNaNM ? ZNaNResult : + Invalid ? InvalidResult : + XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM, XExpM[7:0], XManM[51:29]} : + YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM, YExpM[7:0], YManM[51:29]} : + ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} : + KillProdM ? KillProdResult : + Overflow ? OverflowResult : + Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult : + FmtM ? {ResultSgn, ResultExp, ResultFrac} : + {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]}; + endmodule \ No newline at end of file