From 596bc138bc8d5092e9ead654f37ac15ed21dcb10 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Wed, 25 Aug 2021 22:30:05 -0500 Subject: [PATCH] Forgot to include a few files in the last few commits. Also reorganized the dcache by read cpu path, write cpu path, and bus interface path. Changed i/o names on subwordread to match signals in dcache. --- ...{cacheLRU.sv => cachereplacementpolicy.sv} | 46 +- wally-pipelined/src/cache/dcache.sv | 133 ++- wally-pipelined/src/cache/dcachefsm.sv | 792 ++++++++++++++++++ wally-pipelined/src/lsu/subwordread.sv | 185 ++-- 4 files changed, 968 insertions(+), 188 deletions(-) rename wally-pipelined/src/cache/{cacheLRU.sv => cachereplacementpolicy.sv} (78%) create mode 100644 wally-pipelined/src/cache/dcachefsm.sv diff --git a/wally-pipelined/src/cache/cacheLRU.sv b/wally-pipelined/src/cache/cachereplacementpolicy.sv similarity index 78% rename from wally-pipelined/src/cache/cacheLRU.sv rename to wally-pipelined/src/cache/cachereplacementpolicy.sv index a7e7028ea..a0b37cec8 100644 --- a/wally-pipelined/src/cache/cacheLRU.sv +++ b/wally-pipelined/src/cache/cachereplacementpolicy.sv @@ -24,13 +24,13 @@ `include "wally-config.vh" -module cacheLRU +module cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES) (input logic clk, reset, - input logic [NUMWAYS-1:0] WayIn, + input logic [NUMWAYS-1:0] WayHit, output logic [NUMWAYS-1:0] VictimWay, input logic [INDEXLEN+OFFSETLEN-1:OFFSETLEN] MemPAdrM, - input logic [INDEXLEN-1:0] SRAMAdr, + input logic [INDEXLEN-1:0] RAdr, input logic LRUWriteEn ); @@ -48,7 +48,7 @@ module cacheLRU for(int index = 0; index < NUMLINES; index++) ReplacementBits[index] <= '0; end else begin - BlockReplacementBits <= ReplacementBits[SRAMAdr]; + BlockReplacementBits <= ReplacementBits[RAdr]; if (LRUWriteEn) begin ReplacementBits[MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]] <= NewReplacement; end @@ -62,7 +62,7 @@ module cacheLRU assign LRUEn[0] = 1'b0; - assign NewReplacement[0] = WayIn[1]; + assign 
NewReplacement[0] = WayHit[1]; assign VictimWay[1] = ~BlockReplacementBits[0]; assign VictimWay[0] = BlockReplacementBits[0]; @@ -71,13 +71,13 @@ module cacheLRU // selects assign LRUEn[2] = 1'b1; - assign LRUEn[1] = WayIn[3]; - assign LRUEn[0] = WayIn[3] | WayIn[2]; + assign LRUEn[1] = WayHit[3]; + assign LRUEn[0] = WayHit[3] | WayHit[2]; // mask - assign LRUMask[0] = WayIn[1]; - assign LRUMask[1] = WayIn[3]; - assign LRUMask[2] = WayIn[3] | WayIn[2]; + assign LRUMask[0] = WayHit[1]; + assign LRUMask[1] = WayHit[3]; + assign LRUMask[2] = WayHit[3] | WayHit[2]; for(index = 0; index < NUMWAYS-1; index++) assign NewReplacement[index] = LRUEn[index] ? LRUMask[index] : BlockReplacementBits[index]; @@ -93,21 +93,21 @@ module cacheLRU // selects assign LRUEn[6] = 1'b1; - assign LRUEn[5] = WayIn[7] | WayIn[6] | WayIn[5] | WayIn[4]; - assign LRUEn[4] = WayIn[7] | WayIn[6]; - assign LRUEn[3] = WayIn[5] | WayIn[4]; - assign LRUEn[2] = WayIn[3] | WayIn[2] | WayIn[1] | WayIn[0]; - assign LRUEn[1] = WayIn[3] | WayIn[2]; - assign LRUEn[0] = WayIn[1] | WayIn[0]; + assign LRUEn[5] = WayHit[7] | WayHit[6] | WayHit[5] | WayHit[4]; + assign LRUEn[4] = WayHit[7] | WayHit[6]; + assign LRUEn[3] = WayHit[5] | WayHit[4]; + assign LRUEn[2] = WayHit[3] | WayHit[2] | WayHit[1] | WayHit[0]; + assign LRUEn[1] = WayHit[3] | WayHit[2]; + assign LRUEn[0] = WayHit[1] | WayHit[0]; // mask - assign LRUMask[6] = WayIn[7] | WayIn[6] | WayIn[5] | WayIn[4]; - assign LRUMask[5] = WayIn[7] | WayIn[6]; - assign LRUMask[4] = WayIn[7]; - assign LRUMask[3] = WayIn[5]; - assign LRUMask[2] = WayIn[3] | WayIn[2]; - assign LRUMask[1] = WayIn[2]; - assign LRUMask[0] = WayIn[0]; + assign LRUMask[6] = WayHit[7] | WayHit[6] | WayHit[5] | WayHit[4]; + assign LRUMask[5] = WayHit[7] | WayHit[6]; + assign LRUMask[4] = WayHit[7]; + assign LRUMask[3] = WayHit[5]; + assign LRUMask[2] = WayHit[3] | WayHit[2]; + assign LRUMask[1] = WayHit[2]; + assign LRUMask[0] = WayHit[0]; for(index = 0; index < NUMWAYS-1; index++) assign 
NewReplacement[index] = LRUEn[index] ? LRUMask[index] : BlockReplacementBits[index]; diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 228163632..d9620cf98 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -90,7 +90,7 @@ module dcache logic [1:0] SelAdrM; - logic [INDEXLEN-1:0] SRAMAdr; + logic [INDEXLEN-1:0] RAdr; logic [BLOCKLEN-1:0] SRAMWriteData; logic [BLOCKLEN-1:0] DCacheMemWriteData; logic SetValid, ClearValid; @@ -135,84 +135,65 @@ module dcache logic LRUWriteEn; - // data path + // Read Path CPU (IEU) side mux3 #(INDEXLEN) AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), - .d1(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), - .d2(VAdr[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .d1(VAdr[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .d2(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .s(SelAdrM), - .y(SRAMAdr)); + .y(RAdr)); - onehotdecoder #(LOGWPL) - adrdec(.bin(MemPAdrM[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), - .decoded(MemPAdrDecodedW)); - - - assign SRAMWordEnable = SRAMBlockWriteEnableM ? 
'1 : MemPAdrDecodedW; - - cacheway #(.NUMLINES(NUMLINES), .BLOCKLEN(BLOCKLEN), .TAGLEN(TAGLEN), .OFFSETLEN(OFFSETLEN), .INDEXLEN(INDEXLEN)) MemWay[NUMWAYS-1:0](.clk, - .reset, - .RAdr(SRAMAdr), - .MemPAdrM(MemPAdrM[`PA_BITS-1:0]), - .WriteEnable(SRAMWayWriteEnable), - .WriteWordEnable(SRAMWordEnable), - .TagWriteEnable(SRAMBlockWayWriteEnableM), - .WriteData(SRAMWriteData), - .SetValid, - .ClearValid, - .SetDirty, - .ClearDirty, - .SelEvict, - .VictimWay, - .ReadDataBlockWayMaskedM, - .WayHit, - .VictimDirtyWay, - .VictimTagWay); + .reset, + .RAdr, + .MemPAdrM(MemPAdrM[`PA_BITS-1:0]), + .WriteEnable(SRAMWayWriteEnable), + .WriteWordEnable(SRAMWordEnable), + .TagWriteEnable(SRAMBlockWayWriteEnableM), + .WriteData(SRAMWriteData), + .SetValid, + .ClearValid, + .SetDirty, + .ClearDirty, + .SelEvict, + .VictimWay, + .ReadDataBlockWayMaskedM, + .WayHit, + .VictimDirtyWay, + .VictimTagWay); generate if(NUMWAYS > 1) begin - cacheLRU #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES) - cacheLRU(.clk, .reset, - .WayIn(WayHit), - .VictimWay, - .MemPAdrM(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), - .SRAMAdr, - .LRUWriteEn); + cachereplacementpolicy #(NUMWAYS, INDEXLEN, OFFSETLEN, NUMLINES) + cachereplacementpolicy(.clk, .reset, + .WayHit, + .VictimWay, + .MemPAdrM(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .RAdr, + .LRUWriteEn); end else begin - assign VictimWay = 1'b1; + assign VictimWay = 1'b1; // one hot. end endgenerate - assign SRAMBlockWayWriteEnableM = SRAMBlockWriteEnableM ? VictimWay : '0; - - mux2 #(NUMWAYS) WriteEnableMux(.d0(SRAMWordWriteEnableM ? WayHit : '0), - .d1(SRAMBlockWayWriteEnableM), - .s(SRAMBlockWriteEnableM), - .y(SRAMWayWriteEnable)); + assign CacheHit = | WayHit; + assign VictimDirty = | VictimDirtyWay; - - - - assign CacheHit = |WayHit; // ReadDataBlockWayMaskedM is a 2d array of cache block len by number of ways. // Need to OR together each way in a bitwise manner. - // Final part of the AO Mux. + // Final part of the AO Mux. 
First is the AND in the cacheway. or_rows #(NUMWAYS, BLOCKLEN) ReadDataAOMux(.a(ReadDataBlockWayMaskedM), .y(ReadDataBlockM)); or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); - assign VictimDirty = | VictimDirtyWay; - // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can // easily build a variable input mux. // *** consider using a limited range shift to do this final muxing. genvar index; - generate for (index = 0; index < WORDSPERLINE; index++) begin assign ReadDataBlockSetsM[index] = ReadDataBlockM[((index+1)*`XLEN)-1: (index*`XLEN)]; @@ -222,20 +203,31 @@ module dcache // variable input mux assign ReadDataWordM = ReadDataBlockSetsM[MemPAdrM[$clog2(WORDSPERLINE+`XLEN/8) : $clog2(`XLEN/8)]]; - - assign HWDATA = CacheableM ? ReadDataBlockSetsM[FetchCount] : WriteDataM; - mux2 #(`XLEN) UnCachedDataMux(.d0(ReadDataWordM), .d1(DCacheMemWriteData[`XLEN-1:0]), .s(SelUncached), .y(ReadDataWordMuxM)); // finally swr - // *** BUG fix HSIZED? why was it this way? - subwordread subwordread(.HRDATA(ReadDataWordMuxM), - .HADDRD(MemPAdrM[2:0]), - .HSIZED({Funct3M[2], 1'b0, Funct3M[1:0]}), - .HRDATAMasked(ReadDataM)); + subwordread subwordread(.ReadDataWordMuxM, + .MemPAdrM(MemPAdrM[2:0]), + .Funct3M, + .ReadDataM); + + // Write Path CPU (IEU) side + + onehotdecoder #(LOGWPL) + adrdec(.bin(MemPAdrM[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), + .decoded(MemPAdrDecodedW)); + + assign SRAMWordEnable = SRAMBlockWriteEnableM ? '1 : MemPAdrDecodedW; + + assign SRAMBlockWayWriteEnableM = SRAMBlockWriteEnableM ? VictimWay : '0; + + mux2 #(NUMWAYS) WriteEnableMux(.d0(SRAMWordWriteEnableM ? 
WayHit : '0), + .d1(SRAMBlockWayWriteEnableM), + .s(SRAMBlockWriteEnableM), + .y(SRAMWayWriteEnable)); generate if (`A_SUPPORTED) begin @@ -247,8 +239,6 @@ module dcache assign FinalAMOWriteDataM = WriteDataM; endgenerate - - // write path subwordwrite subwordwrite(.HRDATA(ReadDataWordM), .HADDRD(MemPAdrM[2:0]), .HSIZED({Funct3M[2], 1'b0, Funct3M[1:0]}), @@ -256,7 +246,15 @@ module dcache .HWDATA(FinalWriteDataM)); + mux2 #(BLOCKLEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteDataM}}), + .d1(DCacheMemWriteData), + .s(SRAMBlockWriteEnableM), + .y(SRAMWriteData)); + + // Bus Side logic // register the fetch data from the next level of memory. + // This register should be necessary for timing. There is no register in the uncore or + // ahblite controller between the memories and this cache. generate for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer flopen #(`XLEN) fb(.clk(clk), @@ -278,13 +276,7 @@ module dcache assign AHBPAdr = ({{`PA_BITS-LOGWPL{1'b0}}, FetchCount} << $clog2(`XLEN/8)) + BasePAdrMaskedM; - - - mux2 #(BLOCKLEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteDataM}}), - .d1(DCacheMemWriteData), - .s(SRAMBlockWriteEnableM), - .y(SRAMWriteData)); - + assign HWDATA = CacheableM ? ReadDataBlockSetsM[FetchCount] : WriteDataM; localparam FetchCountThreshold = WORDSPERLINE - 1; @@ -302,9 +294,8 @@ module dcache assign SRAMWriteEnable = SRAMBlockWriteEnableM | SRAMWordWriteEnableM; + // controller - // control path *** eventually move to own module. 
- dcachefsm dcachefsm(.clk, .reset, .MemRWM, diff --git a/wally-pipelined/src/cache/dcachefsm.sv b/wally-pipelined/src/cache/dcachefsm.sv new file mode 100644 index 000000000..8a0b9d148 --- /dev/null +++ b/wally-pipelined/src/cache/dcachefsm.sv @@ -0,0 +1,792 @@ +/////////////////////////////////////////// +// dcache (data cache) fsm +// +// Written: ross1728@gmail.com August 25, 2021 +// Implements the L1 data cache fsm +// +// Purpose: Controller for the dcache fsm +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +`include "wally-config.vh" + +module dcachefsm + (input logic clk, + input logic reset, + // inputs from IEU + input logic [1:0] MemRWM, + input logic [1:0] AtomicM, + + // hazard inputs + input logic ExceptionM, + input logic PendingInterruptM, + input logic StallWtoDCache, + // mmu inputs + input logic DTLBMissM, + input logic ITLBMissF, + input logic CacheableM, + input logic DTLBWriteM, + input logic ITLBWriteF, + input logic WalkerInstrPageFaultF, + // hptw inputs + input logic SelPTW, + input logic WalkerPageFaultM, + // Bus inputs + input logic AHBAck, // from ahb + // dcache internals + input logic CacheHit, + input logic FetchCountFlag, + input logic VictimDirty, + + // hazard outputs + output logic DCacheStall, + output logic CommittedM, + // counter outputs + output logic DCacheMiss, + output logic DCacheAccess, + // hptw outputs + output logic MemAfterIWalkDone, + // Bus outputs + output logic AHBRead, + output logic AHBWrite, + + // dcache internals + output logic [1:0] SelAdrM, + output logic CntEn, + output logic SetValid, + output logic ClearValid, + output logic SetDirty, + output logic ClearDirty, + output logic SRAMWordWriteEnableM, + output logic SRAMBlockWriteEnableM, + output logic CntReset, + output logic SelUncached, + output logic SelEvict, + output logic LRUWriteEn + ); + + logic PreCntEn; + logic AnyCPUReqM; + + typedef enum {STATE_READY, + + STATE_MISS_FETCH_WDV, + STATE_MISS_FETCH_DONE, + STATE_MISS_EVICT_DIRTY, + STATE_MISS_WRITE_CACHE_BLOCK, + STATE_MISS_READ_WORD, + STATE_MISS_READ_WORD_DELAY, + STATE_MISS_WRITE_WORD, + + STATE_PTW_READY, + STATE_PTW_READ_MISS_FETCH_WDV, + STATE_PTW_READ_MISS_FETCH_DONE, + STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK, + STATE_PTW_READ_MISS_EVICT_DIRTY, + STATE_PTW_READ_MISS_READ_WORD, + STATE_PTW_READ_MISS_READ_WORD_DELAY, + STATE_PTW_ACCESS_AFTER_WALK, + + STATE_UNCACHED_WRITE, + STATE_UNCACHED_WRITE_DONE, + STATE_UNCACHED_READ, + STATE_UNCACHED_READ_DONE, + 
+ STATE_PTW_FAULT_READY, + STATE_PTW_FAULT_CPU_BUSY, + STATE_PTW_FAULT_MISS_FETCH_WDV, + STATE_PTW_FAULT_MISS_FETCH_DONE, + STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK, + STATE_PTW_FAULT_MISS_READ_WORD, + STATE_PTW_FAULT_MISS_READ_WORD_DELAY, + STATE_PTW_FAULT_MISS_WRITE_WORD, + STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY, + STATE_PTW_FAULT_MISS_EVICT_DIRTY, + + STATE_PTW_FAULT_UNCACHED_WRITE, + STATE_PTW_FAULT_UNCACHED_WRITE_DONE, + STATE_PTW_FAULT_UNCACHED_READ, + STATE_PTW_FAULT_UNCACHED_READ_DONE, + + STATE_CPU_BUSY, + STATE_CPU_BUSY_FINISH_AMO} statetype; + + statetype CurrState, NextState; + + assign AnyCPUReqM = |MemRWM | (|AtomicM); + assign CntEn = PreCntEn & AHBAck; + + + always_ff @(posedge clk, posedge reset) + if (reset) CurrState <= #1 STATE_READY; + else CurrState <= #1 NextState; + + + // next state logic and some state outputs. + always_comb begin + DCacheStall = 1'b0; + SelAdrM = 2'b00; + PreCntEn = 1'b0; + SetValid = 1'b0; + ClearValid = 1'b0; + SetDirty = 1'b0; + ClearDirty = 1'b0; + SRAMWordWriteEnableM = 1'b0; + SRAMBlockWriteEnableM = 1'b0; + CntReset = 1'b0; + AHBRead = 1'b0; + AHBWrite = 1'b0; + CommittedM = 1'b0; + SelUncached = 1'b0; + SelEvict = 1'b0; + DCacheAccess = 1'b0; + DCacheMiss = 1'b0; + LRUWriteEn = 1'b0; + MemAfterIWalkDone = 1'b0; + + case (CurrState) + STATE_READY: begin + // TLB Miss + if((AnyCPUReqM & DTLBMissM) | ITLBMissF) begin + // the LSU arbiter has not yet selected the PTW. + // The CPU needs to be stalled until that happens. + // If we set DCacheStall for 1 cycle before going to + // PTW ready the CPU will stall. + // The page table walker asserts its control 1 cycle + // after the TLBs miss. 
+ CommittedM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_PTW_READY; + end + // amo hit + else if(AtomicM[1] & (&MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + SelAdrM = 2'b10; + DCacheStall = 1'b0; + + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + SelAdrM = 2'b10; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + end + // read hit valid cached + else if(MemRWM[1] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + DCacheStall = 1'b0; + DCacheAccess = 1'b1; + LRUWriteEn = 1'b1; + + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end + // write hit valid cached + else if (MemRWM[0] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + SelAdrM = 2'b10; + DCacheStall = 1'b0; + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end + // read or write miss valid cached + else if((|MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & ~CacheHit & ~DTLBMissM) begin + NextState = STATE_MISS_FETCH_WDV; + CntReset = 1'b1; + DCacheStall = 1'b1; + DCacheAccess = 1'b1; + DCacheMiss = 1'b1; + end + // uncached write + else if(MemRWM[0] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + NextState = STATE_UNCACHED_WRITE; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBWrite = 1'b1; + end + // uncached read + else if(MemRWM[1] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + NextState = STATE_UNCACHED_READ; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBRead = 1'b1; + end + // fault + else if(AnyCPUReqM & (ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + NextState = STATE_READY; + end + else NextState = 
STATE_READY; + end + + STATE_MISS_FETCH_WDV: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBRead = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + + if (FetchCountFlag & AHBAck) begin + NextState = STATE_MISS_FETCH_DONE; + end else begin + NextState = STATE_MISS_FETCH_WDV; + end + end + + STATE_MISS_FETCH_DONE: begin + DCacheStall = 1'b1; + SelAdrM = 2'b10; + CntReset = 1'b1; + CommittedM = 1'b1; + if(VictimDirty) begin + NextState = STATE_MISS_EVICT_DIRTY; + end else begin + NextState = STATE_MISS_WRITE_CACHE_BLOCK; + end + end + + STATE_MISS_WRITE_CACHE_BLOCK: begin + SRAMBlockWriteEnableM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_MISS_READ_WORD; + SelAdrM = 2'b10; + SetValid = 1'b1; + ClearDirty = 1'b1; + CommittedM = 1'b1; + //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write + end + + STATE_MISS_READ_WORD: begin + SelAdrM = 2'b10; + DCacheStall = 1'b1; + CommittedM = 1'b1; + if (MemRWM[0]) begin // handles stores and amo write. + NextState = STATE_MISS_WRITE_WORD; + end else begin + NextState = STATE_MISS_READ_WORD_DELAY; + // delay state is required as the read signal MemRWM[1] is still high when we + // return to the ready state because the cache is stalling the cpu. 
+ end + end + + STATE_MISS_READ_WORD_DELAY: begin + //SelAdrM = 2'b10; + CommittedM = 1'b1; + if(&MemRWM & AtomicM[1]) begin // amo write + SelAdrM = 2'b10; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + end else begin + LRUWriteEn = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end + end + + STATE_MISS_WRITE_WORD: begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + LRUWriteEn = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end + + STATE_MISS_EVICT_DIRTY: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBWrite = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + SelEvict = 1'b1; + if(FetchCountFlag & AHBAck) begin + NextState = STATE_MISS_WRITE_CACHE_BLOCK; + end else begin + NextState = STATE_MISS_EVICT_DIRTY; + end + end + + STATE_PTW_READY: begin + // now all output connect to PTW instead of CPU. + CommittedM = 1'b1; + + // In this branch we remove stall and go back to ready. There is no request for memory from the + // datapath or the walker had a fault. + // types 3b, 4a, 4b, and 7c. + if ((DTLBMissM & WalkerPageFaultM) | // 3b + (ITLBMissF & (WalkerInstrPageFaultF | ITLBWriteF) & ~AnyCPUReqM & ~DTLBMissM) | // 4a and 4b + (DTLBMissM & ITLBMissF & WalkerPageFaultM)) begin // 7c + NextState = STATE_READY; + DCacheStall = 1'b0; + end + // in this branch we go back to ready, but there is a memory operation from + // the datapath so we MUST stall and replay the operation. 
+ // types 3a and 5a + else if ((DTLBMissM & DTLBWriteM) | // 3a + (ITLBMissF & ITLBWriteF & AnyCPUReqM)) begin // 5a + NextState = STATE_READY; + DCacheStall = 1'b1; + SelAdrM = 2'b01; + end + + // like 5a we want to stall and go to the ready state, but we also have to save + // the WalkerInstrPageFaultF so it is held until the end of the memory operation + // from the datapath. + // types 5b + else if (ITLBMissF & WalkerInstrPageFaultF & AnyCPUReqM) begin // 5b + NextState = STATE_PTW_FAULT_READY; + DCacheStall = 1'b1; + SelAdrM = 2'b01; + end + + // in this branch we stay in ptw_ready because we are doing an itlb walk + // after a dtlb walk. + // types 7a and 7b. + else if (DTLBMissM & DTLBWriteM & ITLBMissF)begin + NextState = STATE_PTW_READY; + DCacheStall = 1'b0; + + // read hit valid cached + end else if(MemRWM[1] & CacheableM & ~ExceptionM & CacheHit) begin + NextState = STATE_PTW_READY; + DCacheStall = 1'b0; + LRUWriteEn = 1'b1; + end + + // read miss valid cached + else if(SelPTW & MemRWM[1] & CacheableM & ~ExceptionM & ~CacheHit) begin + NextState = STATE_PTW_READ_MISS_FETCH_WDV; + CntReset = 1'b1; + DCacheStall = 1'b1; + end + + else begin + NextState = STATE_PTW_READY; + DCacheStall = 1'b0; + end + end + + STATE_PTW_READ_MISS_FETCH_WDV: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBRead = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + + if(FetchCountFlag & AHBAck) begin + NextState = STATE_PTW_READ_MISS_FETCH_DONE; + end else begin + NextState = STATE_PTW_READ_MISS_FETCH_WDV; + end + end + + STATE_PTW_READ_MISS_FETCH_DONE: begin + DCacheStall = 1'b1; + SelAdrM = 2'b10; + CntReset = 1'b1; + CommittedM = 1'b1; + CntReset = 1'b1; + if(VictimDirty) begin + NextState = STATE_PTW_READ_MISS_EVICT_DIRTY; + end else begin + NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; + end + end + + STATE_PTW_READ_MISS_EVICT_DIRTY: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBWrite = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + SelEvict = 1'b1; + 
if(FetchCountFlag & AHBAck) begin + NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; + end else begin + NextState = STATE_PTW_READ_MISS_EVICT_DIRTY; + end + end + + + STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK: begin + SRAMBlockWriteEnableM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_PTW_READ_MISS_READ_WORD; + SelAdrM = 2'b10; + SetValid = 1'b1; + ClearDirty = 1'b1; + CommittedM = 1'b1; + //LRUWriteEn = 1'b1; + end + + STATE_PTW_READ_MISS_READ_WORD: begin + SelAdrM = 2'b10; + DCacheStall = 1'b1; + CommittedM = 1'b1; + NextState = STATE_PTW_READ_MISS_READ_WORD_DELAY; + end + + STATE_PTW_READ_MISS_READ_WORD_DELAY: begin + SelAdrM = 2'b10; + NextState = STATE_PTW_READY; + CommittedM = 1'b1; + end + + STATE_PTW_ACCESS_AFTER_WALK: begin + DCacheStall = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + + STATE_CPU_BUSY: begin + CommittedM = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end + + STATE_CPU_BUSY_FINISH_AMO: begin + CommittedM = 1'b1; + SelAdrM = 2'b10; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY_FINISH_AMO; + end + else begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_READY; + end + end + + STATE_UNCACHED_WRITE : begin + DCacheStall = 1'b1; + AHBWrite = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_UNCACHED_WRITE_DONE; + end else begin + NextState = STATE_UNCACHED_WRITE; + end + end + + STATE_UNCACHED_READ : begin + DCacheStall = 1'b1; + AHBRead = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_UNCACHED_READ_DONE; + end else begin + NextState = STATE_UNCACHED_READ; + end + end + + STATE_UNCACHED_WRITE_DONE: begin + CommittedM = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end + + STATE_UNCACHED_READ_DONE: begin + CommittedM = 
1'b1; + SelUncached = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + NextState = STATE_READY; + end + end + + + // itlb => instruction page fault states with memory request. + STATE_PTW_FAULT_READY: begin + // read hit valid cached + if(MemRWM[1] & CacheableM & CacheHit & ~DTLBMissM) begin + DCacheStall = 1'b0; + DCacheAccess = 1'b1; + LRUWriteEn = 1'b1; + + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + + // write hit valid cached + else if (MemRWM[0] & CacheableM & CacheHit & ~DTLBMissM) begin + SelAdrM = 2'b10; + DCacheStall = 1'b0; + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + LRUWriteEn = 1'b1; + + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + // read or write miss valid cached + else if((|MemRWM) & CacheableM & ~CacheHit & ~DTLBMissM) begin + NextState = STATE_PTW_FAULT_MISS_FETCH_WDV; + CntReset = 1'b1; + DCacheStall = 1'b1; + DCacheAccess = 1'b1; + DCacheMiss = 1'b1; + end + // uncached write + else if(MemRWM[0] & ~CacheableM & ~DTLBMissM) begin + NextState = STATE_PTW_FAULT_UNCACHED_WRITE; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBWrite = 1'b1; + end + // uncached read + else if(MemRWM[1] & ~CacheableM & ~DTLBMissM) begin + NextState = STATE_PTW_FAULT_UNCACHED_READ; + CntReset = 1'b1; + DCacheStall = 1'b1; + AHBRead = 1'b1; + end + // fault + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + + STATE_PTW_FAULT_CPU_BUSY: begin + CommittedM = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + + STATE_PTW_FAULT_MISS_FETCH_WDV: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBRead = 1'b1; + 
SelAdrM = 2'b10; + CommittedM = 1'b1; + + if(FetchCountFlag & AHBAck) begin + NextState = STATE_PTW_FAULT_MISS_FETCH_DONE; + end else begin + NextState = STATE_PTW_FAULT_MISS_FETCH_WDV; + end + end + + STATE_PTW_FAULT_MISS_FETCH_DONE: begin + DCacheStall = 1'b1; + SelAdrM = 2'b10; + CntReset = 1'b1; + CommittedM = 1'b1; + if(VictimDirty) begin + NextState = STATE_PTW_FAULT_MISS_EVICT_DIRTY; + end else begin + NextState = STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK; + end + end + + STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK: begin + SRAMBlockWriteEnableM = 1'b1; + DCacheStall = 1'b1; + NextState = STATE_PTW_FAULT_MISS_READ_WORD; + SelAdrM = 2'b10; + SetValid = 1'b1; + ClearDirty = 1'b1; + CommittedM = 1'b1; + //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write + end + + STATE_PTW_FAULT_MISS_READ_WORD: begin + SelAdrM = 2'b10; + DCacheStall = 1'b1; + CommittedM = 1'b1; + if(MemRWM[1]) begin + NextState = STATE_PTW_FAULT_MISS_READ_WORD_DELAY; + // delay state is required as the read signal MemRWM[1] is still high when we + // return to the ready state because the cache is stalling the cpu. 
+ end else begin + NextState = STATE_PTW_FAULT_MISS_WRITE_WORD; + end + end + + STATE_PTW_FAULT_MISS_READ_WORD_DELAY: begin + CommittedM = 1'b1; + LRUWriteEn = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + + STATE_PTW_FAULT_MISS_WRITE_WORD: begin + SRAMWordWriteEnableM = 1'b1; + SetDirty = 1'b1; + SelAdrM = 2'b10; + DCacheStall = 1'b1; + CommittedM = 1'b1; + LRUWriteEn = 1'b1; + NextState = STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY; + end + + STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY: begin + CommittedM = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + + STATE_PTW_FAULT_MISS_EVICT_DIRTY: begin + DCacheStall = 1'b1; + PreCntEn = 1'b1; + AHBWrite = 1'b1; + SelAdrM = 2'b10; + CommittedM = 1'b1; + SelEvict = 1'b1; + if(FetchCountFlag & AHBAck) begin + NextState = STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK; + end else begin + NextState = STATE_PTW_FAULT_MISS_EVICT_DIRTY; + end + end + + + STATE_PTW_FAULT_UNCACHED_WRITE : begin + DCacheStall = 1'b1; + AHBWrite = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_PTW_FAULT_UNCACHED_WRITE_DONE; + end else begin + NextState = STATE_PTW_FAULT_UNCACHED_WRITE; + end + end + + STATE_PTW_FAULT_UNCACHED_READ : begin + DCacheStall = 1'b1; + AHBRead = 1'b1; + CommittedM = 1'b1; + if(AHBAck) begin + NextState = STATE_PTW_FAULT_UNCACHED_READ_DONE; + end else begin + NextState = STATE_PTW_FAULT_UNCACHED_READ; + end + end + + STATE_PTW_FAULT_UNCACHED_WRITE_DONE: begin + CommittedM = 1'b1; + if(StallWtoDCache) begin + NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + + STATE_PTW_FAULT_UNCACHED_READ_DONE: begin + CommittedM = 1'b1; + SelUncached = 1'b1; + if(StallWtoDCache) begin 
+ NextState = STATE_PTW_FAULT_CPU_BUSY; + SelAdrM = 2'b10; + end + else begin + MemAfterIWalkDone = 1'b1; + NextState = STATE_READY; + end + end + + default: begin + end + endcase + end + + + +endmodule // dcachefsm + diff --git a/wally-pipelined/src/lsu/subwordread.sv b/wally-pipelined/src/lsu/subwordread.sv index 892cb2c0e..50a205bfb 100644 --- a/wally-pipelined/src/lsu/subwordread.sv +++ b/wally-pipelined/src/lsu/subwordread.sv @@ -26,13 +26,10 @@ `include "wally-config.vh" module subwordread ( - // from AHB Interface - input logic [`XLEN-1:0] HRDATA, - input logic [2:0] HADDRD, - //input logic UnsignedLoadM, - input logic [3:0] HSIZED, - // to ifu/dmems - output logic [`XLEN-1:0] HRDATAMasked + input logic [`XLEN-1:0] ReadDataWordMuxM, + input logic [2:0] MemPAdrM, + input logic [2:0] Funct3M, + output logic [`XLEN-1:0] ReadDataM ); logic [7:0] ByteM; @@ -40,8 +37,8 @@ module subwordread ( logic [`XLEN-1:0] offset0, offset1, offset2, offset3; - // HSIZED[3] is the unsigned bit. mask upper bits. - // HSIZED[1:0] is the size of the memory access. + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. generate if (`XLEN == 64) begin @@ -50,52 +47,52 @@ module subwordread ( logic [`XLEN-1:0] offset4, offset5, offset6, offset7; always_comb - case(HSIZED[1:0]) - 3: offset0 = HRDATA; //ld - 2: offset0 = HSIZED[3] ? {{32'b0}, HRDATA[31:0]} : {{32{HRDATA[31]}}, HRDATA[31:0]}; //lw(u) - 1: offset0 = HSIZED[3] ? {{48'b0}, HRDATA[15:0]} : {{48{HRDATA[15]}}, HRDATA[15:0]}; //lh(u) - 0: offset0 = HSIZED[3] ? {{56'b0}, HRDATA[7:0]} : {{56{HRDATA[7]}}, HRDATA[7:0]}; //lb(u) + case(Funct3M[1:0]) + 3: offset0 = ReadDataWordMuxM; //ld + 2: offset0 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[31:0]} : {{32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; //lw(u) + 1: offset0 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[15:0]} : {{48{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:0]}; //lh(u) + 0: offset0 = Funct3M[2] ? 
{{56'b0}, ReadDataWordMuxM[7:0]} : {{56{ReadDataWordMuxM[7]}}, ReadDataWordMuxM[7:0]}; //lb(u) endcase - assign offset1 = HSIZED[3] ? {{56'b0}, HRDATA[15:8]} : {{56{HRDATA[15]}}, HRDATA[15:8]}; //lb(u) + assign offset1 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[15:8]} : {{56{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:8]}; //lb(u) always_comb - case(HSIZED[0]) - 1: offset2 = HSIZED[3] ? {{48'b0}, HRDATA[31:16]} : {{48{HRDATA[31]}}, HRDATA[31:16]};//lh(u) - 0: offset2 = HSIZED[3] ? {{56'b0}, HRDATA[23:16]} : {{56{HRDATA[23]}}, HRDATA[23:16]};//lb(u) + case(Funct3M[0]) + 1: offset2 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[31:16]} : {{48{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:16]};//lh(u) + 0: offset2 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[23:16]} : {{56{ReadDataWordMuxM[23]}}, ReadDataWordMuxM[23:16]};//lb(u) endcase - assign offset3 = HSIZED[3] ? {{56'b0}, HRDATA[31:24]} : {{56{HRDATA[31]}}, HRDATA[31:24]};//lb(u) + assign offset3 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[31:24]} : {{56{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:24]};//lb(u) always_comb - case(HSIZED[1:0]) - 3: offset4 = HSIZED[3] ? {{32'b0}, HRDATA[63:32]} : {{32{HRDATA[63]}}, HRDATA[63:32]};//ld(u) // unaligned will cause fault. - 2: offset4 = HSIZED[3] ? {{32'b0}, HRDATA[63:32]} : {{32{HRDATA[63]}}, HRDATA[63:32]};//lw(u) - 1: offset4 = HSIZED[3] ? {{48'b0}, HRDATA[47:32]} : {{48{HRDATA[47]}}, HRDATA[47:32]};//lh(u) - 0: offset4 = HSIZED[3] ? {{56'b0}, HRDATA[39:32]} : {{56{HRDATA[39]}}, HRDATA[39:32]};//lb(u) + case(Funct3M[1:0]) + 3: offset4 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[63:32]} : {{32{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:32]};//ld(u) // unaligned will cause fault. + 2: offset4 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[63:32]} : {{32{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:32]};//lw(u) + 1: offset4 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[47:32]} : {{48{ReadDataWordMuxM[47]}}, ReadDataWordMuxM[47:32]};//lh(u) + 0: offset4 = Funct3M[2] ? 
{{56'b0}, ReadDataWordMuxM[39:32]} : {{56{ReadDataWordMuxM[39]}}, ReadDataWordMuxM[39:32]};//lb(u) endcase - assign offset5 = HSIZED[3] ? {{56'b0}, HRDATA[47:40]} : {{56{HRDATA[47]}}, HRDATA[47:40]};//lb(u) + assign offset5 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[47:40]} : {{56{ReadDataWordMuxM[47]}}, ReadDataWordMuxM[47:40]};//lb(u) always_comb - case(HSIZED[0]) - 1: offset6 = HSIZED[3] ? {{48'b0}, HRDATA[63:48]} : {{48{HRDATA[63]}}, HRDATA[63:48]};//lh(u) - 0: offset6 = HSIZED[3] ? {{56'b0}, HRDATA[55:48]} : {{56{HRDATA[55]}}, HRDATA[55:48]};//lb(u) + case(Funct3M[0]) + 1: offset6 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[63:48]} : {{48{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:48]};//lh(u) + 0: offset6 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[55:48]} : {{56{ReadDataWordMuxM[55]}}, ReadDataWordMuxM[55:48]};//lb(u) endcase - assign offset7 = HSIZED[3] ? {{56'b0}, HRDATA[63:56]} : {{56{HRDATA[63]}}, HRDATA[63:56]};//lb(u) + assign offset7 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[63:56]} : {{56{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:56]};//lb(u) // address mux always_comb - case(HADDRD[2:0]) - 0: HRDATAMasked = offset0; - 1: HRDATAMasked = offset1; - 2: HRDATAMasked = offset2; - 3: HRDATAMasked = offset3; - 4: HRDATAMasked = offset4; - 5: HRDATAMasked = offset5; - 6: HRDATAMasked = offset6; - 7: HRDATAMasked = offset7; + case(MemPAdrM[2:0]) + 0: ReadDataM = offset0; + 1: ReadDataM = offset1; + 2: ReadDataM = offset2; + 3: ReadDataM = offset3; + 4: ReadDataM = offset4; + 5: ReadDataM = offset5; + 6: ReadDataM = offset6; + 7: ReadDataM = offset7; endcase // easier to understand but slower @@ -103,45 +100,45 @@ module subwordread ( /* -----\/----- EXCLUDED -----\/----- // ByteMe mux always_comb - case(HADDRD[2:0]) - 3'b000: ByteM = HRDATA[7:0]; - 3'b001: ByteM = HRDATA[15:8]; - 3'b010: ByteM = HRDATA[23:16]; - 3'b011: ByteM = HRDATA[31:24]; - 3'b100: ByteM = HRDATA[39:32]; - 3'b101: ByteM = HRDATA[47:40]; - 3'b110: ByteM = HRDATA[55:48]; - 3'b111: ByteM = 
HRDATA[63:56]; + case(MemPAdrM[2:0]) + 3'b000: ByteM = ReadDataWordMuxM[7:0]; + 3'b001: ByteM = ReadDataWordMuxM[15:8]; + 3'b010: ByteM = ReadDataWordMuxM[23:16]; + 3'b011: ByteM = ReadDataWordMuxM[31:24]; + 3'b100: ByteM = ReadDataWordMuxM[39:32]; + 3'b101: ByteM = ReadDataWordMuxM[47:40]; + 3'b110: ByteM = ReadDataWordMuxM[55:48]; + 3'b111: ByteM = ReadDataWordMuxM[63:56]; endcase // halfword mux always_comb - case(HADDRD[2:1]) - 2'b00: HalfwordM = HRDATA[15:0]; - 2'b01: HalfwordM = HRDATA[31:16]; - 2'b10: HalfwordM = HRDATA[47:32]; - 2'b11: HalfwordM = HRDATA[63:48]; + case(MemPAdrM[2:1]) + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b10: HalfwordM = ReadDataWordMuxM[47:32]; + 2'b11: HalfwordM = ReadDataWordMuxM[63:48]; endcase logic [31:0] WordM; always_comb - case(HADDRD[2]) - 1'b0: WordM = HRDATA[31:0]; - 1'b1: WordM = HRDATA[63:32]; + case(MemPAdrM[2]) + 1'b0: WordM = ReadDataWordMuxM[31:0]; + 1'b1: WordM = ReadDataWordMuxM[63:32]; endcase // sign extension always_comb - case({HSIZED[3], HSIZED[1:0]}) // HSIZED[3] indicates unsigned load - 3'b000: HRDATAMasked = {{56{ByteM[7]}}, ByteM}; // lb - 3'b001: HRDATAMasked = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh - 3'b010: HRDATAMasked = {{32{WordM[31]}}, WordM[31:0]}; // lw - 3'b011: HRDATAMasked = HRDATA; // ld - 3'b100: HRDATAMasked = {56'b0, ByteM[7:0]}; // lbu - 3'b101: HRDATAMasked = {48'b0, HalfwordM[15:0]}; // lhu - 3'b110: HRDATAMasked = {32'b0, WordM[31:0]}; // lwu - default: HRDATAMasked = HRDATA; // Shouldn't happen + case({Funct3M[2], Funct3M[1:0]}) // Funct3M[2] indicates unsigned load + 3'b000: ReadDataM = {{56{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh + 3'b010: ReadDataM = {{32{WordM[31]}}, WordM[31:0]}; // lw + 3'b011: ReadDataM = ReadDataWordMuxM; // ld + 3'b100: ReadDataM = {56'b0, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {48'b0, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {32'b0, 
WordM[31:0]}; // lwu + default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase -----/\----- EXCLUDED -----/\----- */ end else begin // 32-bit @@ -149,58 +146,58 @@ module subwordread ( // fast but more complex always_comb - case(HSIZED[1:0]) - 3: offset0 = HRDATA; //ld illegal - 2: offset0 = HRDATA[31:0]; //lw - 1: offset0 = HSIZED[3] ? {{16'b0}, HRDATA[15:0]} : {{16{HRDATA[15]}}, HRDATA[15:0]}; //lh(u) - 0: offset0 = HSIZED[3] ? {{24'b0}, HRDATA[7:0]} : {{24{HRDATA[7]}}, HRDATA[7:0]}; //lb(u) + case(Funct3M[1:0]) + 3: offset0 = ReadDataWordMuxM; //ld illegal + 2: offset0 = ReadDataWordMuxM[31:0]; //lw + 1: offset0 = Funct3M[2] ? {{16'b0}, ReadDataWordMuxM[15:0]} : {{16{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:0]}; //lh(u) + 0: offset0 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[7:0]} : {{24{ReadDataWordMuxM[7]}}, ReadDataWordMuxM[7:0]}; //lb(u) endcase - assign offset1 = HSIZED[3] ? {{24'b0}, HRDATA[15:8]} : {{24{HRDATA[15]}}, HRDATA[15:8]}; //lb(u) + assign offset1 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[15:8]} : {{24{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:8]}; //lb(u) always_comb - case(HSIZED[0]) - 1: offset2 = HSIZED[3] ? {{16'b0}, HRDATA[31:16]} : {{16{HRDATA[31]}}, HRDATA[31:16]};//lh(u) - 0: offset2 = HSIZED[3] ? {{24'b0}, HRDATA[23:16]} : {{24{HRDATA[23]}}, HRDATA[23:16]};//lb(u) + case(Funct3M[0]) + 1: offset2 = Funct3M[2] ? {{16'b0}, ReadDataWordMuxM[31:16]} : {{16{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:16]};//lh(u) + 0: offset2 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[23:16]} : {{24{ReadDataWordMuxM[23]}}, ReadDataWordMuxM[23:16]};//lb(u) endcase - assign offset3 = HSIZED[3] ? {{24'b0}, HRDATA[31:24]} : {{24{HRDATA[31]}}, HRDATA[31:24]};//lb(u) + assign offset3 = Funct3M[2] ? 
{{24'b0}, ReadDataWordMuxM[31:24]} : {{24{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:24]};//lb(u) // address mux always_comb - case(HADDRD[1:0]) - 0: HRDATAMasked = offset0; - 1: HRDATAMasked = offset1; - 2: HRDATAMasked = offset2; - 3: HRDATAMasked = offset3; + case(MemPAdrM[1:0]) + 0: ReadDataM = offset0; + 1: ReadDataM = offset1; + 2: ReadDataM = offset2; + 3: ReadDataM = offset3; endcase // slow but easier to understand /* -----\/----- EXCLUDED -----\/----- always_comb - case(HADDRD[1:0]) - 2'b00: ByteM = HRDATA[7:0]; - 2'b01: ByteM = HRDATA[15:8]; - 2'b10: ByteM = HRDATA[23:16]; - 2'b11: ByteM = HRDATA[31:24]; + case(MemPAdrM[1:0]) + 2'b00: ByteM = ReadDataWordMuxM[7:0]; + 2'b01: ByteM = ReadDataWordMuxM[15:8]; + 2'b10: ByteM = ReadDataWordMuxM[23:16]; + 2'b11: ByteM = ReadDataWordMuxM[31:24]; endcase // halfword mux always_comb - case(HADDRD[1]) - 1'b0: HalfwordM = HRDATA[15:0]; - 1'b1: HalfwordM = HRDATA[31:16]; + case(MemPAdrM[1]) + 1'b0: HalfwordM = ReadDataWordMuxM[15:0]; + 1'b1: HalfwordM = ReadDataWordMuxM[31:16]; endcase // sign extension always_comb - case({HSIZED[3], HSIZED[1:0]}) - 3'b000: HRDATAMasked = {{24{ByteM[7]}}, ByteM}; // lb - 3'b001: HRDATAMasked = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh - 3'b010: HRDATAMasked = HRDATA; // lw - 3'b100: HRDATAMasked = {24'b0, ByteM[7:0]}; // lbu - 3'b101: HRDATAMasked = {16'b0, HalfwordM[15:0]}; // lhu - default: HRDATAMasked = HRDATA; + case({Funct3M[2], Funct3M[1:0]}) + 3'b000: ReadDataM = {{24{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh + 3'b010: ReadDataM = ReadDataWordMuxM; // lw + 3'b100: ReadDataM = {24'b0, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {16'b0, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM; endcase -----/\----- EXCLUDED -----/\----- */ end