From d366a1f50f9140905af850481d92fda6b951cb71 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 27 Dec 2021 16:45:49 -0600 Subject: [PATCH] Moved dcache fetch logic outside the dcache except for the fsm. --- wally-pipelined/src/cache/dcache.sv | 113 +++++++++------------------- wally-pipelined/src/lsu/lsu.sv | 79 ++++++++++++++++--- 2 files changed, 106 insertions(+), 86 deletions(-) diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 6fb3cca8..4b323ebf 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -27,44 +27,50 @@ module dcache (input logic clk, - input logic reset, - input logic CPUBusy, + input logic reset, + input logic CPUBusy, // cpu side - input logic [1:0] MemRWM, - input logic [2:0] Funct3M, - input logic [6:0] Funct7M, - input logic [1:0] AtomicM, - input logic FlushDCacheM, - input logic [11:0] MemAdrE, // virtual address, but we only use the lower 12 bits. - input logic [`PA_BITS-1:0] MemPAdrM, // physical address - input logic [11:0] VAdr, // when hptw writes dtlb we use this address to index SRAM. + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [6:0] Funct7M, + input logic [1:0] AtomicM, + input logic FlushDCacheM, + input logic [11:0] MemAdrE, // virtual address, but we only use the lower 12 bits. + input logic [`PA_BITS-1:0] MemPAdrM, // physical address + input logic [11:0] VAdr, // when hptw writes dtlb we use this address to index SRAM. - input logic [`XLEN-1:0] FinalWriteDataM, - output logic [`XLEN-1:0] ReadDataWordM, - output logic DCacheStall, - output logic CommittedM, - output logic DCacheMiss, - output logic DCacheAccess, + input logic [`XLEN-1:0] FinalWriteDataM, + output logic [`XLEN-1:0] ReadDataWordM, + output logic DCacheStall, + output logic CommittedM, + output logic DCacheMiss, + output logic DCacheAccess, + + output logic [`PA_BITS-1:0] BasePAdrM, + output logic [`XLEN-1:0] ReadDataBlockSetsM [(`DCACHE_BLOCKLENINBITS/`XLEN)-1:0], // temp - output logic SelUncached, - output logic SelFlush, - output logic [`XLEN-1:0] DCacheMemWriteDataFirstWord, + output logic SelUncached, + output logic SelFlush, + input logic FetchCountFlag, + output logic CntEn, + output logic CntReset, + + + input logic [`DCACHE_BLOCKLENINBITS-1:0] DCacheMemWriteData, + // inputs from TLB and PMA/P - input logic ExceptionM, - input logic PendingInterruptM, - input logic CacheableM, + input logic ExceptionM, + input logic PendingInterruptM, + input logic CacheableM, // from ptw - input logic IgnoreRequest, + input logic IgnoreRequest, // ahb side - (* mark_debug = "true" *)output logic [`PA_BITS-1:0] AHBPAdr, // to ahb - (* mark_debug = "true" *)output logic AHBRead, - (* mark_debug = "true" *)output logic AHBWrite, - (* mark_debug = "true" *)input logic AHBAck, // from ahb - (* mark_debug = "true" *)input logic [`XLEN-1:0] HRDATA, // from ahb - (* mark_debug = "true" *)output logic [`XLEN-1:0] DC_HWDATA_FIXNAME, // to ahb + (* mark_debug = "true" *)output logic AHBRead, + (* mark_debug = "true" *)output logic AHBWrite, + (* mark_debug = "true" *)input logic AHBAck, // from ahb (* mark_debug = "true" *)output logic [2:0] DCtoAHBSizeM ); @@ -80,23 +86,19 @@ module dcache localparam integer LOGWPL = $clog2(WORDSPERLINE); localparam integer LOGXLENBYTES = $clog2(`XLEN/8); - localparam integer FetchCountThreshold = WORDSPERLINE - 1; localparam integer FlushAdrThreshold = NUMLINES - 1; logic [1:0] SelAdrM; logic [INDEXLEN-1:0] RAdr; logic [INDEXLEN-1:0] WAdr; logic [BLOCKLEN-1:0] SRAMWriteData; - logic [BLOCKLEN-1:0] DCacheMemWriteData; logic SetValid, ClearValid; logic SetDirty, ClearDirty; logic [BLOCKLEN-1:0] ReadDataBlockWayMaskedM [NUMWAYS-1:0]; logic [NUMWAYS-1:0] WayHit; logic CacheHit; logic [BLOCKLEN-1:0] ReadDataBlockM; - logic [`XLEN-1:0] ReadDataBlockSetsM [(WORDSPERLINE)-1:0]; logic [`XLEN-1:0] ReadDataWordMuxM; - logic [LOGWPL-1:0] FetchCount, NextFetchCount; logic [WORDSPERLINE-1:0] SRAMWordEnable; logic SRAMWordWriteEnableM; @@ -112,9 +114,6 @@ module dcache logic [2**LOGWPL-1:0] MemPAdrDecodedW; - logic [`PA_BITS-1:0] BasePAdrM; - logic [OFFSETLEN-1:0] BasePAdrOffsetM; - logic [`PA_BITS-1:0] BasePAdrMaskedM; logic [TAGLEN-1:0] VictimTagWay [NUMWAYS-1:0]; logic [TAGLEN-1:0] VictimTag; @@ -131,9 +130,6 @@ module dcache logic VDWriteEnable; - logic FetchCountFlag; - logic CntEn; - logic CntReset; logic SelEvict; logic LRUWriteEn; @@ -246,49 +242,14 @@ module dcache .s(SRAMBlockWriteEnableM), .y(SRAMWriteData)); - // Bus Side logic - // register the fetch data from the next level of memory. - // This register should be necessary for timing. There is no register in the uncore or - // ahblite controller between the memories and this cache. - - generate - for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer - flopen #(`XLEN) fb(.clk(clk), - .en(AHBAck & AHBRead & (index == FetchCount)), - .d(HRDATA), - .q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN])); - end - endgenerate - - // temp - assign DCacheMemWriteDataFirstWord = DCacheMemWriteData[`XLEN-1:0]; - + + //assign HWDATA = CacheableM | SelFlush ? ReadDataBlockSetsM[FetchCount] : WriteDataM; mux3 #(`PA_BITS) BaseAdrMux(.d0(MemPAdrM), .d1({VictimTag, MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}), .s({SelFlush, SelEvict}), .y(BasePAdrM)); - // if not cacheable the offset bits needs to be sent to the EBU. - // if cacheable the offset bits are discarded. $ FSM will fetch the whole block. - assign BasePAdrOffsetM = CacheableM ? {{OFFSETLEN}{1'b0}} : BasePAdrM[OFFSETLEN-1:0]; - assign BasePAdrMaskedM = {BasePAdrM[`PA_BITS-1:OFFSETLEN], BasePAdrOffsetM}; - - assign AHBPAdr = ({{`PA_BITS-LOGWPL{1'b0}}, FetchCount} << $clog2(`XLEN/8)) + BasePAdrMaskedM; - - //assign HWDATA = CacheableM | SelFlush ? ReadDataBlockSetsM[FetchCount] : WriteDataM; - assign DC_HWDATA_FIXNAME = ReadDataBlockSetsM[FetchCount]; - - assign FetchCountFlag = (FetchCount == FetchCountThreshold[LOGWPL-1:0]); - - flopenr #(LOGWPL) - FetchCountReg(.clk(clk), - .reset(reset | CntReset), - .en(CntEn), - .d(NextFetchCount), - .q(FetchCount)); - - assign NextFetchCount = FetchCount + 1'b1; // flush address and way generation. flopenr #(INDEXLEN) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 5db5a756..95548023 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -63,11 +63,11 @@ module lsu output logic StoreMisalignedFaultM, StoreAccessFaultM, // connect to ahb - output logic [`PA_BITS-1:0] DCtoAHBPAdrM, +(* mark_debug = "true" *) output logic [`PA_BITS-1:0] DCtoAHBPAdrM, output logic DCtoAHBReadM, output logic DCtoAHBWriteM, input logic DCfromAHBAck, - input logic [`XLEN-1:0] DCfromAHBReadData, +(* mark_debug = "true" *) input logic [`XLEN-1:0] DCfromAHBReadData, output logic [`XLEN-1:0] DCtoAHBWriteData, output logic [2:0] DCtoAHBSizeM, @@ -301,18 +301,40 @@ module lsu // 3. wire pass-through assign MemAdrE_RENAME = SelReplayCPURequest ? IEUAdrM[11:0] : MemAdrE[11:0]; + localparam integer WORDSPERLINE = `DCACHE_BLOCKLENINBITS/`XLEN; + localparam integer LOGWPL = $clog2(WORDSPERLINE); + localparam integer BLOCKLEN = `DCACHE_BLOCKLENINBITS; + + localparam integer FetchCountThreshold = WORDSPERLINE - 1; + localparam integer BLOCKBYTELEN = BLOCKLEN/8; + localparam integer OFFSETLEN = $clog2(BLOCKBYTELEN); + // temp logic SelUncached; + logic FetchCountFlag; + logic [`XLEN-1:0] FinalAMOWriteDataM, FinalWriteDataM; - logic [`XLEN-1:0] DC_HWDATA_FIXNAME; + (* mark_debug = "true" *) logic [`XLEN-1:0] DC_HWDATA_FIXNAME; logic SelFlush; logic [`XLEN-1:0] ReadDataWordM; - logic [`XLEN-1:0] DCacheMemWriteDataFirstWord; + logic [`DCACHE_BLOCKLENINBITS-1:0] DCacheMemWriteData; // keep logic [`XLEN-1:0] ReadDataWordMuxM; + + + logic [LOGWPL-1:0] FetchCount, NextFetchCount; + logic [`PA_BITS-1:0] BasePAdrMaskedM; + logic [OFFSETLEN-1:0] BasePAdrOffsetM; + + logic CntEn; + logic CntReset; + logic [`PA_BITS-1:0] BasePAdrM; + logic [`XLEN-1:0] ReadDataBlockSetsM [(`DCACHE_BLOCKLENINBITS/`XLEN)-1:0]; - + + + dcache dcache(.clk, .reset, .CPUBusy, .MemRWM(MemRWMtoDCache), @@ -328,24 +350,26 @@ module lsu .PendingInterruptM(PendingInterruptMtoDCache), .CacheableM(CacheableMtoDCache), + .BasePAdrM, + .ReadDataBlockSetsM, // temp .SelUncached, .SelFlush, - .DCacheMemWriteDataFirstWord, + .DCacheMemWriteData, + .FetchCountFlag, + .CntEn, + .CntReset, // AHB connection - .AHBPAdr(DCtoAHBPAdrM), .AHBRead(DCtoAHBReadM), .AHBWrite(DCtoAHBWriteM), .AHBAck(DCfromAHBAck), - .DC_HWDATA_FIXNAME(DC_HWDATA_FIXNAME), - .HRDATA(DCfromAHBReadData), .DCtoAHBSizeM ); mux2 #(`XLEN) UnCachedDataMux(.d0(ReadDataWordM), - .d1(DCacheMemWriteDataFirstWord), + .d1(DCacheMemWriteData[`XLEN-1:0]), .s(SelUncached), .y(ReadDataWordMuxM)); @@ -372,8 +396,43 @@ module lsu .HWDATA(FinalWriteDataM)); assign DCtoAHBWriteData = CacheableMtoDCache | SelFlush ? DC_HWDATA_FIXNAME : WriteDataM; + + + // Bus Side logic + // register the fetch data from the next level of memory. + // This register should be necessary for timing. There is no register in the uncore or + // ahblite controller between the memories and this cache. + + genvar index; + generate + for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer + flopen #(`XLEN) fb(.clk(clk), + .en(DCfromAHBAck & DCtoAHBReadM & (index == FetchCount)), + .d(DCfromAHBReadData), + .q(DCacheMemWriteData[(index+1)*`XLEN-1:index*`XLEN])); + end + endgenerate + + + // if not cacheable the offset bits needs to be sent to the EBU. + // if cacheable the offset bits are discarded. $ FSM will fetch the whole block. + assign BasePAdrOffsetM = CacheableM ? {{OFFSETLEN}{1'b0}} : BasePAdrM[OFFSETLEN-1:0]; + assign BasePAdrMaskedM = {BasePAdrM[`PA_BITS-1:OFFSETLEN], BasePAdrOffsetM}; + assign DCtoAHBPAdrM = ({{`PA_BITS-LOGWPL{1'b0}}, FetchCount} << $clog2(`XLEN/8)) + BasePAdrMaskedM; + assign DC_HWDATA_FIXNAME = ReadDataBlockSetsM[FetchCount]; + + assign FetchCountFlag = (FetchCount == FetchCountThreshold[LOGWPL-1:0]); + + flopenr #(LOGWPL) + FetchCountReg(.clk(clk), + .reset(reset | CntReset), + .en(CntEn), + .d(NextFetchCount), + .q(FetchCount)); + + assign NextFetchCount = FetchCount + 1'b1; endmodule