From 29f45d6203e07f4f2cd4262a02daf13fec3dc48e Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 09:41:18 -0600 Subject: [PATCH 01/12] Imperas found a bug with the Fence.I instruction. If a fence.i directly followed a store miss, the d$ would release Stall during the cache line write and then transition to ReadHold. This caused the d$ flush to go high while in ReadHold. The solution is to ensure the cache continues to assert Stall while in WriteLine state. There was also a second issue: the D$ flush asserted FlushD, which flushed the I$ invalidate. Finally, the third issue was that CacheEn from the FSM needs to be asserted on an InvalidateCache. --- pipelined/src/cache/cachefsm.sv | 4 ++-- pipelined/src/cache/cacheway.sv | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index a98b7a53..5dba257d 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -142,7 +142,7 @@ module cachefsm ( assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss)) | (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | - (CurrState == STATE_WRITE_LINE & ~(StoreAMO)) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. + (CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. 
(CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache @@ -182,6 +182,6 @@ module cachefsm ( resetDelay; assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_READ_HOLD; - assign CacheEn = (~Stall | FlushCache | AnyMiss) | (CurrState != STATE_READY) | reset; + assign CacheEn = (~Stall | FlushCache | AnyMiss) | (CurrState != STATE_READY) | reset | InvalidateCache; endmodule // cachefsm diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 288da959..cb4b343b 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -150,7 +150,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, if (reset) ValidBits <= #1 '0; if(CacheEn) begin ValidWay <= #1 ValidBits[CAdr]; - if(InvalidateCache & ~FlushStage) ValidBits <= #1 '0; + if(InvalidateCache) ValidBits <= #1 '0; else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CAdr] <= #1 SetValidWay; end end From 5b5a615e4a95ab98e98b270d33ce8c015f0ebecd Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 10:34:49 -0600 Subject: [PATCH 02/12] Integrated the missing zifence tests into the regression test. 
--- pipelined/regression/regression-wally | 6 +++--- pipelined/testbench/testbench.sv | 2 ++ pipelined/testbench/tests.vh | 10 ++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 520e162b..6bbaa04b 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -66,7 +66,7 @@ tc = TestCase( configs.append(tc) tests64gcimperas = ["imperas64i", "imperas64f", "imperas64d", "imperas64m", "imperas64c"] # unused -tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "wally64a", "wally64periph", "wally64priv"] +tests64gc = ["arch64f", "arch64d", "arch64i", "arch64priv", "arch64c", "arch64m", "arch64zi", "wally64a", "wally64periph", "wally64priv"] for test in tests64gc: tc = TestCase( name=test, @@ -85,7 +85,7 @@ for test in tests64i: configs.append(tc) tests32gcimperas = ["imperas32i", "imperas32f", "imperas32m", "imperas32c"] # unused -tests32gc = ["arch32f", "arch32d", "arch32i", "arch32priv", "arch32c", "arch32m", "wally32a", "wally32priv", "wally32periph"] +tests32gc = ["arch32f", "arch32d", "arch32i", "arch32priv", "arch32c", "arch32m", "arch32zi", "wally32a", "wally32priv", "wally32periph"] for test in tests32gc: tc = TestCase( name=test, @@ -95,7 +95,7 @@ for test in tests32gc: configs.append(tc) tests32icimperas = ["imperas32i", "imperas32c"] # unused -tests32ic = ["arch32i", "arch32c","wally32periph"] +tests32ic = ["arch32i", "arch32c", "wally32periph"] for test in tests32ic: tc = TestCase( name=test, diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 4fb646f7..23d036b9 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -94,6 +94,7 @@ logic [3:0] dummy; "arch64m": if (`M_SUPPORTED) tests = arch64m; "arch64f": if (`F_SUPPORTED) tests = arch64f; "arch64d": if (`D_SUPPORTED) tests = arch64d; + "arch64zi": if (`ZIFENCEI_SUPPORTED) 
tests = arch64zi; "imperas64i": tests = imperas64i; "imperas64f": if (`F_SUPPORTED) tests = imperas64f; "imperas64d": if (`D_SUPPORTED) tests = imperas64d; @@ -119,6 +120,7 @@ logic [3:0] dummy; "arch32m": if (`M_SUPPORTED) tests = arch32m; "arch32f": if (`F_SUPPORTED) tests = arch32f; "arch32d": if (`D_SUPPORTED) tests = arch32d; + "arch32zi": if (`ZIFENCEI_SUPPORTED) tests = arch32zi; "imperas32i": tests = imperas32i; "imperas32f": if (`F_SUPPORTED) tests = imperas32f; "imperas32m": if (`M_SUPPORTED) tests = imperas32m; diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 886d3fef..ec83f8c6 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -934,6 +934,16 @@ string imperas32f[] = '{ "rv64i_m/privilege/src/misalign-sw-01.S" }; + string arch64zi[] = '{ + `RISCVARCHTEST, + "rv64i_m/Zifencei/src/Fencei.S" + }; + + string arch32zi[] = '{ + `RISCVARCHTEST, + "rv32i_m/Zifencei/src/Fencei.S" + }; + string arch64m[] = '{ `RISCVARCHTEST, "rv64i_m/M/src/div-01.S", From c5169a3e39b8238867b63cf0fe7f71373f5020b1 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 11:51:10 -0600 Subject: [PATCH 03/12] Formatting. --- pipelined/src/generic/mem/ram1p1rwbe.sv | 11 ++- pipelined/src/ifu/bpred.sv | 20 ++--- pipelined/src/ifu/ifu.sv | 105 +++++++++++++----------- pipelined/src/lsu/lsu.sv | 6 +- 4 files changed, 76 insertions(+), 66 deletions(-) diff --git a/pipelined/src/generic/mem/ram1p1rwbe.sv b/pipelined/src/generic/mem/ram1p1rwbe.sv index 5abb8504..7b79eb35 100644 --- a/pipelined/src/generic/mem/ram1p1rwbe.sv +++ b/pipelined/src/generic/mem/ram1p1rwbe.sv @@ -1,14 +1,17 @@ /////////////////////////////////////////// // 1 port sram. // -// Written: ross1728@gmail.com May 3, 2021 +// Written: ross1728@gmail.com +// Created: 3 May 2021 +// Modified: 20 January 2023 +// +// Purpose: Storage and read/write access to data cache data, tag valid, dirty, and replacement. // Basic sram with 1 read write port. 
// When clk rises Addr and LineWriteData are sampled. // Following the clk edge read data is output from the sampled Addr. -// Write -// -// Purpose: Storage and read/write access to data cache data, tag valid, dirty, and replacement. // +// Documentation: +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University diff --git a/pipelined/src/ifu/bpred.sv b/pipelined/src/ifu/bpred.sv index 103e7d8c..ab47d678 100644 --- a/pipelined/src/ifu/bpred.sv +++ b/pipelined/src/ifu/bpred.sv @@ -38,13 +38,13 @@ module bpred ( input logic [`XLEN-1:0] PCNextF, // Next Fetch Address input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction - output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage. + output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage // Update Predictor - input logic [`XLEN-1:0] PCF, // Fetch stage instruction address. - input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took. - input logic [`XLEN-1:0] PCE, // Execution stage instruction address. - input logic [`XLEN-1:0] PCM, // Memory stage instruction address. + input logic [`XLEN-1:0] PCF, // Fetch stage instruction address + input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took + input logic [`XLEN-1:0] PCE, // Execution stage instruction address + input logic [`XLEN-1:0] PCM, // Memory stage instruction address // Branch and jump outcome input logic PCSrcE, // Executation stage branch is taken @@ -53,11 +53,11 @@ module bpred ( output logic [3:0] InstrClassM, // The valid instruction class. 
1-hot encoded as jalr, ret, jr (not ret), j, br // Report branch prediction status - output logic BPPredWrongE, // Prediction is wrong. - output logic DirPredictionWrongM, // Prediction direction is wrong. - output logic BTBPredPCWrongM, // Prediction target wrong. - output logic RASPredPCWrongM, // RAS prediction is wrong. - output logic PredictionInstrClassWrongM // Class prediction is wrong. + output logic BPPredWrongE, // Prediction is wrong + output logic DirPredictionWrongM, // Prediction direction is wrong + output logic BTBPredPCWrongM, // Prediction target wrong + output logic RASPredPCWrongM, // RAS prediction is wrong + output logic PredictionInstrClassWrongM // Class prediction is wrong ); logic BTBValidF; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 8e89ceb0..5f9e5598 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -28,58 +28,65 @@ `include "wally-config.vh" module ifu ( - input logic clk, reset, - input logic StallF, StallD, StallE, StallM, StallW, - input logic FlushD, FlushE, FlushM, FlushW, + input logic clk, reset, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, +(* mark_debug = "true" *) output logic IFUStallF, // IFU stalsl pipeline during a multicycle operation + // Command from CPU + input logic InvalidateICacheM, // Clears all instruction cache valid bits + input logic CSRWriteFenceM, // CSR write or fence instruction, PCNextF = the next valid PC (typically PCE) // Bus interface -(* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, -(* mark_debug = "true" *) output logic [`PA_BITS-1:0] IFUHADDR, -(* mark_debug = "true" *) output logic IFUStallF, -(* mark_debug = "true" *) output logic [2:0] IFUHBURST, -(* mark_debug = "true" *) output logic [1:0] IFUHTRANS, -(* mark_debug = "true" *) output logic [2:0] IFUHSIZE, -(* mark_debug = "true" *) output logic IFUHWRITE, -(* mark_debug = "true" *) input logic IFUHREADY, - (* mark_debug = "true" 
*) output logic [`XLEN-1:0] PCF, +(* mark_debug = "true" *) output logic [`PA_BITS-1:0] IFUHADDR, // Bus address from IFU to EBU +(* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, // Bus read data from IFU to EBU +(* mark_debug = "true" *) input logic IFUHREADY, // Bus ready from IFU to EBU +(* mark_debug = "true" *) output logic IFUHWRITE, // Bus write operation from IFU to EBU +(* mark_debug = "true" *) output logic [2:0] IFUHSIZE, // Bus operation size from IFU to EBU +(* mark_debug = "true" *) output logic [2:0] IFUHBURST, // Bus burst from IFU to EBU +(* mark_debug = "true" *) output logic [1:0] IFUHTRANS, // Bus transaction type from IFU to EBU + +(* mark_debug = "true" *) output logic [`XLEN-1:0] PCF, // Fetch stage instruction address // Execute - output logic [`XLEN-1:0] PCLinkE, - input logic PCSrcE, - input logic [`XLEN-1:0] IEUAdrE, - output logic [`XLEN-1:0] PCE, - output logic BPPredWrongE, + output logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. 
(AKA Fall through address) + input logic PCSrcE, // Executation stage branch is taken + input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address + output logic [`XLEN-1:0] PCE, // Execution stage instruction address + output logic BPPredWrongE, // Prediction is wrong // Mem - output logic CommittedF, - input logic [`XLEN-1:0] UnalignedPCNextF, - output logic [`XLEN-1:0] PCNext2F, - input logic CSRWriteFenceM, - input logic InvalidateICacheM, - output logic [31:0] InstrD, InstrM, - output logic [`XLEN-1:0] PCM, - // branch predictor - output logic [3:0] InstrClassM, - output logic DirPredictionWrongM, - output logic BTBPredPCWrongM, - output logic RASPredPCWrongM, - output logic PredictionInstrClassWrongM, - // Faults - input logic IllegalBaseInstrFaultD, - output logic InstrPageFaultF, - output logic IllegalIEUInstrFaultD, - output logic InstrMisalignedFaultM, - // mmu management - input logic [1:0] PrivilegeModeW, - input logic [`XLEN-1:0] PTE, - input logic [1:0] PageType, - input logic [`XLEN-1:0] SATP_REGW, - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, - input logic ITLBWriteF, sfencevmaM, - output logic ITLBMissF, InstrDAPageFaultF, - input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], - output logic InstrAccessFaultF, - output logic ICacheAccess, - output logic ICacheMiss + output logic CommittedF, // I$ or bus memory operation started, delay interrupts + input logic [`XLEN-1:0] UnalignedPCNextF, // The next PCF, but not aligned to 2 bytes. 
+ output logic [`XLEN-1:0] PCNext2F, // Selected PC between branch prediction and next valid PC if CSRWriteFence + output logic [31:0] InstrD, // The decoded instruction in Decode stage + output logic [31:0] InstrM, // The decoded instruction in Memory stage + output logic [`XLEN-1:0] PCM, // Memory stage instruction address + // branch predictor + output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br + output logic DirPredictionWrongM, // Prediction direction is wrong + output logic BTBPredPCWrongM, // Prediction target wrong + output logic RASPredPCWrongM, // RAS prediction is wrong + output logic PredictionInstrClassWrongM, // Class prediction is wrong + // Faults + input logic IllegalBaseInstrFaultD, // Illegal non-compressed instruction + output logic InstrPageFaultF, // Instruction page fault + output logic IllegalIEUInstrFaultD, // Illegal instruction including compressed + output logic InstrMisalignedFaultM, // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed) + // mmu management + input logic [1:0] PrivilegeModeW, // Priviledge mode in Writeback stage + input logic [`XLEN-1:0] PTE, // Hardware page table walker (HPTW) writes Page table entry (PTE) to ITLB + input logic [1:0] PageType, // Hardware page table walker (HPTW) writes PageType to ITLB + input logic ITLBWriteF, // Writes PTE and PageType to ITLB + input logic [`XLEN-1:0] SATP_REGW, // Location of the root page table and page table configuration + input logic STATUS_MXR, // Status CSR: make executable page readable + input logic STATUS_SUM, // Status CSR: Supervisor access to user memory + input logic STATUS_MPRV, // Status CSR: modify machine privilege + input logic [1:0] STATUS_MPP, // Status CSR: previous machine privilege level + input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries + output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk + output logic 
InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], + output logic InstrAccessFaultF, + output logic ICacheAccess, + output logic ICacheMiss ); (* mark_debug = "true" *) logic [`XLEN-1:0] PCNextF; logic BranchMisalignedFaultE; diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 6d958d02..0ebd84f8 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -34,7 +34,7 @@ module lsu ( input logic clk, reset, input logic StallM, FlushM, StallW, FlushW, - output logic LSUStallM, // LSU stalls pipeline during a multicycle operation. + output logic LSUStallM, // LSU stalls pipeline during a multicycle operation // connected to cpu (controls) input logic [1:0] MemRWM, // Read/Write control input logic [2:0] Funct3M, // Size of memory operation @@ -53,7 +53,7 @@ module lsu ( // cpu privilege input logic [1:0] PrivilegeModeW, // Current privilege mode input logic BigEndianM, // Swap byte order to big endian - input logic sfencevmaM, // Virtual memory address fence + input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries // fpu input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU input logic FpLoadStoreM, // Selects FPU as store for write data @@ -126,7 +126,7 @@ module lsu ( logic [(`LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic DTLBMissM; // DTLB miss causes HPTW walk - logic DTLBWriteM; // Writes PTE to DTLB + logic DTLBWriteM; // Writes PTE and PageType to DTLB logic DataDAPageFaultM; // DTLB hit needs to update dirty or access bits logic LSULoadAccessFaultM; // Load acces fault logic LSUStoreAmoAccessFaultM; // Store access fault From 340e1797ea7fee74b7d99d5a611a17d642b63300 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 12:09:21 -0600 Subject: [PATCH 04/12] More cleanup and formatting. 
--- pipelined/src/ifu/ifu.sv | 49 ++++++++++++++------------ pipelined/testbench/testbench-linux.sv | 2 +- pipelined/testbench/testbench.sv | 2 +- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 5f9e5598..9c990750 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -82,31 +82,35 @@ module ifu ( input logic sfencevmaM, // Virtual memory address fence, invalidate TLB entries output logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk output logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits - input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], - output logic InstrAccessFaultF, - output logic ICacheAccess, - output logic ICacheMiss + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP address from privileged unit + output logic InstrAccessFaultF, // Instruction access fault + output logic ICacheAccess, // Report I$ read to performance counters + output logic ICacheMiss // Report I$ miss to performance counters ); - (* mark_debug = "true" *) logic [`XLEN-1:0] PCNextF; - logic BranchMisalignedFaultE; - logic [`XLEN-1:0] PCPlus2or4F, PCLinkD; - logic [`XLEN-1:2] PCPlus4F; - logic CompressedF; - logic [31:0] InstrRawD, InstrRawF, IROMInstrF, ICacheInstrF; - logic [31:0] FinalInstrRawF; - logic [1:0] IFURWF; - - logic [31:0] InstrE; - logic [`XLEN-1:0] PCD; - localparam [31:0] nop = 32'h00000013; // instruction for NOP - logic [31:0] NextInstrD, NextInstrE; + localparam [31:0] nop = 32'h00000013; // instruction for NOP - logic [`XLEN-1:0] NextValidPCE; + (* mark_debug = "true" *) logic [`XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4 + logic BranchMisalignedFaultE; // Branch target not aligned to 4 bytes 
if no compressed allowed (2 bytes if allowed) + logic [`XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed) + logic [`XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j) + logic [`XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. Fancy way to compute PCPlus2or4F + logic [`XLEN-1:0] PCD; // Decode stage instruction address + logic [`XLEN-1:0] NextValidPCE; // The PC of the next valid instruction in the pipeline after csr write or fence +(* mark_debug = "true" *) logic [`PA_BITS-1:0] PCPF; // Physical address after address translation + logic [`XLEN+1:0] PCFExt; // + + logic [31:0] IROMInstrF; // Instruction from the IROM + logic [31:0] ICacheInstrF; // Instruction from the I$ + logic [31:0] InstrRawF; // Instruction from the IROM, I$, or bus + logic CompressedF; // The fetched instruction is compressed + logic [31:0] InstrRawD; // Non-decompressed instruction in the Decode stage -(* mark_debug = "true" *) logic [`PA_BITS-1:0] PCPF; // used to either truncate or expand PCPF and PCNextF into `PA_BITS width. 
- logic [`XLEN+1:0] PCFExt; + logic [1:0] IFURWF; // IFU alreays read IFURWF = 10 + logic [31:0] InstrE; // Instruction in the Execution stage + logic [31:0] NextInstrD, NextInstrE; // Instruction into the next stage after possible stage flush + logic CacheableF; logic [`XLEN-1:0] PCNextFSpill; @@ -264,7 +268,7 @@ module ifu ( if(`IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(FetchBuffer, IROMInstrF, SelIROM, InstrRawF); else assign InstrRawF = FetchBuffer; assign IFUHBURST = 3'b0; - assign {ICacheFetchLine, ICacheStallF, FinalInstrRawF} = '0; + assign {ICacheFetchLine, ICacheStallF} = '0; assign {ICacheMiss, ICacheAccess} = '0; end end else begin : nobus // block: bus @@ -366,6 +370,7 @@ module ifu ( flopenr #(1) InstrMisalginedReg(clk, reset, ~StallM, BranchMisalignedFaultE, InstrMisalignedFaultM); // Instruction and PC/PCLink pipeline registers + // Cannot use flopenrc for Instr(E/M) as it resets to NOP not 0. mux2 #(32) FlushInstrEMux(InstrD, nop, FlushE, NextInstrD); mux2 #(32) FlushInstrMMux(InstrE, nop, FlushM, NextInstrE); flopenr #(32) InstrEReg(clk, reset, ~StallE, NextInstrD, InstrE); diff --git a/pipelined/testbench/testbench-linux.sv b/pipelined/testbench/testbench-linux.sv index f24a7a61..da369250 100644 --- a/pipelined/testbench/testbench-linux.sv +++ b/pipelined/testbench/testbench-linux.sv @@ -799,7 +799,7 @@ module testbench; // For waveview convenience string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE, - dut.core.ifu.FinalInstrRawF[31:0], + dut.core.ifu.InstrRawF[31:0], dut.core.ifu.InstrD, dut.core.ifu.InstrE, dut.core.ifu.InstrM, InstrW, InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 23d036b9..8c05c8a4 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -198,7 +198,7 @@ logic [3:0] dummy; // Track names of instructions instrTrackerTB it(clk, 
reset, dut.core.ieu.dp.FlushE, - dut.core.ifu.FinalInstrRawF[31:0], + dut.core.ifu.InstrRawF[31:0], dut.core.ifu.InstrD, dut.core.ifu.InstrE, dut.core.ifu.InstrM, InstrW, InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); From 64080ac098cb64691763ea7379154d1fe809a29f Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 20 Jan 2023 10:13:20 -0800 Subject: [PATCH 05/12] Updated HMC Synopsys license manager --- setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.sh b/setup.sh index 4d9e6910..dc1f418d 100755 --- a/setup.sh +++ b/setup.sh @@ -14,7 +14,7 @@ echo \$WALLY set to ${WALLY} # License servers and commercial CAD tool paths # Must edit these based on your local environment. Ask your sysadmin. export MGLS_LICENSE_FILE=1717@solidworks.eng.hmc.edu # Change this to your Siemens license server -export SNPSLMD_LICENSE_FILE=27020@134.173.38.184 # Change this to your Synopsys license server +export SNPSLMD_LICENSE_FILE=27020@zircon.eng.hmc.edu # Change this to your Synopsys license server export PATH=/cad/mentor/questa_sim-2021.2_1/questasim/bin:$PATH # Change this for your path to Questa export PATH=/cad/synopsys/SYN/bin:$PATH # Change this for your path to Design Compiler From 26cb45e2409f08a98bd87bd8c176762f5c9741b9 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 20 Jan 2023 10:13:47 -0800 Subject: [PATCH 06/12] renamed comparator module --- pipelined/src/ieu/comparator.sv | 2 +- pipelined/src/ieu/datapath.sv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelined/src/ieu/comparator.sv b/pipelined/src/ieu/comparator.sv index eacc8723..5f504dce 100644 --- a/pipelined/src/ieu/comparator.sv +++ b/pipelined/src/ieu/comparator.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" // This comparator is best -module comparator_dc_flip #(parameter WIDTH=64) ( +module comparator #(parameter WIDTH=64) ( input logic [WIDTH-1:0] a, b, // Operands input logic sgnd, // Signed operands output logic [1:0] flags); // 
Output flags: {eq, lt} diff --git a/pipelined/src/ieu/datapath.sv b/pipelined/src/ieu/datapath.sv index 8249f019..5c4ad5ef 100644 --- a/pipelined/src/ieu/datapath.sv +++ b/pipelined/src/ieu/datapath.sv @@ -105,7 +105,7 @@ module datapath ( mux3 #(`XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE); mux3 #(`XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE); - comparator_dc_flip #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE); + comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE); mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ImmExtE, ALUSrcBE, SrcBE); alu #(`XLEN) alu(SrcAE, SrcBE, ALUControlE, Funct3E, ALUResultE, IEUAdrE); From 74ab3867359ebe8170f2fb9bfc1a524212a7aa9c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 12:29:25 -0600 Subject: [PATCH 07/12] More cleanup and formatting. --- pipelined/src/ifu/ifu.sv | 32 +++++++++++++++----------------- pipelined/src/lsu/lsu.sv | 12 ++++++------ pipelined/src/mmu/hptw.sv | 14 +++++++------- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 9c990750..1101e6dd 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -94,6 +94,8 @@ module ifu ( (* mark_debug = "true" *) logic [`XLEN-1:0] PCNextF; // Next PCF, selected from Branch predictor, Privilege, or PC+2/4 logic BranchMisalignedFaultE; // Branch target not aligned to 4 bytes if no compressed allowed (2 bytes if allowed) logic [`XLEN-1:0] PCPlus2or4F; // PCF + 2 (CompressedF) or PCF + 4 (Non-compressed) + logic [`XLEN-1:0] PCNextFSpill; // Next PCF after possible + 2 to handle spill + logic [`XLEN-1:0] PCFSpill; // PCF with possible + 2 to handle spill logic [`XLEN-1:0] PCLinkD; // PCF2or4F delayed 1 cycle. This is next PC after a control flow instruction (br or j) logic [`XLEN-1:2] PCPlus4F; // PCPlus4F is always PCF + 4. 
Fancy way to compute PCPlus2or4F logic [`XLEN-1:0] PCD; // Decode stage instruction address @@ -105,6 +107,7 @@ module ifu ( logic [31:0] ICacheInstrF; // Instruction from the I$ logic [31:0] InstrRawF; // Instruction from the IROM, I$, or bus logic CompressedF; // The fetched instruction is compressed +(* mark_debug = "true" *) logic [31:0] PostSpillInstrRawF; // Fetch instruction after merge two halves of spill logic [31:0] InstrRawD; // Non-decompressed instruction in the Decode stage logic [1:0] IFURWF; // IFU alreays read IFURWF = 10 @@ -112,19 +115,17 @@ module ifu ( logic [31:0] NextInstrD, NextInstrE; // Instruction into the next stage after possible stage flush - logic CacheableF; - logic [`XLEN-1:0] PCNextFSpill; - logic [`XLEN-1:0] PCFSpill; - logic SelNextSpillF; - logic ICacheFetchLine; - logic BusStall; - logic ICacheStallF, IFUCacheBusStallD; - logic GatedStallD; -(* mark_debug = "true" *) logic [31:0] PostSpillInstrRawF; + logic CacheableF; // PMA indicates isntruction address is cacheable + logic SelNextSpillF; // In a spill, stall pipeline and gate local stallF + logic BusStall; // Bus interface busy with multicycle operation + logic ICacheStallF; // I$ busy with multicycle operation + logic IFUCacheBusStallD; // EIther I$ or bus busy with multicycle operation + logic GatedStallD; // StallD gated by selected next spill // branch predictor signal - logic [`XLEN-1:0] PCNext1F, PCNext0F; - logic BusCommittedF, CacheCommittedF; - logic SelIROM; + logic [`XLEN-1:0] PCNext1F; // Branch predictor next PCF + logic BusCommittedF; // Bus memory operation in flight, delay interrupts + logic CacheCommittedF; // I$ memory operation started, delay interrupts + logic SelIROM; // PMA indicates instruction address is in the IROM assign PCFExt = {2'b00, PCFSpill}; @@ -213,13 +214,12 @@ module ifu ( localparam integer LOGBWPL = `ICACHE ? $clog2(WORDSPERLINE) : 1; if(`ICACHE) begin : icache localparam integer LINELEN = `ICACHE ? 
`ICACHE_LINELENINBITS : `XLEN; - localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) + localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) logic [LINELEN-1:0] FetchBuffer; logic [`PA_BITS-1:0] ICacheBusAdr; logic ICacheBusAck; logic [1:0] CacheBusRW, BusRW, CacheRWF; - //assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableF, CacheableF} & ~{SelIROM, SelIROM}; assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? IFURWF : '0; assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0; cache #(.LINELEN(`ICACHE_LINELENINBITS), @@ -268,8 +268,7 @@ module ifu ( if(`IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(FetchBuffer, IROMInstrF, SelIROM, InstrRawF); else assign InstrRawF = FetchBuffer; assign IFUHBURST = 3'b0; - assign {ICacheFetchLine, ICacheStallF} = '0; - assign {ICacheMiss, ICacheAccess} = '0; + assign {ICacheMiss, ICacheAccess, ICacheStallF} = '0; end end else begin : nobus // block: bus assign {BusStall, CacheCommittedF} = '0; @@ -335,7 +334,6 @@ module ifu ( mux2 #(`XLEN) pcmux1(.d0(PCPlus2or4F), .d1(IEUAdrE), .s(PCSrcE), .y(PCNext1F)); assign BPPredWrongE = PCSrcE; assign {InstrClassM, DirPredictionWrongM, BTBPredPCWrongM, RASPredPCWrongM, PredictionInstrClassWrongM} = '0; - assign PCNext0F = PCPlus2or4F; assign NextValidPCE = PCE; end diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 0ebd84f8..d84334a8 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -103,7 +103,7 @@ module lsu ( logic GatedStallW; // Hazard unit StallW gated when SelHPTW = 1 - logic DCacheStallW; // D$ busy with multicycle operation + logic DCacheStallM; // D$ busy with multicycle operation logic BusStall; // Bus interface busy with multicycle operation logic HPTWStall; // HPTW busy with multicycle operation @@ -152,7 +152,7 @@ 
module lsu ( if(`VIRTMEM_SUPPORTED) begin : VIRTMEM_SUPPORTED hptw hptw(.clk, .reset, .MemRWM, .AtomicM, .ITLBMissF, .ITLBWriteF, .DTLBMissM, .DTLBWriteM, .InstrDAPageFaultF, .DataDAPageFaultM, - .FlushW, .DCacheStallW, .SATP_REGW, .PCF, + .FlushW, .DCacheStallM, .SATP_REGW, .PCF, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .ReadDataM(ReadDataM[`XLEN-1:0]), // ReadDataM is LLEN, but HPTW only needs XLEN .WriteDataM, .Funct3M, .LSUFunct3M, .Funct7M, .LSUFunct7M, @@ -179,7 +179,7 @@ module lsu ( // the trap module. assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; - assign LSUStallM = DCacheStallW | HPTWStall | BusStall; + assign LSUStallM = DCacheStallM | HPTWStall | BusStall; ///////////////////////////////////////////////////////////////////////////////////////////// // MMU and misalignment fault logic required if privileged unit exists @@ -267,7 +267,7 @@ module lsu ( .FlushCache(FlushDCacheM), .NextAdr(IEUAdrE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataM), .SelHPTW, - .CacheStall(DCacheStallW), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), + .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), .FetchBuffer, .CacheBusRW, @@ -307,14 +307,14 @@ module lsu ( if(`DTIM_SUPPORTED) mux2 #(`XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM); else assign ReadDataWordMuxM = FetchBuffer[`XLEN-1:0]; assign LSUHBURST = 3'b0; - assign {DCacheStallW, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0; + assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0; end end else begin: nobus // block: bus, only DTIM assign LSUHWDATA = '0; assign ReadDataWordMuxM = DTIMReadDataWordM; assign {BusStall, BusCommittedM} = '0; assign {DCacheMiss, 
DCacheAccess} = '0; - assign {DCacheStallW, DCacheCommittedM} = '0; + assign {DCacheStallM, DCacheCommittedM} = '0; end ///////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index e7fdc416..5e798eed 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -43,7 +43,7 @@ module hptw ( input logic [1:0] PrivilegeModeW, input logic [`XLEN-1:0] ReadDataM, // page table entry from LSU input logic [`XLEN-1:0] WriteDataM, - input logic DCacheStallW, // stall from LSU + input logic DCacheStallM, // stall from LSU input logic [2:0] Funct3M, input logic [6:0] Funct7M, input logic ITLBMissF, @@ -114,7 +114,7 @@ module hptw ( // State flops flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrDAFaultM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) - assign PRegEn = HPTWRW[1] & ~DCacheStallW | UpdatePTE; + assign PRegEn = HPTWRW[1] & ~DCacheStallM | UpdatePTE; flopenr #(`XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache // Assign PTE descriptors common across all XLEN values @@ -248,24 +248,24 @@ module hptw ( IDLE: if (TLBMiss) NextWalkerState = InitialWalkerState; else NextWalkerState = IDLE; L3_ADR: NextWalkerState = L3_RD; // first access in SV48 - L3_RD: if (DCacheStallW) NextWalkerState = L3_RD; + L3_RD: if (DCacheStallM) NextWalkerState = L3_RD; else NextWalkerState = L2_ADR; L2_ADR: if (InitialWalkerState == L2_ADR | ValidNonLeafPTE) NextWalkerState = L2_RD; // first access in SV39 else NextWalkerState = LEAF; - L2_RD: if (DCacheStallW) NextWalkerState = L2_RD; + L2_RD: if (DCacheStallM) NextWalkerState = L2_RD; else NextWalkerState = L1_ADR; L1_ADR: if (InitialWalkerState == L1_ADR | ValidNonLeafPTE) NextWalkerState = L1_RD; // first access in SV32 else if (ValidNonLeafPTE) NextWalkerState = L1_RD; else NextWalkerState = LEAF; - L1_RD: if (DCacheStallW) 
NextWalkerState = L1_RD; + L1_RD: if (DCacheStallM) NextWalkerState = L1_RD; else NextWalkerState = L0_ADR; L0_ADR: if (ValidNonLeafPTE) NextWalkerState = L0_RD; else NextWalkerState = LEAF; - L0_RD: if (DCacheStallW) NextWalkerState = L0_RD; + L0_RD: if (DCacheStallM) NextWalkerState = L0_RD; else NextWalkerState = LEAF; LEAF: if (`HPTW_WRITES_SUPPORTED & DAPageFault) NextWalkerState = UPDATE_PTE; else NextWalkerState = IDLE; - UPDATE_PTE: if(DCacheStallW) NextWalkerState = UPDATE_PTE; + UPDATE_PTE: if(DCacheStallM) NextWalkerState = UPDATE_PTE; else NextWalkerState = LEAF; default: NextWalkerState = IDLE; // should never be reached endcase // case (WalkerState) From d3df8e062ed8dfb09e9817bb231e46ef315b897f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 12:41:57 -0600 Subject: [PATCH 08/12] Formatting. --- pipelined/src/cache/cache.sv | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 62fc3a1d..9cf0a41f 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -6,6 +6,8 @@ // // Purpose: Storage for data and meta data. // +// Documentation: RISC-V System on Chip Design Chapter 7 (Figures 7.9, 7.11, and 7.20) +// // A component of the CORE-V-WALLY configurable RISC-V project. 
// // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -63,11 +65,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE localparam SETLEN = $clog2(NUMLINES); // Number of set bits localparam SETTOP = SETLEN+OFFSETLEN; // Number of set plus offset bits localparam TAGLEN = `PA_BITS - SETTOP; // Number of tag bits - localparam WORDSPERLINE = LINELEN/WORDLEN; // Number of words in cache line + localparam CACHEWORDSPERLINE = LINELEN/WORDLEN;// Number of words in cache line + localparam LOGCWPL = $clog2(CACHEWORDSPERLINE);// Log2 of ^ localparam FLUSHADRTHRESHOLD = NUMLINES - 1; // Used to determine when flush is complete localparam LOGLLENBYTES = $clog2(WORDLEN/8); // Number of bits to address a word - localparam CACHEWORDSPERLINE = `DCACHE_LINELENINBITS/WORDLEN; // *** see if this is the same as WORDSPERLINE - localparam LOGCWPL = $clog2(CACHEWORDSPERLINE); // *** + logic SelAdr; logic [1:0] AdrSelMuxSel; From 3d202ed2fd3c5d3bcf94663f8a9497eba6958028 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 12:49:55 -0600 Subject: [PATCH 09/12] Reformatting cachefsm. 
--- pipelined/src/cache/cache.sv | 2 +- pipelined/src/cache/cachefsm.sv | 56 +++++++++++++++------------------ 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 9cf0a41f..59e9435a 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -55,7 +55,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE input logic SelBusBeat, // Word in cache line comes from BeatCount input logic [LOGBWPL-1:0] BeatCount, // Beat in burst input logic [LINELEN-1:0] FetchBuffer, // Buffer long enough to hold entire cache line arriving from bus - output logic [1:0] CacheBusRW, // [1] Read or [0] write bus + output logic [1:0] CacheBusRW, // [1] Read (cache line fetch) or [0] write bus (cache line writeback) output logic [`PA_BITS-1:0] CacheBusAdr // Address for bus access ); diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 5dba257d..3afb5858 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -29,30 +29,28 @@ module cachefsm ( input logic clk, input logic reset, + // hazard and privilege unit + input logic Stall, // Stall the cache, preventing new accesses. 
In-flight access finished but does not return to READY + input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) + output logic CacheCommitted, // Cache has started bus operation that shouldn't be interrupted + output logic CacheStall, // Cache stalls pipeline during multicycle operation // inputs from IEU - input logic FlushStage, - input logic [1:0] CacheRW, - input logic [1:0] CacheAtomic, - input logic FlushCache, - input logic InvalidateCache, - // hazard inputs - input logic Stall, - // Bus inputs - input logic CacheBusAck, - // dcache internals - input logic CacheHit, - input logic LineDirty, - input logic FlushAdrFlag, - input logic FlushWayFlag, - - // hazard outputs - output logic CacheStall, - // counter outputs - output logic CacheMiss, - output logic CacheAccess, + input logic [1:0] CacheRW, // [1] Read, [0] Write + input logic [1:0] CacheAtomic, // Atomic operation + input logic FlushCache, // Flush all dirty lines back to memory + input logic InvalidateCache, // Clear all valid bits + // cache internals + input logic CacheHit, // Exactly 1 way hits + input logic LineDirty, // The selected line and way is dirty + input logic FlushAdrFlag, // On last set of a cache flush + input logic FlushWayFlag, // On the last way for any set of a cache flush + // Bus controls + input logic CacheBusAck, // Bus operation completed + output logic [1:0] CacheBusRW, // [1] Read (cache line fetch) or [0] write bus (cache line writeback) + // performance counter outputs + output logic CacheMiss, // Cache miss + output logic CacheAccess, // Cache access // Bus outputs - output logic CacheCommitted, - output logic [1:0] CacheBusRW, // dcache internals output logic SelAdr, @@ -114,8 +112,6 @@ module cachefsm ( case (CurrState) STATE_READY: if(InvalidateCache) NextState = STATE_READY; else if(FlushCache) NextState = STATE_FLUSH; - // Delayed LRU update. Cannot check if victim line is dirty on this cycle. 
- // To optimize do the fetch first, then eviction if necessary. else if(AnyMiss & ~LineDirty) NextState = STATE_FETCH; else if(AnyMiss & LineDirty) NextState = STATE_WRITEBACK; else NextState = STATE_READY; @@ -128,11 +124,11 @@ module cachefsm ( else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; - else if (FlushFlag) NextState = STATE_READ_HOLD; - else NextState = STATE_FLUSH; - STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; - else NextState = STATE_FLUSH_WRITEBACK; + else if (FlushFlag) NextState = STATE_READ_HOLD; + else NextState = STATE_FLUSH; + STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH; + else if(CacheBusAck) NextState = STATE_READ_HOLD; + else NextState = STATE_FLUSH_WRITEBACK; default: NextState = STATE_READY; endcase end @@ -174,7 +170,7 @@ module cachefsm ( assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck); - // **** can this be simplified? + assign SelAdr = (CurrState == STATE_READY & (StoreAMO | AnyMiss)) | // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | From ecceea177a6e32a78a0f291df8b026a9a4d957ee Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 13:05:10 -0600 Subject: [PATCH 10/12] Formatting. 
--- pipelined/src/cache/cache.sv | 7 ++--- pipelined/src/cache/cacheLRU.sv | 34 ++++++++++++++----------- pipelined/src/cache/cachefsm.sv | 45 +++++++++++++++++---------------- 3 files changed, 46 insertions(+), 40 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 59e9435a..4761241d 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -1,10 +1,11 @@ /////////////////////////////////////////// // cache // -// Written: ross1728@gmail.com July 07, 2021 -// Implements the L1 instruction/data cache +// Written: Ross Thompson ross1728@gmail.com +// Created: 7 July 2021 +// Modified: 20 January 2023 // -// Purpose: Storage for data and meta data. +// Purpose: Implements the I$ and D$. Interfaces with requests from IEU and HPTW and ahbcacheinterface // // Documentation: RISC-V System on Chip Design Chapter 7 (Figures 7.9, 7.11, and 7.20) // diff --git a/pipelined/src/cache/cacheLRU.sv b/pipelined/src/cache/cacheLRU.sv index 47d5cf6a..8dfce679 100644 --- a/pipelined/src/cache/cacheLRU.sv +++ b/pipelined/src/cache/cacheLRU.sv @@ -1,10 +1,13 @@ /////////////////////////////////////////// // dcache (data cache) // -// Written: ross1728@gmail.com July 20, 2021 -// Implements Pseudo LRU -// Tested for Powers of 2. +// Written: Ross Thompson ross1728@gmail.com +// Created: 20 July 2021 +// Modified: 20 January 2023 // +// Purpose: Implements Pseudo LRU. Tested for Powers of 2. +// +// Documentation: RISC-V System on Chip Design Chapter 7 (Figures 7.8 and 7.16 to 7.19) // // A component of the CORE-V-WALLY configurable RISC-V project. 
// @@ -28,18 +31,19 @@ module cacheLRU #(parameter NUMWAYS = 4, SETLEN = 9, OFFSETLEN = 5, NUMLINES = 128) ( - input logic clk, reset, - input logic CacheEn, - input logic FlushStage, - input logic [NUMWAYS-1:0] HitWay, - input logic [NUMWAYS-1:0] ValidWay, - input logic [SETLEN-1:0] CAdr, - input logic [SETLEN-1:0] PAdr, - input logic LRUWriteEn, - input logic SetValid, - input logic InvalidateCache, - input logic FlushCache, - output logic [NUMWAYS-1:0] VictimWay + input logic clk, + input logic reset, + input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) + input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant + input logic [NUMWAYS-1:0] HitWay, // Which way is valid and matches PAdr's tag + input logic [NUMWAYS-1:0] ValidWay, // Which ways for a particular set are valid, ignores tag + input logic [SETLEN-1:0] CAdr, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr + input logic [SETLEN-1:0] PAdr, // Physical address + input logic LRUWriteEn, // Update the LRU state + input logic SetValid, // Set the valid bit in the selected way and set + input logic InvalidateCache, // Clear all valid bits + input logic FlushCache, // Flush all dirty lines back to memory + output logic [NUMWAYS-1:0] VictimWay // LRU selects a victim to evict ); localparam LOGNUMWAYS = $clog2(NUMWAYS); diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 3afb5858..4f209c3b 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // dcache (data cache) fsm // -// Written: ross1728@gmail.com August 25, 2021 -// Implements the L1 data cache fsm +// Written: Ross Thompson ross1728@gmail.com +// Created: 25 August 2021 +// Modified: 20 January 2023 // // Purpose: Controller for the dcache fsm // +// Documentation: RISC-V System on Chip Design Chapter 7 (Figure 7.15 and Table
7.1) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -39,33 +42,31 @@ module cachefsm ( input logic [1:0] CacheAtomic, // Atomic operation input logic FlushCache, // Flush all dirty lines back to memory input logic InvalidateCache, // Clear all valid bits - // cache internals - input logic CacheHit, // Exactly 1 way hits - input logic LineDirty, // The selected line and way is dirty - input logic FlushAdrFlag, // On last set of a cache flush - input logic FlushWayFlag, // On the last way for any set of a cache flush // Bus controls input logic CacheBusAck, // Bus operation completed output logic [1:0] CacheBusRW, // [1] Read (cache line fetch) or [0] write bus (cache line writeback) // performance counter outputs output logic CacheMiss, // Cache miss output logic CacheAccess, // Cache access - // Bus outputs - // dcache internals - output logic SelAdr, - output logic ClearValid, - output logic ClearDirty, - output logic SetDirty, - output logic SetValid, - output logic SelWriteback, - output logic LRUWriteEn, - output logic SelFlush, - output logic FlushAdrCntEn, - output logic FlushWayCntEn, - output logic FlushCntRst, - output logic SelFetchBuffer, - output logic CacheEn + // cache internals + input logic CacheHit, // Exactly 1 way hits + input logic LineDirty, // The selected line and way is dirty + input logic FlushAdrFlag, // On last set of a cache flush + input logic FlushWayFlag, // On the last way for any set of a cache flush + output logic SelAdr, // [0] SRAM reads from NextAdr, [1] SRAM reads from PAdr + output logic ClearValid, // Clear the valid bit in the selected way and set + output logic ClearDirty, // Clear the dirty bit in the selected way and set + output logic SetValid, // Set the valid bit in the selected way and set + output logic SetDirty, // Set the dirty bit in the selected way and set + output logic SelWriteback, // Overrides cached tag check
to select a specific way and set for writeback + output logic LRUWriteEn, // Update the LRU state + output logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr + output logic FlushAdrCntEn, // Enable the counter for Flush Adr + output logic FlushWayCntEn, // Enable the way counter during a flush + output logic FlushCntRst, // Reset both flush counters + output logic SelFetchBuffer, // Bypass the SRAM for a load hit by directly using the read data from the ahbcacheinterface's FetchBuffer + output logic CacheEn // Enable the cache memory arrays. Disable hold read data constant ); logic resetDelay; From bcadbd710425a364f0b0e851b780bd56283ec501 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 13:09:42 -0600 Subject: [PATCH 11/12] Formatting. --- pipelined/src/cache/cachefsm.sv | 2 +- pipelined/src/cache/cacheway.sv | 40 ++++++++++++++++----------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 4f209c3b..2f9bca06 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -56,8 +56,8 @@ module cachefsm ( input logic FlushWayFlag, // On the last way for any set of a cache flush output logic SelAdr, // [0] SRAM reads from NextAdr, [1] SRAM reads from PAdr output logic ClearValid, // Clear the valid bit in the selected way and set - output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic SetValid, // Set the valid bit in the selected way and set + output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic SetDirty, // Set the dirty bit in the selected way and set output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback output logic LRUWriteEn, // Update the LRU state diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index cb4b343b..96e30245 100644 ---
a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -29,28 +29,28 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, OFFSETLEN = 5, INDEXLEN = 9, DIRTY_BITS = 1) ( input logic clk, - input logic CacheEn, input logic reset, - input logic [$clog2(NUMLINES)-1:0] CAdr, - input logic [`PA_BITS-1:0] PAdr, - input logic [LINELEN-1:0] LineWriteData, - input logic SetValid, - input logic ClearValid, - input logic SetDirty, - input logic ClearDirty, - input logic SelWriteback, - input logic SelFlush, - input logic VictimWay, - input logic FlushWay, - input logic InvalidateCache, - input logic FlushStage, - input logic [LINELEN/8-1:0] LineByteMask, + input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) + input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant + input logic [$clog2(NUMLINES)-1:0] CAdr, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr + input logic [`PA_BITS-1:0] PAdr, // Physical address + input logic [LINELEN-1:0] LineWriteData, // Final data written to cache (D$ only) + input logic SetValid, // Set the valid bit in the selected way and set + input logic ClearValid, // Clear the valid bit in the selected way and set + input logic SetDirty, // Set the dirty bit in the selected way and set + input logic ClearDirty, // Clear the dirty bit in the selected way and set + input logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback + input logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr + input logic VictimWay, // LRU selected this way as victim to evict + input logic FlushWay, // This way is selected for flush and possible writeback if dirty + input logic InvalidateCache,//Clear all valid bits + input logic [LINELEN/8-1:0] LineByteMask, // Final byte enables to cache (D$ only) - output logic [LINELEN-1:0] ReadDataLineWay, - output logic HitWay, -
output logic ValidWay, - output logic DirtyWay, - output logic [TAGLEN-1:0] TagWay); + output logic [LINELEN-1:0] ReadDataLineWay,// This way's read data if valid + output logic HitWay, // This way hits + output logic ValidWay, // This way is valid + output logic DirtyWay, // This way is dirty + output logic [TAGLEN-1:0] TagWay); // This way's tag if valid localparam integer WORDSPERLINE = LINELEN/`XLEN; localparam integer BYTESPERLINE = LINELEN/8; From 2e9b5f9ae4c8c457955f899cb0c23c4f1ba93690 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 20 Jan 2023 13:13:05 -0600 Subject: [PATCH 12/12] Formatting. --- pipelined/src/cache/cacheway.sv | 7 +++++-- pipelined/src/cache/subcachelineread.sv | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 96e30245..d5fc0b7d 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // cacheway // -// Written: ross1728@gmail.com July 07, 2021 -// Implements the data, tag, valid, dirty, and replacement bits. +// Written: Ross Thompson ross1728@gmail.com +// Created: 7 July 2021 +// Modified: 20 January 2023 // // Purpose: Storage and read/write access to data cache data, tag valid, dirty, and replacement. // +// Documentation: RISC-V System on Chip Design Chapter 7 (Figure 7.12) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University diff --git a/pipelined/src/cache/subcachelineread.sv b/pipelined/src/cache/subcachelineread.sv index a963791b..346ec710 100644 --- a/pipelined/src/cache/subcachelineread.sv +++ b/pipelined/src/cache/subcachelineread.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // subcachelineread // -// Written: Ross Thompson ross1728@gmail.com February 04, 2022 -// Muxes the cache line downto the word size.
Also include possilbe save/restore registers/muxes. +// Written: Ross Thompson ross1728@gmail.com +// Created: 4 February 2022 +// Modified: 20 January 2023 // -// Purpose: Controller for the dcache fsm +// Purpose: Muxes the cache line down to the word size. Also includes possible save/restore registers/muxes. // +// Documentation: RISC-V System on Chip Design Chapter 7 + // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -26,10 +29,12 @@ `include "wally-config.vh" -module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)( - input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr, - input logic [LINELEN-1:0] ReadDataLine, - output logic [WORDLEN-1:0] ReadDataWord +module subcachelineread #(parameter LINELEN, WORDLEN, + parameter MUXINTERVAL // The number of bits between mux. Set to 16 for I$ to support compressed. Set to `LLEN for D$ +)( + input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr, // Physical address + input logic [LINELEN-1:0] ReadDataLine,// Read data of the whole cacheline + output logic [WORDLEN-1:0] ReadDataWord // read data of selected word. ); localparam WORDSPERLINE = LINELEN/MUXINTERVAL;