diff --git a/pipelined/src/ebu/ahbcacheinterface.sv b/pipelined/src/ebu/ahbcacheinterface.sv index 8b9e29e8..a127e0fa 100644 --- a/pipelined/src/ebu/ahbcacheinterface.sv +++ b/pipelined/src/ebu/ahbcacheinterface.sv @@ -1,14 +1,13 @@ /////////////////////////////////////////// // ahbcacheinterface.sv // -// Written: Ross Thompson ross1728@gmail.com August 29, 2022 -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: August 29, 2022 +// Modified: 18 January 2023 // -// Purpose: Cache/Bus data path. -// Bus Side logic -// register the fetch data from the next level of memory. -// This register should be necessary for timing. There is no register in the uncore or -// ahblite controller between the memories and this cache. +// Purpose: Translates cache bus requests and uncached ieu memory requests into AHB transactions. +// +// Documentation: RISC-V System on Chip Design Chapter 9 (Figure 9.8) // // A component of the CORE-V-WALLY configurable RISC-V project. // @@ -30,7 +29,12 @@ `include "wally-config.vh" -module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, LLENPOVERAHBW) ( +module ahbcacheinterface #( + parameter integer BEATSPERLINE, // Number of AHBW words (beats) in cacheline + parameter integer AHBWLOGBWPL, // Log2 of ^ + parameter integer LINELEN, // Number of bits in cacheline + parameter integer LLENPOVERAHBW // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. 
(implementation limitation) +)( input logic HCLK, HRESETn, // bus interface controls input logic HREADY, // AHB peripheral ready @@ -52,7 +56,7 @@ module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, LLENPOVERAHB input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch output logic CacheBusAck, // Handshack to $ indicating bus transaction completed output logic [LINELEN-1:0] FetchBuffer, // Register to hold beats of cache line as the arrive from bus - output logic [LOGWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase + output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr // uncached interface @@ -70,7 +74,7 @@ module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, LLENPOVERAHB localparam integer BeatCountThreshold = BEATSPERLINE - 1; // Largest beat index logic [`PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation - logic [LOGWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage + logic [AHBWLOGBWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s logic [`AHBW-1:0] PreHWDATA; // AHB Address phase write data @@ -86,7 +90,7 @@ module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, LLENPOVERAHB end mux2 #(`PA_BITS) localadrmux(PAdr, CacheBusAdr, Cacheable, LocalHADDR); - assign HADDR = ({{`PA_BITS-LOGWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR; + assign HADDR = ({{`PA_BITS-AHBWLOGBWPL{1'b0}}, BeatCount} << $clog2(`AHBW/8)) + LocalHADDR; mux2 #(3) sizemux(.d0(Funct3), .d1(`AHBW == 32 ? 
3'b010 : 3'b011), .s(Cacheable), .y(HSIZE)); @@ -111,7 +115,7 @@ module ahbcacheinterface #(parameter BEATSPERLINE, LINELEN, LOGWPL, LLENPOVERAHB flopen #(`AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[`AHBW/8-1:0], HWSTRB); - buscachefsm #(BeatCountThreshold, LOGWPL) AHBBuscachefsm( + buscachefsm #(BeatCountThreshold, AHBWLOGBWPL) AHBBuscachefsm( .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, .CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed, .HREADY, .HTRANS, .HWRITE, .HBURST); diff --git a/pipelined/src/ebu/ahbinterface.sv b/pipelined/src/ebu/ahbinterface.sv index 63449fe8..ff50f54f 100644 --- a/pipelined/src/ebu/ahbinterface.sv +++ b/pipelined/src/ebu/ahbinterface.sv @@ -1,15 +1,14 @@ /////////////////////////////////////////// // ahbinterface.sv // -// Written: Ross Thompson ross1728@gmail.com August 29, 2022 -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: August 29, 2022 +// Modified: 18 January 2023 // -// Purpose: Cache/Bus data path. -// Bus Side logic -// register the fetch data from the next level of memory. -// This register should be necessary for timing. There is no register in the uncore or -// ahblite controller between the memories and this cache. +// Purpose: Translates LSU simple memory requests into AHB transactions (NON_SEQ). // +// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.21) +// // A component of the CORE-V-WALLY configurable RISC-V project. 
// // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -30,7 +29,9 @@ `include "wally-config.vh" -module ahbinterface #(parameter LSU = 0) ( // **** modify to use LSU/ifu parameter to control widths of buses +module ahbinterface #( + parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits +)( input logic HCLK, HRESETn, // bus interface input logic HREADY, // AHB peripheral ready diff --git a/pipelined/src/ebu/buscachefsm.sv b/pipelined/src/ebu/buscachefsm.sv index d9952a45..97002cab 100644 --- a/pipelined/src/ebu/buscachefsm.sv +++ b/pipelined/src/ebu/buscachefsm.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // busfsm.sv // -// Written: Ross Thompson ross1728@gmail.com December 29, 2021 -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: December 29, 2021 +// Modified: 18 January 2023 // -// Purpose: Load/Store Unit's interface to BUS for cacheless system +// Purpose: Controller for cache to AHB bus interface // +// Documentation: RISC-V System on Chip Design Chapter 9 (Figure 9.9) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -25,37 +28,40 @@ //////////////////////////////////////////////////////////////////////////////////////////////// `include "wally-config.vh" -`define BURST_EN 1 +`define BURST_EN 1 // Enables burst mode. Disable to show the lost performance. // HCLK and clk must be the same clock! -module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) ( +module buscachefsm #( + parameter integer BeatCountThreshold, // Largest beat index + parameter integer AHBWLOGBWPL // Log2 of BEATSPERLINE +)( input logic HCLK, input logic HRESETn, // IEU interface - input logic Stall, // Core pipeline is stalled - input logic Flush, // Pipeline stage flush. 
Prevents bus transaction from starting - input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write - output logic BusStall, // Bus is busy with an in flight memory operation - output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt - - // ahb cache interface locals. - output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA - - // cache interface - input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch - output logic CacheBusAck, // Handshack to $ indicating bus transaction completed + input logic Stall, // Core pipeline is stalled + input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting + input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write + output logic BusStall, // Bus is busy with an in flight memory operation + output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt + + // ahb cache interface locals. 
+ output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA + + // cache interface + input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch + output logic CacheBusAck, // Handshack to $ indicating bus transaction completed // lsu interface - output logic [LOGWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase - output logic [LOGWPL-1:0] BeatCountDelayed, // Beat within the cache line in the second (Data) cache stage - output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr + output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase + output logic [AHBWLOGBWPL-1:0] BeatCountDelayed, // Beat within the cache line in the second (Data) cache stage + output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr // BUS interface - input logic HREADY, // AHB peripheral ready - output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ - output logic HWRITE, // AHB 0: Read operation 1: Write operation - output logic [2:0] HBURST // AHB burst length + input logic HREADY, // AHB peripheral ready + output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ + output logic HWRITE, // AHB 0: Read operation 1: Write operation + output logic [2:0] HBURST // AHB burst length ); typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; @@ -63,7 +69,7 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) ( (* mark_debug = "true" *) busstatetype CurrState, NextState; - logic [LOGWPL-1:0] NextBeatCount; + logic [AHBWLOGBWPL-1:0] NextBeatCount; logic FinalBeatCount; logic [2:0] LocalBurstType; logic BeatCntEn; @@ -76,14 +82,14 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) ( always_comb begin case(CurrState) - 
ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; + ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK; else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; else NextState = ADR_PHASE; DATA_PHASE: if(HREADY) NextState = MEM3; - else NextState = DATA_PHASE; + else NextState = DATA_PHASE; MEM3: if(Stall) NextState = MEM3; - else NextState = ADR_PHASE; + else NextState = ADR_PHASE; CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; @@ -92,17 +98,17 @@ module buscachefsm #(parameter integer BeatCountThreshold, LOGWPL) ( else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_WRITEBACK; - default: NextState = ADR_PHASE; + default: NextState = ADR_PHASE; endcase end // IEU, LSU, and IFU controls // Used to store data from data phase of AHB. 
- flopenr #(LOGWPL) BeatCountReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, NextBeatCount, BeatCount); - flopenr #(LOGWPL) BeatCountDelayedReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, BeatCount, BeatCountDelayed); + flopenr #(AHBWLOGBWPL) BeatCountReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, NextBeatCount, BeatCount); + flopenr #(AHBWLOGBWPL) BeatCountDelayedReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, BeatCount, BeatCountDelayed); assign NextBeatCount = BeatCount + 1'b1; - assign FinalBeatCount = BeatCountDelayed == BeatCountThreshold[LOGWPL-1:0]; + assign FinalBeatCount = BeatCountDelayed == BeatCountThreshold[AHBWLOGBWPL-1:0]; assign BeatCntEn = ((NextState == CACHE_WRITEBACK | NextState == CACHE_FETCH) & HREADY & ~Flush) | (NextState == ADR_PHASE & |CacheBusRW & HREADY); assign BeatCntReset = NextState == ADR_PHASE; diff --git a/pipelined/src/ebu/busfsm.sv b/pipelined/src/ebu/busfsm.sv index 1e8af036..83025322 100644 --- a/pipelined/src/ebu/busfsm.sv +++ b/pipelined/src/ebu/busfsm.sv @@ -1,10 +1,13 @@ /////////////////////////////////////////// // busfsm.sv // -// Written: Ross Thompson ross1728@gmail.com December 29, 2021 -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: December 29, 2021 +// Modified: 18 January 2023 // -// Purpose: Load/Store Unit's interface to BUS for cacheless system +// Purpose: Simple NON_SEQ (no burst) AHB controller. +// +// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.23) // // A component of the CORE-V-WALLY configurable RISC-V project. 
// @@ -55,13 +58,13 @@ module busfsm ( always_comb begin case(CurrState) - ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; - else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY) NextState = MEM3; - else NextState = DATA_PHASE; - MEM3: if(Stall) NextState = MEM3; - else NextState = ADR_PHASE; - default: NextState = ADR_PHASE; + ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; + else NextState = ADR_PHASE; + DATA_PHASE: if(HREADY) NextState = MEM3; + else NextState = DATA_PHASE; + MEM3: if(Stall) NextState = MEM3; + else NextState = ADR_PHASE; + default: NextState = ADR_PHASE; endcase end diff --git a/pipelined/src/ebu/controllerinputstage.sv b/pipelined/src/ebu/controllerinputstage.sv index e90206ec..681f12bc 100644 --- a/pipelined/src/ebu/controllerinputstage.sv +++ b/pipelined/src/ebu/controllerinputstage.sv @@ -1,17 +1,18 @@ /////////////////////////////////////////// // controller input stage // -// Written: Ross Thompson August 31, 2022 -// ross1728@gmail.com -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: August 31, 2022 +// Modified: 18 January 2023 // // Purpose: AHB multi controller interface to merge LSU and IFU controls. // See ARM_HIH0033A_AMBA_AHB-Lite_SPEC 1.0 // Arbitrates requests from instruction and data streams // Connects core to peripherals and I/O pins on SOC // Bus width presently matches XLEN -// Anticipate replacing this with an AXI bus interface to communicate with FPGA DRAM/Flash controllers // +// Documentation: RISC-V System on Chip Design Chapter 6 (Figure 6.25) +// // A component of the CORE-V-WALLY configurable RISC-V project. 
// // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -32,25 +33,29 @@ `include "wally-config.vh" -module controllerinputstage #(parameter SAVE_ENABLED = 1) ( - input logic HCLK, - input logic HRESETn, - input logic Save, Restore, Disable, - output logic Request, +module controllerinputstage #( + parameter SAVE_ENABLED = 1 // 1: Save manager inputs if Save = 1, 0: Don't save inputs +)( + input logic HCLK, + input logic HRESETn, + input logic Save, // Two or more managers requesting (HTRANS != 00) at the same time. Save the non-granted manager inputs + input logic Restore, // Restore the saved manager inputs when it is finally granted + input logic Disable, // Suppress HREADY to the non-granted manager + output logic Request, // This manager is making a request // controller input - input logic HWRITEIn, - input logic [2:0] HSIZEIn, - input logic [2:0] HBURSTIn, - input logic [1:0] HTRANSIn, - input logic [`PA_BITS-1:0] HADDRIn, - output logic HREADYOut, + input logic [1:0] HTRANSIn, // Manager input. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ + input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation + input logic [2:0] HSIZEIn, // Manager input. AHB transaction width + input logic [2:0] HBURSTIn, // Manager input. AHB burst length + input logic [`PA_BITS-1:0] HADDRIn, // Manager input. AHB address + output logic HREADYOut, // Indicate to manager the peripheral is not busy and another manager does not have priority // controller output - output logic HWRITEOut, - output logic [2:0] HSIZEOut, - output logic [2:0] HBURSTOut, - output logic [1:0] HTRANSOut, - output logic [`PA_BITS-1:0] HADDROut, - input logic HREADYIn + output logic [1:0] HTRANSOut, // Arbitrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ + output logic HWRITEOut, // Arbitrated manager transaction. AHB 0: Read operation 1: Write operation + output logic [2:0] HSIZEOut, // Arbitrated manager transaction. 
AHB transaction width + output logic [2:0] HBURSTOut, // Arbitrated manager transaction. AHB burst length + output logic [`PA_BITS-1:0] HADDROut, // Arbitrated manager transaction. AHB address + input logic HREADYIn // Peripheral ready ); logic HWRITESave; diff --git a/pipelined/src/ebu/ebu.sv b/pipelined/src/ebu/ebu.sv index f6792da3..bc162e08 100644 --- a/pipelined/src/ebu/ebu.sv +++ b/pipelined/src/ebu/ebu.sv @@ -1,17 +1,18 @@ /////////////////////////////////////////// // abhmulticontroller // -// Written: Ross Thompson August 29, 2022 -// ross1728@gmail.com -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: August 29, 2022 +// Modified: 18 January 2023 // // Purpose: AHB multi controller interface to merge LSU and IFU controls. // See ARM_HIH0033A_AMBA_AHB-Lite_SPEC 1.0 // Arbitrates requests from instruction and data streams // Connects core to peripherals and I/O pins on SOC // Bus width presently matches XLEN -// Anticipate replacing this with an AXI bus interface to communicate with FPGA DRAM/Flash controllers // +// Documentation: RISC-V System on Chip Design Chapter 6 (Figures 6.25 and 6.26) +// // A component of the CORE-V-WALLY configurable RISC-V project. 
// // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -35,42 +36,46 @@ module ebu ( input logic clk, reset, // Signals from IFU - input logic [`PA_BITS-1:0] IFUHADDR, - input logic [2:0] IFUHSIZE, - input logic [2:0] IFUHBURST, - input logic [1:0] IFUHTRANS, - output logic IFUHREADY, + input logic [1:0] IFUHTRANS, // IFU AHB transaction request + input logic [2:0] IFUHSIZE, // IFU AHB transaction size + input logic [2:0] IFUHBURST, // IFU AHB burst length + input logic [`PA_BITS-1:0] IFUHADDR, // IFU AHB address + output logic IFUHREADY, // AHB peripheral ready gated by possible non-grant // Signals from LSU - input logic [`PA_BITS-1:0] LSUHADDR, + input logic [1:0] LSUHTRANS, // LSU AHB transaction request + input logic LSUHWRITE, // LSU AHB transaction direction. 1: write, 0: read + input logic [2:0] LSUHSIZE, // LSU AHB size + input logic [2:0] LSUHBURST, // LSU AHB burst length + input logic [`PA_BITS-1:0] LSUHADDR, // LSU AHB address input logic [`XLEN-1:0] LSUHWDATA, // initially support AHBW = XLEN - input logic [`XLEN/8-1:0] LSUHWSTRB, - input logic [2:0] LSUHSIZE, - input logic [2:0] LSUHBURST, - input logic [1:0] LSUHTRANS, - input logic LSUHWRITE, - output logic LSUHREADY, - // add LSUHWSTRB *** + input logic [`XLEN/8-1:0] LSUHWSTRB, // AHB byte mask + output logic LSUHREADY, // AHB peripheral. 
Never gated as LSU always has priority // AHB-Lite external signals - (* mark_debug = "true" *) input logic HREADY, HRESP, - (* mark_debug = "true" *) output logic HCLK, HRESETn, - (* mark_debug = "true" *) output logic [`PA_BITS-1:0] HADDR, - (* mark_debug = "true" *) output logic [`AHBW-1:0] HWDATA, - (* mark_debug = "true" *) output logic [`XLEN/8-1:0] HWSTRB, - (* mark_debug = "true" *) output logic HWRITE, - (* mark_debug = "true" *) output logic [2:0] HSIZE, - (* mark_debug = "true" *) output logic [2:0] HBURST, - (* mark_debug = "true" *) output logic [3:0] HPROT, - (* mark_debug = "true" *) output logic [1:0] HTRANS, - (* mark_debug = "true" *) output logic HMASTLOCK + (* mark_debug = "true" *) output logic HCLK, HRESETn, + (* mark_debug = "true" *) input logic HREADY, // AHB peripheral ready + (* mark_debug = "true" *) input logic HRESP, // AHB peripheral response. 0: OK 1: Error + (* mark_debug = "true" *) output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration + (* mark_debug = "true" *) output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration + (* mark_debug = "true" *) output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration + (* mark_debug = "true" *) output logic HWRITE, // AHB transaction direction after arbitration + (* mark_debug = "true" *) output logic [2:0] HSIZE, // AHB transaction size after arbitration + (* mark_debug = "true" *) output logic [2:0] HBURST, // AHB burst length after arbitration + (* mark_debug = "true" *) output logic [3:0] HPROT, // AHB protection. Wally does not use + (* mark_debug = "true" *) output logic [1:0] HTRANS, // AHB transaction request after arbitration + (* mark_debug = "true" *) output logic HMASTLOCK // AHB master lock. 
Wally does not use ); typedef enum logic [1:0] {IDLE, ARBITRATE} statetype; statetype CurrState, NextState; - logic LSUDisable, LSUSelect; - logic IFUSave, IFURestore, IFUDisable, IFUSelect; - logic both; + logic LSUDisable; + logic LSUSelect; + logic IFUSave; + logic IFURestore; + logic IFUDisable; + logic IFUSelect; + logic both; // Both the LSU and IFU request at the same time logic [`PA_BITS-1:0] IFUHADDROut; logic [1:0] IFUHTRANSOut; @@ -84,14 +89,15 @@ module ebu ( logic [2:0] LSUHSIZEOut; logic LSUHWRITEOut; - logic IFUReq, LSUReq; + logic IFUReq; + logic LSUReq; logic BeatCntEn; - logic [4-1:0] NextBeatCount, BeatCount; - logic FinalBeat, FinalBeatD; + logic [4-1:0] NextBeatCount, BeatCount; // Position within a burst transfer + logic FinalBeat, FinalBeatD; // Indicates the last beat of a burst logic CntReset; - logic [3:0] Threshold; - logic IFUReqD; + logic [3:0] Threshold; // Number of beats derived from HBURST + logic IFUReqD; // 1 cycle delayed IFU request. Part of arbitration assign HCLK = clk; @@ -100,14 +106,16 @@ module ebu ( // if two requests come in at once pick one to select and save the others Address phase // inputs. Abritration scheme is LSU always goes first. - // input stage IFU + //////////////////////////////////////////////////////////////////////////////////////////////////// + // input stages and muxing for IFU and LSU + //////////////////////////////////////////////////////////////////////////////////////////////////// + controllerinputstage IFUInput(.HCLK, .HRESETn, .Save(IFUSave), .Restore(IFURestore), .Disable(IFUDisable), .Request(IFUReq), .HWRITEIn(1'b0), .HSIZEIn(IFUHSIZE), .HBURSTIn(IFUHBURST), .HTRANSIn(IFUHTRANS), .HADDRIn(IFUHADDR), .HWRITEOut(IFUHWRITEOut), .HSIZEOut(IFUHSIZEOut), .HBURSTOut(IFUHBURSTOut), .HREADYOut(IFUHREADY), .HTRANSOut(IFUHTRANSOut), .HADDROut(IFUHADDROut), .HREADYIn(HREADY)); - // input stage LSU // LSU always has priority so there should never be a need to save and restore the address phase inputs. 
controllerinputstage #(0) LSUInput(.HCLK, .HRESETn, .Save(1'b0), .Restore(1'b0), .Disable(LSUDisable), .Request(LSUReq), @@ -115,7 +123,7 @@ module ebu ( .HWRITEOut(LSUHWRITEOut), .HSIZEOut(LSUHSIZEOut), .HBURSTOut(LSUHBURSTOut), .HTRANSOut(LSUHTRANSOut), .HADDROut(LSUHADDROut), .HREADYIn(HREADY)); - // output mux //*** rewrite for general number of controllers. + // output mux //*** switch to structural implementation assign HADDR = LSUSelect ? LSUHADDROut : IFUSelect ? IFUHADDROut : '0; assign HSIZE = LSUSelect ? LSUHSIZEOut : IFUSelect ? IFUHSIZEOut: '0; assign HBURST = LSUSelect ? LSUHBURSTOut : IFUSelect ? IFUHBURSTOut : '0; // If doing memory accesses, use LSUburst, else use Instruction burst. @@ -129,8 +137,13 @@ module ebu ( assign HWSTRB = LSUHWSTRB; // HRDATA is sent to all controllers at the core level. + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Arbitration scheme // FSM decides if arbitration needed. Arbitration is held until the last beat of // a burst is completed. + //////////////////////////////////////////////////////////////////////////////////////////////////// + assign both = LSUReq & IFUReq; flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextState, IDLE, CurrState); always_comb @@ -142,8 +155,27 @@ module ebu ( default: NextState = IDLE; endcase - // This part is only used when burst mode is supported. - // Controller needs to count beats. + // basic arb always selects LSU when both + // replace this block for more sophisticated arbitration as needed. + // Controller 0 (IFU) + assign IFUSave = CurrState == IDLE & both; + assign IFURestore = CurrState == ARBITRATE; + assign IFUDisable = CurrState == ARBITRATE; + assign IFUSelect = (NextState == ARBITRATE) ? 1'b0 : IFUReq; + // Controller 1 (LSU) + // When both the IFU and LSU request at the same time, the FSM will go into the arbitrate state. + // Once the LSU request is done the fsm returns to IDLE. 
To prevent the LSU from regaining + // priority and reissuing the same memory operation, the delayed IFUReqD squashes the LSU request. + // This is necessary because the pipeline is stalled for the entire duration of both transactions, + // and the LSU memory request will still be active. + flopr #(1) ifureqreg(clk, ~HRESETn, IFUReq, IFUReqD); + assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD)); + assign LSUSelect = NextState == ARBITRATE ? 1'b1: LSUReq; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Burst mode logic + //////////////////////////////////////////////////////////////////////////////////////////////////// + flopenr #(4) BeatCountReg(HCLK, ~HRESETn | CntReset | FinalBeat, BeatCntEn, NextBeatCount, BeatCount); assign NextBeatCount = BeatCount + 1'b1; @@ -165,17 +197,6 @@ module ebu ( endcase end - // basic arb always selects LSU when both - // replace this block for more sophisticated arbitration as needed. - // Controller 0 (IFU) - assign IFUSave = CurrState == IDLE & both; - assign IFURestore = CurrState == ARBITRATE; - assign IFUDisable = CurrState == ARBITRATE; - assign IFUSelect = (NextState == ARBITRATE) ? 1'b0 : IFUReq; - // Controller 1 (LSU) - assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD)); - assign LSUSelect = NextState == ARBITRATE ? 
1'b1: LSUReq; - flopr #(1) ifureqreg(clk, ~HRESETn, IFUReq, IFUReqD); endmodule diff --git a/pipelined/src/ifu/bpred.sv b/pipelined/src/ifu/bpred.sv index 219d9b4e..103e7d8c 100644 --- a/pipelined/src/ifu/bpred.sv +++ b/pipelined/src/ifu/bpred.sv @@ -1,13 +1,12 @@ /////////////////////////////////////////// // bpred.sv // -// Written: Ross Thomposn -// Email: ross1728@gmail.com -// Created: February 12, 2021 -// Modified: +// Written: Ross Thomposn ross1728@gmail.com +// Created: 12 February 2021 +// Modified: 19 January 2023 // -// Purpose: Branch prediction unit -// Produces a branch prediction based on branch history. +// Purpose: Branch direction prediction and jump/branch target prediction. +// Prediction made during the fetch stage and corrected in the execution stage. // // A component of the CORE-V-WALLY configurable RISC-V project. // @@ -35,30 +34,29 @@ module bpred ( input logic FlushD, FlushE, FlushM, FlushW, // Fetch stage // the prediction - input logic [31:0] InstrD, // Decompressed decode stage instruction - input logic [`XLEN-1:0] PCNextF, // Next Fetch Address - input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 - output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction - output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage. + input logic [31:0] InstrD, // Decompressed decode stage instruction. Used to decode instruction class + input logic [`XLEN-1:0] PCNextF, // Next Fetch Address + input logic [`XLEN-1:0] PCPlus2or4F, // PCF+2/4 + output logic [`XLEN-1:0] PCNext1F, // Branch Predictor predicted or corrected fetch address on miss prediction + output logic [`XLEN-1:0] NextValidPCE, // Address of next valid instruction after the instruction in the Memory stage. // Update Predictor - input logic [`XLEN-1:0] PCF, // Fetch stage instruction address. - input logic [`XLEN-1:0] PCD, // Decode stage instruction address. 
Also the address the branch predictor took. - input logic [`XLEN-1:0] PCE, // Execution stage instruction address. - input logic [`XLEN-1:0] PCM, // Memory stage instruction address. + input logic [`XLEN-1:0] PCF, // Fetch stage instruction address. + input logic [`XLEN-1:0] PCD, // Decode stage instruction address. Also the address the branch predictor took. + input logic [`XLEN-1:0] PCE, // Execution stage instruction address. + input logic [`XLEN-1:0] PCM, // Memory stage instruction address. - // *** after reviewing the compressed instruction set I am leaning towards having the btb predict the instruction class. - // *** the specifics of how this is encode is subject to change. - input logic PCSrcE, // Executation stage branch is taken - input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address - input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) - output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br + // Branch and jump outcome + input logic PCSrcE, // Execution stage branch is taken + input logic [`XLEN-1:0] IEUAdrE, // The branch/jump target address + input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) + output logic [3:0] InstrClassM, // The valid instruction class. 1-hot encoded as jalr, ret, jr (not ret), j, br // Report branch prediction status - output logic BPPredWrongE, // Prediction is wrong. - output logic DirPredictionWrongM, // Prediction direction is wrong. - output logic BTBPredPCWrongM, // Prediction target wrong. - output logic RASPredPCWrongM, // RAS prediction is wrong. + output logic BPPredWrongE, // Prediction is wrong. + output logic DirPredictionWrongM, // Prediction direction is wrong. + output logic BTBPredPCWrongM, // Prediction target wrong. + output logic RASPredPCWrongM, // RAS prediction is wrong. 
output logic PredictionInstrClassWrongM // Class prediction is wrong. ); diff --git a/pipelined/src/ifu/decompress.sv b/pipelined/src/ifu/decompress.sv index aec215f6..190ff77f 100644 --- a/pipelined/src/ifu/decompress.sv +++ b/pipelined/src/ifu/decompress.sv @@ -1,10 +1,15 @@ /////////////////////////////////////////// // decompress.sv // -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 // // Purpose: Expand 16-bit compressed instructions to 32 bits +// +// Documentation: RISC-V System on Chip Design Chapter 11 (Section 11.3.1) +// RISC-V Specification 13 Dec 2019 Chapter 16 pg. 97 +// *** probably need more documentation in this file since the book is very light on decompression. // // A component of the CORE-V-WALLY configurable RISC-V project. // @@ -27,9 +32,10 @@ `include "wally-config.vh" module decompress ( - input logic [31:0] InstrRawD, - output logic [31:0] InstrD, - output logic IllegalCompInstrD); + input logic [31:0] InstrRawD, // 32-bit instruction or raw un decompress instruction + output logic [31:0] InstrD, // Decompressed instruction + output logic IllegalCompInstrD // Invalid decompressed instruction +); logic [15:0] instr16; logic [4:0] rds1, rs2, rs1p, rs2p, rds1p, rdp; diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 63cea9b4..8e89ceb0 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -121,11 +121,10 @@ module ifu ( // Spill Support ///////////////////////////////////////////////////////////////////////////////////////////// - if(`C_SUPPORTED) begin : SpillSupport - spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .Flush(FlushD), .PCF, .PCPlus4F, .PCNextF, .InstrRawF(InstrRawF), - .InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, - .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); - end else begin : NoSpillSupport + if(`C_SUPPORTED) begin : Spill + spill 
#(`ICACHE) spill(.clk, .reset, .StallD, .FlushD, .PCF, .PCPlus4F, .PCNextF, .InstrRawF, + .InstrDAPageFaultF, .IFUCacheBusStallD, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); + end else begin : NoSpill assign PCNextFSpill = PCNextF; assign PCFSpill = PCF; assign PostSpillInstrRawF = InstrRawF; @@ -189,9 +188,11 @@ module ifu ( assign IgnoreRequest = ITLBMissF | FlushD; // The IROM uses untranslated addresses, so it is not compatible with virtual memory. - if (`IROM_SUPPORTED) begin : irom + if (`IROM_SUPPORTED) begin : irom + logic IROMce; + assign IROMce = ~GatedStallD | reset; assign IFURWF = 2'b10; - irom irom(.clk, .ce(~GatedStallD | reset), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(IROMInstrF)); + irom irom(.clk, .ce(IROMce), .Adr(PCNextFSpill[`XLEN-1:0]), .IROMInstrF); end else begin assign IFURWF = 2'b10; end @@ -227,7 +228,7 @@ module ifu ( .NextAdr(PCNextFSpill[11:0]), .PAdr(PCPF), .CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM)); - ahbcacheinterface #(WORDSPERLINE, LINELEN, LOGBWPL, LLENPOVERAHBW) + ahbcacheinterface #(WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW) ahbcacheinterface(.HCLK(clk), .HRESETn(~reset), .HRDATA, .Flush(FlushD), .CacheBusRW, .HSIZE(IFUHSIZE), .HBURST(IFUHBURST), .HTRANS(IFUHTRANS), .HWSTRB(), diff --git a/pipelined/src/ifu/irom.sv b/pipelined/src/ifu/irom.sv index 3e7e4633..ba23cf23 100644 --- a/pipelined/src/ifu/irom.sv +++ b/pipelined/src/ifu/irom.sv @@ -1,8 +1,9 @@ /////////////////////////////////////////// // irom.sv // -// Written: Ross Thompson ross1728@gmail.com January 30, 2022 -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: 30 January 2022 +// Modified: 18 January 2023 // // Purpose: simple instruction ROM // A component of the CORE-V-WALLY configurable RISC-V project. 
@@ -26,23 +27,30 @@ `include "wally-config.vh" module irom( - input logic clk, ce, - input logic [`XLEN-1:0] Adr, - output logic [31:0] ReadData + input logic clk, + input logic ce, // Chip Enable. 0: Holds IROMInstrF constant + input logic [`XLEN-1:0] Adr, // PCNextFSpill + output logic [31:0] IROMInstrF // Instruction read data ); localparam ADDR_WDITH = $clog2(`IROM_RANGE/8); localparam OFFSET = $clog2(`XLEN/8); - logic [`XLEN-1:0] ReadDataFull; + logic [`XLEN-1:0] IROMInstrFFull; + logic [31:0] RawIROMInstrF; - rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataFull)); - if (`XLEN == 32) assign ReadData = ReadDataFull; - // have to delay Ardr[OFFSET-1] by 1 cycle + logic [1:0] AdrD; + flopen #(2) AdrReg(clk, ce, Adr[2:1], AdrD); + + rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(IROMInstrFFull)); + if (`XLEN == 32) assign RawIROMInstrF = IROMInstrFFull; else begin - logic AdrD; - flopen #(1) AdrReg(clk, ce, Adr[OFFSET-1], AdrD); - assign ReadData = AdrD ? ReadDataFull[63:32] : ReadDataFull[31:0]; + // IROM is aligned to XLEN words, but instructions are 32 bits. Select between the two + // halves. Adr is the Next PCF not PCF so we delay 1 cycle. + assign RawIROMInstrF = AdrD[1] ? IROMInstrFFull[63:32] : IROMInstrFFull[31:0]; end + // If the memory address is only 2-byte aligned (Adr[1] set), return the upper 2 bytes in the lower 2 bytes. + // The spill logic will handle merging the two together. + assign IROMInstrF = AdrD[0] ?
{16'b0, RawIROMInstrF[31:16]} : RawIROMInstrF; endmodule diff --git a/pipelined/src/ifu/spill.sv b/pipelined/src/ifu/spill.sv new file mode 100644 index 00000000..c1a1d8fa --- /dev/null +++ b/pipelined/src/ifu/spill.sv @@ -0,0 +1,112 @@ +/////////////////////////////////////////// +// spill.sv +// +// Written: Ross Thompson ross1728@gmail.com +// Created: 28 January 2022 +// Modified: 19 January 2023 +// +// Purpose: allows the IFU to make extra memory request if instruction address crosses +// cache line boundaries or if instruction address without a cache crosses +// XLEN/8 boundary. +// +// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module spill #( + parameter CACHE_ENABLED // Changes spill threshold to 1 if there is no cache +)(input logic clk, + input logic reset, + input logic StallD, FlushD, + input logic [`XLEN-1:0] PCF, // 2 byte aligned PC in Fetch stage + input logic [`XLEN-1:2] PCPlus4F, // PCF + 4 + input logic [`XLEN-1:0] PCNextF, // The next PCF + input logic [31:0] InstrRawF, // Instruction from the IROM, I$, or bus. Used to check if the instruction is compressed + input logic IFUCacheBusStallD, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched + input logic ITLBMissF, // ITLB miss, ignore memory request + input logic InstrDAPageFaultF, // Ignore memory request if the hptw supports writes and a DA page fault occurs (hptw is still active) + output logic [`XLEN-1:0] PCNextFSpill, // The next PCF for one of the two memory addresses of the spill + output logic [`XLEN-1:0] PCFSpill, // PCF for one of the two memory addresses of the spill + output logic SelNextSpillF, // During the transition between the two spill operations, the IFU should stall the pipeline + output logic [31:0] PostSpillInstrRawF,// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic CompressedF); // The fetched instruction is compressed + + // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1] + localparam integer SPILLTHRESHOLD = CACHE_ENABLED ?
`ICACHE_LINELENINBITS/32 : 1; + logic [`XLEN-1:0] PCPlus2F; + logic TakeSpillF; + logic SpillF; + logic SelSpillF; + logic SpillSaveF; + logic [15:0] InstrFirstHalf; + typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; + (* mark_debug = "true" *) statetype CurrState, NextState; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // PC logic + //////////////////////////////////////////////////////////////////////////////////////////////////// + + // compute PCF+2 from the raw PC+4 + mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F)); + // select between PCNextF and PCF+2 + mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~FlushD), .y(PCNextFSpill)); + // select between PCF and PCF+2 + mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill)); + + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Detect spill + //////////////////////////////////////////////////////////////////////////////////////////////////// + + assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1]; + assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF)); + + always_ff @(posedge clk) + if (reset | FlushD) CurrState <= #1 STATE_READY; + else CurrState <= #1 NextState; + + always_comb begin + case (CurrState) + STATE_READY: if (TakeSpillF) NextState = STATE_SPILL; + else NextState = STATE_READY; + STATE_SPILL: if(IFUCacheBusStallD | StallD) NextState = STATE_SPILL; + else NextState = STATE_READY; + default: NextState = STATE_READY; + endcase + end + + assign SelSpillF = (CurrState == STATE_SPILL); + assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallD); + assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushD; + + 
//////////////////////////////////////////////////////////////////////////////////////////////////// + // Merge spilled instruction + //////////////////////////////////////////////////////////////////////////////////////////////////// + + // save the first 2 bytes + flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalf); + + // merge together + mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalf}, SpillF, PostSpillInstrRawF); + + assign CompressedF = PostSpillInstrRawF[1:0] != 2'b11; + +endmodule diff --git a/pipelined/src/ifu/spillsupport.sv b/pipelined/src/ifu/spillsupport.sv deleted file mode 100644 index 3cf6ea94..00000000 --- a/pipelined/src/ifu/spillsupport.sv +++ /dev/null @@ -1,98 +0,0 @@ -/////////////////////////////////////////// -// spillsupport.sv -// -// Written: Ross Thompson ross1728@gmail.com January 28, 2022 -// Modified: -// -// Purpose: allows the IFU to make extra memory request if instruction address crosses -// cache line boundaries or if instruction address without a cache crosses -// XLEN/8 boundary. -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" - -module spillsupport #(parameter CACHE_ENABLED) - (input logic clk, - input logic reset, - input logic StallF, Flush, - input logic [`XLEN-1:0] PCF, - input logic [`XLEN-1:2] PCPlus4F, - input logic [`XLEN-1:0] PCNextF, - input logic [31:0] InstrRawF, - input logic IFUCacheBusStallD, - input logic ITLBMissF, - input logic InstrDAPageFaultF, - output logic [`XLEN-1:0] PCNextFSpill, - output logic [`XLEN-1:0] PCFSpill, - output logic SelNextSpillF, - output logic [31:0] PostSpillInstrRawF, - output logic CompressedF); - - - localparam integer SPILLTHRESHOLD = CACHE_ENABLED ? `ICACHE_LINELENINBITS/32 : 1; - logic [`XLEN-1:0] PCPlus2F; - logic TakeSpillF; - logic SpillF; - logic SelSpillF, SpillSaveF; - logic [15:0] SpillDataLine0, SavedInstr; - typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; - (* mark_debug = "true" *) statetype CurrState, NextState; - - // compute PCF+2 - mux2 #(`XLEN) pcplus2mux(.d0({PCF[`XLEN-1:2], 2'b10}), .d1({PCPlus4F, 2'b00}), .s(PCF[1]), .y(PCPlus2F)); - // select between PCNextF and PCF+2 - mux2 #(`XLEN) pcnextspillmux(.d0(PCNextF), .d1(PCPlus2F), .s(SelNextSpillF & ~Flush), .y(PCNextFSpill)); - // select between PCF and PCF+2 - mux2 #(`XLEN) pcspillmux(.d0(PCF), .d1(PCPlus2F), .s(SelSpillF), .y(PCFSpill)); - - assign SpillF = &PCF[$clog2(SPILLTHRESHOLD)+1:1]; - assign TakeSpillF = SpillF & ~IFUCacheBusStallD & ~(ITLBMissF | (`HPTW_WRITES_SUPPORTED & InstrDAPageFaultF)); - - always_ff @(posedge clk) - if (reset | Flush) CurrState <= #1 STATE_READY; - else CurrState <= #1 NextState; - - always_comb begin - case (CurrState) - STATE_READY: if (TakeSpillF) NextState = STATE_SPILL; - else NextState = STATE_READY; - STATE_SPILL: if(IFUCacheBusStallD | StallF) NextState = STATE_SPILL; - else NextState = STATE_READY; - default: NextState = STATE_READY; - endcase - end - - assign SelSpillF = (CurrState == 
STATE_SPILL); - assign SelNextSpillF = (CurrState == STATE_READY & TakeSpillF) | - (CurrState == STATE_SPILL & IFUCacheBusStallD); - assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF; - assign SavedInstr = CACHE_ENABLED ? InstrRawF[15:0] : InstrRawF[31:16]; - - flopenr #(16) SpillInstrReg(.clk(clk), - .en(SpillSaveF & ~Flush), - .reset(reset), - .d(SavedInstr), - .q(SpillDataLine0)); - - mux2 #(32) postspillmux(.d0(InstrRawF), .d1({InstrRawF[15:0], SpillDataLine0}), .s(SpillF), - .y(PostSpillInstrRawF)); - assign CompressedF = PostSpillInstrRawF[1:0] != 2'b11; - -endmodule diff --git a/pipelined/src/lsu/amoalu.sv b/pipelined/src/lsu/amoalu.sv index fa8703b5..eda3576b 100644 --- a/pipelined/src/lsu/amoalu.sv +++ b/pipelined/src/lsu/amoalu.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // amoalu.sv // -// Written: David_Harris@hmc.edu 10 March 2021 -// Modified: +// Written: David_Harris@hmc.edu +// Created: 10 March 2021 +// Modified: 18 January 2023 // // Purpose: Performs AMO operations // +// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -26,13 +29,12 @@ `include "wally-config.vh" -// *** this should probably be moved into the LSU because it is instantiated in the D$ - module amoalu ( - input logic [`XLEN-1:0] srca, srcb, - input logic [6:0] funct, - input logic [1:0] width, - output logic [`XLEN-1:0] result + input logic [`XLEN-1:0] ReadDataM, // LSU's ReadData + input logic [`XLEN-1:0] IHWriteDataM, // LSU's WriteData + input logic [6:0] LSUFunct7M, // ALU Operation + input logic [2:0] LSUFunct3M, // Memory access width + output logic [`XLEN-1:0] AMOResult // ALU output ); logic [`XLEN-1:0] a, b, y; @@ -41,7 +43,7 @@ module amoalu ( // a single carry chain should be shared for + and the four min/max // and the same mux can be used to select b for swap.
always_comb - case (funct[6:2]) + case (LSUFunct7M[6:2]) 5'b00001: y = b; // amoswap 5'b00000: y = a + b; // amoadd 5'b00100: y = a ^ b; // amoxor @@ -56,19 +58,19 @@ module amoalu ( // sign extend if necessary if (`XLEN == 32) begin:sext - assign a = srca; - assign b = srcb; - assign result = y; + assign a = ReadDataM; + assign b = IHWriteDataM; + assign AMOResult = y; end else begin:sext // `XLEN = 64 always_comb - if (width == 2'b10) begin // sign-extend word-length operations - a = {{32{srca[31]}}, srca[31:0]}; - b = {{32{srcb[31]}}, srcb[31:0]}; - result = {{32{y[31]}}, y[31:0]}; + if (LSUFunct3M[1:0] == 2'b10) begin // sign-extend word-length operations + a = {{32{ReadDataM[31]}}, ReadDataM[31:0]}; + b = {{32{IHWriteDataM[31]}}, IHWriteDataM[31:0]}; + AMOResult = {{32{y[31]}}, y[31:0]}; end else begin - a = srca; - b = srcb; - result = y; + a = ReadDataM; + b = IHWriteDataM; + AMOResult = y; end end endmodule diff --git a/pipelined/src/lsu/atomic.sv b/pipelined/src/lsu/atomic.sv index 64c1dc4b..fc9ede11 100644 --- a/pipelined/src/lsu/atomic.sv +++ b/pipelined/src/lsu/atomic.sv @@ -1,10 +1,13 @@ /////////////////////////////////////////// // atomic.sv // -// Written: Ross Thompson ross1728@gmail.com January 31, 2022 -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: 31 January 2022 +// Modified: 18 January 2023 // -// Purpose: atomic data path. +// Purpose: Wrapper for amoalu and lrsc +// +// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***) // // A component of the CORE-V-WALLY configurable RISC-V project. 
// @@ -28,25 +31,25 @@ module atomic ( input logic clk, - input logic reset, StallW, - input logic [`XLEN-1:0] ReadDataM, - input logic [`XLEN-1:0] IHWriteDataM, - input logic [`PA_BITS-1:0] PAdrM, - input logic [6:0] LSUFunct7M, - input logic [2:0] LSUFunct3M, - input logic [1:0] LSUAtomicM, - input logic [1:0] PreLSURWM, - input logic IgnoreRequest, - output logic [`XLEN-1:0] IMAWriteDataM, - output logic SquashSCW, - output logic [1:0] LSURWM + input logic reset, + input logic StallW, + input logic [`XLEN-1:0] ReadDataM, // LSU ReadData XLEN because FPU does not issue atomic memory operation from FPU registers + input logic [`XLEN-1:0] IHWriteDataM, // LSU WriteData XLEN because FPU does not issue atomic memory operation from FPU registers + input logic [`PA_BITS-1:0] PAdrM, // Physical memory address + input logic [6:0] LSUFunct7M, // AMO alu operation gated by HPTW + input logic [2:0] LSUFunct3M, // IEU or HPTW memory operation size + input logic [1:0] LSUAtomicM, // 10: AMO operation, select AMOResult as the writedata output, 01: LR/SC operation + input logic [1:0] PreLSURWM, // IEU or HPTW Read/Write signal + input logic IgnoreRequest, // On FlushM or TLB miss ignore memory operation + output logic [`XLEN-1:0] IMAWriteDataM, // IEU, HPTW, or AMO write data + output logic SquashSCW, // Store conditional failed disable write to GPR + output logic [1:0] LSURWM // IEU or HPTW Read/Write signal gated by LR/SC ); logic [`XLEN-1:0] AMOResult; logic MemReadM; - amoalu amoalu(.srca(ReadDataM), .srcb(IHWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), - .result(AMOResult)); + amoalu amoalu(.ReadDataM, .IHWriteDataM, .LSUFunct7M, .LSUFunct3M, .AMOResult); mux2 #(`XLEN) wdmux(IHWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM); assign MemReadM = PreLSURWM[1] & ~IgnoreRequest; diff --git a/pipelined/src/lsu/dtim.sv b/pipelined/src/lsu/dtim.sv index a5f74e91..4a5cba1f 100644 --- a/pipelined/src/lsu/dtim.sv +++ b/pipelined/src/lsu/dtim.sv @@ -1,10 +1,14 @@ 
/////////////////////////////////////////// // dtim.sv // -// Written: Ross Thompson ross1728@gmail.com January 30, 2022 -// Modified: +// Written: Ross Thompson ross1728@gmail.com +// Created: 30 January 2022 +// Modified: 18 January 2023 +// +// Purpose: tightly integrated memory into the LSU. +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.12) // -// Purpose: simple memory with bus or cache. // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University @@ -27,10 +31,10 @@ module dtim( input logic clk, - input logic ce, // Chip Enable + input logic FlushW, + input logic ce, // Chip Enable. 0: Holds ReadDataWordM input logic [1:0] MemRWM, // Read/Write control - input logic [`PA_BITS-1:0] AdrM, // Execution stage memory address - input logic FlushW, + input logic [`PA_BITS-1:0] DTIMAdr, // No stall: Execution stage memory address. Stall: Memory stage memory address input logic [`LLEN-1:0] WriteDataM, // Write data from IEU input logic [`LLEN/8-1:0] ByteMaskM, // Selects which bytes within a word to write output logic [`LLEN-1:0] ReadDataWordM // Read data before subword selection @@ -44,6 +48,6 @@ module dtim( assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap. 
ram1p1rwbe #(.DEPTH(`DTIM_RANGE/8), .WIDTH(`LLEN)) - ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(AdrM[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); + ram(.clk, .ce, .we, .bwe(ByteMaskM), .addr(DTIMAdr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataWordM), .din(WriteDataM)); endmodule diff --git a/pipelined/src/lsu/endianswap.sv b/pipelined/src/lsu/endianswap.sv index e1fa1963..97846d97 100644 --- a/pipelined/src/lsu/endianswap.sv +++ b/pipelined/src/lsu/endianswap.sv @@ -1,8 +1,9 @@ /////////////////////////////////////////// // endianswap.sv // -// Written: David_Harris@hmc.edu 7 May 2022 -// Modified: +// Written: David_Harris@hmc.edu +// Created: 7 May 2022 +// Modified: 18 January 2023 // // Purpose: Swap byte order for Big-Endian accesses // diff --git a/pipelined/src/lsu/lrsc.sv b/pipelined/src/lsu/lrsc.sv index 80b584dc..7edae6b8 100644 --- a/pipelined/src/lsu/lrsc.sv +++ b/pipelined/src/lsu/lrsc.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // lrsc.sv // -// Written: David_Harris@hmc.edu 17 July 2021 -// Modified: +// Written: David_Harris@hmc.edu +// Created: 17 July 2021 +// Modified: 18 January 2023 // // Purpose: Load Reserved / Store Conditional unit // Track the reservation and squash the store if it fails +// +// Documentation: RISC-V System on Chip Design Chapter 14 (Figure ***) // // A component of the CORE-V-WALLY configurable RISC-V project. 
// diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 49988e79..6d958d02 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -131,7 +131,7 @@ module lsu ( logic LSULoadAccessFaultM; // Load acces fault logic LSUStoreAmoAccessFaultM; // Store access fault logic IgnoreRequestTLB; // On either ITLB or DTLB miss, ignore miss so HPTW can handle - logic IgnoreRequest; // On FlushM, ignore TLB miss + logic IgnoreRequest; // On FlushM or TLB miss ignore memory operation logic SelDTIM; // Select DTIM rather than bus or D$ @@ -232,17 +232,19 @@ module lsu ( // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. dtim dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), - .AdrM(DTIMAdr), .FlushW, .WriteDataM(LSUWriteDataM), + .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), .ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); end else begin end if (`BUS) begin : bus - localparam integer LLENWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`LLEN : 1; // Number of LLEN words in cacheline - localparam integer LLENLOGBWPL = `DCACHE ? $clog2(LLENWORDSPERLINE) : 1; // Log2 of ^ - localparam integer BEATSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`AHBW : 1; // Number of AHBW words (beats) in cacheline - localparam integer AHBWLOGBWPL = `DCACHE ? $clog2(BEATSPERLINE) : 1; // Log2 of ^ if(`DCACHE) begin : dcache - localparam integer LINELEN = `DCACHE ? 
`DCACHE_LINELENINBITS : `XLEN; // Number of bytes in cacheline + localparam integer LLENWORDSPERLINE = `DCACHE_LINELENINBITS/`LLEN; // Number of LLEN words in cacheline + localparam integer LLENLOGBWPL = $clog2(LLENWORDSPERLINE); // Log2 of ^ + localparam integer BEATSPERLINE = `DCACHE_LINELENINBITS/`AHBW; // Number of AHBW words (beats) in cacheline + localparam integer AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^ + localparam integer LINELEN = `DCACHE_LINELENINBITS; // Number of bits in cacheline + localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) + logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline logic [`PA_BITS-1:0] DCacheBusAdr; // Cacheline address to fetch or writeback. logic [AHBWLOGBWPL-1:0] BeatCount; // Position within a cacheline. ahbcacheinterface to cache @@ -250,7 +252,6 @@ module lsu ( logic SelBusBeat; // ahbcacheinterface selects postion in cacheline with BeatCount logic [1:0] CacheBusRW; // Cache sends request to ahbcacheinterface logic [1:0] BusRW; // Uncached bus memory access - localparam integer LLENPOVERAHBW = `LLEN / `AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. 
(implementation limitation) logic CacheableOrFlushCacheM; // Memory address is cacheable or operation is a cache flush logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11) logic [1:0] CacheAtomicM; // Cache AMO @@ -272,7 +273,7 @@ module lsu ( .FetchBuffer, .CacheBusRW, .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); - ahbcacheinterface #(.BEATSPERLINE(BEATSPERLINE), .LINELEN(LINELEN), .LOGWPL(AHBWLOGBWPL), .LLENPOVERAHBW(LLENPOVERAHBW)) ahbcacheinterface( + ahbcacheinterface #(.BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), diff --git a/pipelined/src/lsu/subwordread.sv b/pipelined/src/lsu/subwordread.sv index ade4d3d4..784db694 100644 --- a/pipelined/src/lsu/subwordread.sv +++ b/pipelined/src/lsu/subwordread.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // subwordread.sv // -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 // // Purpose: Extract subwords and sign extend for reads // +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// // A component of the CORE-V-WALLY configurable RISC-V project. 
// // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University diff --git a/pipelined/src/lsu/subwordwrite.sv b/pipelined/src/lsu/subwordwrite.sv index ae18b840..ee26b78f 100644 --- a/pipelined/src/lsu/subwordwrite.sv +++ b/pipelined/src/lsu/subwordwrite.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // subwordwrite.sv // -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 // // Purpose: Masking and muxing for subword writes // +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University diff --git a/pipelined/src/lsu/swbytemask.sv b/pipelined/src/lsu/swbytemask.sv index 6167e4ea..17eedd4c 100644 --- a/pipelined/src/lsu/swbytemask.sv +++ b/pipelined/src/lsu/swbytemask.sv @@ -1,11 +1,14 @@ /////////////////////////////////////////// // swbytemask.sv // -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 // // Purpose: On-chip RAM, external to core // +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// // A component of the CORE-V-WALLY configurable RISC-V project. // // Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University