///////////////////////////////////////////
// spill.sv
//
// Written: Rose Thompson ross1728@gmail.com
// Created: 26 October 2023
// Modified: 26 October 2023
//
// Purpose: This module implements native alignment support for the Zicclsm extension
//          It is similar to the IFU's spill module and probably could be merged together with
//          some effort.
//
// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
// https://github.com/openhwgroup/cvw
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////

// align: detects misaligned loads/stores, sequences the two accesses of a "spill"
// (a misaligned access flagged by PotentialSpillM below), merges and shifts the read data,
// and shifts the write data and byte mask to match.
module align import cvw::*; #(parameter cvw_t P) (
  input  logic                     clk,
  input  logic                     reset,
  input  logic                     StallM, FlushM,            // StallM holds STATE_SPILL; FlushM returns the FSM to STATE_READY
  input  logic [P.XLEN-1:0]        IEUAdrM,                   // Address of the current access; source of the byte offset and of the incremented address
  input  logic [P.XLEN-1:0]        IEUAdrE,                   // The next IEUAdrM
  input  logic [2:0]               Funct3M,                   // Size of memory operation
  input  logic                     FpLoadStoreM,              // Floating point Load or Store
  input  logic [1:0]               MemRWM,                    // Any bit set marks an access that is checked for misalignment
  input  logic [P.LLEN*2-1:0]      DCacheReadDataWordM,       // Double-width read data; the low LLEN bits are captured on the first access of a spill
  input  logic                     CacheBusHPWTStall,         // Stall input: a spill is not started while set, and SelSpillE is held while set in STATE_SPILL
  input  logic                     SelHPTW,                   // When set, the shift amount is forced to zero
  input  logic [(P.LLEN-1)/8:0]    ByteMaskM,                 // Byte mask for the low half of ByteMaskSpillM
  input  logic [(P.LLEN-1)/8:0]    ByteMaskExtendedM,         // Byte mask for the upper half of ByteMaskSpillM when not spilling
  input  logic [P.LLEN-1:0]        LSUWriteDataM,             // Unshifted write data
  output logic [(P.LLEN*2-1)/8:0]  ByteMaskSpillM,            // Byte mask selected by bytemaskspillmux
  output logic [P.LLEN*2-1:0]      LSUWriteDataSpillM,        // Write data replicated and shifted left by the byte offset
  output logic [P.XLEN-1:0]        IEUAdrSpillE,              // IEUAdrE, or IEUAdrM + LLEN/8 when SelSpillE
  output logic [P.XLEN-1:0]        IEUAdrSpillM,              // IEUAdrM, or IEUAdrM + LLEN/8 when in STATE_SPILL
  output logic                     SelSpillE,                 // Selects the incremented address on IEUAdrSpillE; also drives SpillStallM
  output logic [P.LLEN-1:0]        DCacheReadDataWordSpillM,  // Low LLEN bits of the merged read data after shifting right by the byte offset
  output logic                     SpillStallM);              // Equal to SelSpillE

  localparam LLENINBYTES    = P.LLEN/8;
  localparam OFFSET_BIT_POS = $clog2(P.DCACHE_LINELENINBITS/8); // log2 of the line length in bytes; upper bound of the address bits tested for a potential spill
  localparam OFFSET_LEN     = $clog2(LLENINBYTES);              // width of the byte offset within an LLEN word

  // NOTE(review): STATE_STORE_DELAY is never selected by the next-state logic in this module.
  typedef enum logic [1:0] {STATE_READY, STATE_SPILL, STATE_STORE_DELAY} statetype;

  statetype                  CurrState, NextState;
  logic                      ValidSpillM;
  logic                      SelSpillM;
  logic                      SpillSaveM;
  logic [P.LLEN-1:0]         ReadDataWordFirstHalfM;
  logic                      MisalignedM;
  logic [P.LLEN*2-1:0]       ReadDataWordSpillAllM;
  logic [P.LLEN*2-1:0]       ReadDataWordSpillShiftedM;

  logic [P.XLEN-1:0]         IEUAdrIncrementM;

  logic [OFFSET_LEN-1:0]     AccessByteOffsetM;
  logic [OFFSET_LEN+2:0]     ShiftAmount;   // byte offset * 8 (three extra low bits)
  logic                      PotentialSpillM;

  // Address of the second access of a spill: one LLEN word past the first.
  /* verilator lint_off WIDTHEXPAND */
  assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES;
  /* verilator lint_on WIDTHEXPAND */
  mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE));
  mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM));

  ////////////////////////////////////////////////////////////////////////////////////////////////////
  // Detect spill
  ////////////////////////////////////////////////////////////////////////////////////////////////////

  // spill detection in lsu is more complex than ifu, depends on 3 factors
  // 1) operation size
  // 2) offset
  // 3) access location within the cacheline

  // compute misalignment
  always_comb begin
    // Funct3M[2] is only considered when FpLoadStoreM is set; otherwise it is masked off.
    case (Funct3M & {FpLoadStoreM, 2'b11})
      3'b000:  AccessByteOffsetM = '0;                                         // byte access
      3'b001:  AccessByteOffsetM = {{OFFSET_LEN-1{1'b0}}, IEUAdrM[0]};         // half access
      3'b010:  AccessByteOffsetM = {{OFFSET_LEN-2{1'b0}}, IEUAdrM[1:0]};       // word access
      3'b011:  if (P.LLEN >= 64) AccessByteOffsetM = {{OFFSET_LEN-3{1'b0}}, IEUAdrM[2:0]}; // double access
               else              AccessByteOffsetM = '0;                       // shouldn't happen
      3'b100:  AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0];                    // quad access (same value regardless of P.LLEN)
      default: AccessByteOffsetM = '0;                                         // shouldn't happen
    endcase

    // NOTE(review): this case keys on Funct3M[1:0] only, so the 3'b100 (quad) encoding above
    // lands in the 2'b00 arm and never reports a potential spill -- confirm this is intended.
    case (Funct3M[1:0])
      2'b00:   PotentialSpillM = 1'b0;                                 // byte access
      2'b01:   PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:1] == '1;    // half access
      2'b10:   PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:2] == '1;    // word access
      2'b11:   PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:3] == '1;    // double access
      default: PotentialSpillM = 1'b0;
    endcase
  end

  assign MisalignedM = (|MemRWM) & (AccessByteOffsetM != '0);

  assign ValidSpillM = MisalignedM & PotentialSpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall

  // Spill sequencing FSM: READY -> SPILL on a valid spill, SPILL held while StallM.
  always_ff @(posedge clk)
    if (reset | FlushM) CurrState <= STATE_READY;
    else                CurrState <= NextState;

  always_comb begin
    case (CurrState)
      STATE_READY: if (ValidSpillM) NextState = STATE_SPILL; // load spill
                   else             NextState = STATE_READY; // no spill
      STATE_SPILL: if (StallM)      NextState = STATE_SPILL;
                   else             NextState = STATE_READY;
      default:                      NextState = STATE_READY;
    endcase
  end

  assign SelSpillM   = (CurrState == STATE_SPILL);
  assign SelSpillE   = ((CurrState == STATE_READY) & ValidSpillM) |
                       ((CurrState == STATE_SPILL) & CacheBusHPWTStall);
  assign SpillSaveM  = (CurrState == STATE_READY) & ValidSpillM & ~FlushM;
  assign SpillStallM = SelSpillE;

  ////////////////////////////////////////////////////////////////////////////////////////////////////
  // Merge spilled data
  ////////////////////////////////////////////////////////////////////////////////////////////////////

  // save the first native word
  flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM);

  // merge together: in STATE_SPILL the saved first word forms the low half and the
  // low LLEN bits of the current read data form the upper half
  mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SelSpillM, ReadDataWordSpillAllM);

  // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit)
  // 8 * is for shifting by bytes not bits
  assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate
  assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount;
  assign DCacheReadDataWordSpillM  = ReadDataWordSpillShiftedM[P.LLEN-1:0];

  // write path. Also has the 8:1 shifter muxing for the byteoffset
  // then it also has the mux to select when a spill occurs
  logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM;  // *** RT: Find a better way. I'm extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data.

  // Three copies are shifted left and the upper 2*LLEN bits are kept.
  assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount;
  assign LSUWriteDataSpillM      = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN];

  mux3 #(2*P.LLEN/8) bytemaskspillmux({ByteMaskExtendedM, ByteMaskM},            // no spill
                                      {{{P.LLEN/8}{1'b0}}, ByteMaskM},           // spill, first half
                                      {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM},   // spill, second half
                                      {SelSpillM, SelSpillE}, ByteMaskSpillM);
endmodule