mirror of
https://github.com/openhwgroup/cvw
synced 2025-02-11 06:05:49 +00:00
142 lines
7.0 KiB
Systemverilog
142 lines
7.0 KiB
Systemverilog
///////////////////////////////////////////
|
|
// spill.sv
|
|
//
|
|
// Written: Rose Thompson ross1728@gmail.com
|
|
// Created: 26 October 2023
|
|
// Modified: 26 October 2023
|
|
//
|
|
// Purpose: This module implements native alignment support for the Zicclsm extension
|
|
// It is simlar to the IFU's spill module and probably could be merged together with
|
|
// some effort.
|
|
//
|
|
// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5)
|
|
//
|
|
// A component of the CORE-V-WALLY configurable RISC-V project.
|
|
//
|
|
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
|
|
//
|
|
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
|
//
|
|
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
|
|
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
|
// may obtain a copy of the License at
|
|
//
|
|
// https://solderpad.org/licenses/SHL-2.1/
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
|
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
// and limitations under the License.
|
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
module align import cvw::*; #(parameter cvw_t P) (
|
|
input logic clk,
|
|
input logic reset,
|
|
input logic StallM, FlushM,
|
|
input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage
|
|
input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM
|
|
input logic [2:0] Funct3M, // Size of memory operation
|
|
input logic [P.LLEN*2-1:0]ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed
|
|
input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched
|
|
input logic DTLBMissM, // ITLB miss, ignore memory request
|
|
input logic DataUpdateDAM, // ITLB miss, ignore memory request
|
|
|
|
output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill
|
|
output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill
|
|
output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline
|
|
output logic [P.LLEN-1:0] ReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction
|
|
|
|
// Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1]
|
|
typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype;
|
|
|
|
statetype CurrState, NextState;
|
|
logic TakeSpillM, TakeSpillE;
|
|
logic SpillM;
|
|
logic SelSpillM;
|
|
logic SpillSaveM;
|
|
logic [P.LLEN-1:0] ReadDataWordFirstHalfM;
|
|
logic MisalignedM;
|
|
logic [P.LLEN*2-1:0] ReadDataWordSpillAllM;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
// PC logic
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
localparam LLENINBYTES = P.LLEN/8;
|
|
logic IEUAdrIncrementM;
|
|
assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES;
|
|
mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(TakeSpillM), .y(IEUAdrSpillM));
|
|
mux2 #(P.XLEN) pcnextspillmux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(TakeSpillE), .y(IEUAdrSpillE));
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Detect spill
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// spill detection in lsu is more complex than ifu, depends on 3 factors
|
|
// 1) operation size
|
|
// 2) offset
|
|
// 3) access location within the cacheline
|
|
logic [P.DCACHE_LINELENINBITS/8-1:P.LLEN/8] WordOffsetM;
|
|
logic [P.LLEN/8-1:0] ByteOffsetM;
|
|
logic HalfSpillM, WordSpillM;
|
|
assign {WordOffsetM, ByteOffsetM} = IEUAdrM[P.DCACHE_LINELENINBITS/8-1:0];
|
|
assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0;
|
|
assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00;
|
|
if(P.LLEN == 64) begin
|
|
logic DoubleSpillM;
|
|
assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00;
|
|
assign SpillM = HalfSpillM | WordOffsetM | DoubleSpillM;
|
|
end else begin
|
|
assign SpillM = HalfSpillM | WordOffsetM;
|
|
end
|
|
|
|
// Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits
|
|
assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM));
|
|
|
|
always_ff @(posedge clk)
|
|
if (reset | FlushM) CurrState <= #1 STATE_READY;
|
|
else CurrState <= #1 NextState;
|
|
|
|
always_comb begin
|
|
case (CurrState)
|
|
STATE_READY: if (TakeSpillM) NextState = STATE_SPILL;
|
|
else NextState = STATE_READY;
|
|
STATE_SPILL: if(StallM) NextState = STATE_SPILL;
|
|
else NextState = STATE_READY;
|
|
default: NextState = STATE_READY;
|
|
endcase
|
|
end
|
|
|
|
assign SelSpillM = (CurrState == STATE_SPILL);
|
|
assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & LSUStallM);
|
|
assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM;
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
// Merge spilled data
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// save the first 2 bytes
|
|
flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM);
|
|
|
|
// merge together
|
|
mux2 #(2*P.LLEN) postspillmux(ReadDataWordMuxM, {ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM);
|
|
|
|
// align by shifting
|
|
// *** optimize by merging with halfSpill, WordSpill, etc
|
|
logic HalfMisalignedM, WordMisalignedM;
|
|
assign HalfMisalignedM = Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0;
|
|
assign WordMisalignedM = Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00;
|
|
if(P.LLEN == 64) begin
|
|
logic DoubleMisalignedM;
|
|
assign DoubleMisalignedM = Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00;
|
|
assign MisalignedM = HalfMisalignedM | WordMisalignedM | DoubleMisalignedM;
|
|
end else begin
|
|
assign MisalignedM = HalfMisalignedM | WordMisalignedM;
|
|
end
|
|
|
|
// shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit)
|
|
// 8 * is for shifting by bytes not bits
|
|
assign ReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0);
|
|
|
|
endmodule
|