From 12763b72977d9769b0b52a418c53560cb25b4659 Mon Sep 17 00:00:00 2001
From: Rose Thompson
Date: Thu, 26 Oct 2023 10:47:00 -0500
Subject: [PATCH] begin implementation of Zicclsm.

---
 src/lsu/align.sv       | 121 ++++++++++++++++++++++++++
 src/lsu/subwordread.sv | 193 +++++++++++++++++++++++++++++++++--------
 2 files changed, 280 insertions(+), 34 deletions(-)
 create mode 100644 src/lsu/align.sv

diff --git a/src/lsu/align.sv b/src/lsu/align.sv
new file mode 100644
index 000000000..b3e810ee2
--- /dev/null
+++ b/src/lsu/align.sv
@@ -0,0 +1,121 @@
+///////////////////////////////////////////
+// align.sv
+//
+// Written: Rose Thompson ross1728@gmail.com
+// Created: 26 October 2023
+// Modified: 26 October 2023
+//
+// Purpose: This module implements native alignment support for the Zicclsm extension
+//          It is similar to the IFU's spill module and probably could be merged together with
+//          some effort.
+//
+// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5)
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module align import cvw::*;  #(parameter cvw_t P) (
+  input logic               clk,
+  input logic               reset,
+  input logic               StallM, FlushM,
+  input logic [P.XLEN-1:0]  IEUAdrM,            // Address tested for a spill; source of IEUAdrSpillM and of the incremented address
+  input logic [P.XLEN-1:0]  IEUAdrE,            // The next IEUAdrM
+  input logic [31:0]        ReadDataWordMuxM,   // NOTE(review): not read yet; the merge logic below still uses the IFU-style name InstrRawF
+  input logic               LSUStallM,          // NOTE(review): not read yet; the logic below still uses the IFU-style name IFUCacheBusStallF
+  input logic               DTLBMissM,          // NOTE(review): not read yet; the logic below still uses the IFU-style name ITLBMissF
+
+  output logic [P.XLEN-1:0] IEUAdrSpillE,       // IEUAdrE, or the incremented address when TakeSpillE selects it
+  output logic [P.XLEN-1:0] IEUAdrSpillM,       // {IEUAdrM[XLEN-1:2], 2'b10}, or the incremented address when TakeSpillM selects it
+  output logic              SelSpillE,          // NOTE(review): not driven yet by this module
+  output logic [31:0]       ReadDataWordSpillM); // NOTE(review): not driven yet by this module
+
+  // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]).  Without a cache this is just PCF[1]
+  typedef enum logic [1:0]  {STATE_READY, STATE_SPILL} statetype;
+
+  statetype          CurrState, NextState;
+  logic              TakeSpillM, TakeSpillE;   // NOTE(review): used as mux selects below but never assigned
+  logic              SpillF;
+  logic              SelSpillF;
+  logic              SpillSaveF;
+  logic [15:0]       InstrFirstHalfF;
+
+  ////////////////////////////////////////////////////////////////////////////////////////////////////
+  // PC logic
+  ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+  localparam LLENINBYTES = P.LLEN/8;           // LLEN is a field of the configuration struct P, not a module-level name
+  logic [P.XLEN-1:0] IEUAdrIncrementM;         // full address width: it is an address sum and feeds the XLEN-wide muxes
+  assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES;
+  mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(TakeSpillM), .y(IEUAdrSpillM));
+  mux2 #(P.XLEN) pcnextspillmux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(TakeSpillE), .y(IEUAdrSpillE));
+
+  ////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Detect spill
+  ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+  // spill detection in lsu is more complex than ifu, depends on 3 factors
+  // 1) operation size
+  // 2) offset
+  // 3) access location within the cacheline or is the access is uncached.
+  // first consider uncached operations
+  // accesses are always aligned to the natural size of the bus (XLEN or AHBW)
+
+  if (P.ICACHE_SUPPORTED) begin
+    logic  SpillCachedF, SpillUncachedF;
+    assign SpillCachedF = &IEUAdrM[$clog2(P.ICACHE_LINELENINBITS/32)+1:1];
+    assign SpillUncachedF = IEUAdrM[1]; // *** try to optimize this based on whether the next instruction is 16 bits and by fetching 64 bits in RV64
+    assign SpillF = CacheableF ? SpillCachedF : SpillUncachedF; // NOTE(review): CacheableF is not declared in this module
+  end else
+    assign SpillF = IEUAdrM[1]; // *** might relax - only spill if next instruction is uncompressed
+  // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits
+  assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF)); // NOTE(review): TakeSpillF, IFUCacheBusStallF, ITLBMissF, InstrUpdateDAF are not declared here
+
+  always_ff @(posedge clk)
+    if (reset | FlushM)    CurrState <= #1 STATE_READY;
+    else CurrState <= #1 NextState;
+
+  always_comb begin
+    case (CurrState)
+      STATE_READY: if (TakeSpillF)                NextState = STATE_SPILL;
+                   else                           NextState = STATE_READY;
+      STATE_SPILL: if(StallM)                     NextState = STATE_SPILL;
+                   else                           NextState = STATE_READY;
+      default:                                    NextState = STATE_READY;
+    endcase
+  end
+
+  assign SelSpillF = (CurrState == STATE_SPILL);
+  assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallF); // NOTE(review): SelSpillNextF is not declared here
+  assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushM;
+
+  ////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Merge spilled instruction
+  ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+  // save the first 2 bytes (NOTE(review): InstrRawF, PostSpillInstrRawF and CompressedF below are not declared in this module)
+  flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF);
+
+  // merge together
+  mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF);
+
+  // Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x
+  always_comb
+    if (PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1;
+    else CompressedF = 1'b0;
+
+endmodule
diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv
index e5666eb84..ae3e3c78b 100644
--- a/src/lsu/subwordread.sv
+++ b/src/lsu/subwordread.sv
@@ -29,22 +29,125 @@
 
 module subwordread #(parameter LLEN) 
   (
-   input logic [LLEN-1:0]   ReadDataWordMuxM,
-   input logic [2:0]         PAdrM,
-   input logic [2:0]         Funct3M,
-   input logic               FpLoadStoreM,
-   input logic               
BigEndianM, 
-   output logic [LLEN-1:0]   ReadDataM
+   input logic [LLEN-1:0]   ReadDataWordMuxM,
+   input logic [$clog2(LLEN/8)-1:0]         PAdrM,
+   input logic [2:0]         Funct3M,
+   input logic               FpLoadStoreM,
+   input logic               BigEndianM, 
+   output logic [LLEN/2-1:0]   ReadDataM
 );
 
+  localparam OFFSET_LEN = $clog2(LLEN/8);   // number of byte-offset bits within an LLEN-bit word
+  localparam HLEN = LLEN/2;                 // width of ReadDataM: half of the LLEN-bit input word
   logic [7:0]               ByteM; 
   logic [15:0]              HalfwordM;
-  logic [2:0]               PAdrSwap;
+  logic [OFFSET_LEN-1:0]    PAdrSwap;
   // Funct3M[2] is the unsigned bit. mask upper bits.
   // Funct3M[1:0] is the size of the memory access.
-  assign PAdrSwap = PAdrM ^ {3{BigEndianM}};
+  assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}};
 
-  if (LLEN == 64) begin:swrmux
+  if (LLEN == 128) begin:swrmux
+    // ByteMe mux
+    always_comb
+    case(PAdrSwap[3:0])
+      4'b0000: ByteM = ReadDataWordMuxM[7:0];
+      4'b0001: ByteM = ReadDataWordMuxM[15:8];
+      4'b0010: ByteM = ReadDataWordMuxM[23:16];
+      4'b0011: ByteM = ReadDataWordMuxM[31:24];
+      4'b0100: ByteM = ReadDataWordMuxM[39:32];
+      4'b0101: ByteM = ReadDataWordMuxM[47:40];
+      4'b0110: ByteM = ReadDataWordMuxM[55:48];
+      4'b0111: ByteM = ReadDataWordMuxM[63:56];
+      4'b1000: ByteM = ReadDataWordMuxM[71:64];
+      4'b1001: ByteM = ReadDataWordMuxM[79:72];
+      4'b1010: ByteM = ReadDataWordMuxM[87:80];
+      4'b1011: ByteM = ReadDataWordMuxM[95:88];
+      4'b1100: ByteM = ReadDataWordMuxM[103:96];
+      4'b1101: ByteM = ReadDataWordMuxM[111:104];
+      4'b1110: ByteM = ReadDataWordMuxM[119:112];
+      4'b1111: ByteM = ReadDataWordMuxM[127:120];
+    endcase
+
+    // halfword mux
+    always_comb
+    case(PAdrSwap[3:0])
+      4'b0000: HalfwordM = ReadDataWordMuxM[15:0];
+      4'b0001: HalfwordM = ReadDataWordMuxM[23:8];
+      4'b0010: HalfwordM = ReadDataWordMuxM[31:16];
+      4'b0011: HalfwordM = ReadDataWordMuxM[39:24];
+      4'b0100: HalfwordM = ReadDataWordMuxM[47:32];
+      4'b0101: HalfwordM = ReadDataWordMuxM[55:40];
+      4'b0110: HalfwordM = ReadDataWordMuxM[63:48];
+      4'b0111: HalfwordM = ReadDataWordMuxM[71:56];
+      4'b1000: HalfwordM = ReadDataWordMuxM[79:64];
+      4'b1001: HalfwordM = ReadDataWordMuxM[87:72];
+      4'b1010: HalfwordM = ReadDataWordMuxM[95:80];
+      4'b1011: HalfwordM = ReadDataWordMuxM[103:88];
+      4'b1100: HalfwordM = ReadDataWordMuxM[111:96];
+      4'b1101: HalfwordM = ReadDataWordMuxM[119:104];
+      4'b1110: HalfwordM = ReadDataWordMuxM[127:112];
+      //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around
+      4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around
+    endcase
+
+    logic [31:0] WordM;
+
+    always_comb
+    case(PAdrSwap[3:0])   // one entry per byte offset 0..15; offsets 13..15 run off the top and are zero extended
+      4'b0000: WordM = ReadDataWordMuxM[31:0];
+      4'b0001: WordM = ReadDataWordMuxM[39:8];
+      4'b0010: WordM = ReadDataWordMuxM[47:16];
+      4'b0011: WordM = ReadDataWordMuxM[55:24];
+      4'b0100: WordM = ReadDataWordMuxM[63:32];
+      4'b0101: WordM = ReadDataWordMuxM[71:40];
+      4'b0110: WordM = ReadDataWordMuxM[79:48];
+      4'b0111: WordM = ReadDataWordMuxM[87:56];
+      4'b1000: WordM = ReadDataWordMuxM[95:64];
+      4'b1001: WordM = ReadDataWordMuxM[103:72];
+      4'b1010: WordM = ReadDataWordMuxM[111:80];
+      4'b1011: WordM = ReadDataWordMuxM[119:88];
+      4'b1100: WordM = ReadDataWordMuxM[127:96];
+      4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]};
+      4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]};
+      4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]};
+    endcase
+
+    logic [63:0] DblWordM;
+    always_comb
+    case(PAdrSwap[3:0])   // one entry per byte offset 0..15; offsets 9..15 run off the top and are zero extended
+      4'b0000: DblWordM = ReadDataWordMuxM[63:0];
+      4'b0001: DblWordM = ReadDataWordMuxM[71:8];
+      4'b0010: DblWordM = ReadDataWordMuxM[79:16];
+      4'b0011: DblWordM = ReadDataWordMuxM[87:24];
+      4'b0100: DblWordM = ReadDataWordMuxM[95:32];
+      4'b0101: DblWordM = ReadDataWordMuxM[103:40];
+      4'b0110: DblWordM = ReadDataWordMuxM[111:48];
+      4'b0111: DblWordM = ReadDataWordMuxM[119:56];
+      4'b1000: DblWordM = ReadDataWordMuxM[127:64];
+      4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]};
+      4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]};
+      4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]};
+      4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]};
+      4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]};
+      4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]};
+      4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]};
+    endcase
+
+    // sign extension/ NaN boxing
+    always_comb
+    case(Funct3M)
+      3'b000:  ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM};                                            // lb
+      3'b001:  ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};               // lh/flh
+      3'b010:  ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]};                       // lw/flw
+      3'b011:  ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]};                 // ld/fld (HLEN-64 is 0 in this branch)
+      3'b100:  ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]};                                           // lbu
+      //3'b100:  ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]};       // lbu/flq - only needed when LLEN=128
+      3'b101:  ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]};                                     // lhu
+      3'b110:  ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]};                                         // lwu
+      default: ReadDataM = ReadDataWordMuxM[HLEN-1:0];                                             // Shouldn't happen
+    endcase
+
+  end else if (LLEN == 64) begin:swrmux
     // ByteMe mux
     always_comb
     case(PAdrSwap[2:0])
@@ -60,35 +163,55 @@ module subwordread #(parameter LLEN)
 
     // halfword mux
     always_comb
-    case(PAdrSwap[2:1])
-      2'b00: HalfwordM = ReadDataWordMuxM[15:0];
-      2'b01: HalfwordM = ReadDataWordMuxM[31:16];
-      2'b10: HalfwordM = ReadDataWordMuxM[47:32];
-      2'b11: HalfwordM = ReadDataWordMuxM[63:48];
+    case(PAdrSwap[2:0])   // one entry per byte offset 0..7; offset 7 runs off the top and is zero extended
+      3'b000: HalfwordM = ReadDataWordMuxM[15:0];
+      3'b001: HalfwordM = ReadDataWordMuxM[23:8];
+      3'b010: HalfwordM = ReadDataWordMuxM[31:16];
+      3'b011: HalfwordM = ReadDataWordMuxM[39:24];
+      3'b100: HalfwordM = ReadDataWordMuxM[47:32];
+      3'b101: HalfwordM = ReadDataWordMuxM[55:40];
+      3'b110: HalfwordM = ReadDataWordMuxM[63:48];
+      3'b111: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]};
     endcase
     
     logic [31:0] WordM;
     
     always_comb
-    case(PAdrSwap[2])
-      1'b0: WordM = ReadDataWordMuxM[31:0];
-      1'b1: WordM = ReadDataWordMuxM[63:32];
+    case(PAdrSwap[2:0])
+      3'b000: WordM = ReadDataWordMuxM[31:0];
+      3'b001: WordM = ReadDataWordMuxM[39:8];
+      3'b010: WordM = ReadDataWordMuxM[47:16];
+      3'b011: WordM = ReadDataWordMuxM[55:24];
+      3'b100: WordM = ReadDataWordMuxM[63:32];
+      3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]};
+      3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]};
+      3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]};
     endcase
     
     logic [63:0] DblWordM;
-    assign DblWordM = ReadDataWordMuxM[63:0];
+    always_comb
+    case(PAdrSwap[2:0])
+      3'b000: DblWordM = ReadDataWordMuxM[63:0];
+      3'b001: DblWordM = {8'b0, ReadDataWordMuxM[63:8]};
+      3'b010: DblWordM = {16'b0, ReadDataWordMuxM[63:16]};
+      3'b011: DblWordM = {24'b0, ReadDataWordMuxM[63:24]};
+      3'b100: DblWordM = {32'b0, ReadDataWordMuxM[63:32]};
+      3'b101: DblWordM = {40'b0, ReadDataWordMuxM[63:40]};
+      3'b110: DblWordM = {48'b0, ReadDataWordMuxM[63:48]};
+      3'b111: DblWordM = {56'b0, ReadDataWordMuxM[63:56]};
+    endcase
 
     // sign extension/ NaN boxing
     always_comb
     case(Funct3M)
-      3'b000:  ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM};                                            // lb
-      3'b001:  ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};               // lh/flh
-      3'b010:  ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]};                       // lw/flw
-      3'b011:  ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]};                 // ld/fld
-      3'b100:  ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]};                                           // lbu
-      //3'b100:  ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]};       // lbu/flq - only needed when LLEN=128
-      3'b101:  ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]};                                     // lhu
-      3'b110:  ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]};                                         // lwu
+      3'b000:  ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM};                                            // lb
+      3'b001:  ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};               // lh/flh
+      3'b010:  ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]};                       // lw/flw
+      3'b011:  ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]};                 // ld/fld  NOTE(review): HLEN-64 is negative when LLEN==64 (HLEN==32)
+      3'b100:  ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]};                                           // lbu
+      //3'b100:  ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]};       // lbu/flq - only needed when LLEN=128
+      3'b101:  ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]};                                     // lhu
+      3'b110:  ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]};                                         // lwu
       default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
     endcase
 
@@ -104,20 +227,22 @@ module subwordread #(parameter LLEN)
 
     // halfword mux
     always_comb
-    case(PAdrSwap[1])
-      1'b0: HalfwordM = ReadDataWordMuxM[15:0];
-      1'b1: HalfwordM = ReadDataWordMuxM[31:16];
+    case(PAdrSwap[1:0])
+      2'b00: HalfwordM = ReadDataWordMuxM[15:0];
+      2'b01: HalfwordM = ReadDataWordMuxM[23:8];
+      2'b10: HalfwordM = ReadDataWordMuxM[31:16];
+      2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]};
     endcase
 
     // sign extension
     always_comb
     case(Funct3M) 
-      3'b000:  ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM};                                            // lb
-      3'b001:  ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};               // lh/flh
-      3'b010:  ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw
+      3'b000:  ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM};                                            // lb
+      3'b001:  ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};               // lh/flh
+      3'b010:  ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw  NOTE(review): HLEN-32 is negative whenever LLEN < 64 - confirm intended LLEN for this branch
       3'b011:  ReadDataM = ReadDataWordMuxM;                                                       // fld
-      3'b100:  ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]};                                           // lbu
-      3'b101:  ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]};                                     // lhu
+      3'b100:  ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]};                                           // lbu
+      3'b101:  ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]};                                     // lhu
       default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
     endcase
   end