begin implementation of Zicclsm.

This commit is contained in:
Rose Thompson 2023-10-26 10:47:00 -05:00
parent 3322ff915e
commit 12763b7297
2 changed files with 280 additions and 34 deletions

121
src/lsu/align.sv Normal file
View File

@ -0,0 +1,121 @@
///////////////////////////////////////////
// align.sv
//
// Written: Rose Thompson ross1728@gmail.com
// Created: 26 October 2023
// Modified: 26 October 2023
//
// Purpose: This module implements native alignment support for the Zicclsm extension
// It is similar to the IFU's spill module and probably could be merged together with
// some effort.
//
// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5)
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
//
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
//
// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
// may obtain a copy of the License at
//
// https://solderpad.org/licenses/SHL-2.1/
//
// Unless required by applicable law or agreed to in writing, any work distributed under the
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
// align: two-step ("spill") access sequencer for the LSU.
//
// A two-state FSM (STATE_READY / STATE_SPILL) detects that the access at IEUAdrM needs a
// second memory access, saves the low 16 bits of the first read, redirects the address
// outputs to IEUAdrM + P.LLEN/8 for the second access, and merges the two reads into
// ReadDataWordSpillM.
//
// NOTE(review): the detection and merge logic is still the IFU spill logic (address bit 1 /
// 16-bit halves). The comments in the "Detect spill" section describe the intended
// size/offset-based detection, which is not implemented here yet.
module align import cvw::*;  #(parameter cvw_t P) (
  input logic               clk,
  input logic               reset,
  input logic               StallM, FlushM,      // StallM holds STATE_SPILL; FlushM forces STATE_READY
  input logic [P.XLEN-1:0]  IEUAdrM,             // Memory-stage address: drives spill detection and the incremented address
  input logic [P.XLEN-1:0]  IEUAdrE,             // Passed through to IEUAdrSpillE whenever SelSpillE is low
  input logic [31:0]        ReadDataWordMuxM,    // Read data before merging; bits [15:0] are saved when a spill starts
  input logic               LSUStallM,           // Blocks starting a spill; keeps SelSpillE high while in STATE_SPILL
  input logic               DTLBMissM,           // Blocks starting a spill
  output logic [P.XLEN-1:0] IEUAdrSpillE,        // IEUAdrE, or IEUAdrM + P.LLEN/8 while SelSpillE is high
  output logic [P.XLEN-1:0] IEUAdrSpillM,        // {IEUAdrM[XLEN-1:2], 2'b10}, or IEUAdrM + P.LLEN/8 while in STATE_SPILL
  output logic              SelSpillE,           // High when a spill starts and while the second access is stalled
  output logic [31:0]       ReadDataWordSpillM); // ReadDataWordMuxM, or {second read[15:0], saved first half} when a spill is detected

  typedef enum logic [1:0]  {STATE_READY, STATE_SPILL} statetype;

  statetype          CurrState, NextState;
  logic              TakeSpillM;          // spill detected and not blocked by a stall or TLB miss
  logic              SpillM;              // address-based spill detection
  logic              SelSpillM;           // FSM is in STATE_SPILL (second access in progress)
  logic              SpillSaveM;          // capture the first half of the read data
  logic [15:0]       ReadDataFirstHalfM;  // low 16 bits saved from the first access

  // TODO: this module has no cacheability or D/A-bit-update inputs yet (the IFU spill module
  // this was copied from has CacheableF and InstrUpdateDAF). Until matching ports are added,
  // tie them off so the module elaborates:
  //   CacheableM = 0    -> always use the uncached spill criterion (a superset of the cached one)
  //   DataUpdateDAM = 0 -> a hardware D/A update never suppresses a spill
  logic              CacheableM;
  logic              DataUpdateDAM;
  assign CacheableM    = 1'b0;
  assign DataUpdateDAM = 1'b0;

  ////////////////////////////////////////////////////////////////////////////////////////////////////
  // Address logic
  ////////////////////////////////////////////////////////////////////////////////////////////////////

  // Sized to P.XLEN so the addition below is done at full address width
  // (was a bare, undeclared LLEN and a 1-bit IEUAdrIncrementM).
  localparam logic [P.XLEN-1:0] LLENINBYTES = P.LLEN/8;
  logic [P.XLEN-1:0] IEUAdrIncrementM;
  assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES;

  // The selects were TakeSpillM/TakeSpillE, which nothing drove; use the FSM-derived selects.
  // NOTE(review): d0 forces address bits [1:0] to 2'b10 exactly as the IFU's PC+2 mux does;
  // confirm this is intended for data addresses.
  mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM));
  mux2 #(P.XLEN) pcnextspillmux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE));

  ////////////////////////////////////////////////////////////////////////////////////////////////////
  // Detect spill
  ////////////////////////////////////////////////////////////////////////////////////////////////////

  // spill detection in lsu is more complex than ifu, depends on 3 factors
  // 1) operation size
  // 2) offset
  // 3) access location within the cacheline or whether the access is uncached.
  // first consider uncached operations
  // accesses are always aligned to the natural size of the bus (XLEN or AHBW)

  // NOTE(review): still keyed on the I$ parameters; presumably should use the D$ equivalents.
  if (P.ICACHE_SUPPORTED) begin
    logic  SpillCachedM, SpillUncachedM;
    assign SpillCachedM = &IEUAdrM[$clog2(P.ICACHE_LINELENINBITS/32)+1:1];  // all line-offset bits (except [0]) are 1
    assign SpillUncachedM = IEUAdrM[1];
    assign SpillM = CacheableM ? SpillCachedM : SpillUncachedM;
  end else
    assign SpillM = IEUAdrM[1];

  // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits
  assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM));

  always_ff @(posedge clk)
    if (reset | FlushM)    CurrState <= #1 STATE_READY;
    else                   CurrState <= #1 NextState;

  always_comb begin
    case (CurrState)
      STATE_READY: if (TakeSpillM)                NextState = STATE_SPILL;
                   else                           NextState = STATE_READY;
      STATE_SPILL: if (StallM)                    NextState = STATE_SPILL;   // wait for the pipeline to advance
                   else                           NextState = STATE_READY;
      default:                                    NextState = STATE_READY;
    endcase
  end

  assign SelSpillM  = (CurrState == STATE_SPILL);
  assign SelSpillE  = ((CurrState == STATE_READY) & TakeSpillM) | ((CurrState == STATE_SPILL) & LSUStallM);
  assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM;

  ////////////////////////////////////////////////////////////////////////////////////////////////////
  // Merge spilled data
  ////////////////////////////////////////////////////////////////////////////////////////////////////

  // save the first 2 bytes
  flopenr #(16) SpillInstrReg(clk, reset, SpillSaveM, ReadDataWordMuxM[15:0], ReadDataFirstHalfM);

  // merge together: the second read supplies the upper half, the saved first read the lower half
  mux2 #(32) postspillmux(ReadDataWordMuxM, {ReadDataWordMuxM[15:0], ReadDataFirstHalfM}, SpillM, ReadDataWordSpillM);

  // The IFU's compressed-instruction decode was removed: it assigned an undeclared signal
  // (CompressedF) and this module has no corresponding output.
endmodule

View File

@ -29,22 +29,125 @@
module subwordread #(parameter LLEN)
(
input logic [LLEN-1:0] ReadDataWordMuxM,
input logic [2:0] PAdrM,
input logic [2:0] Funct3M,
input logic FpLoadStoreM,
input logic BigEndianM,
output logic [LLEN-1:0] ReadDataM
input logic [LLEN-1:0] ReadDataWordMuxM,
input logic [$clog(LLEN/8)-1:0] PAdrM,
input logic [2:0] Funct3M,
input logic FpLoadStoreM,
input logic BigEndianM,
output logic [LLEN/2-1:0] ReadDataM
);
// Number of byte-offset address bits within one LLEN-bit word.
// ($clog is not a SystemVerilog system function; the ceiling-log2 function is $clog2.)
localparam OFFSET_LEN = $clog2(LLEN/8);
// Half of LLEN; used as the target width for the sign/zero-extension replication counts below.
localparam HLEN = LLEN/2;
logic [7:0] ByteM;
logic [15:0] HalfwordM;
logic [2:0] PAdrSwap;
logic [OFFSET_LEN-1:0] PAdrSwap;
// Funct3M[2] is the unsigned bit. mask upper bits.
// Funct3M[1:0] is the size of the memory access.
assign PAdrSwap = PAdrM ^ {3{BigEndianM}};
assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}};
if (LLEN == 64) begin:swrmux
if (LLEN == 128) begin:swrmux
// ByteMe mux
always_comb
case(PAdrSwap[3:0])
4'b0000: ByteM = ReadDataWordMuxM[7:0];
4'b0001: ByteM = ReadDataWordMuxM[15:8];
4'b0010: ByteM = ReadDataWordMuxM[23:16];
4'b0011: ByteM = ReadDataWordMuxM[31:24];
4'b0100: ByteM = ReadDataWordMuxM[39:32];
4'b0101: ByteM = ReadDataWordMuxM[47:40];
4'b0110: ByteM = ReadDataWordMuxM[55:48];
4'b0111: ByteM = ReadDataWordMuxM[63:56];
4'b1000: ByteM = ReadDataWordMuxM[71:64];
4'b1001: ByteM = ReadDataWordMuxM[79:72];
4'b1010: ByteM = ReadDataWordMuxM[87:80];
4'b1011: ByteM = ReadDataWordMuxM[95:88];
4'b1100: ByteM = ReadDataWordMuxM[103:96];
4'b1101: ByteM = ReadDataWordMuxM[111:104];
4'b1110: ByteM = ReadDataWordMuxM[119:112];
4'b1111: ByteM = ReadDataWordMuxM[127:120];
endcase
// halfword mux
always_comb
case(PAdrSwap[3:0])
4'b0000: HalfwordM = ReadDataWordMuxM[15:0];
4'b0001: HalfwordM = ReadDataWordMuxM[23:8];
4'b0010: HalfwordM = ReadDataWordMuxM[31:16];
4'b0011: HalfwordM = ReadDataWordMuxM[39:24];
4'b0100: HalfwordM = ReadDataWordMuxM[47:32];
4'b0101: HalfwordM = ReadDataWordMuxM[55:40];
4'b0110: HalfwordM = ReadDataWordMuxM[63:48];
4'b0111: HalfwordM = ReadDataWordMuxM[71:56];
4'b1000: HalfwordM = ReadDataWordMuxM[79:64];
4'b1001: HalfwordM = ReadDataWordMuxM[87:72];
4'b1010: HalfwordM = ReadDataWordMuxM[95:80];
4'b1011: HalfwordM = ReadDataWordMuxM[103:88];
4'b1100: HalfwordM = ReadDataWordMuxM[111:96];
4'b1101: HalfwordM = ReadDataWordMuxM[119:104];
4'b1110: HalfwordM = ReadDataWordMuxM[127:112];
//4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around
4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around
endcase
// word mux: select the 32 bits starting at byte offset PAdrSwap[3:0].
// Offsets 13-15 would run past bit 127, so the bytes that remain are zero-extended.
// Fix: the table previously skipped 4'b0110 and listed 4'b1011 twice, so offsets 6-10 selected
// the slice belonging to the offset below them (or nothing at all for offset 6).
logic [31:0] WordM;
always_comb
  case(PAdrSwap[3:0])
    4'b0000: WordM = ReadDataWordMuxM[31:0];
    4'b0001: WordM = ReadDataWordMuxM[39:8];
    4'b0010: WordM = ReadDataWordMuxM[47:16];
    4'b0011: WordM = ReadDataWordMuxM[55:24];
    4'b0100: WordM = ReadDataWordMuxM[63:32];
    4'b0101: WordM = ReadDataWordMuxM[71:40];
    4'b0110: WordM = ReadDataWordMuxM[79:48];
    4'b0111: WordM = ReadDataWordMuxM[87:56];
    4'b1000: WordM = ReadDataWordMuxM[95:64];
    4'b1001: WordM = ReadDataWordMuxM[103:72];
    4'b1010: WordM = ReadDataWordMuxM[111:80];
    4'b1011: WordM = ReadDataWordMuxM[119:88];
    4'b1100: WordM = ReadDataWordMuxM[127:96];
    4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]};
    4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]};
    4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]};
  endcase
// doubleword mux: select the 64 bits starting at byte offset PAdrSwap[3:0].
// Offsets 9-15 would run past bit 127, so the bytes that remain are zero-extended.
// Fixes:
//   - assignments targeted the undeclared name DblWordMM instead of DblWordM
//   - 4'b0110 was missing and 4'b1011 duplicated, shifting the entries for offsets 6-10
//   - the zero-extended entries for offsets 9-11 used slices too narrow for a 64-bit result
//     (e.g. 8 pad bits + [103:72] is only 40 bits); each now takes everything up to bit 127
logic [63:0] DblWordM;
always_comb
  case(PAdrSwap[3:0])
    4'b0000: DblWordM = ReadDataWordMuxM[63:0];
    4'b0001: DblWordM = ReadDataWordMuxM[71:8];
    4'b0010: DblWordM = ReadDataWordMuxM[79:16];
    4'b0011: DblWordM = ReadDataWordMuxM[87:24];
    4'b0100: DblWordM = ReadDataWordMuxM[95:32];
    4'b0101: DblWordM = ReadDataWordMuxM[103:40];
    4'b0110: DblWordM = ReadDataWordMuxM[111:48];
    4'b0111: DblWordM = ReadDataWordMuxM[119:56];
    4'b1000: DblWordM = ReadDataWordMuxM[127:64];
    4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]};
    4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]};
    4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]};
    4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]};
    4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]};
    4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]};
    4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]};
  endcase
// sign extension/ NaN boxing
always_comb
case(Funct3M)
3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw
3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld
3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu
//3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128
3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu
default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen
endcase
end else if (LLEN == 64) begin:swrmux
// ByteMe mux
always_comb
case(PAdrSwap[2:0])
@ -60,35 +163,55 @@ module subwordread #(parameter LLEN)
// halfword mux
always_comb
case(PAdrSwap[2:1])
2'b00: HalfwordM = ReadDataWordMuxM[15:0];
2'b01: HalfwordM = ReadDataWordMuxM[31:16];
2'b10: HalfwordM = ReadDataWordMuxM[47:32];
2'b11: HalfwordM = ReadDataWordMuxM[63:48];
case(PAdrSwap[2:0])
  // Select the 16 bits starting at byte offset PAdrSwap[2:0].
  // Fix: offsets 5 and 7 were both labelled 3'b011 (duplicating offset 3), so they could never
  // match and offset 3 had three competing entries.
  3'b000: HalfwordM = ReadDataWordMuxM[15:0];
  3'b001: HalfwordM = ReadDataWordMuxM[23:8];
  3'b010: HalfwordM = ReadDataWordMuxM[31:16];
  3'b011: HalfwordM = ReadDataWordMuxM[39:24];
  3'b100: HalfwordM = ReadDataWordMuxM[47:32];
  3'b101: HalfwordM = ReadDataWordMuxM[55:40];
  3'b110: HalfwordM = ReadDataWordMuxM[63:48];
  3'b111: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; // second byte lies past bit 63; zero-extend
endcase
logic [31:0] WordM;
always_comb
case(PAdrSwap[2])
1'b0: WordM = ReadDataWordMuxM[31:0];
1'b1: WordM = ReadDataWordMuxM[63:32];
case(PAdrSwap[2:0])
3'b000: WordM = ReadDataWordMuxM[31:0];
3'b001: WordM = ReadDataWordMuxM[39:8];
3'b010: WordM = ReadDataWordMuxM[47:16];
3'b011: WordM = ReadDataWordMuxM[55:24];
3'b100: WordM = ReadDataWordMuxM[63:32];
3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]};
3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]};
3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]};
endcase
logic [63:0] DblWordM;
assign DblWordM = ReadDataWordMuxM[63:0];
// doubleword mux: select the bytes from offset PAdrSwap[2:0] up to bit 63, zero-extended to 64 bits.
// Fix: assignments targeted the undeclared name DblWordMM; the declared signal is DblWordM.
always_comb
  case(PAdrSwap[2:0])
    3'b000: DblWordM = ReadDataWordMuxM[63:0];
    3'b001: DblWordM = {8'b0, ReadDataWordMuxM[63:8]};
    3'b010: DblWordM = {16'b0, ReadDataWordMuxM[63:16]};
    3'b011: DblWordM = {24'b0, ReadDataWordMuxM[63:24]};
    3'b100: DblWordM = {32'b0, ReadDataWordMuxM[63:32]};
    3'b101: DblWordM = {40'b0, ReadDataWordMuxM[63:40]};
    3'b110: DblWordM = {48'b0, ReadDataWordMuxM[63:48]};
    3'b111: DblWordM = {56'b0, ReadDataWordMuxM[63:56]};
  endcase
// sign extension/ NaN boxing
always_comb
case(Funct3M)
3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw
3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld
3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
//3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128
3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu
3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw
3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld
3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu
//3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128
3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
endcase
@ -104,20 +227,22 @@ module subwordread #(parameter LLEN)
// halfword mux
always_comb
case(PAdrSwap[1])
1'b0: HalfwordM = ReadDataWordMuxM[15:0];
1'b1: HalfwordM = ReadDataWordMuxM[31:16];
case(PAdrSwap[1:0])
2'b00: HalfwordM = ReadDataWordMuxM[15:0];
2'b01: HalfwordM = ReadDataWordMuxM[23:8];
2'b10: HalfwordM = ReadDataWordMuxM[31:16];
2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]};
endcase
// sign extension
always_comb
case(Funct3M)
3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw
3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb
3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh
3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw
3'b011: ReadDataM = ReadDataWordMuxM; // fld
3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu
3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu
default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen
endcase
end