From 00851dab2ad8da943efff1f25bd56030e1166527 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 26 Oct 2023 10:47:00 -0500 Subject: [PATCH 01/32] begin implementation of Zicclsm. --- src/lsu/align.sv | 121 ++++++++++++++++++++++++++ src/lsu/subwordread.sv | 193 +++++++++++++++++++++++++++++++++-------- 2 files changed, 280 insertions(+), 34 deletions(-) create mode 100644 src/lsu/align.sv diff --git a/src/lsu/align.sv b/src/lsu/align.sv new file mode 100644 index 000000000..b3e810ee2 --- /dev/null +++ b/src/lsu/align.sv @@ -0,0 +1,121 @@ +/////////////////////////////////////////// +// spill.sv +// +// Written: Rose Thompson ross1728@gmail.com +// Created: 26 October 2023 +// Modified: 26 October 2023 +// +// Purpose: This module implements native alignment support for the Zicclsm extension +// It is simlar to the IFU's spill module and probably could be merged together with +// some effort. +// +// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module align import cvw::*; #(parameter cvw_t P) ( + input logic clk, + input logic reset, + input logic StallM, FlushM, + input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage + input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM + input logic [31:0] ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed + input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched + input logic DTLBMissM, // ITLB miss, ignore memory request + + output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill + output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill + output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline + output logic [31:0] ReadDataWordSpillM)// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + + // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). 
Without a cache this is just PCF[1] + typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; + + statetype CurrState, NextState; + logic TakeSpillM, TakeSpillE; + logic SpillF; + logic SelSpillF; + logic SpillSaveF; + logic [15:0] InstrFirstHalfF; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // PC logic + //////////////////////////////////////////////////////////////////////////////////////////////////// + + localparam LLENINBYTES = LLEN/8; + logic IEUAdrIncrementM; + assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(TakeSpillM), .y(IEUAdrSpillM)); + mux2 #(P.XLEN) pcnextspillmux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(TakeSpillE), .y(IEUAdrSpillE)); + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Detect spill + //////////////////////////////////////////////////////////////////////////////////////////////////// + + // spill detection in lsu is more complex than ifu, depends on 3 factors + // 1) operation size + // 2) offset + // 3) access location within the cacheline or is the access is uncached. + // first consider uncached operations + // accesses are always aligned to the natural size of the bus (XLEN or AHBW) + + if (P.ICACHE_SUPPORTED) begin + logic SpillCachedF, SpillUncachedF; + assign SpillCachedF = &IEUAdrM[$clog2(P.ICACHE_LINELENINBITS/32)+1:1]; + assign SpillUncachedF = IEUAdrM[1]; // *** try to optimize this based on whether the next instruction is 16 bits and by fetching 64 bits in RV64 + assign SpillF = CacheableF ? 
SpillCachedF : SpillUncachedF; + end else + assign SpillF = IEUAdrM[1]; // *** might relax - only spill if next instruction is uncompressed + // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits + assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF)); + + always_ff @(posedge clk) + if (reset | FlushM) CurrState <= #1 STATE_READY; + else CurrState <= #1 NextState; + + always_comb begin + case (CurrState) + STATE_READY: if (TakeSpillF) NextState = STATE_SPILL; + else NextState = STATE_READY; + STATE_SPILL: if(StallM) NextState = STATE_SPILL; + else NextState = STATE_READY; + default: NextState = STATE_READY; + endcase + end + + assign SelSpillF = (CurrState == STATE_SPILL); + assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallF); + assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushM; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Merge spilled instruction + //////////////////////////////////////////////////////////////////////////////////////////////////// + + // save the first 2 bytes + flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF); + + // merge together + mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); + + // Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x + always_comb + if (PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1; + else CompressedF = 1'b0; + +endmodule diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index e5666eb84..ae3e3c78b 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -29,22 +29,125 @@ module subwordread #(parameter LLEN) ( - input logic [LLEN-1:0] ReadDataWordMuxM, - input logic [2:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic 
BigEndianM, - output logic [LLEN-1:0] ReadDataM + input logic [LLEN-1:0] ReadDataWordMuxM, + input logic [$clog(LLEN/8)-1:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN/2-1:0] ReadDataM ); + localparam OFFSET_LEN = $clog(LLEN/8); + localparam HLEN = LLEN/2; logic [7:0] ByteM; logic [15:0] HalfwordM; - logic [2:0] PAdrSwap; + logic [OFFSET_LEN-1:0] PAdrSwap; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = PAdrM ^ {3{BigEndianM}}; + assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; - if (LLEN == 64) begin:swrmux + if (LLEN == 128) begin:swrmux + // ByteMe mux + always_comb + case(PAdrSwap[3:0]) + 4'b0000: ByteM = ReadDataWordMuxM[7:0]; + 4'b0001: ByteM = ReadDataWordMuxM[15:8]; + 4'b0010: ByteM = ReadDataWordMuxM[23:16]; + 4'b0011: ByteM = ReadDataWordMuxM[31:24]; + 4'b0100: ByteM = ReadDataWordMuxM[39:32]; + 4'b0101: ByteM = ReadDataWordMuxM[47:40]; + 4'b0110: ByteM = ReadDataWordMuxM[55:48]; + 4'b0111: ByteM = ReadDataWordMuxM[63:56]; + 4'b1000: ByteM = ReadDataWordMuxM[71:64]; + 4'b1001: ByteM = ReadDataWordMuxM[79:72]; + 4'b1010: ByteM = ReadDataWordMuxM[87:80]; + 4'b1011: ByteM = ReadDataWordMuxM[95:88]; + 4'b1100: ByteM = ReadDataWordMuxM[103:96]; + 4'b1101: ByteM = ReadDataWordMuxM[111:104]; + 4'b1110: ByteM = ReadDataWordMuxM[119:112]; + 4'b1111: ByteM = ReadDataWordMuxM[127:120]; + endcase + + // halfword mux + always_comb + case(PAdrSwap[3:0]) + 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; + 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; + 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; + 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; + 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; + 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; + 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; + 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; + 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; + 4'b1001: HalfwordM = ReadDataWordMuxM[87:72]; + 
4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; + 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; + 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; + 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; + 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; + //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around + 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around + endcase + + logic [31:0] WordM; + + always_comb + case(PAdrSwap[3:0]) + 4'b0000: WordM = ReadDataWordMuxM[31:0]; + 4'b0001: WordM = ReadDataWordMuxM[39:8]; + 4'b0010: WordM = ReadDataWordMuxM[47:16]; + 4'b0011: WordM = ReadDataWordMuxM[55:24]; + 4'b0100: WordM = ReadDataWordMuxM[63:32]; + 4'b0101: WordM = ReadDataWordMuxM[71:40]; + 4'b0111: WordM = ReadDataWordMuxM[79:48]; + 4'b1000: WordM = ReadDataWordMuxM[87:56]; + 4'b1001: WordM = ReadDataWordMuxM[95:64]; + 4'b1010: WordM = ReadDataWordMuxM[103:72]; + 4'b1011: WordM = ReadDataWordMuxM[111:80]; + 4'b1011: WordM = ReadDataWordMuxM[119:88]; + 4'b1100: WordM = ReadDataWordMuxM[127:96]; + 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; + endcase + + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; + 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; + 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; + 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; + 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; + 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; + 4'b1011: DblWordMM = {24'b0, ReadDataWordMuxM[119:88]}; + 4'b1100: 
DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux // ByteMe mux always_comb case(PAdrSwap[2:0]) @@ -60,35 +163,55 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[2:1]) - 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; - 2'b01: HalfwordM = ReadDataWordMuxM[31:16]; - 2'b10: HalfwordM = ReadDataWordMuxM[47:32]; - 2'b11: HalfwordM = ReadDataWordMuxM[63:48]; + case(PAdrSwap[2:0]) + 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; + 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; + 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; + 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; + 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; + 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; + 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; + 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; endcase logic [31:0] WordM; always_comb - case(PAdrSwap[2]) - 1'b0: WordM = ReadDataWordMuxM[31:0]; - 1'b1: WordM = ReadDataWordMuxM[63:32]; + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 
3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; endcase logic [63:0] DblWordM; - assign DblWordM = ReadDataWordMuxM[63:0]; + always_comb + case(PAdrSwap[2:0]) + 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; + 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; + 3'b010: DblWordMM = {16'b0, ReadDataWordMuxM[63:16]}; + 3'b011: DblWordMM = {24'b0, ReadDataWordMuxM[63:24]}; + 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; + 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; + 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; + 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; + endcase // sign extension/ NaN boxing always_comb case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase @@ -104,20 +227,22 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[1]) - 1'b0: HalfwordM = ReadDataWordMuxM[15:0]; - 1'b1: HalfwordM = ReadDataWordMuxM[31:16]; + case(PAdrSwap[1:0]) + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; + 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; endcase // sign extension always_comb case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw 3'b011: ReadDataM = ReadDataWordMuxM; // fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase end From e7edd0084e78e691b8329fe5dd00ee394894282a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 09:35:44 -0500 Subject: [PATCH 02/32] Progress on misaligned load/stores. 
--- src/lsu/align.sv | 44 ++++++---- src/lsu/lsu.sv | 4 +- src/lsu/subwordread.sv | 193 ++++++++--------------------------------- 3 files changed, 61 insertions(+), 180 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index b3e810ee2..8cae76a02 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -35,9 +35,11 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic StallM, FlushM, input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM + input logic [2:0] Funct3M, // Size of memory operation input logic [31:0] ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request + input logic DataUpdateDAM, // ITLB miss, ignore memory request output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill @@ -49,10 +51,10 @@ module align import cvw::*; #(parameter cvw_t P) ( statetype CurrState, NextState; logic TakeSpillM, TakeSpillE; - logic SpillF; + logic SpillM; logic SelSpillF; logic SpillSaveF; - logic [15:0] InstrFirstHalfF; + logic [LLEN-8:0] ReadDataWordFirstHalfM; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic @@ -71,19 +73,23 @@ module align import cvw::*; #(parameter cvw_t P) ( // spill detection in lsu is more complex than ifu, depends on 3 factors // 1) operation size // 2) offset - // 3) access location within the cacheline or is the access is uncached. 
- // first consider uncached operations - // accesses are always aligned to the natural size of the bus (XLEN or AHBW) - - if (P.ICACHE_SUPPORTED) begin - logic SpillCachedF, SpillUncachedF; - assign SpillCachedF = &IEUAdrM[$clog2(P.ICACHE_LINELENINBITS/32)+1:1]; - assign SpillUncachedF = IEUAdrM[1]; // *** try to optimize this based on whether the next instruction is 16 bits and by fetching 64 bits in RV64 - assign SpillF = CacheableF ? SpillCachedF : SpillUncachedF; - end else - assign SpillF = IEUAdrM[1]; // *** might relax - only spill if next instruction is uncompressed + // 3) access location within the cacheline + logic [P.DCACHE_LINELENINBITS/8-1:P.LLEN/8] WordOffsetM; + logic [P.LLEN/8-1:0] ByteOffsetM; + logic HalfSpillM, WordSpillM; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[P.DCACHE_LINELENINBITS/8-1:0]; + assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; + assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; + if(P.LLEN == 64) begin + logic DoubleSpillM; + assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; + assign SpillM = HalfSpillM | WordOffsetM | DoubleSpillM; + end else begin + assign SpillM = HalfSpillM | WordOffsetM; + end + // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF)); + assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -91,7 +97,7 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (TakeSpillF) NextState = STATE_SPILL; + STATE_READY: if (TakeSpillM) NextState = STATE_SPILL; else NextState = STATE_READY; STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; @@ -99,16 +105,16 
@@ module align import cvw::*; #(parameter cvw_t P) ( endcase end - assign SelSpillF = (CurrState == STATE_SPILL); - assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallF); - assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushM; + assign SelSpillM = (CurrState == STATE_SPILL); + assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & LSUStallM); + assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; //////////////////////////////////////////////////////////////////////////////////////////////////// // Merge spilled instruction //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF); + flopenr #(P.LLEN-8) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[LLEN-1:8], ReadDataWordFirstHalfM); // merge together mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 191599f12..8dc843a38 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -234,6 +234,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. + // Add support for cboz dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM[P.LLEN/8-1:0])); @@ -268,8 +269,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign CacheAtomicM = CacheableM & ~SelDTIM ? 
LSUAtomicM : '0; assign FlushDCache = FlushDCacheM & ~(SelHPTW); - // *** need RT to add support for CMOpM and LSUPrefetchM (DH 7/2/23) - // *** prefetch can just act as a read operation cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), @@ -285,6 +284,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DCacheStallM = CacheStall & ~IgnoreRequestTLB; assign CacheBusRW = CacheBusRWTemp; + // *** add support for cboz ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index ae3e3c78b..e5666eb84 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -29,125 +29,22 @@ module subwordread #(parameter LLEN) ( - input logic [LLEN-1:0] ReadDataWordMuxM, - input logic [$clog(LLEN/8)-1:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN/2-1:0] ReadDataM + input logic [LLEN-1:0] ReadDataWordMuxM, + input logic [2:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN-1:0] ReadDataM ); - localparam OFFSET_LEN = $clog(LLEN/8); - localparam HLEN = LLEN/2; logic [7:0] ByteM; logic [15:0] HalfwordM; - logic [OFFSET_LEN-1:0] PAdrSwap; + logic [2:0] PAdrSwap; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. 
- assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; + assign PAdrSwap = PAdrM ^ {3{BigEndianM}}; - if (LLEN == 128) begin:swrmux - // ByteMe mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: ByteM = ReadDataWordMuxM[7:0]; - 4'b0001: ByteM = ReadDataWordMuxM[15:8]; - 4'b0010: ByteM = ReadDataWordMuxM[23:16]; - 4'b0011: ByteM = ReadDataWordMuxM[31:24]; - 4'b0100: ByteM = ReadDataWordMuxM[39:32]; - 4'b0101: ByteM = ReadDataWordMuxM[47:40]; - 4'b0110: ByteM = ReadDataWordMuxM[55:48]; - 4'b0111: ByteM = ReadDataWordMuxM[63:56]; - 4'b1000: ByteM = ReadDataWordMuxM[71:64]; - 4'b1001: ByteM = ReadDataWordMuxM[79:72]; - 4'b1010: ByteM = ReadDataWordMuxM[87:80]; - 4'b1011: ByteM = ReadDataWordMuxM[95:88]; - 4'b1100: ByteM = ReadDataWordMuxM[103:96]; - 4'b1101: ByteM = ReadDataWordMuxM[111:104]; - 4'b1110: ByteM = ReadDataWordMuxM[119:112]; - 4'b1111: ByteM = ReadDataWordMuxM[127:120]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; - 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; - 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; - 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; - 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; - 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; - 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; - 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; - 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; - 4'b1001: HalfwordM = ReadDataWordMuxM[87:72]; - 4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; - 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; - 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; - 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; - 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; - //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - endcase - - logic [31:0] WordM; - - always_comb - 
case(PAdrSwap[3:0]) - 4'b0000: WordM = ReadDataWordMuxM[31:0]; - 4'b0001: WordM = ReadDataWordMuxM[39:8]; - 4'b0010: WordM = ReadDataWordMuxM[47:16]; - 4'b0011: WordM = ReadDataWordMuxM[55:24]; - 4'b0100: WordM = ReadDataWordMuxM[63:32]; - 4'b0101: WordM = ReadDataWordMuxM[71:40]; - 4'b0111: WordM = ReadDataWordMuxM[79:48]; - 4'b1000: WordM = ReadDataWordMuxM[87:56]; - 4'b1001: WordM = ReadDataWordMuxM[95:64]; - 4'b1010: WordM = ReadDataWordMuxM[103:72]; - 4'b1011: WordM = ReadDataWordMuxM[111:80]; - 4'b1011: WordM = ReadDataWordMuxM[119:88]; - 4'b1100: WordM = ReadDataWordMuxM[127:96]; - 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; - endcase - - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; - 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; - 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; - 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; - 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; - 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; - 4'b1011: DblWordMM = {24'b0, ReadDataWordMuxM[119:88]}; - 4'b1100: DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = 
{{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux + if (LLEN == 64) begin:swrmux // ByteMe mux always_comb case(PAdrSwap[2:0]) @@ -163,55 +60,35 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[2:0]) - 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; - 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; - 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; - 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; - 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; - 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; - 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; - 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; + case(PAdrSwap[2:1]) + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b10: HalfwordM = ReadDataWordMuxM[47:32]; + 2'b11: HalfwordM = ReadDataWordMuxM[63:48]; endcase logic [31:0] WordM; always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + case(PAdrSwap[2]) + 1'b0: WordM = ReadDataWordMuxM[31:0]; + 1'b1: WordM = ReadDataWordMuxM[63:32]; endcase logic [63:0] DblWordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; - 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; - 3'b010: DblWordMM = 
{16'b0, ReadDataWordMuxM[63:16]}; - 3'b011: DblWordMM = {24'b0, ReadDataWordMuxM[63:24]}; - 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; - 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; - 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; - 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; - endcase + assign DblWordM = ReadDataWordMuxM[63:0]; // sign extension/ NaN boxing always_comb case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase @@ -227,22 +104,20 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[1:0]) - 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; - 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; - 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; - 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; + case(PAdrSwap[1]) + 1'b0: HalfwordM = ReadDataWordMuxM[15:0]; + 1'b1: HalfwordM = ReadDataWordMuxM[31:16]; endcase // sign extension always_comb case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw 3'b011: ReadDataM = ReadDataWordMuxM; // fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase end From 839ff28d32a6da30d6af585e3e2343ae16c0084d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 09:49:44 -0500 Subject: [PATCH 03/32] Added file. 
--- src/lsu/subwordread-variant1.sv | 249 ++++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 src/lsu/subwordread-variant1.sv diff --git a/src/lsu/subwordread-variant1.sv b/src/lsu/subwordread-variant1.sv new file mode 100644 index 000000000..c0cfe247b --- /dev/null +++ b/src/lsu/subwordread-variant1.sv @@ -0,0 +1,249 @@ +/////////////////////////////////////////// +// subwordread.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Extract subwords and sign extend for reads +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordreadVar1 #(parameter LLEN) + ( + input logic [LLEN-1:0] ReadDataWordMuxM, + input logic [$clog(LLEN/8)-1:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN/2-1:0] ReadDataM +); + + localparam OFFSET_LEN = $clog(LLEN/8); + localparam HLEN = LLEN/2; + logic [7:0] ByteM; + logic [15:0] HalfwordM; + logic [OFFSET_LEN-1:0] PAdrSwap; + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; + + if (LLEN == 128) begin:swrmux + // ByteMe mux + always_comb + case(PAdrSwap[3:0]) + 4'b0000: ByteM = ReadDataWordMuxM[7:0]; + 4'b0001: ByteM = ReadDataWordMuxM[15:8]; + 4'b0010: ByteM = ReadDataWordMuxM[23:16]; + 4'b0011: ByteM = ReadDataWordMuxM[31:24]; + 4'b0100: ByteM = ReadDataWordMuxM[39:32]; + 4'b0101: ByteM = ReadDataWordMuxM[47:40]; + 4'b0110: ByteM = ReadDataWordMuxM[55:48]; + 4'b0111: ByteM = ReadDataWordMuxM[63:56]; + 4'b1000: ByteM = ReadDataWordMuxM[71:64]; + 4'b1001: ByteM = ReadDataWordMuxM[79:72]; + 4'b1010: ByteM = ReadDataWordMuxM[87:80]; + 4'b1011: ByteM = ReadDataWordMuxM[95:88]; + 4'b1100: ByteM = ReadDataWordMuxM[103:96]; + 4'b1101: ByteM = ReadDataWordMuxM[111:104]; + 4'b1110: ByteM = ReadDataWordMuxM[119:112]; + 4'b1111: ByteM = ReadDataWordMuxM[127:120]; + endcase + + // halfword mux + always_comb + case(PAdrSwap[3:0]) + 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; + 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; + 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; + 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; + 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; + 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; + 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; + 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; + 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; + 4'b1001: HalfwordM = 
ReadDataWordMuxM[87:72]; + 4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; + 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; + 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; + 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; + 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; + //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around + 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around + endcase + + logic [31:0] WordM; + + always_comb + case(PAdrSwap[3:0]) + 4'b0000: WordM = ReadDataWordMuxM[31:0]; + 4'b0001: WordM = ReadDataWordMuxM[39:8]; + 4'b0010: WordM = ReadDataWordMuxM[47:16]; + 4'b0011: WordM = ReadDataWordMuxM[55:24]; + 4'b0100: WordM = ReadDataWordMuxM[63:32]; + 4'b0101: WordM = ReadDataWordMuxM[71:40]; + 4'b0111: WordM = ReadDataWordMuxM[79:48]; + 4'b1000: WordM = ReadDataWordMuxM[87:56]; + 4'b1001: WordM = ReadDataWordMuxM[95:64]; + 4'b1010: WordM = ReadDataWordMuxM[103:72]; + 4'b1011: WordM = ReadDataWordMuxM[111:80]; + 4'b1011: WordM = ReadDataWordMuxM[119:88]; + 4'b1100: WordM = ReadDataWordMuxM[127:96]; + 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; + endcase + + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; + 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; + 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; + 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; + 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; + 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; + 4'b1011: DblWordMM = {24'b0, 
ReadDataWordMuxM[119:88]}; + 4'b1100: DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux + // ByteMe mux + always_comb + case(PAdrSwap[2:0]) + 3'b000: ByteM = ReadDataWordMuxM[7:0]; + 3'b001: ByteM = ReadDataWordMuxM[15:8]; + 3'b010: ByteM = ReadDataWordMuxM[23:16]; + 3'b011: ByteM = ReadDataWordMuxM[31:24]; + 3'b100: ByteM = ReadDataWordMuxM[39:32]; + 3'b101: ByteM = ReadDataWordMuxM[47:40]; + 3'b110: ByteM = ReadDataWordMuxM[55:48]; + 3'b111: ByteM = ReadDataWordMuxM[63:56]; + endcase + + // halfword mux + always_comb + case(PAdrSwap[2:0]) + 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; + 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; + 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; + 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; + 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; + 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; + 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; + 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; + endcase + + logic [31:0] WordM; + + always_comb + 
case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase + + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; + 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; + 3'b010: DblWordMM = {16'b0, ReadDataWordMuxM[63:16]}; + 3'b011: DblWordMM = {24'b0, ReadDataWordMuxM[63:24]}; + 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; + 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; + 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; + 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; + endcase + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + // byte mux + always_comb + case(PAdrSwap[1:0]) + 2'b00: ByteM = ReadDataWordMuxM[7:0]; + 2'b01: ByteM = ReadDataWordMuxM[15:8]; + 2'b10: ByteM = ReadDataWordMuxM[23:16]; + 2'b11: ByteM = ReadDataWordMuxM[31:24]; + endcase + + // halfword mux + always_comb + case(PAdrSwap[1:0]) + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; + 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; + endcase + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b011: ReadDataM = ReadDataWordMuxM; // fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen + endcase + end +endmodule From d648e199e1c20eef3623b20a70175b0e07fdd089 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 11:41:49 -0500 Subject: [PATCH 04/32] The misaligned load alignment lints. 
--- src/lsu/align.sv | 40 +++-- src/lsu/subwordread-variant1.sv | 249 -------------------------------- src/wally/wallypipelinedcore.sv | 2 +- 3 files changed, 28 insertions(+), 263 deletions(-) delete mode 100644 src/lsu/subwordread-variant1.sv diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 8cae76a02..897f0d181 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -36,7 +36,7 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation - input logic [31:0] ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed + input logic [P.LLEN*2-1:0]ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request input logic DataUpdateDAM, // ITLB miss, ignore memory request @@ -44,7 +44,7 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [31:0] ReadDataWordSpillM)// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [P.LLEN-1:0] ReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). 
Without a cache this is just PCF[1] typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; @@ -52,15 +52,17 @@ module align import cvw::*; #(parameter cvw_t P) ( statetype CurrState, NextState; logic TakeSpillM, TakeSpillE; logic SpillM; - logic SelSpillF; - logic SpillSaveF; - logic [LLEN-8:0] ReadDataWordFirstHalfM; + logic SelSpillM; + logic SpillSaveM; + logic [P.LLEN-1:0] ReadDataWordFirstHalfM; + logic MisalignedM; + logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic //////////////////////////////////////////////////////////////////////////////////////////////////// - localparam LLENINBYTES = LLEN/8; + localparam LLENINBYTES = P.LLEN/8; logic IEUAdrIncrementM; assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(TakeSpillM), .y(IEUAdrSpillM)); @@ -110,18 +112,30 @@ module align import cvw::*; #(parameter cvw_t P) ( assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; //////////////////////////////////////////////////////////////////////////////////////////////////// - // Merge spilled instruction + // Merge spilled data //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(P.LLEN-8) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[LLEN-1:8], ReadDataWordFirstHalfM); + flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM); // merge together - mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); + mux2 #(2*P.LLEN) postspillmux(ReadDataWordMuxM, {ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); - // Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x - always_comb - if 
(PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1; - else CompressedF = 1'b0; + // align by shifting + // *** optimize by merging with halfSpill, WordSpill, etc + logic HalfMisalignedM, WordMisalignedM; + assign HalfMisalignedM = Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; + assign WordMisalignedM = Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; + if(P.LLEN == 64) begin + logic DoubleMisalignedM; + assign DoubleMisalignedM = Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; + assign MisalignedM = HalfMisalignedM | WordMisalignedM | DoubleMisalignedM; + end else begin + assign MisalignedM = HalfMisalignedM | WordMisalignedM; + end + // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) + // 8 * is for shifting by bytes not bits + assign ReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + endmodule diff --git a/src/lsu/subwordread-variant1.sv b/src/lsu/subwordread-variant1.sv deleted file mode 100644 index c0cfe247b..000000000 --- a/src/lsu/subwordread-variant1.sv +++ /dev/null @@ -1,249 +0,0 @@ -/////////////////////////////////////////// -// subwordread.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// Purpose: Extract subwords and sign extend for reads -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordreadVar1 #(parameter LLEN) - ( - input logic [LLEN-1:0] ReadDataWordMuxM, - input logic [$clog(LLEN/8)-1:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN/2-1:0] ReadDataM -); - - localparam OFFSET_LEN = $clog(LLEN/8); - localparam HLEN = LLEN/2; - logic [7:0] ByteM; - logic [15:0] HalfwordM; - logic [OFFSET_LEN-1:0] PAdrSwap; - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. 
- assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; - - if (LLEN == 128) begin:swrmux - // ByteMe mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: ByteM = ReadDataWordMuxM[7:0]; - 4'b0001: ByteM = ReadDataWordMuxM[15:8]; - 4'b0010: ByteM = ReadDataWordMuxM[23:16]; - 4'b0011: ByteM = ReadDataWordMuxM[31:24]; - 4'b0100: ByteM = ReadDataWordMuxM[39:32]; - 4'b0101: ByteM = ReadDataWordMuxM[47:40]; - 4'b0110: ByteM = ReadDataWordMuxM[55:48]; - 4'b0111: ByteM = ReadDataWordMuxM[63:56]; - 4'b1000: ByteM = ReadDataWordMuxM[71:64]; - 4'b1001: ByteM = ReadDataWordMuxM[79:72]; - 4'b1010: ByteM = ReadDataWordMuxM[87:80]; - 4'b1011: ByteM = ReadDataWordMuxM[95:88]; - 4'b1100: ByteM = ReadDataWordMuxM[103:96]; - 4'b1101: ByteM = ReadDataWordMuxM[111:104]; - 4'b1110: ByteM = ReadDataWordMuxM[119:112]; - 4'b1111: ByteM = ReadDataWordMuxM[127:120]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; - 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; - 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; - 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; - 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; - 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; - 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; - 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; - 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; - 4'b1001: HalfwordM = ReadDataWordMuxM[87:72]; - 4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; - 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; - 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; - 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; - 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; - //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - endcase - - logic [31:0] WordM; - - always_comb - case(PAdrSwap[3:0]) - 4'b0000: WordM = 
ReadDataWordMuxM[31:0]; - 4'b0001: WordM = ReadDataWordMuxM[39:8]; - 4'b0010: WordM = ReadDataWordMuxM[47:16]; - 4'b0011: WordM = ReadDataWordMuxM[55:24]; - 4'b0100: WordM = ReadDataWordMuxM[63:32]; - 4'b0101: WordM = ReadDataWordMuxM[71:40]; - 4'b0111: WordM = ReadDataWordMuxM[79:48]; - 4'b1000: WordM = ReadDataWordMuxM[87:56]; - 4'b1001: WordM = ReadDataWordMuxM[95:64]; - 4'b1010: WordM = ReadDataWordMuxM[103:72]; - 4'b1011: WordM = ReadDataWordMuxM[111:80]; - 4'b1011: WordM = ReadDataWordMuxM[119:88]; - 4'b1100: WordM = ReadDataWordMuxM[127:96]; - 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; - endcase - - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; - 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; - 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; - 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; - 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; - 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; - 4'b1011: DblWordMM = {24'b0, ReadDataWordMuxM[119:88]}; - 4'b1100: DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, 
DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux - // ByteMe mux - always_comb - case(PAdrSwap[2:0]) - 3'b000: ByteM = ReadDataWordMuxM[7:0]; - 3'b001: ByteM = ReadDataWordMuxM[15:8]; - 3'b010: ByteM = ReadDataWordMuxM[23:16]; - 3'b011: ByteM = ReadDataWordMuxM[31:24]; - 3'b100: ByteM = ReadDataWordMuxM[39:32]; - 3'b101: ByteM = ReadDataWordMuxM[47:40]; - 3'b110: ByteM = ReadDataWordMuxM[55:48]; - 3'b111: ByteM = ReadDataWordMuxM[63:56]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[2:0]) - 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; - 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; - 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; - 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; - 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; - 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; - 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; - 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; - endcase - - logic [31:0] WordM; - - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase - - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; - 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; - 3'b010: DblWordMM = {16'b0, ReadDataWordMuxM[63:16]}; - 3'b011: DblWordMM = 
{24'b0, ReadDataWordMuxM[63:24]}; - 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; - 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; - 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; - 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; - endcase - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - // byte mux - always_comb - case(PAdrSwap[1:0]) - 2'b00: ByteM = ReadDataWordMuxM[7:0]; - 2'b01: ByteM = ReadDataWordMuxM[15:8]; - 2'b10: ByteM = ReadDataWordMuxM[23:16]; - 2'b11: ByteM = ReadDataWordMuxM[31:24]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[1:0]) - 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; - 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; - 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; - 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; - endcase - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM; // fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, 
HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen - endcase - end -endmodule diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 5df543903..00b348660 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( end // global stall and flush control - hazard hzu(.clk, .reset, + hazard hzu( .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LSUStallM, .IFUStallF, From ff8583245476a6f227591867073ec5ee03c38805 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 13:07:23 -0500 Subject: [PATCH 05/32] Added ZICCLSM to config files and started on lsu instance. --- config/buildroot/config.vh | 1 + config/fpga/config.vh | 1 + config/rv32e/config.vh | 1 + config/rv32gc/config.vh | 1 + config/rv32i/config.vh | 1 + config/rv32imc/config.vh | 1 + config/rv64fpquad/config.vh | 1 + config/rv64gc/config.vh | 1 + config/rv64i/config.vh | 1 + config/shared/parameter-defs.vh | 1 + src/cvw.sv | 1 + src/lsu/lsu.sv | 3 ++- 12 files changed, 13 insertions(+), 1 deletion(-) diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index 05c8aa646..7b13a27f2 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -46,6 +46,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/fpga/config.vh b/config/fpga/config.vh index 9e2b4cbb9..e690335f3 100644 --- a/config/fpga/config.vh +++ b/config/fpga/config.vh @@ -48,6 +48,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; +localparam ZICCLSM_SUPPORTED = 0; localparam
SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index bf5965fb5..915ab7677 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 1d42e233a..a76b42302 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -48,6 +48,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index 5c34ae413..d25f90135 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh index 1867b9f99..9fafafe71 100644 --- a/config/rv32imc/config.vh +++ b/config/rv32imc/config.vh @@ -46,6 +46,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh index 6e2e0a33d..343de8b11 100644 --- a/config/rv64fpquad/config.vh +++ 
b/config/rv64fpquad/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index f3057c287..fa603990b 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index ea668a45e..35fe763a5 100644 --- a/config/rv64i/config.vh +++ b/config/rv64i/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index f3f216062..f6132f765 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -24,6 +24,7 @@ localparam cvw_t P = '{ ZICBOM_SUPPORTED : ZICBOM_SUPPORTED, ZICBOZ_SUPPORTED : ZICBOZ_SUPPORTED, ZICBOP_SUPPORTED : ZICBOP_SUPPORTED, + ZICCLSM_SUPPORTED : ZICCLSM_SUPPORTED, SVPBMT_SUPPORTED : SVPBMT_SUPPORTED, SVNAPOT_SUPPORTED : SVNAPOT_SUPPORTED, SVINVAL_SUPPORTED : SVINVAL_SUPPORTED, diff --git a/src/cvw.sv b/src/cvw.sv index 01e0d6376..cdcd983b6 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -59,6 +59,7 @@ typedef struct packed { logic ZICBOM_SUPPORTED; logic ZICBOZ_SUPPORTED; logic ZICBOP_SUPPORTED; + logic ZICCLSM_SUPPORTED; logic SVPBMT_SUPPORTED; logic SVNAPOT_SUPPORTED; logic 
SVINVAL_SUPPORTED; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 8dc843a38..f2c7647eb 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -248,6 +248,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( localparam AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^ localparam LINELEN = P.DCACHE_LINELENINBITS; // Number of bits in cacheline localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) + localparam CACHEWORDLEN = P.ZICCLSM_SUPPORTED ? 2*P.LLEN : P.LLEN; // Width of the cache's input and output data buses. Misaligned doubles width for fast access logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline logic [P.PA_BITS-1:0] DCacheBusAdr; // Cacheline address to fetch or writeback. @@ -270,7 +271,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign FlushDCache = FlushDCacheM & ~(SelHPTW); cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), - .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( + .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), From 42b2dad6ad2d0a170c6d259b77d1fef154a17e8a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 13:55:16 -0500 Subject: [PATCH 06/32] At least have the aligner integrated, but not tested. 
--- src/lsu/align.sv | 16 ++++++++-------- src/lsu/lsu.sv | 32 +++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 897f0d181..0e399d19a 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -36,15 +36,15 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation - input logic [P.LLEN*2-1:0]ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed - input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched + input logic [P.LLEN*2-1:0]DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed + input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request input logic DataUpdateDAM, // ITLB miss, ignore memory request output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [P.LLEN-1:0] ReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [P.LLEN-1:0] DCacheReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). 
Without a cache this is just PCF[1] typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; @@ -91,7 +91,7 @@ module align import cvw::*; #(parameter cvw_t P) ( end // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); + assign TakeSpillM = SpillM & ~CacheBusHPWTStall & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -108,7 +108,7 @@ module align import cvw::*; #(parameter cvw_t P) ( end assign SelSpillM = (CurrState == STATE_SPILL); - assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & LSUStallM); + assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall); assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -116,10 +116,10 @@ module align import cvw::*; #(parameter cvw_t P) ( //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM); + flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM); // merge together - mux2 #(2*P.LLEN) postspillmux(ReadDataWordMuxM, {ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); + mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); // align by shifting // *** optimize by merging with halfSpill, WordSpill, etc @@ -136,6 +136,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by 
bytes not bits - assign ReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign DCacheReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index f2c7647eb..ab0b36d7d 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -92,6 +92,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit ); + localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED; logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer @@ -108,13 +109,18 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic BusStall; // Bus interface busy with multicycle operation logic HPTWStall; // HPTW busy with multicycle operation + logic CacheBusHPWTStall; // Cache, bus, or hptw is requesting a stall + logic SelSpillE; // Align logic detected a spill and needs to stall logic CacheableM; // PMA indicates memory address is cacheable logic BusCommittedM; // Bus memory operation in flight, delay interrupts logic DCacheCommittedM; // D$ memory operation started, delay interrupts logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data - logic [P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + /* verilator lint_off WIDTHEXPAND */ + logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + /* verilator lint_on WIDTHEXPAND */ + logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection @@ 
-142,8 +148,19 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); - assign IEUAdrExtM = {2'b00, IEUAdrM}; - assign IEUAdrExtE = {2'b00, IEUAdrE}; + if(MISALIGN_SUPPORT) begin : ziccslm_align + logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; + align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, + .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); + assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; + assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; + end else begin : no_ziccslm_align + assign IEUAdrExtM = {2'b00, IEUAdrM}; + assign IEUAdrExtE = {2'b00, IEUAdrE}; + assign SelSpillE = '0; + assign DCacheReadDataWordSpillM = DCacheReadDataWordM; + end ///////////////////////////////////////////////////////////////////////////////////////////// // HPTW (only needed if VM supported) @@ -180,7 +197,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // the trap module. 
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; - assign LSUStallM = DCacheStallM | HPTWStall | BusStall; + assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall; + assign LSUStallM = CacheBusHPWTStall | SelSpillE; ///////////////////////////////////////////////////////////////////////////////////////////// // MMU and misalignment fault logic required if privileged unit exists @@ -273,7 +291,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), - .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), + .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), @@ -290,7 +308,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), - .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM), .WriteDataM(LSUWriteDataM), + .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), @@ -300,7 +318,7 @@ module lsu import cvw::*; 
#(parameter cvw_t P) ( // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. // pma should generate exception for LLEN read to periph. - mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), + mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface From b2c61737bf01ceb8e3053d22c22684eaa32b091e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 14:41:42 -0500 Subject: [PATCH 07/32] Passes lint with some exceptions. Still need to add misaligned store support. --- src/lsu/align.sv | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 0e399d19a..a04aa386e 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -57,16 +57,17 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; + logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic //////////////////////////////////////////////////////////////////////////////////////////////////// localparam LLENINBYTES = P.LLEN/8; - logic IEUAdrIncrementM; + logic [XLEN-1:0] IEUAdrIncrementM; assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; - mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(TakeSpillM), .y(IEUAdrSpillM)); - mux2 #(P.XLEN) pcnextspillmux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(TakeSpillE), .y(IEUAdrSpillE)); + mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), 
.s(SelSpillE), .y(IEUAdrSpillE)); + mux2 #(P.XLEN) ieuadrspillmmux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); //////////////////////////////////////////////////////////////////////////////////////////////////// // Detect spill @@ -85,9 +86,9 @@ module align import cvw::*; #(parameter cvw_t P) ( if(P.LLEN == 64) begin logic DoubleSpillM; assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; - assign SpillM = HalfSpillM | WordOffsetM | DoubleSpillM; + assign SpillM = HalfSpillM | WordSpillM | DoubleSpillM; end else begin - assign SpillM = HalfSpillM | WordOffsetM; + assign SpillM = HalfSpillM | WordSpillM; end // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits @@ -136,6 +137,7 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign DCacheReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; endmodule From 610969726e151c48c5ef5d33933aeb911b9fe7cb Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 16:31:22 -0500 Subject: [PATCH 08/32] Progress. 
--- src/cache/cache.sv | 14 ++++++++++---- src/lsu/align.sv | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 1714544ec..23fd6163e 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -175,10 +175,16 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN/8-1:0] DemuxedByteMask, FetchBufferByteSel; // Adjust byte mask from word to cache line - onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded)); - for(index = 0; index < 2**LOGCWPL; index++) begin - assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; - end + + localparam CACHEMUXINVERALPERLINE = LINELEN/MUXINTERVAL;// Number of words in cache line + localparam LOGMIPL = $clog2(CACHEMUXINVERALPERLINE);// Log2 of ^ + + logic [LINELEN/8-1:0] BlankByteMask; + assign BlankByteMask[WORDLEN/8-1:0] = ByteMask; + assign BlankByteMask[LINELEN/8-1:WORDLEN/8] = '0; + + assign DemuxedByteMask = BlankByteMask << ((MUXINTERVAL/8) * WordOffsetAddr); + assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. 
// Merge write data into fetched cache line for store miss diff --git a/src/lsu/align.sv b/src/lsu/align.sv index a04aa386e..18b387e49 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -64,7 +64,7 @@ module align import cvw::*; #(parameter cvw_t P) ( //////////////////////////////////////////////////////////////////////////////////////////////////// localparam LLENINBYTES = P.LLEN/8; - logic [XLEN-1:0] IEUAdrIncrementM; + logic [P.XLEN-1:0] IEUAdrIncrementM; assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); From 560a843cea436685bd49d968480c6176eec29f2b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 14:00:49 -0500 Subject: [PATCH 09/32] Finally lints cleanly. --- src/lsu/align.sv | 17 +++++++++++++++++ src/lsu/lsu.sv | 9 +++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 18b387e49..ae6e3985b 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -41,6 +41,12 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic DTLBMissM, // ITLB miss, ignore memory request input logic DataUpdateDAM, // ITLB miss, ignore memory request + input logic [(P.LLEN-1)/8:0] ByteMaskM, + input logic [P.LLEN-1:0] LSUWriteDataM, + + output logic [(P.LLEN*2-1)/8:0] ByteMaskSpillM, + output logic [P.LLEN*2-1:0] LSUWriteDataSpillM, + output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline @@ -65,7 +71,9 @@ module align import cvw::*; #(parameter cvw_t P) ( localparam LLENINBYTES = P.LLEN/8; logic [P.XLEN-1:0] 
IEUAdrIncrementM; + /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); @@ -139,5 +147,14 @@ module align import cvw::*; #(parameter cvw_t P) ( // 8 * is for shifting by bytes not bits assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; + + // write path. Also has the 8:1 shifter muxing for the byteoffset + // then it also has the mux to select when a spill occurs + logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; + assign LSUWriteDataShiftedM = {{{P.LLEN}{1'b0}}, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); + mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; + assign ByteMaskShiftedM = {{{P.LLEN/8}{1'b0}}, ByteMaskM} << (MisalignedM ? 
ByteMaskM : '0); + mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskShiftedM[P.LLEN*2/8-1:P.LLEN/8]}, SelSpillM, ByteMaskSpillM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index ab0b36d7d..44fdffe58 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -119,6 +119,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data /* verilator lint_off WIDTHEXPAND */ logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] LSUWriteDataSpillM; // Final write data + logic [((MISALIGN_SUPPORT+1)*P.LLEN-1)/8:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data @@ -152,6 +154,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, + .ByteMaskM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -160,6 +163,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign IEUAdrExtE = {2'b00, IEUAdrE}; assign SelSpillE = '0; assign DCacheReadDataWordSpillM = DCacheReadDataWordM; + assign ByteMaskSpillM = ByteMaskM; + assign LSUWriteDataSpillM = LSUWriteDataM; end ///////////////////////////////////////////////////////////////////////////////////////////// @@ -292,8 +297,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | 
IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), - .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), - .CacheWriteData(LSUWriteDataM), .SelHPTW, + .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), + .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), From f7b00c7af9f36b7c800fc5fcd602e62a9eae91a3 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 14:54:58 -0500 Subject: [PATCH 10/32] Aligner is integrated and enabled in rv64gc and passes the regression test; however, there are no new tests. --- src/lsu/align.sv | 14 ++++++++------ src/lsu/lsu.sv | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index ae6e3985b..48cf2f035 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -36,6 +36,8 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation + input logic [1:0] MemRWM, + input logic CacheableM, input logic [P.LLEN*2-1:0]DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. 
Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request @@ -56,7 +58,7 @@ module align import cvw::*; #(parameter cvw_t P) ( typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; statetype CurrState, NextState; - logic TakeSpillM, TakeSpillE; + logic TakeSpillM; logic SpillM; logic SelSpillM; logic SpillSaveM; @@ -75,7 +77,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); - mux2 #(P.XLEN) ieuadrspillmmux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); + mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); //////////////////////////////////////////////////////////////////////////////////////////////////// // Detect spill @@ -94,9 +96,9 @@ module align import cvw::*; #(parameter cvw_t P) ( if(P.LLEN == 64) begin logic DoubleSpillM; assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; - assign SpillM = HalfSpillM | WordSpillM | DoubleSpillM; + assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin - assign SpillM = HalfSpillM | WordSpillM; + assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM); end // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits @@ -151,10 +153,10 @@ module align import cvw::*; #(parameter cvw_t P) ( // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; - assign LSUWriteDataShiftedM = {{{P.LLEN}{1'b0}}, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); + assign LSUWriteDataShiftedM = {LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 
8 * ByteOffsetM : '0); mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; - assign ByteMaskShiftedM = {{{P.LLEN/8}{1'b0}}, ByteMaskM} << (MisalignedM ? ByteMaskM : '0); + assign ByteMaskShiftedM = {{{P.LLEN/8}{1'b0}}, ByteMaskM} << (MisalignedM ? ByteMaskM : '0); // *** merge with subword byte mask mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskShiftedM[P.LLEN*2/8-1:P.LLEN/8]}, SelSpillM, ByteMaskSpillM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 44fdffe58..5b9533504 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -153,6 +153,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin : ziccslm_align logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, + .MemRWM, .CacheableM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .ByteMaskM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); From 3824c3be8d4782627b24aebeffdd6bc67e827a15 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 15:30:09 -0500 Subject: [PATCH 11/32] rv32gc now also works with the alignment module. Still not tested with misaligned access.
--- src/lsu/align.sv | 6 +++--- src/lsu/lsu.sv | 2 +- src/lsu/swbytemask.sv | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 48cf2f035..18dd6b2ff 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -87,10 +87,10 @@ module align import cvw::*; #(parameter cvw_t P) ( // 1) operation size // 2) offset // 3) access location within the cacheline - logic [P.DCACHE_LINELENINBITS/8-1:P.LLEN/8] WordOffsetM; - logic [P.LLEN/8-1:0] ByteOffsetM; + logic [$clog2(P.DCACHE_LINELENINBITS/8)-1:$clog2(LLENINBYTES)] WordOffsetM; + logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; - assign {WordOffsetM, ByteOffsetM} = IEUAdrM[P.DCACHE_LINELENINBITS/8-1:0]; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[$clog2(P.DCACHE_LINELENINBITS/8)-1:0]; assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; if(P.LLEN == 64) begin diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 5b9533504..6cb123be8 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -151,7 +151,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); if(MISALIGN_SUPPORT) begin : ziccslm_align - logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; + logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .CacheableM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, diff --git a/src/lsu/swbytemask.sv b/src/lsu/swbytemask.sv index ad20a4414..9313456f3 100644 --- a/src/lsu/swbytemask.sv +++ b/src/lsu/swbytemask.sv @@ -33,7 +33,7 @@ module swbytemask #(parameter WORDLEN)( output logic [WORDLEN/8-1:0] ByteMask ); - assign ByteMask = ((2**(2**Size))-1) << Adr; + assign ByteMask = ((2**(2**Size))-1) << Adr; // merge with align. 
/* Equivalent to the following From 2f5deff7bc80bf05302bb1ccf1984fa0ebc74980 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 15:47:46 -0500 Subject: [PATCH 12/32] Preemptively fixed the bytemask bug before testing. --- src/ebu/ahbcacheinterface.sv | 2 +- src/lsu/align.sv | 5 +++-- src/lsu/lsu.sv | 9 +++++---- src/lsu/swbytemask.sv | 16 ++++++++++++---- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index 9c2ff3a89..054022106 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -113,7 +113,7 @@ module ahbcacheinterface #( // *** bummer need a second byte mask for bus as it is AHBW rather than LLEN. // probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0. - swbytemask #(AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(AHBW/8)-1:0]), .ByteMask(BusByteMaskM)); + swbytemask #(AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(AHBW/8)-1:0]), .ByteMask(BusByteMaskM), .ByteMaskExtended()); flopen #(AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[AHBW/8-1:0], HWSTRB); diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 18dd6b2ff..b517dfcdb 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -44,6 +44,7 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic DataUpdateDAM, // ITLB miss, ignore memory request input logic [(P.LLEN-1)/8:0] ByteMaskM, + input logic [(P.LLEN-1)/8:0] ByteMaskExtendedM, input logic [P.LLEN-1:0] LSUWriteDataM, output logic [(P.LLEN*2-1)/8:0] ByteMaskSpillM, @@ -156,7 +157,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign LSUWriteDataShiftedM = {LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 
8 * ByteOffsetM : '0); mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; - assign ByteMaskShiftedM = {{{P.LLEN/8}{1'b0}}, ByteMaskM} << (MisalignedM ? ByteMaskM : '0); // *** merge with subword byte mask - mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskShiftedM[P.LLEN*2/8-1:P.LLEN/8]}, SelSpillM, ByteMaskSpillM); + assign ByteMaskShiftedM = {ByteMaskExtendedM, ByteMaskM}; + mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, SelSpillM, ByteMaskSpillM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 6cb123be8..ef9edb72b 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -134,6 +134,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data logic [P.LLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write + logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB @@ -155,7 +156,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .CacheableM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, - .ByteMaskM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -261,7 +262,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // Add support for cboz dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, 
.WriteDataM(LSUWriteDataM), - .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM[P.LLEN/8-1:0])); + .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM)); end else begin end if (P.BUS_SUPPORTED) begin : bus @@ -337,7 +338,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ahbinterface #(P.XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), - .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM[P.XLEN/8-1:0]), .WriteData(LSUWriteDataM[P.XLEN-1:0]), + .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM[P.XLEN-1:0]), .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); // Mux between the 2 sources of read data, 0: Bus, 1: DTIM @@ -379,7 +380,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks - swbytemask #(P.LLEN) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM)); + swbytemask #(P.LLEN, P.ZICCLSM_SUPPORTED) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM), .ByteMaskExtended(ByteMaskExtendedM)); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register diff --git a/src/lsu/swbytemask.sv b/src/lsu/swbytemask.sv index 9313456f3..e0981e934 100644 --- a/src/lsu/swbytemask.sv +++ b/src/lsu/swbytemask.sv @@ -27,13 +27,21 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module swbytemask #(parameter WORDLEN)( +module swbytemask #(parameter WORDLEN, EXTEND = 0)( input logic [2:0] Size, input logic [$clog2(WORDLEN/8)-1:0] Adr, - output logic [WORDLEN/8-1:0] ByteMask + output logic [WORDLEN/8-1:0] ByteMask, + output logic [WORDLEN/8-1:0] ByteMaskExtended ); - - assign ByteMask = ((2**(2**Size))-1) << Adr; // merge with align. + if(EXTEND) begin + logic [WORDLEN*2/8-1:0] ExtendedByteMask; + assign ExtendedByteMask = ((2**(2**Size))-1) << Adr; + assign ByteMask = ExtendedByteMask[WORDLEN/8-1:0]; + assign ByteMaskExtended = ExtendedByteMask[WORDLEN*2/8-1:WORDLEN/8]; + end else begin + assign ByteMask = ((2**(2**Size))-1) << Adr; + assign ByteMaskExtended = '0; + end /* Equivalent to the following From 7e8d132eadf3fccb43354971d783d111432b56f4 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 18:26:11 -0500 Subject: [PATCH 13/32] Updated mmu to not generate trap on cacheable misaligned access when supported. Updated tests with David's help. 
--- src/mmu/mmu.sv | 4 +- .../riscv-test-suite/rv64i_m/I/Makefrag | 6 +- .../rv64i_m/privilege/Makefrag | 1 + ...ALLY-misaligned-access-01.reference_output | 24 +++ .../references/WALLY-trap-01.reference_output | 9 +- .../WALLY-trap-s-01.reference_output | 8 +- .../WALLY-trap-u-01.reference_output | 8 +- .../rv64i_m/privilege/src/WALLY-TEST-LIB-64.h | 6 +- .../src/WALLY-misaligned-access-01.S | 139 ++++++++++++++++++ 9 files changed, 186 insertions(+), 19 deletions(-) create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 32fed853d..a497b6da7 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -138,8 +138,8 @@ module mmu import cvw::*; #(parameter cvw_t P, 2'b10: DataMisalignedM = VAdr[1] | VAdr[0]; // lw, sw, flw, fsw, lwu 2'b11: DataMisalignedM = |VAdr[2:0]; // ld, sd, fld, fsd endcase - assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM; - assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM; + assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); + assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); // Specify which type of page fault is occurring assign InstrPageFaultF = TLBPageFault & ExecuteAccessF; diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/Makefrag index 5758ecc33..19bb5bd01 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/Makefrag @@ -28,11 +28,11 @@ # Description: Makefrag for RV64I architectural tests rv64i_sc_tests = \ - WALLY-ADD \ + WALLY-ADD \ WALLY-SUB \ WALLY-SLT \ - WALLY-SLTU \ - WALLY-XOR + WALLY-SLTU 
\ + WALLY-XOR \ rv64i_tests = $(addsuffix .elf, $(rv64i_sc_tests)) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag index bd522e9a4..36f3e8075 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag @@ -57,6 +57,7 @@ target_tests_nosim = \ WALLY-wfi-01 \ WALLY-cbom-01 \ WALLY-cboz-01 \ + WALLY-misaligned-access-01 \ # unclear why status-fp-enabled and wfi aren't simulating ok diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output new file mode 100644 index 000000000..7e1ab4344 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -0,0 +1,24 @@ +00000000 +00000000 +00000001 +00000000 +ffffffff +ffffffff +00000001 +00000000 +00000002 +00000000 +00000000 +00000000 +ffffffff +ffffffff +00000000 +00000000 +fffffffe +ffffffff +393cb5d1 +72ca6f49 +7b12609b +245889d8 +7f42ac28 +af17a2d3 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output index 5c9b816fb..d613b4996 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output @@ -1,3 +1,4 @@ + FFFFFFFF # stimecmp low bits 00000000 # stimecmp high bits 00000000 # menvcfg low bits @@ -24,7 +25,7 @@ FFFFFFFF # stimecmp low bits 00000000 00000004 # mcause from load address misaligned 00000000 -80000411 # 
mtval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -36,7 +37,7 @@ FFFFFFFF # stimecmp low bits 00000000 00000006 # mcause from store misaligned 00000000 -80000429 # mtval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -136,7 +137,7 @@ FFFFFFFF # stimecmp low bits 00000000 00000004 # mcause from load address misaligned 00000000 -80000411 # mtval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -148,7 +149,7 @@ FFFFFFFF # stimecmp low bits 00000000 00000006 # mcause from store misaligned 00000000 -80000429 # mtval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output index 9f3ddc647..6aef0eb5d 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output @@ -26,7 +26,7 @@ 00000000 00000004 # scause from load address misaligned 00000000 -80000411 # stval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -38,7 +38,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000429 # stval of address with misaligned store instr 
(0x80000421) +02000001 # mtval of misaligned address 00000000 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -128,7 +128,7 @@ 00000000 00000004 # scause from load address misaligned 00000000 -80000411 # stval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 @@ -140,7 +140,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000429 # stval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output index 36f08113a..1eea9f389 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output @@ -26,7 +26,7 @@ 00000000 00000004 # scause from load address misaligned 00000000 -80000411 # stval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -38,7 +38,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000429 # stval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -122,7 +122,7 @@ 00000000 00000004 # scause from load address misaligned 00000000 -80000411 # stval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and 
sstatus.SIE = 0 00000000 @@ -134,7 +134,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000429 # stval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h index 23f105cbc..07a31d7d5 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h @@ -98,7 +98,8 @@ cause_breakpnt: ret cause_load_addr_misaligned: - auipc t3, 0 // get current PC, which is aligned + li t3, 0x02000000 // base address of clint, because with zicclsm misaligned cached access won't trap + //auipc t3, 0 // get current PC, which is aligned addi t3, t3, 1 lw t4, 0(t3) // load from a misaligned address ret @@ -108,7 +109,8 @@ cause_load_acc: ret cause_store_addr_misaligned: - auipc t3, 0 // get current PC, which is aligned + li t3, 0x02000000 // base address of clint, because with zicclsm misaligned cached access won't trap + //auipc t3, 0 // get current PC, which is aligned addi t3, t3, 1 sw t4, 0(t3) // store to a misaligned address ret diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S new file mode 100644 index 000000000..792acc715 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -0,0 +1,139 @@ +/////////////////////////////////////////// +// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S +// David_Harris@hmc.edu & Katherine Parry +// Created 2022-06-17 22:58:09.916813// +// Copyright 
(C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV64I") + +.section .text.init +.globl rvtest_entry_point +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_SIGBASE( x6, wally_signature) + +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",ld) + +# Testcase 0: rs1:x18(0x0000000000000000), rs2:x9(0x0000000000000000), result rd:x5(0x0000000000000000) +li x18, MASK_XLEN(0x0000000000000000) +li x9, MASK_XLEN(0x0000000000000000) +SLT x5, x18, x9 +sd x5, 0(x6) + +# Testcase 1: rs1:x8(0x0000000000000000), rs2:x25(0x0000000000000001), result rd:x31(0x0000000000000001) +li x8, MASK_XLEN(0x0000000000000000) +li x25, MASK_XLEN(0x0000000000000001) +SLT x31, x8, x25 +sd x31, 8(x6) + +# Testcase 2: rs1:x16(0x0000000000000000), rs2:x12(0xffffffffffffffff), result rd:x20(0x0000000000000000) +li x16, MASK_XLEN(0x0000000000000000) +li x12, MASK_XLEN(0xffffffffffffffff) +SLT x20, x16, x12 +sd x20, 16(x6) + +# Testcase 3: rs1:x10(0x0000000000000001), rs2:x22(0x0000000000000000), result rd:x12(0x0000000000000000) +li x10, MASK_XLEN(0x0000000000000001) +li x22, MASK_XLEN(0x0000000000000000) +SLT x12, x10, x22 +sd x12, 24(x6) + +# Testcase 4: rs1:x19(0x0000000000000001), rs2:x31(0x0000000000000001), result rd:x29(0x0000000000000000) +li x19, MASK_XLEN(0x0000000000000001) +li x31, MASK_XLEN(0x0000000000000001) +SLT x29, x19, x31 +sd x29, 32(x6) + +# Testcase 5: rs1:x21(0x0000000000000001), rs2:x28(0xffffffffffffffff), result rd:x20(0x0000000000000000) +li x21, MASK_XLEN(0x0000000000000001) +li x28, MASK_XLEN(0xffffffffffffffff) +SLT x20, x21, x28 +sd x20, 40(x6) + +# Testcase 6: rs1:x5(0xffffffffffffffff), rs2:x23(0x0000000000000000), result rd:x10(0x0000000000000001) +li x5, MASK_XLEN(0xffffffffffffffff) +li x23, MASK_XLEN(0x0000000000000000) +SLT x10, x5, x23 +sd x10, 48(x6) + +# Testcase 7: rs1:x13(0xffffffffffffffff), rs2:x24(0x0000000000000001), result 
rd:x14(0x0000000000000001) +li x13, MASK_XLEN(0xffffffffffffffff) +li x24, MASK_XLEN(0x0000000000000001) +SLT x14, x13, x24 +sd x14, 56(x6) + +# Testcase 8: rs1:x27(0xffffffffffffffff), rs2:x21(0xffffffffffffffff), result rd:x3(0x0000000000000000) +li x27, MASK_XLEN(0xffffffffffffffff) +li x21, MASK_XLEN(0xffffffffffffffff) +SLT x3, x27, x21 +sd x3, 64(x6) + +# Testcase 9: rs1:x8(0x983631890063e42f), rs2:x21(0xb2d650af313b32b7), result rd:x15(0x0000000000000001) +li x8, MASK_XLEN(0x983631890063e42f) +li x21, MASK_XLEN(0xb2d650af313b32b7) +SLT x15, x8, x21 +sd x15, 72(x6) + +# Testcase 10: rs1:x19(0xb5d97ef760ef1471), rs2:x28(0xac7c8803e01bbf50), result rd:x14(0x0000000000000000) +li x19, MASK_XLEN(0xb5d97ef760ef1471) +li x28, MASK_XLEN(0xac7c8803e01bbf50) +SLT x14, x19, x28 +sd x14, 80(x6) + +# Testcase 11: rs1:x19(0x66faf98908135d58), rs2:x14(0xb3ab1b2cdf26f517), result rd:x25(0x0000000000000000) +li x19, MASK_XLEN(0x66faf98908135d58) +li x14, MASK_XLEN(0xb3ab1b2cdf26f517) +SLT x25, x19, x14 +sd x25, 88(x6) + +.EQU NUMTESTS,12 + +RVTEST_CODE_END +RVMODEL_HALT + +RVTEST_DATA_BEGIN +.align 4 +rvtest_data: +.word 0x98765432 +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + + +wally_signature: + .fill NUMTESTS*(XLEN/32),4,0xdeadbeef + +#ifdef rvtest_mtrap_routine + +mtrap_sigptr: + .fill 64*(XLEN/32),4,0xdeadbeef + +#endif + +#ifdef rvtest_gpr_save + +gpr_save: + .fill 32*(XLEN/32),4,0xdeadbeef + +#endif + +RVMODEL_DATA_END +// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S +// David_Harris@hmc.edu & Katherine Parry From 6223e8382d0357236465805e13470f2eac3e4ac4 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 12:30:10 -0500 Subject: [PATCH 14/32] First stab at the misaligned test. 
--- ...ALLY-misaligned-access-01.reference_output | 551 ++++++++++++- .../src/WALLY-misaligned-access-01.S | 750 ++++++++++++++++-- 2 files changed, 1205 insertions(+), 96 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 7e1ab4344..b0078f9ac 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -1,24 +1,537 @@ -00000000 -00000000 -00000001 -00000000 -ffffffff -ffffffff -00000001 -00000000 -00000002 +03020100 # ByteDstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +03020100 # Half0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # Half1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +03020100 # Word0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 
+3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # Word1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +05040302 # Word2DstData +09080706 +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +06050403 # Word3DstData +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +03020100 # Double0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # Double1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +05040302 # Double2DstData +09080706 +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 
+39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +06050403 # Double3DstData +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +07060504 # Double4DestData +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +deadbeef +08070605 # Double5DestData +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +deadbeef +09080706 # Double6DstData +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +deadbeef +0a090807 # Double7DstData +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +deadbeef +00000000 #signature 00000000 00000000 00000000 -ffffffff -ffffffff +00000000 00000000 00000000 -fffffffe -ffffffff 
-393cb5d1 -72ca6f49 -7b12609b -245889d8 -7f42ac28 -af17a2d3 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 792acc715..76496ff47 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -27,113 +27,709 @@ rvtest_entry_point: RVMODEL_BOOT RVTEST_CODE_BEGIN -RVTEST_SIGBASE( x6, wally_signature) - RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",ld) -# Testcase 0: rs1:x18(0x0000000000000000), rs2:x9(0x0000000000000000), result rd:x5(0x0000000000000000) -li x18, MASK_XLEN(0x0000000000000000) -li x9, MASK_XLEN(0x0000000000000000) -SLT x5, x18, x9 -sd x5, 0(x6) + # This test checks the misaligned load and stores work correctly and across D$ line spills. + # The general approach is to + # 1. load a region of memory using load doubles equal to two cache lines. And copy to a new + # region but using stores of bytes, half, word, or doubles. Each are repeated for all possible + # misaligned access. Bytes are always aligned, halves are 0, and 1, words are 0, 1, 2, and 3, and + # doubles are 0 through 7. Then the new region is compared against the reference region. 
Because + # of the misalignment the last few bytes will not be written so they will be some portion of deadbeef. + # The comparison is done using using same abyte, half, word, and double misaligned approach. -# Testcase 1: rs1:x8(0x0000000000000000), rs2:x25(0x0000000000000001), result rd:x31(0x0000000000000001) -li x8, MASK_XLEN(0x0000000000000000) -li x25, MASK_XLEN(0x0000000000000001) -SLT x31, x8, x25 -sd x31, 8(x6) + la a3, signature # does not get overwritten by any functions -# Testcase 2: rs1:x16(0x0000000000000000), rs2:x12(0xffffffffffffffff), result rd:x20(0x0000000000000000) -li x16, MASK_XLEN(0x0000000000000000) -li x12, MASK_XLEN(0xffffffffffffffff) -SLT x20, x16, x12 -sd x20, 16(x6) + # byte copy region. always naturally aligned + la a0, SourceData + la a1, ByteDstData + li a2, 16 + jal ra, memcpy8_1 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData + la a1, ByteDstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData + la a1, Half0DstData + li a2, 16 + jal ra, memcpy8_2 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData + la a1, Half0DstData + li a2, 16 + jal ra, CheckAllWriteSignature -# Testcase 3: rs1:x10(0x0000000000000001), rs2:x22(0x0000000000000000), result rd:x12(0x0000000000000000) -li x10, MASK_XLEN(0x0000000000000001) -li x22, MASK_XLEN(0x0000000000000000) -SLT x12, x10, x22 -sd x12, 24(x6) + la a0, SourceData+1 + la a1, Half1DstData + li a2, 16 + jal ra, memcpy8_2 + + # check if the values are write for all sizes and offsets of misaligned loads. 
+ la a0, SourceData+1 + la a1, Half1DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData + la a1, Word0DstData + li a2, 16 + jal ra, memcpy8_4 -# Testcase 4: rs1:x19(0x0000000000000001), rs2:x31(0x0000000000000001), result rd:x29(0x0000000000000000) -li x19, MASK_XLEN(0x0000000000000001) -li x31, MASK_XLEN(0x0000000000000001) -SLT x29, x19, x31 -sd x29, 32(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData + la a1, Word0DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+1 + la a1, Word1DstData + li a2, 16 + jal ra, memcpy8_4 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+1 + la a1, Word1DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+2 + la a1, Word2DstData + li a2, 16 + jal ra, memcpy8_4 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+2 + la a1, Word2DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+3 + la a1, Word3DstData + li a2, 16 + jal ra, memcpy8_4 -# Testcase 5: rs1:x21(0x0000000000000001), rs2:x28(0xffffffffffffffff), result rd:x20(0x0000000000000000) -li x21, MASK_XLEN(0x0000000000000001) -li x28, MASK_XLEN(0xffffffffffffffff) -SLT x20, x21, x28 -sd x20, 40(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+3 + la a1, Word3DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData + la a1, Double0DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData + la a1, Double0DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+1 + la a1, Double1DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. 
+ la a0, SourceData+1 + la a1, Double1DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+2 + la a1, Double2DstData + li a2, 16 + jal ra, memcpy8_8 -# Testcase 6: rs1:x5(0xffffffffffffffff), rs2:x23(0x0000000000000000), result rd:x10(0x0000000000000001) -li x5, MASK_XLEN(0xffffffffffffffff) -li x23, MASK_XLEN(0x0000000000000000) -SLT x10, x5, x23 -sd x10, 48(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+2 + la a1, Double2DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+3 + la a1, Double3DstData + li a2, 16 + jal ra, memcpy8_8 -# Testcase 7: rs1:x13(0xffffffffffffffff), rs2:x24(0x0000000000000001), result rd:x14(0x0000000000000001) -li x13, MASK_XLEN(0xffffffffffffffff) -li x24, MASK_XLEN(0x0000000000000001) -SLT x14, x13, x24 -sd x14, 56(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+3 + la a1, Double3DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+4 + la a1, Double4DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+4 + la a1, Double4DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+5 + la a1, Double5DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+5 + la a1, Double5DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+6 + la a1, Double6DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. 
+ la a0, SourceData+6 + la a1, Double6DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+7 + la a1, Double7DstData + li a2, 16 + jal ra, memcpy8_8 -# Testcase 8: rs1:x27(0xffffffffffffffff), rs2:x21(0xffffffffffffffff), result rd:x3(0x0000000000000000) -li x27, MASK_XLEN(0xffffffffffffffff) -li x21, MASK_XLEN(0xffffffffffffffff) -SLT x3, x27, x21 -sd x3, 64(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+7 + la a1, Double7DstData + li a2, 16 + jal ra, CheckAllWriteSignature + +.type CheckAll, @function +# a0 is the SourceData, (golden), a1 is the data to be checked. +# a2 is the number of doubles +# a3 is the signature pointer +# returns a0 as 0 for no mismatch, 1 for mismatch, +# returns a3 as incremented signature pointer +CheckAllWriteSignature: + mv s0, a0 + mv s1, a1 + mv s2, a2 + mv s3, a3 + # there is no stack so I'm saving ra into s4 + mv s4, ra -# Testcase 9: rs1:x8(0x983631890063e42f), rs2:x21(0xb2d650af313b32b7), result rd:x15(0x0000000000000001) -li x8, MASK_XLEN(0x983631890063e42f) -li x21, MASK_XLEN(0xb2d650af313b32b7) -SLT x15, x8, x21 -sd x15, 72(x6) + # check values byte by byte + mv a0, s0 # SourceData + mv a1, s1 # ie: ByteDstData + srli a2, s2, 3 # * 8 + jal ra, memcmp1 + sb a0, 0(s3) + mv s4, a0 -# Testcase 10: rs1:x19(0xb5d97ef760ef1471), rs2:x28(0xac7c8803e01bbf50), result rd:x14(0x0000000000000000) -li x19, MASK_XLEN(0xb5d97ef760ef1471) -li x28, MASK_XLEN(0xac7c8803e01bbf50) -SLT x14, x19, x28 -sd x14, 80(x6) + # check values half by half + mv a0, s0 # SourceData + mv a1, s1 # ie: ByteDstData + srli a2, s2, 2 # * 4 + jal ra, memcmp2 + sb a0, 1(s3) + or s4, s4, a0 -# Testcase 11: rs1:x19(0x66faf98908135d58), rs2:x14(0xb3ab1b2cdf26f517), result rd:x25(0x0000000000000000) -li x19, MASK_XLEN(0x66faf98908135d58) -li x14, MASK_XLEN(0xb3ab1b2cdf26f517) -SLT x25, x19, x14 -sd x25, 88(x6) + # check values half by half + mv a0, s0 # SourceData + addi a1, s1, 1 # ie: 
ByteDstData+1 + srli a2, s2, 2 # * 4 -1 + subi a2, a2, 1 + jal ra, memcmp2 + sb a0, 2(s3) + or s4, s4, a0 + + # check values word by word + mv a0, s0 # SourceData + mv a1, s1 # ie: ByteDstData + srli a2, s2, 1 # * 2 + jal ra, memcmp4 + sb a0, 3(s3) + or s4, s4, a0 -.EQU NUMTESTS,12 + # check values word by word + mv a0, s0 # SourceData + addi a1, s1, 1 # ie: ByteDstData+1 + srli a2, s2, 1 # * 2 -1 + subi a2, a2, 1 + jal ra, memcmp4 + sb a0, 4(s3) + or s4, s4, a0 + # check values word by word + mv a0, s0 # SourceData + addi a1, s1, 2 # ie: ByteDstData+2 + srli a2, s2, 1 # * 2 -1 + subi a2, a2, 1 + jal ra, memcmp4 + sb a0, 5(s3) + or s4, s4, a0 + + # check values word by word + mv a0, s0 # SourceData + addi a1, s1, 3 # ie: ByteDstData+3 + srli a2, s2, 1 # * 2 -1 + subi a2, a2, 1 + jal ra, memcmp4 + sb a0, 6(s3) + or s4, s4, a0 + + # check values double by double + mv a0, s0 # SourceData + mv a1, s1 # ie: ByteDstData + srli a2, s2, 0 # * 1 + jal ra, memcmp8 + sb a0, 7(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 1 # ie: ByteDstData+1 + srli a2, s2, 0 # * 1 -1 + subi a2, a2, 1 + jal ra, memcmp8 + sb a0, 8(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 2 # ie: ByteDstData+2 + srli a2, s2, 0 # * 1 -1 + subi a2, a2, 1 + jal ra, memcmp8 + sb a0, 9(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 3 # ie: ByteDstData+3 + srli a2, s2, 0 # * 1 -1 + subi a2, a2, 1 + jal ra, memcmp8 + sb a0, 10(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 4 # ie: ByteDstData+4 + srli a2, s2, 0 # * 1 -1 + subi a2, a2, 1 + jal ra, memcmp8 + sb a0, 11(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 5 # ie: ByteDstData+5 + srli a2, s2, 0 # * 1 -1 + subi a2, a2, 1 + jal ra, memcmp8 + sb a0, 12(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 6 # ie: ByteDstData+6 + srli a2, s2, 0 # * 1 -1 + subi a2, a2, 1 + 
jal ra, memcmp8 + sb a0, 13(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 7 # ie: ByteDstData+7 + srli a2, s2, 0 # * 1 + subi a2, a2, 1 + jal ra, memcmp8 + sb a0, 14(s3) + + addi s3, s3, 15 + mv a3, s3 + or a0, s4, a0 + mv ra, s4 + ret ra + + +.type memcmp1, @function +# returns which index mismatch, -1 if none +memcmp1: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 1 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp1_loop: + lbu t3, 0(t0) + lbu t4, 0(t1) + bne t3, t4, memcmp1_ne + addi t0, t0, 1 + addi t1, t1, 1 + addi t2, t2, 1 + blt t2, a2, memcmp1_loop + li a0, -1 + ret +memcmp1_ne: + mv a0, t2 + ret + +.type memcmp2, @function +# returns which index mismatch, -1 if none +memcmp2: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 2 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp2_loop: + lhu t3, 0(t0) + lhu t4, 0(t1) + bne t3, t4, memcmp2_ne + addi t0, t0, 2 + addi t1, t1, 2 + addi t2, t2, 1 + blt t2, a2, memcmp2_loop + li a0, -1 + ret +memcmp2_ne: + mv a0, t2 + ret + +.type memcmp4, @function +# returns which index mismatch, -1 if none +memcmp4: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp4_loop: + lwu t3, 0(t0) + lwu t4, 0(t1) + bne t3, t4, memcmp4_ne + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcmp4_loop + li a0, -1 + ret +memcmp4_ne: + mv a0, t2 + ret + +.type memcmp8, @function +# returns which index mismatch, -1 if none +memcmp8: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp8_loop: + ld t3, 0(t0) + ld t4, 0(t1) + bne t3, t4, memcmp8_ne + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcmp8_loop + li a0, -1 + ret +memcmp8_ne: + mv a0, t2 + ret + + RVTEST_CODE_END RVMODEL_HALT +.type memcpy8_1, @function +# load 8 bytes using load double then store using 8 sb +memcpy8_1: + # a0 is 
the source + # a1 is the dst + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcpy8_1_loop: + ld t3, 0(t0) + andi t4, t3, 0xff + sb t4, 0(t1) + slli t4, t3, 8 + andi t4, t4, 0xff + sb t4, 1(t1) + + slli t4, t3, 16 + andi t4, t4, 0xff + sb t4, 2(t1) + + slli t4, t3, 24 + andi t4, t4, 0xff + sb t4, 3(t1) + + slli t4, t3, 32 + andi t4, t4, 0xff + sb t4, 4(t1) + + slli t4, t3, 40 + andi t4, t4, 0xff + sb t4, 5(t1) + + slli t4, t3, 48 + andi t4, t4, 0xff + sb t4, 6(t1) + + slli t4, t3, 56 + andi t4, t4, 0xff + sb t4, 7(t1) + + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_1_loop + ret + +.type memcpy8_2, @function +# load 8 bytes using load double then store using 4 sh +memcpy8_2: + # a0 is the source + # a1 is the dst + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 + + # 16 bit mask + lui t4, 0xf + ori t4, t4, 0xfff + +memcpy8_2_loop: + ld t3, 0(t0) + and t4, t4, t3 + sh t4, 0(t1) + + slli t4, t3, 16 + and t4, t4, t3 + sh t4, 2(t1) + + slli t4, t3, 32 + and t4, t4, t3 + sh t4, 4(t1) + + slli t4, t3, 48 + and t4, t4, t3 + sh t4, 6(t1) + + + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_2_loop + ret + +.type memcpy8_4, @function +# load 8 bytes using load double then store using 2 sw +memcpy8_4: + # a0 is the source + # a1 is the dst + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 + + # 32 bit mask + lui t4, 0xffff + ori t4, t4, 0xfff + +memcpy8_4_loop: + ld t3, 0(t0) + and t4, t4, t3 + sw t4, 0(t1) + + slli t4, t3, 32 + and t4, t4, t3 + sw t4, 4(t1) + + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_4_loop + ret + +.type memcpy8_8, @function +# load 8 bytes using load double then store using 1 sd +memcpy8_8: + # a0 is the source + # a1 is the dst + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 + +memcpy8_8_loop: + ld t3, 0(t0) + sd t4, 0(t1) + + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt 
t2, a2, memcpy8_8_loop + ret + + RVTEST_DATA_BEGIN -.align 4 +.align 3 rvtest_data: -.word 0x98765432 +SourceData: +.8byte 0x0706050403020100, 0x0f0e0d0c0b0a0908, 0x1716151413021110, 0x1f1e1d1c1b1a1918 +.8byte 0x2726252423222120, 0x2f2e2d2c2b2a2928, 0x3736353433023130, 0x3f3e3d3c3b3a3938 +.8byte 0x4746454443424140, 0x4f4e4d4c4b4a4948, 0x5756555453025150, 0x5f5e5d5c5b5a5958 +.8byte 0x6766656463626160, 0x6f6e6d6c6b6a6968, 0x7776757473027170, 0x7f7e7d7c7b7a7978 +.8byte 0xdeadbeefdeadbeef + +Response1ByteOffsetData: +.8byte 0x0807060504030201, 0x100f0e0d0c0b0a09, 0x1817161514130211, 0x201f1e1d1c1b1a19 +.8byte 0x2827262524232221, 0x302f2e2d2c2b2a29, 0x3837363534330231, 0x403f3e3d3c3b3a39 +.8byte 0x4847464544434241, 0x504f4e4d4c4b4a49, 0x5857565554530251, 0x605f5e5d5c5b5a59 +.8byte 0x6867666564636261, 0x706f6e6d6c6b6a69, 0x7877767574730271, 0xde7f7e7d7c7b7a79 + +Response2ByteOffsetData: +.8byte 0x0908070605040302, 0x11100f0e0d0c0b0a, 0x1918171615141302, 0x21201f1e1d1c1b1a +.8byte 0x2928272625242322, 0x31302f2e2d2c2b2a, 0x3938373635343302, 0x41403f3e3d3c3b3a +.8byte 0x4948474645444342, 0x51504f4e4d4c4b4a, 0x5958575655545302, 0x61605f5e5d5c5b5a +.8byte 0x6968676665646362, 0x71706f6e6d6c6b6a, 0x7978777675747302, 0xdead7f7e7d7c7b7a + +Response3ByteOffsetData: +.8byte 0x0a09080706050403, 0x0211100f0e0d0c0b, 0x1a19181716151413, 0x2221201f1e1d1c1b +.8byte 0x2a29282726252423, 0x0231302f2e2d2c2b, 0x3a39383736353433, 0x4241403f3e3d3c3b +.8byte 0x4a49484746454443, 0x0251504f4e4d4c4b, 0x5a59585756555453, 0x6261605f5e5d5c5b +.8byte 0x6a69686766656463, 0x0271706f6e6d6c6b, 0x7a79787776757473, 0xdeadbe7f7e7d7c7b + +Response4ByteOffsetData: +.8byte 0x0b0a090807060504, 0x130211100f0e0d0c, 0x1b1a191817161514, 0x232221201f1e1d1c +.8byte 0x2b2a292827262524, 0x330231302f2e2d2c, 0x3b3a393837363534, 0x434241403f3e3d3c +.8byte 0x4b4a494847464544, 0x530251504f4e4d4c, 0x5b5a595857565554, 0x636261605f5e5d5c +.8byte 0x6b6a696867666564, 0x730271706f6e6d6c, 0x7b7a797877767574, 0xdeadbeef7f7e7d7c + 
+Response5ByteOffsetData: +.8byte 0x0c0b0a0908070605, 0x14130211100f0e0d, 0x1c1b1a1918171615, 0x24232221201f1e1d +.8byte 0x2c2b2a2928272625, 0x34330231302f2e2d, 0x3c3b3a3938373635, 0x44434241403f3e3d +.8byte 0x4c4b4a4948474645, 0x54530251504f4e4d, 0x5c5b5a5958575655, 0x64636261605f5e5d +.8byte 0x6c6b6a6968676665, 0x74730271706f6e6d, 0x7c7b7a7978777675, 0xdeadbeefde7f7e7d + +Response6ByteOffsetData: +.8byte 0x0d0c0b0a09080706, 0x1514130211100f0e, 0x1d1c1b1a19181716, 0x2524232221201f1e +.8byte 0x2d2c2b2a29282726, 0x3534330231302f2e, 0x3d3c3b3a39383736, 0x4544434241403f3e +.8byte 0x4d4c4b4a49484746, 0x5554530251504f4e, 0x5d5c5b5a59585756, 0x6564636261605f5e +.8byte 0x6d6c6b6a69686766, 0x7574730271706f6e, 0x7d7c7b7a79787776, 0xdeadbeefdead7f7e + +Response7ByteOffsetData: +.8byte 0x0e0d0c0b0a090807, 0x161514130211100f, 0x1e1d1c1b1a191817, 0x262524232221201f +.8byte 0x2e2d2c2b2a292827, 0x363534330231302f, 0x3e3d3c3b3a393837, 0x464544434241403f +.8byte 0x4e4d4c4b4a494847, 0x565554530251504f, 0x5e5d5c5b5a595857, 0x666564636261605f +.8byte 0x6e6d6c6b6a696867, 0x767574730271706f, 0x7e7d7c7b7a797877, 0xdeadbeefdeadbe7f + RVTEST_DATA_END RVMODEL_DATA_BEGIN +ByteDstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -wally_signature: - .fill NUMTESTS*(XLEN/32),4,0xdeadbeef +Half0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Half1DstData: +.8byte 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#ifdef rvtest_mtrap_routine +Word0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -mtrap_sigptr: - .fill 64*(XLEN/32),4,0xdeadbeef +Word1DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#endif +Word2DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Word3DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#ifdef rvtest_gpr_save - -gpr_save: - .fill 32*(XLEN/32),4,0xdeadbeef - -#endif +Double1DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double2DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double3DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double4DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double5DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double6DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double7DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +signature: + .fill 225, 1, 0xff + RVMODEL_DATA_END // ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S // David_Harris@hmc.edu & Katherine Parry From 0dd516e90fcedd6db57db9d5cad1e717d9d1c86e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 12:49:35 -0500 Subject: [PATCH 15/32] Fixed bugs in misaligned test. 
--- .../src/WALLY-misaligned-access-01.S | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 76496ff47..325238270 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -240,7 +240,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 1 # ie: ByteDstData+1 srli a2, s2, 2 # * 4 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp2 sb a0, 2(s3) or s4, s4, a0 @@ -257,7 +257,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 1 # ie: ByteDstData+1 srli a2, s2, 1 # * 2 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp4 sb a0, 4(s3) or s4, s4, a0 @@ -266,7 +266,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 2 # ie: ByteDstData+2 srli a2, s2, 1 # * 2 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp4 sb a0, 5(s3) or s4, s4, a0 @@ -275,7 +275,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 3 # ie: ByteDstData+3 srli a2, s2, 1 # * 2 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp4 sb a0, 6(s3) or s4, s4, a0 @@ -291,7 +291,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 1 # ie: ByteDstData+1 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 8(s3) @@ -299,7 +299,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 2 # ie: ByteDstData+2 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 9(s3) @@ -307,7 +307,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 3 # ie: ByteDstData+3 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 10(s3) @@ -315,7 +315,7 @@ CheckAllWriteSignature: mv a0, 
s0 # SourceData addi a1, s1, 4 # ie: ByteDstData+4 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 11(s3) @@ -323,7 +323,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 5 # ie: ByteDstData+5 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 12(s3) @@ -331,7 +331,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 6 # ie: ByteDstData+6 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 13(s3) @@ -339,7 +339,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 7 # ie: ByteDstData+7 srli a2, s2, 0 # * 1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 14(s3) @@ -347,7 +347,7 @@ CheckAllWriteSignature: mv a3, s3 or a0, s4, a0 mv ra, s4 - ret ra + ret .type memcmp1, @function @@ -505,7 +505,8 @@ memcpy8_2: # 16 bit mask lui t4, 0xf - ori t4, t4, 0xfff + li t3, 0xfff + or t4, t4, t3 memcpy8_2_loop: ld t3, 0(t0) @@ -543,7 +544,8 @@ memcpy8_4: # 32 bit mask lui t4, 0xffff - ori t4, t4, 0xfff + li t3, 0xfff + or t4, t4, t3 memcpy8_4_loop: ld t3, 0(t0) From 53bcb458449e3d7ecc028e1bc221e83de5c502a0 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 14:50:33 -0500 Subject: [PATCH 16/32] Progress --- ...ALLY-misaligned-access-01.reference_output | 114 +++++++------- .../src/WALLY-misaligned-access-01.S | 140 ++++++++++-------- 2 files changed, 135 insertions(+), 119 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index b0078f9ac..9c1539122 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -478,60 
+478,60 @@ deadbeef 7e7d7c7b deadbe7f deadbeef -00000000 #signature -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00 +0fffffff #signature +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ff diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 325238270..9ceff3694 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -40,6 +40,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ la a3, signature # does not get overwritten by any functions +TEST_BYTE: # byte copy region. 
always naturally aligned la a0, SourceData la a1, ByteDstData @@ -52,6 +53,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_HALF0: la a0, SourceData la a1, Half0DstData li a2, 16 @@ -63,6 +65,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_HALF1: la a0, SourceData+1 la a1, Half1DstData li a2, 16 @@ -74,6 +77,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_WORD0: la a0, SourceData la a1, Word0DstData li a2, 16 @@ -85,6 +89,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_WORD1: la a0, SourceData+1 la a1, Word1DstData li a2, 16 @@ -96,6 +101,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_WORD2: la a0, SourceData+2 la a1, Word2DstData li a2, 16 @@ -107,6 +113,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_WORD3: la a0, SourceData+3 la a1, Word3DstData li a2, 16 @@ -118,6 +125,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE0: la a0, SourceData la a1, Double0DstData li a2, 16 @@ -129,6 +137,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE1: la a0, SourceData+1 la a1, Double1DstData li a2, 16 @@ -140,6 +149,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE2: la a0, SourceData+2 la a1, Double2DstData li a2, 16 @@ -151,6 +161,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def 
TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE3: la a0, SourceData+3 la a1, Double3DstData li a2, 16 @@ -162,6 +173,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE4: la a0, SourceData+4 la a1, Double4DstData li a2, 16 @@ -173,6 +185,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE5: la a0, SourceData+5 la a1, Double5DstData li a2, 16 @@ -184,6 +197,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE6: la a0, SourceData+6 la a1, Double6DstData li a2, 16 @@ -195,6 +209,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE7: la a0, SourceData+7 la a1, Double7DstData li a2, 16 @@ -206,6 +221,8 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +RVMODEL_HALT + .type CheckAll, @function # a0 is the SourceData, (golden), a1 is the data to be checked. 
# a2 is the number of doubles @@ -217,13 +234,13 @@ CheckAllWriteSignature: mv s1, a1 mv s2, a2 mv s3, a3 - # there is no stack so I'm saving ra into s4 - mv s4, ra + # there is no stack so I'm saving ra into s5 + mv s5, ra # check values byte by byte mv a0, s0 # SourceData mv a1, s1 # ie: ByteDstData - srli a2, s2, 3 # * 8 + slli a2, s2, 3 # * 8 jal ra, memcmp1 sb a0, 0(s3) mv s4, a0 @@ -231,50 +248,50 @@ CheckAllWriteSignature: # check values half by half mv a0, s0 # SourceData mv a1, s1 # ie: ByteDstData - srli a2, s2, 2 # * 4 + slli a2, s2, 2 # * 4 jal ra, memcmp2 sb a0, 1(s3) or s4, s4, a0 # check values half by half - mv a0, s0 # SourceData - addi a1, s1, 1 # ie: ByteDstData+1 - srli a2, s2, 2 # * 4 -1 + addi a0, s0, 1 # SourceData+1 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 2 # * 4 -1 addi a2, a2, -1 jal ra, memcmp2 sb a0, 2(s3) or s4, s4, a0 # check values word by word - mv a0, s0 # SourceData + addi a0, s0, 0 # SourceData mv a1, s1 # ie: ByteDstData - srli a2, s2, 1 # * 2 + slli a2, s2, 1 # * 2 jal ra, memcmp4 sb a0, 3(s3) or s4, s4, a0 # check values word by word - mv a0, s0 # SourceData - addi a1, s1, 1 # ie: ByteDstData+1 - srli a2, s2, 1 # * 2 -1 + addi a0, s0, 1 # SourceData+1 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 sb a0, 4(s3) or s4, s4, a0 # check values word by word - mv a0, s0 # SourceData - addi a1, s1, 2 # ie: ByteDstData+2 - srli a2, s2, 1 # * 2 -1 + addi a0, s0, 2 # SourceData+2 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 sb a0, 5(s3) or s4, s4, a0 # check values word by word - mv a0, s0 # SourceData - addi a1, s1, 3 # ie: ByteDstData+3 - srli a2, s2, 1 # * 2 -1 + addi a0, s0, 3 # SourceData+3 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 sb a0, 6(s3) @@ -283,62 +300,62 @@ CheckAllWriteSignature: # check values double by double mv a0, s0 # SourceData mv a1, s1 # ie: ByteDstData - srli a2, 
s2, 0 # * 1 + slli a2, s2, 0 # * 1 jal ra, memcmp8 sb a0, 7(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 1 # ie: ByteDstData+1 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 1 # SourceData+1 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 8(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 2 # ie: ByteDstData+2 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 2 # SourceData+2 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 9(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 3 # ie: ByteDstData+3 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 3 # SourceData+3 + addi a1, s1, 2 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 10(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 4 # ie: ByteDstData+4 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 4 # SourceData+4 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 11(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 5 # ie: ByteDstData+5 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 5 # SourceData+5 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 12(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 6 # ie: ByteDstData+6 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 6 # SourceData+6 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 13(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 7 # ie: ByteDstData+7 - srli a2, s2, 0 # * 1 + addi a0, s0, 7 # SourceData+7 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 14(s3) @@ -346,7 +363,7 @@ CheckAllWriteSignature: addi s3, s3, 15 mv a3, s3 or a0, s4, a0 - mv ra, s4 + mv ra, s5 ret @@ -444,7 +461,6 
@@ memcmp8_ne: RVTEST_CODE_END -RVMODEL_HALT .type memcpy8_1, @function # load 8 bytes using load double then store using 8 sb @@ -459,31 +475,31 @@ memcpy8_1_loop: ld t3, 0(t0) andi t4, t3, 0xff sb t4, 0(t1) - slli t4, t3, 8 + srli t4, t3, 8 andi t4, t4, 0xff sb t4, 1(t1) - slli t4, t3, 16 + srli t4, t3, 16 andi t4, t4, 0xff sb t4, 2(t1) - slli t4, t3, 24 + srli t4, t3, 24 andi t4, t4, 0xff sb t4, 3(t1) - slli t4, t3, 32 + srli t4, t3, 32 andi t4, t4, 0xff sb t4, 4(t1) - slli t4, t3, 40 + srli t4, t3, 40 andi t4, t4, 0xff sb t4, 5(t1) - slli t4, t3, 48 + srli t4, t3, 48 andi t4, t4, 0xff sb t4, 6(t1) - slli t4, t3, 56 + srli t4, t3, 56 andi t4, t4, 0xff sb t4, 7(t1) @@ -506,23 +522,23 @@ memcpy8_2: # 16 bit mask lui t4, 0xf li t3, 0xfff - or t4, t4, t3 + or t5, t4, t3 memcpy8_2_loop: ld t3, 0(t0) - and t4, t4, t3 + and t4, t3, t5 sh t4, 0(t1) - slli t4, t3, 16 - and t4, t4, t3 + srli t4, t3, 16 + and t4, t4, t5 sh t4, 2(t1) - slli t4, t3, 32 - and t4, t4, t3 + srli t4, t3, 32 + and t4, t4, t5 sh t4, 4(t1) - slli t4, t3, 48 - and t4, t4, t3 + srli t4, t3, 48 + and t4, t4, t5 sh t4, 6(t1) @@ -545,15 +561,15 @@ memcpy8_4: # 32 bit mask lui t4, 0xffff li t3, 0xfff - or t4, t4, t3 + or t5, t4, t3 memcpy8_4_loop: ld t3, 0(t0) - and t4, t4, t3 + and t4, t3, t5 sw t4, 0(t1) - slli t4, t3, 32 - and t4, t4, t3 + srli t4, t3, 32 + and t4, t4, t5 sw t4, 4(t1) addi t0, t0, 8 @@ -730,7 +746,7 @@ Double7DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef signature: - .fill 225, 1, 0xff + .fill 225, 1, 0x00 RVMODEL_DATA_END // ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S From b5ecae205604c64cc5150f65a1fde9adec041ddf Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 18:50:13 -0500 Subject: [PATCH 17/32] Working through issues with the spill logic.
--- src/lsu/align.sv | 50 +++++--- src/lsu/lsu.sv | 2 +- ...ALLY-misaligned-access-01.reference_output | 65 ++++++----- .../src/WALLY-misaligned-access-01.S | 110 +++++++++--------- 4 files changed, 124 insertions(+), 103 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index b517dfcdb..3708674aa 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -68,14 +68,25 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; - //////////////////////////////////////////////////////////////////////////////////////////////////// - // PC logic - //////////////////////////////////////////////////////////////////////////////////////////////////// - localparam LLENINBYTES = P.LLEN/8; logic [P.XLEN-1:0] IEUAdrIncrementM; + logic [3:0] IncrementAmount; + + logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; + logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; + + always_comb begin + case(MemRWM) + 2'b00: IncrementAmount = 4'd0; + 2'b01: IncrementAmount = 4'd1; + 2'b10: IncrementAmount = 4'd3; + 2'b11: IncrementAmount = 4'd7; + default: IncrementAmount = 4'd7; + endcase + end /* verilator lint_off WIDTHEXPAND */ - assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + //assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); @@ -88,15 +99,16 @@ module align import cvw::*; #(parameter cvw_t P) ( // 1) operation size // 2) offset // 3) access location within the cacheline - logic [$clog2(P.DCACHE_LINELENINBITS/8)-1:$clog2(LLENINBYTES)] WordOffsetM; + localparam OFFSET_BIT_POS = $clog2(P.DCACHE_LINELENINBITS/8); + logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic 
HalfSpillM, WordSpillM; - assign {WordOffsetM, ByteOffsetM} = IEUAdrM[$clog2(P.DCACHE_LINELENINBITS/8)-1:0]; - assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; - assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; + assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:0] == '1) & Funct3M[1:0] == 2'b01; + assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin logic DoubleSpillM; - assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; + assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & Funct3M[1:0] == 2'b11; assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM); @@ -154,10 +166,18 @@ module align import cvw::*; #(parameter cvw_t P) ( // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; - assign LSUWriteDataShiftedM = {LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); - mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. + + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 
8 * ByteOffsetM : '0); + assign LSUWriteDataShiftedM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; + assign LSUWriteDataSpillM = LSUWriteDataShiftedM; + //mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN], LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; - assign ByteMaskShiftedM = {ByteMaskExtendedM, ByteMaskM}; - mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, SelSpillM, ByteMaskSpillM); - + assign ByteMaskShiftedM = ByteMaskMuxM; + mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskM}, + {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, {SelSpillM, SelSpillE}, ByteMaskSpillM); + + flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SelSpillE, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); + mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index ef9edb72b..44689a1d1 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -297,7 +297,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( - .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), + .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, diff --git 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 9c1539122..dd8a642fc 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -62,38 +62,39 @@ 77767574 7b7a7978 7f7e7d7c -04030201 # Half1DstData -08070605 -0c0b0a09 -100f0e0d -14130211 -18171615 -1c1b1a19 -201f1e1d -24232221 -28272625 -2c2b2a29 -302f2e2d -34330231 -38373635 -3c3b3a39 -403f3e3d -44434241 -48474645 -4c4b4a49 -504f4e4d -54530251 -58575655 -5c5b5a59 -605f5e5d -64636261 -68676665 -6c6b6a69 -706f6e6d -74730271 -78777675 -7c7b7a79 -de7f7e7d +020100ef # Half1DstData +06050403 +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +7fdeadbe 03020100 # Word0DstData 07060504 0b0a0908 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 9ceff3694..d6ae2603f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -66,14 +66,14 @@ TEST_HALF0: jal ra, CheckAllWriteSignature TEST_HALF1: - la a0, SourceData+1 - la a1, Half1DstData + la a0, SourceData + la a1, Half1DstData+1 li a2, 16 jal ra, memcpy8_2 # check if the values are write for all sizes and offsets of misaligned 
loads. - la a0, SourceData+1 - la a1, Half1DstData + la a0, SourceData + la a1, Half1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature @@ -90,38 +90,38 @@ TEST_WORD0: jal ra, CheckAllWriteSignature TEST_WORD1: - la a0, SourceData+1 - la a1, Word1DstData + la a0, SourceData + la a1, Word1DstData+1 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+1 - la a1, Word1DstData + la a0, SourceData + la a1, Word1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature TEST_WORD2: - la a0, SourceData+2 - la a1, Word2DstData + la a0, SourceData + la a1, Word2DstData+2 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+2 - la a1, Word2DstData + la a0, SourceData + la a1, Word2DstData+2 li a2, 16 jal ra, CheckAllWriteSignature TEST_WORD3: - la a0, SourceData+3 - la a1, Word3DstData + la a0, SourceData + la a1, Word3DstData+3 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+3 - la a1, Word3DstData + la a0, SourceData + la a1, Word3DstData+3 li a2, 16 jal ra, CheckAllWriteSignature @@ -138,86 +138,86 @@ TEST_DOUBLE0: jal ra, CheckAllWriteSignature TEST_DOUBLE1: - la a0, SourceData+1 - la a1, Double1DstData + la a0, SourceData + la a1, Double1DstData+1 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+1 - la a1, Double1DstData + la a0, SourceData + la a1, Double1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE2: - la a0, SourceData+2 - la a1, Double2DstData + la a0, SourceData + la a1, Double2DstData+2 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. 
- la a0, SourceData+2 - la a1, Double2DstData + la a0, SourceData + la a1, Double2DstData+2 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE3: - la a0, SourceData+3 - la a1, Double3DstData + la a0, SourceData + la a1, Double3DstData+3 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+3 - la a1, Double3DstData + la a0, SourceData + la a1, Double3DstData+3 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE4: - la a0, SourceData+4 - la a1, Double4DstData + la a0, SourceData + la a1, Double4DstData+4 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+4 - la a1, Double4DstData + la a0, SourceData + la a1, Double4DstData+4 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE5: - la a0, SourceData+5 - la a1, Double5DstData + la a0, SourceData + la a1, Double5DstData+5 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+5 - la a1, Double5DstData + la a0, SourceData + la a1, Double5DstData+5 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE6: - la a0, SourceData+6 - la a1, Double6DstData + la a0, SourceData + la a1, Double6DstData+6 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+6 - la a1, Double6DstData + la a0, SourceData + la a1, Double6DstData+6 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE7: - la a0, SourceData+7 - la a1, Double7DstData + la a0, SourceData + la a1, Double7DstData+7 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. 
- la a0, SourceData+7 - la a1, Double7DstData + la a0, SourceData + la a1, Double7DstData+7 li a2, 16 jal ra, CheckAllWriteSignature @@ -672,7 +672,7 @@ Half1DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word0DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef @@ -684,19 +684,19 @@ Word1DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word2DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word3DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double0DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef @@ -708,43 +708,43 @@ Double1DstData: .8byte 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double2DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double3DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double4DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double5DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double6DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double7DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef signature: .fill 225, 1, 0x00 From 7b22b269f167d8d366d1c53603dc565e1d1a8e0b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 1 Nov 2023 14:25:18 -0500 Subject: [PATCH 18/32] Finally the d$ spill works. At least until the next bug. Definitely needs a lot of cleanup. --- src/lsu/align.sv | 22 ++++++++++++++++------ src/lsu/lsu.sv | 13 +++++++++---- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 3708674aa..8dda91a1c 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -53,10 +53,13 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [P.LLEN-1:0] DCacheReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [1:0] MemRWSpillM, + output logic SelStoreDelay, //*** this is bad. 
really don't like moving this outside + output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic SpillStallM); // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1] - typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; + typedef enum logic [1:0] {STATE_READY, STATE_SPILL, STATE_STORE_DELAY} statetype; statetype CurrState, NextState; logic TakeSpillM; @@ -74,6 +77,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; + logic SaveByteMask; always_comb begin case(MemRWM) @@ -123,17 +127,23 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (TakeSpillM) NextState = STATE_SPILL; + STATE_READY: if (TakeSpillM & ~MemRWM[0]) NextState = STATE_SPILL; + else if(TakeSpillM & MemRWM[0])NextState = STATE_STORE_DELAY; else NextState = STATE_READY; STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; + STATE_STORE_DELAY: NextState = STATE_SPILL; default: NextState = STATE_READY; endcase end - assign SelSpillM = (CurrState == STATE_SPILL); - assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall); + assign SelSpillM = (CurrState == STATE_SPILL | CurrState == STATE_STORE_DELAY); + assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); + assign SaveByteMask = (CurrState == STATE_READY & TakeSpillM); assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; + assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); + assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; + mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM); 
//////////////////////////////////////////////////////////////////////////////////////////////////// // Merge spilled data @@ -178,6 +188,6 @@ module align import cvw::*; #(parameter cvw_t P) ( mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskM}, {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, {SelSpillM, SelSpillE}, ByteMaskSpillM); - flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SelSpillE, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); + flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SaveByteMask, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 44689a1d1..0d26fed6e 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -135,7 +135,10 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write - + logic [1:0] MemRWSpillM; + logic SpillStallM; + logic SelStoreDelay; + logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB logic DataUpdateDAM; // DTLB hit needs to update dirty or access bits @@ -157,7 +160,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .MemRWM, .CacheableM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .MemRWSpillM, .DCacheReadDataWordSpillM, .SpillStallM, + .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; end else begin : no_ziccslm_align @@ -167,6 +171,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( 
assign DCacheReadDataWordSpillM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; + assign MemRWSpillM = MemRWM; end ///////////////////////////////////////////////////////////////////////////////////////////// @@ -205,7 +210,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall; - assign LSUStallM = CacheBusHPWTStall | SelSpillE; + assign LSUStallM = CacheBusHPWTStall | SpillStallM; ///////////////////////////////////////////////////////////////////////////////////////////// // MMU and misalignment fault logic required if privileged unit exists @@ -297,7 +302,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( - .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), + .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(SelStoreDelay ? 2'b00 : CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, From 3817d792f6e5dbff295f8997a6c236b4230c5dc6 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 1 Nov 2023 17:51:48 -0500 Subject: [PATCH 19/32] Progress. I think the remaining bugs are in the regression test's signature. 
--- .../references/WALLY-misaligned-access-01.reference_output | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index dd8a642fc..c63263f04 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -94,7 +94,8 @@ 76757473 7a797877 7e7d7c7b -7fdeadbe +deadbe7f +deadbeef 03020100 # Word0DstData 07060504 0b0a0908 From f89673d7e51fe7ad262b7393adebe0acfbd6487c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 2 Nov 2023 12:07:42 -0500 Subject: [PATCH 20/32] Doesn't yet fully work. Thomas is going to finish debugging while I'm at the RISC-V Summit next week. 
--- ...ALLY-misaligned-access-01.reference_output | 71 ++++++++++--------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index c63263f04..134074cb6 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -128,39 +128,42 @@ deadbeef 77767574 7b7a7978 7f7e7d7c -04030201 # Word1DstData -08070605 -0c0b0a09 -100f0e0d -14130211 -18171615 -1c1b1a19 -201f1e1d -24232221 -28272625 -2c2b2a29 -302f2e2d -34330231 -38373635 -3c3b3a39 -403f3e3d -44434241 -48474645 -4c4b4a49 -504f4e4d -54530251 -58575655 -5c5b5a59 -605f5e5d -64636261 -68676665 -6c6b6a69 -706f6e6d -74730271 -78777675 -7c7b7a79 -de7f7e7d -05040302 # Word2DstData +020100ef # Word1DstData +06050403 +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +deadbeef +0100beef # Word2DstData +05040302 09080706 0d0c0b0a 11100f0e @@ -192,6 +195,7 @@ de7f7e7d 79787776 7d7c7b7a dead7f7e +deadbeef 06050403 # Word3DstData 0a090807 0e0d0c0b @@ -224,6 +228,7 @@ dead7f7e 7a797877 7e7d7c7b deadbe7f +deadbeef 03020100 # Double0DstData 07060504 0b0a0908 From 7dafff27a52394dd0e4c55df91fd3d07390d3e0b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 2 Nov 2023 12:47:40 -0500 Subject: [PATCH 21/32] Enabled Zicclsm in rv64gc. 
--- config/rv64gc/config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index fa603990b..5853f87a4 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -47,7 +47,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; -localparam ZICCLSM_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 1; localparam SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; From 89bf1a5cf919a2ca2af67a98e72a3e0feb064531 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 16:08:04 -0600 Subject: [PATCH 22/32] Fixed bug which broke the non Zicclsm configs. --- src/lsu/lsu.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 0d26fed6e..ba7d8e119 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -172,6 +172,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; + assign {SpillStallM, SelStoreDelay} = '0; end ///////////////////////////////////////////////////////////////////////////////////////////// From 329f4456b0b35a17285bf22b967c30fe44ca5ac0 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 16:10:10 -0600 Subject: [PATCH 23/32] Missed tests.vh. 
--- testbench/tests.vh | 1 + 1 file changed, 1 insertion(+) diff --git a/testbench/tests.vh b/testbench/tests.vh index 5e4f607cb..7b9243368 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1971,6 +1971,7 @@ string arch64zbs[] = '{ string wally64priv[] = '{ `WALLYTEST, "rv64i_m/privilege/src/WALLY-minfo-01.S", + "rv64i_m/privilege/src/WALLY-misaligned-access-01.S", "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S", "rv64i_m/privilege/src/WALLY-cboz-01.S", "rv64i_m/privilege/src/WALLY-cbom-01.S", From b555620ac823ca8636f531a4f2cdc5ce6458533b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:02:15 -0600 Subject: [PATCH 24/32] Fixed bug in the misaligned access test. --- .../rv64i_m/privilege/src/WALLY-misaligned-access-01.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index d6ae2603f..90ef6283f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -559,9 +559,8 @@ memcpy8_4: li t2, 0 # 32 bit mask - lui t4, 0xffff - li t3, 0xfff - or t5, t4, t3 + addi t4, x0, -1 + srli t5, t4, 32 memcpy8_4_loop: ld t3, 0(t0) From bd9a75058393082d3b0ba0e6634dbbed5a4050e8 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:18:45 -0600 Subject: [PATCH 25/32] Fixed spill bugs in the aligner. 
--- src/lsu/align.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 8dda91a1c..149b7e0bc 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -80,7 +80,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SaveByteMask; always_comb begin - case(MemRWM) + case(Funct3M) 2'b00: IncrementAmount = 4'd0; 2'b01: IncrementAmount = 4'd1; 2'b10: IncrementAmount = 4'd3; @@ -108,8 +108,8 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; - assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:0] == '1) & Funct3M[1:0] == 2'b01; - assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & Funct3M[1:0] == 2'b10; + assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; + assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin logic DoubleSpillM; assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & Funct3M[1:0] == 2'b11; From 3245e2a99e5c52b9c1e24aff708c05175e0aaec7 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:34:23 -0600 Subject: [PATCH 26/32] Fixed bug in the Zicclsm test. 
--- .../rv64i_m/privilege/src/WALLY-misaligned-access-01.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 90ef6283f..2ee4e021c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -589,7 +589,7 @@ memcpy8_8: memcpy8_8_loop: ld t3, 0(t0) - sd t4, 0(t1) + sd t3, 0(t1) addi t0, t0, 8 addi t1, t1, 8 From 2491ef0e23f89dbb5d4eeb4378e8ac1038632b79 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:36:10 -0600 Subject: [PATCH 27/32] Fixed some more bugs in the Zicclsm signature. --- ...ALLY-misaligned-access-01.reference_output | 133 +++++++++--------- 1 file changed, 68 insertions(+), 65 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 134074cb6..9755a8520 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -196,38 +196,39 @@ deadbeef 7d7c7b7a dead7f7e deadbeef -06050403 # Word3DstData -0a090807 -0e0d0c0b -0211100f -16151413 -1a191817 -1e1d1c1b -2221201f -26252423 -2a292827 -2e2d2c2b -0231302f -36353433 -3a393837 -3e3d3c3b -4241403f -46454443 -4a494847 -4e4d4c4b -0251504f -56555453 -5a595857 -5e5d5c5b -6261605f -66656463 -6a696867 -6e6d6c6b -0271706f -76757473 -7a797877 -7e7d7c7b -deadbe7f +00adbeef # Word3DstData +04030201 +08070605 +0c0b0a09 +100f0e0d 
+14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d deadbeef 03020100 # Double0DstData 07060504 @@ -261,39 +262,41 @@ deadbeef 77767574 7b7a7978 7f7e7d7c -04030201 # Double1DstData -08070605 -0c0b0a09 -100f0e0d -14130211 -18171615 -1c1b1a19 -201f1e1d -24232221 -28272625 -2c2b2a29 -302f2e2d -34330231 -38373635 -3c3b3a39 -403f3e3d -44434241 -48474645 -4c4b4a49 -504f4e4d -54530251 -58575655 -5c5b5a59 -605f5e5d -64636261 -68676665 -6c6b6a69 -706f6e6d -74730271 -78777675 -7c7b7a79 -de7f7e7d -05040302 # Double2DstData +020100ef # Double1DstData +06050403 +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +0100beef # Double2DstData +05040302 09080706 0d0c0b0a 11100f0e From fa6e53d8cfddc3655b284ddec680dd55a0bfa8da Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:58:42 -0600 Subject: [PATCH 28/32] Fixed all the bugs associated with the signature and the store side of misaligned access. Load misaligned is still causing some issues. 
--- src/lsu/align.sv | 2 +- ...ALLY-misaligned-access-01.reference_output | 206 +++++++++--------- 2 files changed, 110 insertions(+), 98 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 149b7e0bc..7da314ffd 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -112,7 +112,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin logic DoubleSpillM; - assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & Funct3M[1:0] == 2'b11; + assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM); diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 9755a8520..b0a7caeb3 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -295,6 +295,7 @@ deadbeef 7a797877 7e7d7c7b deadbe7f +deadbeef 0100beef # Double2DstData 05040302 09080706 @@ -328,71 +329,10 @@ deadbe7f 79787776 7d7c7b7a dead7f7e -06050403 # Double3DstData -0a090807 -0e0d0c0b -0211100f -16151413 -1a191817 -1e1d1c1b -2221201f -26252423 -2a292827 -2e2d2c2b -0231302f -36353433 -3a393837 -3e3d3c3b -4241403f -46454443 -4a494847 -4e4d4c4b -0251504f -56555453 -5a595857 -5e5d5c5b -6261605f -66656463 -6a696867 -6e6d6c6b -0271706f -76757473 -7a797877 -7e7d7c7b -deadbe7f -07060504 # Double4DestData -0b0a0908 -0f0e0d0c -13021110 -17161514 -1b1a1918 -1f1e1d1c 
-23222120 -27262524 -2b2a2928 -2f2e2d2c -33023130 -37363534 -3b3a3938 -3f3e3d3c -43424140 -47464544 -4b4a4948 -4f4e4d4c -53025150 -57565554 -5b5a5958 -5f5e5d5c -63626160 -67666564 -6b6a6968 -6f6e6d6c -73027170 -77767574 -7b7a7978 -7f7e7d7c deadbeef -08070605 # Double5DestData +00adbeef # Double3DstData +04030201 +08070605 0c0b0a09 100f0e0d 14130211 @@ -424,39 +364,44 @@ deadbeef 7c7b7a79 de7f7e7d deadbeef -09080706 # Double6DstData -0d0c0b0a -11100f0e -15141302 -19181716 -1d1c1b1a -21201f1e -25242322 -29282726 -2d2c2b2a -31302f2e -35343302 -39383736 -3d3c3b3a -41403f3e -45444342 -49484746 -4d4c4b4a -51504f4e -55545302 -59585756 -5d5c5b5a -61605f5e -65646362 -69686766 -6d6c6b6a -71706f6e -75747302 -79787776 -7d7c7b7a -dead7f7e +deadbeef # Double4DstData +03020100 +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c deadbeef -0a090807 # Double7DstData +deadbeef # Double5DstData +020100ef +06050403 +0a090807 0e0d0c0b 0211100f 16151413 @@ -487,7 +432,74 @@ deadbeef 7a797877 7e7d7c7b deadbe7f -deadbeef +deadbeef # Double6DstData +0100beef +05040302 +09080706 +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +deadbeef # Double7DstData +00adbeef +04030201 +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d 0fffffff 
#signature ffffffff ffffffff From c29ef1666bf3e6b982873c6570fac845f84ccb32 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 18:26:55 -0600 Subject: [PATCH 29/32] Found another bug in the RTL's Zicclsm alignment. --- src/lsu/align.sv | 17 ++++++++++++-- ...ALLY-misaligned-access-01.reference_output | 2 +- .../src/WALLY-misaligned-access-01.S | 22 +++++++++---------- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 7da314ffd..8a48f310d 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -107,7 +107,20 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; + logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; + + always_comb begin + case (Funct3M[1:0]) + 2'b00: AccessByteOffsetM = '0; // byte access + 2'b01: AccessByteOffsetM = {2'b00, ByteOffsetM[0]}; // half access + 2'b10: AccessByteOffsetM = {1'b0, ByteOffsetM[1:0]}; // word access + 2'b11: AccessByteOffsetM = ByteOffsetM; // double access + default: AccessByteOffsetM = ByteOffsetM; + endcase + end + assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin @@ -170,7 +183,7 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * AccessByteOffsetM : '0); assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. 
Also has the 8:1 shifter muxing for the byteoffset @@ -178,7 +191,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * AccessByteOffsetM : '0); assign LSUWriteDataShiftedM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; assign LSUWriteDataSpillM = LSUWriteDataShiftedM; //mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN], LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index b0a7caeb3..b8051ecdb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -500,7 +500,7 @@ deadbeef # Double7DstData 78777675 7c7b7a79 de7f7e7d -0fffffff #signature +ffffffff #signature ffffffff ffffffff ffffffff diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 2ee4e021c..3ff89a237 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ 
b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -255,7 +255,7 @@ CheckAllWriteSignature: # check values half by half addi a0, s0, 1 # SourceData+1 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 1 # ie: ByteDstData+1 slli a2, s2, 2 # * 4 -1 addi a2, a2, -1 jal ra, memcmp2 @@ -272,7 +272,7 @@ CheckAllWriteSignature: # check values word by word addi a0, s0, 1 # SourceData+1 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 1 # ie: ByteDstData+1 slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 @@ -281,7 +281,7 @@ CheckAllWriteSignature: # check values word by word addi a0, s0, 2 # SourceData+2 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 2 # ie: ByteDstData+2 slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 @@ -290,7 +290,7 @@ CheckAllWriteSignature: # check values word by word addi a0, s0, 3 # SourceData+3 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 3 # ie: ByteDstData+3 slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 @@ -306,7 +306,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 1 # SourceData+1 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 1 # ie: ByteDstData+1 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -314,7 +314,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 2 # SourceData+2 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 2 # ie: ByteDstData+2 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -322,7 +322,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 3 # SourceData+3 - addi a1, s1, 2 # ie: ByteDstData + addi a1, s1, 3 # ie: ByteDstData+3 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -330,7 +330,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 4 # SourceData+4 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 4 # ie: ByteDstData+4 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -338,7 +338,7 @@ CheckAllWriteSignature: # 
check values double by double addi a0, s0, 5 # SourceData+5 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 5 # ie: ByteDstData+5 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -346,7 +346,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 6 # SourceData+6 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 6 # ie: ByteDstData+6 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -354,7 +354,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 7 # SourceData+7 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 7 # ie: ByteDstData+7 slli a2, s2, 0 # * 1 addi a2, a2, -1 jal ra, memcmp8 From 7158aa8390cd4779b8a9c78234311b2ba5986d7c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 18:28:51 -0600 Subject: [PATCH 30/32] Yay! Zicclsm passes my regression test now. --- .../references/WALLY-misaligned-access-01.reference_output | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index b8051ecdb..209eb4cf4 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -556,4 +556,5 @@ ffffffff ffffffff ffffffff ffffffff -ff +000000ff +00000000 From 534538b216373d774310607ac42fbcc3ba54f67d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 18:39:36 -0600 Subject: [PATCH 31/32] Simplification. 
--- src/lsu/align.sv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 8a48f310d..1a45da923 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -79,6 +79,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; logic SaveByteMask; +/* -----\/----- EXCLUDED -----\/----- always_comb begin case(Funct3M) 2'b00: IncrementAmount = 4'd0; @@ -88,9 +89,10 @@ module align import cvw::*; #(parameter cvw_t P) ( default: IncrementAmount = 4'd7; endcase end + -----/\----- EXCLUDED -----/\----- */ /* verilator lint_off WIDTHEXPAND */ - //assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; - assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; + assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + //assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); From 8860aa9af5472d2bbf13ced3fe4319efe5eb223b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 12:35:11 -0600 Subject: [PATCH 32/32] Cleanup. 
--- src/lsu/align.sv | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 1a45da923..a54474b07 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -73,26 +73,13 @@ module align import cvw::*; #(parameter cvw_t P) ( localparam LLENINBYTES = P.LLEN/8; logic [P.XLEN-1:0] IEUAdrIncrementM; - logic [3:0] IncrementAmount; logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; logic SaveByteMask; -/* -----\/----- EXCLUDED -----\/----- - always_comb begin - case(Funct3M) - 2'b00: IncrementAmount = 4'd0; - 2'b01: IncrementAmount = 4'd1; - 2'b10: IncrementAmount = 4'd3; - 2'b11: IncrementAmount = 4'd7; - default: IncrementAmount = 4'd7; - endcase - end - -----/\----- EXCLUDED -----/\----- */ /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; - //assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); @@ -156,7 +143,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); assign SaveByteMask = (CurrState == STATE_READY & TakeSpillM); assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; - assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); + assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM);