From 12763b72977d9769b0b52a418c53560cb25b4659 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 26 Oct 2023 10:47:00 -0500 Subject: [PATCH 01/48] begin implemenation of Zicclsm. --- src/lsu/align.sv | 121 ++++++++++++++++++++++++++ src/lsu/subwordread.sv | 193 +++++++++++++++++++++++++++++++++-------- 2 files changed, 280 insertions(+), 34 deletions(-) create mode 100644 src/lsu/align.sv diff --git a/src/lsu/align.sv b/src/lsu/align.sv new file mode 100644 index 000000000..b3e810ee2 --- /dev/null +++ b/src/lsu/align.sv @@ -0,0 +1,121 @@ +/////////////////////////////////////////// +// spill.sv +// +// Written: Rose Thompson ross1728@gmail.com +// Created: 26 October 2023 +// Modified: 26 October 2023 +// +// Purpose: This module implements native alignment support for the Zicclsm extension +// It is simlar to the IFU's spill module and probably could be merged together with +// some effort. +// +// Documentation: RISC-V System on Chip Design Chapter 11 (Figure 11.5) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module align import cvw::*; #(parameter cvw_t P) ( + input logic clk, + input logic reset, + input logic StallM, FlushM, + input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage + input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM + input logic [31:0] ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed + input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched + input logic DTLBMissM, // ITLB miss, ignore memory request + + output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill + output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill + output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline + output logic [31:0] ReadDataWordSpillM)// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + + // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). 
Without a cache this is just PCF[1] + typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; + + statetype CurrState, NextState; + logic TakeSpillM, TakeSpillE; + logic SpillF; + logic SelSpillF; + logic SpillSaveF; + logic [15:0] InstrFirstHalfF; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // PC logic + //////////////////////////////////////////////////////////////////////////////////////////////////// + + localparam LLENINBYTES = LLEN/8; + logic IEUAdrIncrementM; + assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(TakeSpillM), .y(IEUAdrSpillM)); + mux2 #(P.XLEN) pcnextspillmux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(TakeSpillE), .y(IEUAdrSpillE)); + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Detect spill + //////////////////////////////////////////////////////////////////////////////////////////////////// + + // spill detection in lsu is more complex than ifu, depends on 3 factors + // 1) operation size + // 2) offset + // 3) access location within the cacheline or is the access is uncached. + // first consider uncached operations + // accesses are always aligned to the natural size of the bus (XLEN or AHBW) + + if (P.ICACHE_SUPPORTED) begin + logic SpillCachedF, SpillUncachedF; + assign SpillCachedF = &IEUAdrM[$clog2(P.ICACHE_LINELENINBITS/32)+1:1]; + assign SpillUncachedF = IEUAdrM[1]; // *** try to optimize this based on whether the next instruction is 16 bits and by fetching 64 bits in RV64 + assign SpillF = CacheableF ? 
SpillCachedF : SpillUncachedF; + end else + assign SpillF = IEUAdrM[1]; // *** might relax - only spill if next instruction is uncompressed + // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits + assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF)); + + always_ff @(posedge clk) + if (reset | FlushM) CurrState <= #1 STATE_READY; + else CurrState <= #1 NextState; + + always_comb begin + case (CurrState) + STATE_READY: if (TakeSpillF) NextState = STATE_SPILL; + else NextState = STATE_READY; + STATE_SPILL: if(StallM) NextState = STATE_SPILL; + else NextState = STATE_READY; + default: NextState = STATE_READY; + endcase + end + + assign SelSpillF = (CurrState == STATE_SPILL); + assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallF); + assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushM; + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Merge spilled instruction + //////////////////////////////////////////////////////////////////////////////////////////////////// + + // save the first 2 bytes + flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF); + + // merge together + mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); + + // Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x + always_comb + if (PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1; + else CompressedF = 1'b0; + +endmodule diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index e5666eb84..ae3e3c78b 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -29,22 +29,125 @@ module subwordread #(parameter LLEN) ( - input logic [LLEN-1:0] ReadDataWordMuxM, - input logic [2:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic 
BigEndianM, - output logic [LLEN-1:0] ReadDataM + input logic [LLEN-1:0] ReadDataWordMuxM, + input logic [$clog(LLEN/8)-1:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN/2-1:0] ReadDataM ); + localparam OFFSET_LEN = $clog(LLEN/8); + localparam HLEN = LLEN/2; logic [7:0] ByteM; logic [15:0] HalfwordM; - logic [2:0] PAdrSwap; + logic [OFFSET_LEN-1:0] PAdrSwap; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = PAdrM ^ {3{BigEndianM}}; + assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; - if (LLEN == 64) begin:swrmux + if (LLEN == 128) begin:swrmux + // ByteMe mux + always_comb + case(PAdrSwap[3:0]) + 4'b0000: ByteM = ReadDataWordMuxM[7:0]; + 4'b0001: ByteM = ReadDataWordMuxM[15:8]; + 4'b0010: ByteM = ReadDataWordMuxM[23:16]; + 4'b0011: ByteM = ReadDataWordMuxM[31:24]; + 4'b0100: ByteM = ReadDataWordMuxM[39:32]; + 4'b0101: ByteM = ReadDataWordMuxM[47:40]; + 4'b0110: ByteM = ReadDataWordMuxM[55:48]; + 4'b0111: ByteM = ReadDataWordMuxM[63:56]; + 4'b1000: ByteM = ReadDataWordMuxM[71:64]; + 4'b1001: ByteM = ReadDataWordMuxM[79:72]; + 4'b1010: ByteM = ReadDataWordMuxM[87:80]; + 4'b1011: ByteM = ReadDataWordMuxM[95:88]; + 4'b1100: ByteM = ReadDataWordMuxM[103:96]; + 4'b1101: ByteM = ReadDataWordMuxM[111:104]; + 4'b1110: ByteM = ReadDataWordMuxM[119:112]; + 4'b1111: ByteM = ReadDataWordMuxM[127:120]; + endcase + + // halfword mux + always_comb + case(PAdrSwap[3:0]) + 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; + 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; + 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; + 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; + 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; + 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; + 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; + 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; + 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; + 4'b1001: HalfwordM = ReadDataWordMuxM[87:72]; + 
4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; + 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; + 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; + 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; + 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; + //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around + 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around + endcase + + logic [31:0] WordM; + + always_comb + case(PAdrSwap[3:0]) + 4'b0000: WordM = ReadDataWordMuxM[31:0]; + 4'b0001: WordM = ReadDataWordMuxM[39:8]; + 4'b0010: WordM = ReadDataWordMuxM[47:16]; + 4'b0011: WordM = ReadDataWordMuxM[55:24]; + 4'b0100: WordM = ReadDataWordMuxM[63:32]; + 4'b0101: WordM = ReadDataWordMuxM[71:40]; + 4'b0111: WordM = ReadDataWordMuxM[79:48]; + 4'b1000: WordM = ReadDataWordMuxM[87:56]; + 4'b1001: WordM = ReadDataWordMuxM[95:64]; + 4'b1010: WordM = ReadDataWordMuxM[103:72]; + 4'b1011: WordM = ReadDataWordMuxM[111:80]; + 4'b1011: WordM = ReadDataWordMuxM[119:88]; + 4'b1100: WordM = ReadDataWordMuxM[127:96]; + 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; + endcase + + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; + 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; + 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; + 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; + 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; + 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; + 4'b1011: DblWordMM = {24'b0, ReadDataWordMuxM[119:88]}; + 4'b1100: 
DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux // ByteMe mux always_comb case(PAdrSwap[2:0]) @@ -60,35 +163,55 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[2:1]) - 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; - 2'b01: HalfwordM = ReadDataWordMuxM[31:16]; - 2'b10: HalfwordM = ReadDataWordMuxM[47:32]; - 2'b11: HalfwordM = ReadDataWordMuxM[63:48]; + case(PAdrSwap[2:0]) + 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; + 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; + 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; + 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; + 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; + 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; + 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; + 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; endcase logic [31:0] WordM; always_comb - case(PAdrSwap[2]) - 1'b0: WordM = ReadDataWordMuxM[31:0]; - 1'b1: WordM = ReadDataWordMuxM[63:32]; + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 
3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; endcase logic [63:0] DblWordM; - assign DblWordM = ReadDataWordMuxM[63:0]; + always_comb + case(PAdrSwap[2:0]) + 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; + 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; + 3'b010: DblWordMM = {16'b0, ReadDataWordMuxM[63:16]}; + 3'b011: DblWordMM = {24'b0, ReadDataWordMuxM[63:24]}; + 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; + 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; + 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; + 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; + endcase // sign extension/ NaN boxing always_comb case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase @@ -104,20 +227,22 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[1]) - 1'b0: HalfwordM = ReadDataWordMuxM[15:0]; - 1'b1: HalfwordM = ReadDataWordMuxM[31:16]; + case(PAdrSwap[1:0]) + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; + 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; endcase // sign extension always_comb case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw 3'b011: ReadDataM = ReadDataWordMuxM; // fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase end From 449abef823684cafae8d29ffde863e906beeab3c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 09:35:44 -0500 Subject: [PATCH 02/48] Progress on misaligned load/stores. 
--- src/lsu/align.sv | 44 ++++++---- src/lsu/lsu.sv | 4 +- src/lsu/subwordread.sv | 193 ++++++++--------------------------------- 3 files changed, 61 insertions(+), 180 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index b3e810ee2..8cae76a02 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -35,9 +35,11 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic StallM, FlushM, input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM + input logic [2:0] Funct3M, // Size of memory operation input logic [31:0] ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request + input logic DataUpdateDAM, // ITLB miss, ignore memory request output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill @@ -49,10 +51,10 @@ module align import cvw::*; #(parameter cvw_t P) ( statetype CurrState, NextState; logic TakeSpillM, TakeSpillE; - logic SpillF; + logic SpillM; logic SelSpillF; logic SpillSaveF; - logic [15:0] InstrFirstHalfF; + logic [LLEN-8:0] ReadDataWordFirstHalfM; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic @@ -71,19 +73,23 @@ module align import cvw::*; #(parameter cvw_t P) ( // spill detection in lsu is more complex than ifu, depends on 3 factors // 1) operation size // 2) offset - // 3) access location within the cacheline or is the access is uncached. 
- // first consider uncached operations - // accesses are always aligned to the natural size of the bus (XLEN or AHBW) - - if (P.ICACHE_SUPPORTED) begin - logic SpillCachedF, SpillUncachedF; - assign SpillCachedF = &IEUAdrM[$clog2(P.ICACHE_LINELENINBITS/32)+1:1]; - assign SpillUncachedF = IEUAdrM[1]; // *** try to optimize this based on whether the next instruction is 16 bits and by fetching 64 bits in RV64 - assign SpillF = CacheableF ? SpillCachedF : SpillUncachedF; - end else - assign SpillF = IEUAdrM[1]; // *** might relax - only spill if next instruction is uncompressed + // 3) access location within the cacheline + logic [P.DCACHE_LINELENINBITS/8-1:P.LLEN/8] WordOffsetM; + logic [P.LLEN/8-1:0] ByteOffsetM; + logic HalfSpillM, WordSpillM; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[P.DCACHE_LINELENINBITS/8-1:0]; + assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; + assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; + if(P.LLEN == 64) begin + logic DoubleSpillM; + assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; + assign SpillM = HalfSpillM | WordOffsetM | DoubleSpillM; + end else begin + assign SpillM = HalfSpillM | WordOffsetM; + end + // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillF = SpillF & ~IFUCacheBusStallF & ~(ITLBMissF | (P.SVADU_SUPPORTED & InstrUpdateDAF)); + assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -91,7 +97,7 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (TakeSpillF) NextState = STATE_SPILL; + STATE_READY: if (TakeSpillM) NextState = STATE_SPILL; else NextState = STATE_READY; STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; @@ -99,16 +105,16 
@@ module align import cvw::*; #(parameter cvw_t P) ( endcase end - assign SelSpillF = (CurrState == STATE_SPILL); - assign SelSpillNextF = (CurrState == STATE_READY & TakeSpillF) | (CurrState == STATE_SPILL & IFUCacheBusStallF); - assign SpillSaveF = (CurrState == STATE_READY) & TakeSpillF & ~FlushM; + assign SelSpillM = (CurrState == STATE_SPILL); + assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & LSUStallM); + assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; //////////////////////////////////////////////////////////////////////////////////////////////////// // Merge spilled instruction //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(16) SpillInstrReg(clk, reset, SpillSaveF, InstrRawF[15:0], InstrFirstHalfF); + flopenr #(P.LLEN-8) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[LLEN-1:8], ReadDataWordFirstHalfM); // merge together mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 191599f12..8dc843a38 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -234,6 +234,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. + // Add support for cboz dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, .WriteDataM(LSUWriteDataM), .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM[P.LLEN/8-1:0])); @@ -268,8 +269,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign CacheAtomicM = CacheableM & ~SelDTIM ? 
LSUAtomicM : '0; assign FlushDCache = FlushDCacheM & ~(SelHPTW); - // *** need RT to add support for CMOpM and LSUPrefetchM (DH 7/2/23) - // *** prefetch can just act as a read operation cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), @@ -285,6 +284,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign DCacheStallM = CacheStall & ~IgnoreRequestTLB; assign CacheBusRW = CacheBusRWTemp; + // *** add support for cboz ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index ae3e3c78b..e5666eb84 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -29,125 +29,22 @@ module subwordread #(parameter LLEN) ( - input logic [LLEN-1:0] ReadDataWordMuxM, - input logic [$clog(LLEN/8)-1:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN/2-1:0] ReadDataM + input logic [LLEN-1:0] ReadDataWordMuxM, + input logic [2:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN-1:0] ReadDataM ); - localparam OFFSET_LEN = $clog(LLEN/8); - localparam HLEN = LLEN/2; logic [7:0] ByteM; logic [15:0] HalfwordM; - logic [OFFSET_LEN-1:0] PAdrSwap; + logic [2:0] PAdrSwap; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. 
- assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; + assign PAdrSwap = PAdrM ^ {3{BigEndianM}}; - if (LLEN == 128) begin:swrmux - // ByteMe mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: ByteM = ReadDataWordMuxM[7:0]; - 4'b0001: ByteM = ReadDataWordMuxM[15:8]; - 4'b0010: ByteM = ReadDataWordMuxM[23:16]; - 4'b0011: ByteM = ReadDataWordMuxM[31:24]; - 4'b0100: ByteM = ReadDataWordMuxM[39:32]; - 4'b0101: ByteM = ReadDataWordMuxM[47:40]; - 4'b0110: ByteM = ReadDataWordMuxM[55:48]; - 4'b0111: ByteM = ReadDataWordMuxM[63:56]; - 4'b1000: ByteM = ReadDataWordMuxM[71:64]; - 4'b1001: ByteM = ReadDataWordMuxM[79:72]; - 4'b1010: ByteM = ReadDataWordMuxM[87:80]; - 4'b1011: ByteM = ReadDataWordMuxM[95:88]; - 4'b1100: ByteM = ReadDataWordMuxM[103:96]; - 4'b1101: ByteM = ReadDataWordMuxM[111:104]; - 4'b1110: ByteM = ReadDataWordMuxM[119:112]; - 4'b1111: ByteM = ReadDataWordMuxM[127:120]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; - 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; - 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; - 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; - 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; - 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; - 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; - 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; - 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; - 4'b1001: HalfwordM = ReadDataWordMuxM[87:72]; - 4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; - 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; - 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; - 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; - 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; - //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - endcase - - logic [31:0] WordM; - - always_comb - 
case(PAdrSwap[3:0]) - 4'b0000: WordM = ReadDataWordMuxM[31:0]; - 4'b0001: WordM = ReadDataWordMuxM[39:8]; - 4'b0010: WordM = ReadDataWordMuxM[47:16]; - 4'b0011: WordM = ReadDataWordMuxM[55:24]; - 4'b0100: WordM = ReadDataWordMuxM[63:32]; - 4'b0101: WordM = ReadDataWordMuxM[71:40]; - 4'b0111: WordM = ReadDataWordMuxM[79:48]; - 4'b1000: WordM = ReadDataWordMuxM[87:56]; - 4'b1001: WordM = ReadDataWordMuxM[95:64]; - 4'b1010: WordM = ReadDataWordMuxM[103:72]; - 4'b1011: WordM = ReadDataWordMuxM[111:80]; - 4'b1011: WordM = ReadDataWordMuxM[119:88]; - 4'b1100: WordM = ReadDataWordMuxM[127:96]; - 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; - endcase - - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; - 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; - 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; - 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; - 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; - 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; - 4'b1011: DblWordMM = {24'b0, ReadDataWordMuxM[119:88]}; - 4'b1100: DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = 
{{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux + if (LLEN == 64) begin:swrmux // ByteMe mux always_comb case(PAdrSwap[2:0]) @@ -163,55 +60,35 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[2:0]) - 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; - 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; - 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; - 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; - 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; - 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; - 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; - 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; + case(PAdrSwap[2:1]) + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b10: HalfwordM = ReadDataWordMuxM[47:32]; + 2'b11: HalfwordM = ReadDataWordMuxM[63:48]; endcase logic [31:0] WordM; always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + case(PAdrSwap[2]) + 1'b0: WordM = ReadDataWordMuxM[31:0]; + 1'b1: WordM = ReadDataWordMuxM[63:32]; endcase logic [63:0] DblWordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; - 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; - 3'b010: DblWordMM = 
{16'b0, ReadDataWordMuxM[63:16]}; - 3'b011: DblWordMM = {24'b0, ReadDataWordMuxM[63:24]}; - 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; - 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; - 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; - 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; - endcase + assign DblWordM = ReadDataWordMuxM[63:0]; // sign extension/ NaN boxing always_comb case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase @@ -227,22 +104,20 @@ module subwordread #(parameter LLEN) // halfword mux always_comb - case(PAdrSwap[1:0]) - 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; - 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; - 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; - 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; + case(PAdrSwap[1]) + 1'b0: HalfwordM = ReadDataWordMuxM[15:0]; + 1'b1: HalfwordM = ReadDataWordMuxM[31:16]; endcase // sign extension always_comb case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw 3'b011: ReadDataM = ReadDataWordMuxM; // fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen endcase end From 834c0df69766d24e7b4dd90fc04da2f93d98a27e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 09:49:44 -0500 Subject: [PATCH 03/48] Added file. 
--- src/lsu/subwordread-variant1.sv | 249 ++++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 src/lsu/subwordread-variant1.sv diff --git a/src/lsu/subwordread-variant1.sv b/src/lsu/subwordread-variant1.sv new file mode 100644 index 000000000..c0cfe247b --- /dev/null +++ b/src/lsu/subwordread-variant1.sv @@ -0,0 +1,249 @@ +/////////////////////////////////////////// +// subwordread.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Extract subwords and sign extend for reads +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordreadVar1 #(parameter LLEN) + ( + input logic [LLEN-1:0] ReadDataWordMuxM, + input logic [$clog(LLEN/8)-1:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN/2-1:0] ReadDataM +); + + localparam OFFSET_LEN = $clog(LLEN/8); + localparam HLEN = LLEN/2; + logic [7:0] ByteM; + logic [15:0] HalfwordM; + logic [OFFSET_LEN-1:0] PAdrSwap; + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; + + if (LLEN == 128) begin:swrmux + // ByteMe mux + always_comb + case(PAdrSwap[3:0]) + 4'b0000: ByteM = ReadDataWordMuxM[7:0]; + 4'b0001: ByteM = ReadDataWordMuxM[15:8]; + 4'b0010: ByteM = ReadDataWordMuxM[23:16]; + 4'b0011: ByteM = ReadDataWordMuxM[31:24]; + 4'b0100: ByteM = ReadDataWordMuxM[39:32]; + 4'b0101: ByteM = ReadDataWordMuxM[47:40]; + 4'b0110: ByteM = ReadDataWordMuxM[55:48]; + 4'b0111: ByteM = ReadDataWordMuxM[63:56]; + 4'b1000: ByteM = ReadDataWordMuxM[71:64]; + 4'b1001: ByteM = ReadDataWordMuxM[79:72]; + 4'b1010: ByteM = ReadDataWordMuxM[87:80]; + 4'b1011: ByteM = ReadDataWordMuxM[95:88]; + 4'b1100: ByteM = ReadDataWordMuxM[103:96]; + 4'b1101: ByteM = ReadDataWordMuxM[111:104]; + 4'b1110: ByteM = ReadDataWordMuxM[119:112]; + 4'b1111: ByteM = ReadDataWordMuxM[127:120]; + endcase + + // halfword mux + always_comb + case(PAdrSwap[3:0]) + 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; + 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; + 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; + 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; + 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; + 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; + 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; + 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; + 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; + 4'b1001: HalfwordM = 
ReadDataWordMuxM[87:72]; + 4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; + 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; + 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; + 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; + 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; + //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around + 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around + endcase + + logic [31:0] WordM; + + always_comb + case(PAdrSwap[3:0]) + 4'b0000: WordM = ReadDataWordMuxM[31:0]; + 4'b0001: WordM = ReadDataWordMuxM[39:8]; + 4'b0010: WordM = ReadDataWordMuxM[47:16]; + 4'b0011: WordM = ReadDataWordMuxM[55:24]; + 4'b0100: WordM = ReadDataWordMuxM[63:32]; + 4'b0101: WordM = ReadDataWordMuxM[71:40]; + 4'b0111: WordM = ReadDataWordMuxM[79:48]; + 4'b1000: WordM = ReadDataWordMuxM[87:56]; + 4'b1001: WordM = ReadDataWordMuxM[95:64]; + 4'b1010: WordM = ReadDataWordMuxM[103:72]; + 4'b1011: WordM = ReadDataWordMuxM[111:80]; + 4'b1011: WordM = ReadDataWordMuxM[119:88]; + 4'b1100: WordM = ReadDataWordMuxM[127:96]; + 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; + endcase + + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; + 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; + 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; + 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; + 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; + 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; + 4'b1011: DblWordMM = {24'b0, 
ReadDataWordMuxM[119:88]}; + 4'b1100: DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux + // ByteMe mux + always_comb + case(PAdrSwap[2:0]) + 3'b000: ByteM = ReadDataWordMuxM[7:0]; + 3'b001: ByteM = ReadDataWordMuxM[15:8]; + 3'b010: ByteM = ReadDataWordMuxM[23:16]; + 3'b011: ByteM = ReadDataWordMuxM[31:24]; + 3'b100: ByteM = ReadDataWordMuxM[39:32]; + 3'b101: ByteM = ReadDataWordMuxM[47:40]; + 3'b110: ByteM = ReadDataWordMuxM[55:48]; + 3'b111: ByteM = ReadDataWordMuxM[63:56]; + endcase + + // halfword mux + always_comb + case(PAdrSwap[2:0]) + 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; + 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; + 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; + 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; + 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; + 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; + 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; + 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; + endcase + + logic [31:0] WordM; + + always_comb + 
case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase + + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; + 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; + 3'b010: DblWordMM = {16'b0, ReadDataWordMuxM[63:16]}; + 3'b011: DblWordMM = {24'b0, ReadDataWordMuxM[63:24]}; + 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; + 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; + 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; + 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; + endcase + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + // byte mux + always_comb + case(PAdrSwap[1:0]) + 2'b00: ByteM = ReadDataWordMuxM[7:0]; + 2'b01: ByteM = ReadDataWordMuxM[15:8]; + 2'b10: ByteM = ReadDataWordMuxM[23:16]; + 2'b11: ByteM = ReadDataWordMuxM[31:24]; + endcase + + // halfword mux + always_comb + case(PAdrSwap[1:0]) + 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; + 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; + 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; + 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; + endcase + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b011: ReadDataM = ReadDataWordMuxM; // fld + 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen + endcase + end +endmodule From 6041bf20b3b03562daf624c59fc9457baf3d81b8 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 11:41:49 -0500 Subject: [PATCH 04/48] The misaligned load alignment lints. 
--- src/lsu/align.sv | 40 +++-- src/lsu/subwordread-variant1.sv | 249 -------------------------------- src/wally/wallypipelinedcore.sv | 2 +- 3 files changed, 28 insertions(+), 263 deletions(-) delete mode 100644 src/lsu/subwordread-variant1.sv diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 8cae76a02..897f0d181 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -36,7 +36,7 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation - input logic [31:0] ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed + input logic [P.LLEN*2-1:0]ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request input logic DataUpdateDAM, // ITLB miss, ignore memory request @@ -44,7 +44,7 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [31:0] ReadDataWordSpillM)// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [P.LLEN-1:0] ReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). 
Without a cache this is just PCF[1] typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; @@ -52,15 +52,17 @@ module align import cvw::*; #(parameter cvw_t P) ( statetype CurrState, NextState; logic TakeSpillM, TakeSpillE; logic SpillM; - logic SelSpillF; - logic SpillSaveF; - logic [LLEN-8:0] ReadDataWordFirstHalfM; + logic SelSpillM; + logic SpillSaveM; + logic [P.LLEN-1:0] ReadDataWordFirstHalfM; + logic MisalignedM; + logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic //////////////////////////////////////////////////////////////////////////////////////////////////// - localparam LLENINBYTES = LLEN/8; + localparam LLENINBYTES = P.LLEN/8; logic IEUAdrIncrementM; assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(TakeSpillM), .y(IEUAdrSpillM)); @@ -110,18 +112,30 @@ module align import cvw::*; #(parameter cvw_t P) ( assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; //////////////////////////////////////////////////////////////////////////////////////////////////// - // Merge spilled instruction + // Merge spilled data //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(P.LLEN-8) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[LLEN-1:8], ReadDataWordFirstHalfM); + flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM); // merge together - mux2 #(32) postspillmux(InstrRawF, {InstrRawF[15:0], InstrFirstHalfF}, SpillF, PostSpillInstrRawF); + mux2 #(2*P.LLEN) postspillmux(ReadDataWordMuxM, {ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); - // Need to use always comb to avoid pessimistic x propagation if PostSpillInstrRawF is x - always_comb - if 
(PostSpillInstrRawF[1:0] != 2'b11) CompressedF = 1'b1; - else CompressedF = 1'b0; + // align by shifting + // *** optimize by merging with halfSpill, WordSpill, etc + logic HalfMisalignedM, WordMisalignedM; + assign HalfMisalignedM = Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; + assign WordMisalignedM = Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; + if(P.LLEN == 64) begin + logic DoubleMisalignedM; + assign DoubleMisalignedM = Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; + assign MisalignedM = HalfMisalignedM | WordMisalignedM | DoubleMisalignedM; + end else begin + assign MisalignedM = HalfMisalignedM | WordMisalignedM; + end + // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) + // 8 * is for shifting by bytes not bits + assign ReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + endmodule diff --git a/src/lsu/subwordread-variant1.sv b/src/lsu/subwordread-variant1.sv deleted file mode 100644 index c0cfe247b..000000000 --- a/src/lsu/subwordread-variant1.sv +++ /dev/null @@ -1,249 +0,0 @@ -/////////////////////////////////////////// -// subwordread.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// Purpose: Extract subwords and sign extend for reads -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordreadVar1 #(parameter LLEN) - ( - input logic [LLEN-1:0] ReadDataWordMuxM, - input logic [$clog(LLEN/8)-1:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN/2-1:0] ReadDataM -); - - localparam OFFSET_LEN = $clog(LLEN/8); - localparam HLEN = LLEN/2; - logic [7:0] ByteM; - logic [15:0] HalfwordM; - logic [OFFSET_LEN-1:0] PAdrSwap; - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. 
- assign PAdrSwap = PAdrM ^ {OFFSET_LEN{BigEndianM}}; - - if (LLEN == 128) begin:swrmux - // ByteMe mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: ByteM = ReadDataWordMuxM[7:0]; - 4'b0001: ByteM = ReadDataWordMuxM[15:8]; - 4'b0010: ByteM = ReadDataWordMuxM[23:16]; - 4'b0011: ByteM = ReadDataWordMuxM[31:24]; - 4'b0100: ByteM = ReadDataWordMuxM[39:32]; - 4'b0101: ByteM = ReadDataWordMuxM[47:40]; - 4'b0110: ByteM = ReadDataWordMuxM[55:48]; - 4'b0111: ByteM = ReadDataWordMuxM[63:56]; - 4'b1000: ByteM = ReadDataWordMuxM[71:64]; - 4'b1001: ByteM = ReadDataWordMuxM[79:72]; - 4'b1010: ByteM = ReadDataWordMuxM[87:80]; - 4'b1011: ByteM = ReadDataWordMuxM[95:88]; - 4'b1100: ByteM = ReadDataWordMuxM[103:96]; - 4'b1101: ByteM = ReadDataWordMuxM[111:104]; - 4'b1110: ByteM = ReadDataWordMuxM[119:112]; - 4'b1111: ByteM = ReadDataWordMuxM[127:120]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[3:0]) - 4'b0000: HalfwordM = ReadDataWordMuxM[15:0]; - 4'b0001: HalfwordM = ReadDataWordMuxM[23:8]; - 4'b0010: HalfwordM = ReadDataWordMuxM[31:16]; - 4'b0011: HalfwordM = ReadDataWordMuxM[39:24]; - 4'b0100: HalfwordM = ReadDataWordMuxM[47:32]; - 4'b0101: HalfwordM = ReadDataWordMuxM[55:40]; - 4'b0110: HalfwordM = ReadDataWordMuxM[63:48]; - 4'b0111: HalfwordM = ReadDataWordMuxM[71:56]; - 4'b1000: HalfwordM = ReadDataWordMuxM[79:64]; - 4'b1001: HalfwordM = ReadDataWordMuxM[87:72]; - 4'b1010: HalfwordM = ReadDataWordMuxM[95:80]; - 4'b1011: HalfwordM = ReadDataWordMuxM[103:88]; - 4'b1100: HalfwordM = ReadDataWordMuxM[111:96]; - 4'b1101: HalfwordM = ReadDataWordMuxM[119:104]; - 4'b1110: HalfwordM = ReadDataWordMuxM[127:112]; - //4'b1111: HalfwordM = {ReadDataWordMuxM[7:0], ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - 4'b1111: HalfwordM = {8'b0, ReadDataWordMuxM[127:120]}; // *** might be ok to zero extend rather than wrap around - endcase - - logic [31:0] WordM; - - always_comb - case(PAdrSwap[3:0]) - 4'b0000: WordM = 
ReadDataWordMuxM[31:0]; - 4'b0001: WordM = ReadDataWordMuxM[39:8]; - 4'b0010: WordM = ReadDataWordMuxM[47:16]; - 4'b0011: WordM = ReadDataWordMuxM[55:24]; - 4'b0100: WordM = ReadDataWordMuxM[63:32]; - 4'b0101: WordM = ReadDataWordMuxM[71:40]; - 4'b0111: WordM = ReadDataWordMuxM[79:48]; - 4'b1000: WordM = ReadDataWordMuxM[87:56]; - 4'b1001: WordM = ReadDataWordMuxM[95:64]; - 4'b1010: WordM = ReadDataWordMuxM[103:72]; - 4'b1011: WordM = ReadDataWordMuxM[111:80]; - 4'b1011: WordM = ReadDataWordMuxM[119:88]; - 4'b1100: WordM = ReadDataWordMuxM[127:96]; - 4'b1101: WordM = {8'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: WordM = {16'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: WordM = {24'b0, ReadDataWordMuxM[127:120]}; - endcase - - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordMM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordMM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordMM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordMM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordMM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordMM = ReadDataWordMuxM[103:40]; - 4'b0111: DblWordMM = ReadDataWordMuxM[111:48]; - 4'b1000: DblWordMM = ReadDataWordMuxM[119:56]; - 4'b1001: DblWordMM = ReadDataWordMuxM[127:64]; - 4'b1010: DblWordMM = {8'b0, ReadDataWordMuxM[103:72]}; - 4'b1011: DblWordMM = {16'b0, ReadDataWordMuxM[111:80]}; - 4'b1011: DblWordMM = {24'b0, ReadDataWordMuxM[119:88]}; - 4'b1100: DblWordMM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordMM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordMM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordMM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, 
DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = ReadDataWordMuxM[HLEN-1:0]; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux - // ByteMe mux - always_comb - case(PAdrSwap[2:0]) - 3'b000: ByteM = ReadDataWordMuxM[7:0]; - 3'b001: ByteM = ReadDataWordMuxM[15:8]; - 3'b010: ByteM = ReadDataWordMuxM[23:16]; - 3'b011: ByteM = ReadDataWordMuxM[31:24]; - 3'b100: ByteM = ReadDataWordMuxM[39:32]; - 3'b101: ByteM = ReadDataWordMuxM[47:40]; - 3'b110: ByteM = ReadDataWordMuxM[55:48]; - 3'b111: ByteM = ReadDataWordMuxM[63:56]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[2:0]) - 3'b000: HalfwordM = ReadDataWordMuxM[15:0]; - 3'b001: HalfwordM = ReadDataWordMuxM[23:8]; - 3'b010: HalfwordM = ReadDataWordMuxM[31:16]; - 3'b011: HalfwordM = ReadDataWordMuxM[39:24]; - 3'b100: HalfwordM = ReadDataWordMuxM[47:32]; - 3'b011: HalfwordM = ReadDataWordMuxM[55:40]; - 3'b110: HalfwordM = ReadDataWordMuxM[63:48]; - 3'b011: HalfwordM = {8'b0, ReadDataWordMuxM[63:56]}; - endcase - - logic [31:0] WordM; - - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase - - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: DblWordMM = ReadDataWordMuxM[63:0]; - 3'b001: DblWordMM = {8'b0, ReadDataWordMuxM[63:8]}; - 3'b010: DblWordMM = {16'b0, ReadDataWordMuxM[63:16]}; - 3'b011: DblWordMM = 
{24'b0, ReadDataWordMuxM[63:24]}; - 3'b100: DblWordMM = {32'b0, ReadDataWordMuxM[63:32]}; - 3'b101: DblWordMM = {40'b0, ReadDataWordMuxM[63:40]}; - 3'b110: DblWordMM = {48'b0, ReadDataWordMuxM[63:48]}; - 3'b111: DblWordMM = {56'b0, ReadDataWordMuxM[63:56]}; - endcase - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{HLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{HLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - // byte mux - always_comb - case(PAdrSwap[1:0]) - 2'b00: ByteM = ReadDataWordMuxM[7:0]; - 2'b01: ByteM = ReadDataWordMuxM[15:8]; - 2'b10: ByteM = ReadDataWordMuxM[23:16]; - 2'b11: ByteM = ReadDataWordMuxM[31:24]; - endcase - - // halfword mux - always_comb - case(PAdrSwap[1:0]) - 2'b00: HalfwordM = ReadDataWordMuxM[15:0]; - 2'b01: HalfwordM = ReadDataWordMuxM[23:8]; - 2'b10: HalfwordM = ReadDataWordMuxM[31:16]; - 2'b11: HalfwordM = {8'b0, ReadDataWordMuxM[31:24]}; - endcase - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{HLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{HLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{HLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM; // fld - 3'b100: ReadDataM = {{HLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{HLEN-16{1'b0}}, 
HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen - endcase - end -endmodule diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 5df543903..00b348660 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( end // global stall and flush control - hazard hzu(.clk, .reset, + hazard hzu( .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LSUStallM, .IFUStallF, From 657409aec5725fbbf2dd2fc700377b7ac1438c35 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 13:07:23 -0500 Subject: [PATCH 05/48] Addec ZICCLSM to config files and started on lsu instance. --- config/buildroot/config.vh | 1 + config/fpga/config.vh | 1 + config/rv32e/config.vh | 1 + config/rv32gc/config.vh | 1 + config/rv32i/config.vh | 1 + config/rv32imc/config.vh | 1 + config/rv64fpquad/config.vh | 1 + config/rv64gc/config.vh | 1 + config/rv64i/config.vh | 1 + config/shared/parameter-defs.vh | 1 + src/cvw.sv | 1 + src/lsu/lsu.sv | 3 ++- 12 files changed, 13 insertions(+), 1 deletion(-) diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index 05c8aa646..7b13a27f2 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -46,6 +46,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/fpga/config.vh b/config/fpga/config.vh index 9e2b4cbb9..e690335f3 100644 --- a/config/fpga/config.vh +++ b/config/fpga/config.vh @@ -48,6 +48,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; +localparam ZICCLSM_SUPPORTED = 0; localparam 
SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index bf5965fb5..915ab7677 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 1d42e233a..a76b42302 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -48,6 +48,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index 5c34ae413..d25f90135 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh index 1867b9f99..9fafafe71 100644 --- a/config/rv32imc/config.vh +++ b/config/rv32imc/config.vh @@ -46,6 +46,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh index 6e2e0a33d..343de8b11 100644 --- a/config/rv64fpquad/config.vh +++ 
b/config/rv64fpquad/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index f3057c287..fa603990b 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index ea668a45e..35fe763a5 100644 --- a/config/rv64i/config.vh +++ b/config/rv64i/config.vh @@ -47,6 +47,7 @@ localparam SSTC_SUPPORTED = 0; localparam ZICBOM_SUPPORTED = 0; localparam ZICBOZ_SUPPORTED = 0; localparam ZICBOP_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 0; diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index f3f216062..f6132f765 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -24,6 +24,7 @@ localparam cvw_t P = '{ ZICBOM_SUPPORTED : ZICBOM_SUPPORTED, ZICBOZ_SUPPORTED : ZICBOZ_SUPPORTED, ZICBOP_SUPPORTED : ZICBOP_SUPPORTED, + ZICCLSM_SUPPORTED : ZICCLSM_SUPPORTED, SVPBMT_SUPPORTED : SVPBMT_SUPPORTED, SVNAPOT_SUPPORTED : SVNAPOT_SUPPORTED, SVINVAL_SUPPORTED : SVINVAL_SUPPORTED, diff --git a/src/cvw.sv b/src/cvw.sv index 01e0d6376..cdcd983b6 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -59,6 +59,7 @@ typedef struct packed { logic ZICBOM_SUPPORTED; logic ZICBOZ_SUPPORTED; logic ZICBOP_SUPPORTED; + logic ZICCLSM_SUPPORTED; logic SVPBMT_SUPPORTED; logic SVNAPOT_SUPPORTED; logic 
SVINVAL_SUPPORTED; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 8dc843a38..f2c7647eb 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -248,6 +248,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( localparam AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^ localparam LINELEN = P.DCACHE_LINELENINBITS; // Number of bits in cacheline localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) + localparam CACHEWORDLEN = P.ZICCLSM_SUPPORTED ? 2*P.LLEN : P.LLEN; // Width of the cache's input and output data buses. Misaligned doubles width for fast access logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline logic [P.PA_BITS-1:0] DCacheBusAdr; // Cacheline address to fetch or writeback. @@ -270,7 +271,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign FlushDCache = FlushDCacheM & ~(SelHPTW); cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), - .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( + .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), From 36ca64c567f0d1a1423a0a40f0d95eee1f44e26e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 13:55:16 -0500 Subject: [PATCH 06/48] At least have the aligner integrated, but not tested. 
--- src/lsu/align.sv | 16 ++++++++-------- src/lsu/lsu.sv | 32 +++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 897f0d181..0e399d19a 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -36,15 +36,15 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation - input logic [P.LLEN*2-1:0]ReadDataWordMuxM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed - input logic LSUStallM, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched + input logic [P.LLEN*2-1:0]DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed + input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request input logic DataUpdateDAM, // ITLB miss, ignore memory request output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [P.LLEN-1:0] ReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [P.LLEN-1:0] DCacheReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). 
Without a cache this is just PCF[1] typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; @@ -91,7 +91,7 @@ module align import cvw::*; #(parameter cvw_t P) ( end // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits - assign TakeSpillM = SpillM & ~LSUStallM & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); + assign TakeSpillM = SpillM & ~CacheBusHPWTStall & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); always_ff @(posedge clk) if (reset | FlushM) CurrState <= #1 STATE_READY; @@ -108,7 +108,7 @@ module align import cvw::*; #(parameter cvw_t P) ( end assign SelSpillM = (CurrState == STATE_SPILL); - assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & LSUStallM); + assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall); assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -116,10 +116,10 @@ module align import cvw::*; #(parameter cvw_t P) ( //////////////////////////////////////////////////////////////////////////////////////////////////// // save the first 2 bytes - flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM); + flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM); // merge together - mux2 #(2*P.LLEN) postspillmux(ReadDataWordMuxM, {ReadDataWordMuxM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); + mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); // align by shifting // *** optimize by merging with halfSpill, WordSpill, etc @@ -136,6 +136,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by 
bytes not bits - assign ReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign DCacheReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index f2c7647eb..ab0b36d7d 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -92,6 +92,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( input var logic [7:0] PMPCFG_ARRAY_REGW[P.PMP_ENTRIES-1:0], // PMP configuration from privileged unit input var logic [P.PA_BITS-3:0] PMPADDR_ARRAY_REGW[P.PMP_ENTRIES-1:0] // PMP address from privileged unit ); + localparam MISALIGN_SUPPORT = P.ZICCLSM_SUPPORTED & P.DCACHE_SUPPORTED; logic [P.XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer logic [P.XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer @@ -108,13 +109,18 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic BusStall; // Bus interface busy with multicycle operation logic HPTWStall; // HPTW busy with multicycle operation + logic CacheBusHPWTStall; // Cache, bus, or hptw is requesting a stall + logic SelSpillE; // Align logic detected a spill and needs to stall logic CacheableM; // PMA indicates memory address is cacheable logic BusCommittedM; // Bus memory operation in flight, delay interrupts logic DCacheCommittedM; // D$ memory operation started, delay interrupts logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data - logic [P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + /* verilator lint_off WIDTHEXPAND */ + logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + /* verilator lint_on WIDTHEXPAND */ + logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection @@ 
-142,8 +148,19 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); - assign IEUAdrExtM = {2'b00, IEUAdrM}; - assign IEUAdrExtE = {2'b00, IEUAdrE}; + if(MISALIGN_SUPPORT) begin : ziccslm_align + logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; + align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, + .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); + assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; + assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; + end else begin : no_ziccslm_align + assign IEUAdrExtM = {2'b00, IEUAdrM}; + assign IEUAdrExtE = {2'b00, IEUAdrE}; + assign SelSpillE = '0; + assign DCacheReadDataWordSpillM = DCacheReadDataWordM; + end ///////////////////////////////////////////////////////////////////////////////////////////// // HPTW (only needed if VM supported) @@ -180,7 +197,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // the trap module. 
assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; - assign LSUStallM = DCacheStallM | HPTWStall | BusStall; + assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall; + assign LSUStallM = CacheBusHPWTStall | SelSpillE; ///////////////////////////////////////////////////////////////////////////////////////////// // MMU and misalignment fault logic required if privileged unit exists @@ -273,7 +291,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), - .FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM), + .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), @@ -290,7 +308,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), - .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM), .WriteDataM(LSUWriteDataM), + .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), @@ -300,7 +318,7 @@ module lsu import cvw::*; 
#(parameter cvw_t P) ( // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. // pma should generate exception for LLEN read to periph. - mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), + mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface From 747f453bb53a8ae6ed3c4c41f3e93e00a7cf6cc3 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 14:41:42 -0500 Subject: [PATCH 07/48] Passes lint with some exceptions. Still need to add misaligned store support. --- src/lsu/align.sv | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 0e399d19a..a04aa386e 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -57,16 +57,17 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic MisalignedM; logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; + logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; //////////////////////////////////////////////////////////////////////////////////////////////////// // PC logic //////////////////////////////////////////////////////////////////////////////////////////////////// localparam LLENINBYTES = P.LLEN/8; - logic IEUAdrIncrementM; + logic [XLEN-1:0] IEUAdrIncrementM; assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; - mux2 #(P.XLEN) pcplus2mux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(TakeSpillM), .y(IEUAdrSpillM)); - mux2 #(P.XLEN) pcnextspillmux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(TakeSpillE), .y(IEUAdrSpillE)); + mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), 
.s(SelSpillE), .y(IEUAdrSpillE)); + mux2 #(P.XLEN) ieuadrspillmmux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); //////////////////////////////////////////////////////////////////////////////////////////////////// // Detect spill @@ -85,9 +86,9 @@ module align import cvw::*; #(parameter cvw_t P) ( if(P.LLEN == 64) begin logic DoubleSpillM; assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; - assign SpillM = HalfSpillM | WordOffsetM | DoubleSpillM; + assign SpillM = HalfSpillM | WordSpillM | DoubleSpillM; end else begin - assign SpillM = HalfSpillM | WordOffsetM; + assign SpillM = HalfSpillM | WordSpillM; end // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits @@ -136,6 +137,7 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign DCacheReadDataWordSpillM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; endmodule From dce3c85105c3b3b09549926658a44f6f34deb8bb Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 27 Oct 2023 16:31:22 -0500 Subject: [PATCH 08/48] Progress. 
--- src/cache/cache.sv | 14 ++++++++++---- src/lsu/align.sv | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 1714544ec..23fd6163e 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -175,10 +175,16 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN/8-1:0] DemuxedByteMask, FetchBufferByteSel; // Adjust byte mask from word to cache line - onehotdecoder #(LOGCWPL) adrdec(.bin(PAdr[LOGCWPL+LOGLLENBYTES-1:LOGLLENBYTES]), .decoded(MemPAdrDecoded)); - for(index = 0; index < 2**LOGCWPL; index++) begin - assign DemuxedByteMask[(index+1)*(WORDLEN/8)-1:index*(WORDLEN/8)] = MemPAdrDecoded[index] ? ByteMask : '0; - end + + localparam CACHEMUXINVERALPERLINE = LINELEN/MUXINTERVAL;// Number of words in cache line + localparam LOGMIPL = $clog2(CACHEMUXINVERALPERLINE);// Log2 of ^ + + logic [LINELEN/8-1:0] BlankByteMask; + assign BlankByteMask[WORDLEN/8-1:0] = ByteMask; + assign BlankByteMask[LINELEN/8-1:WORDLEN/8] = '0; + + assign DemuxedByteMask = BlankByteMask << ((MUXINTERVAL/8) * WordOffsetAddr); + assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. 
// Merge write data into fetched cache line for store miss diff --git a/src/lsu/align.sv b/src/lsu/align.sv index a04aa386e..18b387e49 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -64,7 +64,7 @@ module align import cvw::*; #(parameter cvw_t P) ( //////////////////////////////////////////////////////////////////////////////////////////////////// localparam LLENINBYTES = P.LLEN/8; - logic [XLEN-1:0] IEUAdrIncrementM; + logic [P.XLEN-1:0] IEUAdrIncrementM; assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); From 569e3dc906e4fa0bb768cddf5b50718cb4d4f11f Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 14:00:49 -0500 Subject: [PATCH 09/48] Finally lints cleanly. --- src/lsu/align.sv | 17 +++++++++++++++++ src/lsu/lsu.sv | 9 +++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 18b387e49..ae6e3985b 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -41,6 +41,12 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic DTLBMissM, // ITLB miss, ignore memory request input logic DataUpdateDAM, // ITLB miss, ignore memory request + input logic [(P.LLEN-1)/8:0] ByteMaskM, + input logic [P.LLEN-1:0] LSUWriteDataM, + + output logic [(P.LLEN*2-1)/8:0] ByteMaskSpillM, + output logic [P.LLEN*2-1:0] LSUWriteDataSpillM, + output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline @@ -65,7 +71,9 @@ module align import cvw::*; #(parameter cvw_t P) ( localparam LLENINBYTES = P.LLEN/8; logic [P.XLEN-1:0] 
IEUAdrIncrementM; + /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); @@ -139,5 +147,14 @@ module align import cvw::*; #(parameter cvw_t P) ( // 8 * is for shifting by bytes not bits assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; + + // write path. Also has the 8:1 shifter muxing for the byteoffset + // then it also has the mux to select when a spill occurs + logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; + assign LSUWriteDataShiftedM = {{{P.LLEN}{1'b0}}, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); + mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; + assign ByteMaskShiftedM = {{{P.LLEN/8}{1'b0}}, ByteMaskM} << (MisalignedM ? 
ByteMaskM : '0); + mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskShiftedM[P.LLEN*2/8-1:P.LLEN/8]}, SelSpillM, ByteMaskSpillM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index ab0b36d7d..44fdffe58 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -119,6 +119,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data /* verilator lint_off WIDTHEXPAND */ logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] DCacheReadDataWordM; // D$ read data + logic [(MISALIGN_SUPPORT+1)*P.LLEN-1:0] LSUWriteDataSpillM; // Final write data + logic [((MISALIGN_SUPPORT+1)*P.LLEN-1)/8:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data @@ -152,6 +154,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, + .ByteMaskM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -160,6 +163,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign IEUAdrExtE = {2'b00, IEUAdrE}; assign SelSpillE = '0; assign DCacheReadDataWordSpillM = DCacheReadDataWordM; + assign ByteMaskSpillM = ByteMaskM; + assign LSUWriteDataSpillM = LSUWriteDataM; end ///////////////////////////////////////////////////////////////////////////////////////////// @@ -292,8 +297,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | 
IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), - .ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), - .CacheWriteData(LSUWriteDataM), .SelHPTW, + .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), + .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), From 9cd2e477835fae0f6b793febcaf0714833aaa75e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 14:54:58 -0500 Subject: [PATCH 10/48] Aligner is integrated and enabled in rv64gc and passes the regression test; however, there are no new tests. --- src/lsu/align.sv | 14 ++++++++------ src/lsu/lsu.sv | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index ae6e3985b..48cf2f035 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -36,6 +36,8 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation + input logic [1:0] MemRWM, + input logic CacheableM, input logic [P.LLEN*2-1:0]DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. 
Transition to second fetch of spill after the first is fetched input logic DTLBMissM, // ITLB miss, ignore memory request @@ -56,7 +58,7 @@ module align import cvw::*; #(parameter cvw_t P) ( typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; statetype CurrState, NextState; - logic TakeSpillM, TakeSpillE; + logic TakeSpillM; logic SpillM; logic SelSpillM; logic SpillSaveM; @@ -75,7 +77,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); - mux2 #(P.XLEN) ieuadrspillmmux(.d0({IEUAdrM[P.XLEN-1:2], 2'b10}), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); + mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); //////////////////////////////////////////////////////////////////////////////////////////////////// // Detect spill @@ -94,9 +96,9 @@ module align import cvw::*; #(parameter cvw_t P) ( if(P.LLEN == 64) begin logic DoubleSpillM; assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; - assign SpillM = HalfSpillM | WordSpillM | DoubleSpillM; + assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin - assign SpillM = HalfSpillM | WordSpillM; + assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM); end // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits @@ -151,10 +153,10 @@ module align import cvw::*; #(parameter cvw_t P) ( // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; - assign LSUWriteDataShiftedM = {{{P.LLEN}{1'b0}}, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); + assign LSUWriteDataShiftedM = {LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 
8 * ByteOffsetM : '0); mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; - assign ByteMaskShiftedM = {{{P.LLEN/8}{1'b0}}, ByteMaskM} << (MisalignedM ? ByteMaskM : '0); + assign ByteMaskShiftedM = {{{P.LLEN/8}{1'b0}}, ByteMaskM} << (MisalignedM ? ByteMaskM : '0); // *** merge with subword byte mask mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskShiftedM[P.LLEN*2/8-1:P.LLEN/8]}, SelSpillM, ByteMaskSpillM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 44fdffe58..5b9533504 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -153,6 +153,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin : ziccslm_align logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, + .MemRWM, .CacheableM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .ByteMaskM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); From b5763e11e8c596850c265b6240cfbdc1e0825413 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 15:30:09 -0500 Subject: [PATCH 11/48] rv32gc now also works with the alignment module. Still not tested with misaligned access.
--- src/lsu/align.sv | 6 +++--- src/lsu/lsu.sv | 2 +- src/lsu/swbytemask.sv | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 48cf2f035..18dd6b2ff 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -87,10 +87,10 @@ module align import cvw::*; #(parameter cvw_t P) ( // 1) operation size // 2) offset // 3) access location within the cacheline - logic [P.DCACHE_LINELENINBITS/8-1:P.LLEN/8] WordOffsetM; - logic [P.LLEN/8-1:0] ByteOffsetM; + logic [$clog2(P.DCACHE_LINELENINBITS/8)-1:$clog2(LLENINBYTES)] WordOffsetM; + logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; - assign {WordOffsetM, ByteOffsetM} = IEUAdrM[P.DCACHE_LINELENINBITS/8-1:0]; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[$clog2(P.DCACHE_LINELENINBITS/8)-1:0]; assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; if(P.LLEN == 64) begin diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 5b9533504..6cb123be8 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -151,7 +151,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); if(MISALIGN_SUPPORT) begin : ziccslm_align - logic [P.LLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; + logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .CacheableM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, diff --git a/src/lsu/swbytemask.sv b/src/lsu/swbytemask.sv index ad20a4414..9313456f3 100644 --- a/src/lsu/swbytemask.sv +++ b/src/lsu/swbytemask.sv @@ -33,7 +33,7 @@ module swbytemask #(parameter WORDLEN)( output logic [WORDLEN/8-1:0] ByteMask ); - assign ByteMask = ((2**(2**Size))-1) << Adr; + assign ByteMask = ((2**(2**Size))-1) << Adr; // merge with align. 
/* Equivalent to the following From f13b67b86977f7468bd0131219f2a65be590c724 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 15:47:46 -0500 Subject: [PATCH 12/48] Preemptively fixed the bytemask bug before testing. --- src/ebu/ahbcacheinterface.sv | 2 +- src/lsu/align.sv | 5 +++-- src/lsu/lsu.sv | 9 +++++---- src/lsu/swbytemask.sv | 16 ++++++++++++---- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index 9c2ff3a89..054022106 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -113,7 +113,7 @@ module ahbcacheinterface #( // *** bummer need a second byte mask for bus as it is AHBW rather than LLEN. // probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0. - swbytemask #(AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(AHBW/8)-1:0]), .ByteMask(BusByteMaskM)); + swbytemask #(AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(AHBW/8)-1:0]), .ByteMask(BusByteMaskM), .ByteMaskExtended()); flopen #(AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[AHBW/8-1:0], HWSTRB); diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 18dd6b2ff..b517dfcdb 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -44,6 +44,7 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic DataUpdateDAM, // ITLB miss, ignore memory request input logic [(P.LLEN-1)/8:0] ByteMaskM, + input logic [(P.LLEN-1)/8:0] ByteMaskExtendedM, input logic [P.LLEN-1:0] LSUWriteDataM, output logic [(P.LLEN*2-1)/8:0] ByteMaskSpillM, @@ -156,7 +157,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign LSUWriteDataShiftedM = {LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 
8 * ByteOffsetM : '0); mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; - assign ByteMaskShiftedM = {{{P.LLEN/8}{1'b0}}, ByteMaskM} << (MisalignedM ? ByteMaskM : '0); // *** merge with subword byte mask - mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskShiftedM[P.LLEN*2/8-1:P.LLEN/8]}, SelSpillM, ByteMaskSpillM); + assign ByteMaskShiftedM = {ByteMaskExtendedM, ByteMaskM}; + mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, SelSpillM, ByteMaskSpillM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 6cb123be8..ef9edb72b 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -134,6 +134,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data logic [P.LLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write + logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB @@ -155,7 +156,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .CacheableM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, - .ByteMaskM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -261,7 +262,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // Add support for cboz dtim #(P) dtim(.clk, .ce(~GatedStallW), .MemRWM(DTIMMemRWM), .DTIMAdr, .FlushW, 
.WriteDataM(LSUWriteDataM), - .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM[P.LLEN/8-1:0])); + .ReadDataWordM(DTIMReadDataWordM[P.LLEN-1:0]), .ByteMaskM(ByteMaskM)); end else begin end if (P.BUS_SUPPORTED) begin : bus @@ -337,7 +338,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ahbinterface #(P.XLEN, 1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .Flush(FlushW), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), - .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM[P.XLEN/8-1:0]), .WriteData(LSUWriteDataM[P.XLEN-1:0]), + .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM[P.XLEN-1:0]), .Stall(GatedStallW), .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); // Mux between the 2 sources of read data, 0: Bus, 1: DTIM @@ -379,7 +380,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks - swbytemask #(P.LLEN) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM)); + swbytemask #(P.LLEN, P.ZICCLSM_SUPPORTED) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM), .ByteMaskExtended(ByteMaskExtendedM)); ///////////////////////////////////////////////////////////////////////////////////////////// // MW Pipeline Register diff --git a/src/lsu/swbytemask.sv b/src/lsu/swbytemask.sv index 9313456f3..e0981e934 100644 --- a/src/lsu/swbytemask.sv +++ b/src/lsu/swbytemask.sv @@ -27,13 +27,21 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module swbytemask #(parameter WORDLEN)( +module swbytemask #(parameter WORDLEN, EXTEND = 0)( input logic [2:0] Size, input logic [$clog2(WORDLEN/8)-1:0] Adr, - output logic [WORDLEN/8-1:0] ByteMask + output logic [WORDLEN/8-1:0] ByteMask, + output logic [WORDLEN/8-1:0] ByteMaskExtended ); - - assign ByteMask = ((2**(2**Size))-1) << Adr; // merge with align. + if(EXTEND) begin // EXTEND != 0: compute the mask at twice the word width so shifted-out bits are kept + logic [WORDLEN*2/8-1:0] ExtendedByteMask; // one bit per byte across two consecutive words + assign ExtendedByteMask = ((2**(2**Size))-1) << Adr; // 2**Size contiguous ones, shifted up by the byte offset Adr + assign ByteMask = ExtendedByteMask[WORDLEN/8-1:0]; // lower half: byte enables inside the addressed word + assign ByteMaskExtended = ExtendedByteMask[WORDLEN*2/8-1:WORDLEN/8]; // upper half: byte enables that fall past the end of the addressed word + end else begin // EXTEND == 0: single-word mask only; bits shifted past the word are truncated + assign ByteMask = ((2**(2**Size))-1) << Adr; // same mask expression, limited to WORDLEN/8 bits + assign ByteMaskExtended = '0; // no second-word mask is produced in this configuration + end /* Equivalent to the following From 2241976d29f2d5383a6725d526b0636104165b9c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 30 Oct 2023 18:26:11 -0500 Subject: [PATCH 13/48] Updated mmu to not generate trap on cacheable misaligned access when supported. Updated tests with David's help.
--- src/mmu/mmu.sv | 4 +- .../riscv-test-suite/rv64i_m/I/Makefrag | 6 +- .../rv64i_m/privilege/Makefrag | 1 + ...ALLY-misaligned-access-01.reference_output | 24 +++ .../references/WALLY-trap-01.reference_output | 9 +- .../WALLY-trap-s-01.reference_output | 8 +- .../WALLY-trap-u-01.reference_output | 8 +- .../rv64i_m/privilege/src/WALLY-TEST-LIB-64.h | 6 +- .../src/WALLY-misaligned-access-01.S | 139 ++++++++++++++++++ 9 files changed, 186 insertions(+), 19 deletions(-) create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 32fed853d..a497b6da7 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -138,8 +138,8 @@ module mmu import cvw::*; #(parameter cvw_t P, 2'b10: DataMisalignedM = VAdr[1] | VAdr[0]; // lw, sw, flw, fsw, lwu 2'b11: DataMisalignedM = |VAdr[2:0]; // ld, sd, fld, fsd endcase - assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM; - assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM; + assign LoadMisalignedFaultM = DataMisalignedM & ReadNoAmoAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); + assign StoreAmoMisalignedFaultM = DataMisalignedM & WriteAccessM & ~(P.ZICCLSM_SUPPORTED & Cacheable); // Specify which type of page fault is occurring assign InstrPageFaultF = TLBPageFault & ExecuteAccessF; diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/Makefrag index 5758ecc33..19bb5bd01 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/Makefrag @@ -28,11 +28,11 @@ # Description: Makefrag for RV64I architectural tests rv64i_sc_tests = \ - WALLY-ADD \ + WALLY-ADD \ WALLY-SUB \ WALLY-SLT \ - WALLY-SLTU \ - WALLY-XOR + WALLY-SLTU 
\ + WALLY-XOR \ rv64i_tests = $(addsuffix .elf, $(rv64i_sc_tests)) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag index bd522e9a4..36f3e8075 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag @@ -57,6 +57,7 @@ target_tests_nosim = \ WALLY-wfi-01 \ WALLY-cbom-01 \ WALLY-cboz-01 \ + WALLY-misaligned-access-01 \ # unclear why status-fp-enabled and wfi aren't simulating ok diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output new file mode 100644 index 000000000..7e1ab4344 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -0,0 +1,24 @@ +00000000 +00000000 +00000001 +00000000 +ffffffff +ffffffff +00000001 +00000000 +00000002 +00000000 +00000000 +00000000 +ffffffff +ffffffff +00000000 +00000000 +fffffffe +ffffffff +393cb5d1 +72ca6f49 +7b12609b +245889d8 +7f42ac28 +af17a2d3 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output index 5c9b816fb..d613b4996 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-01.reference_output @@ -1,3 +1,4 @@ + FFFFFFFF # stimecmp low bits 00000000 # stimecmp high bits 00000000 # menvcfg low bits @@ -24,7 +25,7 @@ FFFFFFFF # stimecmp low bits 00000000 00000004 # mcause from load address misaligned 00000000 -80000411 # 
mtval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -36,7 +37,7 @@ FFFFFFFF # stimecmp low bits 00000000 00000006 # mcause from store misaligned 00000000 -80000429 # mtval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -136,7 +137,7 @@ FFFFFFFF # stimecmp low bits 00000000 00000004 # mcause from load address misaligned 00000000 -80000411 # mtval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 @@ -148,7 +149,7 @@ FFFFFFFF # stimecmp low bits 00000000 00000006 # mcause from store misaligned 00000000 -80000429 # mtval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00001880 # masked out mstatus.MPP = 11, mstatus.MPIE = 1, and mstatus.MIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output index 9f3ddc647..6aef0eb5d 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-s-01.reference_output @@ -26,7 +26,7 @@ 00000000 00000004 # scause from load address misaligned 00000000 -80000411 # stval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -38,7 +38,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000429 # stval of address with misaligned store instr 
(0x80000421) +02000001 # mtval of misaligned address 00000000 00000800 # masked out mstatus.mpp = 1, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -128,7 +128,7 @@ 00000000 00000004 # scause from load address misaligned 00000000 -80000411 # stval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 @@ -140,7 +140,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000429 # stval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00000120 # masked out sstatus.SPP = 1, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output index 36f08113a..1eea9f389 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-trap-u-01.reference_output @@ -26,7 +26,7 @@ 00000000 00000004 # scause from load address misaligned 00000000 -80000411 # stval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -38,7 +38,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000429 # stval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00000000 # masked out mstatus.mpp = 0, mstatus.MPIE = 0, and mstatus.MIE = 0 00000000 @@ -122,7 +122,7 @@ 00000000 00000004 # scause from load address misaligned 00000000 -80000411 # stval of misaligned address (0x80000409) +02000001 # mtval of misaligned address 00000000 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and 
sstatus.SIE = 0 00000000 @@ -134,7 +134,7 @@ 00000000 00000006 # scause from store misaligned 00000000 -80000429 # stval of address with misaligned store instr (0x80000421) +02000001 # mtval of misaligned address 00000000 00000020 # masked out sstatus.SPP = 0, sstatus.SPIE = 1, and sstatus.SIE = 0 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h index 23f105cbc..07a31d7d5 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-TEST-LIB-64.h @@ -98,7 +98,8 @@ cause_breakpnt: ret cause_load_addr_misaligned: - auipc t3, 0 // get current PC, which is aligned + li t3, 0x02000000 // base address of clint, because with zicclsm misaligned cached access won't trap + //auipc t3, 0 // get current PC, which is aligned addi t3, t3, 1 lw t4, 0(t3) // load from a misaligned address ret @@ -108,7 +109,8 @@ cause_load_acc: ret cause_store_addr_misaligned: - auipc t3, 0 // get current PC, which is aligned + li t3, 0x02000000 // base address of clint, because with zicclsm misaligned cached access won't trap + //auipc t3, 0 // get current PC, which is aligned addi t3, t3, 1 sw t4, 0(t3) // store to a misaligned address ret diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S new file mode 100644 index 000000000..792acc715 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -0,0 +1,139 @@ +/////////////////////////////////////////// +// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S +// David_Harris@hmc.edu & Katherine Parry +// Created 2022-06-17 22:58:09.916813// +// Copyright 
(C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/////////////////////////////////////////// + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV64I") + +.section .text.init +.globl rvtest_entry_point +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_SIGBASE( x6, wally_signature) + +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",ld) + +# Testcase 0: rs1:x18(0x0000000000000000), rs2:x9(0x0000000000000000), result rd:x5(0x0000000000000000) +li x18, MASK_XLEN(0x0000000000000000) +li x9, MASK_XLEN(0x0000000000000000) +SLT x5, x18, x9 +sd x5, 0(x6) + +# Testcase 1: rs1:x8(0x0000000000000000), rs2:x25(0x0000000000000001), result rd:x31(0x0000000000000001) +li x8, MASK_XLEN(0x0000000000000000) +li x25, MASK_XLEN(0x0000000000000001) +SLT x31, x8, x25 +sd x31, 8(x6) + +# Testcase 2: rs1:x16(0x0000000000000000), rs2:x12(0xffffffffffffffff), result rd:x20(0x0000000000000000) +li x16, MASK_XLEN(0x0000000000000000) +li x12, MASK_XLEN(0xffffffffffffffff) +SLT x20, x16, x12 +sd x20, 16(x6) + +# Testcase 3: rs1:x10(0x0000000000000001), rs2:x22(0x0000000000000000), result rd:x12(0x0000000000000000) +li x10, MASK_XLEN(0x0000000000000001) +li x22, MASK_XLEN(0x0000000000000000) +SLT x12, x10, x22 +sd x12, 24(x6) + +# Testcase 4: rs1:x19(0x0000000000000001), rs2:x31(0x0000000000000001), result rd:x29(0x0000000000000000) +li x19, MASK_XLEN(0x0000000000000001) +li x31, MASK_XLEN(0x0000000000000001) +SLT x29, x19, x31 +sd x29, 32(x6) + +# Testcase 5: rs1:x21(0x0000000000000001), rs2:x28(0xffffffffffffffff), result rd:x20(0x0000000000000000) +li x21, MASK_XLEN(0x0000000000000001) +li x28, MASK_XLEN(0xffffffffffffffff) +SLT x20, x21, x28 +sd x20, 40(x6) + +# Testcase 6: rs1:x5(0xffffffffffffffff), rs2:x23(0x0000000000000000), result rd:x10(0x0000000000000001) +li x5, MASK_XLEN(0xffffffffffffffff) +li x23, MASK_XLEN(0x0000000000000000) +SLT x10, x5, x23 +sd x10, 48(x6) + +# Testcase 7: rs1:x13(0xffffffffffffffff), rs2:x24(0x0000000000000001), result 
rd:x14(0x0000000000000001) +li x13, MASK_XLEN(0xffffffffffffffff) +li x24, MASK_XLEN(0x0000000000000001) +SLT x14, x13, x24 +sd x14, 56(x6) + +# Testcase 8: rs1:x27(0xffffffffffffffff), rs2:x21(0xffffffffffffffff), result rd:x3(0x0000000000000000) +li x27, MASK_XLEN(0xffffffffffffffff) +li x21, MASK_XLEN(0xffffffffffffffff) +SLT x3, x27, x21 +sd x3, 64(x6) + +# Testcase 9: rs1:x8(0x983631890063e42f), rs2:x21(0xb2d650af313b32b7), result rd:x15(0x0000000000000001) +li x8, MASK_XLEN(0x983631890063e42f) +li x21, MASK_XLEN(0xb2d650af313b32b7) +SLT x15, x8, x21 +sd x15, 72(x6) + +# Testcase 10: rs1:x19(0xb5d97ef760ef1471), rs2:x28(0xac7c8803e01bbf50), result rd:x14(0x0000000000000000) +li x19, MASK_XLEN(0xb5d97ef760ef1471) +li x28, MASK_XLEN(0xac7c8803e01bbf50) +SLT x14, x19, x28 +sd x14, 80(x6) + +# Testcase 11: rs1:x19(0x66faf98908135d58), rs2:x14(0xb3ab1b2cdf26f517), result rd:x25(0x0000000000000000) +li x19, MASK_XLEN(0x66faf98908135d58) +li x14, MASK_XLEN(0xb3ab1b2cdf26f517) +SLT x25, x19, x14 +sd x25, 88(x6) + +.EQU NUMTESTS,12 + +RVTEST_CODE_END +RVMODEL_HALT + +RVTEST_DATA_BEGIN +.align 4 +rvtest_data: +.word 0x98765432 +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + + +wally_signature: + .fill NUMTESTS*(XLEN/32),4,0xdeadbeef + +#ifdef rvtest_mtrap_routine + +mtrap_sigptr: + .fill 64*(XLEN/32),4,0xdeadbeef + +#endif + +#ifdef rvtest_gpr_save + +gpr_save: + .fill 32*(XLEN/32),4,0xdeadbeef + +#endif + +RVMODEL_DATA_END +// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S +// David_Harris@hmc.edu & Katherine Parry From c061440141a21fbacc6dda847922001f4b50953e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 12:30:10 -0500 Subject: [PATCH 14/48] First stab at the misaligned test. 
--- ...ALLY-misaligned-access-01.reference_output | 551 ++++++++++++- .../src/WALLY-misaligned-access-01.S | 750 ++++++++++++++++-- 2 files changed, 1205 insertions(+), 96 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 7e1ab4344..b0078f9ac 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -1,24 +1,537 @@ -00000000 -00000000 -00000001 -00000000 -ffffffff -ffffffff -00000001 -00000000 -00000002 +03020100 # ByteDstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +03020100 # Half0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # Half1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +03020100 # Word0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 
+3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # Word1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +05040302 # Word2DstData +09080706 +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +06050403 # Word3DstData +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +03020100 # Double0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # Double1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +05040302 # Double2DstData +09080706 +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 
+39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +06050403 # Double3DstData +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +07060504 # Double4DestData +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +deadbeef +08070605 # Double5DestData +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +deadbeef +09080706 # Double6DstData +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +deadbeef +0a090807 # Double7DstData +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +deadbeef +00000000 #signature 00000000 00000000 00000000 -ffffffff -ffffffff +00000000 00000000 00000000 -fffffffe -ffffffff 
-393cb5d1 -72ca6f49 -7b12609b -245889d8 -7f42ac28 -af17a2d3 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 792acc715..76496ff47 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -27,113 +27,709 @@ rvtest_entry_point: RVMODEL_BOOT RVTEST_CODE_BEGIN -RVTEST_SIGBASE( x6, wally_signature) - RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",ld) -# Testcase 0: rs1:x18(0x0000000000000000), rs2:x9(0x0000000000000000), result rd:x5(0x0000000000000000) -li x18, MASK_XLEN(0x0000000000000000) -li x9, MASK_XLEN(0x0000000000000000) -SLT x5, x18, x9 -sd x5, 0(x6) + # This test checks the misaligned load and stores work correctly and across D$ line spills. + # The general approach is to + # 1. load a region of memory using load doubles equal to two cache lines. And copy to a new + # region but using stores of bytes, half, word, or doubles. Each are repeated for all possible + # misaligned access. Bytes are always aligned, halves are 0, and 1, words are 0, 1, 2, and 3, and + # doubles are 0 through 7. Then the new region is compared against the reference region. 
Because + # of the misalignment the last few bytes will not be written so they will be some portion of deadbeef. + # The comparison is done using using same abyte, half, word, and double misaligned approach. -# Testcase 1: rs1:x8(0x0000000000000000), rs2:x25(0x0000000000000001), result rd:x31(0x0000000000000001) -li x8, MASK_XLEN(0x0000000000000000) -li x25, MASK_XLEN(0x0000000000000001) -SLT x31, x8, x25 -sd x31, 8(x6) + la a3, signature # does not get overwritten by any functions -# Testcase 2: rs1:x16(0x0000000000000000), rs2:x12(0xffffffffffffffff), result rd:x20(0x0000000000000000) -li x16, MASK_XLEN(0x0000000000000000) -li x12, MASK_XLEN(0xffffffffffffffff) -SLT x20, x16, x12 -sd x20, 16(x6) + # byte copy region. always naturally aligned + la a0, SourceData + la a1, ByteDstData + li a2, 16 + jal ra, memcpy8_1 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData + la a1, ByteDstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData + la a1, Half0DstData + li a2, 16 + jal ra, memcpy8_2 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData + la a1, Half0DstData + li a2, 16 + jal ra, CheckAllWriteSignature -# Testcase 3: rs1:x10(0x0000000000000001), rs2:x22(0x0000000000000000), result rd:x12(0x0000000000000000) -li x10, MASK_XLEN(0x0000000000000001) -li x22, MASK_XLEN(0x0000000000000000) -SLT x12, x10, x22 -sd x12, 24(x6) + la a0, SourceData+1 + la a1, Half1DstData + li a2, 16 + jal ra, memcpy8_2 + + # check if the values are write for all sizes and offsets of misaligned loads. 
+ la a0, SourceData+1 + la a1, Half1DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData + la a1, Word0DstData + li a2, 16 + jal ra, memcpy8_4 -# Testcase 4: rs1:x19(0x0000000000000001), rs2:x31(0x0000000000000001), result rd:x29(0x0000000000000000) -li x19, MASK_XLEN(0x0000000000000001) -li x31, MASK_XLEN(0x0000000000000001) -SLT x29, x19, x31 -sd x29, 32(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData + la a1, Word0DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+1 + la a1, Word1DstData + li a2, 16 + jal ra, memcpy8_4 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+1 + la a1, Word1DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+2 + la a1, Word2DstData + li a2, 16 + jal ra, memcpy8_4 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+2 + la a1, Word2DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+3 + la a1, Word3DstData + li a2, 16 + jal ra, memcpy8_4 -# Testcase 5: rs1:x21(0x0000000000000001), rs2:x28(0xffffffffffffffff), result rd:x20(0x0000000000000000) -li x21, MASK_XLEN(0x0000000000000001) -li x28, MASK_XLEN(0xffffffffffffffff) -SLT x20, x21, x28 -sd x20, 40(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+3 + la a1, Word3DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData + la a1, Double0DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData + la a1, Double0DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+1 + la a1, Double1DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. 
+ la a0, SourceData+1 + la a1, Double1DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+2 + la a1, Double2DstData + li a2, 16 + jal ra, memcpy8_8 -# Testcase 6: rs1:x5(0xffffffffffffffff), rs2:x23(0x0000000000000000), result rd:x10(0x0000000000000001) -li x5, MASK_XLEN(0xffffffffffffffff) -li x23, MASK_XLEN(0x0000000000000000) -SLT x10, x5, x23 -sd x10, 48(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+2 + la a1, Double2DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+3 + la a1, Double3DstData + li a2, 16 + jal ra, memcpy8_8 -# Testcase 7: rs1:x13(0xffffffffffffffff), rs2:x24(0x0000000000000001), result rd:x14(0x0000000000000001) -li x13, MASK_XLEN(0xffffffffffffffff) -li x24, MASK_XLEN(0x0000000000000001) -SLT x14, x13, x24 -sd x14, 56(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+3 + la a1, Double3DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+4 + la a1, Double4DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+4 + la a1, Double4DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+5 + la a1, Double5DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+5 + la a1, Double5DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+6 + la a1, Double6DstData + li a2, 16 + jal ra, memcpy8_8 + + # check if the values are write for all sizes and offsets of misaligned loads. 
+ la a0, SourceData+6 + la a1, Double6DstData + li a2, 16 + jal ra, CheckAllWriteSignature + + la a0, SourceData+7 + la a1, Double7DstData + li a2, 16 + jal ra, memcpy8_8 -# Testcase 8: rs1:x27(0xffffffffffffffff), rs2:x21(0xffffffffffffffff), result rd:x3(0x0000000000000000) -li x27, MASK_XLEN(0xffffffffffffffff) -li x21, MASK_XLEN(0xffffffffffffffff) -SLT x3, x27, x21 -sd x3, 64(x6) + # check if the values are write for all sizes and offsets of misaligned loads. + la a0, SourceData+7 + la a1, Double7DstData + li a2, 16 + jal ra, CheckAllWriteSignature + +.type CheckAllWriteSignature, @function +# a0 is the SourceData, (golden), a1 is the data to be checked. +# a2 is the number of doubles +# a3 is the signature pointer +# returns a0 as the OR of the byte, half, word, and last double memcmp results (each -1 when equal), +# returns a3 as incremented signature pointer +CheckAllWriteSignature: + mv s0, a0 + mv s1, a1 + mv s2, a2 + mv s3, a3 + # there is no stack so I'm saving ra into s5 (s4 is the running OR of the compare results) + mv s5, ra -# Testcase 9: rs1:x8(0x983631890063e42f), rs2:x21(0xb2d650af313b32b7), result rd:x15(0x0000000000000001) -li x8, MASK_XLEN(0x983631890063e42f) -li x21, MASK_XLEN(0xb2d650af313b32b7) -SLT x15, x8, x21 -sd x15, 72(x6) + # check values byte by byte + mv a0, s0 # SourceData + mv a1, s1 # ie: ByteDstData + slli a2, s2, 3 # * 8 + jal ra, memcmp1 + sb a0, 0(s3) + mv s4, a0 -# Testcase 10: rs1:x19(0xb5d97ef760ef1471), rs2:x28(0xac7c8803e01bbf50), result rd:x14(0x0000000000000000) -li x19, MASK_XLEN(0xb5d97ef760ef1471) -li x28, MASK_XLEN(0xac7c8803e01bbf50) -SLT x14, x19, x28 -sd x14, 80(x6) + # check values half by half + mv a0, s0 # SourceData + mv a1, s1 # ie: ByteDstData + slli a2, s2, 2 # * 4 + jal ra, memcmp2 + sb a0, 1(s3) + or s4, s4, a0 -# Testcase 11: rs1:x19(0x66faf98908135d58), rs2:x14(0xb3ab1b2cdf26f517), result rd:x25(0x0000000000000000) -li x19, MASK_XLEN(0x66faf98908135d58) -li x14, MASK_XLEN(0xb3ab1b2cdf26f517) -SLT x25, x19, x14 -sd x25, 88(x6) + # check values half by half + mv a0, s0 # SourceData + addi a1, s1, 1 # ie:
ByteDstData+1 + slli a2, s2, 2 # * 4 -1 + addi a2, a2, -1 + jal ra, memcmp2 + sb a0, 2(s3) + or s4, s4, a0 + + # check values word by word + mv a0, s0 # SourceData + mv a1, s1 # ie: ByteDstData + slli a2, s2, 1 # * 2 + jal ra, memcmp4 + sb a0, 3(s3) + or s4, s4, a0 -.EQU NUMTESTS,12 + # check values word by word + mv a0, s0 # SourceData + addi a1, s1, 1 # ie: ByteDstData+1 + slli a2, s2, 1 # * 2 -1 + addi a2, a2, -1 + jal ra, memcmp4 + sb a0, 4(s3) + or s4, s4, a0 + # check values word by word + mv a0, s0 # SourceData + addi a1, s1, 2 # ie: ByteDstData+2 + slli a2, s2, 1 # * 2 -1 + addi a2, a2, -1 + jal ra, memcmp4 + sb a0, 5(s3) + or s4, s4, a0 + + # check values word by word + mv a0, s0 # SourceData + addi a1, s1, 3 # ie: ByteDstData+3 + slli a2, s2, 1 # * 2 -1 + addi a2, a2, -1 + jal ra, memcmp4 + sb a0, 6(s3) + or s4, s4, a0 + + # check values double by double + mv a0, s0 # SourceData + mv a1, s1 # ie: ByteDstData + slli a2, s2, 0 # * 1 + jal ra, memcmp8 + sb a0, 7(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 1 # ie: ByteDstData+1 + slli a2, s2, 0 # * 1 -1 + addi a2, a2, -1 + jal ra, memcmp8 + sb a0, 8(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 2 # ie: ByteDstData+2 + slli a2, s2, 0 # * 1 -1 + addi a2, a2, -1 + jal ra, memcmp8 + sb a0, 9(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 3 # ie: ByteDstData+3 + slli a2, s2, 0 # * 1 -1 + addi a2, a2, -1 + jal ra, memcmp8 + sb a0, 10(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 4 # ie: ByteDstData+4 + slli a2, s2, 0 # * 1 -1 + addi a2, a2, -1 + jal ra, memcmp8 + sb a0, 11(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 5 # ie: ByteDstData+5 + slli a2, s2, 0 # * 1 -1 + addi a2, a2, -1 + jal ra, memcmp8 + sb a0, 12(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 6 # ie: ByteDstData+6 + slli a2, s2, 0 # * 1 -1 + addi a2, a2, -1 +
jal ra, memcmp8 + sb a0, 13(s3) + + # check values double by double + mv a0, s0 # SourceData + addi a1, s1, 7 # ie: ByteDstData+7 + slli a2, s2, 0 # * 1 + addi a2, a2, -1 + jal ra, memcmp8 + sb a0, 14(s3) + + addi s3, s3, 15 + mv a3, s3 + or a0, s4, a0 + mv ra, s5 + ret + + +.type memcmp1, @function +# returns which index mismatch, -1 if none +memcmp1: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 1 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp1_loop: + lbu t3, 0(t0) + lbu t4, 0(t1) + bne t3, t4, memcmp1_ne + addi t0, t0, 1 + addi t1, t1, 1 + addi t2, t2, 1 + blt t2, a2, memcmp1_loop + li a0, -1 + ret +memcmp1_ne: + mv a0, t2 + ret + +.type memcmp2, @function +# returns which index mismatch, -1 if none +memcmp2: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 2 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp2_loop: + lhu t3, 0(t0) + lhu t4, 0(t1) + bne t3, t4, memcmp2_ne + addi t0, t0, 2 + addi t1, t1, 2 + addi t2, t2, 1 + blt t2, a2, memcmp2_loop + li a0, -1 + ret +memcmp2_ne: + mv a0, t2 + ret + +.type memcmp4, @function +# returns which index mismatch, -1 if none +memcmp4: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp4_loop: + lwu t3, 0(t0) + lwu t4, 0(t1) + bne t3, t4, memcmp4_ne + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcmp4_loop + li a0, -1 + ret +memcmp4_ne: + mv a0, t2 + ret + +.type memcmp8, @function +# returns which index mismatch, -1 if none +memcmp8: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp8_loop: + ld t3, 0(t0) + ld t4, 0(t1) + bne t3, t4, memcmp8_ne + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcmp8_loop + li a0, -1 + ret +memcmp8_ne: + mv a0, t2 + ret + + RVTEST_CODE_END RVMODEL_HALT +.type memcpy8_1, @function +# load 8 bytes using load double then store using 8 sb +memcpy8_1: + # a0 is
the source + # a1 is the dst + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcpy8_1_loop: + ld t3, 0(t0) + andi t4, t3, 0xff + sb t4, 0(t1) + slli t4, t3, 8 + andi t4, t4, 0xff + sb t4, 1(t1) + + slli t4, t3, 16 + andi t4, t4, 0xff + sb t4, 2(t1) + + slli t4, t3, 24 + andi t4, t4, 0xff + sb t4, 3(t1) + + slli t4, t3, 32 + andi t4, t4, 0xff + sb t4, 4(t1) + + slli t4, t3, 40 + andi t4, t4, 0xff + sb t4, 5(t1) + + slli t4, t3, 48 + andi t4, t4, 0xff + sb t4, 6(t1) + + slli t4, t3, 56 + andi t4, t4, 0xff + sb t4, 7(t1) + + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_1_loop + ret + +.type memcpy8_2, @function +# load 8 bytes using load double then store using 4 sh +memcpy8_2: + # a0 is the source + # a1 is the dst + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 + + # 16 bit mask + lui t4, 0xf + ori t4, t4, 0xfff + +memcpy8_2_loop: + ld t3, 0(t0) + and t4, t4, t3 + sh t4, 0(t1) + + slli t4, t3, 16 + and t4, t4, t3 + sh t4, 2(t1) + + slli t4, t3, 32 + and t4, t4, t3 + sh t4, 4(t1) + + slli t4, t3, 48 + and t4, t4, t3 + sh t4, 6(t1) + + + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_2_loop + ret + +.type memcpy8_4, @function +# load 8 bytes using load double then store using 2 sw +memcpy8_4: + # a0 is the source + # a1 is the dst + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 + + # 32 bit mask + lui t4, 0xffff + ori t4, t4, 0xfff + +memcpy8_4_loop: + ld t3, 0(t0) + and t4, t4, t3 + sw t4, 0(t1) + + slli t4, t3, 32 + and t4, t4, t3 + sw t4, 4(t1) + + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_4_loop + ret + +.type memcpy8_8, @function +# load 8 bytes using load double then store using 1 sd +memcpy8_8: + # a0 is the source + # a1 is the dst + # a2 is the number of 8 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 + +memcpy8_8_loop: + ld t3, 0(t0) + sd t4, 0(t1) + + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt 
t2, a2, memcpy8_8_loop + ret + + RVTEST_DATA_BEGIN -.align 4 +.align 3 rvtest_data: -.word 0x98765432 +SourceData: +.8byte 0x0706050403020100, 0x0f0e0d0c0b0a0908, 0x1716151413021110, 0x1f1e1d1c1b1a1918 +.8byte 0x2726252423222120, 0x2f2e2d2c2b2a2928, 0x3736353433023130, 0x3f3e3d3c3b3a3938 +.8byte 0x4746454443424140, 0x4f4e4d4c4b4a4948, 0x5756555453025150, 0x5f5e5d5c5b5a5958 +.8byte 0x6766656463626160, 0x6f6e6d6c6b6a6968, 0x7776757473027170, 0x7f7e7d7c7b7a7978 +.8byte 0xdeadbeefdeadbeef + +Response1ByteOffsetData: +.8byte 0x0807060504030201, 0x100f0e0d0c0b0a09, 0x1817161514130211, 0x201f1e1d1c1b1a19 +.8byte 0x2827262524232221, 0x302f2e2d2c2b2a29, 0x3837363534330231, 0x403f3e3d3c3b3a39 +.8byte 0x4847464544434241, 0x504f4e4d4c4b4a49, 0x5857565554530251, 0x605f5e5d5c5b5a59 +.8byte 0x6867666564636261, 0x706f6e6d6c6b6a69, 0x7877767574730271, 0xde7f7e7d7c7b7a79 + +Response2ByteOffsetData: +.8byte 0x0908070605040302, 0x11100f0e0d0c0b0a, 0x1918171615141302, 0x21201f1e1d1c1b1a +.8byte 0x2928272625242322, 0x31302f2e2d2c2b2a, 0x3938373635343302, 0x41403f3e3d3c3b3a +.8byte 0x4948474645444342, 0x51504f4e4d4c4b4a, 0x5958575655545302, 0x61605f5e5d5c5b5a +.8byte 0x6968676665646362, 0x71706f6e6d6c6b6a, 0x7978777675747302, 0xdead7f7e7d7c7b7a + +Response3ByteOffsetData: +.8byte 0x0a09080706050403, 0x0211100f0e0d0c0b, 0x1a19181716151413, 0x2221201f1e1d1c1b +.8byte 0x2a29282726252423, 0x0231302f2e2d2c2b, 0x3a39383736353433, 0x4241403f3e3d3c3b +.8byte 0x4a49484746454443, 0x0251504f4e4d4c4b, 0x5a59585756555453, 0x6261605f5e5d5c5b +.8byte 0x6a69686766656463, 0x0271706f6e6d6c6b, 0x7a79787776757473, 0xdeadbe7f7e7d7c7b + +Response4ByteOffsetData: +.8byte 0x0b0a090807060504, 0x130211100f0e0d0c, 0x1b1a191817161514, 0x232221201f1e1d1c +.8byte 0x2b2a292827262524, 0x330231302f2e2d2c, 0x3b3a393837363534, 0x434241403f3e3d3c +.8byte 0x4b4a494847464544, 0x530251504f4e4d4c, 0x5b5a595857565554, 0x636261605f5e5d5c +.8byte 0x6b6a696867666564, 0x730271706f6e6d6c, 0x7b7a797877767574, 0xdeadbeef7f7e7d7c + 
+Response5ByteOffsetData: +.8byte 0x0c0b0a0908070605, 0x14130211100f0e0d, 0x1c1b1a1918171615, 0x24232221201f1e1d +.8byte 0x2c2b2a2928272625, 0x34330231302f2e2d, 0x3c3b3a3938373635, 0x44434241403f3e3d +.8byte 0x4c4b4a4948474645, 0x54530251504f4e4d, 0x5c5b5a5958575655, 0x64636261605f5e5d +.8byte 0x6c6b6a6968676665, 0x74730271706f6e6d, 0x7c7b7a7978777675, 0xdeadbeefde7f7e7d + +Response6ByteOffsetData: +.8byte 0x0d0c0b0a09080706, 0x1514130211100f0e, 0x1d1c1b1a19181716, 0x2524232221201f1e +.8byte 0x2d2c2b2a29282726, 0x3534330231302f2e, 0x3d3c3b3a39383736, 0x4544434241403f3e +.8byte 0x4d4c4b4a49484746, 0x5554530251504f4e, 0x5d5c5b5a59585756, 0x6564636261605f5e +.8byte 0x6d6c6b6a69686766, 0x7574730271706f6e, 0x7d7c7b7a79787776, 0xdeadbeefdead7f7e + +Response7ByteOffsetData: +.8byte 0x0e0d0c0b0a090807, 0x161514130211100f, 0x1e1d1c1b1a191817, 0x262524232221201f +.8byte 0x2e2d2c2b2a292827, 0x363534330231302f, 0x3e3d3c3b3a393837, 0x464544434241403f +.8byte 0x4e4d4c4b4a494847, 0x565554530251504f, 0x5e5d5c5b5a595857, 0x666564636261605f +.8byte 0x6e6d6c6b6a696867, 0x767574730271706f, 0x7e7d7c7b7a797877, 0xdeadbeefdeadbe7f + RVTEST_DATA_END RVMODEL_DATA_BEGIN +ByteDstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -wally_signature: - .fill NUMTESTS*(XLEN/32),4,0xdeadbeef +Half0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Half1DstData: +.8byte 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#ifdef rvtest_mtrap_routine +Word0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -mtrap_sigptr: - .fill 64*(XLEN/32),4,0xdeadbeef +Word1DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#endif +Word2DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Word3DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#ifdef rvtest_gpr_save - -gpr_save: - .fill 32*(XLEN/32),4,0xdeadbeef - -#endif +Double1DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double2DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double3DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double4DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double5DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double6DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double7DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +signature: + .fill 225, 1, 0xff + RVMODEL_DATA_END // ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S // David_Harris@hmc.edu & Katherine Parry From 5ca428d6a8c3b8a8a194a04a94ea61aa30e0c4d0 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 12:49:35 -0500 Subject: [PATCH 15/48] Fixed bugs in misaligned test. 
--- .../src/WALLY-misaligned-access-01.S | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 76496ff47..325238270 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -240,7 +240,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 1 # ie: ByteDstData+1 srli a2, s2, 2 # * 4 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp2 sb a0, 2(s3) or s4, s4, a0 @@ -257,7 +257,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 1 # ie: ByteDstData+1 srli a2, s2, 1 # * 2 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp4 sb a0, 4(s3) or s4, s4, a0 @@ -266,7 +266,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 2 # ie: ByteDstData+2 srli a2, s2, 1 # * 2 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp4 sb a0, 5(s3) or s4, s4, a0 @@ -275,7 +275,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 3 # ie: ByteDstData+3 srli a2, s2, 1 # * 2 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp4 sb a0, 6(s3) or s4, s4, a0 @@ -291,7 +291,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 1 # ie: ByteDstData+1 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 8(s3) @@ -299,7 +299,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 2 # ie: ByteDstData+2 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 9(s3) @@ -307,7 +307,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 3 # ie: ByteDstData+3 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 10(s3) @@ -315,7 +315,7 @@ CheckAllWriteSignature: mv a0, 
s0 # SourceData addi a1, s1, 4 # ie: ByteDstData+4 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 11(s3) @@ -323,7 +323,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 5 # ie: ByteDstData+5 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 12(s3) @@ -331,7 +331,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 6 # ie: ByteDstData+6 srli a2, s2, 0 # * 1 -1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 13(s3) @@ -339,7 +339,7 @@ CheckAllWriteSignature: mv a0, s0 # SourceData addi a1, s1, 7 # ie: ByteDstData+7 srli a2, s2, 0 # * 1 - subi a2, a2, 1 + addi a2, a2, -1 jal ra, memcmp8 sb a0, 14(s3) @@ -347,7 +347,7 @@ CheckAllWriteSignature: mv a3, s3 or a0, s4, a0 mv ra, s4 - ret ra + ret .type memcmp1, @function @@ -505,7 +505,8 @@ memcpy8_2: # 16 bit mask lui t4, 0xf - ori t4, t4, 0xfff + li t3, 0xfff + or t4, t4, t3 memcpy8_2_loop: ld t3, 0(t0) @@ -543,7 +544,8 @@ memcpy8_4: # 32 bit mask lui t4, 0xffff - ori t4, t4, 0xfff + li t3, 0xfff + or t4, t4, t3 memcpy8_4_loop: ld t3, 0(t0) From 4984b3935f749b903cac9573379378ae93c2474d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 14:50:33 -0500 Subject: [PATCH 16/48] Progress --- ...ALLY-misaligned-access-01.reference_output | 114 +++++++------- .../src/WALLY-misaligned-access-01.S | 140 ++++++++++-------- 2 files changed, 135 insertions(+), 119 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index b0078f9ac..9c1539122 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -478,60 
+478,60 @@ deadbeef 7e7d7c7b deadbe7f deadbeef -00000000 #signature -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00000000 -00 +0fffffff #signature +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ff diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 325238270..9ceff3694 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -40,6 +40,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ la a3, signature # does not get overwritten by any functions +TEST_BYTE: # byte copy region. 
always naturally aligned la a0, SourceData la a1, ByteDstData @@ -52,6 +53,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_HALF0: la a0, SourceData la a1, Half0DstData li a2, 16 @@ -63,6 +65,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_HALF1: la a0, SourceData+1 la a1, Half1DstData li a2, 16 @@ -74,6 +77,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_WORD0: la a0, SourceData la a1, Word0DstData li a2, 16 @@ -85,6 +89,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_WORD1: la a0, SourceData+1 la a1, Word1DstData li a2, 16 @@ -96,6 +101,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_WORD2: la a0, SourceData+2 la a1, Word2DstData li a2, 16 @@ -107,6 +113,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_WORD3: la a0, SourceData+3 la a1, Word3DstData li a2, 16 @@ -118,6 +125,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE0: la a0, SourceData la a1, Double0DstData li a2, 16 @@ -129,6 +137,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE1: la a0, SourceData+1 la a1, Double1DstData li a2, 16 @@ -140,6 +149,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE2: la a0, SourceData+2 la a1, Double2DstData li a2, 16 @@ -151,6 +161,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def 
TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE3: la a0, SourceData+3 la a1, Double3DstData li a2, 16 @@ -162,6 +173,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE4: la a0, SourceData+4 la a1, Double4DstData li a2, 16 @@ -173,6 +185,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE5: la a0, SourceData+5 la a1, Double5DstData li a2, 16 @@ -184,6 +197,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE6: la a0, SourceData+6 la a1, Double6DstData li a2, 16 @@ -195,6 +209,7 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +TEST_DOUBLE7: la a0, SourceData+7 la a1, Double7DstData li a2, 16 @@ -206,6 +221,8 @@ RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_ li a2, 16 jal ra, CheckAllWriteSignature +RVMODEL_HALT + .type CheckAll, @function # a0 is the SourceData, (golden), a1 is the data to be checked. 
# a2 is the number of doubles @@ -217,13 +234,13 @@ CheckAllWriteSignature: mv s1, a1 mv s2, a2 mv s3, a3 - # there is no stack so I'm saving ra into s4 - mv s4, ra + # there is no stack so I'm saving ra into s5 + mv s5, ra # check values byte by byte mv a0, s0 # SourceData mv a1, s1 # ie: ByteDstData - srli a2, s2, 3 # * 8 + slli a2, s2, 3 # * 8 jal ra, memcmp1 sb a0, 0(s3) mv s4, a0 @@ -231,50 +248,50 @@ CheckAllWriteSignature: # check values half by half mv a0, s0 # SourceData mv a1, s1 # ie: ByteDstData - srli a2, s2, 2 # * 4 + slli a2, s2, 2 # * 4 jal ra, memcmp2 sb a0, 1(s3) or s4, s4, a0 # check values half by half - mv a0, s0 # SourceData - addi a1, s1, 1 # ie: ByteDstData+1 - srli a2, s2, 2 # * 4 -1 + addi a0, s0, 1 # SourceData+1 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 2 # * 4 -1 addi a2, a2, -1 jal ra, memcmp2 sb a0, 2(s3) or s4, s4, a0 # check values word by word - mv a0, s0 # SourceData + addi a0, s0, 0 # SourceData mv a1, s1 # ie: ByteDstData - srli a2, s2, 1 # * 2 + slli a2, s2, 1 # * 2 jal ra, memcmp4 sb a0, 3(s3) or s4, s4, a0 # check values word by word - mv a0, s0 # SourceData - addi a1, s1, 1 # ie: ByteDstData+1 - srli a2, s2, 1 # * 2 -1 + addi a0, s0, 1 # SourceData+1 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 sb a0, 4(s3) or s4, s4, a0 # check values word by word - mv a0, s0 # SourceData - addi a1, s1, 2 # ie: ByteDstData+2 - srli a2, s2, 1 # * 2 -1 + addi a0, s0, 2 # SourceData+2 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 sb a0, 5(s3) or s4, s4, a0 # check values word by word - mv a0, s0 # SourceData - addi a1, s1, 3 # ie: ByteDstData+3 - srli a2, s2, 1 # * 2 -1 + addi a0, s0, 3 # SourceData+3 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 sb a0, 6(s3) @@ -283,62 +300,62 @@ CheckAllWriteSignature: # check values double by double mv a0, s0 # SourceData mv a1, s1 # ie: ByteDstData - srli a2, 
s2, 0 # * 1 + slli a2, s2, 0 # * 1 jal ra, memcmp8 sb a0, 7(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 1 # ie: ByteDstData+1 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 1 # SourceData+1 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 8(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 2 # ie: ByteDstData+2 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 2 # SourceData+2 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 9(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 3 # ie: ByteDstData+3 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 3 # SourceData+3 + addi a1, s1, 2 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 10(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 4 # ie: ByteDstData+4 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 4 # SourceData+4 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 11(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 5 # ie: ByteDstData+5 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 5 # SourceData+5 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 12(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 6 # ie: ByteDstData+6 - srli a2, s2, 0 # * 1 -1 + addi a0, s0, 6 # SourceData+6 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 13(s3) # check values double by double - mv a0, s0 # SourceData - addi a1, s1, 7 # ie: ByteDstData+7 - srli a2, s2, 0 # * 1 + addi a0, s0, 7 # SourceData+7 + addi a1, s1, 0 # ie: ByteDstData + slli a2, s2, 0 # * 1 addi a2, a2, -1 jal ra, memcmp8 sb a0, 14(s3) @@ -346,7 +363,7 @@ CheckAllWriteSignature: addi s3, s3, 15 mv a3, s3 or a0, s4, a0 - mv ra, s4 + mv ra, s5 ret @@ -444,7 +461,6 
@@ memcmp8_ne: RVTEST_CODE_END -RVMODEL_HALT .type memcpy8_1, @function # load 8 bytes using load double then store using 8 sb @@ -459,31 +475,31 @@ memcpy8_1_loop: ld t3, 0(t0) andi t4, t3, 0xff sb t4, 0(t1) - slli t4, t3, 8 + srli t4, t3, 8 andi t4, t4, 0xff sb t4, 1(t1) - slli t4, t3, 16 + srli t4, t3, 16 andi t4, t4, 0xff sb t4, 2(t1) - slli t4, t3, 24 + srli t4, t3, 24 andi t4, t4, 0xff sb t4, 3(t1) - slli t4, t3, 32 + srli t4, t3, 32 andi t4, t4, 0xff sb t4, 4(t1) - slli t4, t3, 40 + srli t4, t3, 40 andi t4, t4, 0xff sb t4, 5(t1) - slli t4, t3, 48 + srli t4, t3, 48 andi t4, t4, 0xff sb t4, 6(t1) - slli t4, t3, 56 + srli t4, t3, 56 andi t4, t4, 0xff sb t4, 7(t1) @@ -506,23 +522,23 @@ memcpy8_2: # 16 bit mask lui t4, 0xf li t3, 0xfff - or t4, t4, t3 + or t5, t4, t3 memcpy8_2_loop: ld t3, 0(t0) - and t4, t4, t3 + and t4, t3, t5 sh t4, 0(t1) - slli t4, t3, 16 - and t4, t4, t3 + srli t4, t3, 16 + and t4, t4, t5 sh t4, 2(t1) - slli t4, t3, 32 - and t4, t4, t3 + srli t4, t3, 32 + and t4, t4, t5 sh t4, 4(t1) - slli t4, t3, 48 - and t4, t4, t3 + srli t4, t3, 48 + and t4, t4, t5 sh t4, 6(t1) @@ -545,15 +561,15 @@ memcpy8_4: # 32 bit mask lui t4, 0xffff li t3, 0xfff - or t4, t4, t3 + or t5, t4, t3 memcpy8_4_loop: ld t3, 0(t0) - and t4, t4, t3 + and t4, t3, t5 sw t4, 0(t1) - slli t4, t3, 32 - and t4, t4, t3 + srli t4, t3, 32 + and t4, t4, t5 sw t4, 4(t1) addi t0, t0, 8 @@ -730,7 +746,7 @@ Double7DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef signature: - .fill 225, 1, 0xff + .fill 225, 1, 0x00 RVMODEL_DATA_END // ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S From 5660eff57d08863bf1ad42c6bb367a9e70a556de Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 18:50:13 -0500 Subject: [PATCH 17/48] Working through issues with the psill logic. 
--- src/lsu/align.sv | 50 +++++--- src/lsu/lsu.sv | 2 +- ...ALLY-misaligned-access-01.reference_output | 65 ++++++----- .../src/WALLY-misaligned-access-01.S | 110 +++++++++--------- 4 files changed, 124 insertions(+), 103 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index b517dfcdb..3708674aa 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -68,14 +68,25 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; - //////////////////////////////////////////////////////////////////////////////////////////////////// - // PC logic - //////////////////////////////////////////////////////////////////////////////////////////////////// - localparam LLENINBYTES = P.LLEN/8; logic [P.XLEN-1:0] IEUAdrIncrementM; + logic [3:0] IncrementAmount; + + logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; + logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; + + always_comb begin + case(MemRWM) + 2'b00: IncrementAmount = 4'd0; + 2'b01: IncrementAmount = 4'd1; + 2'b10: IncrementAmount = 4'd3; + 2'b11: IncrementAmount = 4'd7; + default: IncrementAmount = 4'd7; + endcase + end /* verilator lint_off WIDTHEXPAND */ - assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + //assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); @@ -88,15 +99,16 @@ module align import cvw::*; #(parameter cvw_t P) ( // 1) operation size // 2) offset // 3) access location within the cacheline - logic [$clog2(P.DCACHE_LINELENINBITS/8)-1:$clog2(LLENINBYTES)] WordOffsetM; + localparam OFFSET_BIT_POS = $clog2(P.DCACHE_LINELENINBITS/8); + logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic 
HalfSpillM, WordSpillM; - assign {WordOffsetM, ByteOffsetM} = IEUAdrM[$clog2(P.DCACHE_LINELENINBITS/8)-1:0]; - assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; - assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; + assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:0] == '1) & Funct3M[1:0] == 2'b01; + assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin logic DoubleSpillM; - assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; + assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & Funct3M[1:0] == 2'b11; assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM); @@ -154,10 +166,18 @@ module align import cvw::*; #(parameter cvw_t P) ( // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; - assign LSUWriteDataShiftedM = {LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); - mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. + + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 
8 * ByteOffsetM : '0); + assign LSUWriteDataShiftedM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; + assign LSUWriteDataSpillM = LSUWriteDataShiftedM; + //mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN], LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; - assign ByteMaskShiftedM = {ByteMaskExtendedM, ByteMaskM}; - mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, SelSpillM, ByteMaskSpillM); - + assign ByteMaskShiftedM = ByteMaskMuxM; + mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskM}, + {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, {SelSpillM, SelSpillE}, ByteMaskSpillM); + + flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SelSpillE, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); + mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index ef9edb72b..44689a1d1 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -297,7 +297,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( - .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), + .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, diff --git 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 9c1539122..dd8a642fc 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -62,38 +62,39 @@ 77767574 7b7a7978 7f7e7d7c -04030201 # Half1DstData -08070605 -0c0b0a09 -100f0e0d -14130211 -18171615 -1c1b1a19 -201f1e1d -24232221 -28272625 -2c2b2a29 -302f2e2d -34330231 -38373635 -3c3b3a39 -403f3e3d -44434241 -48474645 -4c4b4a49 -504f4e4d -54530251 -58575655 -5c5b5a59 -605f5e5d -64636261 -68676665 -6c6b6a69 -706f6e6d -74730271 -78777675 -7c7b7a79 -de7f7e7d +020100ef # Half1DstData +06050403 +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +7fdeadbe 03020100 # Word0DstData 07060504 0b0a0908 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 9ceff3694..d6ae2603f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -66,14 +66,14 @@ TEST_HALF0: jal ra, CheckAllWriteSignature TEST_HALF1: - la a0, SourceData+1 - la a1, Half1DstData + la a0, SourceData + la a1, Half1DstData+1 li a2, 16 jal ra, memcpy8_2 # check if the values are write for all sizes and offsets of misaligned 
loads. - la a0, SourceData+1 - la a1, Half1DstData + la a0, SourceData + la a1, Half1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature @@ -90,38 +90,38 @@ TEST_WORD0: jal ra, CheckAllWriteSignature TEST_WORD1: - la a0, SourceData+1 - la a1, Word1DstData + la a0, SourceData + la a1, Word1DstData+1 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+1 - la a1, Word1DstData + la a0, SourceData + la a1, Word1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature TEST_WORD2: - la a0, SourceData+2 - la a1, Word2DstData + la a0, SourceData + la a1, Word2DstData+2 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+2 - la a1, Word2DstData + la a0, SourceData + la a1, Word2DstData+2 li a2, 16 jal ra, CheckAllWriteSignature TEST_WORD3: - la a0, SourceData+3 - la a1, Word3DstData + la a0, SourceData + la a1, Word3DstData+3 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+3 - la a1, Word3DstData + la a0, SourceData + la a1, Word3DstData+3 li a2, 16 jal ra, CheckAllWriteSignature @@ -138,86 +138,86 @@ TEST_DOUBLE0: jal ra, CheckAllWriteSignature TEST_DOUBLE1: - la a0, SourceData+1 - la a1, Double1DstData + la a0, SourceData + la a1, Double1DstData+1 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+1 - la a1, Double1DstData + la a0, SourceData + la a1, Double1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE2: - la a0, SourceData+2 - la a1, Double2DstData + la a0, SourceData + la a1, Double2DstData+2 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. 
- la a0, SourceData+2 - la a1, Double2DstData + la a0, SourceData + la a1, Double2DstData+2 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE3: - la a0, SourceData+3 - la a1, Double3DstData + la a0, SourceData + la a1, Double3DstData+3 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+3 - la a1, Double3DstData + la a0, SourceData + la a1, Double3DstData+3 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE4: - la a0, SourceData+4 - la a1, Double4DstData + la a0, SourceData + la a1, Double4DstData+4 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+4 - la a1, Double4DstData + la a0, SourceData + la a1, Double4DstData+4 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE5: - la a0, SourceData+5 - la a1, Double5DstData + la a0, SourceData + la a1, Double5DstData+5 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+5 - la a1, Double5DstData + la a0, SourceData + la a1, Double5DstData+5 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE6: - la a0, SourceData+6 - la a1, Double6DstData + la a0, SourceData + la a1, Double6DstData+6 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+6 - la a1, Double6DstData + la a0, SourceData + la a1, Double6DstData+6 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE7: - la a0, SourceData+7 - la a1, Double7DstData + la a0, SourceData + la a1, Double7DstData+7 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. 
- la a0, SourceData+7 - la a1, Double7DstData + la a0, SourceData + la a1, Double7DstData+7 li a2, 16 jal ra, CheckAllWriteSignature @@ -672,7 +672,7 @@ Half1DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word0DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef @@ -684,19 +684,19 @@ Word1DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word2DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word3DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double0DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef @@ -708,43 +708,43 @@ Double1DstData: .8byte 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double2DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double3DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double4DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double5DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double6DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double7DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef signature: .fill 225, 1, 0x00 From 13333d3e829c18202604ec13be6088985edc2029 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 1 Nov 2023 14:25:18 -0500 Subject: [PATCH 18/48] Finally the d$ spill works. At least until the next bug. Definitely needs a lot of cleanup. --- src/lsu/align.sv | 22 ++++++++++++++++------ src/lsu/lsu.sv | 13 +++++++++---- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 3708674aa..8dda91a1c 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -53,10 +53,13 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [P.LLEN-1:0] DCacheReadDataWordSpillM);// The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [1:0] MemRWSpillM, + output logic SelStoreDelay, //*** this is bad. 
really don't like moving this outside + output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic SpillStallM); // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1] - typedef enum logic [1:0] {STATE_READY, STATE_SPILL} statetype; + typedef enum logic [1:0] {STATE_READY, STATE_SPILL, STATE_STORE_DELAY} statetype; statetype CurrState, NextState; logic TakeSpillM; @@ -74,6 +77,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; + logic SaveByteMask; always_comb begin case(MemRWM) @@ -123,17 +127,23 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (TakeSpillM) NextState = STATE_SPILL; + STATE_READY: if (TakeSpillM & ~MemRWM[0]) NextState = STATE_SPILL; + else if(TakeSpillM & MemRWM[0])NextState = STATE_STORE_DELAY; else NextState = STATE_READY; STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; + STATE_STORE_DELAY: NextState = STATE_SPILL; default: NextState = STATE_READY; endcase end - assign SelSpillM = (CurrState == STATE_SPILL); - assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall); + assign SelSpillM = (CurrState == STATE_SPILL | CurrState == STATE_STORE_DELAY); + assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); + assign SaveByteMask = (CurrState == STATE_READY & TakeSpillM); assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; + assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); + assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; + mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM); 
//////////////////////////////////////////////////////////////////////////////////////////////////// // Merge spilled data @@ -178,6 +188,6 @@ module align import cvw::*; #(parameter cvw_t P) ( mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskM}, {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, {SelSpillM, SelSpillE}, ByteMaskSpillM); - flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SelSpillE, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); + flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SaveByteMask, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 44689a1d1..0d26fed6e 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -135,7 +135,10 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write - + logic [1:0] MemRWSpillM; + logic SpillStallM; + logic SelStoreDelay; + logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB logic DataUpdateDAM; // DTLB hit needs to update dirty or access bits @@ -157,7 +160,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .MemRWM, .CacheableM, .DCacheReadDataWordM, .CacheBusHPWTStall, .DTLBMissM, .DataUpdateDAM, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM); + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .MemRWSpillM, .DCacheReadDataWordSpillM, .SpillStallM, + .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; end else begin : no_ziccslm_align @@ -167,6 +171,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( 
assign DCacheReadDataWordSpillM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; + assign MemRWSpillM = MemRWM; end ///////////////////////////////////////////////////////////////////////////////////////////// @@ -205,7 +210,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; assign GatedStallW = StallW & ~SelHPTW; assign CacheBusHPWTStall = DCacheStallM | HPTWStall | BusStall; - assign LSUStallM = CacheBusHPWTStall | SelSpillE; + assign LSUStallM = CacheBusHPWTStall | SpillStallM; ///////////////////////////////////////////////////////////////////////////////////////////// // MMU and misalignment fault logic required if privileged unit exists @@ -297,7 +302,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( - .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), + .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(SelStoreDelay ? 2'b00 : CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, From 7ba891f607320d8dd39b347cf961ad60436bcaae Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 1 Nov 2023 17:51:48 -0500 Subject: [PATCH 19/48] Progress. I think the remaining bugs are in the regression test's signature. 
--- .../references/WALLY-misaligned-access-01.reference_output | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index dd8a642fc..c63263f04 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -94,7 +94,8 @@ 76757473 7a797877 7e7d7c7b -7fdeadbe +deadbe7f +deadbeef 03020100 # Word0DstData 07060504 0b0a0908 From afa1d85e3b51dc411c22195f0b88b824865e466b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 2 Nov 2023 12:07:42 -0500 Subject: [PATCH 20/48] Doesn't yet fully work. Thomas is going to finish debugging while I'm on the RISCV summit next week. 
--- ...ALLY-misaligned-access-01.reference_output | 71 ++++++++++--------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index c63263f04..134074cb6 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -128,39 +128,42 @@ deadbeef 77767574 7b7a7978 7f7e7d7c -04030201 # Word1DstData -08070605 -0c0b0a09 -100f0e0d -14130211 -18171615 -1c1b1a19 -201f1e1d -24232221 -28272625 -2c2b2a29 -302f2e2d -34330231 -38373635 -3c3b3a39 -403f3e3d -44434241 -48474645 -4c4b4a49 -504f4e4d -54530251 -58575655 -5c5b5a59 -605f5e5d -64636261 -68676665 -6c6b6a69 -706f6e6d -74730271 -78777675 -7c7b7a79 -de7f7e7d -05040302 # Word2DstData +020100ef # Word1DstData +06050403 +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +deadbeef +0100beef # Word2DstData +05040302 09080706 0d0c0b0a 11100f0e @@ -192,6 +195,7 @@ de7f7e7d 79787776 7d7c7b7a dead7f7e +deadbeef 06050403 # Word3DstData 0a090807 0e0d0c0b @@ -224,6 +228,7 @@ dead7f7e 7a797877 7e7d7c7b deadbe7f +deadbeef 03020100 # Double0DstData 07060504 0b0a0908 From 7222aaa196e8a38ca287af65d54f23a67f482d0d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 2 Nov 2023 12:47:40 -0500 Subject: [PATCH 21/48] Enabled Zicclsm in rv64gc. 
--- config/rv64gc/config.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index fa603990b..5853f87a4 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -47,7 +47,7 @@ localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 1; -localparam ZICCLSM_SUPPORTED = 0; +localparam ZICCLSM_SUPPORTED = 1; localparam SVPBMT_SUPPORTED = 1; localparam SVNAPOT_SUPPORTED = 1; localparam SVINVAL_SUPPORTED = 1; From 9abd26aad996ca6090d8e1caa3ff484c3e7097b7 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 16:08:04 -0600 Subject: [PATCH 22/48] Fixed bug which broke the non Zicclsm configs. --- src/lsu/lsu.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 0d26fed6e..ba7d8e119 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -172,6 +172,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; + assign {SpillStallM, SelStoreDelay} = '0; end ///////////////////////////////////////////////////////////////////////////////////////////// From baacb6f6ebe3de800f88cfa00e7566a020fe2c7b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 16:10:10 -0600 Subject: [PATCH 23/48] Missed tests.vh. 
--- testbench/tests.vh | 1 + 1 file changed, 1 insertion(+) diff --git a/testbench/tests.vh b/testbench/tests.vh index 5e4f607cb..7b9243368 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1971,6 +1971,7 @@ string arch64zbs[] = '{ string wally64priv[] = '{ `WALLYTEST, "rv64i_m/privilege/src/WALLY-minfo-01.S", + "rv64i_m/privilege/src/WALLY-misaligned-access-01.S", "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S", "rv64i_m/privilege/src/WALLY-cboz-01.S", "rv64i_m/privilege/src/WALLY-cbom-01.S", From ada354f443cd3c04eb5cfe1563c7725de072a5a0 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:02:15 -0600 Subject: [PATCH 24/48] Fixed bug in the misaligned access test. --- .../rv64i_m/privilege/src/WALLY-misaligned-access-01.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index d6ae2603f..90ef6283f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -559,9 +559,8 @@ memcpy8_4: li t2, 0 # 32 bit mask - lui t4, 0xffff - li t3, 0xfff - or t5, t4, t3 + addi t4, x0, -1 + srli t5, t4, 32 memcpy8_4_loop: ld t3, 0(t0) From 84d86b19945784a666956e811dd58fc1b3467190 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:18:45 -0600 Subject: [PATCH 25/48] Fixed spill bugs in the aligner. 
--- src/lsu/align.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 8dda91a1c..149b7e0bc 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -80,7 +80,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SaveByteMask; always_comb begin - case(MemRWM) + case(Funct3M) 2'b00: IncrementAmount = 4'd0; 2'b01: IncrementAmount = 4'd1; 2'b10: IncrementAmount = 4'd3; @@ -108,8 +108,8 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; - assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:0] == '1) & Funct3M[1:0] == 2'b01; - assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & Funct3M[1:0] == 2'b10; + assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; + assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin logic DoubleSpillM; assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & Funct3M[1:0] == 2'b11; From efecb0c3463d330680088b9916761a3e97207ca1 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:34:23 -0600 Subject: [PATCH 26/48] Fixed bug in the Zicclsm test. 
--- .../rv64i_m/privilege/src/WALLY-misaligned-access-01.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 90ef6283f..2ee4e021c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -589,7 +589,7 @@ memcpy8_8: memcpy8_8_loop: ld t3, 0(t0) - sd t4, 0(t1) + sd t3, 0(t1) addi t0, t0, 8 addi t1, t1, 8 From bd866e1025763bdf2676d0b213f75b4e07ed6087 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:36:10 -0600 Subject: [PATCH 27/48] Fixed some more bugs in the Zicclsm signature. --- ...ALLY-misaligned-access-01.reference_output | 133 +++++++++--------- 1 file changed, 68 insertions(+), 65 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 134074cb6..9755a8520 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -196,38 +196,39 @@ deadbeef 7d7c7b7a dead7f7e deadbeef -06050403 # Word3DstData -0a090807 -0e0d0c0b -0211100f -16151413 -1a191817 -1e1d1c1b -2221201f -26252423 -2a292827 -2e2d2c2b -0231302f -36353433 -3a393837 -3e3d3c3b -4241403f -46454443 -4a494847 -4e4d4c4b -0251504f -56555453 -5a595857 -5e5d5c5b -6261605f -66656463 -6a696867 -6e6d6c6b -0271706f -76757473 -7a797877 -7e7d7c7b -deadbe7f +00adbeef # Word3DstData +04030201 +08070605 +0c0b0a09 +100f0e0d 
+14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d deadbeef 03020100 # Double0DstData 07060504 @@ -261,39 +262,41 @@ deadbeef 77767574 7b7a7978 7f7e7d7c -04030201 # Double1DstData -08070605 -0c0b0a09 -100f0e0d -14130211 -18171615 -1c1b1a19 -201f1e1d -24232221 -28272625 -2c2b2a29 -302f2e2d -34330231 -38373635 -3c3b3a39 -403f3e3d -44434241 -48474645 -4c4b4a49 -504f4e4d -54530251 -58575655 -5c5b5a59 -605f5e5d -64636261 -68676665 -6c6b6a69 -706f6e6d -74730271 -78777675 -7c7b7a79 -de7f7e7d -05040302 # Double2DstData +020100ef # Double1DstData +06050403 +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +0100beef # Double2DstData +05040302 09080706 0d0c0b0a 11100f0e From 02ab9fe99c4aabd3581d9d66ca27c06a7b469e48 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 17:58:42 -0600 Subject: [PATCH 28/48] Fixed all the bugs associated with the signature and the store side of misaligned access. Load misaligned is still causing some issues. 
--- src/lsu/align.sv | 2 +- ...ALLY-misaligned-access-01.reference_output | 206 +++++++++--------- 2 files changed, 110 insertions(+), 98 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 149b7e0bc..7da314ffd 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -112,7 +112,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin logic DoubleSpillM; - assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & Funct3M[1:0] == 2'b11; + assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM); diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 9755a8520..b0a7caeb3 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -295,6 +295,7 @@ deadbeef 7a797877 7e7d7c7b deadbe7f +deadbeef 0100beef # Double2DstData 05040302 09080706 @@ -328,71 +329,10 @@ deadbe7f 79787776 7d7c7b7a dead7f7e -06050403 # Double3DstData -0a090807 -0e0d0c0b -0211100f -16151413 -1a191817 -1e1d1c1b -2221201f -26252423 -2a292827 -2e2d2c2b -0231302f -36353433 -3a393837 -3e3d3c3b -4241403f -46454443 -4a494847 -4e4d4c4b -0251504f -56555453 -5a595857 -5e5d5c5b -6261605f -66656463 -6a696867 -6e6d6c6b -0271706f -76757473 -7a797877 -7e7d7c7b -deadbe7f -07060504 # Double4DestData -0b0a0908 -0f0e0d0c -13021110 -17161514 -1b1a1918 -1f1e1d1c 
-23222120 -27262524 -2b2a2928 -2f2e2d2c -33023130 -37363534 -3b3a3938 -3f3e3d3c -43424140 -47464544 -4b4a4948 -4f4e4d4c -53025150 -57565554 -5b5a5958 -5f5e5d5c -63626160 -67666564 -6b6a6968 -6f6e6d6c -73027170 -77767574 -7b7a7978 -7f7e7d7c deadbeef -08070605 # Double5DestData +00adbeef # Double3DstData +04030201 +08070605 0c0b0a09 100f0e0d 14130211 @@ -424,39 +364,44 @@ deadbeef 7c7b7a79 de7f7e7d deadbeef -09080706 # Double6DstData -0d0c0b0a -11100f0e -15141302 -19181716 -1d1c1b1a -21201f1e -25242322 -29282726 -2d2c2b2a -31302f2e -35343302 -39383736 -3d3c3b3a -41403f3e -45444342 -49484746 -4d4c4b4a -51504f4e -55545302 -59585756 -5d5c5b5a -61605f5e -65646362 -69686766 -6d6c6b6a -71706f6e -75747302 -79787776 -7d7c7b7a -dead7f7e +deadbeef # Double4DstData +03020100 +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c deadbeef -0a090807 # Double7DstData +deadbeef # Double5DstData +020100ef +06050403 +0a090807 0e0d0c0b 0211100f 16151413 @@ -487,7 +432,74 @@ deadbeef 7a797877 7e7d7c7b deadbe7f -deadbeef +deadbeef # Double6DstData +0100beef +05040302 +09080706 +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +deadbeef # Double7DstData +00adbeef +04030201 +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d 0fffffff 
#signature ffffffff ffffffff From c0e02ae190cfaab90650144119697eec9da86368 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 18:26:55 -0600 Subject: [PATCH 29/48] Found another bug in the RTL's Zicclsm alignment. --- src/lsu/align.sv | 17 ++++++++++++-- ...ALLY-misaligned-access-01.reference_output | 2 +- .../src/WALLY-misaligned-access-01.S | 22 +++++++++---------- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 7da314ffd..8a48f310d 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -107,7 +107,20 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; + logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; + + always_comb begin + case (Funct3M[1:0]) + 2'b00: AccessByteOffsetM = '0; // byte access + 2'b01: AccessByteOffsetM = {2'b00, ByteOffsetM[0]}; // half access + 2'b10: AccessByteOffsetM = {1'b0, ByteOffsetM[1:0]}; // word access + 2'b11: AccessByteOffsetM = ByteOffsetM; // double access + default: AccessByteOffsetM = ByteOffsetM; + endcase + end + assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin @@ -170,7 +183,7 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * ByteOffsetM : '0); + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> (MisalignedM ? 8 * AccessByteOffsetM : '0); assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. 
Also has the 8:1 shifter muxing for the byteoffset @@ -178,7 +191,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * AccessByteOffsetM : '0); assign LSUWriteDataShiftedM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; assign LSUWriteDataSpillM = LSUWriteDataShiftedM; //mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN], LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index b0a7caeb3..b8051ecdb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -500,7 +500,7 @@ deadbeef # Double7DstData 78777675 7c7b7a79 de7f7e7d -0fffffff #signature +ffffffff #signature ffffffff ffffffff ffffffff diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 2ee4e021c..3ff89a237 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ 
b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -255,7 +255,7 @@ CheckAllWriteSignature: # check values half by half addi a0, s0, 1 # SourceData+1 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 1 # ie: ByteDstData+1 slli a2, s2, 2 # * 4 -1 addi a2, a2, -1 jal ra, memcmp2 @@ -272,7 +272,7 @@ CheckAllWriteSignature: # check values word by word addi a0, s0, 1 # SourceData+1 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 1 # ie: ByteDstData+1 slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 @@ -281,7 +281,7 @@ CheckAllWriteSignature: # check values word by word addi a0, s0, 2 # SourceData+2 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 2 # ie: ByteDstData+2 slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 @@ -290,7 +290,7 @@ CheckAllWriteSignature: # check values word by word addi a0, s0, 3 # SourceData+3 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 3 # ie: ByteDstData+3 slli a2, s2, 1 # * 2 -1 addi a2, a2, -1 jal ra, memcmp4 @@ -306,7 +306,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 1 # SourceData+1 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 1 # ie: ByteDstData+1 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -314,7 +314,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 2 # SourceData+2 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 2 # ie: ByteDstData+2 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -322,7 +322,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 3 # SourceData+3 - addi a1, s1, 2 # ie: ByteDstData + addi a1, s1, 3 # ie: ByteDstData+3 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -330,7 +330,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 4 # SourceData+4 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 4 # ie: ByteDstData+4 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -338,7 +338,7 @@ CheckAllWriteSignature: # 
check values double by double addi a0, s0, 5 # SourceData+5 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 5 # ie: ByteDstData+5 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -346,7 +346,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 6 # SourceData+6 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 6 # ie: ByteDstData+6 slli a2, s2, 0 # * 1 -1 addi a2, a2, -1 jal ra, memcmp8 @@ -354,7 +354,7 @@ CheckAllWriteSignature: # check values double by double addi a0, s0, 7 # SourceData+7 - addi a1, s1, 0 # ie: ByteDstData + addi a1, s1, 7 # ie: ByteDstData+7 slli a2, s2, 0 # * 1 addi a2, a2, -1 jal ra, memcmp8 From 9dfe421c558e6135b6bd461195ba3d469f0e53ad Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 18:28:51 -0600 Subject: [PATCH 30/48] Yay! Zicclsm passes my regression test now. --- .../references/WALLY-misaligned-access-01.reference_output | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index b8051ecdb..209eb4cf4 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -556,4 +556,5 @@ ffffffff ffffffff ffffffff ffffffff -ff +000000ff +00000000 From c8cca8dfb88b7c770878d7ebf9bdf0b40faf2d74 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 10 Nov 2023 18:39:36 -0600 Subject: [PATCH 31/48] Simplification. 
--- src/lsu/align.sv | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 8a48f310d..1a45da923 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -79,6 +79,7 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; logic SaveByteMask; +/* -----\/----- EXCLUDED -----\/----- always_comb begin case(Funct3M) 2'b00: IncrementAmount = 4'd0; @@ -88,9 +89,10 @@ module align import cvw::*; #(parameter cvw_t P) ( default: IncrementAmount = 4'd7; endcase end + -----/\----- EXCLUDED -----/\----- */ /* verilator lint_off WIDTHEXPAND */ - //assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; - assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; + assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + //assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); From cc7a0b211ac775f06811803b0df8fe5cbd1e5634 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 12:35:11 -0600 Subject: [PATCH 32/48] Cleanup. 
--- src/lsu/align.sv | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 1a45da923..a54474b07 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -73,26 +73,13 @@ module align import cvw::*; #(parameter cvw_t P) ( localparam LLENINBYTES = P.LLEN/8; logic [P.XLEN-1:0] IEUAdrIncrementM; - logic [3:0] IncrementAmount; logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; logic SaveByteMask; -/* -----\/----- EXCLUDED -----\/----- - always_comb begin - case(Funct3M) - 2'b00: IncrementAmount = 4'd0; - 2'b01: IncrementAmount = 4'd1; - 2'b10: IncrementAmount = 4'd3; - 2'b11: IncrementAmount = 4'd7; - default: IncrementAmount = 4'd7; - endcase - end - -----/\----- EXCLUDED -----/\----- */ /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; - //assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); @@ -156,7 +143,7 @@ module align import cvw::*; #(parameter cvw_t P) ( assign SelSpillE = (CurrState == STATE_READY & TakeSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); assign SaveByteMask = (CurrState == STATE_READY & TakeSpillM); assign SpillSaveM = (CurrState == STATE_READY) & TakeSpillM & ~FlushM; - assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); + assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? 
assign SpillStallM = SelSpillE | CurrState == STATE_STORE_DELAY; mux2 #(2) memrwmux(MemRWM, 2'b00, SelStoreDelay, MemRWSpillM); From 13908ac41c3b08fed13bae5842e6b964b406c845 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 12:36:32 -0600 Subject: [PATCH 33/48] Updated buildroot to use kernel 6.6 and added dedicated qemu emulation script. --- .../buildroot-2023.05.1/main.config | 10 ++-- linux/devicetree/wally-virt.dts | 2 +- linux/testvector-generation/EmulateLinux.sh | 49 +++++++++++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) create mode 100755 linux/testvector-generation/EmulateLinux.sh diff --git a/linux/buildroot-config-src/buildroot-2023.05.1/main.config b/linux/buildroot-config-src/buildroot-2023.05.1/main.config index bb0547d71..99ffa9cef 100644 --- a/linux/buildroot-config-src/buildroot-2023.05.1/main.config +++ b/linux/buildroot-config-src/buildroot-2023.05.1/main.config @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Buildroot 2023.05.2-166-gb362115b25 Configuration +# Buildroot 2023.05.3 Configuration # BR2_HAVE_DOT_CONFIG=y BR2_HOST_GCC_AT_LEAST_4_9=y @@ -399,15 +399,16 @@ BR2_ROOTFS_POST_IMAGE_SCRIPT="" # Kernel # BR2_LINUX_KERNEL=y -BR2_LINUX_KERNEL_LATEST_VERSION=y +# BR2_LINUX_KERNEL_LATEST_VERSION is not set # BR2_LINUX_KERNEL_LATEST_CIP_VERSION is not set # BR2_LINUX_KERNEL_LATEST_CIP_RT_VERSION is not set -# BR2_LINUX_KERNEL_CUSTOM_VERSION is not set +BR2_LINUX_KERNEL_CUSTOM_VERSION=y # BR2_LINUX_KERNEL_CUSTOM_TARBALL is not set # BR2_LINUX_KERNEL_CUSTOM_GIT is not set # BR2_LINUX_KERNEL_CUSTOM_HG is not set # BR2_LINUX_KERNEL_CUSTOM_SVN is not set -BR2_LINUX_KERNEL_VERSION="6.3.13" +BR2_LINUX_KERNEL_CUSTOM_VERSION_VALUE="6.6" +BR2_LINUX_KERNEL_VERSION="6.6" BR2_LINUX_KERNEL_PATCH="" # BR2_LINUX_KERNEL_USE_DEFCONFIG is not set # BR2_LINUX_KERNEL_USE_ARCH_DEFAULT_CONFIG is not set @@ -433,6 +434,7 @@ BR2_LINUX_KERNEL_GZIP=y # # Linux Kernel Extensions # +# BR2_LINUX_KERNEL_EXT_RTAI is not set # 
BR2_LINUX_KERNEL_EXT_EV3DEV_LINUX_DRIVERS is not set # BR2_LINUX_KERNEL_EXT_FBTFT is not set # BR2_LINUX_KERNEL_EXT_AUFS is not set diff --git a/linux/devicetree/wally-virt.dts b/linux/devicetree/wally-virt.dts index b2fa1e81c..7cc0f757a 100644 --- a/linux/devicetree/wally-virt.dts +++ b/linux/devicetree/wally-virt.dts @@ -15,7 +15,7 @@ memory@80000000 { device_type = "memory"; - reg = <0x00 0x80000000 0x00 0x08000000>; + reg = <0x00 0x80000000 0x00 0x10000000>; }; cpus { diff --git a/linux/testvector-generation/EmulateLinux.sh b/linux/testvector-generation/EmulateLinux.sh new file mode 100755 index 000000000..59b365675 --- /dev/null +++ b/linux/testvector-generation/EmulateLinux.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +usage() { echo "Usage: $0 [-h] [-b ] [-d ]" 1>&2; exit 1; } + +help() { + echo "Usage: $0 [OPTIONS] " + echo " -b get images from given buildroot" + echo " -d specify device tree to use" + exit 0; +} + +# defaults +imageDir=$RISCV/buildroot/output/images +DEVICE_TREE=../devicetree/wally-virt.dtb + +# Process options and arguments. The following code grabs the single +# sdcard device argument no matter where it is in the positional +# parameters list. 
+ARGS=() +while [ $OPTIND -le "$#" ] ; do + if getopts "hb:d:" arg ; then + case "${arg}" in + h) help + ;; + b) BUILDROOT=${OPTARG} + ;; + d) DEVICE_TREE=${OPTARG} + ;; + esac + else + ARGS+=("${!OPTIND}") + ((OPTIND++)) + fi +done + +# File location variables +imageDir=$BUILDROOT/output/images + +#imageDir=$RISCV/buildroot/output/images +imageDir=~/repos/buildroot-sept2023/output/images +tvDir=$RISCV/linux-testvectors +tcpPort=1239 + +# QEMU Simulation +qemu-system-riscv64 \ +-M virt -m 256M -dtb $DEVICE_TREE \ +-nographic \ +-bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" +-singlestep -rtc clock=vm -icount shift=0,align=off,sleep=on From 55bcc4dbc19d5030c2e56d8f9b9870351f94a03f Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 13:53:23 -0600 Subject: [PATCH 34/48] Updates to linux config files for sdc. --- .../buildroot-2023.05.1/linux.config | 129 ++++++++++++++---- .../buildroot-2023.05.1/main.config | 3 +- 2 files changed, 104 insertions(+), 28 deletions(-) diff --git a/linux/buildroot-config-src/buildroot-2023.05.1/linux.config b/linux/buildroot-config-src/buildroot-2023.05.1/linux.config index c88200345..396853c62 100644 --- a/linux/buildroot-config-src/buildroot-2023.05.1/linux.config +++ b/linux/buildroot-config-src/buildroot-2023.05.1/linux.config @@ -1,8 +1,8 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/riscv 6.3.12 Kernel Configuration +# Linux/riscv 6.6.0 Kernel Configuration # -CONFIG_CC_VERSION_TEXT="riscv64-buildroot-linux-gnu-gcc.br_real (Buildroot 2023.05.1-dirty) 12.3.0" +CONFIG_CC_VERSION_TEXT="riscv64-buildroot-linux-gnu-gcc.br_real (Buildroot 2023.05.3) 12.3.0" CONFIG_CC_IS_GCC=y CONFIG_GCC_VERSION=120300 CONFIG_CLANG_VERSION=0 @@ -85,6 +85,7 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set CONFIG_PREEMPT_COUNT=y +# CONFIG_PREEMPT_DYNAMIC is not set # # CPU/Task time and stats accounting @@ -101,7 +102,6 @@ CONFIG_TICK_CPU_ACCOUNTING=y # CONFIG_TINY_RCU=y # CONFIG_RCU_EXPERT is not set -CONFIG_SRCU=y CONFIG_TINY_SRCU=y # end of RCU Subsystem @@ -109,7 +109,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_IKHEADERS is not set CONFIG_LOG_BUF_SHIFT=17 -CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 # CONFIG_PRINTK_INDEX is not set CONFIG_GENERIC_SCHED_CLOCK=y @@ -127,7 +126,6 @@ CONFIG_ARCH_SUPPORTS_INT128=y # CONFIG_NAMESPACES is not set # CONFIG_CHECKPOINT_RESTORE is not set # CONFIG_SCHED_AUTOGROUP is not set -# CONFIG_SYSFS_DEPRECATED is not set # CONFIG_RELAY is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="${BR_BINARIES_DIR}/rootfs.cpio" @@ -146,6 +144,8 @@ CONFIG_INITRAMFS_COMPRESSION_GZIP=y CONFIG_INITRAMFS_PRESERVE_MTIME=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION=y +# CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not set CONFIG_LD_ORPHAN_WARN=y CONFIG_LD_ORPHAN_WARN_LEVEL="warn" CONFIG_SYSCTL=y @@ -177,8 +177,8 @@ CONFIG_KALLSYMS=y CONFIG_KALLSYMS_BASE_RELATIVE=y # CONFIG_KCMP is not set CONFIG_RSEQ=y +CONFIG_CACHESTAT_SYSCALL=y # CONFIG_DEBUG_RSEQ is not set -# CONFIG_EMBEDDED is not set CONFIG_HAVE_PERF_EVENTS=y # CONFIG_PC104 is not set @@ -189,10 +189,19 @@ CONFIG_HAVE_PERF_EVENTS=y # end of Kernel Performance Events And Counters # CONFIG_PROFILING is not set + +# +# Kexec and crash features +# +# CONFIG_KEXEC is 
not set +# CONFIG_KEXEC_FILE is not set +# CONFIG_CRASH_DUMP is not set +# end of Kexec and crash features # end of General setup CONFIG_64BIT=y CONFIG_RISCV=y +CONFIG_GCC_SUPPORTS_DYNAMIC_FTRACE=y CONFIG_ARCH_MMAP_RND_BITS_MIN=18 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 CONFIG_ARCH_MMAP_RND_BITS_MAX=24 @@ -213,7 +222,9 @@ CONFIG_GENERIC_HWEIGHT=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_PGTABLE_LEVELS=5 CONFIG_LOCKDEP_SUPPORT=y +CONFIG_RISCV_DMA_NONCOHERENT=y CONFIG_AS_HAS_INSN=y +CONFIG_AS_HAS_OPTION_ARCH=y # # SoC selection @@ -223,12 +234,14 @@ CONFIG_AS_HAS_INSN=y # CONFIG_SOC_SIFIVE is not set # CONFIG_SOC_STARFIVE is not set # CONFIG_ARCH_SUNXI is not set +# CONFIG_ARCH_THEAD is not set # CONFIG_SOC_VIRT is not set # end of SoC selection # # CPU errata selection # +# CONFIG_ERRATA_ANDES is not set # CONFIG_ERRATA_SIFIVE is not set # CONFIG_ERRATA_THEAD is not set # end of CPU errata selection @@ -245,11 +258,18 @@ CONFIG_MODULE_SECTIONS=y CONFIG_TUNE_GENERIC=y CONFIG_RISCV_ALTERNATIVE=y CONFIG_RISCV_ISA_C=y +CONFIG_RISCV_ISA_SVNAPOT=y CONFIG_RISCV_ISA_SVPBMT=y -# CONFIG_RISCV_ISA_ZICBOM is not set +CONFIG_TOOLCHAIN_HAS_V=y +CONFIG_RISCV_ISA_V=y +CONFIG_RISCV_ISA_V_DEFAULT_ENABLE=y +CONFIG_RISCV_ISA_ZICBOM=y +CONFIG_RISCV_ISA_ZICBOZ=y CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE=y CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI=y CONFIG_FPU=y +CONFIG_IRQ_STACKS=y +CONFIG_THREAD_SIZE_ORDER=2 # end of Platform type # @@ -261,10 +281,12 @@ CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set CONFIG_HZ=250 # CONFIG_RISCV_SBI_V01 is not set -# CONFIG_KEXEC is not set -# CONFIG_KEXEC_FILE is not set -# CONFIG_CRASH_DUMP is not set +CONFIG_ARCH_SUPPORTS_KEXEC=y +CONFIG_ARCH_SUPPORTS_KEXEC_FILE=y +CONFIG_ARCH_SUPPORTS_CRASH_DUMP=y CONFIG_COMPAT=y +# CONFIG_RELOCATABLE is not set +# CONFIG_RANDOMIZE_BASE is not set # end of Kernel features # @@ -275,6 +297,7 @@ CONFIG_EFI_STUB=y CONFIG_EFI=y CONFIG_CC_HAVE_STACKPROTECTOR_TLS=y CONFIG_STACKPROTECTOR_PER_TASK=y +CONFIG_RISCV_ISA_FALLBACK=y # end of 
Boot options CONFIG_PORTABLE=y @@ -282,7 +305,18 @@ CONFIG_PORTABLE=y # # Power management options # -# CONFIG_PM is not set +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +# CONFIG_SUSPEND_SKIP_SYNC is not set +CONFIG_PM_SLEEP=y +# CONFIG_PM_AUTOSLEEP is not set +# CONFIG_PM_USERSPACE_AUTOSLEEP is not set +# CONFIG_PM_WAKELOCKS is not set +CONFIG_PM=y +# CONFIG_PM_DEBUG is not set +CONFIG_PM_CLK=y +# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y # end of Power management options # @@ -303,10 +337,13 @@ CONFIG_PORTABLE=y # end of CPU Power Management # CONFIG_VIRTUALIZATION is not set +CONFIG_ARCH_SUPPORTS_ACPI=y +# CONFIG_ACPI is not set # # General architecture-dependent options # +CONFIG_GENERIC_ENTRY=y # CONFIG_KPROBES is not set CONFIG_JUMP_LABEL=y # CONFIG_STATIC_KEYS_SELFTEST is not set @@ -332,6 +369,7 @@ CONFIG_HAVE_PERF_REGS=y CONFIG_HAVE_PERF_USER_STACK_DUMP=y CONFIG_HAVE_ARCH_JUMP_LABEL=y CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y +CONFIG_MMU_LAZY_TLB_REFCOUNT=y CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y CONFIG_SECCOMP=y @@ -339,6 +377,7 @@ CONFIG_HAVE_STACKPROTECTOR=y CONFIG_STACKPROTECTOR=y CONFIG_STACKPROTECTOR_STRONG=y CONFIG_LTO_NONE=y +CONFIG_ARCH_SUPPORTS_CFI_CLANG=y CONFIG_HAVE_CONTEXT_TRACKING_USER=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y @@ -350,6 +389,9 @@ CONFIG_HAVE_ARCH_HUGE_VMALLOC=y CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y +CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y +CONFIG_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y CONFIG_ARCH_MMAP_RND_BITS=18 @@ -370,9 +412,13 @@ CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y CONFIG_STRICT_MODULE_RWX=y CONFIG_ARCH_USE_MEMREMAP_PROT=y # CONFIG_LOCK_EVENT_COUNTS is not set +CONFIG_ARCH_HAS_VDSO_DATA=y +CONFIG_HAVE_PREEMPT_DYNAMIC=y +CONFIG_HAVE_PREEMPT_DYNAMIC_KEY=y CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y 
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_ARCH_SUPPORTS_PAGE_TABLE_CHECK=y +CONFIG_DYNAMIC_SIGFRAME=y # # GCOV-based kernel profiling @@ -389,6 +435,7 @@ CONFIG_FUNCTION_ALIGNMENT=0 CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_DEBUG is not set # CONFIG_MODULE_FORCE_LOAD is not set # CONFIG_MODULE_UNLOAD is not set # CONFIG_MODVERSIONS is not set @@ -409,7 +456,6 @@ CONFIG_BLK_ICQ=y # CONFIG_BLK_DEV_ZONED is not set # CONFIG_BLK_WBT is not set CONFIG_BLK_DEBUG_FS=y -# CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # @@ -421,6 +467,7 @@ CONFIG_EFI_PARTITION=y # end of Partition Types CONFIG_BLK_MQ_VIRTIO=y +CONFIG_BLK_PM=y # # IO Schedulers @@ -435,6 +482,8 @@ CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y CONFIG_ARCH_USE_QUEUED_RWLOCKS=y CONFIG_ARCH_HAS_MMIOWB=y CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y +CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y +CONFIG_FREEZER=y # # Executable file formats @@ -459,14 +508,14 @@ CONFIG_SWAP=y # # SLAB allocator options # -# CONFIG_SLAB is not set +# CONFIG_SLAB_DEPRECATED is not set CONFIG_SLUB=y -# CONFIG_SLOB_DEPRECATED is not set # CONFIG_SLUB_TINY is not set CONFIG_SLAB_MERGE_DEFAULT=y # CONFIG_SLAB_FREELIST_RANDOM is not set # CONFIG_SLAB_FREELIST_HARDENED is not set # CONFIG_SLUB_STATS is not set +# CONFIG_RANDOM_KMALLOC_CACHES is not set # end of SLAB allocator options # CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set @@ -476,6 +525,7 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y CONFIG_COMPACTION=y @@ -485,7 +535,6 @@ CONFIG_MIGRATION=y CONFIG_PHYS_ADDR_T_64BIT=y # CONFIG_KSM is not set CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y CONFIG_ARCH_WANTS_THP_SWAP=y # CONFIG_TRANSPARENT_HUGEPAGE is not set CONFIG_NEED_PER_CPU_KM=y @@ -497,11 +546,14 @@ CONFIG_ZONE_DMA32=y # 
CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_PERCPU_STATS is not set # CONFIG_GUP_TEST is not set +# CONFIG_DMAPOOL_TEST is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y +CONFIG_MEMFD_CREATE=y CONFIG_SECRETMEM=y # CONFIG_ANON_VMA_NAME is not set # CONFIG_USERFAULTFD is not set # CONFIG_LRU_GEN is not set +CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y CONFIG_LOCK_MM_AND_FIND_VMA=y # @@ -537,6 +589,7 @@ CONFIG_FW_LOADER=y CONFIG_EXTRA_FIRMWARE="" # CONFIG_FW_LOADER_USER_HELPER is not set # CONFIG_FW_LOADER_COMPRESS is not set +CONFIG_FW_CACHE=y # CONFIG_FW_UPLOAD is not set # end of Firmware loader @@ -548,6 +601,7 @@ CONFIG_ALLOW_DEV_COREDUMP=y CONFIG_REGMAP=y CONFIG_REGMAP_MMIO=y CONFIG_GENERIC_ARCH_TOPOLOGY=y +# CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT is not set # end of Generic Driver Options # @@ -557,6 +611,12 @@ CONFIG_GENERIC_ARCH_TOPOLOGY=y # CONFIG_MHI_BUS_EP is not set # end of Bus devices +# +# Cache Drivers +# +# CONFIG_AX45MP_L2_CACHE is not set +# end of Cache Drivers + # # Firmware Drivers # @@ -606,7 +666,6 @@ CONFIG_OF_ADDRESS=y CONFIG_OF_IRQ=y CONFIG_OF_RESERVED_MEM=y # CONFIG_OF_OVERLAY is not set -CONFIG_OF_DMA_DEFAULT_COHERENT=y # CONFIG_PARPORT is not set CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set @@ -708,6 +767,7 @@ CONFIG_TTY=y CONFIG_VT=y CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y +CONFIG_VT_CONSOLE_SLEEP=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_UNIX98_PTYS=y @@ -767,6 +827,7 @@ CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_CCTRNG is not set # CONFIG_HW_RANDOM_XIPHERA is not set CONFIG_DEVMEM=y +CONFIG_DEVPORT=y # CONFIG_TCG_TPM is not set # CONFIG_XILLYBUS is not set # end of Character devices @@ -876,14 +937,10 @@ CONFIG_BCMA_POSSIBLE=y # # Graphics support # +# CONFIG_AUXDISPLAY is not set # CONFIG_DRM is not set # CONFIG_DRM_DEBUG_MODESET_LOCK is not set -# -# ARM devices -# -# end of ARM devices - # # Frame buffer Devices # @@ -944,7 +1001,6 @@ CONFIG_EDAC_SUPPORT=y # CONFIG_DMABUF_HEAPS is not set # end of DMABUF options 
-# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # CONFIG_VFIO is not set # CONFIG_VIRT_DRIVERS is not set @@ -1121,6 +1177,7 @@ CONFIG_SIFIVE_PLIC=y # # CONFIG_VALIDATE_FS_PARSER is not set CONFIG_FS_IOMAP=y +CONFIG_BUFFER_HEAD=y # CONFIG_EXT2_FS is not set # CONFIG_EXT3_FS is not set CONFIG_EXT4_FS=y @@ -1149,7 +1206,6 @@ CONFIG_DNOTIFY=y CONFIG_INOTIFY_USER=y # CONFIG_FANOTIFY is not set # CONFIG_QUOTA is not set -# CONFIG_AUTOFS4_FS is not set CONFIG_AUTOFS_FS=y # CONFIG_FUSE_FS is not set # CONFIG_OVERLAY_FS is not set @@ -1191,10 +1247,9 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set +# CONFIG_TMPFS_QUOTA is not set CONFIG_ARCH_SUPPORTS_HUGETLBFS=y # CONFIG_HUGETLBFS is not set -CONFIG_ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y -CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y # CONFIG_CONFIGFS_FS is not set CONFIG_EFIVAR_FS=y @@ -1213,7 +1268,6 @@ CONFIG_IO_WQ=y # CONFIG_SECURITY_DMESG_RESTRICT is not set # CONFIG_SECURITY is not set # CONFIG_SECURITYFS is not set -CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y # CONFIG_HARDENED_USERCOPY is not set # CONFIG_FORTIFY_SOURCE is not set # CONFIG_STATIC_USERMODEHELPER is not set @@ -1239,6 +1293,13 @@ CONFIG_CC_HAS_ZERO_CALL_USED_REGS=y # CONFIG_ZERO_CALL_USED_REGS is not set # end of Memory initialization +# +# Hardening of kernel data structures +# +CONFIG_LIST_HARDENED=y +# CONFIG_BUG_ON_DATA_CORRUPTION is not set +# end of Hardening of kernel data structures + CONFIG_RANDSTRUCT_NONE=y # end of Kernel hardening options # end of Security options @@ -1426,14 +1487,26 @@ CONFIG_CRC32_SLICEBY8=y CONFIG_ZLIB_INFLATE=y # CONFIG_XZ_DEC is not set CONFIG_DECOMPRESS_GZIP=y +CONFIG_GENERIC_ALLOCATOR=y CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y CONFIG_HAS_IOPORT_MAP=y CONFIG_HAS_DMA=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y CONFIG_DMA_DECLARE_COHERENT=y +CONFIG_ARCH_HAS_SETUP_DMA_OPS=y +CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE=y 
+CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU=y +CONFIG_ARCH_HAS_DMA_PREP_COHERENT=y +CONFIG_ARCH_DMA_DEFAULT_COHERENT=y CONFIG_SWIOTLB=y +# CONFIG_SWIOTLB_DYNAMIC is not set +CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC=y # CONFIG_DMA_RESTRICTED_POOL is not set +CONFIG_DMA_NONCOHERENT_MMAP=y +CONFIG_DMA_COHERENT_POOL=y +CONFIG_DMA_DIRECT_REMAP=y # CONFIG_DMA_API_DEBUG is not set # CONFIG_DMA_MAP_BENCHMARK is not set # CONFIG_IRQ_POLL is not set @@ -1489,6 +1562,7 @@ CONFIG_READABLE_ASM=y # CONFIG_HEADERS_INSTALL is not set # CONFIG_DEBUG_SECTION_MISMATCH is not set CONFIG_SECTION_MISMATCH_WARN_ONLY=y +# CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B is not set CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y # CONFIG_VMLINUX_MAP is not set @@ -1570,6 +1644,7 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set CONFIG_WQ_WATCHDOG=y +# CONFIG_WQ_CPU_INTENSIVE_REPORT is not set # CONFIG_TEST_LOCKUP is not set # end of Debug Oops, Lockups and Hangs @@ -1614,7 +1689,6 @@ CONFIG_DEBUG_LIST=y CONFIG_DEBUG_PLIST=y CONFIG_DEBUG_SG=y # CONFIG_DEBUG_NOTIFIERS is not set -# CONFIG_BUG_ON_DATA_CORRUPTION is not set # CONFIG_DEBUG_MAPLE_TREE is not set # end of Debug kernel data structures @@ -1635,6 +1709,7 @@ CONFIG_RCU_EQS_DEBUG=y CONFIG_HAVE_RETHOOK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_RETVAL=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y diff --git a/linux/buildroot-config-src/buildroot-2023.05.1/main.config b/linux/buildroot-config-src/buildroot-2023.05.1/main.config index 99ffa9cef..6730d0801 100644 --- a/linux/buildroot-config-src/buildroot-2023.05.1/main.config +++ b/linux/buildroot-config-src/buildroot-2023.05.1/main.config @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. 
-# Buildroot 2023.05.3 Configuration +# Buildroot 2023.05.3-dirty Configuration # BR2_HAVE_DOT_CONFIG=y BR2_HOST_GCC_AT_LEAST_4_9=y @@ -941,6 +941,7 @@ BR2_PACKAGE_NETSURF_ARCH_SUPPORTS=y BR2_PACKAGE_FLASHROM_ARCH_SUPPORTS=y # BR2_PACKAGE_FLASHROM is not set # BR2_PACKAGE_FMTOOLS is not set +BR2_PACKAGE_FPGA_AXI_SDC=y # BR2_PACKAGE_FREEIPMI is not set # BR2_PACKAGE_FXLOAD is not set # BR2_PACKAGE_GPM is not set From 540d8d930d96934b5fc0b76b99b9237c6a5eb2a2 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 14:04:43 -0600 Subject: [PATCH 35/48] Cleanup. Linux makefile wally tracer. probably reduce some complexity here. --- .gitignore | 3 +++ linux/Makefile | 32 -------------------------------- testbench/common/wallyTracer.sv | 5 +++-- tests/custom/boot/Makefile | 2 +- 4 files changed, 7 insertions(+), 35 deletions(-) diff --git a/.gitignore b/.gitignore index 2b767197b..bd7e800df 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,6 @@ tests/fp/combined_IF_vectors/IF_vectors/*.tv /sim/obj_dir/Vtestbench_tlbram__Pz1_T20__DepSet_hab70f5b0__0.cpp /sim/obj_dir/Vtestbench_tlbram__Pz1_T20__DepSet_hab70f5b0__0__Slow.cpp /sim/obj_dir/Vtestbench_tlbram__Pz1_T20__Slow.cpp +sim/bp-results/*.log +sim/branch*.log +/tests/custom/fpga-test-sdc/bin/fpga-test-sdc diff --git a/linux/Makefile b/linux/Makefile index 433bf0e73..3d880bc08 100644 --- a/linux/Makefile +++ b/linux/Makefile @@ -27,15 +27,6 @@ BINARIES := fw_jump.elf vmlinux busybox OBJDUMPS := $(foreach name, $(BINARIES), $(basename $(name) .elf)) OBJDUMPS := $(foreach name, $(OBJDUMPS), $(DIS)/$(name).objdump) -# LINUXDIR := $(shell ls $(BUILDROOT)/output/build | grep -e '^linux-[0-9]\+\.[0-9]\+\.[0-9]\+$$' ) -# LINUXDIR := $(BUILDROOT)/output/build/$(LINUXDIR) -# BUSYBOXDIR := $(shell ls $(BUILDROOT)/output/build | grep -e '^linux-[0-9]\+\.[0-9]\+\.[0-9]\+$$' ) -# BUSYBOXDIR := $(BUILDROOT)/output/build/$(BUSYBOXDIR) - -# Gets Linux and Busybox output folders for objedect dumps -# LINUXDIR ?= $(shell 
find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") -# BUSYBOXDIR ?= $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/busybox-[0-9]+\.[0-9]+\.[0-9]+$$") - define linuxDir = $(shell find $(BUILDROOT)/output/build -maxdepth 2 -type d -regex ".*/linux-[0-9]+\.[0-9]+\.[0-9]+$$") endef @@ -46,10 +37,6 @@ endef .PHONY: all generate disassemble install clean cleanDTB cleanDriver test -# Generate all device trees ------------------------------------------- -# TODO: Add configuration for only generating device tree for specified -# supported FPGA. - all: $(MAKE) install make -C $(BUILDROOT) --jobs @@ -99,18 +86,6 @@ $(IMAGES)/busybox: $(call busyboxDir)/busybox install: $(BUILDROOT)/package/fpga-axi-sdc $(WALLYBOARD) cp $(WALLYBOARD)/main.config $(BUILDROOT)/.config -# CONFIG DEPENDENCIES 2021.05 ----------------------------------------- -# $(WALLYBOARD)/main.config: $(WALLYBOARD) $(BRPACKAGES)/wally.config -# cp $(BRPACKAGES)/wally.config $@ - -# $(WALLYBOARD)/linux.config: $(BRPACKAGES)/linux.config $(WALLYBOARD) -# cp $(BRPACKAGES)/linux.config $@ - -# $(WALLYBOARD): $(BUILDROOT) -# cp -r $(WALLYBOARDSRC) $(BUILDROOT)/board -# cp $(BRPACKAGES)/wally.config $(WALLYBOARD)/main.config -# cp $(BRPACKAGES)/linux.config $(WALLYBOARD)/linux.config - # CONFIG DEPENDENCIES 2023.05.1 --------------------------------------- $(WALLYBOARD): $(BUILDROOT) cp -r $(WALLYBOARDSRC) $(BUILDROOT)/board @@ -129,13 +104,6 @@ $(PATCHFILE): $(BUILDROOT): git clone https://github.com/buildroot/buildroot.git $@ cd $@; git checkout 2023.05.x - #cd $@; git checkout 2021.05 - -#$(DRIVER): -# @ if [ -d "$(WALLY)/addins/vivado-risc-v" ] ; then git submodule update --init $(WALLY)/addins/vivado-risc-v; fi -# cp ../addins/vivado-risc-v/patches/fpga-axi-sdc.c $@ - # For 2021.05 - #sed -i "s|card_hw_reset|hw_reset|1" $@ # --------------------------------------------------------------------- diff --git a/testbench/common/wallyTracer.sv 
b/testbench/common/wallyTracer.sv index d79c7c6cb..0fb8c4b77 100644 --- a/testbench/common/wallyTracer.sv +++ b/testbench/common/wallyTracer.sv @@ -267,6 +267,7 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); flopenrc #(1) TrapWReg (clk, reset, 1'b0, ~StallW, TrapM, TrapW); flopenrc #(1) HaltWReg (clk, reset, 1'b0, ~StallW, HaltM, HaltW); + // **** remove? are these used? flopenrc #(1) IntrFReg (clk, reset, 1'b0, ~StallF, TrapM, IntrF); flopenrc #(1) IntrDReg (clk, reset, FlushD, ~StallD, IntrF, IntrD); flopenrc #(1) IntrEReg (clk, reset, FlushE, ~StallE, IntrD, IntrE); @@ -285,9 +286,9 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); assign rvvi.order[0][0] = CSRArray[12'hB02]; // TODO: IMPERAS Should be event order assign rvvi.insn[0][0] = InstrRawW; assign rvvi.pc_rdata[0][0] = PCW; - assign rvvi.trap[0][0] = 0; // TODO: IMPERAS TrapW; + assign rvvi.trap[0][0] = 0; assign rvvi.halt[0][0] = HaltW; - assign rvvi.intr[0][0] = IntrW; + assign rvvi.intr[0][0] = 0; assign rvvi.mode[0][0] = PrivilegeModeW; assign rvvi.ixl[0][0] = PrivilegeModeW == 2'b11 ? 2'b10 : PrivilegeModeW == 2'b01 ? STATUS_SXL : STATUS_UXL; diff --git a/tests/custom/boot/Makefile b/tests/custom/boot/Makefile index 1c3b8da6d..6fe9d2256 100644 --- a/tests/custom/boot/Makefile +++ b/tests/custom/boot/Makefile @@ -30,7 +30,7 @@ LINKER :=$(ROOT)/linker1000.x AFLAGS =$(MARCH) $(MABI) -W # Override directive allows us to prepend other options on the command line # e.g. $ make CFLAGS=-g -override CFLAGS +=$(MARCH) $(MABI) -mcmodel=medany -O2 +override CFLAGS +=$(MARCH) $(MABI) -mcmodel=medany -O2 -g AS=riscv64-unknown-elf-as CC=riscv64-unknown-elf-gcc AR=riscv64-unknown-elf-ar From da59cb71a93b664cd27fc6fa3bd621000812455b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 14:12:27 -0600 Subject: [PATCH 36/48] Commented out the arch64priv misaligned load/store tests since we added Zicclsm to the rv64gc config. 
--- testbench/testbench.sv | 8 ++++---- testbench/tests.vh | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 20f1412c9..186144839 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -36,10 +36,10 @@ module testbench; /* verilator lint_off WIDTHEXPAND */ parameter DEBUG=0; parameter TEST="none"; - parameter PrintHPMCounters=1; - parameter BPRED_LOGGER=1; - parameter I_CACHE_ADDR_LOGGER=1; - parameter D_CACHE_ADDR_LOGGER=1; + parameter PrintHPMCounters=0; + parameter BPRED_LOGGER=0; + parameter I_CACHE_ADDR_LOGGER=0; + parameter D_CACHE_ADDR_LOGGER=0; `include "parameter-defs.vh" diff --git a/testbench/tests.vh b/testbench/tests.vh index 8ebc98730..46c553cc3 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -884,7 +884,9 @@ string imperas32f[] = '{ "rv64i_m/privilege/src/misalign-blt-01.S", "rv64i_m/privilege/src/misalign-bltu-01.S", "rv64i_m/privilege/src/misalign-bne-01.S", - "rv64i_m/privilege/src/misalign-jal-01.S", + "rv64i_m/privilege/src/misalign-jal-01.S" + // removed because rv64gc supports Zicclsm +/* -----\/----- EXCLUDED -----\/----- "rv64i_m/privilege/src/misalign-ld-01.S", "rv64i_m/privilege/src/misalign-lh-01.S", "rv64i_m/privilege/src/misalign-lhu-01.S", @@ -893,6 +895,7 @@ string imperas32f[] = '{ "rv64i_m/privilege/src/misalign-sd-01.S", "rv64i_m/privilege/src/misalign-sh-01.S", "rv64i_m/privilege/src/misalign-sw-01.S" + -----/\----- EXCLUDED -----/\----- */ }; string arch64zi[] = '{ From 707b0c557cd15f86c4262ce7844cfb04ac1fc7fe Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 14:28:22 -0600 Subject: [PATCH 37/48] Cleanup and optimization of Zicclsm. 
--- src/lsu/align.sv | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index a54474b07..9ca191c14 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -58,6 +58,8 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic SpillStallM); + localparam LLENINBYTES = P.LLEN/8; + localparam OFFSET_BIT_POS = $clog2(P.DCACHE_LINELENINBITS/8); // Spill threshold occurs when all the cache offset PC bits are 1 (except [0]). Without a cache this is just PCF[1] typedef enum logic [1:0] {STATE_READY, STATE_SPILL, STATE_STORE_DELAY} statetype; @@ -71,12 +73,16 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; - localparam LLENINBYTES = P.LLEN/8; logic [P.XLEN-1:0] IEUAdrIncrementM; logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; logic SaveByteMask; + logic HalfMisalignedM, WordMisalignedM; + logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; + logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; + logic HalfSpillM, WordSpillM; + logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -92,11 +98,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // 1) operation size // 2) offset // 3) access location within the cacheline - localparam OFFSET_BIT_POS = $clog2(P.DCACHE_LINELENINBITS/8); - logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; - logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; - logic HalfSpillM, WordSpillM; - logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; @@ -109,17 +110,26 @@ module align import cvw::*; #(parameter cvw_t P) ( default: 
AccessByteOffsetM = ByteOffsetM; endcase end - - assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; - assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; + + // compute misalignement + assign HalfMisalignedM = (ByteOffsetM[0] != '0) & Funct3M[1:0] == 2'b01; + assign WordMisalignedM = (ByteOffsetM[1:0] != '0) & Funct3M[1:0] == 2'b10; + assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & HalfMisalignedM; + assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & WordMisalignedM; + if(P.LLEN == 64) begin logic DoubleSpillM; - assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; + logic DoubleMisalignedM; + assign DoubleMisalignedM = (ByteOffsetM[2:0] != '0) & Funct3M[1:0] == 2'b11; + assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:3] == '1) & DoubleMisalignedM; + assign MisalignedM = HalfMisalignedM | WordMisalignedM | DoubleMisalignedM; assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM); + assign MisalignedM = HalfMisalignedM | WordMisalignedM; end + // align by shifting // Don't take the spill if there is a stall, TLB miss, or hardware update to the D/A bits assign TakeSpillM = SpillM & ~CacheBusHPWTStall & ~(DTLBMissM | (P.SVADU_SUPPORTED & DataUpdateDAM)); @@ -151,24 +161,12 @@ module align import cvw::*; #(parameter cvw_t P) ( // Merge spilled data //////////////////////////////////////////////////////////////////////////////////////////////////// - // save the first 2 bytes + // save the first native word flopenr #(P.LLEN) SpillDataReg(clk, reset, SpillSaveM, DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM); // merge together - mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SpillM, ReadDataWordSpillAllM); + 
mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SelSpillM, ReadDataWordSpillAllM); - // align by shifting - // *** optimize by merging with halfSpill, WordSpill, etc - logic HalfMisalignedM, WordMisalignedM; - assign HalfMisalignedM = Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; - assign WordMisalignedM = Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; - if(P.LLEN == 64) begin - logic DoubleMisalignedM; - assign DoubleMisalignedM = Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; - assign MisalignedM = HalfMisalignedM | WordMisalignedM | DoubleMisalignedM; - end else begin - assign MisalignedM = HalfMisalignedM | WordMisalignedM; - end // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits From a6995af91cf262f10bc44998c65e5225a7db323b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 16:15:23 -0600 Subject: [PATCH 38/48] Fixed bug in uncore updates which broke SDC. --- src/lsu/align.sv | 13 +++++-------- src/uncore/uncore.sv | 4 ++-- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 9ca191c14..573b7e50f 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -175,18 +175,15 @@ module align import cvw::*; #(parameter cvw_t P) ( // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs - logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 
8 * AccessByteOffsetM : '0); - assign LSUWriteDataShiftedM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; - assign LSUWriteDataSpillM = LSUWriteDataShiftedM; - //mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN], LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; - logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; - assign ByteMaskShiftedM = ByteMaskMuxM; - mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskM}, - {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, {SelSpillM, SelSpillE}, ByteMaskSpillM); + mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskMuxM, // no spill + {{{P.LLEN/8}{1'b0}}, ByteMaskM}, // spill, first half + {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, // spill, second half + {SelSpillM, SelSpillE}, ByteMaskSpillM); flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SaveByteMask, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); diff --git a/src/uncore/uncore.sv b/src/uncore/uncore.sv index 916dc53ef..d55e2b899 100644 --- a/src/uncore/uncore.sv +++ b/src/uncore/uncore.sv @@ -63,7 +63,7 @@ module uncore import cvw::*; #(parameter cvw_t P)( logic [P.XLEN-1:0] HREADRam, HREADSDC; logic [11:0] HSELRegions; - logic HSELDTIM, HSELIROM, HSELRam, HSELCLINT, HSELPLIC, HSELGPIO, HSELUART, HSELSDC, HSELSPI; + logic HSELDTIM, HSELIROM, HSELRam, HSELCLINT, HSELPLIC, HSELGPIO, HSELUART, HSELSPI; logic HSELDTIMD, HSELIROMD, HSELEXTD, HSELRamD, HSELCLINTD, HSELPLICD, HSELGPIOD, HSELUARTD, HSELSDCD, HSELSPID; logic HRESPRam, HRESPSDC; logic HREADYRam, HRESPSDCD; @@ -91,7 +91,7 @@ module uncore import cvw::*; #(parameter cvw_t P)( adrdecs #(P) adrdecs(HADDR, 1'b1, 1'b1, 1'b1, HSIZE[1:0], HSELRegions); // unswizzle HSEL signals - assign {HSELDTIM, HSELIROM, HSELEXT, HSELBootRom, 
HSELRam, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELSDC, HSELSPI} = HSELRegions[11:1]; + assign {HSELDTIM, HSELIROM, HSELEXT, HSELBootRom, HSELRam, HSELCLINT, HSELGPIO, HSELUART, HSELPLIC, HSELEXTSDC, HSELSPI} = HSELRegions[11:1]; // AHB -> APB bridge ahbapbbridge #(P, 5) ahbapbbridge ( From 6b7ff50a849919e776c51b7ec18c9c43908de6a3 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 16:44:02 -0600 Subject: [PATCH 39/48] Reduced Arty A7 clock speed to 20Mhz to support Zicclsm. --- fpga/generator/xlnx_mmcm.tcl | 2 +- linux/devicetree/wally-artya7.dts | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fpga/generator/xlnx_mmcm.tcl b/fpga/generator/xlnx_mmcm.tcl index a8a2fe568..2f003e7a5 100644 --- a/fpga/generator/xlnx_mmcm.tcl +++ b/fpga/generator/xlnx_mmcm.tcl @@ -15,7 +15,7 @@ set_property -dict [list CONFIG.PRIM_IN_FREQ {100.000} \ CONFIG.CLKOUT4_USED {false} \ CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {166.66667} \ CONFIG.CLKOUT2_REQUESTED_OUT_FREQ {200} \ - CONFIG.CLKOUT3_REQUESTED_OUT_FREQ {23} \ + CONFIG.CLKOUT3_REQUESTED_OUT_FREQ {20} \ CONFIG.CLKIN1_JITTER_PS {10.0} \ ] [get_ips $ipName] diff --git a/linux/devicetree/wally-artya7.dts b/linux/devicetree/wally-artya7.dts index 4206c7804..6dab66c7b 100644 --- a/linux/devicetree/wally-artya7.dts +++ b/linux/devicetree/wally-artya7.dts @@ -21,8 +21,8 @@ cpus { #address-cells = <0x01>; #size-cells = <0x00>; - clock-frequency = <0x15EF3C0>; - timebase-frequency = <0x15EF3C0>; + clock-frequency = <0x1312D00>; + timebase-frequency = <0x1312D00>; cpu@0 { phandle = <0x01>; @@ -51,7 +51,7 @@ uart@10000000 { interrupts = <0x0a>; interrupt-parent = <0x03>; - clock-frequency = <0x15EF3C0>; + clock-frequency = <0x1312D00>; reg = <0x00 0x10000000 0x00 0x100>; compatible = "ns16550a"; }; @@ -74,8 +74,8 @@ fifo-depth = <256>; bus-width = <4>; interrupt-parent = <0x03>; - clock = <0x15EF3C0>; - max-frequency = <0x15EF3C0>; + clock = <0x1312D00>; + max-frequency = <0x1312D00>; 
cap-sd-highspeed; cap-mmc-highspeed; no-sdio; From 95fc5f4a1c5fa127b8622272842df7d1d9a8aefd Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 17:20:26 -0600 Subject: [PATCH 40/48] Towards removing the FPGA config file. --- config/buildroot/config.vh | 3 +- config/fpga/config.vh | 4 +- config/rv32e/config.vh | 4 +- config/rv32gc/config.vh | 4 +- config/rv32i/config.vh | 4 +- config/rv32imc/config.vh | 4 +- config/rv64fpquad/config.vh | 4 +- config/rv64gc/config.vh | 4 +- config/rv64i/config.vh | 4 +- config/shared/parameter-defs.vh | 3 +- sim/wave.do | 573 +++++++++++++++++--------------- src/cvw.sv | 3 +- src/uncore/ram_ahb.sv | 4 +- src/uncore/rom_ahb.sv | 4 +- src/uncore/uncore.sv | 4 +- testbench/sdc/sd_top_tb.sv | 132 -------- 16 files changed, 327 insertions(+), 431 deletions(-) delete mode 100644 testbench/sdc/sd_top_tb.sv diff --git a/config/buildroot/config.vh b/config/buildroot/config.vh index 7d68affc0..0015e2bba 100644 --- a/config/buildroot/config.vh +++ b/config/buildroot/config.vh @@ -28,7 +28,6 @@ // include shared configuration `include "BranchPredictorType.vh" -localparam FPGA = 1; // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd64; @@ -98,9 +97,11 @@ localparam logic [63:0] IROM_RANGE = 64'h00001FFF; localparam BOOTROM_SUPPORTED = 1'b1; localparam logic [63:0] BOOTROM_BASE = 64'h00001000 ; localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; +localparam BOOTROM_PRELOAD = 1'b1; localparam UNCORE_RAM_SUPPORTED = 1'b1; localparam logic [63:0] UNCORE_RAM_BASE = 64'h80000000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam UNCORE_RAM_PRELOAD = 1'b1; localparam EXT_MEM_SUPPORTED = 1'b0; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; localparam logic [63:0] EXT_MEM_RANGE = 64'h07FFFFFF; diff --git a/config/fpga/config.vh b/config/fpga/config.vh index 9ed009439..8f52f597c 100644 --- a/config/fpga/config.vh +++ b/config/fpga/config.vh @@ -28,8 +28,6 @@ // include shared configuration `include 
"BranchPredictorType.vh" -localparam FPGA = 1; - // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd64; @@ -107,10 +105,12 @@ localparam logic [63:0] IROM_RANGE = 64'h00001FFF; localparam BOOTROM_SUPPORTED = 1'b1; localparam logic [63:0] BOOTROM_BASE = 64'h00001000; localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; +localparam BOOTROM_PRELOAD = 1'b1; localparam UNCORE_RAM_SUPPORTED = 1'b1; localparam logic [63:0] UNCORE_RAM_BASE = 64'h00002000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h00000FFF; +localparam UNCORE_RAM_PRELOAD = 1'b1; localparam EXT_MEM_SUPPORTED = 1'b1; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; diff --git a/config/rv32e/config.vh b/config/rv32e/config.vh index 98d44eb18..e532aa9a8 100644 --- a/config/rv32e/config.vh +++ b/config/rv32e/config.vh @@ -27,8 +27,6 @@ `include "BranchPredictorType.vh" -localparam FPGA = 0; - // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd32; @@ -97,11 +95,13 @@ localparam IROM_SUPPORTED = 1'b0; localparam logic [63:0] IROM_BASE = 64'h80000000; localparam logic [63:0] IROM_RANGE = 64'h007FFFFF; localparam BOOTROM_SUPPORTED = 1'b1; +localparam BOOTROM_PRELOAD = 1'b0; localparam logic [63:0] BOOTROM_BASE = 64'h00001000; localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; localparam UNCORE_RAM_SUPPORTED = 1'b1; localparam logic [63:0] UNCORE_RAM_BASE = 64'h80000000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam UNCORE_RAM_PRELOAD = 1'b0; localparam EXT_MEM_SUPPORTED = 1'b0; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; localparam logic [63:0] EXT_MEM_RANGE = 64'h07FFFFFF; diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index a0aacb38f..69d3329f5 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -29,8 +29,6 @@ // `include "wally-shared.vh" `include "BranchPredictorType.vh" -localparam FPGA = 0; - // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd32; @@ -100,9 +98,11 @@ localparam logic [63:0] IROM_RANGE = 64'h007FFFFF; 
localparam BOOTROM_SUPPORTED = 1'b1; localparam logic [63:0] BOOTROM_BASE = 64'h00001000; localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; +localparam BOOTROM_PRELOAD = 1'b0; localparam UNCORE_RAM_SUPPORTED = 1'b1; localparam logic [63:0] UNCORE_RAM_BASE = 64'h80000000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam UNCORE_RAM_PRELOAD = 1'b0; localparam EXT_MEM_SUPPORTED = 1'b0; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; localparam logic [63:0] EXT_MEM_RANGE = 64'h07FFFFFF; diff --git a/config/rv32i/config.vh b/config/rv32i/config.vh index 9ae992e4a..860a7c783 100644 --- a/config/rv32i/config.vh +++ b/config/rv32i/config.vh @@ -27,8 +27,6 @@ `include "BranchPredictorType.vh" -localparam FPGA = 0; - // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd32; @@ -99,9 +97,11 @@ localparam logic [63:0] IROM_RANGE = 64'h007FFFFF; localparam BOOTROM_SUPPORTED = 1'b0; localparam logic [63:0] BOOTROM_BASE = 64'h00001000; localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; +localparam BOOTROM_PRELOAD = 1'b0; localparam UNCORE_RAM_SUPPORTED = 1'b0; localparam logic [63:0] UNCORE_RAM_BASE = 64'h80000000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam UNCORE_RAM_PRELOAD = 1'b0; localparam EXT_MEM_SUPPORTED = 1'b0; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; localparam logic [63:0] EXT_MEM_RANGE = 64'h07FFFFFF; diff --git a/config/rv32imc/config.vh b/config/rv32imc/config.vh index ec5bc0e15..b9e485099 100644 --- a/config/rv32imc/config.vh +++ b/config/rv32imc/config.vh @@ -27,8 +27,6 @@ `include "BranchPredictorType.vh" -localparam FPGA = 0; - // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd32; @@ -98,9 +96,11 @@ localparam logic [63:0] IROM_RANGE = 64'h007FFFFF; localparam BOOTROM_SUPPORTED = 1'b0; localparam logic [63:0] BOOTROM_BASE = 64'h00001000; localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; +localparam BOOTROM_PRELOAD = 1'b0; localparam UNCORE_RAM_SUPPORTED = 1'b0; localparam logic [63:0] 
UNCORE_RAM_BASE = 64'h80000000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h07FFFFFF; +localparam UNCORE_RAM_PRELOAD = 1'b0; localparam EXT_MEM_SUPPORTED = 1'b0; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; localparam logic [63:0] EXT_MEM_RANGE = 64'h07FFFFFF; diff --git a/config/rv64fpquad/config.vh b/config/rv64fpquad/config.vh index 0fffba91e..9d5843bca 100644 --- a/config/rv64fpquad/config.vh +++ b/config/rv64fpquad/config.vh @@ -27,8 +27,6 @@ `include "BranchPredictorType.vh" -localparam FPGA = 0; - // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd64; @@ -104,9 +102,11 @@ localparam logic [63:0] IROM_RANGE = 64'h007FFFFF; localparam BOOTROM_SUPPORTED = 1'b1; localparam logic [63:0] BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; +localparam BOOTROM_PRELOAD = 1'b0; localparam UNCORE_RAM_SUPPORTED = 1'b1; localparam logic [63:0] UNCORE_RAM_BASE = 64'h80000000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h7FFFFFFF; +localparam UNCORE_RAM_PRELOAD = 1'b0; localparam EXT_MEM_SUPPORTED = 1'b0; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; localparam logic [63:0] EXT_MEM_RANGE = 64'h07FFFFFF; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index af2402b4f..e00c9153d 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -27,8 +27,6 @@ `include "BranchPredictorType.vh" -localparam FPGA = 0; - // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd64; @@ -104,9 +102,11 @@ localparam logic [63:0] IROM_RANGE = 64'h007FFFFF; localparam BOOTROM_SUPPORTED = 1'b1; localparam logic [63:0] BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder; localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; +localparam BOOTROM_PRELOAD = 
1'b0; localparam UNCORE_RAM_SUPPORTED = 1'b1; localparam logic [63:0] UNCORE_RAM_BASE = 64'h80000000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h7FFFFFFF; +localparam UNCORE_RAM_PRELOAD = 1'b0; localparam EXT_MEM_SUPPORTED = 1'b0; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; localparam logic [63:0] EXT_MEM_RANGE = 64'h07FFFFFF; diff --git a/config/rv64i/config.vh b/config/rv64i/config.vh index 028d47c91..cbc3700e9 100644 --- a/config/rv64i/config.vh +++ b/config/rv64i/config.vh @@ -27,8 +27,6 @@ `include "BranchPredictorType.vh" -localparam FPGA = 0; - // RV32 or RV64: XLEN = 32 or 64 localparam XLEN = 32'd64; @@ -104,9 +102,11 @@ localparam logic [63:0] IROM_RANGE = 64'h007FFFFF; localparam BOOTROM_SUPPORTED = 1'b0; localparam logic [63:0] BOOTROM_BASE = 64'h00001000; // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; +localparam BOOTROM_PRELOAD = 1'b0; localparam UNCORE_RAM_SUPPORTED = 1'b0; localparam logic [63:0] UNCORE_RAM_BASE = 64'h80000000; localparam logic [63:0] UNCORE_RAM_RANGE = 64'h7FFFFFFF; +localparam UNCORE_RAM_PRELOAD = 1'b0; localparam EXT_MEM_SUPPORTED = 1'b0; localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; localparam logic [63:0] EXT_MEM_RANGE = 64'h07FFFFFF; diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index dfb41ef9f..7e982fbde 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -4,7 +4,6 @@ `include "BranchPredictorType.vh" localparam cvw_t P = '{ - FPGA : FPGA, XLEN : XLEN, IEEE754 : IEEE754, MISA : MISA, @@ -53,9 +52,11 @@ localparam cvw_t P = '{ BOOTROM_SUPPORTED : BOOTROM_SUPPORTED, BOOTROM_BASE : BOOTROM_BASE, BOOTROM_RANGE : BOOTROM_RANGE, + BOOTROM_PRELOAD : BOOTROM_PRELOAD, UNCORE_RAM_SUPPORTED : UNCORE_RAM_SUPPORTED, UNCORE_RAM_BASE : UNCORE_RAM_BASE, UNCORE_RAM_RANGE : UNCORE_RAM_RANGE, + 
UNCORE_RAM_PRELOAD : UNCORE_RAM_PRELOAD, EXT_MEM_SUPPORTED : EXT_MEM_SUPPORTED, EXT_MEM_BASE : EXT_MEM_BASE, EXT_MEM_RANGE : EXT_MEM_RANGE, diff --git a/sim/wave.do b/sim/wave.do index 20d383bd9..3f2bcd72a 100644 --- a/sim/wave.do +++ b/sim/wave.do @@ -34,6 +34,7 @@ add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/Lo add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/StoreAmoPageFaultM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/InterruptM add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/trap/HPTWInstrAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/core/priv/priv/pmd/WFITimeoutM add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushD add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushE add wave -noupdate -group HDU -group Flush -color Yellow /testbench/dut/core/FlushM @@ -43,6 +44,10 @@ add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/Sta add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallE add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallM add wave -noupdate -group HDU -group Stall -color Orange /testbench/dut/core/StallW +add wave -noupdate /testbench/dut/core/hzu/WFIInterruptedM +add wave -noupdate /testbench/dut/core/priv/priv/trap/PendingIntsM +add wave -noupdate /testbench/dut/core/priv/priv/trap/InstrValidM +add wave -noupdate /testbench/dut/core/priv/priv/trap/ValidIntsM add wave -noupdate -group {instruction pipeline} /testbench/InstrFName add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/PostSpillInstrRawF add wave -noupdate -group {instruction pipeline} /testbench/dut/core/ifu/InstrD @@ -75,257 +80,295 @@ add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/PCM add wave -noupdate -expand -group {Memory Stage} 
/testbench/dut/core/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName add wave -noupdate -expand -group {Memory Stage} /testbench/dut/core/lsu/IEUAdrM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/WriteDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW -add wave -noupdate -group lsu /testbench/dut/core/lsu/LSUStallM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM -add wave -noupdate -group lsu /testbench/dut/core/lsu/ReadDataM -add wave -noupdate -group lsu -radix hexadecimal /testbench/dut/core/lsu/WriteDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/FWriteDataM -add wave -noupdate -group lsu /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall -add wave -noupdate -group lsu /testbench/dut/core/lsu/IgnoreRequestTLB -add wave -noupdate -group lsu /testbench/dut/core/lsu/SelHPTW -add wave -noupdate -group lsu -group bus /testbench/dut/core/ebu/ebu/HCLK -add wave -noupdate -group lsu -group bus -color Gold /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/AHBBuscachefsm/CurrState -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/AHBBuscachefsm/HREADY -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/BusStall -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/HTRANS -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/FetchBuffer -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/HRDATA -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/LSUHWDATA -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/BusStall -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/CacheBusRW -add wave -noupdate -group lsu -group bus 
/testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/CacheBusAck -add wave -noupdate -group lsu -group bus /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheRW -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CMOp -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CMOZeroHit -add wave -noupdate -group lsu -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/IEUAdrE -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet -add wave -noupdate -group lsu -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} -add wave -noupdate -group lsu -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelFlush -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelWriteback -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/TagWay -add wave -noupdate -group lsu -group dcache 
/testbench/dut/core/lsu/bus/dcache/dcache/Tag -add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/HitWay -add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn -add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CacheSet -add wave -noupdate -group lsu -group dcache -group {replacement policy} -color {Orange Red} {/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory[0]} -add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU -add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU -add wave -noupdate -group lsu -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/VictimWay -add wave -noupdate -group lsu -group dcache -group {replacement policy} -expand -group DETAILS -expand /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/Intermediate -add wave -noupdate -group lsu -group dcache -group {replacement policy} -expand -group DETAILS /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUUpdate -add wave -noupdate -group lsu -group dcache -group {replacement policy} -expand -group DETAILS /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/WayExpanded -add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/LineDirty -add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/NextFlushAdr -add wave -noupdate -group lsu -group dcache -group flush -radix hexadecimal /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr 
-add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/FlushWayFlag -add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWayCntEn -add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/FlushAdrCntEn -add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdrFlag -add wave -noupdate -group lsu -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/SelFlush -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/PAdr -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataLine -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/WordOffsetAddr -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/HitWay -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/ValidWay -add wave -noupdate -group lsu -group dcache -group Victim 
{/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory[0]} -add wave -noupdate -group lsu -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/SetValid -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelData} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearValidWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM 
writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM[62]} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way0 -group Way0Word3 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/RAM} -add wave 
-noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelNotHit2} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelNonHit} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelData} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ClearValidWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 -group Way1Word0 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand 
-group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 -group Way1Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ClearValidWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 
{/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 
{/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ClearValidWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} 
-expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/we} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/RAM} -add wave -noupdate -group lsu -group dcache -expand -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way0 
{/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/TagWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/TagWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/TagWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand 
-group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} -expand -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/TagWay} -add wave -noupdate -group lsu -group dcache -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay -add wave -noupdate -group lsu -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextSet -add wave -noupdate -group lsu -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusRW -add wave -noupdate -group lsu -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr -add wave -noupdate -group lsu -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck -add wave -noupdate -group lsu -group dcache -expand -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord -add wave -noupdate -group lsu -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation 
-add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM -add wave -noupdate -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM -add wave -noupdate -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal -add wave -noupdate -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM -add wave -noupdate -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF -add wave -noupdate -group lsu -group pmp 
/testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM -add wave -noupdate -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/SelHPTW -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWStall -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/DTLBWalk -add wave -noupdate -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/hptw/hptw/WalkerState -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWAdr -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PTE -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/NextPageType -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PageType -add wave -noupdate -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/ValidNonLeafPTE -add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/ITLBMissF -add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/DTLBMissM -add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/ITLBWriteF -add wave -noupdate -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/DTLBWriteM -add wave -noupdate -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUAccessFaultM -add wave -noupdate -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/DCacheStallM -add wave -noupdate -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFaultF -add wave -noupdate -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSULoadAccessFaultM -add wave -noupdate -group lsu -group ptwalker -expand -group faults 
/testbench/dut/core/lsu/hptw/hptw/LSUStoreAmoAccessFaultM -add wave -noupdate -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LoadAccessFaultM -add wave -noupdate -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/StoreAmoAccessFaultM -add wave -noupdate -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFault +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/WriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/LSUStallM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataWordMuxM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/ReadDataM +add wave -noupdate -expand -group lsu -radix hexadecimal /testbench/dut/core/lsu/WriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/FWriteDataM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/dcache/dcache/CacheStall +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/IgnoreRequestTLB +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/SelHPTW +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/ebu/ebu/HCLK +add wave -noupdate -expand -group lsu -expand -group bus -color Gold /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/AHBBuscachefsm/CurrState +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/AHBBuscachefsm/HREADY +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/BusStall +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/HTRANS +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/FetchBuffer +add wave 
-noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/HRDATA +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/LSUHWDATA +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/BusStall +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/CacheBusRW +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/bus/dcache/ahbcacheinterface/CacheBusAck +add wave -noupdate -expand -group lsu -expand -group bus /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr +add wave -noupdate -expand -group lsu -group alignment -color Gold /testbench/dut/core/lsu/ziccslm_align/align/CurrState +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/MemRWM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/DTLBMissM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/CacheableM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/HalfSpillM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/WordSpillM +add wave -noupdate -expand -group lsu -group alignment -color Orange /testbench/dut/core/lsu/ziccslm_align/align/SpillM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/IEUAdrSpillM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/SaveByteMask +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/ByteMaskSaveM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ByteMaskM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ByteMaskExtendedM 
+add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/SelSpillE +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/SelSpillM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/ByteMaskMuxM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ByteMaskSpillM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/LSUWriteDataM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/LSUWriteDataShiftedM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/LSUWriteDataSpillM +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/bus/dcache/dcache/CacheWriteData +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/bus/dcache/dcache/ByteMask +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/bus/dcache/dcache/WriteSelLogic/BlankByteMask +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/bus/dcache/dcache/WriteSelLogic/DemuxedByteMask +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/bus/dcache/dcache/WriteSelLogic/FetchBufferByteSel +add wave -noupdate -expand -group lsu -group alignment {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/LineWriteData} +add wave -noupdate -expand -group lsu -group alignment /testbench/dut/core/lsu/ziccslm_align/align/IncrementAmount +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/IEUAdrExtE +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/IEUAdrExtM +add wave -noupdate -expand -group lsu /testbench/dut/core/lsu/bus/dcache/dcache/NextSet +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CacheHit +add wave -noupdate -expand -group lsu -expand -group dcache 
/testbench/dut/core/lsu/bus/dcache/dcache/CacheRW +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CMOp +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/CMOZeroHit +add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/CurrState +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetValid +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/IEUAdrE +add wave -noupdate -expand -group lsu -expand -group dcache -radix unsigned /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/PAdr +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelFlush +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/SelWriteback +add wave -noupdate -expand -group lsu -expand -group dcache 
/testbench/dut/core/lsu/bus/dcache/dcache/TagWay +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/Tag +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataLineCache +add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CacheSet +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} -color {Orange Red} {/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory[0]} +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/VictimWay +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} -expand -group DETAILS -expand /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/Intermediate +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} -expand -group DETAILS /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUUpdate +add wave -noupdate -expand -group lsu -expand -group dcache -group {replacement policy} -expand -group DETAILS 
/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/WayExpanded +add wave -noupdate -expand -group lsu -expand -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/LineDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -expand -group lsu -expand -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/NextFlushAdr +add wave -noupdate -expand -group lsu -expand -group dcache -group flush -radix hexadecimal /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdr +add wave -noupdate -expand -group lsu -expand -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/FlushWayFlag +add wave -noupdate -expand -group lsu -expand -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushWayCntEn +add wave -noupdate -expand -group lsu -expand -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/FlushAdrCntEn +add wave -noupdate -expand -group lsu -expand -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/FlushAdrFlag +add wave -noupdate -expand -group lsu -expand -group dcache -group flush /testbench/dut/core/lsu/bus/dcache/dcache/cachefsm/SelFlush +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/VictimWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/SelAdr +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/PAdr +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/NextLRU +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim 
/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/CurrLRU +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUWriteEn +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataLine +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/WordOffsetAddr +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/ValidWay +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim {/testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory[0]} +add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/core/lsu/bus/dcache/dcache/vict/cacheLRU/LRUMemory +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/SetValid +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearValid +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/SetDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} /testbench/dut/core/lsu/bus/dcache/dcache/LineByteMask +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} 
-expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ClearValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/CacheTagMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/bwe} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[0]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 
{/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/bwe} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[1]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/bwe} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[2]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/ce} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/bwe} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group way0 -expand -group 
Way0Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/word[3]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelNotHit2} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelNonHit} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelData} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ClearValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/CacheTagMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 -expand 
{/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[0]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[1]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[2]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/word[3]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 
{/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ClearValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/CacheTagMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[0]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[1]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[2]/wordram/CacheDataMem/RAM} 
+add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/word[3]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SelectedWriteWordEn} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ClearValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/SetDirtyWay} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -label TAG {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/CacheTagMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/DirtyBits} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word0 
{/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[0]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[1]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word2 -expand {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[2]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/we} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group way3 -group Way3Word3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/word[3]/wordram/CacheDataMem/RAM} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -group valid/dirty /testbench/dut/core/lsu/bus/dcache/dcache/ClearDirty +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/CacheSet +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ValidWay} +add wave 
-noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way0 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[0]/TagWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way1 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[1]/TagWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way2 
{/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way2 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[2]/TagWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/HitWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ValidWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} -group way3 {/testbench/dut/core/lsu/bus/dcache/dcache/CacheWays[3]/TagWay} +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {Cache SRAM read} /testbench/dut/core/lsu/bus/dcache/dcache/HitWay +add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/core/lsu/bus/dcache/dcache/NextSet +add wave -noupdate -expand -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusRW +add wave -noupdate -expand -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAdr +add wave -noupdate -expand -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/CacheBusAck +add wave -noupdate -expand -group lsu -expand -group dcache -group {Memory Side} /testbench/dut/core/lsu/bus/dcache/dcache/ReadDataWord +add wave -noupdate -expand -group lsu -expand -group dcache 
/testbench/dut/core/lsu/bus/dcache/dcache/FlushWay +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/VAdr +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/EffectivePrivilegeMode +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/HitPageType +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/Translate +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/tlbcontrol/DisableTranslation +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBMiss +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/TLBHit +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/PhysicalAddress +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/TLBPageFault +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/LoadAccessFaultM +add wave -noupdate -expand -group lsu -group dtlb -expand -group faults /testbench/dut/core/lsu/dmmu/dmmu/StoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group dtlb /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBPAdr +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PTE +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/PageTypeWriteVal +add wave -noupdate -expand -group lsu -group dtlb -expand -group write /testbench/dut/core/lsu/dmmu/dmmu/tlb/tlb/TLBWrite +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PhysicalAddress +add wave -noupdate -expand -group lsu 
-group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/SelRegions +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Cacheable +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/Idempotent +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/pmachecker/PMAAccessFault +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAInstrAccessFaultF +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMALoadAccessFaultM +add wave -noupdate -expand -group lsu -group pma /testbench/dut/core/lsu/dmmu/dmmu/PMAStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPInstrAccessFaultF +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPLoadAccessFaultM +add wave -noupdate -expand -group lsu -group pmp /testbench/dut/core/lsu/dmmu/dmmu/PMPStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/SelHPTW +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWStall +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/DTLBWalk +add wave -noupdate -expand -group lsu -group ptwalker -color Gold /testbench/dut/core/lsu/hptw/hptw/WalkerState +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/HPTWAdr +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PTE +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/NextPageType +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/PageType +add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/core/lsu/hptw/hptw/ValidNonLeafPTE +add wave -noupdate -expand -group lsu -group ptwalker -expand -group types 
/testbench/dut/core/lsu/ITLBMissF +add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/DTLBMissM +add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/ITLBWriteF +add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/core/lsu/hptw/hptw/DTLBWriteM +add wave -noupdate -expand -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUAccessFaultM +add wave -noupdate -expand -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/DCacheStallM +add wave -noupdate -expand -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFaultF +add wave -noupdate -expand -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSULoadAccessFaultM +add wave -noupdate -expand -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LSUStoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/LoadAccessFaultM +add wave -noupdate -expand -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/StoreAmoAccessFaultM +add wave -noupdate -expand -group lsu -group ptwalker -expand -group faults /testbench/dut/core/lsu/hptw/hptw/HPTWInstrAccessFault add wave -noupdate -group {WriteBack stage} /testbench/InstrW add wave -noupdate -group {WriteBack stage} /testbench/InstrWName -add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRM -add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} -label PHT /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/mem -add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} 
{/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[5]} -add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[4]} -add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[3]} -add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[2]} -add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[1]} -add wave -noupdate -expand -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[0]} -add wave -noupdate -expand -group Bpred -expand -group RAS -expand /testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory -add wave -noupdate -expand -group Bpred -expand -group RAS /testbench/dut/core/ifu/bpred/bpred/RASPredictor/Ptr -add wave -noupdate -expand -group Bpred -divider {class check} -add wave -noupdate -expand -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF -add wave -noupdate -expand -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE +add wave -noupdate -group {WriteBack stage} /testbench/dut/core/priv/priv/pmd/wfiW +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/GHRM +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} -label PHT /testbench/dut/core/ifu/bpred/bpred/Predictor/DirPredictor/PHT/mem +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[5]} +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} 
{/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[4]} +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[3]} +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[2]} +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[1]} +add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory[0]} +add wave -noupdate -group Bpred -expand -group RAS -expand /testbench/dut/core/ifu/bpred/bpred/RASPredictor/memory +add wave -noupdate -group Bpred -expand -group RAS /testbench/dut/core/ifu/bpred/bpred/RASPredictor/Ptr +add wave -noupdate -group Bpred -divider {class check} +add wave -noupdate -group Bpred -expand -group prediction /testbench/dut/core/ifu/bpred/bpred/RASPCF +add wave -noupdate -group Bpred -expand -group prediction -expand -group ex /testbench/dut/core/ifu/bpred/bpred/PCSrcE add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCNextF add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/bpred/bpred/NextValidPCE add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCF add wave -noupdate -group {PCNext Generation} /testbench/dut/core/ifu/PCPlus2or4F -add wave -noupdate -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf -add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a1 -add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a2 -add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/a3 -add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/rd1 -add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/rd2 -add wave -noupdate -group RegFile /testbench/dut/core/ieu/dp/regf/we3 -add wave -noupdate -group RegFile 
/testbench/dut/core/ieu/dp/regf/wd3 -add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ReadDataW -add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/CSRReadValW -add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultSrcW -add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultW +add wave -noupdate -expand -group RegFile -expand /testbench/dut/core/ieu/dp/regf/rf +add wave -noupdate -expand -group RegFile /testbench/dut/core/ieu/dp/regf/a1 +add wave -noupdate -expand -group RegFile /testbench/dut/core/ieu/dp/regf/a2 +add wave -noupdate -expand -group RegFile /testbench/dut/core/ieu/dp/regf/a3 +add wave -noupdate -expand -group RegFile /testbench/dut/core/ieu/dp/regf/rd1 +add wave -noupdate -expand -group RegFile /testbench/dut/core/ieu/dp/regf/rd2 +add wave -noupdate -expand -group RegFile /testbench/dut/core/ieu/dp/regf/we3 +add wave -noupdate -expand -group RegFile /testbench/dut/core/ieu/dp/regf/wd3 +add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ReadDataW +add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/CSRReadValW +add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultSrcW +add wave -noupdate -expand -group RegFile -group {write regfile mux} /testbench/dut/core/ieu/dp/ResultW add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/csrm/MISA_REGW add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/csrm/MCAUSE_REGW add wave -noupdate -group CSRs /testbench/dut/core/priv/priv/csr/MCOUNTEREN_REGW @@ -489,14 +532,14 @@ add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/d add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/IFUCacheBusStallF add wave -noupdate -group ifu 
-group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/ITLBMissF add wave -noupdate -group ifu -group Spill -expand -group takespill /testbench/dut/core/ifu/Spill/spill/TakeSpillF -add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HSIZE -add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HBURST -add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HTRANS -add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HWRITE -add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HADDR -add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/AHBBuscachefsm/Flush -add wave -noupdate -group ifu -group bus -color Gold /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/AHBBuscachefsm/CurrState -add wave -noupdate -group ifu -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HRDATA +add wave -noupdate -group ifu -expand -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HSIZE +add wave -noupdate -group ifu -expand -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HBURST +add wave -noupdate -group ifu -expand -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HTRANS +add wave -noupdate -group ifu -expand -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HWRITE +add wave -noupdate -group ifu -expand -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HADDR +add wave -noupdate -group ifu -expand -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/AHBBuscachefsm/Flush +add wave -noupdate -group ifu -expand -group bus -color Gold /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/AHBBuscachefsm/CurrState +add wave -noupdate -group ifu -expand -group bus /testbench/dut/core/ifu/bus/icache/ahbcacheinterface/HRDATA add wave -noupdate 
-group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/Stall add wave -noupdate -group ifu -expand -group icache /testbench/dut/core/ifu/bus/icache/icache/FlushStage add wave -noupdate -group ifu -expand -group icache -color Gold /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CurrState @@ -666,27 +709,9 @@ add wave -noupdate -group wfi /testbench/dut/core/priv/priv/pmd/STATUS_TW add wave -noupdate -group wfi /testbench/dut/core/priv/priv/pmd/PrivilegeModeW add wave -noupdate -group wfi /testbench/dut/core/priv/priv/pmd/wfi/WFICount add wave -noupdate -group wfi /testbench/dut/core/priv/priv/pmd/WFITimeoutM -add wave -noupdate /testbench/loggers/clk -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/LRUWriteEn -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/FlushStage -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/Stall -add wave -noupdate /testbench/loggers/ICacheLogger/Enable -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/cachefsm/CurrState -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/vict/cacheLRU/CacheEn -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/LRUWriteEn -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/FlushStage -add wave -noupdate /testbench/dut/core/ifu/bus/icache/icache/CacheEn -add wave -noupdate /testbench/dut/core/ifu/CacheableF -add wave -noupdate /testbench/loggers/BeginSample -add wave -noupdate /testbench/loggers/StartSample -add wave -noupdate /testbench/loggers/reset -add wave -noupdate -radix ascii /testbench/loggers/TEST -add wave -noupdate /testbench/dut/core/fpu/fpu/fctrl/IllegalFPUInstrD -add wave -noupdate /testbench/dut/core/fpu/fpu/fctrl/STATUS_FS -add wave -noupdate /testbench/dut/core/priv/priv/csr/csrsr/STATUS_FS_INT TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 4} {172636 ns} 1} {{Cursor 4} {111958 ns} 0} {{Cursor 3} {152766 ns} 1} -quietly wave cursor active 2 +WaveRestoreCursors {{Cursor 4} 
{39144 ns} 1} {{Cursor 4} {33684 ns} 1} {{Cursor 3} {39145 ns} 0} +quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 194 configure wave -justifyvalue left @@ -701,4 +726,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {37879604 ns} {38203328 ns} +WaveRestoreZoom {39053 ns} {39217 ns} diff --git a/src/cvw.sv b/src/cvw.sv index 198042913..8b3c87a59 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -37,7 +37,6 @@ package cvw; `include "BranchPredictorType.vh" typedef struct packed { - logic FPGA; // Modifications to tare int XLEN; // Machine width (32 or 64) logic IEEE754; // IEEE754 NaN handling (0 = use RISC-V NaN propagation instead) int MISA; // Machine Instruction Set Architecture @@ -108,9 +107,11 @@ typedef struct packed { logic BOOTROM_SUPPORTED; logic [63:0] BOOTROM_BASE; logic [63:0] BOOTROM_RANGE; + logic BOOTROM_PRELOAD; logic UNCORE_RAM_SUPPORTED; logic [63:0] UNCORE_RAM_BASE; logic [63:0] UNCORE_RAM_RANGE; + logic UNCORE_RAM_PRELOAD; logic EXT_MEM_SUPPORTED; logic [63:0] EXT_MEM_BASE; logic [63:0] EXT_MEM_RANGE; diff --git a/src/uncore/ram_ahb.sv b/src/uncore/ram_ahb.sv index 7b6c504bd..896c2a4cb 100644 --- a/src/uncore/ram_ahb.sv +++ b/src/uncore/ram_ahb.sv @@ -29,7 +29,7 @@ `define RAM_LATENCY 0 module ram_ahb import cvw::*; #(parameter cvw_t P, - parameter BASE=0, RANGE = 65535) ( + parameter BASE=0, RANGE = 65535, PRELOAD = 0) ( input logic HCLK, HRESETn, input logic HSELRam, input logic [P.PA_BITS-1:0] HADDR, @@ -71,7 +71,7 @@ module ram_ahb import cvw::*; #(parameter cvw_t P, mux2 #(P.PA_BITS) adrmux(HADDR, HADDRD, memwriteD | ~HREADY, RamAddr); // single-ported RAM - ram1p1rwbe #(.USE_SRAM(P.USE_SRAM), .DEPTH(RANGE/8), .WIDTH(P.XLEN), .PRELOAD_ENABLED(P.FPGA)) memory(.clk(HCLK), .ce(1'b1), + ram1p1rwbe #(P.USE_SRAM, RANGE/8, P.XLEN, PRELOAD) memory(.clk(HCLK), .ce(1'b1), .addr(RamAddr[ADDR_WIDTH+OFFSET-1:OFFSET]), .we(memwriteD), .din(HWDATA), 
.bwe(HWSTRB), .dout(HREADRam)); // use this to add arbitrary latency to ram. Helps test AHB controller correctness diff --git a/src/uncore/rom_ahb.sv b/src/uncore/rom_ahb.sv index 0c09191be..d94cd6e07 100644 --- a/src/uncore/rom_ahb.sv +++ b/src/uncore/rom_ahb.sv @@ -27,7 +27,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module rom_ahb import cvw::*; #(parameter cvw_t P, - parameter BASE=0, RANGE = 65535) ( + parameter BASE=0, RANGE = 65535, PRELOAD = 0) ( input logic HCLK, HRESETn, input logic HSELRom, input logic [P.PA_BITS-1:0] HADDR, @@ -45,6 +45,6 @@ module rom_ahb import cvw::*; #(parameter cvw_t P, assign HRESPRom = 0; // OK // single-ported ROM - rom1p1r #(ADDR_WIDTH, P.XLEN, P.FPGA) + rom1p1r #(ADDR_WIDTH, P.XLEN, PRELOAD) memory(.clk(HCLK), .ce(1'b1), .addr(HADDR[ADDR_WIDTH+OFFSET-1:OFFSET]), .dout(HREADRom)); endmodule diff --git a/src/uncore/uncore.sv b/src/uncore/uncore.sv index d55e2b899..60d197f78 100644 --- a/src/uncore/uncore.sv +++ b/src/uncore/uncore.sv @@ -102,13 +102,13 @@ module uncore import cvw::*; #(parameter cvw_t P)( // on-chip RAM if (P.UNCORE_RAM_SUPPORTED) begin : ram - ram_ahb #(.P(P), .BASE(P.UNCORE_RAM_BASE), .RANGE(P.UNCORE_RAM_RANGE)) ram ( + ram_ahb #(.P(P), .BASE(P.UNCORE_RAM_BASE), .RANGE(P.UNCORE_RAM_RANGE), .PRELOAD(P.UNCORE_RAM_PRELOAD)) ram ( .HCLK, .HRESETn, .HSELRam, .HADDR, .HWRITE, .HREADY, .HTRANS, .HWDATA, .HWSTRB, .HREADRam, .HRESPRam, .HREADYRam); end if (P.BOOTROM_SUPPORTED) begin : bootrom - rom_ahb #(.P(P), .BASE(P.BOOTROM_BASE), .RANGE(P.BOOTROM_RANGE)) + rom_ahb #(.P(P), .BASE(P.BOOTROM_BASE), .RANGE(P.BOOTROM_RANGE), .PRELOAD(P.BOOTROM_PRELOAD)) bootrom(.HCLK, .HRESETn, .HSELRom(HSELBootRom), .HADDR, .HREADY, .HTRANS, .HREADRom(HREADBootRom), .HRESPRom(HRESPBootRom), .HREADYRom(HREADYBootRom)); end diff --git a/testbench/sdc/sd_top_tb.sv b/testbench/sdc/sd_top_tb.sv deleted file mode 100644 index d969af194..000000000 --- a/testbench/sdc/sd_top_tb.sv +++ 
/dev/null @@ -1,132 +0,0 @@ -/////////////////////////////////////////// -// sd_top_tb.sv -// -// Written: Ross Thompson September 20, 2021 -// Modified: -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wconfig.vh" - - -module sd_top_tb(); - - - localparam g_COUNT_WIDTH = 8; - - logic a_RST; - logic i_SD_CMD; - logic o_SD_CMD; - logic o_SD_CMD_OE; - wire [3:0] i_SD_DAT; - logic o_SD_CLK; - logic [32:9] i_BLOCK_ADDR; - logic [g_COUNT_WIDTH-1:0] i_COUNT_IN_MAX; - - logic o_READY_FOR_READ; - logic i_READ_REQUEST; - logic [3:0] o_DATA_TO_CORE; - logic o_DATA_VALID; - logic o_LAST_NIBBLE; - logic [4095:0] ReadData; - logic o_SD_RESTARTING; - logic [2:0] o_ERROR_CODE_Q; - logic o_FATAL_ERROR; - - - - // Driver - wire PAD; - - logic r_CLK; - - - // clock - - sd_top #(g_COUNT_WIDTH) DUT - (.CLK(r_CLK), - .a_RST(a_RST), - .i_SD_CMD(i_SD_CMD), - .o_SD_CMD(o_SD_CMD), - .o_SD_CMD_OE(o_SD_CMD_OE), - .i_SD_DAT(i_SD_DAT), - .o_SD_CLK(o_SD_CLK), - .i_BLOCK_ADDR(i_BLOCK_ADDR), - .o_READY_FOR_READ(o_READY_FOR_READ), - .o_SD_RESTARTING(o_SD_RESTARTING), - .o_ERROR_CODE_Q(o_ERROR_CODE_Q), - .o_FATAL_ERROR(o_FATAL_ERROR), - .i_READ_REQUEST(i_READ_REQUEST), - .o_DATA_TO_CORE(o_DATA_TO_CORE), - .ReadData(ReadData), - .o_DATA_VALID(o_DATA_VALID), - .o_LAST_NIBBLE(o_LAST_NIBBLE), - .i_COUNT_IN_MAX(i_COUNT_IN_MAX), - .LIMIT_SD_TIMERS(1'b1)); - - sdModel sdcard - (.sdClk(o_SD_CLK), - .cmd(PAD), - .dat(i_SD_DAT)); - - // tri state pad - // replace with I/O standard cell or FPGA gate. - assign PAD = o_SD_CMD_OE ? 
o_SD_CMD : 1'bz; - assign i_SD_CMD = PAD; - - - always - begin - r_CLK = 1; # 5; r_CLK = 0; # 5; - end - - - initial $readmemh("ramdisk2.hex", sdcard.FLASHmem); - - initial begin - - a_RST = 1'b0; - i_BLOCK_ADDR = 24'h100000; - i_COUNT_IN_MAX = '0; - i_READ_REQUEST = 1'b0; - - # 5; - i_COUNT_IN_MAX = -62; - - # 10; - a_RST = 1'b1; - - # 4800; - - a_RST = 1'b0; - - # 2000000; - i_READ_REQUEST = 1'b0; - # 10000; - i_READ_REQUEST = 1'b1; - # 10000; - i_READ_REQUEST = 1'b0; - - end - -endmodule From d5f0c15b906d02dfc9ea89d15a342db17e055f60 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 17:48:28 -0600 Subject: [PATCH 41/48] Modified the fpga build script to generate its own config file rather than use the one in config/fpga. --- fpga/generator/Makefile | 19 +++++++++++++++++++ fpga/generator/wally.tcl | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/fpga/generator/Makefile b/fpga/generator/Makefile index 2a1d4a60e..e67b68cb8 100644 --- a/fpga/generator/Makefile +++ b/fpga/generator/Makefile @@ -50,7 +50,26 @@ IP_Arty: $(dst)/xlnx_proc_sys_reset.log \ PreProcessFiles: rm -rf ../src/CopiedFiles_do_not_add_to_repo/ cp -r ../../src/ ../src/CopiedFiles_do_not_add_to_repo/ + mkdir ../src/CopiedFiles_do_not_add_to_repo/config/ + cp ../../config/fpga/config.vh ../src/CopiedFiles_do_not_add_to_repo/config/ ./insert_debug_comment.sh + # modify config *** RT: eventually setup for variably defined sized memory + sed -i "s/ZICCLSM_SUPPORTED.*/ZICCLSM_SUPPORTED = 0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/RESET_VECTOR.*/RESET_VECTOR = 64'h0000000000001000;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/BOOTROM_PRELOAD.*/BOOTROM_PRELOAD = 1'b1;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/UNCORE_RAM_BASE.*/UNCORE_RAM_BASE = 64'h00002000;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/UNCORE_RAM_RANGE.*/UNCORE_RAM_RANGE = 64'h00000FFF;/g"
../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/UNCORE_RAM_PRELOAD.*/UNCORE_RAM_PRELOAD = 1'b1;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/EXT_MEM_SUPPORTED.*/EXT_MEM_SUPPORTED = 1'b1;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/EXT_MEM_RANGE.*/EXT_MEM_RANGE = 64'h0FFFFFFF;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/SDC_SUPPORTED.*/SDC_SUPPORTED = 1'b1;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/SPI_SUPPORTED.*/SDC_SUPPORTED = 1'b0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh # *** RT: Add SPI when ready + sed -i "s/GPIO_LOOPBACK_TEST.*/GPIO_LOOPBACK_TEST = 0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/SPI_LOOPBACK_TEST.*/SPI_LOOPBACK_TEST = 0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/UART_PRESCALE.*/UART_PRESCALE = 32'd0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/PLIC_NUM_SRC.*/PLIC_NUM_SRC = 32'd53;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/PLIC_SDC_ID.*/PLIC_SDC_ID = 32'd20;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/BPRED_SIZE.*/BPRED_SIZE = 32'd12;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh $(dst)/%.log: %.tcl mkdir -p IP diff --git a/fpga/generator/wally.tcl b/fpga/generator/wally.tcl index f28825fae..f121ab704 100644 --- a/fpga/generator/wally.tcl +++ b/fpga/generator/wally.tcl @@ -48,7 +48,7 @@ read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_serial_host.v] read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_master.v] read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_serial_host.v] -set_property include_dirs {../../config/fpga ../../config/shared ../../addins/vivado-risc-v/sdc} [current_fileset] +set_property include_dirs {../src/CopiedFiles_do_no_add_to_repo/config/ ../../config/shared ../../addins/vivado-risc-v/sdc} 
[current_fileset] if {$board=="ArtyA7"} { add_files -fileset constrs_1 -norecurse ../constraints/constraints-$board.xdc From 05eb5460b4b8f84b45ad14f61f2b45ac479d3c4c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 17:50:29 -0600 Subject: [PATCH 42/48] Removed fpga config. No longer needed. --- config/fpga/config.vh | 190 ------------------------------------------ 1 file changed, 190 deletions(-) delete mode 100644 config/fpga/config.vh diff --git a/config/fpga/config.vh b/config/fpga/config.vh deleted file mode 100644 index 8f52f597c..000000000 --- a/config/fpga/config.vh +++ /dev/null @@ -1,190 +0,0 @@ -////////////////////////////////////////// -// config.vh -// -// Written: David_Harris@hmc.edu 4 January 2021 -// Modified: -// -// Purpose: Specify which features are configured -// Macros to determine which modes are supported based on MISA -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -// include shared configuration -`include "BranchPredictorType.vh" - -// RV32 or RV64: XLEN = 32 or 64 -localparam XLEN = 32'd64; - -// IEEE 754 compliance -localparam IEEE754 = 0; - -// MISA RISC-V configuration per specification -localparam MISA = (32'h00000104 | 1 << 5 | 1 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0); -localparam ZICSR_SUPPORTED = 1; -localparam ZIFENCEI_SUPPORTED = 1; -localparam COUNTERS = 12'd32; -localparam ZICNTR_SUPPORTED = 1; -localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 0; -localparam SSTC_SUPPORTED = 1; -localparam ZICBOM_SUPPORTED = 1; -localparam ZICBOZ_SUPPORTED = 1; -localparam ZICBOP_SUPPORTED = 1; -localparam ZICCLSM_SUPPORTED = 0; -localparam SVPBMT_SUPPORTED = 1; -localparam SVNAPOT_SUPPORTED = 1; -localparam SVINVAL_SUPPORTED = 1; - -// LSU microarchitectural Features -localparam BUS_SUPPORTED = 1; -localparam DCACHE_SUPPORTED = 1; -localparam ICACHE_SUPPORTED = 1; -localparam VIRTMEM_SUPPORTED = 1; -localparam VECTORED_INTERRUPTS_SUPPORTED = 1; -localparam BIGENDIAN_SUPPORTED = 1; - -// TLB configuration. Entries should be a power of 2 -localparam ITLB_ENTRIES = 32'd32; -localparam DTLB_ENTRIES = 32'd32; - -// Cache configuration. 
Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines -localparam DCACHE_NUMWAYS = 32'd4; -localparam DCACHE_WAYSIZEINBYTES = 32'd4096; -localparam DCACHE_LINELENINBITS = 32'd512; -localparam ICACHE_NUMWAYS = 32'd4; -localparam ICACHE_WAYSIZEINBYTES = 32'd4096; -localparam ICACHE_LINELENINBITS = 32'd512; - -// Integer Divider Configuration -// IDIV_BITSPERCYCLE must be 1, 2, or 4 -localparam IDIV_BITSPERCYCLE = 32'd4; -localparam IDIV_ON_FPU = 1; - -// Legal number of PMP entries are 0, 16, or 64 -localparam PMP_ENTRIES = 32'd16; - -// Address space -localparam logic [63:0] RESET_VECTOR = 64'h0000000000001000; - -// Bus Interface width -localparam AHBW = 32'd64; - -// WFI Timeout Wait -localparam WFI_TIMEOUT_BIT = 32'd16; - -// Peripheral Physical Addresses -// Peripheral memory space extends from BASE to BASE+RANGE -// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits - -// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? 
-localparam DTIM_SUPPORTED = 1'b0; -localparam logic [63:0] DTIM_BASE = 64'h80000000; -localparam logic [63:0] DTIM_RANGE = 64'h00001FFF; - -localparam IROM_SUPPORTED = 1'b0; -localparam logic [63:0] IROM_BASE = 64'h80000000; -localparam logic [63:0] IROM_RANGE = 64'h00001FFF; - -localparam BOOTROM_SUPPORTED = 1'b1; -localparam logic [63:0] BOOTROM_BASE = 64'h00001000; -localparam logic [63:0] BOOTROM_RANGE = 64'h00000FFF; -localparam BOOTROM_PRELOAD = 1'b1; - -localparam UNCORE_RAM_SUPPORTED = 1'b1; -localparam logic [63:0] UNCORE_RAM_BASE = 64'h00002000; -localparam logic [63:0] UNCORE_RAM_RANGE = 64'h00000FFF; -localparam UNCORE_RAM_PRELOAD = 1'b1; - -localparam EXT_MEM_SUPPORTED = 1'b1; -localparam logic [63:0] EXT_MEM_BASE = 64'h80000000; -localparam logic [63:0] EXT_MEM_RANGE = 64'h0FFFFFFF; - -localparam CLINT_SUPPORTED = 1'b1; -localparam logic [63:0] CLINT_BASE = 64'h02000000; -localparam logic [63:0] CLINT_RANGE = 64'h0000FFFF; - -localparam GPIO_SUPPORTED = 1'b1; -localparam logic [63:0] GPIO_BASE = 64'h10060000; -localparam logic [63:0] GPIO_RANGE = 64'h000000FF; - -localparam UART_SUPPORTED = 1'b1; -localparam logic [63:0] UART_BASE = 64'h10000000; -localparam logic [63:0] UART_RANGE = 64'h00000007; - -localparam PLIC_SUPPORTED = 1'b1; -localparam logic [63:0] PLIC_BASE = 64'h0C000000; -localparam logic [63:0] PLIC_RANGE = 64'h03FFFFFF; - -localparam SDC_SUPPORTED = 1'b1; -localparam logic [63:0] SDC_BASE = 64'h00013000; -localparam logic [63:0] SDC_RANGE = 64'h0000007F; - -localparam SPI_SUPPORTED = 1'b1; -localparam logic [63:0] SPI_BASE = 64'h10040000; -localparam logic [63:0] SPI_RANGE = 64'h00000FFF; - -// Test modes - -// Tie GPIO outputs back to inputs -localparam GPIO_LOOPBACK_TEST = 0; -localparam SPI_LOOPBACK_TEST = 0; - -// Hardware configuration -localparam UART_PRESCALE = 32'd0; - -// Interrupt configuration -localparam PLIC_NUM_SRC = 32'd53; -// comment out the following if >=32 sources -localparam PLIC_NUM_SRC_LT_32 = (PLIC_NUM_SRC < 
32); -localparam PLIC_GPIO_ID = 32'd3; -localparam PLIC_UART_ID = 32'd10; -localparam PLIC_SPI_ID = 32'd6; -localparam PLIC_SDC_ID = 32'd20; - -localparam BPRED_SUPPORTED = 1; -localparam BPRED_TYPE = `BP_GSHARE; // BP_GSHARE_BASIC, BP_GLOBAL, BP_GLOBAL_BASIC, BP_TWOBIT -localparam BPRED_NUM_LHR = 32'd6; -localparam BPRED_SIZE = 32'd12; -localparam BTB_SIZE = 32'd10; -localparam RAS_SIZE = 32'd16; - -localparam SVADU_SUPPORTED = 1; -localparam ZMMUL_SUPPORTED = 0; - -// FPU division architecture -localparam RADIX = 32'h4; -localparam DIVCOPIES = 32'h4; - -// bit manipulation -localparam ZBA_SUPPORTED = 1; -localparam ZBB_SUPPORTED = 1; -localparam ZBC_SUPPORTED = 1; -localparam ZBS_SUPPORTED = 1; - -// New compressed instructions -localparam ZCB_SUPPORTED = 1; -localparam ZCA_SUPPORTED = 0; -localparam ZCF_SUPPORTED = 0; -localparam ZCD_SUPPORTED = 0; - -// Memory synthesis configuration -localparam USE_SRAM = 0; - -`include "config-shared.vh" From f8b65f50b05b25acfeabdfff2e346b38fe59ad5a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 18:10:22 -0600 Subject: [PATCH 43/48] Fixed bugs in the updated fpga synthe script. 
--- fpga/generator/Makefile | 20 ++++++++++---------- fpga/generator/wally.tcl | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fpga/generator/Makefile b/fpga/generator/Makefile index e67b68cb8..42ca85677 100644 --- a/fpga/generator/Makefile +++ b/fpga/generator/Makefile @@ -6,20 +6,20 @@ dst := IP #export board := vcu118 # vcu108 -export XILINX_PART := xcvu095-ffva2104-2-e -export XILINX_BOARD := xilinx.com:vcu108:part0:1.2 -export board := vcu108 +#export XILINX_PART := xcvu095-ffva2104-2-e +#export XILINX_BOARD := xilinx.com:vcu108:part0:1.2 +#export board := vcu108 # Arty A7 -# export XILINX_PART := xc7a100tcsg324-1 -# export XILINX_BOARD := digilentinc.com:arty-a7-100:part0:1.1 -# export board := ArtyA7 +export XILINX_PART := xc7a100tcsg324-1 +export XILINX_BOARD := digilentinc.com:arty-a7-100:part0:1.1 +export board := ArtyA7 # for Arty A7 and S7 boards -# all: FPGA_Arty +all: FPGA_Arty # VCU 108 and VCU 118 boards -all: FPGA_VCU +#all: FPGA_VCU FPGA_Arty: PreProcessFiles IP_Arty vivado -mode tcl -source wally.tcl 2>&1 | tee wally.log @@ -63,11 +63,11 @@ PreProcessFiles: sed -i "s/EXT_MEM_SUPPORTED.*/EXT_MEM_SUPPORTED = 1'b1;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh sed -i "s/EXT_MEM_RANGE.*/EXT_MEM_RANGE = 64'h0FFFFFFF;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh sed -i "s/SDC_SUPPORTED.*/SDC_SUPPORTED = 1'b1;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh - sed -i "s/SPI_SUPPORTED.*/SDC_SUPPORTED = 1'b0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh # *** RT: Add SPI when ready + sed -i "s/SPI_SUPPORTED.*/SPI_SUPPORTED = 1'b0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh # *** RT: Add SPI when ready sed -i "s/GPIO_LOOPBACK_TEST.*/GPIO_LOOPBACK_TEST = 0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh sed -i "s/SPI_LOOPBACK_TEST.*/SPI_LOOPBACK_TEST = 0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh sed -i "s/UART_PRESCALE.*/UART_PRESCALE = 32'd0;/g" 
../src/CopiedFiles_do_not_add_to_repo/config/config.vh - sed -i "s/PLIC_NUM_SRC.*/PLIC_NUM_SRC = 32'd53;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh + sed -i "s/PLIC_NUM_SRC = .*/PLIC_NUM_SRC = 32'd53;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh sed -i "s/PLIC_SDC_ID.*/PLIC_SDC_ID = 32'd20;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh sed -i "s/BPRED_SIZE.*/BPRED_SIZE = 32'd12;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh diff --git a/fpga/generator/wally.tcl b/fpga/generator/wally.tcl index f121ab704..d699c3d21 100644 --- a/fpga/generator/wally.tcl +++ b/fpga/generator/wally.tcl @@ -48,7 +48,7 @@ read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_cmd_serial_host.v] read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_master.v] read_verilog [glob -type f ../../addins/vivado-risc-v/sdc/sd_data_serial_host.v] -set_property include_dirs {../src/CopiedFiles_do_no_add_to_repo/config/ ../../config/shared ../../addins/vivado-risc-v/sdc} [current_fileset] +set_property include_dirs {../src/CopiedFiles_do_not_add_to_repo/config ../../config/shared ../../addins/vivado-risc-v/sdc} [current_fileset] if {$board=="ArtyA7"} { add_files -fileset constrs_1 -norecurse ../constraints/constraints-$board.xdc From d4bc9da085b067b07b20fa7b5f50b70d23f3a90b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 13 Nov 2023 18:12:02 -0600 Subject: [PATCH 44/48] Fixed another bug in the updated script changes. 
--- fpga/generator/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fpga/generator/Makefile b/fpga/generator/Makefile index 42ca85677..9c7f557f3 100644 --- a/fpga/generator/Makefile +++ b/fpga/generator/Makefile @@ -51,7 +51,7 @@ PreProcessFiles: rm -rf ../src/CopiedFiles_do_not_add_to_repo/ cp -r ../../src/ ../src/CopiedFiles_do_not_add_to_repo/ mkdir ../src/CopiedFiles_do_not_add_to_repo/config/ - cp ../../config/fpga/config.vh ../src/CopiedFiles_do_not_add_to_repo/config/ + cp ../../config/rv64gc/config.vh ../src/CopiedFiles_do_not_add_to_repo/config/ ./insert_debug_comment.sh # modify config *** RT: eventually setup for variably defined sized memory sed -i "s/ZICCLSM_SUPPORTED.*/ZICCLSM_SUPPORTED = 0;/g" ../src/CopiedFiles_do_not_add_to_repo/config/config.vh From 6374d1a200329fcd4dd758833f75a7a13a155a28 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Tue, 14 Nov 2023 01:04:37 -0600 Subject: [PATCH 45/48] Modify ppaSynth.py to be able to not issue excess number of operations with Pool command. This is due to the original command using the Popen command, whereas, using the subprocess.call command solves this issue. 
This relieves the python script from issuing a ton of synthesis commands and using up all the licenses --- synthDC/ppa/ppaSynth.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index ceb6edbd2..07a342e26 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -12,11 +12,11 @@ from ppaAnalyze import synthsfromcsv def runCommand(module, width, tech, freq): command = "make synth DESIGN={} WIDTH={} TECH={} DRIVE=INV FREQ={} MAXOPT=1 MAXCORES=1".format(module, width, tech, freq) - subprocess.Popen(command, shell=True) + subprocess.call(command, shell=True) def deleteRedundant(synthsToRun): '''removes any previous runs for the current synthesis specifications''' - synthStr = "rm -rf runs/ppa_{}_{}_rv32e_{}nm_{}_*" + synthStr = "rm -rf runs/{}_{}_rv32e_{}_{}_*" for synth in synthsToRun: bashCommand = synthStr.format(*synth) outputCPL = subprocess.check_output(['bash','-c', bashCommand]) @@ -46,7 +46,7 @@ def freqModuleSweep(widths, modules, tech): return synthsToRun def filterRedundant(synthsToRun): - bashCommand = "find .
-path '*runs/*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) specReg = re.compile('[a-zA-Z0-9]+') allSynths = output.decode("utf-8").split('\n')[:-1] @@ -84,14 +84,15 @@ if __name__ == '__main__': synthsToRun = freqSweep(module, width, tech) ##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses - modules = ['adder', 'comparator'] - widths = [64, 128] + modules = ['adder', "comparator"] + widths = [8, 16, 32, 64, 128] tech = 'sky130' synthsToRun = freqModuleSweep(widths, modules, tech) ##### Only do syntheses for which a run doesn't already exist - synthsToRun = filterRedundant(synthsToRun) - + synthsToRun = filterRedundant(synthsToRun) pool = Pool(processes=25) -pool.starmap(runCommand, synthsToRun) \ No newline at end of file +pool.starmap(runCommand, synthsToRun) +pool.close() +pool.join() \ No newline at end of file From c722e2c59da4e9473194d4abd4eda8b36277416c Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Tue, 14 Nov 2023 01:06:14 -0600 Subject: [PATCH 46/48] fix plotPPA and other excruciatingly painful problems related to using allWidths and causing empty arrays to be used. 
This generates the normalized/unnormalized plots --- synthDC/ppa/bestSynths.csv | 18 +- synthDC/ppa/ppaAnalyze.py | 798 +++++++++++++++++++++++-------------- 2 files changed, 504 insertions(+), 312 deletions(-) diff --git a/synthDC/ppa/bestSynths.csv b/synthDC/ppa/bestSynths.csv index 885eeb962..655f171a1 100644 --- a/synthDC/ppa/bestSynths.csv +++ b/synthDC/ppa/bestSynths.csv @@ -4,10 +4,10 @@ binencoder,sky130,16,1000,1.0000,136.220003,77.243,0.021773774467348 binencoder,sky130,32,1000,1.0000,372.400007,189.626,0.04371111111111111 binencoder,sky130,64,1000,1.0000,797.720015,382.205,0.07393850658857981 binencoder,sky130,128,900,1.1111,1602.300031,610.009,0.1261366969785861 -adder,sky130,8,1000,1.0000,253.820005,154.438,0.10825587752870422 -adder,sky130,16,1000,1.0000,722.260013,485.109,0.32460910944935417 -adder,sky130,32,1000,1.0000,1440.600027,714.057,0.6580226904376014 -adder,sky130,64,1000,1.0000,2781.240054,1050.0,0.9392239364188874 +adder,sky130,8,1700,0.588235,253.820005,154.438,0.10825587752870422 +adder,sky130,16,1300,0.7692307,722.260013,485.109,0.32460910944935417 +adder,sky130,32,1100,0.90909,1440.600027,714.057,0.6580226904376014 +adder,sky130,64,950,1.0526315,2781.240054,1050.0,0.9392239364188874 adder,sky130,128,900,1.1111,6186.740118,2230.0,2.1480106100795755 csa,sky130,8,1000,1.0000,266.560005,154.202,0.13650573115665163 csa,sky130,16,1000,1.0000,533.12001,308.404,0.27263530601922104 @@ -19,11 +19,11 @@ shifter,sky130,16,1000,1.0000,666.400006,558.433,0.19552906110283155 shifter,sky130,32,1000,1.0000,1475.880027,768.262,0.3807431082700759 shifter,sky130,64,1000,1.0000,3914.120062,2680.0,1.144802541988198 shifter,sky130,128,900,1.1111,9192.400136,6080.0,2.9008914525432616 -comparator,sky130,8,1000,1.0000,200.900004,136.6,0.05001033271337053 -comparator,sky130,16,1000,1.0000,358.680007,189.253,0.06321553011448482 -comparator,sky130,32,1500,0.666666,690.900013,315.709,0.10771793448084398 
-comparator,sky130,64,1300,0.7692307,1372.980026,508.393,0.2048577820389901 -comparator,sky130,128,1100,0.909090,2744.980052,796.047,0.34396273737011823 +comparator,sky130,8,1700,0.588235,200.900004,136.6,0.05001033271337053 +comparator,sky130,16,1500,0.6666667,358.680007,189.253,0.06321553011448482 +comparator,sky130,32,1300,0.7692307,690.900013,315.709,0.10771793448084398 +comparator,sky130,64,1200,0.8333333,1372.980026,508.393,0.2048577820389901 +comparator,sky130,128,1150,0.869565,2744.980052,796.047,0.34396273737011823 flop,sky130,8,1000,1.0000,133.279999,64.8145,0.193835 flop,sky130,16,1000,1.0000,266.5599975,129.629,0.38715000000000005 flop,sky130,32,1000,1.0000,533.119995,259.258,0.7723000000000001 diff --git a/synthDC/ppa/ppaAnalyze.py b/synthDC/ppa/ppaAnalyze.py index 73cd353c4..9af15fd80 100755 --- a/synthDC/ppa/ppaAnalyze.py +++ b/synthDC/ppa/ppaAnalyze.py @@ -18,92 +18,117 @@ from collections import namedtuple import sklearn.metrics as skm # depricated, will need to replace with scikit-learn import os + def synthsfromcsv(filename): Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy") - with open(filename, newline='') as csvfile: + with open(filename, newline="") as csvfile: csvreader = csv.reader(csvfile) global allSynths allSynths = list(csvreader)[1:] for i in range(len(allSynths)): for j in range(len(allSynths[0])): - try: allSynths[i][j] = int(allSynths[i][j]) - except: - try: allSynths[i][j] = float(allSynths[i][j]) - except: pass + try: + allSynths[i][j] = int(allSynths[i][j]) + except: + try: + allSynths[i][j] = float(allSynths[i][j]) + except: + pass allSynths[i] = Synth(*allSynths[i]) return allSynths - + + def synthsintocsv(): - ''' writes a CSV with one line for every available synthesis - each line contains the module, tech, width, target freq, and resulting metrics - ''' + """writes a CSV with one line for every available synthesis + each line contains the module, tech, width, target freq, and resulting metrics + 
""" print("This takes a moment...") bashCommand = "find . -path '*runs/*' -prune" - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] - specReg = re.compile('[a-zA-Z0-9]+') - metricReg = re.compile('-?\d+\.\d+[e]?[-+]?\d*') + specReg = re.compile("[a-zA-Z0-9]+") + metricReg = re.compile("-?\d+\.\d+[e]?[-+]?\d*") file = open("ppaData.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)']) + writer.writerow( + [ + "Module", + "Tech", + "Width", + "Target Freq", + "Delay", + "Area", + "L Power (nW)", + "D energy (nJ)", + ] + ) for oneSynth in allSynths: module, width, risc, tech, freq = specReg.findall(oneSynth)[1:6] metrics = [] - for phrase in [['Path Slack', 'qor'], ['Design Area', 'qor'], ['100', 'power']]: - bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*' + for phrase in [["Path Slack", "qor"], ["Design Area", "qor"], ["100", "power"]]: + bashCommand = 'grep "{}" ' + oneSynth[2:] + "/reports/*{}*" bashCommand = bashCommand.format(*phrase) - try: output = subprocess.check_output(['bash','-c', bashCommand]) - except: + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + except: print(module + width + tech + freq + " doesn't have reports") print("Consider running cleanup() first") nums = metricReg.findall(str(output)) nums = [float(m) for m in nums] metrics += nums - delay = 1000/int(freq) - metrics[0] + delay = 1000 / int(freq) - metrics[0] area = metrics[1] lpower = metrics[4] - denergy = (metrics[2] + metrics[3])/int(freq)*1000 # (switching + internal powers)*delay, more practical units for regression coefs + # switching, internal power in mW and leakage in nW + tpower = metrics[2] + metrics[3] + metrics[4]*0.000001 + # EDP (fJ/GHz) + denergy = ( + (metrics[2] + 
metrics[3] + metrics[4]*0.000001) / int(freq) + ) # (switching + internal powers)*delay, more practical units for regression coefs - if ('flop' in module): # since two flops in each module - [area, lpower, denergy] = [n/2 for n in [area, lpower, denergy]] + if "flop" in module: # since two flops in each module + [area, lpower, denergy] = [n / 2 for n in [area, lpower, denergy]] writer.writerow([module, tech, width, freq, delay, area, lpower, denergy]) file.close() + def cleanup(): - ''' removes runs that didn't work - ''' + """removes runs that didn't work""" bashCommand = 'grep -r "Error" runs/ppa*/reports/*qor*' - try: - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] for run in allSynths: - run = run.split('MHz')[0] - bc = 'rm -r '+ run + '*' - output = subprocess.check_output(['bash','-c', bc]) - except: pass + run = run.split("MHz")[0] + bc = "rm -r " + run + "*" + output = subprocess.check_output(["bash", "-c", bc]) + except: + pass bashCommand = "find . 
-path '*runs/*' -prune" - output = subprocess.check_output(['bash','-c', bashCommand]) - allSynths = output.decode("utf-8").split('\n')[:-1] + output = subprocess.check_output(["bash", "-c", bashCommand]) + allSynths = output.decode("utf-8").split("\n")[:-1] for oneSynth in allSynths: - for phrase in [['Path Length', 'qor']]: - bashCommand = 'grep "{}" '+ oneSynth[2:]+'/reports/*{}*' + for phrase in [["Path Length", "qor"]]: + bashCommand = 'grep "{}" ' + oneSynth[2:] + "/reports/*{}*" bashCommand = bashCommand.format(*phrase) - try: output = subprocess.check_output(['bash','-c', bashCommand]) - except: - bc = 'rm -r '+ oneSynth[2:] - output = subprocess.check_output(['bash','-c', bc]) + try: + output = subprocess.check_output(["bash", "-c", bashCommand]) + except: + bc = "rm -r " + oneSynth[2:] + output = subprocess.check_output(["bash", "-c", bc]) print("All cleaned up!") + def getVals(tech, module, var, freq=None, width=None): - ''' for a specified tech, module, and variable/metric - returns a list of values for that metric in ascending width order - works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width - ''' + """for a specified tech, module, and variable/metric + returns a list of values for that metric in ascending width order + works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width + """ if width != None: widthsToGet = width @@ -113,85 +138,132 @@ def getVals(tech, module, var, freq=None, width=None): metric = [] widthL = [] - if (freq != None): + if freq != None: for oneSynth in allSynths: - if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module) & (oneSynth.width != 1): + if ( + (oneSynth.freq == freq) + & (oneSynth.tech == tech) + & (oneSynth.module == module) + & (oneSynth.width != 1) + ): widthL += [oneSynth.width] osdict = oneSynth._asdict() metric += [osdict[var]] - metric = [x for _, x 
in sorted(zip(widthL, metric))] # ordering + metric = [x for _, x in sorted(zip(widthL, metric))] # ordering else: for w in widthsToGet: for oneSynth in bestSynths: - if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module): + if ( + (oneSynth.width == w) + & (oneSynth.tech == tech) + & (oneSynth.module == module) + ): osdict = oneSynth._asdict() met = osdict[var] metric += [met] return metric + def csvOfBest(filename): bestSynths = [] for tech in [x.tech for x in techSpecs]: for mod in modules: for w in widths: - m = np.Inf # large number to start + m = np.Inf # large number to start best = None - for oneSynth in allSynths: # best achievable, rightmost green - if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == mod): - if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq): + for oneSynth in allSynths: # best achievable, rightmost green + if ( + (oneSynth.width == w) + & (oneSynth.tech == tech) + & (oneSynth.module == mod) + ): + if (oneSynth.delay < m) & ( + 1000 / oneSynth.delay > oneSynth.freq + ): m = oneSynth.delay best = oneSynth if (best != None) & (best not in bestSynths): bestSynths += [best] - + file = open(filename, "w") writer = csv.writer(file) - writer.writerow(['Module', 'Tech', 'Width', 'Target Freq', 'Delay', 'Area', 'L Power (nW)', 'D energy (nJ)']) + writer.writerow( + [ + "Module", + "Tech", + "Width", + "Target Freq", + "Delay", + "Area", + "L Power (nW)", + "D energy (nJ)", + ] + ) for synth in bestSynths: writer.writerow(list(synth)) file.close() return bestSynths - + + def genLegend(fits, coefs, r2=None, spec=None, ale=False): - ''' generates a list of two legend elements (or just an equation if no r2 or spec) - labels line with fit equation and dots with r squared of the fit - ''' + """generates a list of two legend elements (or just an equation if no r2 or spec) + labels line with fit equation and dots with r squared of the fit + """ coefsr = [str(sigfig(c, 2)) for c in coefs] if ale: 
- if (normAddWidth == 32): - sub = 'S' + if normAddWidth == 32: + sub = "S" elif normAddWidth != 1: - print('Equations are wrong, check normAddWidth') + print("Equations are wrong, check normAddWidth") else: - sub = 'N' + sub = "N" - eqDict = {'c': '', 'l': sub, 's': '$'+sub+'^2$', 'g': '$log_2$('+sub+')', 'n': ''+sub+'$log_2$('+sub+')'} - eq = '' - ind = 0 + eqDict = { + "c": "", + "l": sub, + "s": "$" + sub + "^2$", + "g": "$log_2$(" + sub + ")", + "n": "" + sub + "$log_2$(" + sub + ")", + } + eq = "" + ind = 0 for k in eqDict.keys(): if k in fits: - if str(coefsr[ind]) != '0': eq += " + " + coefsr[ind] + eqDict[k] + if str(coefsr[ind]) != "0": + eq += " + " + coefsr[ind] + eqDict[k] ind += 1 - eq = eq[3:] # chop off leading ' + ' + eq = eq[3:] # chop off leading ' + ' - if (r2==None) or (spec==None): + if (r2 == None) or (spec == None): return eq else: legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq)] - legend_elements += [lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label='$R^2$='+ str(round(r2, 4)))] + legend_elements += [ + lines.Line2D( + [0], + [0], + color=spec.color, + ls="", + marker=spec.shape, + label="$R^2$=" + str(round(r2, 4)), + ) + ] return legend_elements -def oneMetricPlot(module, widths, var, freq=None, ax=None, fits='clsgn', norm=True, color=None): - ''' module: string module name - freq: int freq (MHz) - var: string delay, area, lpower, or denergy - fits: constant, linear, square, log2, Nlog2 - plots given variable vs width for all matching syntheses with regression - ''' + +def oneMetricPlot( + module, widths, var, freq=None, ax=None, fits="clsgn", norm=True, color=None +): + """module: string module name + freq: int freq (MHz) + var: string delay, area, lpower, or denergy + fits: constant, linear, square, log2, Nlog2 + plots given variable vs width for all matching syntheses with regression + """ singlePlot = True if ax or (freq == 10): singlePlot = False @@ -202,24 +274,27 @@ def 
oneMetricPlot(module, widths, var, freq=None, ax=None, fits='clsgn', norm=Tr allWidths = [] allMetrics = [] - ale = (var != 'delay') # if not delay, must be area, leakage, or energy + ale = var != "delay" # if not delay, must be area, leakage, or energy modFit = fitDict[module] fits = modFit[ale] if freq: - ls = '--' + ls = "--" else: - ls = '-' + ls = "-" for spec in techSpecs: + # print(f"Searching for module of spec {spec} and module {module} and var {var}") metric = getVals(spec.tech, module, var, freq=freq) - + # print(f"Found metric : {metric}") if norm: techdict = spec._asdict() norm = techdict[var] - metric = [m/norm for m in metric] + metric = [m / norm for m in metric] - if len(metric) == 5: # don't include the spec if we don't have points for all widths + if len(widths) == len(metric): + # don't include the spec if we don't have points for all widths + # print(f"Width \neq Metric") xp, pred, coefs, r2 = regress(widths, metric, fits, ale) fullLeg += genLegend(fits, coefs, r2, spec, ale=ale) c = color if color else spec.color @@ -228,44 +303,78 @@ def oneMetricPlot(module, widths, var, freq=None, ax=None, fits='clsgn', norm=Tr allWidths += widths allMetrics += metric - xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) - ax.plot(xp, pred, color='red', linestyle=ls) + # print(f"Widths passed into regress : {allWidths}") + # Not sure why this works (jes) - if allWidths doesn't have data widths does + if len(allWidths) > 0: + xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) + ax.plot(xp, pred, color="orange", linestyle=ls) + else: + xp, pred, coefs, r2 = regress(widths, metric, fits) + ax.plot(xp, pred, color="orange", linestyle=ls) if norm: - ylabeldic = {"lpower": "Leakage Power (add32)", "denergy": "Energy/Op (add32)", "area": "Area (add32)", "delay": "Delay (FO4)"} + ylabeldic = { + "lpower": "Leakage Power (add32)", + "denergy": "Energy/Op (add32)", + "area": "Area (add32)", + "delay": "Delay (FO4)", + } else: - ylabeldic = {"lpower": 
"Leakage Power (nW)", "denergy": "Dynamic Energy (nJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"} + ylabeldic = { + "lpower": "Leakage Power (nW)", + "denergy": "EDP (fJ/GHz)", + "area": "Area (sq microns)", + "delay": "Delay (ns)", + } ax.set_ylabel(ylabeldic[var]) ax.set_xticks(widths) - if singlePlot or (var == 'lpower') or (var == 'denergy'): + if singlePlot or (var == "lpower") or (var == "denergy"): ax.set_xlabel("Width (bits)") - if not singlePlot and ((var == 'delay') or (var == 'area')): - ax.tick_params(labelbottom=False) + if not singlePlot and ((var == "delay") or (var == "area")): + ax.tick_params(labelbottom=False) if singlePlot: fullLeg += genLegend(fits, coefs, r2, combined, ale=ale) - legLoc = 'upper left' if ale else 'center right' + legLoc = "upper left" if ale else "center right" ax.add_artist(ax.legend(handles=fullLeg, loc=legLoc)) - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" + titleStr = ( + " (target " + str(freq) + "MHz)" + if freq != None + else " (best achievable delay)" + ) ax.set_title(module + titleStr) - plt.savefig('.plots/'+ module + '_' + var + '.png') + plt.savefig(".plots/" + module + "_" + var + ".png") # plt.show() return r2 -def regress(widths, var, fits='clsgn', ale=False): - ''' fits a curve to the given points - returns lists of x and y values to plot that curve and coefs for the eq with r2 - ''' +def regress(widths, var, fits="clsgn", ale=False): + """fits a curve to the given points + returns lists of x and y values to plot that curve and coefs for the eq with r2 + """ + if len(var) != len(widths): + print( + f"There are not enough variables to match widths. 
Widths : {widths} Variables Found : {var}, padding to match may affect correctness (doing it anyways)\n" + ) + if len(widths) > len(var): + while len(widths) > len(var): + var.append(0.0) + if len(var) > len(widths): + while len(var) > len(widths): + widths.append(0) + + # widths = [8, 16, 32, 64, 128] + # print(f"Regress var : {var}") + # print(f"Regress widths : {widths}") funcArr = genFuncs(fits) - xp = np.linspace(min(widths)/2, max(widths)*1.1, 200) + xp = np.linspace(min(widths) / 2, max(widths) * 1.1, 200) xpToCalc = xp if ale: - widths = [w/normAddWidth for w in widths] - xpToCalc = [x/normAddWidth for x in xp] + widths = [w / normAddWidth for w in widths] + xpToCalc = [x / normAddWidth for x in xp] mat = [] for w in widths: @@ -273,8 +382,9 @@ def regress(widths, var, fits='clsgn', ale=False): for func in funcArr: row += [func(w)] mat += [row] - - y = np.array(var, dtype=np.float) + + # var = [0, 1, 2, 3, 4] + y = np.array(var, dtype=np.float64) coefs = opt.nnls(mat, y)[0] yp = [] @@ -290,19 +400,22 @@ def regress(widths, var, fits='clsgn', ale=False): return xp, pred, coefs, r2 + def makeCoefTable(): - ''' writes CSV with each line containing the coefficients for a regression fit - to a particular combination of module, metric (including both techs, normalized) - ''' + """writes CSV with each line containing the coefficients for a regression fit + to a particular combination of module, metric (including both techs, normalized) + """ file = open("ppaFitting.csv", "w") writer = csv.writer(file) - writer.writerow(['Module', 'Metric', 'Target', '1', 'N', 'N^2', 'log2(N)', 'Nlog2(N)', 'R^2']) + writer.writerow( + ["Module", "Metric", "Target", "1", "N", "N^2", "log2(N)", "Nlog2(N)", "R^2"] + ) for module in modules: for freq in [10, None]: - target = 'easy' if freq else 'hard' - for var in ['delay', 'area', 'lpower', 'denergy']: - ale = (var != 'delay') + target = "easy" if freq else "hard" + for var in ["delay", "area", "lpower", "denergy"]: + ale = var != 
"delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -311,12 +424,12 @@ def makeCoefTable(): metric = getVals(spec.tech, module, var, freq=freq) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] + metL += [m / norm for m in metric] - xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale) + xp, pred, coefs, r2 = regress(widths * 2, metL, fits, ale) coefs = np.ndarray.tolist(coefs) - coefsToWrite = [None]*5 - fitTerms = 'clsgn' + coefsToWrite = [None] * 5 + fitTerms = "clsgn" ind = 0 for i in range(len(fitTerms)): if fitTerms[i] in fits: @@ -327,25 +440,38 @@ def makeCoefTable(): file.close() + def sigfig(num, figs): - return '{:g}'.format(float('{:.{p}g}'.format(num, p=figs))) + return "{:g}".format(float("{:.{p}g}".format(num, p=figs))) + def makeEqTable(): - ''' writes CSV with each line containing the equations for fits for each metric - to a particular module (including both techs, normalized) - ''' + """writes CSV with each line containing the equations for fits for each metric + to a particular module (including both techs, normalized) + """ file = open("ppaEquations.csv", "w") writer = csv.writer(file) - writer.writerow(['Element', 'Best delay', 'Fast area', 'Fast leakage', 'Fast energy', 'Small area', 'Small leakage', 'Small energy']) + writer.writerow( + [ + "Element", + "Best delay", + "Fast area", + "Fast leakage", + "Fast energy", + "Small area", + "Small leakage", + "Small energy", + ] + ) for module in modules: eqs = [] for freq in [None, 10]: - for var in ['delay', 'area', 'lpower', 'denergy']: - if (var == 'delay') and (freq == 10): + for var in ["delay", "area", "lpower", "denergy"]: + if (var == "delay") and (freq == 10): pass else: - ale = (var != 'delay') + ale = var != "delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -354,9 +480,9 @@ def makeEqTable(): metric = getVals(spec.tech, module, var, freq=freq) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] 
+ metL += [m / norm for m in metric] - xp, pred, coefs, r2 = regress(widths*2, metL, fits, ale) + xp, pred, coefs, r2 = regress(widths * 2, metL, fits, ale) coefs = np.ndarray.tolist(coefs) eqs += [genLegend(fits, coefs, ale=ale)] row = [module] + eqs @@ -364,93 +490,113 @@ def makeEqTable(): file.close() -def genFuncs(fits='clsgn'): - ''' helper function for regress() - returns array of functions with one for each term desired in the regression fit - ''' + +def genFuncs(fits="clsgn"): + """helper function for regress() + returns array of functions with one for each term desired in the regression fit + """ funcArr = [] - if 'c' in fits: + if "c" in fits: funcArr += [lambda x: 1] - if 'l' in fits: + if "l" in fits: funcArr += [lambda x: x] - if 's' in fits: + if "s" in fits: funcArr += [lambda x: x**2] - if 'g' in fits: + if "g" in fits: funcArr += [lambda x: np.log2(x)] - if 'n' in fits: - funcArr += [lambda x: x*np.log2(x)] + if "n" in fits: + funcArr += [lambda x: x * np.log2(x)] return funcArr + def noOutliers(median, freqs, delays, areas): - ''' returns a pared down list of freqs, delays, and areas - cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area - helper function to freqPlot() - ''' - f=[] - d=[] - a=[] + """returns a pared down list of freqs, delays, and areas + cuts out any syntheses in which target freq isn't within 75% of the min delay target to focus on interesting area + helper function to freqPlot() + """ + f = [] + d = [] + a = [] for i in range(len(freqs)): - norm = freqs[i]/median - if (norm > 0.4) & (norm<1.4): + norm = freqs[i] / median + if (norm > 0.4) & (norm < 1.4): f += [freqs[i]] d += [delays[i]] a += [areas[i]] - + return f, d, a + def freqPlot(tech, mod, width): - ''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width - ''' + """plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width""" 
freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: - if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech): - ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period + if ( + (mod == oneSynth.module) + & (width == oneSynth.width) + & (tech == oneSynth.tech) + ): + ind = ( + 1000 / oneSynth.delay < oneSynth.freq + ) # when delay is within target clock period freqsL[ind] += [oneSynth.freq] delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] median = np.median(list(flatten(freqsL))) - + f, (ax1, ax2) = plt.subplots(2, 1, sharex=True) for ax in (ax1, ax2): - ax.ticklabel_format(useOffset=False, style='plain') + ax.ticklabel_format(useOffset=False, style="plain") - for ind in [0,1]: + for ind in [0, 1]: areas = areasL[ind] delays = delaysL[ind] freqs = freqsL[ind] - freqs, delays, areas = noOutliers(median, freqs, delays, areas) # comment out to see all syntheses + freqs, delays, areas = noOutliers( + median, freqs, delays, areas + ) # comment out to see all syntheses - c = 'blue' if ind else 'green' + c = "blue" if ind else "green" ax1.scatter(freqs, delays, color=c) ax2.scatter(freqs, areas, color=c) - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'), - lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')] + legend_elements = [ + lines.Line2D( + [0], [0], color="green", ls="", marker="o", label="timing achieved" + ), + lines.Line2D([0], [0], color="blue", ls="", marker="o", label="slack violated"), + ] ax1.legend(handles=legend_elements) width = str(width) - + ax2.set_xlabel("Target Freq (MHz)") - ax1.set_ylabel('Delay (ns)') - ax2.set_ylabel('Area (sq microns)') - ax1.set_title(mod + '_' + width) - if ('mux' in mod) & ('d' in mod): + ax1.set_ylabel("Delay (ns)") + ax2.set_ylabel("Area (sq microns)") + ax1.set_title(mod + "_" + width) + if ("mux" in mod) & ("d" in mod): width = mod - mod = 
'muxd' - plt.savefig('./plots/freqBuckshot/' + tech + '/' + mod + '/' + width + '.png') + mod = "muxd" + plt.savefig("./plots/freqBuckshot/" + tech + "/" + mod + "/" + width + ".png") # plt.show() + def squareAreaDelay(tech, mod, width): - ''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width - ''' + """plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width""" global allSynths freqsL, delaysL, areasL = ([[], []] for i in range(3)) for oneSynth in allSynths: - if (mod == oneSynth.module) & (width == oneSynth.width) & (tech == oneSynth.tech): - ind = (1000/oneSynth.delay < oneSynth.freq) # when delay is within target clock period + if ( + (mod == oneSynth.module) + & (width == oneSynth.width) + & (tech == oneSynth.tech) + ): + ind = ( + 1000 / oneSynth.delay < oneSynth.freq + ) # when delay is within target clock period freqsL[ind] += [oneSynth.freq] delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] @@ -458,182 +604,212 @@ def squareAreaDelay(tech, mod, width): f, (ax1) = plt.subplots(1, 1) ax2 = ax1.twinx() - for ind in [0,1]: + for ind in [0, 1]: areas = areasL[ind] delays = delaysL[ind] targets = freqsL[ind] - targets = [1000/f for f in targets] - - targets, delays, areas = noOutliers(targets, delays, areas) # comment out to see all - + targets = [1000 / f for f in targets] + + targets, delays, areas = noOutliers( + targets, delays, areas + ) # comment out to see all + if not ind: achievedDelays = delays - c = 'blue' if ind else 'green' - ax1.scatter(targets, delays, marker='^', color=c) - ax2.scatter(targets, areas, marker='s', color=c) - - bestAchieved = min(achievedDelays) - - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='^', label='delay (timing achieved)'), - lines.Line2D([0], [0], color='green', ls='', marker='s', label='area (timing achieved)'), - lines.Line2D([0], [0], color='blue', ls='', marker='^', label='delay (timing 
violated)'), - lines.Line2D([0], [0], color='blue', ls='', marker='s', label='area (timing violated)')] + c = "blue" if ind else "green" + ax1.scatter(targets, delays, marker="^", color=c) + ax2.scatter(targets, areas, marker="s", color=c) + + bestAchieved = min(achievedDelays) + + legend_elements = [ + lines.Line2D( + [0], [0], color="green", ls="", marker="^", label="delay (timing achieved)" + ), + lines.Line2D( + [0], [0], color="green", ls="", marker="s", label="area (timing achieved)" + ), + lines.Line2D( + [0], [0], color="blue", ls="", marker="^", label="delay (timing violated)" + ), + lines.Line2D( + [0], [0], color="blue", ls="", marker="s", label="area (timing violated)" + ), + ] + + ax2.legend(handles=legend_elements, loc="upper left") - ax2.legend(handles=legend_elements, loc='upper left') - ax1.set_xlabel("Delay Targeted (ns)") ax1.set_ylabel("Delay Achieved (ns)") - ax2.set_ylabel('Area (sq microns)') - ax1.set_title(mod + '_' + str(width)) + ax2.set_ylabel("Area (sq microns)") + ax1.set_title(mod + "_" + str(width)) squarify(f) xvals = np.array(ax1.get_xlim()) - frac = (min(flatten(delaysL))-xvals[0])/(xvals[1]-xvals[0]) - areaLowerLim = min(flatten(areasL))-100 - areaUpperLim = max(flatten(areasL))/frac + areaLowerLim + frac = (min(flatten(delaysL)) - xvals[0]) / (xvals[1] - xvals[0]) + areaLowerLim = min(flatten(areasL)) - 100 + areaUpperLim = max(flatten(areasL)) / frac + areaLowerLim ax2.set_ylim([areaLowerLim, areaUpperLim]) ax1.plot(xvals, xvals, ls="--", c=".3") - ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls='--') + ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls="--") - plt.savefig('./plots/squareareadelay_' + mod + '_' + str(width) + '.png') + plt.savefig("./plots/squareareadelay_" + mod + "_" + str(width) + ".png") # plt.show() + def squarify(fig): - ''' helper function for squareAreaDelay() - forces matplotlib figure to be a square - ''' + """helper function for squareAreaDelay() + 
forces matplotlib figure to be a square + """ w, h = fig.get_size_inches() if w > h: t = fig.subplotpars.top b = fig.subplotpars.bottom - axs = h*(t-b) - l = (1.-axs/w)/2 - fig.subplots_adjust(left=l, right=1-l) + axs = h * (t - b) + l = (1.0 - axs / w) / 2 + fig.subplots_adjust(left=l, right=1 - l) else: t = fig.subplotpars.right b = fig.subplotpars.left - axs = w*(t-b) - l = (1.-axs/h)/2 - fig.subplots_adjust(bottom=l, top=1-l) + axs = w * (t - b) + l = (1.0 - axs / h) / 2 + fig.subplots_adjust(bottom=l, top=1 - l) -def plotPPA(mod, widths, freq=None, norm=True, aleOpt=False): - ''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits - if no freq specified, uses the synthesis with best achievable delay for each width - overlays data from both techs - ''' - with mpl.rc_context({"figure.figsize": (7,3.46)}): + +def plotPPA(mod, freq=None, norm=True, aleOpt=False): + """for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits + if no freq specified, uses the synthesis with best achievable delay for each width + overlays data from both techs + """ + with mpl.rc_context({"figure.figsize": (7, 3.46)}): fig, axs = plt.subplots(2, 2) - arr = [['delay', 'area'], ['lpower', 'denergy']] + arr = [["delay", "area"], ["lpower", "denergy"]] freqs = [freq] - if aleOpt: freqs += [10] + if aleOpt: + freqs += [10] for i in [0, 1]: for j in [0, 1]: leg = [] for f in freqs: - if (arr[i][j]=='delay') and (f==10): + if (arr[i][j] == "delay") and (f == 10): pass else: - r2 = oneMetricPlot(mod, widths, arr[i][j], ax=axs[i, j], freq=f, norm=norm) - ls = '--' if f else '-' - leg += [lines.Line2D([0], [0], color='red', label='$R^2$='+str(round(r2, 4)), linestyle=ls)] + # print(f"Pasing in widths {widths}") + r2 = oneMetricPlot( + mod, widths, arr[i][j], ax=axs[i, j], freq=f, norm=norm + ) + ls = "--" if f else "-" + leg += [ + lines.Line2D( + [0], + [0], + color="orange", + label="$R^2$=" + str(round(r2, 
4)), + linestyle=ls, + ) + ] - if (mod in ['flop', 'csa']) & (arr[i][j] == 'delay'): + if (mod in ["flop", "csa"]) & (arr[i][j] == "delay"): axs[i, j].set_ylim(ymin=0) ytop = axs[i, j].get_ylim()[1] - axs[i, j].set_ylim(ymax=1.1*ytop) + axs[i, j].set_ylim(ymax=1.1 * ytop) else: axs[i, j].legend(handles=leg, handlelength=1.5) - - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else "" - plt.suptitle(mod + titleStr) - plt.tight_layout(pad=0.05, w_pad=1, h_pad=0.5, rect=(0,0,1,0.97)) - if freq != 10: - n = 'normalized' if norm else 'unnormalized' - saveStr = './plots/'+ n + '/' + mod + '.png' + titleStr = " (target " + str(freq) + "MHz)" if freq != None else "" + plt.suptitle(mod + titleStr) + plt.tight_layout(pad=0.05, w_pad=1, h_pad=0.5, rect=(0, 0, 1, 0.97)) + + if freq != 10: + n = "normalized" if norm else "unnormalized" + saveStr = "./plots/" + n + "/" + mod + "_" + ".png" + print(f"Saving to {saveStr}") plt.savefig(saveStr) # plt.show() + def makeLineLegend(): - ''' generates legend to accompany normalized plots - ''' - plt.rcParams["figure.figsize"] = (5.5,0.3) + """generates legend to accompany normalized plots""" + plt.rcParams["figure.figsize"] = (5.5, 0.3) fig = plt.figure() - fullLeg = [lines.Line2D([0], [0], color='black', label='fastest', linestyle='-')] - fullLeg += [lines.Line2D([0], [0], color='black', label='smallest', linestyle='--')] - fullLeg += [lines.Line2D([0], [0], color='blue', label='tsmc28', marker='^')] - fullLeg += [lines.Line2D([0], [0], color='blue', label='tsmc28psyn', marker='x')] - fullLeg += [lines.Line2D([0], [0], color='green', label='sky90', marker='o')] - fullLeg += [lines.Line2D([0], [0], color='green', label='sky130', marker='+')] - fullLeg += [lines.Line2D([0], [0], color='red', label='combined', marker='_')] - fig.legend(handles=fullLeg, ncol=5, handlelength=1.4, loc='center') - saveStr = './plots/legend.png' + fullLeg = [lines.Line2D([0], [0], color="black", label="fastest", linestyle="-")] + fullLeg += 
[lines.Line2D([0], [0], color="black", label="smallest", linestyle="--")] + fullLeg += [lines.Line2D([0], [0], color="blue", label="tsmc28", marker="^")] + fullLeg += [lines.Line2D([0], [0], color="blue", label="tsmc28psyn", marker="x")] + fullLeg += [lines.Line2D([0], [0], color="green", label="sky90", marker="o")] + fullLeg += [lines.Line2D([0], [0], color="purple", label="sky130", marker="+")] + fullLeg += [lines.Line2D([0], [0], color="orange", label="combined", marker="_")] + fig.legend(handles=fullLeg, ncol=5, handlelength=1.4, loc="center") + saveStr = "./plots/legend.png" plt.savefig(saveStr) -def muxPlot(fits='clsgn', norm=True): - ''' module: string module name - freq: int freq (MHz) - var: string delay, area, lpower, or denergy - fits: constant, linear, square, log2, Nlog2 - plots given variable vs width for all matching syntheses with regression - ''' + +def muxPlot(fits="clsgn", norm=True): + """module: string module name + freq: int freq (MHz) + var: string delay, area, lpower, or denergy + fits: constant, linear, square, log2, Nlog2 + plots given variable vs width for all matching syntheses with regression + """ ax = plt.gca() inputs = [2, 4, 8] - allInputs = inputs*2 + allInputs = inputs * 2 fullLeg = [] - for crit in ['data', 'control']: + for crit in ["data", "control"]: allMetrics = [] - muxes = ['mux2', 'mux4', 'mux8'] + muxes = ["mux2", "mux4", "mux8"] - if crit == 'data': - ls = '--' - muxes = [m + 'd' for m in muxes] - elif crit == 'control': - ls = '-' + if crit == "data": + ls = "--" + muxes = [m + "d" for m in muxes] + elif crit == "control": + ls = "-" for spec in techSpecs: metric = [] for module in muxes: - metric += getVals(spec.tech, module, 'delay', width=[1]) - + metric += getVals(spec.tech, module, "delay", width=[1]) + if norm: techdict = spec._asdict() - norm = techdict['delay'] - metric = [m/norm for m in metric] + norm = techdict["delay"] + metric = [m / norm for m in metric] # print(spec.tech, ' ', metric) - if len(metric) == 
3: # don't include the spec if we don't have points for all + if ( + len(metric) == 3 + ): # don't include the spec if we don't have points for all xp, pred, coefs, r2 = regress(inputs, metric, fits, ale=False) ax.scatter(inputs, metric, color=spec.color, marker=spec.shape) ax.plot(xp, pred, color=spec.color, linestyle=ls) allMetrics += metric xp, pred, coefs, r2 = regress(allInputs, allMetrics, fits) - ax.plot(xp, pred, color='red', linestyle=ls) - fullLeg += [lines.Line2D([0], [0], color='red', label=crit, linestyle=ls)] - - ax.set_ylabel('Delay (FO4)') + ax.plot(xp, pred, color="orange", linestyle=ls) + fullLeg += [lines.Line2D([0], [0], color="orange", label=crit, linestyle=ls)] + + ax.set_ylabel("Delay (FO4)") ax.set_xticks(inputs) ax.set_xlabel("Number of inputs") - ax.set_title('mux timing') - - ax.legend(handles = fullLeg) - plt.savefig('./plots/mux.png') + ax.set_title("mux timing") + + ax.legend(handles=fullLeg) + plt.savefig("./plots/mux.png") + def stdDevError(): - ''' calculates std deviation and error for paper-writing purposes - ''' - for var in ['delay', 'area', 'lpower', 'denergy']: + """calculates std deviation and error for paper-writing purposes""" + for var in ["delay", "area", "lpower", "denergy"]: errlist = [] for module in modules: - ale = (var != 'delay') + ale = var != "delay" metL = [] modFit = fitDict[module] fits = modFit[ale] @@ -643,20 +819,20 @@ def stdDevError(): metric = getVals(spec.tech, module, var) techdict = spec._asdict() norm = techdict[var] - metL += [m/norm for m in metric] + metL += [m / norm for m in metric] if ale: - ws = [w/normAddWidth for w in widths] + ws = [w / normAddWidth for w in widths] else: ws = widths - ws = ws*2 + ws = ws * 2 mat = [] for w in ws: row = [] for func in funcArr: row += [func(w)] mat += [row] - + y = np.array(metL, dtype=np.float) coefs = opt.nnls(mat, y)[0] @@ -665,68 +841,84 @@ def stdDevError(): n = [func(w) for func in funcArr] yp += [sum(np.multiply(coefs, n))] - if (var == 'delay') & 
(module == 'flop'): + if (var == "delay") & (module == "flop"): pass - elif (module == 'mult') & ale: + elif (module == "mult") & ale: pass else: for i in range(len(y)): - errlist += [abs(y[i]/yp[i]-1)] + errlist += [abs(y[i] / yp[i] - 1)] # print(module, ' ', var, ' ', np.mean(errlist[-10:])) - + avgErr = np.mean(errlist) stdv = np.std(errlist) - print(var, ' ', avgErr, ' ', stdv) + print(var, " ", avgErr, " ", stdv) + def makePlotDirectory(): - ''' creates plots directory in same level as this script to store plots in - ''' + """creates plots directory in same level as this script to store plots in""" current_directory = os.getcwd() - final_directory = os.path.join(current_directory, 'plots') + final_directory = os.path.join(current_directory, "plots") if not os.path.exists(final_directory): os.makedirs(final_directory) os.chdir(final_directory) - for folder in ['freqBuckshot', 'normalized', 'unnormalized']: + for folder in ["freqBuckshot", "normalized", "unnormalized"]: new_directory = os.path.join(final_directory, folder) if not os.path.exists(new_directory): os.makedirs(new_directory) os.chdir(new_directory) - if 'freq' in folder: - for tech in ['sky90', 'sky130', 'tsmc28', 'tsmc28psyn']: + if "freq" in folder: + for tech in ["sky90", "sky130", "tsmc28", "tsmc28psyn"]: for mod in modules: tech_directory = os.path.join(new_directory, tech) mod_directory = os.path.join(tech_directory, mod) if not os.path.exists(mod_directory): os.makedirs(mod_directory) - os.chdir('..') - + os.chdir("..") + os.chdir(current_directory) - -if __name__ == '__main__': + + +if __name__ == "__main__": ############################## # set up stuff, global variables - widths = [64, 128] - modules = ['adder', 'comparator'] + widths = [8, 16, 32, 64, 128] + modules = ["adder", "comparator"] - normAddWidth = 32 # divisor to use with N since normalizing to add_32 + normAddWidth = 32 # divisor to use with N since normalizing to add_32 - fitDict = {'adder': ['cg', 'l', 'l'], 'mul': ['cg', 
's', 's'], 'comparator': ['cg', 'l', 'l'], 'csa': ['c', 'l', 'l'], 'shifter': ['cg', 'l', 'ln'], 'flop': ['c', 'l', 'l'], 'binencoder': ['cg', 'l', 'l']} - fitDict.update(dict.fromkeys(['mux2', 'mux4', 'mux8'], ['cg', 'l', 'l'])) + fitDict = { + "adder": ["cg", "l", "l"], + "mul": ["cg", "s", "s"], + "comparator": ["cg", "l", "l"], + "csa": ["c", "l", "l"], + "shifter": ["cg", "l", "ln"], + "flop": ["c", "l", "l"], + "binencoder": ["cg", "l", "l"], + } + fitDict.update(dict.fromkeys(["mux2", "mux4", "mux8"], ["cg", "l", "l"])) - TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") - techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['sky130', 'red', 'o', 43.2e-3, 1440.600027, 714.057, 0.658022690438], ['tsmc28', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594], ['tsmc28psyn', 'blue', '^', 12.2e-3, 209.286002, 1060.0, .08153281695882594]] - techSpecs = [TechSpec(*t) for t in techSpecs] - combined = TechSpec('combined fit', 'red', '_', 0, 0, 0, 0) + TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") + # FO4 delay information information + techSpecs = [ + # ["sky90", "green", "o", 43.2e-3, 1440.600027, 714.057, 0.658022690438], + # Area/Lpower/Denergy needs to be corrected here (jes) + ["sky130", "orange", "o", 99.5e-3, 1440.600027, 714.057, 0.658022690438], + # ["tsmc28", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], + # ["tsmc28psyn", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], + ] + techSpecs = [TechSpec(*t) for t in techSpecs] + combined = TechSpec("combined fit", "orange", "_", 0, 0, 0, 0) ############################## # cleanup() # run to remove garbage synth runs - synthsintocsv() # slow, run only when new synth runs to add to csv - - allSynths = synthsfromcsv('ppaData.csv') # your csv here! 
- bestSynths = csvOfBest('bestSynths.csv') - makePlotDirectory() + synthsintocsv() # slow, run only when new synth runs to add to csv + + allSynths = synthsfromcsv("ppaData.csv") # your csv here! + bestSynths = csvOfBest("bestSynths.csv") + makePlotDirectory() # ### other functions # makeCoefTable() @@ -734,12 +926,12 @@ if __name__ == '__main__': # muxPlot() # stdDevError() - for mod in modules: - for w in widths: - #freqPlot('sky90', mod, w) - freqPlot('sky130', mod, w) - #freqPlot('tsmc28', mod, w) - #freqPlot('tsmc28psyn', mod, w) - #plotPPA(mod, widths, norm=False) - #plotPPA(mod, aleOpt=True) - plt.close('all') + for mod in modules: + for w in widths: + # freqPlot('sky90', mod, w) + # freqPlot("sky130", mod, w) + # freqPlot('tsmc28', mod, w) + # freqPlot('tsmc28psyn', mod, w) + plotPPA(mod, norm=False) + # plotPPA(mod, aleOpt=True) + plt.close("all") From 9dce08a743060ceae695a544f9e7b038041a33e5 Mon Sep 17 00:00:00 2001 From: "James E. Stine" Date: Tue, 14 Nov 2023 02:41:44 -0600 Subject: [PATCH 47/48] minor typo on ppaSynth and ppaAnalyze --- synthDC/ppa/ppaAnalyze.py | 31 ++++++++++++++----------------- synthDC/ppa/ppaSynth.py | 2 +- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/synthDC/ppa/ppaAnalyze.py b/synthDC/ppa/ppaAnalyze.py index 9af15fd80..bd98e79be 100755 --- a/synthDC/ppa/ppaAnalyze.py +++ b/synthDC/ppa/ppaAnalyze.py @@ -82,11 +82,9 @@ def synthsintocsv(): delay = 1000 / int(freq) - metrics[0] area = metrics[1] lpower = metrics[4] - # switching, internal power in mW and leakage in nW - tpower = metrics[2] + metrics[3] + metrics[4]*0.000001 - # EDP (fJ/GHz) + tpower = (metrics[2] + metrics[3] + metrics[4]*.000001) denergy = ( - (metrics[2] + metrics[3] + metrics[4]*0.000001) / int(freq) + (tpower) / int(freq) * 1000 ) # (switching + internal powers)*delay, more practical units for regression coefs if "flop" in module: # since two flops in each module @@ -304,7 +302,6 @@ def oneMetricPlot( allMetrics += metric # print(f"Widths 
passed into regress : {allWidths}") - # Not sure why this works (jes) - if allWidths doesn't have data widths does if len(allWidths) > 0: xp, pred, coefs, r2 = regress(allWidths, allMetrics, fits) ax.plot(xp, pred, color="orange", linestyle=ls) @@ -322,7 +319,7 @@ def oneMetricPlot( else: ylabeldic = { "lpower": "Leakage Power (nW)", - "denergy": "EDP (fJ/GHz)", + "denergy": "Dynamic Energy (nJ)", "area": "Area (sq microns)", "delay": "Delay (ns)", } @@ -355,9 +352,9 @@ def regress(widths, var, fits="clsgn", ale=False): returns lists of x and y values to plot that curve and coefs for the eq with r2 """ if len(var) != len(widths): - print( - f"There are not enough variables to match widths. Widths : {widths} Variables Found : {var}, padding to match may affect correctness (doing it anyways)\n" - ) + # print( + # f"There are not enough variables to match widths. Widths : {widths} Variables Found : {var}, padding to match may affect correctness (doing it anyways)\n" + # ) if len(widths) > len(var): while len(widths) > len(var): var.append(0.0) @@ -792,8 +789,8 @@ def muxPlot(fits="clsgn", norm=True): allMetrics += metric xp, pred, coefs, r2 = regress(allInputs, allMetrics, fits) - ax.plot(xp, pred, color="orange", linestyle=ls) - fullLeg += [lines.Line2D([0], [0], color="orange", label=crit, linestyle=ls)] + ax.plot(xp, pred, color="red", linestyle=ls) + fullLeg += [lines.Line2D([0], [0], color="red", label=crit, linestyle=ls)] ax.set_ylabel("Delay (FO4)") ax.set_xticks(inputs) @@ -885,7 +882,7 @@ if __name__ == "__main__": ############################## # set up stuff, global variables widths = [8, 16, 32, 64, 128] - modules = ["adder", "comparator"] + modules = ["adder"] normAddWidth = 32 # divisor to use with N since normalizing to add_32 @@ -903,14 +900,14 @@ if __name__ == "__main__": TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") # FO4 delay information information techSpecs = [ - # ["sky90", "green", "o", 43.2e-3, 1440.600027, 
714.057, 0.658022690438], + #["sky90", "green", "o", 43.2e-3, 1440.600027, 714.057, 0.658022690438], # Area/Lpower/Denergy needs to be corrected here (jes) ["sky130", "orange", "o", 99.5e-3, 1440.600027, 714.057, 0.658022690438], # ["tsmc28", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], # ["tsmc28psyn", "blue", "^", 12.2e-3, 209.286002, 1060.0, 0.08153281695882594], ] techSpecs = [TechSpec(*t) for t in techSpecs] - combined = TechSpec("combined fit", "orange", "_", 0, 0, 0, 0) + combined = TechSpec("combined fit", "red", "_", 0, 0, 0, 0) ############################## # cleanup() # run to remove garbage synth runs @@ -928,10 +925,10 @@ if __name__ == "__main__": for mod in modules: for w in widths: - # freqPlot('sky90', mod, w) - # freqPlot("sky130", mod, w) + #freqPlot('sky90', mod, w) + freqPlot("sky130", mod, w) # freqPlot('tsmc28', mod, w) # freqPlot('tsmc28psyn', mod, w) plotPPA(mod, norm=False) - # plotPPA(mod, aleOpt=True) + plotPPA(mod, aleOpt=True) plt.close("all") diff --git a/synthDC/ppa/ppaSynth.py b/synthDC/ppa/ppaSynth.py index 07a342e26..30fe1254f 100755 --- a/synthDC/ppa/ppaSynth.py +++ b/synthDC/ppa/ppaSynth.py @@ -84,7 +84,7 @@ if __name__ == '__main__': synthsToRun = freqSweep(module, width, tech) ##### Run a sweep for multiple modules/widths based on best delay found in existing syntheses - modules = ['adder', "comparator"] + modules = ['adder'] widths = [8, 16, 32, 64, 128] tech = 'sky130' synthsToRun = freqModuleSweep(widths, modules, tech) From fdb75203cb8292ad07858e83c029c559bf4f44a2 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 14 Nov 2023 10:55:22 -0600 Subject: [PATCH 48/48] Added cbop to to rv32gc. 
--- config/rv32gc/config.vh | 2 +- sim/lint-wally | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index 69d3329f5..3b306a005 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -45,7 +45,7 @@ localparam ZFH_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; -localparam ZICBOP_SUPPORTED = 0; +localparam ZICBOP_SUPPORTED = 1; localparam ZICCLSM_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVNAPOT_SUPPORTED = 0; diff --git a/sim/lint-wally b/sim/lint-wally index 263fb864f..eb6ad62b0 100755 --- a/sim/lint-wally +++ b/sim/lint-wally @@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/ verilator=`which verilator` basepath=$(dirname $0)/.. -for config in fpga rv32e rv64gc rv32gc rv32imc rv32i rv64i rv64fpquad; do +for config in rv32e rv64gc rv32gc rv32imc rv32i rv64i rv64fpquad; do #for config in rv64gc; do echo "$config linting..." if !($verilator --no-timing --lint-only "$@" --top-module wallywrapper "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/cvw.sv $basepath/testbench/wallywrapper.sv $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes ); then