From 1fd678b43348fd447a7701c4d8fec968593f9ec6 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 14 Feb 2024 12:14:19 -0600 Subject: [PATCH 01/52] Optimized the align logic for loads. --- src/lsu/align.sv | 6 +- src/lsu/endianswapdouble.sv | 114 ++++++++++++++++++++ src/lsu/lsu.sv | 29 ++++-- src/lsu/subwordreaddouble.sv | 196 +++++++++++++++++++++++++++++++++++ 4 files changed, 331 insertions(+), 14 deletions(-) create mode 100644 src/lsu/endianswapdouble.sv create mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/align.sv b/src/lsu/align.sv index d516dad2a..7c3703886 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -53,7 +53,7 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline output logic SelStoreDelay, //*** this is bad. really don't like moving this outside - output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [P.LLEN*2-1:0] ReadDataWordSpillAllM, output logic SpillStallM); localparam LLENINBYTES = P.LLEN/8; @@ -67,8 +67,6 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic MisalignedM; - logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; - logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; logic [P.XLEN-1:0] IEUAdrIncrementM; @@ -148,8 +146,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; - assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. 
Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv new file mode 100644 index 000000000..133149f0e --- /dev/null +++ b/src/lsu/endianswapdouble.sv @@ -0,0 +1,114 @@ +/////////////////////////////////////////// +// endianswap.sv +// +// Written: David_Harris@hmc.edu +// Created: 7 May 2022 +// Modified: 18 January 2023 +// +// Purpose: Swap byte order for Big-Endian accesses +// +// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module endianswapdouble #(parameter LEN) ( // reverses the byte order of a when BigEndianM is set; LEN is 256, 128 or 64 (any other value is handled as 32) + input logic BigEndianM, + input logic [LEN-1:0] a, + output logic [LEN-1:0] y +); + + if(LEN == 256) begin + always_comb + if (BigEndianM) begin // swap endianness + y[255:248] = a[7:0]; + y[247:240] = a[15:8]; + y[239:232] = a[23:16]; + y[231:224] = a[31:24]; + y[223:216] = a[39:32]; + y[215:208] = a[47:40]; + y[207:200] = a[55:48]; + y[199:192] = a[63:56]; + y[191:184] = a[71:64]; + y[183:176] = a[79:72]; + y[175:168] = a[87:80]; + y[167:160] = a[95:88]; + y[159:152] = a[103:96]; + y[151:144] = a[111:104]; + y[143:136] = a[119:112]; + y[135:128] = a[127:120]; + y[127:120] = a[135:128]; + y[119:112] = a[143:136]; + y[111:104] = a[151:144]; + y[103:96] = a[159:152]; + y[95:88] = a[167:160]; + y[87:80] = a[175:168]; + y[79:72] = a[183:176]; + y[71:64] = a[191:184]; + y[63:56] = a[199:192]; + y[55:48] = a[207:200]; + y[47:40] = a[215:208]; + y[39:32] = a[223:216]; + y[31:24] = a[231:224]; + y[23:16] = a[239:232]; + y[15:8] = a[247:240]; + y[7:0] = a[255:248]; + end else y = a; + end else if(LEN == 128) begin + always_comb + if (BigEndianM) begin // swap endianness + y[127:120] = a[7:0]; + y[119:112] = a[15:8]; + y[111:104] = a[23:16]; + y[103:96] = a[31:24]; + y[95:88] = a[39:32]; + y[87:80] = a[47:40]; + y[79:72] = a[55:48]; + y[71:64] = a[63:56]; + y[63:56] = a[71:64]; + y[55:48] = a[79:72]; + y[47:40] = a[87:80]; + y[39:32] = a[95:88]; + y[31:24] = a[103:96]; + y[23:16] = a[111:104]; + y[15:8] = a[119:112]; + y[7:0] = a[127:120]; + end else y = a; + end else if(LEN == 64) begin + always_comb + if (BigEndianM) begin // swap endianness + y[63:56] = a[7:0]; + y[55:48] = a[15:8]; + y[47:40] = a[23:16]; + y[39:32] = a[31:24]; + y[31:24] = a[39:32]; + y[23:16] = a[47:40]; + y[15:8] = a[55:48]; + y[7:0] = a[63:56]; + end else y = a; + end else begin + always_comb + if (BigEndianM) begin + y[31:24] = a[7:0]; + y[23:16] = 
a[15:8]; + y[15:8] = a[23:16]; + y[7:0] = a[31:24]; + end else y = a; + end +endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index f53bb9296..591353ac7 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -128,9 +128,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ - logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data - logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data - logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data + logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data + logic [MLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection logic [P.LLEN-1:0] ReadDataM; // Final read data @@ -155,6 +154,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic SelDTIM; // Select DTIM rather than bus or D$ logic [P.XLEN-1:0] WriteDataZM; logic LSULoadPageFaultM, LSUStoreAmoPageFaultM; + logic [MLEN-1:0] ReadDataWordSpillAllM; ///////////////////////////////////////////////////////////////////////////////////////////// // Pipeline for IEUAdr E to M @@ -168,7 +168,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM, + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -176,7 +176,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtE = {2'b00, IEUAdrE}; assign SelSpillE = '0; - assign DCacheReadDataWordSpillM = DCacheReadDataWordM; + assign 
ReadDataWordSpillAllM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; @@ -298,6 +298,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( localparam AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^ localparam LINELEN = P.DCACHE_LINELENINBITS; // Number of bits in cacheline localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) + localparam MLENPOVERAHBW = MLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) localparam CACHEWORDLEN = P.ZICCLSM_SUPPORTED ? 2*P.LLEN : P.LLEN; // Width of the cache's input and output data buses. Misaligned doubles width for fast access logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline @@ -361,9 +362,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. // pma should generate exception for LLEN read to periph. 
+/* -----\/----- EXCLUDED -----\/----- mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); + -----/\----- EXCLUDED -----/\----- */ + mux3 #(MLEN) UnCachedDataMux(.d0(ReadDataWordSpillAllM), .d1({MLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), + .d2({{(MLEN-P.XLEN){1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), + .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface logic [1:0] BusRW; // Non-DTIM memory access, ignore cacheableM logic [P.XLEN-1:0] FetchBuffer; @@ -416,9 +422,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// - - subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + + if(MISALIGN_SUPPORT) begin + subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + end else begin + subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + end subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks @@ -438,7 +449,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if (P.BIGENDIAN_SUPPORTED) begin:endian endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswap #(P.LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); + 
endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv new file mode 100644 index 000000000..936240cf7 --- /dev/null +++ b/src/lsu/subwordreaddouble.sv @@ -0,0 +1,196 @@ +/////////////////////////////////////////// +// subwordread.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Extract subwords and sign extend for reads +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordreaddouble #(parameter LLEN) + ( + input logic [LLEN*2-1:0] ReadDataWordMuxM, // double-width read data; the access may start at any byte offset + input logic [2:0] PAdrM, // byte offset of the access + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN-1:0] ReadDataM // selected subword, sign/zero extended or NaN boxed +); + + logic [7:0] ByteM; + logic [15:0] HalfwordM; + logic [4:0] PAdrSwap; + logic [4:0] BigEndianPAdr; + logic [4:0] LengthM; + + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + + always_comb + case(Funct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; + 3'b001: LengthM = 5'd2; + 3'b010: LengthM = 5'd4; + 3'b011: LengthM = 5'd8; + 3'b100: LengthM = 5'd16; + default: LengthM = 5'd8; + endcase + + if (LLEN == 128) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + logic [127:0] QdWordM; // full quad word: every case arm below assigns a 128-bit slice + always_comb + case(PAdrSwap) + 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; + 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; + 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; + 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; + 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; + 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; + 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; + 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; + 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; + 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; + 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; + 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; + 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; + 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; + 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; + 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; + 5'b10000: QdWordM = 
ReadDataWordMuxM[255:128]; + 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; + 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; + 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; + 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; + 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; + 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; + 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; + 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; + 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; + 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; + 5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; + 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; + 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; + 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; + 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; + endcase + + assign ByteM = QdWordM[7:0]; + assign HalfwordM = QdWordM[15:0]; + assign WordM = QdWordM[31:0]; + assign DblWordM = QdWordM[63:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = FpLoadStoreM ? QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq + // flq (3'b100 with FpLoadStoreM set) returns the whole shifted quad word; this arm only exists when LLEN=128 
+ 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; + 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; + 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; + 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; + 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; + 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; + 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; + 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + assign ByteM = DblWordM[7:0]; + assign HalfwordM = DblWordM[15:0]; + assign WordM = DblWordM[31:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + + logic [31:0] WordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase + + assign ByteM = WordM[7:0]; + assign HalfwordM = WordM[15:0]; + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw: use the offset-shifted word so misaligned loads see the right bytes + 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen + endcase + end +endmodule From dac8fc16af30dcd1182c9f7f4d69383dfde042fe Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 19 Feb 2024 12:26:29 -0600 Subject: [PATCH 02/52] Partially working optimized subwordwrite for misaligned. 
--- src/lsu/lsu.sv | 15 +++-- src/lsu/subwordwritedouble.sv | 117 ++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 src/lsu/subwordwritedouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 591353ac7..e10183a9e 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -136,8 +136,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IHWriteDataM; // IEU or HPTW write data logic [P.XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data logic [P.LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data - logic [P.LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data - logic [P.LLEN-1:0] LSUWriteDataM; // Final write data + logic [MLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data + logic [MLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic [1:0] MemRWSpillM; @@ -167,7 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; @@ -337,7 +337,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .CacheRW(SelStoreDelay ? 
2'b00 : CacheRWM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), - .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, + .CacheWriteData(LSUWriteDataM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), @@ -351,7 +351,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), - .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), + .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM[P.LLEN-1:0]), .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusAtomic, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), @@ -426,11 +426,12 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); end - subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte 
masks swbytemask #(P.LLEN, P.ZICCLSM_SUPPORTED) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM), .ByteMaskExtended(ByteMaskExtendedM)); @@ -448,7 +449,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv new file mode 100644 index 000000000..728a4f4aa --- /dev/null +++ b/src/lsu/subwordwritedouble.sv @@ -0,0 +1,117 @@ +/////////////////////////////////////////// +// subwordwrite.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Masking and muxing for subword writes +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordwritedouble #(parameter LLEN) ( + input logic [2:0] LSUFunct3M, + input logic [2:0] PAdrM, + input logic FpLoadStoreM, + input logic BigEndianM, + input logic [LLEN-1:0] IMAFWriteDataM, + output logic [LLEN*2-1:0] LittleEndianWriteDataM +); + + // *** RT: This logic is duplicated in subwordreaddouble. Merge the two. + logic [4:0] PAdrSwap; // byte offset that selects where the write data is placed + logic [4:0] BigEndianPAdr; // big-endian offset: (LLEN/4) - PAdrM - LengthM + logic [4:0] LengthM; + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + + always_comb + case(LSUFunct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; // 1-byte access + 3'b001: LengthM = 5'd2; // 2-byte access + 3'b010: LengthM = 5'd4; // 4-byte access + 3'b011: LengthM = 5'd8; // 8-byte access + 3'b100: LengthM = 5'd16; // 16-byte access; reachable only when FpLoadStoreM keeps bit 2 + default: LengthM = 5'd8; + endcase // case (LSUFunct3M & {FpLoadStoreM, 2'b11}) + + // *** RT: End duplicated logic + + logic [LLEN*2-1:0] IMAFWriteData2M; // write data replicated into both halves + assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; + localparam OffsetIndex = $clog2(LLEN/8); // number of bits needed to index a byte within an LLEN word + logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; + // *** RT: Switch to something like this. 
+ assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); + + + // Replicate data for subword writes + if (LLEN == 128) begin:sww + always_comb + case(PAdrSwap[3:0]) + 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; + 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; + 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; + 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; + 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; + 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; + 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; + 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; + 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; + 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; + 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; + 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; + 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; + 4'b1101: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 104'b0}; + 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; + 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // sq + endcase + end else if (LLEN == 64) begin:sww + always_comb + case(PAdrSwap[2:0]) + 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; + 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; + 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; + 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; + 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; + 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], 
IMAFWriteDataM, IMAFWriteDataM[63:24]}; + 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; + 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; + endcase + end else begin:sww // 32-bit + always_comb + case(PAdrSwap[1:0]) + 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; + 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; + 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; + 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen + endcase + end +endmodule From 6a9c2d8dc43a1f997cf16969a2901d1e91fd4756 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 20 Feb 2024 20:23:42 -0600 Subject: [PATCH 03/52] Closer to getting subword write misaligned working. --- src/lsu/lsu.sv | 2 +- src/lsu/subwordwritedouble.sv | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e10183a9e..12ab9930e 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -426,7 +426,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .CacheableM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv index 728a4f4aa..599d71984 100644 --- 
a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritedouble.sv @@ -33,6 +33,7 @@ module subwordwritedouble #(parameter LLEN) ( input logic [2:0] PAdrM, input logic FpLoadStoreM, input logic BigEndianM, + input logic CacheableM, input logic [LLEN-1:0] IMAFWriteDataM, output logic [LLEN*2-1:0] LittleEndianWriteDataM ); @@ -43,7 +44,13 @@ module subwordwritedouble #(parameter LLEN) ( logic [4:0] LengthM; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + // cacheable, BigEndian + // 10: PAdrM[2:0] + // 11: BigEndianPAdr + // 00: 00000 + // 01: 00111 + mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {CacheableM, BigEndianM}, PAdrSwap); + //assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; /* verilator lint_off WIDTHEXPAND */ /* verilator lint_off WIDTHTRUNC */ assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; From 3714b2bf4adb815704c718a3cec921e563462d31 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 09:14:43 -0600 Subject: [PATCH 04/52] Non-ideal fix. Added new output from pma which indicates if the write shift should occur. The more ideal solution would be to have the pma indicate if the shift should occur and the maximum amount.. 
--- src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 5 +++-- src/lsu/subwordwritedouble.sv | 6 +++--- src/mmu/mmu.sv | 3 ++- src/mmu/pmachecker.sv | 7 ++++--- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 4848b5ebb..bb23f4fd3 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -185,7 +185,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .TLBFlush, .PhysicalAddress(PCPF), .TLBMiss(ITLBMissF), - .Cacheable(CacheableF), .Idempotent(), .SelTIM(SelIROM), + .Cacheable(CacheableF), .Idempotent(), .AllowShift(), .SelTIM(SelIROM), .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 12ab9930e..053d2bbb0 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -119,6 +119,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic SelSpillE; // Align logic detected a spill and needs to stall logic CacheableM; // PMA indicates memory address is cacheable + logic AllowShiftM; // PMA: indicates if WriteData should be byte shifted before going to cache or bus by offset. 
logic BusCommittedM; // Bus memory operation in flight, delay interrupts logic DCacheCommittedM; // D$ memory operation started, delay interrupts @@ -244,7 +245,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_ADUE, .PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]), .PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), .TLBFlush(sfencevmaM), - .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM), + .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .AllowShift(AllowShiftM), .SelTIM(SelDTIM), .InstrAccessFaultF(), .LoadAccessFaultM(LSULoadAccessFaultM), .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM(LSULoadPageFaultM), .StoreAmoPageFaultM(LSUStoreAmoPageFaultM), @@ -426,7 +427,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .CacheableM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv index 599d71984..eb62aa106 100644 --- a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritedouble.sv @@ -33,7 +33,7 @@ module subwordwritedouble #(parameter LLEN) ( input logic [2:0] PAdrM, input logic FpLoadStoreM, input 
logic BigEndianM, - input logic CacheableM, + input logic AllowShiftM, input logic [LLEN-1:0] IMAFWriteDataM, output logic [LLEN*2-1:0] LittleEndianWriteDataM ); @@ -48,8 +48,8 @@ module subwordwritedouble #(parameter LLEN) ( // 10: PAdrM[2:0] // 11: BigEndianPAdr // 00: 00000 - // 01: 00111 - mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {CacheableM, BigEndianM}, PAdrSwap); + // 01: 11111 + mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {AllowShiftM, BigEndianM}, PAdrSwap); //assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; /* verilator lint_off WIDTHEXPAND */ /* verilator lint_off WIDTHTRUNC */ diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index e842016a2..80a1ca7da 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -49,6 +49,7 @@ module mmu import cvw::*; #(parameter cvw_t P, output logic TLBMiss, // Miss TLB output logic Cacheable, // PMA indicates memory address is cachable output logic Idempotent, // PMA indicates memory address is idempotent + output logic AllowShift, // PMA indicates if WriteData should be byte shifted before going to cache or bus by offset output logic SelTIM, // Select a tightly integrated memory // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, // access fault sources @@ -112,7 +113,7 @@ module mmu import cvw::*; #(parameter cvw_t P, pmachecker #(P) pmachecker(.PhysicalAddress, .Size, .CMOpM, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PBMemoryType, - .Cacheable, .Idempotent, .SelTIM, + .Cacheable, .Idempotent, .AllowShift, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); if (P.PMP_ENTRIES > 0) begin : pmp diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 84e41ba65..60296213d 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -1,4 +1,4 @@ -/////////////////////////////////////////// +////////////////////////////////////////// // pmachecker.sv // // Written: tfleming@hmc.edu & 
jtorrey@hmc.edu 20 April 2021 @@ -38,7 +38,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( input logic WriteAccessM, // Write access input logic ReadAccessM, // Read access input logic [1:0] PBMemoryType, // PBMT field of PTE during TLB hit, or 00 otherwise - output logic Cacheable, Idempotent, SelTIM, + output logic Cacheable, Idempotent, AllowShift, SelTIM, output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, output logic PMAStoreAmoAccessFaultM @@ -60,7 +60,8 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[3] | SelRegions[4] | SelRegions[5]; // exclusion-tag: unused-cachable - assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; + assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; + assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6]; // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent From 1ece6f8eaeaa6bdccda4e9e8b05697b5cad4986f Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 09:34:16 -0600 Subject: [PATCH 05/52] Swapped to the more compact subwordreadmisaligned.sv. --- ...eaddouble.sv => subworddreadmisaligned.sv} | 108 +++++------------- 1 file changed, 26 insertions(+), 82 deletions(-) rename src/lsu/{subwordreaddouble.sv => subworddreadmisaligned.sv} (53%) diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subworddreadmisaligned.sv similarity index 53% rename from src/lsu/subwordreaddouble.sv rename to src/lsu/subworddreadmisaligned.sv index 936240cf7..cc1c13787 100644 --- a/src/lsu/subwordreaddouble.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -28,7 +28,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module subwordreaddouble #(parameter LLEN) +module subwordreadmisaligned #(parameter LLEN) ( input logic [LLEN*2-1:0] ReadDataWordMuxM, input logic [2:0] PAdrM, @@ -63,50 +63,19 @@ module subwordreaddouble #(parameter LLEN) default: LengthM = 5'd8; endcase + logic [LLEN*2-1:0] ReadDataAlignedM; + assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); + if (LLEN == 128) begin:swrmux logic [31:0] WordM; logic [63:0] DblWordM; - logic [63:0] QdWordM; - always_comb - case(PAdrSwap) - 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; - 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; - 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; - 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; - 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; - 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; - 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; - 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; - 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; - 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; - 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; - 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; - 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; - 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; - 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; - 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; - 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; - 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; - 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; - 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; - 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; - 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; - 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; - 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; - 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; - 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; - 5'b11010: QdWordM = {80'b0, 
ReadDataWordMuxM[255:208]}; - 5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; - 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; - 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; - 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; - 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; - endcase - - assign ByteM = QdWordM[7:0]; - assign HalfwordM = QdWordM[15:0]; - assign WordM = QdWordM[31:0]; - assign DblWordM = QdWordM[63:0]; + logic [127:0] QdWordM; + + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; + assign DblWordM = ReadDataAlignedM[63:0]; + assign QdWordM =ReadDataAlignedM[127:0]; // sign extension/ NaN boxing always_comb @@ -116,7 +85,7 @@ module subwordreaddouble #(parameter LLEN) 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b100: ReadDataM = FpLoadStoreM ? 
QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen @@ -125,29 +94,11 @@ module subwordreaddouble #(parameter LLEN) end else if (LLEN == 64) begin:swrmux logic [31:0] WordM; logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; - 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; - 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; - 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; - 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; - 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; - 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; - 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - assign ByteM = DblWordM[7:0]; - assign HalfwordM = DblWordM[15:0]; - assign WordM = DblWordM[31:0]; + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; + assign DblWordM = ReadDataAlignedM[63:0]; // sign extension/ NaN boxing always_comb @@ -165,32 +116,25 @@ module subwordreaddouble #(parameter LLEN) end else begin:swrmux // 32-bit - logic [31:0] WordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, 
ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase + logic [31:0] WordM; - assign ByteM = WordM[7:0]; - assign HalfwordM = WordM[15:0]; + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; // sign extension always_comb case(Funct3M) 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + + 3'b011: ReadDataM = WordM[LLEN-1:0]; // fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen + + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen endcase end endmodule From 7e1ea1e6d9472fdfd188823fc81ee455abbab460 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 09:37:16 -0600 Subject: [PATCH 06/52] Beginning subword cleanup. 
--- src/lsu/lsu.sv | 2 +- src/lsu/subworddreadmisaligned.sv | 21 ++++++--------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 053d2bbb0..3e1974521 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -425,7 +425,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index cc1c13787..fe96844f3 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -40,10 +40,11 @@ module subwordreadmisaligned #(parameter LLEN) logic [7:0] ByteM; logic [15:0] HalfwordM; + logic [31:0] WordM; logic [4:0] PAdrSwap; logic [4:0] BigEndianPAdr; logic [4:0] LengthM; - + // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; @@ -66,14 +67,14 @@ module subwordreadmisaligned #(parameter LLEN) logic [LLEN*2-1:0] ReadDataAlignedM; assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; + if (LLEN == 128) begin:swrmux - logic [31:0] WordM; logic [63:0] DblWordM; logic [127:0] QdWordM; - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; assign DblWordM = ReadDataAlignedM[63:0]; assign QdWordM =ReadDataAlignedM[127:0]; @@ -92,12 +93,8 @@ module subwordreadmisaligned #(parameter LLEN) endcase end else if (LLEN == 64) begin:swrmux - logic [31:0] WordM; logic [63:0] DblWordM; - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; assign DblWordM = ReadDataAlignedM[63:0]; // sign extension/ NaN boxing @@ -116,12 +113,6 @@ module subwordreadmisaligned #(parameter LLEN) end else begin:swrmux // 32-bit - logic [31:0] WordM; - - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; - // sign extension always_comb case(Funct3M) From 69d31d50e27199f105706172b5e2427f96689d3f Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 13:29:39 -0600 Subject: [PATCH 07/52] Updated subword misaligned. 
--- src/lsu/lsu.sv | 2 +- src/lsu/subworddreadmisaligned.sv | 12 ++++++++++-- ...bwordwritedouble.sv => subwordwritemisaligned.sv} | 6 +++--- 3 files changed, 14 insertions(+), 6 deletions(-) rename src/lsu/{subwordwritedouble.sv => subwordwritemisaligned.sv} (97%) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 3e1974521..567dbdb79 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -427,7 +427,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index fe96844f3..1e179dbc3 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -38,6 +38,7 @@ module subwordreadmisaligned #(parameter LLEN) output logic [LLEN-1:0] ReadDataM ); + logic [LLEN*2-1:0] ReadDataAlignedM; logic [7:0] ByteM; logic [15:0] HalfwordM; logic [31:0] WordM; @@ -64,13 +65,20 @@ module subwordreadmisaligned #(parameter LLEN) default: LengthM = 5'd8; endcase - logic [LLEN*2-1:0] ReadDataAlignedM; assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); assign ByteM = ReadDataAlignedM[7:0]; assign HalfwordM = ReadDataAlignedM[15:0]; assign WordM = ReadDataAlignedM[31:0]; + logic [LLEN-1:0] lb, lh_flh, lw_flw, ld_fld, lbu, lbu_flq, lhu, lwu; + + assign lb = 
{{LLEN-8{ByteM[7]}}, ByteM}; + assign lh_flh = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};; + assign lw_flw = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; + //assign ld_fld = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; + + if (LLEN == 128) begin:swrmux logic [63:0] DblWordM; logic [127:0] QdWordM; @@ -120,7 +128,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = WordM[LLEN-1:0]; // fld + //3'b011: ReadDataM = WordM[LLEN-1:0]; // fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritemisaligned.sv similarity index 97% rename from src/lsu/subwordwritedouble.sv rename to src/lsu/subwordwritemisaligned.sv index eb62aa106..dd82ffa19 100644 --- a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritemisaligned.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// subwordwrite.sv +// subwordwritemisaligned.sv // // Written: David_Harris@hmc.edu // Created: 9 January 2021 @@ -28,7 +28,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module subwordwritedouble #(parameter LLEN) ( +module subwordwritemisaligned #(parameter LLEN) ( input logic [2:0] LSUFunct3M, input logic [2:0] PAdrM, input logic FpLoadStoreM, @@ -38,7 +38,7 @@ module subwordwritedouble #(parameter LLEN) ( output logic [LLEN*2-1:0] LittleEndianWriteDataM ); - // *** RT: This is logic is duplicated in subwordreaddouble. Merge the two. + // *** RT: This is logic is duplicated in subwordreadmisaligned. Merge the two. 
logic [4:0] PAdrSwap; logic [4:0] BigEndianPAdr; logic [4:0] LengthM; From 45c30267a50771fb0c5acb756ff3988d05f54f4a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 14:08:04 -0600 Subject: [PATCH 08/52] Cleanup. --- src/lsu/subworddreadmisaligned.sv | 72 +++++-------------------------- 1 file changed, 10 insertions(+), 62 deletions(-) diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 1e179dbc3..66ca0375e 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -71,69 +71,17 @@ module subwordreadmisaligned #(parameter LLEN) assign HalfwordM = ReadDataAlignedM[15:0]; assign WordM = ReadDataAlignedM[31:0]; - logic [LLEN-1:0] lb, lh_flh, lw_flw, ld_fld, lbu, lbu_flq, lhu, lwu; - - assign lb = {{LLEN-8{ByteM[7]}}, ByteM}; - assign lh_flh = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};; - assign lw_flw = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; - //assign ld_fld = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; - - - if (LLEN == 128) begin:swrmux - logic [63:0] DblWordM; - logic [127:0] QdWordM; - - assign DblWordM = ReadDataAlignedM[63:0]; - assign QdWordM =ReadDataAlignedM[127:0]; - - // sign extension/ NaN boxing - always_comb + always_comb case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b100: ReadDataM = FpLoadStoreM ? 
QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: if(LLEN == 128 || LLEN == 64 ) ReadDataM = {{LLEN-64{ReadDataAlignedM[63]|FpLoadStoreM}}, ReadDataAlignedM[63:0]}; // ld/fld + 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + else if(LLEN == 64) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen endcase - end else if (LLEN == 64) begin:swrmux - logic [63:0] DblWordM; - - assign DblWordM = ReadDataAlignedM[63:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - - //3'b011: ReadDataM = WordM[LLEN-1:0]; // fld - - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - end endmodule From fbc18abaa0b9bd24d7febfad69840f9d717f76df Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 14:17:15 -0600 Subject: [PATCH 09/52] Significant cleanup of subwordwritemisaligned. --- src/lsu/subwordwritemisaligned.sv | 50 ++----------------------------- 1 file changed, 2 insertions(+), 48 deletions(-) diff --git a/src/lsu/subwordwritemisaligned.sv b/src/lsu/subwordwritemisaligned.sv index dd82ffa19..22f462d4a 100644 --- a/src/lsu/subwordwritemisaligned.sv +++ b/src/lsu/subwordwritemisaligned.sv @@ -72,53 +72,7 @@ module subwordwritemisaligned #(parameter LLEN) ( logic [LLEN*2-1:0] IMAFWriteData2M; assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; localparam OffsetIndex = $clog2(LLEN/8); - logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; - // *** RT: Switch to something like this.
- assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); - - // Replicate data for subword writes - if (LLEN == 128) begin:sww - always_comb - case(PAdrSwap[3:0]) - 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; - 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; - 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; - 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; - 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; - 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; - 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; - 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; - 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; - 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; - 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; - 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; - 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; - 4'b1101: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 104'b0}; - 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; - 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; - default: LittleEndianWriteDataM = IMAFWriteDataM; // sq - endcase - end else if (LLEN == 64) begin:sww - always_comb - case(PAdrSwap[2:0]) - 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; - 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; - 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; - 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; - 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; - 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], 
IMAFWriteDataM, IMAFWriteDataM[63:24]}; - 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; - 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; - endcase - end else begin:sww // 32-bit - always_comb - case(PAdrSwap[1:0]) - 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; - 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; - 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; - 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; - default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen - endcase - end + assign LittleEndianWriteDataM = (IMAFWriteData2M << (PAdrSwap[OffsetIndex-1:0] * 8)) | (IMAFWriteData2M >> (LLEN - (PAdrSwap[OffsetIndex-1:0] * 8))); + endmodule From a4028831150b8d1206aa69cc15eeda60bd19f21c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 09:41:59 -0600 Subject: [PATCH 10/52] Simplifications of subword code. 
--- src/lsu/endianswap.sv | 38 ++++++++++++++++++++++++++++++- src/lsu/lsu.sv | 4 ++-- src/lsu/subworddreadmisaligned.sv | 2 +- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index 7c042886a..3c552b371 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -34,7 +34,43 @@ module endianswap #(parameter LEN) ( output logic [LEN-1:0] y ); - if(LEN == 128) begin + if(LEN == 256) begin + always_comb + if (BigEndianM) begin // swap endianness + y[255:248] = a[7:0]; + y[247:240] = a[15:8]; + y[239:232] = a[23:16]; + y[231:224] = a[31:24]; + y[223:216] = a[39:32]; + y[215:208] = a[47:40]; + y[207:200] = a[55:48]; + y[199:192] = a[63:56]; + y[191:184] = a[71:64]; + y[183:176] = a[79:72]; + y[175:168] = a[87:80]; + y[167:160] = a[95:88]; + y[159:152] = a[103:96]; + y[151:144] = a[111:104]; + y[143:136] = a[119:112]; + y[135:128] = a[127:120]; + y[127:120] = a[135:128]; + y[119:112] = a[142:136]; + y[111:104] = a[152:144]; + y[103:96] = a[159:152]; + y[95:88] = a[167:160]; + y[87:80] = a[175:168]; + y[79:72] = a[183:176]; + y[71:64] = a[191:184]; + y[63:56] = a[199:192]; + y[55:48] = a[207:200]; + y[47:40] = a[215:208]; + y[39:32] = a[223:216]; + y[31:24] = a[231:224]; + y[23:16] = a[239:232]; + y[15:8] = a[247:240]; + y[7:0] = a[255:248]; + end else y = a; + end else if(LEN == 128) begin always_comb if (BigEndianM) begin // swap endianness y[127:120] = a[7:0]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 567dbdb79..896af0b46 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -450,8 +450,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); + endianswap #(MLEN) 
storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswap #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 66ca0375e..2868a54d8 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -77,7 +77,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw 3'b011: if(LLEN == 128 || LLEN == 64 ) ReadDataM = {{LLEN-64{ReadDataAlignedM[63]|FpLoadStoreM}}, ReadDataAlignedM[63:0]}; // ld/fld - 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq else if(LLEN == 64) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu From caac48b7f28e33ada9d4a7d0f017878635473811 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 09:42:39 -0600 Subject: [PATCH 11/52] Removed duplicate endianswap. 
--- src/lsu/endianswapdouble.sv | 114 ------------------------------------ 1 file changed, 114 deletions(-) delete mode 100644 src/lsu/endianswapdouble.sv diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv deleted file mode 100644 index 133149f0e..000000000 --- a/src/lsu/endianswapdouble.sv +++ /dev/null @@ -1,114 +0,0 @@ -/////////////////////////////////////////// -// endianswap.sv -// -// Written: David_Harris@hmc.edu -// Created: 7 May 2022 -// Modified: 18 January 2023 -// -// Purpose: Swap byte order for Big-Endian accesses -// -// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module endianswapdouble #(parameter LEN) ( - input logic BigEndianM, - input logic [LEN-1:0] a, - output logic [LEN-1:0] y -); - - if(LEN == 256) begin - always_comb - if (BigEndianM) begin // swap endianness - y[255:248] = a[7:0]; - y[247:240] = a[15:8]; - y[239:232] = a[23:16]; - y[231:224] = a[31:24]; - y[223:216] = a[39:32]; - y[215:208] = a[47:40]; - y[207:200] = a[55:48]; - y[199:192] = a[63:56]; - y[191:184] = a[71:64]; - y[183:176] = a[79:72]; - y[175:168] = a[87:80]; - y[167:160] = a[95:88]; - y[159:152] = a[103:96]; - y[151:144] = a[111:104]; - y[143:136] = a[119:112]; - y[135:128] = a[127:120]; - y[127:120] = a[135:128]; - y[119:112] = a[142:136]; - y[111:104] = a[152:144]; - y[103:96] = a[159:152]; - y[95:88] = a[167:160]; - y[87:80] = a[175:168]; - y[79:72] = a[183:176]; - y[71:64] = a[191:184]; - y[63:56] = a[199:192]; - y[55:48] = a[207:200]; - y[47:40] = a[215:208]; - y[39:32] = a[223:216]; - y[31:24] = a[231:224]; - y[23:16] = a[239:232]; - y[15:8] = a[247:240]; - y[7:0] = a[255:248]; - end else y = a; - end else if(LEN == 128) begin - always_comb - if (BigEndianM) begin // swap endianness - y[127:120] = a[7:0]; - y[119:112] = a[15:8]; - y[111:104] = a[23:16]; - y[103:96] = a[31:24]; - y[95:88] = a[39:32]; - y[87:80] = a[47:40]; - y[79:72] = a[55:48]; - y[71:64] = a[63:56]; - y[63:56] = a[71:64]; - y[55:48] = a[79:72]; - y[47:40] = a[87:80]; - y[39:32] = a[95:88]; - y[31:24] = a[103:96]; - y[23:16] = a[111:104]; - y[15:8] = a[119:112]; - y[7:0] = a[127:120]; - end else y = a; - end else if(LEN == 64) begin - always_comb - if (BigEndianM) begin // swap endianness - y[63:56] = a[7:0]; - y[55:48] = a[15:8]; - y[47:40] = a[23:16]; - y[39:32] = a[31:24]; - y[31:24] = a[39:32]; - y[23:16] = a[47:40]; - y[15:8] = a[55:48]; - y[7:0] = a[63:56]; - end else y = a; - end else begin - always_comb - if (BigEndianM) begin - y[31:24] = a[7:0]; - y[23:16] = 
a[15:8]; - y[15:8] = a[23:16]; - y[7:0] = a[31:24]; - end else y = a; - end -endmodule From e84b7cc14782d6cb6676d94e987c15f63f87a604 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 13:00:21 -0600 Subject: [PATCH 12/52] Cleanup. --- src/lsu/align.sv | 14 -------------- src/lsu/endianswap.sv | 4 ++-- src/lsu/lsu.sv | 4 +--- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 7c3703886..29ba22c30 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -47,7 +47,6 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.LLEN-1:0] LSUWriteDataM, output logic [(P.LLEN*2-1)/8:0] ByteMaskSpillM, - output logic [P.LLEN*2-1:0] LSUWriteDataSpillM, output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill @@ -71,7 +70,6 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrIncrementM; logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; - logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ @@ -142,18 +140,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // merge together mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SelSpillM, ReadDataWordSpillAllM); - - // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) - // 8 * is for shifting by bytes not bits - assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate - - // write path. Also has the 8:1 shifter muxing for the byteoffset - // then it also has the mux to select when a spill occurs - logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. 
- - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; - assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; - mux3 #(2*P.LLEN/8) bytemaskspillmux({ByteMaskExtendedM, ByteMaskM}, // no spill {{{P.LLEN/8}{1'b0}}, ByteMaskM}, // spill, first half {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, // spill, second half diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index 3c552b371..afd4ecdd2 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -54,8 +54,8 @@ module endianswap #(parameter LEN) ( y[143:136] = a[119:112]; y[135:128] = a[127:120]; y[127:120] = a[135:128]; - y[119:112] = a[142:136]; - y[111:104] = a[152:144]; + y[119:112] = a[143:136]; + y[111:104] = a[151:144]; y[103:96] = a[159:152]; y[95:88] = a[167:160]; y[87:80] = a[175:168]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 896af0b46..28ef7ba08 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -126,7 +126,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data /* verilator lint_off WIDTHEXPAND */ logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data - logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data @@ -168,7 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; @@ -179,7 +178,6 @@ module lsu import 
cvw::*; #(parameter cvw_t P) ( assign SelSpillE = '0; assign ReadDataWordSpillAllM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; - assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; assign {SpillStallM, SelStoreDelay} = '0; end From a2d5618d889f882e0ceccb8c75708dc564bb7dae Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 13:46:04 -0600 Subject: [PATCH 13/52] Added sdc to pma allow shift. --- src/mmu/pmachecker.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 60296213d..e77cc74d0 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -61,7 +61,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[3] | SelRegions[4] | SelRegions[5]; // exclusion-tag: unused-cachable assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; - assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6]; + assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6] | SelRegions[10]; // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent From ab750e150f6a82a3c6d0d694d0f87c322ade44d1 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 14:00:19 -0600 Subject: [PATCH 14/52] Fixed lint errors for alignment. 
--- src/lsu/align.sv | 20 ++++++++++++-------- src/lsu/lsu.sv | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 29ba22c30..ad0dbf238 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -37,6 +37,7 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation + input logic FpLoadStoreM, // Floating point Load or Store input logic [1:0] MemRWM, input logic [P.LLEN*2-1:0] DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched @@ -69,8 +70,9 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrIncrementM; - logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; - logic PotentialSpillM; + localparam OFFSET_LEN = $clog2(LLENINBYTES); + logic [OFFSET_LEN-1:0] AccessByteOffsetM; + logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -89,12 +91,14 @@ module align import cvw::*; #(parameter cvw_t P) ( // compute misalignement always_comb begin - case (Funct3M[1:0]) - 2'b00: AccessByteOffsetM = '0; // byte access - 2'b01: AccessByteOffsetM = {2'b00, IEUAdrM[0]}; // half access - 2'b10: AccessByteOffsetM = {1'b0, IEUAdrM[1:0]}; // word access - 2'b11: AccessByteOffsetM = IEUAdrM[2:0]; // double access - default: AccessByteOffsetM = IEUAdrM[2:0]; + case (Funct3M & {FpLoadStoreM, 2'b11}) + 3'b000: AccessByteOffsetM = '0; // byte access + 3'b001: AccessByteOffsetM = {{OFFSET_LEN-1{1'b0}}, IEUAdrM[0]}; // half access + 3'b010: AccessByteOffsetM = {{OFFSET_LEN-2{1'b0}}, IEUAdrM[1:0]}; // word access + 3'b011: AccessByteOffsetM = {{OFFSET_LEN-3{1'b0}}, IEUAdrM[2:0]}; // double access + 3'b100: 
if(P.LLEN == 128) AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; // quad access + else AccessByteOffsetM = '0; // invalid + default: AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; endcase case (Funct3M[1:0]) 2'b00: PotentialSpillM = '0; // byte access diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 28ef7ba08..1712cdc19 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -164,7 +164,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); if(MISALIGN_SUPPORT) begin : ziccslm_align logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; - align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, + align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .FpLoadStoreM, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, From 90ad5e7dab8fe04e3e214bfe9de5434c39fb594a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 28 Feb 2024 17:07:32 -0600 Subject: [PATCH 15/52] Updated the cache for book clarity. 
--- src/cache/cache.sv | 6 ++--- src/cache/cachefsm.sv | 62 +++++++++++++++++++++---------------------- src/cache/cacheway.sv | 33 +++++++++++------------ 3 files changed, 49 insertions(+), 52 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 4a97a29d4..6882110bd 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -98,7 +98,7 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic SelFetchBuffer; logic CacheEn; - logic SelWay; + logic SelVictim; logic [LINELEN/8-1:0] LineByteMask; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; genvar index; @@ -120,7 +120,7 @@ module cache import cvw::*; #(parameter cvw_t P, // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( - .clk, .reset, .CacheEn, .CacheSetData, .CacheSetTag, .PAdr, .LineWriteData, .LineByteMask, .SelWay, + .clk, .reset, .CacheEn, .CacheSetData, .CacheSetTag, .PAdr, .LineWriteData, .LineByteMask, .SelVictim, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .VictimWay, .FlushWay, .FlushCache, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .HitDirtyWay, .TagWay, .FlushStage, .InvalidateCache); @@ -227,7 +227,7 @@ module cache import cvw::*; #(parameter cvw_t P, cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .Stall, .CacheHit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted, - .CacheMiss, .CacheAccess, .SelAdrData, .SelAdrTag, .SelWay, + .CacheMiss, .CacheAccess, .SelAdrData, .SelAdrTag, .SelVictim, .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 4af89b08e..869789df5 100644 --- a/src/cache/cachefsm.sv +++ 
b/src/cache/cachefsm.sv @@ -63,7 +63,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback output logic LRUWriteEn, // Update the LRU state - output logic SelWay, // Controls which way to select a way data and tag, 00 = hitway, 10 = victimway, 11 = flushway + output logic SelVictim, // Overides HitWay Tag matching. Selects selects the victim tag/data regardless of hit output logic FlushAdrCntEn, // Enable the counter for Flush Adr output logic FlushWayCntEn, // Enable the way counter during a flush output logic FlushCntRst, // Reset both flush counters @@ -79,12 +79,12 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic CMOZeroNoEviction; logic StallConditions; - typedef enum logic [3:0]{STATE_READY, // hit states + typedef enum logic [3:0]{STATE_HIT, // hit states // miss states STATE_FETCH, STATE_WRITEBACK, STATE_WRITE_LINE, - STATE_READ_HOLD, // required for back to back reads. structural hazard on writting SRAM + STATE_ADDRESS_SETUP, // required for back to back reads. structural hazard on writting SRAM // flush cache STATE_FLUSH, STATE_FLUSH_WRITEBACK @@ -101,51 +101,51 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. - assign CacheAccess = (|CacheRW) & ((CurrState == STATE_READY & ~Stall & ~FlushStage) | (CurrState == STATE_READ_HOLD & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW + assign CacheAccess = (|CacheRW) & ((CurrState == STATE_HIT & ~Stall & ~FlushStage) | (CurrState == STATE_ADDRESS_SETUP & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW assign CacheMiss = CacheAccess & ~CacheHit; - // special case on reset. When the fsm first exists reset the + // special case on reset. 
When the fsm first exists reset twayhe // PCNextF will no longer be pointing to the correct address. // But PCF will be the reset vector. flop #(1) resetDelayReg(.clk, .d(reset), .q(resetDelay)); always_ff @(posedge clk) - if (reset | FlushStage) CurrState <= #1 STATE_READY; + if (reset | FlushStage) CurrState <= #1 STATE_HIT; else CurrState <= #1 NextState; always_comb begin - NextState = STATE_READY; + NextState = STATE_HIT; case (CurrState) // exclusion-tag: icache state-case - STATE_READY: if(InvalidateCache) NextState = STATE_READY; // exclusion-tag: dcache InvalidateCheck + STATE_HIT: if(InvalidateCache) NextState = STATE_HIT; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; // exclusion-tag: icache FLUSHStatement else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement else if((AnyMiss | CMOWriteback) & ~READ_ONLY_CACHE) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement - else NextState = STATE_READY; + else NextState = STATE_HIT; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else NextState = STATE_FETCH; - STATE_WRITE_LINE: NextState = STATE_READ_HOLD; - STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; - else NextState = STATE_READY; + STATE_WRITE_LINE: NextState = STATE_ADDRESS_SETUP; + STATE_ADDRESS_SETUP: if(Stall) NextState = STATE_ADDRESS_SETUP; + else NextState = STATE_HIT; // exclusion-tag-start: icache case STATE_WRITEBACK: if(CacheBusAck & ~(|CMOpM[3:1])) NextState = STATE_FETCH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; // Read_hold lowers CacheStall + else if(CacheBusAck) NextState = STATE_ADDRESS_SETUP; // Read_hold lowers CacheStall else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. 
STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; - else if (FlushFlag) NextState = STATE_READ_HOLD; + else if (FlushFlag) NextState = STATE_ADDRESS_SETUP; else NextState = STATE_FLUSH; STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; + else if(CacheBusAck) NextState = STATE_ADDRESS_SETUP; else NextState = STATE_FLUSH_WRITEBACK; // exclusion-tag-end: icache case - default: NextState = STATE_READY; + default: NextState = STATE_HIT; endcase end // com back to CPU - assign CacheCommitted = (CurrState != STATE_READY) & ~(READ_ONLY_CACHE & (CurrState == STATE_READ_HOLD)); + assign CacheCommitted = (CurrState != STATE_HIT) & ~(READ_ONLY_CACHE & (CurrState == STATE_ADDRESS_SETUP)); assign StallConditions = FlushCache | AnyMiss | CMOWriteback; // exclusion-tag: icache FlushCache - assign CacheStall = (CurrState == STATE_READY & StallConditions) | // exclusion-tag: icache StallStates + assign CacheStall = (CurrState == STATE_HIT & StallConditions) | // exclusion-tag: icache StallStates (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. 
@@ -153,26 +153,26 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache assign SetValid = CurrState == STATE_WRITE_LINE | - (CurrState == STATE_READY & CMOZeroNoEviction) | + (CurrState == STATE_HIT & CMOZeroNoEviction) | (CurrState == STATE_WRITEBACK & CacheBusAck & CMOpM[3]); - assign ClearValid = (CurrState == STATE_READY & CMOpM[0]) | + assign ClearValid = (CurrState == STATE_HIT & CMOpM[0]) | (CurrState == STATE_WRITEBACK & CMOpM[2] & CacheBusAck); - assign LRUWriteEn = (((CurrState == STATE_READY & (AnyHit | CMOZeroNoEviction)) | + assign LRUWriteEn = (((CurrState == STATE_HIT & (AnyHit | CMOZeroNoEviction)) | (CurrState == STATE_WRITE_LINE)) & ~FlushStage) | (CurrState == STATE_WRITEBACK & CMOpM[3] & CacheBusAck); // exclusion-tag-start: icache flushdirtycontrols - assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty + assign SetDirty = (CurrState == STATE_HIT & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | (CurrState == STATE_WRITEBACK & (CMOpM[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty (CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. 
// Flush and eviction controls CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & CacheBusAck; - assign SelWay = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOpM[1] | CMOpM[2])) | (CacheBusAck & CMOpM[3]))) | - (CurrState == STATE_READY & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | + assign SelVictim = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOpM[1] | CMOpM[2])) | (CacheBusAck & CMOpM[3]))) | + (CurrState == STATE_HIT & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2] | ~CacheBusAck)) | - (CurrState == STATE_READY & AnyMiss & LineDirty); + (CurrState == STATE_HIT & AnyMiss & LineDirty); // coverage off -item e 1 -fecexprrow 1 // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | @@ -183,29 +183,29 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK & FlushFlag & CacheBusAck); // exclusion-tag-end: icache flushdirtycontrols // Bus interface controls - assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses + assign CacheBusRW[1] = (CurrState == STATE_HIT & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses (CurrState == STATE_FETCH & ~CacheBusAck) | (CurrState == STATE_WRITEBACK & CacheBusAck & ~(|CMOpM)); logic LoadMiss; assign LoadMiss = (CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign CacheBusRW[0] = (CurrState == STATE_READY & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW + assign CacheBusRW[0] = (CurrState == STATE_HIT & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & 
~CacheBusAck); - assign SelAdrData = (CurrState == STATE_READY & (CacheRW[0] | AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed + assign SelAdrData = (CurrState == STATE_HIT & (CacheRW[0] | AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | resetDelay; - assign SelAdrTag = (CurrState == STATE_READY & (AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrTag // changes if store delay hazard removed + assign SelAdrTag = (CurrState == STATE_HIT & (AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrTag // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | resetDelay; - assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_READ_HOLD; - assign CacheEn = (~Stall | StallConditions) | (CurrState != STATE_READY) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn + assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_ADDRESS_SETUP; + assign CacheEn = (~Stall | StallConditions) | (CurrState != STATE_HIT) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn endmodule // cachefsm diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 678f7acac..3c0f5df31 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -42,7 +42,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, input logic SetValid, // Set the valid bit in the selected way and set input logic ClearValid, // Clear the valid bit in the selected way and set input logic SetDirty, // Set the dirty bit in the selected way and set - input logic SelWay, // Controls which way to select a way data and tag, 00 = hitway, 10 = victimway, 11 = flushway + input logic SelVictim, // Overides HitWay Tag matching. 
Selects selects the victim tag/data regardless of hit input logic ClearDirty, // Clear the dirty bit in the selected way and set input logic FlushCache, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr input logic VictimWay, // LRU selected this way as victim to evict @@ -68,7 +68,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic [LINELEN-1:0] ReadDataLine; logic [TAGLEN-1:0] ReadTag; logic Dirty; - logic SelDirty; + logic SelecteDirty; logic SelectedWriteWordEn; logic [LINELEN/8-1:0] FinalByteMask; logic SetValidEN, ClearValidEN; @@ -77,33 +77,30 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic SetDirtyWay; logic ClearDirtyWay; logic SelNonHit; - logic SelData; + logic SelectedWay; logic InvalidateCacheDelay; if (!READ_ONLY_CACHE) begin:flushlogic - logic FlushWayEn; - mux2 #(1) seltagmux(VictimWay, FlushWay, FlushCache, SelDirty); - + mux2 #(1) seltagmux(VictimWay, FlushWay, FlushCache, SelecteDirty); + mux3 #(1) selectedmux(HitWay, FlushWay, VictimWay, {SelVictim, FlushCache}, SelectedWay); // FlushWay is part of a one hot way selection. Must clear it if FlushWay not selected. // coverage off -item e 1 -fecexprrow 3 // nonzero ways will never see FlushCache=0 while FlushWay=1 since FlushWay only advances on a subset of FlushCache assertion cases. - assign FlushWayEn = FlushWay & FlushCache; - assign SelNonHit = FlushWayEn | SelWay; end else begin:flushlogic // no flush operation for read-only caches. 
- assign SelDirty = VictimWay; - assign SelNonHit = SelWay; + assign SelecteDirty = VictimWay; + mux2 #(1) selectedwaymux(HitWay, SelecteDirty, SelVictim , SelectedWay); end - mux2 #(1) selectedwaymux(HitWay, SelDirty, SelNonHit , SelData); + ///////////////////////////////////////////////////////////////////////////////////////////// // Write Enable demux ///////////////////////////////////////////////////////////////////////////////////////////// - assign SetValidWay = SetValid & SelData; - assign ClearValidWay = ClearValid & SelData; // exclusion-tag: icache ClearValidWay - assign SetDirtyWay = SetDirty & SelData; // exclusion-tag: icache SetDirtyWay - assign ClearDirtyWay = ClearDirty & SelData; + assign SetValidWay = SetValid & SelectedWay; + assign ClearValidWay = ClearValid & SelectedWay; // exclusion-tag: icache ClearValidWay + assign SetDirtyWay = SetDirty & SelectedWay; // exclusion-tag: icache SetDirtyWay + assign ClearDirtyWay = ClearDirty & SelectedWay; assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage; // exclusion-tag: icache SelectedWiteWordEn assign SetValidEN = SetValidWay & ~FlushStage; // exclusion-tag: cache SetValidEN assign ClearValidEN = ClearValidWay & ~FlushStage; // exclusion-tag: cache ClearValidEN @@ -120,9 +117,9 @@ module cacheway import cvw::*; #(parameter cvw_t P, .din(PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); // AND portion of distributed tag multiplexer - assign TagWay = SelData ? ReadTag : '0; // AND part of AOMux + assign TagWay = SelectedWay ? 
ReadTag : '0; // AND part of AOMux assign HitDirtyWay = Dirty & ValidWay; - assign DirtyWay = SelDirty & HitDirtyWay; // exclusion-tag: icache DirtyWay + assign DirtyWay = SelecteDirty & HitDirtyWay; // exclusion-tag: icache DirtyWay assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]) & ~InvalidateCacheDelay; // exclusion-tag: dcache HitWay flop #(1) InvalidateCacheReg(clk, InvalidateCache, InvalidateCacheDelay); @@ -152,7 +149,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, end // AND portion of distributed read multiplexers - assign ReadDataLineWay = SelData ? ReadDataLine : '0; // AND part of AO mux. + assign ReadDataLineWay = SelectedWay ? ReadDataLine : '0; // AND part of AO mux. ///////////////////////////////////////////////////////////////////////////////////////////// // Valid Bits From 85691f0e8bfb77df76f7df50aca04ebad708621d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 29 Feb 2024 17:18:01 -0600 Subject: [PATCH 16/52] Simplified and clarified names in cacheLRU. --- src/cache/cache.sv | 4 ++-- src/cache/cacheLRU.sv | 37 ++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 6882110bd..cf3e5e0d4 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -180,14 +180,14 @@ module cache import cvw::*; #(parameter cvw_t P, assign DemuxedByteMask = BlankByteMask << ((MUXINTERVAL/8) * WordOffsetAddr); - assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. + assign FetchBufferByteSel = SetDirty ? ~DemuxedByteMask : '1; // If load miss set all muxes to 1. 
// Merge write data into fetched cache line for store miss for(index = 0; index < LINELEN/8; index++) begin mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index] & ~CMOpM[3]), .y(LineWriteData[8*index+7:8*index])); end - assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; + assign LineByteMask = SetDirty ? DemuxedByteMask : '1; end else begin:WriteSelLogic diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index e795dd765..2670af93f 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -1,7 +1,7 @@ /////////////////////////////////////////// // cacheLRU.sv // -// Written: Ross Thompson ross1728@gmail.com +// Written: Rose Thompson ross1728@gmail.com // Created: 20 July 2021 // Modified: 20 January 2023 // @@ -36,8 +36,8 @@ module cacheLRU input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant input logic [NUMWAYS-1:0] HitWay, // Which way is valid and matches PAdr's tag input logic [NUMWAYS-1:0] ValidWay, // Which ways for a particular set are valid, ignores tag - input logic [SETLEN-1:0] CacheSetData, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr - input logic [SETLEN-1:0] CacheSetTag, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr + input logic [SETLEN-1:0] CacheSetData, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr + input logic [SETLEN-1:0] CacheSetTag, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr input logic [SETLEN-1:0] PAdr, // Physical address input logic LRUWriteEn, // Update the LRU state input logic SetValid, // Set the dirty bit in the selected way and set @@ -51,23 +51,27 @@ module cacheLRU logic [NUMWAYS-2:0] LRUMemory [NUMLINES-1:0]; logic [NUMWAYS-2:0] CurrLRU; logic [NUMWAYS-2:0] NextLRU; - logic [NUMWAYS-1:0] Way; - logic [LOGNUMWAYS-1:0] 
WayEncoded; + logic [LOGNUMWAYS-1:0] HitWayEncoded, Way; logic [NUMWAYS-2:0] WayExpanded; logic AllValid; genvar row; /* verilator lint_off UNOPTFLAT */ - // Ross: For some reason verilator does not like this. I checked and it is not a circular path. + // Rose: For some reason verilator does not like this. I checked and it is not a circular path. logic [NUMWAYS-2:0] LRUUpdate; logic [LOGNUMWAYS-1:0] Intermediate [NUMWAYS-2:0]; /* verilator lint_on UNOPTFLAT */ + logic [NUMWAYS-1:0] FirstZero; + logic [LOGNUMWAYS-1:0] FirstZeroWay; + logic [LOGNUMWAYS-1:0] VictimWayEnc; + + binencoder #(NUMWAYS) hitwayencoder(HitWay, HitWayEncoded); + assign AllValid = &ValidWay; ///// Update replacement bits. - // coverage off // Excluded from coverage b/c it is untestable without varying NUMWAYS. function integer log2 (integer value); @@ -80,8 +84,7 @@ module cacheLRU // coverage on // On a miss we need to ignore HitWay and derive the new replacement bits with the VictimWay. - mux2 #(NUMWAYS) WayMux(HitWay, VictimWay, SetValid, Way); - binencoder #(NUMWAYS) encoder(Way, WayEncoded); + mux2 #(LOGNUMWAYS) WayMuxEnc(HitWayEncoded, VictimWayEnc, SetValid, Way); // bit duplication // expand HitWay as HitWay[3], {{2}{HitWay[2]}}, {{4}{HitWay[1]}, {{8{HitWay[0]}}, ... @@ -89,7 +92,7 @@ module cacheLRU localparam integer DuplicationFactor = 2**(LOGNUMWAYS-row-1); localparam StartIndex = NUMWAYS-2 - DuplicationFactor + 1; localparam EndIndex = NUMWAYS-2 - 2 * DuplicationFactor + 2; - assign WayExpanded[StartIndex : EndIndex] = {{DuplicationFactor}{WayEncoded[row]}}; + assign WayExpanded[StartIndex : EndIndex] = {{DuplicationFactor}{Way[row]}}; end genvar node; @@ -102,14 +105,14 @@ module cacheLRU localparam r = LOGNUMWAYS - ctr_depth; // the child node will be updated if its parent was updated and - // the WayEncoded bit was the correct value. + // the Way bit was the correct value. // The if statement is only there for coverage since LRUUpdate[root] is always 1. 
if (node == NUMWAYS-2) begin - assign LRUUpdate[lchild] = ~WayEncoded[r]; - assign LRUUpdate[rchild] = WayEncoded[r]; + assign LRUUpdate[lchild] = ~Way[r]; + assign LRUUpdate[rchild] = Way[r]; end else begin - assign LRUUpdate[lchild] = LRUUpdate[node] & ~WayEncoded[r]; - assign LRUUpdate[rchild] = LRUUpdate[node] & WayEncoded[r]; + assign LRUUpdate[lchild] = LRUUpdate[node] & ~Way[r]; + assign LRUUpdate[rchild] = LRUUpdate[node] & Way[r]; end end @@ -129,14 +132,10 @@ module cacheLRU assign Intermediate[node] = CurrLRU[node] ? int1[LOGNUMWAYS-1:0] : int0[LOGNUMWAYS-1:0]; end - logic [NUMWAYS-1:0] FirstZero; - logic [LOGNUMWAYS-1:0] FirstZeroWay; - logic [LOGNUMWAYS-1:0] VictimWayEnc; priorityonehot #(NUMWAYS) FirstZeroEncoder(~ValidWay, FirstZero); binencoder #(NUMWAYS) FirstZeroWayEncoder(FirstZero, FirstZeroWay); mux2 #(LOGNUMWAYS) VictimMux(FirstZeroWay, Intermediate[NUMWAYS-2], AllValid, VictimWayEnc); - //decoder #(LOGNUMWAYS) decoder (Intermediate[NUMWAYS-2], VictimWay); decoder #(LOGNUMWAYS) decoder (VictimWayEnc, VictimWay); // LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice. From e72880fd8905f7f3315510f141fa545ef42b7f65 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 1 Mar 2024 09:59:54 -0600 Subject: [PATCH 17/52] Changed cachefsm state STATE_HIT to STATE_ACCESS. 
--- src/cache/cachefsm.sv | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 869789df5..15eda55f5 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -79,7 +79,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic CMOZeroNoEviction; logic StallConditions; - typedef enum logic [3:0]{STATE_HIT, // hit states + typedef enum logic [3:0]{STATE_ACCESS, // hit states // miss states STATE_FETCH, STATE_WRITEBACK, @@ -101,7 +101,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. - assign CacheAccess = (|CacheRW) & ((CurrState == STATE_HIT & ~Stall & ~FlushStage) | (CurrState == STATE_ADDRESS_SETUP & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW + assign CacheAccess = (|CacheRW) & ((CurrState == STATE_ACCESS & ~Stall & ~FlushStage) | (CurrState == STATE_ADDRESS_SETUP & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW assign CacheMiss = CacheAccess & ~CacheHit; // special case on reset. 
When the fsm first exists reset twayhe @@ -110,22 +110,22 @@ module cachefsm import cvw::*; #(parameter cvw_t P, flop #(1) resetDelayReg(.clk, .d(reset), .q(resetDelay)); always_ff @(posedge clk) - if (reset | FlushStage) CurrState <= #1 STATE_HIT; + if (reset | FlushStage) CurrState <= #1 STATE_ACCESS; else CurrState <= #1 NextState; always_comb begin - NextState = STATE_HIT; + NextState = STATE_ACCESS; case (CurrState) // exclusion-tag: icache state-case - STATE_HIT: if(InvalidateCache) NextState = STATE_HIT; // exclusion-tag: dcache InvalidateCheck + STATE_ACCESS: if(InvalidateCache) NextState = STATE_ACCESS; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; // exclusion-tag: icache FLUSHStatement else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement else if((AnyMiss | CMOWriteback) & ~READ_ONLY_CACHE) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement - else NextState = STATE_HIT; + else NextState = STATE_ACCESS; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else NextState = STATE_FETCH; STATE_WRITE_LINE: NextState = STATE_ADDRESS_SETUP; STATE_ADDRESS_SETUP: if(Stall) NextState = STATE_ADDRESS_SETUP; - else NextState = STATE_HIT; + else NextState = STATE_ACCESS; // exclusion-tag-start: icache case STATE_WRITEBACK: if(CacheBusAck & ~(|CMOpM[3:1])) NextState = STATE_FETCH; else if(CacheBusAck) NextState = STATE_ADDRESS_SETUP; // Read_hold lowers CacheStall @@ -138,14 +138,14 @@ module cachefsm import cvw::*; #(parameter cvw_t P, else if(CacheBusAck) NextState = STATE_ADDRESS_SETUP; else NextState = STATE_FLUSH_WRITEBACK; // exclusion-tag-end: icache case - default: NextState = STATE_HIT; + default: NextState = STATE_ACCESS; endcase end // com back to CPU - assign CacheCommitted = (CurrState != STATE_HIT) & ~(READ_ONLY_CACHE & (CurrState == STATE_ADDRESS_SETUP)); + assign CacheCommitted = (CurrState != STATE_ACCESS) 
& ~(READ_ONLY_CACHE & (CurrState == STATE_ADDRESS_SETUP)); assign StallConditions = FlushCache | AnyMiss | CMOWriteback; // exclusion-tag: icache FlushCache - assign CacheStall = (CurrState == STATE_HIT & StallConditions) | // exclusion-tag: icache StallStates + assign CacheStall = (CurrState == STATE_ACCESS & StallConditions) | // exclusion-tag: icache StallStates (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. @@ -153,15 +153,15 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache assign SetValid = CurrState == STATE_WRITE_LINE | - (CurrState == STATE_HIT & CMOZeroNoEviction) | + (CurrState == STATE_ACCESS & CMOZeroNoEviction) | (CurrState == STATE_WRITEBACK & CacheBusAck & CMOpM[3]); - assign ClearValid = (CurrState == STATE_HIT & CMOpM[0]) | + assign ClearValid = (CurrState == STATE_ACCESS & CMOpM[0]) | (CurrState == STATE_WRITEBACK & CMOpM[2] & CacheBusAck); - assign LRUWriteEn = (((CurrState == STATE_HIT & (AnyHit | CMOZeroNoEviction)) | + assign LRUWriteEn = (((CurrState == STATE_ACCESS & (AnyHit | CMOZeroNoEviction)) | (CurrState == STATE_WRITE_LINE)) & ~FlushStage) | (CurrState == STATE_WRITEBACK & CMOpM[3] & CacheBusAck); // exclusion-tag-start: icache flushdirtycontrols - assign SetDirty = (CurrState == STATE_HIT & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty + assign SetDirty = (CurrState == STATE_ACCESS & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | (CurrState == STATE_WRITEBACK & (CMOpM[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty @@ -169,10 +169,10 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // Flush and eviction controls 
CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & CacheBusAck; assign SelVictim = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOpM[1] | CMOpM[2])) | (CacheBusAck & CMOpM[3]))) | - (CurrState == STATE_HIT & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | + (CurrState == STATE_ACCESS & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2] | ~CacheBusAck)) | - (CurrState == STATE_HIT & AnyMiss & LineDirty); + (CurrState == STATE_ACCESS & AnyMiss & LineDirty); // coverage off -item e 1 -fecexprrow 1 // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | @@ -183,29 +183,29 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK & FlushFlag & CacheBusAck); // exclusion-tag-end: icache flushdirtycontrols // Bus interface controls - assign CacheBusRW[1] = (CurrState == STATE_HIT & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses + assign CacheBusRW[1] = (CurrState == STATE_ACCESS & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses (CurrState == STATE_FETCH & ~CacheBusAck) | (CurrState == STATE_WRITEBACK & CacheBusAck & ~(|CMOpM)); logic LoadMiss; assign LoadMiss = (CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign CacheBusRW[0] = (CurrState == STATE_HIT & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW + assign CacheBusRW[0] = (CurrState == STATE_ACCESS & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & ~CacheBusAck); - assign SelAdrData = (CurrState == STATE_HIT & (CacheRW[0] | AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrCauses // changes if 
store delay hazard removed + assign SelAdrData = (CurrState == STATE_ACCESS & (CacheRW[0] | AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | resetDelay; - assign SelAdrTag = (CurrState == STATE_HIT & (AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrTag // changes if store delay hazard removed + assign SelAdrTag = (CurrState == STATE_ACCESS & (AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrTag // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | resetDelay; assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_ADDRESS_SETUP; - assign CacheEn = (~Stall | StallConditions) | (CurrState != STATE_HIT) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn + assign CacheEn = (~Stall | StallConditions) | (CurrState != STATE_ACCESS) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn endmodule // cachefsm From 60f96112db2707ba676a17321118afaeda0301a7 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 1 Mar 2024 10:23:55 -0600 Subject: [PATCH 18/52] Moved the zero stage boot loader to the fpga directory. 
--- {tests/custom => fpga}/zsbl/Makefile | 2 +- {tests/custom => fpga}/zsbl/bios.s | 2 +- {tests/custom => fpga}/zsbl/copyFlash.c | 0 {tests/custom => fpga}/zsbl/gpt.c | 0 {tests/custom => fpga}/zsbl/gpt.h | 0 {tests/custom => fpga}/zsbl/main.c | 0 {tests/custom => fpga}/zsbl/sdcDriver.c | 2 +- {tests/custom => fpga}/zsbl/sdcDriver.h | 0 {tests/custom => fpga}/zsbl/smp.h | 0 {tests/custom => fpga}/zsbl/uart.c | 0 {tests/custom => fpga}/zsbl/uart.h | 0 11 files changed, 3 insertions(+), 3 deletions(-) rename {tests/custom => fpga}/zsbl/Makefile (98%) rename {tests/custom => fpga}/zsbl/bios.s (97%) rename {tests/custom => fpga}/zsbl/copyFlash.c (100%) rename {tests/custom => fpga}/zsbl/gpt.c (100%) rename {tests/custom => fpga}/zsbl/gpt.h (100%) rename {tests/custom => fpga}/zsbl/main.c (100%) rename {tests/custom => fpga}/zsbl/sdcDriver.c (98%) rename {tests/custom => fpga}/zsbl/sdcDriver.h (100%) rename {tests/custom => fpga}/zsbl/smp.h (100%) rename {tests/custom => fpga}/zsbl/uart.c (100%) rename {tests/custom => fpga}/zsbl/uart.h (100%) diff --git a/tests/custom/zsbl/Makefile b/fpga/zsbl/Makefile similarity index 98% rename from tests/custom/zsbl/Makefile rename to fpga/zsbl/Makefile index 6dec9c797..bd30033fc 100644 --- a/tests/custom/zsbl/Makefile +++ b/fpga/zsbl/Makefile @@ -21,7 +21,7 @@ ROOT := .. 
LIBRARY_DIRS := LIBRARY_FILES := -MARCH :=-march=rv64imfdc +MARCH :=-march=rv64imfdc_zifencei MABI :=-mabi=lp64d LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles LINKER :=linker.x diff --git a/tests/custom/zsbl/bios.s b/fpga/zsbl/bios.s similarity index 97% rename from tests/custom/zsbl/bios.s rename to fpga/zsbl/bios.s index ebeadcf59..7954eab7a 100644 --- a/tests/custom/zsbl/bios.s +++ b/fpga/zsbl/bios.s @@ -94,5 +94,5 @@ end_of_bios: .globl _dtb .align 4, 0 _dtb: -.incbin "wally-vcu118.dtb" +#.incbin "wally-vcu118.dtb" diff --git a/tests/custom/zsbl/copyFlash.c b/fpga/zsbl/copyFlash.c similarity index 100% rename from tests/custom/zsbl/copyFlash.c rename to fpga/zsbl/copyFlash.c diff --git a/tests/custom/zsbl/gpt.c b/fpga/zsbl/gpt.c similarity index 100% rename from tests/custom/zsbl/gpt.c rename to fpga/zsbl/gpt.c diff --git a/tests/custom/zsbl/gpt.h b/fpga/zsbl/gpt.h similarity index 100% rename from tests/custom/zsbl/gpt.h rename to fpga/zsbl/gpt.h diff --git a/tests/custom/zsbl/main.c b/fpga/zsbl/main.c similarity index 100% rename from tests/custom/zsbl/main.c rename to fpga/zsbl/main.c diff --git a/tests/custom/zsbl/sdcDriver.c b/fpga/zsbl/sdcDriver.c similarity index 98% rename from tests/custom/zsbl/sdcDriver.c rename to fpga/zsbl/sdcDriver.c index edbe0677d..45caa42fa 100644 --- a/tests/custom/zsbl/sdcDriver.c +++ b/fpga/zsbl/sdcDriver.c @@ -1,7 +1,7 @@ /////////////////////////////////////////// // SDC.sv // -// Written: Ross Thompson September 25, 2021 +// Written: Rose Thompson September 25, 2021 // Modified: // // Purpose: driver for sdc reader. 
diff --git a/tests/custom/zsbl/sdcDriver.h b/fpga/zsbl/sdcDriver.h similarity index 100% rename from tests/custom/zsbl/sdcDriver.h rename to fpga/zsbl/sdcDriver.h diff --git a/tests/custom/zsbl/smp.h b/fpga/zsbl/smp.h similarity index 100% rename from tests/custom/zsbl/smp.h rename to fpga/zsbl/smp.h diff --git a/tests/custom/zsbl/uart.c b/fpga/zsbl/uart.c similarity index 100% rename from tests/custom/zsbl/uart.c rename to fpga/zsbl/uart.c diff --git a/tests/custom/zsbl/uart.h b/fpga/zsbl/uart.h similarity index 100% rename from tests/custom/zsbl/uart.h rename to fpga/zsbl/uart.h From 4c3d927474180bcbb0913672c791d63eb6d45332 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 1 Mar 2024 11:00:24 -0600 Subject: [PATCH 19/52] Renamed CacheHit to Hit. --- src/cache/cache.sv | 6 +++--- src/cache/cachefsm.sv | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index cf3e5e0d4..3cf2f28a6 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -82,7 +82,7 @@ module cache import cvw::*; #(parameter cvw_t P, logic ClearDirty, SetDirty, SetValid, ClearValid; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] HitWay, ValidWay; - logic CacheHit; + logic Hit; logic [NUMWAYS-1:0] VictimWay, DirtyWay, HitDirtyWay; logic LineDirty, HitLineDirty; logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; @@ -132,7 +132,7 @@ module cache import cvw::*; #(parameter cvw_t P, end else assign VictimWay = 1'b1; // one hot. 
- assign CacheHit = |HitWay; + assign Hit = |HitWay; assign LineDirty = |DirtyWay; assign HitLineDirty = |HitDirtyWay; @@ -226,7 +226,7 @@ module cache import cvw::*; #(parameter cvw_t P, cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .Stall, - .CacheHit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted, + .Hit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdrData, .SelAdrTag, .SelVictim, .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 15eda55f5..0059bb81d 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -50,7 +50,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic CacheAccess, // Cache access // cache internals - input logic CacheHit, // Exactly 1 way hits + input logic Hit, // Exactly 1 way hits input logic LineDirty, // The selected line and way is dirty input logic HitLineDirty, // The cache hit way is dirty input logic FlushAdrFlag, // On last set of a cache flush @@ -92,17 +92,17 @@ module cachefsm import cvw::*; #(parameter cvw_t P, statetype CurrState, NextState; - assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 - assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit + assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~Hit & ~InvalidateCache; // exclusion-tag: cache AnyMiss + assign AnyUpdateHit = (CacheRW[0]) & Hit; // exclusion-tag: icache storeAMO1 + assign AnyHit = AnyUpdateHit | (CacheRW[1] & Hit); // exclusion-tag: icache AnyUpdateHit assign CMOZeroNoEviction = CMOpM[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now - assign CMOWriteback = ((CMOpM[1] | CMOpM[2]) & CacheHit & HitLineDirty) | 
CMOpM[3] & LineDirty; + assign CMOWriteback = ((CMOpM[1] | CMOpM[2]) & Hit & HitLineDirty) | CMOpM[3] & LineDirty; assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. assign CacheAccess = (|CacheRW) & ((CurrState == STATE_ACCESS & ~Stall & ~FlushStage) | (CurrState == STATE_ADDRESS_SETUP & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW - assign CacheMiss = CacheAccess & ~CacheHit; + assign CacheMiss = CacheAccess & ~Hit; // special case on reset. When the fsm first exists reset twayhe // PCNextF will no longer be pointing to the correct address. @@ -169,7 +169,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // Flush and eviction controls CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & CacheBusAck; assign SelVictim = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOpM[1] | CMOpM[2])) | (CacheBusAck & CMOpM[3]))) | - (CurrState == STATE_ACCESS & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | + (CurrState == STATE_ACCESS & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~Hit))) | (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2] | ~CacheBusAck)) | (CurrState == STATE_ACCESS & AnyMiss & LineDirty); @@ -188,7 +188,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_WRITEBACK & CacheBusAck & ~(|CMOpM)); logic LoadMiss; - assign LoadMiss = (CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss + assign LoadMiss = (CacheRW[1]) & ~Hit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign CacheBusRW[0] = (CurrState == STATE_ACCESS & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | From cba3209e7fd7851aa57233f8771133bd29b7527e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Sat, 2 Mar 2024 11:38:33 -0600 Subject: [PATCH 20/52] Trying an experiment. 
Use the less compact subwordreaddouble in the fpga synthesize rather than subwordreadmisaligned. --- src/lsu/lsu.sv | 3 +- src/lsu/subwordreaddouble.sv | 196 +++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 1712cdc19..70adc90ce 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -423,7 +423,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + //subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv new file mode 100644 index 000000000..936240cf7 --- /dev/null +++ b/src/lsu/subwordreaddouble.sv @@ -0,0 +1,196 @@ +/////////////////////////////////////////// +// subwordread.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Extract subwords and sign extend for reads +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. 
+// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordreaddouble #(parameter LLEN) + ( + input logic [LLEN*2-1:0] ReadDataWordMuxM, + input logic [2:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN-1:0] ReadDataM +); + + logic [7:0] ByteM; + logic [15:0] HalfwordM; + logic [4:0] PAdrSwap; + logic [4:0] BigEndianPAdr; + logic [4:0] LengthM; + + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + + always_comb + case(Funct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; + 3'b001: LengthM = 5'd2; + 3'b010: LengthM = 5'd4; + 3'b011: LengthM = 5'd8; + 3'b100: LengthM = 5'd16; + default: LengthM = 5'd8; + endcase + + if (LLEN == 128) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + logic [63:0] QdWordM; + always_comb + case(PAdrSwap) + 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; + 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; + 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; + 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; + 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; + 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; + 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; + 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; + 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; + 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; + 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; + 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; + 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; + 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; + 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; + 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; + 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; + 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; + 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; + 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; + 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; + 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; + 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; + 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; + 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; + 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; + 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; + 
5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; + 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; + 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; + 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; + 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; + endcase + + assign ByteM = QdWordM[7:0]; + assign HalfwordM = QdWordM[15:0]; + assign WordM = QdWordM[31:0]; + assign DblWordM = QdWordM[63:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; + 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; + 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; + 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; + 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; + 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; + 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; + 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: 
DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + assign ByteM = DblWordM[7:0]; + assign HalfwordM = DblWordM[15:0]; + assign WordM = DblWordM[31:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + + logic [31:0] WordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase + + assign ByteM = WordM[7:0]; + assign HalfwordM = WordM[15:0]; + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, 
ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen + endcase + end +endmodule From 8136b45ca7f8dedb83fd97e152bbb8765436894e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Sat, 2 Mar 2024 11:55:43 -0600 Subject: [PATCH 21/52] Revert "Trying an experiment. Use the less compact subwordreaddouble in the fpga synthesize rather than subwordreadmisaligned." This reverts commit cba3209e7fd7851aa57233f8771133bd29b7527e. --- src/lsu/lsu.sv | 3 +- src/lsu/subwordreaddouble.sv | 196 ----------------------------------- 2 files changed, 1 insertion(+), 198 deletions(-) delete mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 70adc90ce..1712cdc19 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -423,8 +423,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - //subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv deleted file mode 100644 index 936240cf7..000000000 --- a/src/lsu/subwordreaddouble.sv +++ /dev/null @@ -1,196 +0,0 @@ -/////////////////////////////////////////// -// subwordread.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// 
Purpose: Extract subwords and sign extend for reads -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordreaddouble #(parameter LLEN) - ( - input logic [LLEN*2-1:0] ReadDataWordMuxM, - input logic [2:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN-1:0] ReadDataM -); - - logic [7:0] ByteM; - logic [15:0] HalfwordM; - logic [4:0] PAdrSwap; - logic [4:0] BigEndianPAdr; - logic [4:0] LengthM; - - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; - /* verilator lint_off WIDTHEXPAND */ - /* verilator lint_off WIDTHTRUNC */ - assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; - /* verilator lint_on WIDTHTRUNC */ - /* verilator lint_on WIDTHEXPAND */ - - always_comb - case(Funct3M & {FpLoadStoreM, 2'b11}) - 3'b000: LengthM = 5'd1; - 3'b001: LengthM = 5'd2; - 3'b010: LengthM = 5'd4; - 3'b011: LengthM = 5'd8; - 3'b100: LengthM = 5'd16; - default: LengthM = 5'd8; - endcase - - if (LLEN == 128) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - logic [63:0] QdWordM; - always_comb - case(PAdrSwap) - 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; - 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; - 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; - 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; - 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; - 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; - 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; - 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; - 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; - 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; - 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; - 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; - 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; - 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; - 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; - 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; - 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; - 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; - 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; - 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; - 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; - 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; - 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; - 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; - 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; - 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; - 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; - 
5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; - 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; - 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; - 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; - 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; - endcase - - assign ByteM = QdWordM[7:0]; - assign HalfwordM = QdWordM[15:0]; - assign WordM = QdWordM[31:0]; - assign DblWordM = QdWordM[63:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; - 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; - 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; - 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; - 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; - 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; - 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; - 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: 
DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - assign ByteM = DblWordM[7:0]; - assign HalfwordM = DblWordM[15:0]; - assign WordM = DblWordM[31:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - - logic [31:0] WordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase - - assign ByteM = WordM[7:0]; - assign HalfwordM = WordM[15:0]; - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, 
ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen - endcase - end -endmodule From a22de456312ea45ff77612eec9a73c1eed625c2e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Sat, 2 Mar 2024 16:20:31 -0600 Subject: [PATCH 22/52] Removed unused storedelay from align. --- src/lsu/align.sv | 10 +++------- src/lsu/lsu.sv | 8 +++----- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index ad0dbf238..094da4a15 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -52,7 +52,6 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic SelStoreDelay, //*** this is bad. 
really don't like moving this outside output logic [P.LLEN*2-1:0] ReadDataWordSpillAllM, output logic SpillStallM); @@ -118,20 +117,17 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (ValidSpillM & ~MemRWM[0]) NextState = STATE_SPILL; // load spill - else if(ValidSpillM) NextState = STATE_STORE_DELAY; // store spill + STATE_READY: if (ValidSpillM) NextState = STATE_SPILL; // load spill else NextState = STATE_READY; // no spill STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; - STATE_STORE_DELAY: NextState = STATE_SPILL; default: NextState = STATE_READY; endcase end - assign SelSpillM = (CurrState == STATE_SPILL | CurrState == STATE_STORE_DELAY); - assign SelSpillE = (CurrState == STATE_READY & ValidSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); + assign SelSpillM = CurrState == STATE_SPILL; + assign SelSpillE = (CurrState == STATE_READY & ValidSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall); assign SpillSaveM = (CurrState == STATE_READY) & ValidSpillM & ~FlushM; - assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? 
assign SpillStallM = SelSpillE; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 1712cdc19..cf1767f5c 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -142,7 +142,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic [1:0] MemRWSpillM; logic SpillStallM; - logic SelStoreDelay; logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB @@ -168,8 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, - .SelStoreDelay); + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; end else begin : no_ziccslm_align @@ -179,7 +177,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign ReadDataWordSpillAllM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign MemRWSpillM = MemRWM; - assign {SpillStallM, SelStoreDelay} = '0; + assign {SpillStallM} = '0; end if(P.ZICBOZ_SUPPORTED) begin : cboz @@ -333,7 +331,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), - .CacheRW(SelStoreDelay ? 
2'b00 : CacheRWM), + .CacheRW(CacheRWM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataM), .SelHPTW, From 0222e8f42add640fd8f5d16bc32bf6d6890dec68 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 4 Mar 2024 17:52:41 -0600 Subject: [PATCH 23/52] Don't want to clear the lru bits on invalidation (clearvalid). --- src/cache/cacheLRU.sv | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index 2670af93f..71b11abee 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -144,9 +144,7 @@ module cacheLRU always_ff @(posedge clk) begin if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] = '0; // exclusion-tag: initialize if(CacheEn) begin - if(ClearValid & ~FlushStage) - LRUMemory[PAdr] <= '0; - else if(LRUWriteEn) + if(LRUWriteEn) LRUMemory[PAdr] <= NextLRU; if(LRUWriteEn & (PAdr == CacheSetTag)) CurrLRU <= #1 NextLRU; From 457d3481e76a686973dd39cb74dee9c4b5abd63a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 4 Mar 2024 17:58:41 -0600 Subject: [PATCH 24/52] How did this error get past for so long. --- src/cache/cacheLRU.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index 71b11abee..2d59ccfd7 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -142,7 +142,7 @@ module cacheLRU // This is a two port memory. // Every cycle must read from CacheSetData and each load/store must write the new LRU. 
always_ff @(posedge clk) begin - if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] = '0; // exclusion-tag: initialize + if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; // exclusion-tag: initialize if(CacheEn) begin if(LRUWriteEn) LRUMemory[PAdr] <= NextLRU; From 86956026dc2aa4f60cf805ef2a785cc8d8aca00d Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 6 Mar 2024 04:24:31 -0800 Subject: [PATCH 25/52] Further simplified subwordread muxing --- src/lsu/subwordread.sv | 45 ++++++++++++++++++++++------------------- src/lsu/subwordwrite.sv | 6 +++--- src/mmu/hptw.sv | 1 - 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/lsu/subwordread.sv b/src/lsu/subwordread.sv index 593d01813..a5ccd12bf 100644 --- a/src/lsu/subwordread.sv +++ b/src/lsu/subwordread.sv @@ -29,39 +29,42 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module subwordread import cvw::*; #(parameter cvw_t P) ( - input logic [P.LLEN-1:0] ReadDataWordMuxM, - input logic [3:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [P.LLEN-1:0] ReadDataM + input logic [P.LLEN-1:0] ReadDataWordMuxM, + input logic [3:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [P.LLEN-1:0] ReadDataM ); + localparam ADRBITS = $clog2(P.LLEN)-3; + logic [ADRBITS-1:0] PAdrSwapM; logic [7:0] ByteM; logic [15:0] HalfwordM; logic [31:0] WordM; logic [63:0] DblWordM; - logic [ADRBITS-1:0] PAdrSwap; - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. 
- if (P.BIGENDIAN_SUPPORTED) assign PAdrSwap = PAdrM[ADRBITS-1:0] ^ {ADRBITS{BigEndianM}}; - else assign PAdrSwap = PAdrM[ADRBITS-1:0]; - assign ByteM = ReadDataWordMuxM[PAdrSwap*8 +: 8]; - assign HalfwordM = ReadDataWordMuxM[PAdrSwap[ADRBITS-1:1]*16 +: 16]; - if (P.LLEN >= 64) assign WordM = ReadDataWordMuxM[PAdrSwap[ADRBITS-1:2] * 32 +: 32]; - else assign WordM = ReadDataWordMuxM; - if (P.LLEN >= 64) assign DblWordM = ReadDataWordMuxM[PAdrSwap[ADRBITS-1] * 64 +: 64]; + // invert lsbs of address to select appropriate subword for big endian + if (P.BIGENDIAN_SUPPORTED) assign PAdrSwapM = PAdrM[ADRBITS-1:0] ^ {ADRBITS{BigEndianM}}; + else assign PAdrSwapM = PAdrM[ADRBITS-1:0]; + + // Use indexed part select to imply muxes to select each size of subword + if (P.LLEN == 128) mux2 #(64) dblmux(ReadDataWordMuxM[63:0], ReadDataWordMuxM[127:64], PAdrSwapM[3], DblWordM); + else if (P.LLEN == 64) assign DblWordM = ReadDataWordMuxM; + if (P.LLEN >= 64) mux2 #(32) wordmux(DblWordM[31:0], DblWordM[63:32], PAdrSwapM[2], WordM); + else assign WordM = ReadDataWordMuxM; + mux2 #(16) halfwordmux(WordM[15:0], WordM[31:16], PAdrSwapM[1], HalfwordM); + mux2 #(8) bytemux(HalfwordM[7:0], HalfwordM[15:8], PAdrSwapM[0], ByteM); // sign extension/ NaN boxing always_comb case(Funct3M) - 3'b000: ReadDataM = {{(P.LLEN-8){ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{P.LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{P.LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: if (P.LLEN >= 64) ReadDataM = {{P.LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - else ReadDataM = ReadDataWordMuxM; + 3'b000: ReadDataM = {{(P.LLEN-8){ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{P.LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{P.LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: if (P.LLEN >= 64) ReadDataM = {{P.LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // 
ld/fld + else ReadDataM = ReadDataWordMuxM; // shouldn't happen 3'b100: if (P.LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{P.LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq else ReadDataM = {{P.LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{P.LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu diff --git a/src/lsu/subwordwrite.sv b/src/lsu/subwordwrite.sv index 705672ff7..659d6d9c7 100644 --- a/src/lsu/subwordwrite.sv +++ b/src/lsu/subwordwrite.sv @@ -55,9 +55,9 @@ module subwordwrite #(parameter LLEN) ( end else begin:sww // 32-bit always_comb case(LSUFunct3M[1:0]) - 2'b00: LittleEndianWriteDataM = {4{IMAFWriteDataM[7:0]}}; // sb - 2'b01: LittleEndianWriteDataM = {2{IMAFWriteDataM[15:0]}}; // sh - 2'b10: LittleEndianWriteDataM = IMAFWriteDataM; // sw + 2'b00: LittleEndianWriteDataM = {4{IMAFWriteDataM[7:0]}}; // sb + 2'b01: LittleEndianWriteDataM = {2{IMAFWriteDataM[15:0]}}; // sh + 2'b10: LittleEndianWriteDataM = IMAFWriteDataM; // sw default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen endcase end diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index 4e292ba3d..2a4288560 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -148,7 +148,6 @@ module hptw import cvw::*; #(parameter cvw_t P) ( flopenr #(1) TLBMissMReg(clk, reset, StartWalk, DTLBMissOrUpdateDAM, DTLBWalk); // when walk begins, record whether it was for DTLB (or record 0 for ITLB) assign PRegEn = HPTWRW[1] & ~DCacheBusStallM | UpdatePTE; flopenr #(P.XLEN) PTEReg(clk, reset, PRegEn, NextPTE, PTE); // Capture page table entry from data cache - assert property(@(posedge clk) ~PRegEn | reset | NextPTE[0] !== 1'bx); // report writing an x PTE from an uninitialized page table // Assign PTE descriptors common across all XLEN values // For non-leaf PTEs, D, A, U bits are reserved and ignored. 
They do not cause faults while walking the page table From dd33479056f154d4047ecd562836cfc4451beb2d Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 6 Mar 2024 04:59:58 -0800 Subject: [PATCH 26/52] Switched to ?: for gating per section 4.2.4.3 --- src/fpu/fma/fmaadd.sv | 4 ++-- src/privileged/trap.sv | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fpu/fma/fmaadd.sv b/src/fpu/fma/fmaadd.sv index 56f4a80cb..d408381c0 100644 --- a/src/fpu/fma/fmaadd.sv +++ b/src/fpu/fma/fmaadd.sv @@ -51,9 +51,9 @@ module fmaadd import cvw::*; #(parameter cvw_t P) ( /////////////////////////////////////////////////////////////////////////////// // Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition - assign AmInv = {3*P.NF+4{InvA}}^Am; + assign AmInv = InvA ? ~Am : Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = {2*P.NF+2{~KillProd}}&Pm; + assign PmKilled = KillProd ? '0 : Pm; // Do the addition // - calculate a positive and negative sum in parallel // if there was a small negative number killed in the alignment stage one needs to be subtracted from the sum diff --git a/src/privileged/trap.sv b/src/privileged/trap.sv index f20604379..469ff4e06 100644 --- a/src/privileged/trap.sv +++ b/src/privileged/trap.sv @@ -65,8 +65,8 @@ module trap import cvw::*; #(parameter cvw_t P) ( assign PendingIntsM = MIP_REGW & MIE_REGW; assign IntPendingM = |PendingIntsM; assign Committed = CommittedM | CommittedF; - assign EnabledIntsM = ({12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW); - assign ValidIntsM = {12{~Committed}} & EnabledIntsM; + assign EnabledIntsM = (MIntGlobalEnM ? PendingIntsM & ~MIDELEG_REGW : '0) | (SIntGlobalEnM ? PendingIntsM & MIDELEG_REGW : '0); + assign ValidIntsM = Committed ? 
'0 : EnabledIntsM; assign InterruptM = (|ValidIntsM) & InstrValidM & (~wfiM | wfiW); // suppress interrupt if the memory system has partially processed a request. Delay interrupt until wfi is in the W stage. // wfiW is to support possible but unlikely back to back wfi instructions. wfiM would be high in the M stage, while also in the W stage. assign DelegateM = P.S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM] : MEDELEG_REGW[CauseM]) & From b386331cc82f08a533b2374224ce14b9a4f958a3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 6 Mar 2024 05:48:17 -0800 Subject: [PATCH 27/52] Changed '0 to 0 where possible per Chapter 4 style guidelines --- src/cache/cache.sv | 4 +-- src/cache/cacheLRU.sv | 2 +- src/cache/cacheway.sv | 10 +++---- src/ebu/ahbinterface.sv | 4 +-- src/ebu/ebu.sv | 10 +++---- src/fpu/fdivsqrt/fdivsqrtfgen2.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtfgen4.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtiter.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtstage2.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtstage4.sv | 2 +- src/fpu/fli.sv | 6 ++-- src/fpu/fma/fmaadd.sv | 2 +- src/fpu/fma/fmaexpadd.sv | 2 +- src/fpu/fpu.sv | 2 +- src/fpu/postproc/divshiftcalc.sv | 2 +- src/fpu/postproc/fmashiftcalc.sv | 4 +-- src/fpu/postproc/round.sv | 4 +-- src/fpu/postproc/shiftcorrection.sv | 2 +- src/fpu/postproc/specialcase.sv | 2 +- src/generic/mem/ram1p1rwbe.sv | 2 +- src/generic/mem/ram1p1rwe.sv | 8 +++--- src/generic/mem/ram2p1r1wbe.sv | 27 +++++++++--------- src/generic/mem/rom1p1r.sv | 2 +- src/generic/onehotdecoder.sv | 2 +- src/hazard/hazard.sv | 2 +- src/ieu/bmu/cnt.sv | 4 +-- src/ifu/bpred/bpred.sv | 2 +- src/ifu/bpred/icpred.sv | 2 +- src/ifu/bpred/localrepairbp.sv | 2 +- src/ifu/ifu.sv | 22 +++++++-------- src/lsu/align.sv | 10 +++---- src/lsu/lsu.sv | 42 ++++++++++++++-------------- src/lsu/swbytemask.sv | 2 +- src/mmu/hptw.sv | 8 +++--- src/mmu/tlb/tlbmixer.sv | 2 +- src/privileged/csrm.sv | 4 +-- src/privileged/csrsr.sv | 2 +- 
src/privileged/privdec.sv | 2 +- src/privileged/trap.sv | 4 +-- src/uncore/plic_apb.sv | 10 +++---- testbench/common/functionName.sv | 6 ++-- testbench/common/wallyTracer.sv | 8 +++--- testbench/common/watchdog.sv | 2 +- testbench/testbench-imperas.sv | 4 +-- testbench/testbench-xcelium.sv | 8 +++--- testbench/testbench.sv | 8 +++--- 47 files changed, 133 insertions(+), 132 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 4a97a29d4..058d160fa 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -176,7 +176,7 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN/8-1:0] BlankByteMask; assign BlankByteMask[WORDLEN/8-1:0] = ByteMask; - assign BlankByteMask[LINELEN/8-1:WORDLEN/8] = '0; + assign BlankByteMask[LINELEN/8-1:WORDLEN/8] = 0; assign DemuxedByteMask = BlankByteMask << ((MUXINTERVAL/8) * WordOffsetAddr); @@ -187,7 +187,7 @@ module cache import cvw::*; #(parameter cvw_t P, mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index] & ~CMOpM[3]), .y(LineWriteData[8*index+7:8*index])); end - assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; + assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : 0; end else begin:WriteSelLogic diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index 26bdca887..2b585aaa5 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -143,7 +143,7 @@ module cacheLRU // This is a two port memory. // Every cycle must read from CacheSetData and each load/store must write the new LRU. 
always_ff @(posedge clk) begin - if (reset | (InvalidateCache & ~FlushStage)) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; + if (reset | (InvalidateCache & ~FlushStage)) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= 0; if(CacheEn) begin if(LRUWriteEn) LRUMemory[PAdr] <= NextLRU; diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 678f7acac..020a3f63c 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -120,7 +120,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, .din(PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); // AND portion of distributed tag multiplexer - assign TagWay = SelData ? ReadTag : '0; // AND part of AOMux + assign TagWay = SelData ? ReadTag : 0; // AND part of AOMux assign HitDirtyWay = Dirty & ValidWay; assign DirtyWay = SelDirty & HitDirtyWay; // exclusion-tag: icache DirtyWay assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]) & ~InvalidateCacheDelay; // exclusion-tag: dcache HitWay @@ -152,19 +152,19 @@ module cacheway import cvw::*; #(parameter cvw_t P, end // AND portion of distributed read multiplexers - assign ReadDataLineWay = SelData ? ReadDataLine : '0; // AND part of AO mux. + assign ReadDataLineWay = SelData ? ReadDataLine : 0; // AND part of AO mux. 
///////////////////////////////////////////////////////////////////////////////////////////// // Valid Bits ///////////////////////////////////////////////////////////////////////////////////////////// always_ff @(posedge clk) begin // Valid bit array, - if (reset) ValidBits <= #1 '0; + if (reset) ValidBits <= #1 0; if(CacheEn) begin ValidWay <= #1 ValidBits[CacheSetTag]; - if(InvalidateCache) ValidBits <= #1 '0; // exclusion-tag: dcache invalidateway + if(InvalidateCache) ValidBits <= #1 0; // exclusion-tag: dcache invalidateway else if (SetValidEN) ValidBits[CacheSetData] <= #1 SetValidWay; - else if (ClearValidEN) ValidBits[CacheSetData] <= #1 '0; // exclusion-tag: icache ClearValidBits + else if (ClearValidEN) ValidBits[CacheSetData] <= #1 0; // exclusion-tag: icache ClearValidBits end end diff --git a/src/ebu/ahbinterface.sv b/src/ebu/ahbinterface.sv index 8852b52c3..2f4944303 100644 --- a/src/ebu/ahbinterface.sv +++ b/src/ebu/ahbinterface.sv @@ -62,8 +62,8 @@ module ahbinterface #( flop #(XLEN) wdreg(HCLK, WriteData, HWDATA); flop #(XLEN/8) HWSTRBReg(HCLK, ByteMask, HWSTRB); end else begin - assign HWDATA = '0; - assign HWSTRB = '0; + assign HWDATA = 0; + assign HWSTRB = 0; end busfsm #(~LSU) busfsm(.HCLK, .HRESETn, .Flush, .BusRW, .BusAtomic, diff --git a/src/ebu/ebu.sv b/src/ebu/ebu.sv index b91eb75b0..356f955f3 100644 --- a/src/ebu/ebu.sv +++ b/src/ebu/ebu.sv @@ -111,11 +111,11 @@ module ebu import cvw::*; #(parameter cvw_t P) ( .HTRANSOut(LSUHTRANSOut), .HADDROut(LSUHADDROut), .HREADYIn(HREADY)); // output mux //*** switch to structural implementation - assign HADDR = LSUSelect ? LSUHADDROut : IFUSelect ? IFUHADDROut : '0; - assign HSIZE = LSUSelect ? LSUHSIZEOut : IFUSelect ? IFUHSIZEOut: '0; - assign HBURST = LSUSelect ? LSUHBURSTOut : IFUSelect ? IFUHBURSTOut : '0; // If doing memory accesses, use LSUburst, else use Instruction burst. - assign HTRANS = LSUSelect ? LSUHTRANSOut : IFUSelect ? 
IFUHTRANSOut: '0; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise - assign HWRITE = LSUSelect ? LSUHWRITEOut : IFUSelect ? 1'b0 : '0; + assign HADDR = LSUSelect ? LSUHADDROut : IFUSelect ? IFUHADDROut : 0; + assign HSIZE = LSUSelect ? LSUHSIZEOut : IFUSelect ? IFUHSIZEOut: 0; + assign HBURST = LSUSelect ? LSUHBURSTOut : IFUSelect ? IFUHBURSTOut : 0; // If doing memory accesses, use LSUburst, else use Instruction burst. + assign HTRANS = LSUSelect ? LSUHTRANSOut : IFUSelect ? IFUHTRANSOut: 0; // SEQ if not first read or write, NONSEQ if first read or write, IDLE otherwise + assign HWRITE = LSUSelect ? LSUHWRITEOut : 0; assign HPROT = 4'b0011; // not used; see Section 3.7 assign HMASTLOCK = 0; // no locking supported diff --git a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv index bc9dce536..732bd6f51 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfgen2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfgen2.sv @@ -37,7 +37,7 @@ module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) ( // Generate for both positive and negative quotient digits assign FP = ~(U << 1) & C; assign FN = (UM << 1) | (C & ~(C << 2)); - assign FZ = '0; + assign FZ = 0; always_comb // Choose which adder input will be used if (up) F = FP; diff --git a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv index a04523e58..f1c2e3281 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfgen4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfgen4.sv @@ -37,7 +37,7 @@ module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) ( // Generate for both positive and negative digits assign F2 = (~U << 2) & (C << 2); // assign F1 = ~(U << 1) & C; - assign F0 = '0; + assign F0 = 0; assign FN1 = (UM << 1) | (C & ~(C << 3)); assign FN2 = (UM << 2) | ((C << 2) & ~(C << 4)); diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index 4bfcebcd1..c942db450 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -81,7 +81,7 @@ module 
fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( // C register/initialization mux: C = -R: // C = -4 = 00.000000... (in Q2.DIVb) for radix 4, C = -2 = 10.000000... for radix2 - if(P.RADIX == 4) assign initC = '0; + if(P.RADIX == 4) assign initC = 0; else assign initC = {2'b10, {{P.DIVb{1'b0}}}}; mux2 #(P.DIVb+2) cmux(C[P.DIVCOPIES], initC, IFDivStartE, NextC); flopen #(P.DIVb+2) creg(clk, FDivBusyE, NextC, C[0]); diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index c3954bc0a..78a50d0c3 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -121,7 +121,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( else IntDivResultM = {(P.XLEN){1'b1}}; end else if (ALTBM) begin // Numerator is small if (RemOpM) IntDivResultM = AM; - else IntDivResultM = '0; + else IntDivResultM = 0; end else IntDivResultM = PreIntResultM[P.XLEN-1:0]; // sign extend result for W64 diff --git a/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/src/fpu/fdivsqrt/fdivsqrtstage2.sv index c3d6e210c..a0a552ac8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -58,7 +58,7 @@ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) ( // Divisor multiple always_comb if (up) Dsel = DBar; - else if (uz) Dsel = '0; + else if (uz) Dsel = 0; else Dsel = D; // un // Residual Update diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index 856273a5e..d879e0f90 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -68,7 +68,7 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( case (udigit) 4'b1000: Dsel = DBar2; 4'b0100: Dsel = DBar; - 4'b0000: Dsel = '0; + 4'b0000: Dsel = 0; 4'b0010: Dsel = D; 4'b0001: Dsel = D2; default: Dsel = 'x; diff --git a/src/fpu/fli.sv b/src/fpu/fli.sv index cf3b736d7..c352d7a5c 100644 --- a/src/fpu/fli.sv +++ b/src/fpu/fli.sv @@ -80,7 +80,7 @@ module fli import cvw::*; 
#(parameter cvw_t P) ( endcase end assign HImmBox = {{(P.FLEN-16){1'b1}}, HImm}; // NaN-box HImm - end else assign HImmBox = '0; + end else assign HImmBox = 0; //////////////////////////// // single @@ -168,7 +168,7 @@ module fli import cvw::*; #(parameter cvw_t P) ( endcase end assign DImmBox = {{(P.FLEN-64){1'b1}}, DImm}; // NaN-box DImm - end else assign DImmBox = '0; + end else assign DImmBox = 0; //////////////////////////// // double @@ -213,7 +213,7 @@ module fli import cvw::*; #(parameter cvw_t P) ( endcase end assign QImmBox = QImm; // NaN-box QImm trivial because Q is longest format - end else assign QImmBox = '0; + end else assign QImmBox = 0; mux4 #(P.FLEN) flimux(SImmBox, DImmBox, HImmBox, QImmBox, Fmt, Imm); // select immediate based on format diff --git a/src/fpu/fma/fmaadd.sv b/src/fpu/fma/fmaadd.sv index d408381c0..00951ee10 100644 --- a/src/fpu/fma/fmaadd.sv +++ b/src/fpu/fma/fmaadd.sv @@ -53,7 +53,7 @@ module fmaadd import cvw::*; #(parameter cvw_t P) ( // Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition assign AmInv = InvA ? ~Am : Am; // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = KillProd ? '0 : Pm; + assign PmKilled = KillProd ? 0 : Pm; // Do the addition // - calculate a positive and negative sum in parallel // if there was a small negative number killed in the alignment stage one needs to be subtracted from the sum diff --git a/src/fpu/fma/fmaexpadd.sv b/src/fpu/fma/fmaexpadd.sv index 06ac7e290..50b85ded0 100644 --- a/src/fpu/fma/fmaexpadd.sv +++ b/src/fpu/fma/fmaexpadd.sv @@ -37,6 +37,6 @@ module fmaexpadd import cvw::*; #(parameter cvw_t P) ( // kill the exponent if the product is zero - either X or Y is 0 assign PZero = XZero | YZero; - assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)}); + assign Pe = PZero ? 
0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (P.NE)'(P.BIAS)}); endmodule diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 7ff9f18bc..c97a12d32 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -275,7 +275,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E); flopenrc #(2) Fmt2EReg(clk, reset, FlushE, ~StallE, InstrD[26:25], Fmt2E); fli #(P) fli(.Rs1(Rs1E), .Fmt(Fmt2E), .Imm(FliResE)); - end else assign FliResE = '0; + end else assign FliResE = 0; // fmv.*.x: NaN Box SrcA to extend integer to requested FP size if(P.FPSIZES == 1) diff --git a/src/fpu/postproc/divshiftcalc.sv b/src/fpu/postproc/divshiftcalc.sv index 0a222d724..b0335c780 100644 --- a/src/fpu/postproc/divshiftcalc.sv +++ b/src/fpu/postproc/divshiftcalc.sv @@ -65,7 +65,7 @@ module divshiftcalc import cvw::*; #(parameter cvw_t P) ( // if the shift amount is negative then don't shift (keep sticky bit) // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) - assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZ-1:0] : '0; + assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[P.LOGNORMSHIFTSZ-1:0] : 0; assign DivShiftAmt = DivResSubnorm ? 
DivSubnormShiftAmt : NormShift; // pre-shift the divider result for normalization diff --git a/src/fpu/postproc/fmashiftcalc.sv b/src/fpu/postproc/fmashiftcalc.sv index e16f51615..81e7fb6d9 100644 --- a/src/fpu/postproc/fmashiftcalc.sv +++ b/src/fpu/postproc/fmashiftcalc.sv @@ -60,7 +60,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( end else if (P.FPSIZES == 3) begin always_comb begin case (Fmt) - P.FMT: BiasCorr = '0; + P.FMT: BiasCorr = 0; P.FMT1: BiasCorr = (P.NE+2)'(P.BIAS1-P.BIAS); P.FMT2: BiasCorr = (P.NE+2)'(P.BIAS2-P.BIAS); default: BiasCorr = 'x; @@ -70,7 +70,7 @@ module fmashiftcalc import cvw::*; #(parameter cvw_t P) ( end else if (P.FPSIZES == 4) begin always_comb begin case (Fmt) - 2'h3: BiasCorr = '0; + 2'h3: BiasCorr = 0; 2'h1: BiasCorr = (P.NE+2)'(P.D_BIAS-P.Q_BIAS); 2'h0: BiasCorr = (P.NE+2)'(P.S_BIAS-P.Q_BIAS); 2'h2: BiasCorr = (P.NE+2)'(P.H_BIAS-P.Q_BIAS); diff --git a/src/fpu/postproc/round.sv b/src/fpu/postproc/round.sv index fa7cebf9c..9e2de2320 100644 --- a/src/fpu/postproc/round.sv +++ b/src/fpu/postproc/round.sv @@ -303,9 +303,9 @@ module round import cvw::*; #(parameter cvw_t P) ( case(PostProcSel) 2'b10: Me = FmaMe; // fma 2'b00: Me = {CvtCe[P.NE], CvtCe}&{P.NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt - // 2'b01: Me = DivDone ? Ue : '0; // divide + // 2'b01: Me = DivDone ? Ue : 0; // divide 2'b01: Me = Ue; // divide - default: Me = '0; + default: Me = 0; endcase diff --git a/src/fpu/postproc/shiftcorrection.sv b/src/fpu/postproc/shiftcorrection.sv index 454b3f9cc..b06d8db0d 100644 --- a/src/fpu/postproc/shiftcorrection.sv +++ b/src/fpu/postproc/shiftcorrection.sv @@ -88,5 +88,5 @@ module shiftcorrection import cvw::*; #(parameter cvw_t P) ( // the quotent is in the range [.5,2) if there is no early termination // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift - assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? 
'0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1}; + assign Ue = (DivResSubnorm & DivSubnormShiftPos) ? 0 : DivUe - {(P.NE+1)'(0), ~LZAPlus1}; endmodule diff --git a/src/fpu/postproc/specialcase.sv b/src/fpu/postproc/specialcase.sv index edb5af6d3..e3a1466fe 100644 --- a/src/fpu/postproc/specialcase.sv +++ b/src/fpu/postproc/specialcase.sv @@ -339,7 +339,7 @@ module specialcase import cvw::*; #(parameter cvw_t P) ( if (P.ZFA_SUPPORTED & P.D_SUPPORTED) // fcvtmod.w.d support always_comb begin - if (Zfa) OfIntRes2 = '0; // fcvtmod.w.d produces 0 on overflow + if (Zfa) OfIntRes2 = 0; // fcvtmod.w.d produces 0 on overflow else OfIntRes2 = OfIntRes; if (Zfa) Int64Res = {{(P.XLEN-32){CvtNegRes[P.XLEN-1]}}, CvtNegRes[31:0]}; else Int64Res = CvtNegRes[P.XLEN-1:0]; diff --git a/src/generic/mem/ram1p1rwbe.sv b/src/generic/mem/ram1p1rwbe.sv index ccfce5da2..186fb9c5a 100644 --- a/src/generic/mem/ram1p1rwbe.sv +++ b/src/generic/mem/ram1p1rwbe.sv @@ -43,7 +43,7 @@ module ram1p1rwbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44, PRE output logic [WIDTH-1:0] dout ); - logic [WIDTH-1:0] RAM[DEPTH-1:0]; + bit [WIDTH-1:0] RAM[DEPTH-1:0]; // *************************************************************************** // TRUE SRAM macro diff --git a/src/generic/mem/ram1p1rwe.sv b/src/generic/mem/ram1p1rwe.sv index ebe7e336b..dd1d884f5 100644 --- a/src/generic/mem/ram1p1rwe.sv +++ b/src/generic/mem/ram1p1rwe.sv @@ -40,7 +40,7 @@ module ram1p1rwe import cvw::* ; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44) ( output logic [WIDTH-1:0] dout ); - logic [WIDTH-1:0] RAM[DEPTH-1:0]; + bit [WIDTH-1:0] RAM[DEPTH-1:0]; // *************************************************************************** // TRUE SRAM macro @@ -49,19 +49,19 @@ module ram1p1rwe import cvw::* ; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44) ( // 64 x 128-bit SRAM ram1p1rwbe_64x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), .A(addr), .D(din), - .BWEB('0), .Q(dout)); + .BWEB(0), .Q(dout)); end else if ((USE_SRAM == 1) & (WIDTH == 
44) & (DEPTH == 64)) begin // RV64 cache tag // 64 x 44-bit SRAM ram1p1rwbe_64x44 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), .A(addr), .D(din), - .BWEB('0), .Q(dout)); + .BWEB(0), .Q(dout)); end else if ((USE_SRAM == 1) & (WIDTH == 22) & (DEPTH == 64)) begin // RV32 cache tag // 64 x 22-bit SRAM ram1p1rwbe_64x22 sram1 (.CLK(clk), .CEB(~ce), .WEB(~we), .A(addr), .D(din), - .BWEB('0), .Q(dout)); + .BWEB(0), .Q(dout)); // *************************************************************************** // READ first SRAM model diff --git a/src/generic/mem/ram2p1r1wbe.sv b/src/generic/mem/ram2p1r1wbe.sv index 0945684d3..83334ea76 100644 --- a/src/generic/mem/ram2p1r1wbe.sv +++ b/src/generic/mem/ram2p1r1wbe.sv @@ -43,7 +43,7 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68) output logic [WIDTH-1:0] rd1 ); - logic [WIDTH-1:0] mem[DEPTH-1:0]; + bit [WIDTH-1:0] mem[DEPTH-1:0]; localparam SRAMWIDTH = 32; localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; @@ -55,11 +55,11 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68) ram2p1r1wbe_1024x68 memory1(.CLKA(clk), .CLKB(clk), .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), + .WEBA(0), .WEBB(~we2), .AA(ra1), .AB(wa2), - .DA('0), + .DA(0), .DB(wd2), - .BWEBA('0), .BWEBB('1), + .BWEBA(0), .BWEBB('1), .QA(rd1), .QB()); @@ -67,11 +67,11 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68) ram2p1r1wbe_1024x36 memory1(.CLKA(clk), .CLKB(clk), .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), + .WEBA(0), .WEBB(~we2), .AA(ra1), .AB(wa2), - .DA('0), + .DA(0), .DB(wd2), - .BWEBA('0), .BWEBB('1), + .BWEBA(0), .BWEBB('1), .QA(rd1), .QB()); @@ -95,12 +95,12 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68) assign rd1 = RD1Sets[RA1Q[$clog2(SRAMWIDTH)-1:0]]; ram2p1r1wbe_64x32 memory2(.CLKA(clk), .CLKB(clk), .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), + .WEBA(0), .WEBB(~we2), .AA(ra1[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), 
.AB(wa2[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .DA('0), + .DA(0), .DB(SRAMWriteData), - .BWEBA('0), .BWEBB(SRAMBitMask), + .BWEBA(0), .BWEBB(SRAMBitMask), .QA(SRAMReadData), .QB()); @@ -110,13 +110,14 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68) // READ first SRAM model // *************************************************************************** integer i; - +/* initial begin // initialize memory for simulation only; not needed because done in the testbench now integer j; for (j=0; j < DEPTH; j++) - mem[j] = '0; + mem[j] = 0; end - +*/ + // Read logic [$clog2(DEPTH)-1:0] ra1d; flopen #($clog2(DEPTH)) adrreg(clk, ce1, ra1, ra1d); diff --git a/src/generic/mem/rom1p1r.sv b/src/generic/mem/rom1p1r.sv index cc94f1b96..1c6fea356 100644 --- a/src/generic/mem/rom1p1r.sv +++ b/src/generic/mem/rom1p1r.sv @@ -34,7 +34,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) ); // Core Memory - logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; + bit [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; // dh 10/30/23 ROM macros are presently commented out // because they don't point to a generated ROM diff --git a/src/generic/onehotdecoder.sv b/src/generic/onehotdecoder.sv index 9b25feb65..433e12d37 100644 --- a/src/generic/onehotdecoder.sv +++ b/src/generic/onehotdecoder.sv @@ -31,7 +31,7 @@ module onehotdecoder #(parameter WIDTH = 2) ( ); always_comb begin - decoded = '0; + decoded = 0; decoded[bin] = 1'b1; end diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index 140c3e74e..f7bc2a486 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -82,7 +82,7 @@ module hazard import cvw::*; #(parameter cvw_t P) ( // The IFU and LSU stall the entire pipeline on a cache miss, bus access, or other long operation. 
// The IFU stalls the entire pipeline rather than just Fetch to avoid complications with instructions later in the pipeline causing Exceptions // A trap could be asserted at the start of a IFU/LSU stall, and should flush the memory operation - assign StallFCause = '0; + assign StallFCause = 0; assign StallDCause = (StructuralStallD | FPUStallD) & ~FlushDCause; assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause; assign StallMCause = WFIStallM & ~FlushMCause; diff --git a/src/ieu/bmu/cnt.sv b/src/ieu/bmu/cnt.sv index eb54d6e3c..85861b19f 100644 --- a/src/ieu/bmu/cnt.sv +++ b/src/ieu/bmu/cnt.sv @@ -57,8 +57,8 @@ module cnt #(parameter WIDTH = 32) ( lzc #(WIDTH) lzc(.num(lzcA), .ZeroCnt(czResult[$clog2(WIDTH):0])); popcnt #(WIDTH) popcntw(.num(popcntA), .PopCnt(cpopResult[$clog2(WIDTH):0])); // zero extend these results to fit into width - assign czResult[WIDTH-1:$clog2(WIDTH)+1] = '0; - assign cpopResult[WIDTH-1:$clog2(WIDTH)+1] = '0; + assign czResult[WIDTH-1:$clog2(WIDTH)+1] = 0; + assign cpopResult[WIDTH-1:$clog2(WIDTH)+1] = 0; mux2 #(WIDTH) cntresultmux(czResult, cpopResult, B[1], CntResult); endmodule diff --git a/src/ifu/bpred/bpred.sv b/src/ifu/bpred/bpred.sv index 006a60b1d..568eeecb5 100644 --- a/src/ifu/bpred/bpred.sv +++ b/src/ifu/bpred/bpred.sv @@ -224,7 +224,7 @@ module bpred import cvw::*; #(parameter cvw_t P) ( assign BTAWrongM = BPBTAWrongM & PCSrcM; end else begin - assign {BTAWrongM, RASPredPCWrongM} = '0; + assign {BTAWrongM, RASPredPCWrongM} = 0; end // **** Fix me diff --git a/src/ifu/bpred/icpred.sv b/src/ifu/bpred/icpred.sv index 42bde6f4e..8d0ecc890 100644 --- a/src/ifu/bpred/icpred.sv +++ b/src/ifu/bpred/icpred.sv @@ -65,7 +65,7 @@ module icpred import cvw::*; #(parameter cvw_t P, assign CJumpF = cjal | cj | cjr | cjalr; assign CBranchF = CompressedOpcF[4:1] == 4'h7; end else begin - assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = '0; + assign {cjal, cj, cjr, cjalr, CJumpF, CBranchF} = 0; end assign NCJumpF = PostSpillInstrRawF[6:0] 
== 7'h67 | PostSpillInstrRawF[6:0] == 7'h6F; diff --git a/src/ifu/bpred/localrepairbp.sv b/src/ifu/bpred/localrepairbp.sv index 3a730bd41..6920f4ae1 100644 --- a/src/ifu/bpred/localrepairbp.sv +++ b/src/ifu/bpred/localrepairbp.sv @@ -116,7 +116,7 @@ module localrepairbp import cvw::*; #(parameter cvw_t P, SpeculativeFlushedF <= #1 FlushedBits[IndexLHRNextF]; if (reset | FlushD) FlushedBits <= #1 '1; if(BranchD & ~StallE & ~FlushE) begin - FlushedBits[IndexLHRD] <= #1 '0; + FlushedBits[IndexLHRD] <= #1 0; end end diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 4848b5ebb..f12e02ec9 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -194,10 +194,10 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); end else begin - assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrUpdateDAF} = '0; + assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrUpdateDAF} = 0; assign PCPF = PCFExt[P.PA_BITS-1:0]; - assign CacheableF = '1; - assign SelIROM = '0; + assign CacheableF = 1; + assign SelIROM = 0; end //////////////////////////////////////////////////////////////////////////////////////////////// @@ -234,8 +234,8 @@ module ifu import cvw::*; #(parameter cvw_t P) ( logic ICacheBusAck; logic [1:0] CacheBusRW, BusRW, CacheRWF; - assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? IFURWF : '0; - assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0; + assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? IFURWF : 0; + assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : 0; // *** RT: PAdr and NextSet are replaced with mux between PCPF/IEUAdrM and PCSpillNextF/IEUAdrE. cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.ICACHE_LINELENINBITS), .NUMLINES(P.ICACHE_WAYSIZEINBYTES*8/P.ICACHE_LINELENINBITS), @@ -271,7 +271,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( end else begin : passthrough assign IFUHADDR = PCPF; logic [1:0] BusRW; - assign BusRW = ~ITLBMissF & ~SelIROM ? 
IFURWF : '0; + assign BusRW = ~ITLBMissF & ~SelIROM ? IFURWF : 0; assign IFUHSIZE = 3'b010; ahbinterface #(P.XLEN, 1'b0) ahbinterface(.HCLK(clk), .Flush(FlushD), .HRESETn(~reset), .HREADY(IFUHREADY), @@ -279,15 +279,15 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .HWSTRB(), .BusRW, .BusAtomic('0), .ByteMask(), .WriteData('0), .Stall(GatedStallD), .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); - assign CacheCommittedF = '0; + assign CacheCommittedF = 0; if(P.IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(ShiftUncachedInstr, IROMInstrF, SelIROM, InstrRawF); else assign InstrRawF = ShiftUncachedInstr; assign IFUHBURST = 3'b0; - assign {ICacheMiss, ICacheAccess, ICacheStallF} = '0; + assign {ICacheMiss, ICacheAccess, ICacheStallF} = 0; end end else begin : nobus // block: bus - assign {BusStall, CacheCommittedF} = '0; - assign {ICacheStallF, ICacheMiss, ICacheAccess} = '0; + assign {BusStall, CacheCommittedF} = 0; + assign {ICacheStallF, ICacheMiss, ICacheAccess} = 0; assign InstrRawF = IROMInstrF; end @@ -355,7 +355,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .BTBBranchF(1'b0), .BPCallF(), .BPReturnF(), .BPJumpF(), .BPBranchF(), .IClassWrongM, .IClassWrongE(), .BPReturnWrongD()); flopenrc #(1) PCSrcMReg(clk, reset, FlushM, ~StallM, PCSrcE, BPWrongM); - assign RASPredPCWrongM = '0; + assign RASPredPCWrongM = 0; assign BPDirPredWrongM = BPWrongM; assign BTAWrongM = BPWrongM; assign InstrClassM = {CallM, ReturnM, JumpM, BranchM}; diff --git a/src/lsu/align.sv b/src/lsu/align.sv index d516dad2a..b934dc924 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -94,21 +94,21 @@ module align import cvw::*; #(parameter cvw_t P) ( // compute misalignement always_comb begin case (Funct3M[1:0]) - 2'b00: AccessByteOffsetM = '0; // byte access + 2'b00: AccessByteOffsetM = 0; // byte access 2'b01: AccessByteOffsetM = {2'b00, IEUAdrM[0]}; // half access 2'b10: AccessByteOffsetM = {1'b0, IEUAdrM[1:0]}; // word access 2'b11: AccessByteOffsetM = 
IEUAdrM[2:0]; // double access default: AccessByteOffsetM = IEUAdrM[2:0]; endcase case (Funct3M[1:0]) - 2'b00: PotentialSpillM = '0; // byte access + 2'b00: PotentialSpillM = 0; // byte access 2'b01: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:1] == '1; // half access 2'b10: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:2] == '1; // word access 2'b11: PotentialSpillM = IEUAdrM[OFFSET_BIT_POS-1:3] == '1; // double access - default: PotentialSpillM = '0; + default: PotentialSpillM = 0; endcase end - assign MisalignedM = (|MemRWM) & (AccessByteOffsetM != '0); + assign MisalignedM = (|MemRWM) & (AccessByteOffsetM != 0); assign ValidSpillM = MisalignedM & PotentialSpillM & ~CacheBusHPWTStall; // Don't take the spill if there is a stall @@ -147,7 +147,7 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits - assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate + assign ShiftAmount = SelHPTW ? 0 : {AccessByteOffsetM, 3'b0}; // AND gate assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 15b2c673d..6bc8f1735 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -175,17 +175,17 @@ module lsu import cvw::*; #(parameter cvw_t P) ( end else begin : no_ziccslm_align assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtE = {2'b00, IEUAdrE}; - assign SelSpillE = '0; + assign SelSpillE = 0; assign DCacheReadDataWordSpillM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; - assign {SpillStallM, SelStoreDelay} = '0; + assign {SpillStallM, SelStoreDelay} = 0; end if(P.ZICBOZ_SUPPORTED) begin : cboz - mux2 #(P.XLEN) writedatacbozmux(WriteDataM, '0, CMOpM[3], WriteDataZM); - end else begin : cboz + assign WriteDataZM = CMOpM[3] ? 
0 : WriteDataM; + end else begin : cboz assign WriteDataZM = WriteDataM; end @@ -218,8 +218,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign StoreAmoAccessFaultM = LSUStoreAmoAccessFaultM; assign LoadPageFaultM = LSULoadPageFaultM; assign StoreAmoPageFaultM = LSUStoreAmoPageFaultM; - assign {HPTWStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = '0; - assign {HPTWInstrAccessFaultF, HPTWInstrPageFaultF} = '0; + assign {HPTWStall, SelHPTW, PTE, PageType, DTLBWriteM, ITLBWriteF, IgnoreRequestTLB} = 0; + assign {HPTWInstrAccessFaultF, HPTWInstrPageFaultF} = 0; end // CommittedM indicates the cache, bus, or HPTW are busy with a multiple cycle operation. @@ -255,8 +255,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW); end else begin // No MMU, so no PMA/page faults and no address translation - assign {DTLBMissM, LSULoadAccessFaultM, LSUStoreAmoAccessFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM} = '0; - assign {LSULoadPageFaultM, LSUStoreAmoPageFaultM} = '0; + assign {DTLBMissM, LSULoadAccessFaultM, LSUStoreAmoAccessFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM} = 0; + assign {LSULoadPageFaultM, LSUStoreAmoPageFaultM} = 0; assign PAdrM = IHAdrM[P.PA_BITS-1:0]; assign CacheableM = 1'b1; assign SelDTIM = P.DTIM_SUPPORTED & ~P.BUS_SUPPORTED; // if no PMA then select dtim if there is a DTIM. If there is @@ -281,7 +281,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. mux2 #(P.PA_BITS) DTIMAdrMux(IEUAdrExtE[P.PA_BITS-1:0], IEUAdrExtM[P.PA_BITS-1:0], MemRWM[0], DTIMAdr); - assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : '0; + assign DTIMMemRWM = SelDTIM & ~IgnoreRequestTLB ? LSURWM : 0; // **** fix ReadDataWordM to be LLEN. ByteMask is wrong length. // **** create config to support DTIM with floating point. 
// Add support for cboz @@ -318,16 +318,16 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(P.ZICBOZ_SUPPORTED) begin assign BusCMOZero = CMOpM[3] & ~CacheableM; - assign CacheCMOpM = (CacheableM & ~SelHPTW) ? CMOpM : '0; + assign CacheCMOpM = (CacheableM & ~SelHPTW) ? CMOpM : 0; assign BusAtomic = AtomicM[1] & ~CacheableM; end else begin - assign BusCMOZero = '0; - assign CacheCMOpM = '0; - assign BusAtomic = '0; + assign BusCMOZero = 0; + assign CacheCMOpM = 0; + assign BusAtomic = 0; end - assign BusRW = ~CacheableM & ~SelDTIM ? LSURWM : '0; + assign BusRW = (~CacheableM & ~SelDTIM )? LSURWM : 0; assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM; - assign CacheRWM = CacheableM & ~SelDTIM ? LSURWM : '0; + assign CacheRWM = (CacheableM & ~SelDTIM) ? LSURWM : 0; assign FlushDCache = FlushDCacheM & ~(SelHPTW); cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), @@ -367,7 +367,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface logic [1:0] BusRW; // Non-DTIM memory access, ignore cacheableM logic [P.XLEN-1:0] FetchBuffer; - assign BusRW = ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0; + assign BusRW = (~IgnoreRequestTLB & ~SelDTIM) ? LSURWM : 0; assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; @@ -381,14 +381,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(P.DTIM_SUPPORTED) mux2 #(P.XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM[P.XLEN-1:0], SelDTIM, ReadDataWordMuxM[P.XLEN-1:0]); else assign ReadDataWordMuxM[P.XLEN-1:0] = FetchBuffer[P.XLEN-1:0]; // *** bus only does not support double wide floats. 
assign LSUHBURST = 3'b0; - assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0; + assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = 0; end end else begin: nobus // block: bus, only DTIM - assign LSUHWDATA = '0; + assign LSUHWDATA = 0; assign ReadDataWordMuxM = DTIMReadDataWordM; - assign {BusStall, BusCommittedM} = '0; - assign {DCacheMiss, DCacheAccess} = '0; - assign {DCacheStallM, DCacheCommittedM} = '0; + assign {BusStall, BusCommittedM} = 0; + assign {DCacheMiss, DCacheAccess} = 0; + assign {DCacheStallM, DCacheCommittedM} = 0; end assign LSUBusStallM = BusStall & ~IgnoreRequestTLB; diff --git a/src/lsu/swbytemask.sv b/src/lsu/swbytemask.sv index 60164e081..d8db91cbc 100644 --- a/src/lsu/swbytemask.sv +++ b/src/lsu/swbytemask.sv @@ -42,7 +42,7 @@ module swbytemask #(parameter WORDLEN, EXTEND = 0)( assign ByteMaskExtended = ExtendedByteMask[WORDLEN*2/8-1:WORDLEN/8]; end else begin assign ByteMask = (('d2**('d2**Size))-'d1) << Adr; - assign ByteMaskExtended = '0; + assign ByteMaskExtended = 0; end /* Equivalent to the following diff --git a/src/mmu/hptw.sv b/src/mmu/hptw.sv index 2a4288560..77e73e696 100644 --- a/src/mmu/hptw.sv +++ b/src/mmu/hptw.sv @@ -173,7 +173,7 @@ module hptw import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] AccessedPTE; assign AccessedPTE = {PTE[P.XLEN-1:8], (SetDirty | PTE[7]), 1'b1, PTE[5:0]}; // set accessed bit, conditionally set dirty bit - //assign ReadDataNoXM = (ReadDataM[0] === 'x) ? '0 : ReadDataM; // If the PTE.V bit is x because it was read from uninitialized memory set to 0 to avoid x propagation and hanging the simulation. + //assign ReadDataNoXM = (ReadDataM[0] === 'x) ? 0 : ReadDataM; // If the PTE.V bit is x because it was read from uninitialized memory set to 0 to avoid x propagation and hanging the simulation. assign ReadDataNoXM = ReadDataM; // *** temporary fix for synthesis; === and x in line above are not synthesizable. 
mux2 #(P.XLEN) NextPTEMux(ReadDataNoXM, AccessedPTE, UpdatePTE, NextPTE); // NextPTE = ReadDataNoXM when ADUE = 0 because UpdatePTE = 0 flopenr #(P.PA_BITS) HPTWAdrWriteReg(clk, reset, SaveHPTWAdr, HPTWReadAdr, HPTWWriteAdr); @@ -213,9 +213,9 @@ module hptw import cvw::*; #(parameter cvw_t P) ( end else begin // block: hptwwrites assign NextPTE = ReadDataNoXM; assign HPTWAdr = HPTWReadAdr; - assign HPTWUpdateDA = '0; - assign UpdatePTE = '0; - assign HPTWRW[0] = '0; + assign HPTWUpdateDA = 0; + assign UpdatePTE = 0; + assign HPTWRW[0] = 0; end // Enable and select signals based on states diff --git a/src/mmu/tlb/tlbmixer.sv b/src/mmu/tlb/tlbmixer.sv index 4a8712da9..d615d1370 100644 --- a/src/mmu/tlb/tlbmixer.sv +++ b/src/mmu/tlb/tlbmixer.sv @@ -98,6 +98,6 @@ module tlbmixer import cvw::*; #(parameter cvw_t P) ( // Output the hit physical address if translation is currently on. // Provide physical address of zero if not TLBHits, to cause segmentation error if miss somehow percolated through signal - mux2 #(P.PA_BITS) hitmux('0, {PPNMixed2, Offset}, TLBHit, TLBPAdr); // set PA to 0 if TLB misses, to cause segementation error if this miss somehow passes through system + assign TLBPAdr = TLBHit ? 
{PPNMixed2, Offset} : 0; endmodule diff --git a/src/privileged/csrm.sv b/src/privileged/csrm.sv index 9f5b29428..35c27736c 100644 --- a/src/privileged/csrm.sv +++ b/src/privileged/csrm.sv @@ -163,7 +163,7 @@ module csrm import cvw::*; #(parameter cvw_t P) ( flopenr #(32) MCOUNTINHIBITreg(clk, reset, WriteMCOUNTINHIBITM, CSRWriteValM[31:0], MCOUNTINHIBIT_REGW); if (P.U_SUPPORTED) begin: mcounteren // MCOUNTEREN only exists when user mode is supported flopenr #(32) MCOUNTERENreg(clk, reset, WriteMCOUNTERENM, CSRWriteValM[31:0], MCOUNTEREN_REGW); - end else assign MCOUNTEREN_REGW = '0; + end else assign MCOUNTEREN_REGW = 0; // MENVCFG register if (P.U_SUPPORTED) begin // menvcfg only exists if there is a lower privilege to control @@ -199,7 +199,7 @@ module csrm import cvw::*; #(parameter cvw_t P) ( // verilator lint_off WIDTH logic [5:0] entry; always_comb begin - entry = '0; + entry = 0; CSRMReadValM = 0; IllegalCSRMAccessM = !(P.S_SUPPORTED) & (CSRAdrM == MEDELEG | CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode if (CSRAdrM >= PMPADDR0 & CSRAdrM < PMPADDR0 + P.PMP_ENTRIES) // reading a PMP entry diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv index 3a28c5075..733b2f0c6 100644 --- a/src/privileged/csrsr.sv +++ b/src/privileged/csrsr.sv @@ -66,7 +66,7 @@ module csrsr import cvw::*; #(parameter cvw_t P) ( STATUS_XS, STATUS_FS, /*STATUS_MPP, 2'b0*/ 4'b0, STATUS_SPP, /*STATUS_MPIE*/ 1'b0, STATUS_UBE, STATUS_SPIE, /*1'b0, STATUS_MIE, 1'b0*/ 3'b0, STATUS_SIE, 1'b0}; - assign MSTATUSH_REGW = '0; // *** does not exist when XLEN=64, but don't want it to have an undefined value. Spec is not clear what it should be. + assign MSTATUSH_REGW = 0; // *** does not exist when XLEN=64, but don't want it to have an undefined value. Spec is not clear what it should be. 
end else begin: csrsr32 // RV32 assign MSTATUS_REGW = {STATUS_SD, 8'b0, STATUS_TSR, STATUS_TW, STATUS_TVM, STATUS_MXR, STATUS_SUM, STATUS_MPRV, diff --git a/src/privileged/privdec.sv b/src/privileged/privdec.sv index 23c0c2f15..360b3dab2 100644 --- a/src/privileged/privdec.sv +++ b/src/privileged/privdec.sv @@ -80,7 +80,7 @@ module privdec import cvw::*; #(parameter cvw_t P) ( if (P.U_SUPPORTED) begin:wfi logic [P.WFI_TIMEOUT_BIT:0] WFICount, WFICountPlus1; - assign WFICountPlus1 = wfiM ? '0 : WFICount + 1; // restart counting on WFI + assign WFICountPlus1 = wfiM ? 0 : WFICount + 1; // restart counting on WFI flopr #(P.WFI_TIMEOUT_BIT+1) wficountreg(clk, reset, WFICountPlus1, WFICount); // count while in WFI // coverage off -item e 1 -fecexprrow 1 // WFI Timout trap will not occur when STATUS_TW is low while in supervisor mode, so the system gets stuck waiting for an interrupt and triggers a watchdog timeout. diff --git a/src/privileged/trap.sv b/src/privileged/trap.sv index 469ff4e06..db31afa69 100644 --- a/src/privileged/trap.sv +++ b/src/privileged/trap.sv @@ -65,8 +65,8 @@ module trap import cvw::*; #(parameter cvw_t P) ( assign PendingIntsM = MIP_REGW & MIE_REGW; assign IntPendingM = |PendingIntsM; assign Committed = CommittedM | CommittedF; - assign EnabledIntsM = (MIntGlobalEnM ? PendingIntsM & ~MIDELEG_REGW : '0) | (SIntGlobalEnM ? PendingIntsM & MIDELEG_REGW : '0); - assign ValidIntsM = Committed ? '0 : EnabledIntsM; + assign EnabledIntsM = (MIntGlobalEnM ? PendingIntsM & ~MIDELEG_REGW : 0) | (SIntGlobalEnM ? PendingIntsM & MIDELEG_REGW : 0); + assign ValidIntsM = Committed ? 0 : EnabledIntsM; assign InterruptM = (|ValidIntsM) & InstrValidM & (~wfiM | wfiW); // suppress interrupt if the memory system has partially processed a request. Delay interrupt until wfi is in the W stage. // wfiW is to support possible but unlikely back to back wfi instructions. wfiM would be high in the M stage, while also in the W stage. 
assign DelegateM = P.S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM] : MEDELEG_REGW[CauseM]) & diff --git a/src/uncore/plic_apb.sv b/src/uncore/plic_apb.sv index 4c0602353..130a34e00 100644 --- a/src/uncore/plic_apb.sv +++ b/src/uncore/plic_apb.sv @@ -91,7 +91,7 @@ module plic_apb import cvw::*; #(parameter cvw_t P) ( assign memread = ~PWRITE & PSEL; // read at start of access phase. PENABLE hasn't set up before this assign PREADY = 1'b1; // PLIC never takes >1 cycle to respond assign entry = {PADDR[23:2],2'b0}; - assign One[P.PLIC_NUM_SRC-1:1] = '0; assign One[0] = 1'b1; // Vivado does not like this as a single assignment. + assign One[P.PLIC_NUM_SRC-1:1] = 0; assign One[0] = 1'b1; // Vivado does not like this as a single assignment. // account for subword read/write circuitry // -- Note PLIC registers are 32 bits no matter what; access them with LW SW. @@ -107,10 +107,10 @@ module plic_apb import cvw::*; #(parameter cvw_t P) ( always @(posedge PCLK) begin // resetting if (~PRESETn) begin - intPriority <= #1 '0; - intEn <= #1 '0; - intThreshold <= #1 '0; - intInProgress <= #1 '0; + intPriority <= #1 0; + intEn <= #1 0; + intThreshold <= #1 0; + intInProgress <= #1 0; // writing end else begin if (memwrite) diff --git a/testbench/common/functionName.sv b/testbench/common/functionName.sv index 1b2402bee..c08b1d767 100644 --- a/testbench/common/functionName.sv +++ b/testbench/common/functionName.sv @@ -136,7 +136,7 @@ module FunctionName import cvw::*; #(parameter cvw_t P) ( ProgramAddrMapFP = $fopen(ProgramAddrMapFile, "r"); // read line by line to count lines - if (ProgramAddrMapFP != '0) begin + if (ProgramAddrMapFP != 0) begin while (! 
$feof(ProgramAddrMapFP)) begin status = $fscanf(ProgramAddrMapFP, "%h\n", ProgramAddrMapLine); ProgramAddrMapMemory[ProgramAddrMapLineCount] = ProgramAddrMapLine; @@ -154,7 +154,7 @@ module FunctionName import cvw::*; #(parameter cvw_t P) ( ProgramLabelMapLineCount = 0; ProgramLabelMapFP = $fopen(ProgramLabelMapFile, "r"); - if (ProgramLabelMapFP != '0) begin + if (ProgramLabelMapFP != 0) begin while (! $feof(ProgramLabelMapFP)) begin status = $fscanf(ProgramLabelMapFP, "%s\n", ProgramLabelMapLine); ProgramLabelMapMemory[ProgramLabelMapLineCount] = ProgramLabelMapLine; @@ -174,7 +174,7 @@ module FunctionName import cvw::*; #(parameter cvw_t P) ( logic OrReducedAdr, AnyUnknown; assign OrReducedAdr = |ProgramAddrIndex; assign AnyUnknown = (OrReducedAdr === 1'bx) ? 1'b1 : 1'b0; - initial ProgramAddrIndex = '0; + initial ProgramAddrIndex = 0; always @(*) FunctionName = AnyUnknown ? "Unknown!" : ProgramLabelMapMemory[ProgramAddrIndex]; diff --git a/testbench/common/wallyTracer.sv b/testbench/common/wallyTracer.sv index 746fde068..309b39027 100644 --- a/testbench/common/wallyTracer.sv +++ b/testbench/common/wallyTracer.sv @@ -231,7 +231,7 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); end genvar index; - assign rf[0] = '0; + assign rf[0] = 0; for(index = 1; index < NUMREGS; index += 1) assign rf[index] = testbench.dut.core.ieu.dp.regf.rf[index]; @@ -239,7 +239,7 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); assign rf_we3 = testbench.dut.core.ieu.dp.regf.we3; always_comb begin - rf_wb <= '0; + rf_wb <= 0; if(rf_we3) rf_wb[rf_a3] <= 1'b1; end @@ -251,7 +251,7 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); assign frf_we4 = testbench.dut.core.fpu.fpu.fregfile.we4; always_comb begin - frf_wb <= '0; + frf_wb <= 0; if(frf_we4) frf_wb[frf_a4] <= 1'b1; end @@ -492,7 +492,7 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); end // *** implementation only cancel? 
so sc does not clear? - assign rvvi.lrsc_cancel[0][0] = '0; + assign rvvi.lrsc_cancel[0][0] = 0; integer index2; diff --git a/testbench/common/watchdog.sv b/testbench/common/watchdog.sv index 1e2b760ca..296900b20 100644 --- a/testbench/common/watchdog.sv +++ b/testbench/common/watchdog.sv @@ -40,7 +40,7 @@ module watchdog #(parameter XLEN, WatchDogTimerThreshold) always_ff @(posedge clk) begin OldPCW <= PCW; if(OldPCW == PCW) WatchDogTimerCount = WatchDogTimerCount + 1'b1; - else WatchDogTimerCount = '0; + else WatchDogTimerCount = 0; end always_comb begin diff --git a/testbench/testbench-imperas.sv b/testbench/testbench-imperas.sv index 35e37f69f..27bcdb73e 100644 --- a/testbench/testbench-imperas.sv +++ b/testbench/testbench-imperas.sv @@ -252,9 +252,9 @@ module testbench; assign SDCCmdIn = SDCCmd; assign SDCDatIn = SDCDat; -----/\----- EXCLUDED -----/\----- */ - assign SDCIntr = '0; + assign SDCIntr = 0; end else begin - assign SDCIntr = '0; + assign SDCIntr = 0; end wallypipelinedsoc #(P) dut(.clk, .reset_ext, .reset, .HRDATAEXT, .HREADYEXT, .HRESPEXT, .HSELEXT, .HSELEXTSDC, diff --git a/testbench/testbench-xcelium.sv b/testbench/testbench-xcelium.sv index 44afbcd3b..85b39e766 100644 --- a/testbench/testbench-xcelium.sv +++ b/testbench/testbench-xcelium.sv @@ -335,7 +335,7 @@ module testbench; if (P.UNCORE_RAM_SUPPORTED) begin `ifdef TB_UNCORE_RAM_SUPPORTED for (adrindex=0; adrindex<(P.UNCORE_RAM_RANGE>>1+(P.XLEN/32)); adrindex = adrindex+1) - dut.uncore.uncore.ram.ram.memory.RAM[adrindex] = '0; + dut.uncore.uncore.ram.ram.memory.RAM[adrindex] = 0; `endif end if(reset) begin // branch predictor must always be reset @@ -411,7 +411,7 @@ module testbench; .HREADRam(HRDATAEXT), .HREADYRam(HREADYEXT), .HRESPRam(HRESPEXT), .HREADY, .HWSTRB); end else begin assign HREADYEXT = 1; - assign {HRESPEXT, HRDATAEXT} = '0; + assign {HRESPEXT, HRDATAEXT} = 0; end if(P.FPGA) begin : sdcard @@ -424,8 +424,8 @@ module testbench; assign SDCCmdIn = SDCCmd; assign SDCDatIn = SDCDat; 
end else begin - assign SDCCmd = '0; - assign SDCDat = '0; + assign SDCCmd = 0; + assign SDCDat = 0; end wallypipelinedsoc #(P) dut(.clk, .reset_ext, .reset, .HRDATAEXT, .HREADYEXT, .HRESPEXT, .HSELEXT, .HSELEXTSDC, diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 0f8194e62..d52c0baf8 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -440,7 +440,7 @@ module testbench; always @(posedge clk) if (ResetMem) // program memory is sometimes reset (e.g. for CoreMark, which needs zeroed memory) for (adrindex=0; adrindex<(P.UNCORE_RAM_RANGE>>1+(P.XLEN/32)); adrindex = adrindex+1) - dut.uncore.uncore.ram.ram.memory.RAM[adrindex] = '0; + dut.uncore.uncore.ram.ram.memory.RAM[adrindex] = 0; //////////////////////////////////////////////////////////////////////////////// // Actual hardware @@ -457,7 +457,7 @@ module testbench; .HREADRam(HRDATAEXT), .HREADYRam(HREADYEXT), .HRESPRam(HRESPEXT), .HREADY, .HWSTRB); end else begin assign HREADYEXT = 1; - assign {HRESPEXT, HRDATAEXT} = '0; + assign {HRESPEXT, HRDATAEXT} = 0; end if(P.SDC_SUPPORTED) begin : sdcard @@ -473,9 +473,9 @@ module testbench; assign SDCDat = sd_dat_reg_t ? 
sd_dat_reg_o : sd_dat_i; assign SDCDatIn = SDCDat; -----/\----- EXCLUDED -----/\----- */ - assign SDCIntr = '0; + assign SDCIntr = 0; end else begin - assign SDCIntr = '0; + assign SDCIntr = 0; end wallypipelinedsoc #(P) dut(.clk, .reset_ext, .reset, .HRDATAEXT, .HREADYEXT, .HRESPEXT, .HSELEXT, .HSELEXTSDC, From e0eb91f79521e32c6b6a90b672c9f230937be073 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 6 Mar 2024 11:02:04 -0800 Subject: [PATCH 28/52] Changed always @(posedge clk) to always_ff @(posedge clk) where it was omitted in several places --- src/generic/mem/rom1p1r.sv | 2 +- src/privileged/csri.sv | 4 ++-- src/uncore/clint_apb.sv | 4 ++-- src/uncore/plic_apb.sv | 2 +- src/uncore/uartPC16550D.sv | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/generic/mem/rom1p1r.sv b/src/generic/mem/rom1p1r.sv index 1c6fea356..7350eac9c 100644 --- a/src/generic/mem/rom1p1r.sv +++ b/src/generic/mem/rom1p1r.sv @@ -52,7 +52,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, DATA_WIDTH = 32, PRELOAD_ENABLED = 0) end end - always @ (posedge clk) begin + always_ff @ (posedge clk) begin if(ce) dout <= ROM[addr]; end diff --git a/src/privileged/csri.sv b/src/privileged/csri.sv index 35b6f3fe6..fafc5c845 100644 --- a/src/privileged/csri.sv +++ b/src/privileged/csri.sv @@ -74,11 +74,11 @@ module csri import cvw::*; #(parameter cvw_t P) ( assign SIP_WRITE_MASK = 12'h000; assign MIE_WRITE_MASK = 12'h888; end - always @(posedge clk) + always_ff @(posedge clk) if (reset) MIP_REGW_writeable <= 12'b0; else if (WriteMIPM) MIP_REGW_writeable <= (CSRWriteValM[11:0] & MIP_WRITE_MASK); else if (WriteSIPM) MIP_REGW_writeable <= (CSRWriteValM[11:0] & SIP_WRITE_MASK) | (MIP_REGW_writeable & ~SIP_WRITE_MASK); - always @(posedge clk) + always_ff @(posedge clk) if (reset) MIE_REGW <= 12'b0; else if (WriteMIEM) MIE_REGW <= (CSRWriteValM[11:0] & MIE_WRITE_MASK); // MIE controls M and S fields else if (WriteSIEM) MIE_REGW <= (CSRWriteValM[11:0] & 12'h222 & MIDELEG_REGW) | 
(MIE_REGW & 12'h888); // only S fields diff --git a/src/uncore/clint_apb.sv b/src/uncore/clint_apb.sv index 691ba372d..961a50824 100644 --- a/src/uncore/clint_apb.sv +++ b/src/uncore/clint_apb.sv @@ -63,7 +63,7 @@ module clint_apb import cvw::*; #(parameter cvw_t P) ( // register access if (P.XLEN==64) begin:clint // 64-bit - always @(posedge PCLK) begin + always_ff @(posedge PCLK) begin case(entry) 16'h0000: PRDATA <= {63'b0, MSIP}; 16'h4000: PRDATA <= MTIMECMP; @@ -97,7 +97,7 @@ module clint_apb import cvw::*; #(parameter cvw_t P) ( MTIME[j*8 +: 8] <= PWDATA[j*8 +: 8]; end else MTIME <= MTIME + 1; end else begin:clint // 32-bit - always @(posedge PCLK) begin + always_ff @(posedge PCLK) begin case(entry) 16'h0000: PRDATA <= {31'b0, MSIP}; 16'h4000: PRDATA <= MTIMECMP[31:0]; diff --git a/src/uncore/plic_apb.sv b/src/uncore/plic_apb.sv index 130a34e00..7858b2e35 100644 --- a/src/uncore/plic_apb.sv +++ b/src/uncore/plic_apb.sv @@ -104,7 +104,7 @@ module plic_apb import cvw::*; #(parameter cvw_t P) ( // ================== localparam PLIC_NUM_SRC_MIN_32 = P.PLIC_NUM_SRC < 32 ? P.PLIC_NUM_SRC : 31; - always @(posedge PCLK) begin + always_ff @(posedge PCLK) begin // resetting if (~PRESETn) begin intPriority <= #1 0; diff --git a/src/uncore/uartPC16550D.sv b/src/uncore/uartPC16550D.sv index f8aa4e016..555a7682c 100644 --- a/src/uncore/uartPC16550D.sv +++ b/src/uncore/uartPC16550D.sv @@ -520,7 +520,7 @@ module uartPC16550D #(parameter UART_PRESCALE) ( intrpending = 0; end end - always @(posedge PCLK) INTR <= #1 intrpending; // prevent glitches on interrupt pin + always_ff @(posedge PCLK) INTR <= #1 intrpending; // prevent glitches on interrupt pin // Side effect of reading LSR is lowering overrun, parity, framing, break intr's assign setSquashRXerrIP = ~MEMRb & (A==3'b101); From 068ffda5fb8a7ecdffb87555cce02039d688776b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 13:28:47 -0600 Subject: [PATCH 29/52] Revert "Revert "Trying an experiment. 
Use the less compact subwordreaddouble in the fpga synthesize rather than subwordreadmisaligned."" This reverts commit 8136b45ca7f8dedb83fd97e152bbb8765436894e. --- src/lsu/lsu.sv | 3 +- src/lsu/subwordreaddouble.sv | 196 +++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index cf1767f5c..efb5d9307 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -421,7 +421,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + //subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv new file mode 100644 index 000000000..936240cf7 --- /dev/null +++ b/src/lsu/subwordreaddouble.sv @@ -0,0 +1,196 @@ +/////////////////////////////////////////// +// subwordread.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Extract subwords and sign extend for reads +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. 
+// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordreaddouble #(parameter LLEN) + ( + input logic [LLEN*2-1:0] ReadDataWordMuxM, + input logic [2:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN-1:0] ReadDataM +); + + logic [7:0] ByteM; + logic [15:0] HalfwordM; + logic [4:0] PAdrSwap; + logic [4:0] BigEndianPAdr; + logic [4:0] LengthM; + + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + + always_comb + case(Funct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; + 3'b001: LengthM = 5'd2; + 3'b010: LengthM = 5'd4; + 3'b011: LengthM = 5'd8; + 3'b100: LengthM = 5'd16; + default: LengthM = 5'd8; + endcase + + if (LLEN == 128) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + logic [63:0] QdWordM; + always_comb + case(PAdrSwap) + 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; + 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; + 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; + 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; + 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; + 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; + 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; + 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; + 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; + 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; + 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; + 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; + 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; + 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; + 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; + 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; + 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; + 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; + 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; + 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; + 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; + 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; + 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; + 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; + 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; + 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; + 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; + 
5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; + 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; + 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; + 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; + 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; + endcase + + assign ByteM = QdWordM[7:0]; + assign HalfwordM = QdWordM[15:0]; + assign WordM = QdWordM[31:0]; + assign DblWordM = QdWordM[63:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; + 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; + 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; + 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; + 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; + 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; + 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; + 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: 
DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + assign ByteM = DblWordM[7:0]; + assign HalfwordM = DblWordM[15:0]; + assign WordM = DblWordM[31:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + + logic [31:0] WordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase + + assign ByteM = WordM[7:0]; + assign HalfwordM = WordM[15:0]; + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, 
ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen + endcase + end +endmodule From 2ea01343293a8bb2fc16d63005a01fcc5c8e2cb3 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 13:28:59 -0600 Subject: [PATCH 30/52] Revert "Trying an experiment. Use the less compact subwordreaddouble in the fpga synthesize rather than subwordreadmisaligned." This reverts commit cba3209e7fd7851aa57233f8771133bd29b7527e. --- src/lsu/lsu.sv | 3 +- src/lsu/subwordreaddouble.sv | 196 ----------------------------------- 2 files changed, 1 insertion(+), 198 deletions(-) delete mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index efb5d9307..cf1767f5c 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -421,8 +421,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - //subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv deleted file mode 100644 index 936240cf7..000000000 --- a/src/lsu/subwordreaddouble.sv +++ /dev/null @@ -1,196 +0,0 @@ -/////////////////////////////////////////// -// subwordread.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// 
Purpose: Extract subwords and sign extend for reads -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordreaddouble #(parameter LLEN) - ( - input logic [LLEN*2-1:0] ReadDataWordMuxM, - input logic [2:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN-1:0] ReadDataM -); - - logic [7:0] ByteM; - logic [15:0] HalfwordM; - logic [4:0] PAdrSwap; - logic [4:0] BigEndianPAdr; - logic [4:0] LengthM; - - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; - /* verilator lint_off WIDTHEXPAND */ - /* verilator lint_off WIDTHTRUNC */ - assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; - /* verilator lint_on WIDTHTRUNC */ - /* verilator lint_on WIDTHEXPAND */ - - always_comb - case(Funct3M & {FpLoadStoreM, 2'b11}) - 3'b000: LengthM = 5'd1; - 3'b001: LengthM = 5'd2; - 3'b010: LengthM = 5'd4; - 3'b011: LengthM = 5'd8; - 3'b100: LengthM = 5'd16; - default: LengthM = 5'd8; - endcase - - if (LLEN == 128) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - logic [63:0] QdWordM; - always_comb - case(PAdrSwap) - 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; - 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; - 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; - 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; - 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; - 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; - 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; - 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; - 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; - 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; - 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; - 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; - 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; - 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; - 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; - 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; - 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; - 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; - 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; - 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; - 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; - 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; - 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; - 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; - 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; - 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; - 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; - 
5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; - 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; - 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; - 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; - 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; - endcase - - assign ByteM = QdWordM[7:0]; - assign HalfwordM = QdWordM[15:0]; - assign WordM = QdWordM[31:0]; - assign DblWordM = QdWordM[63:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; - 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; - 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; - 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; - 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; - 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; - 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; - 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: 
DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - assign ByteM = DblWordM[7:0]; - assign HalfwordM = DblWordM[15:0]; - assign WordM = DblWordM[31:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - - logic [31:0] WordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase - - assign ByteM = WordM[7:0]; - assign HalfwordM = WordM[15:0]; - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, 
ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen - endcase - end -endmodule From 3fa5faa6cf236db0870187020bba4a1ea8f7230a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 13:29:08 -0600 Subject: [PATCH 31/52] Revert "Added sdc to pma allow shift." This reverts commit a2d5618d889f882e0ceccb8c75708dc564bb7dae. --- src/mmu/pmachecker.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index e77cc74d0..60296213d 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -61,7 +61,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[3] | SelRegions[4] | SelRegions[5]; // exclusion-tag: unused-cachable assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; - assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6] | SelRegions[10]; + assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6]; // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent From 5447159cfd03801dc4f520885b719c5c4c3a91e4 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:15:26 -0600 Subject: [PATCH 32/52] Revert "Cleanup." This reverts commit e84b7cc14782d6cb6676d94e987c15f63f87a604. 
--- src/lsu/align.sv | 18 ++++++++++++++++-- src/lsu/endianswap.sv | 4 ++-- src/lsu/lsu.sv | 4 +++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 094da4a15..fa10916f9 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -48,6 +48,7 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.LLEN-1:0] LSUWriteDataM, output logic [(P.LLEN*2-1)/8:0] ByteMaskSpillM, + output logic [P.LLEN*2-1:0] LSUWriteDataSpillM, output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill @@ -70,8 +71,9 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrIncrementM; localparam OFFSET_LEN = $clog2(LLENINBYTES); - logic [OFFSET_LEN-1:0] AccessByteOffsetM; - logic PotentialSpillM; + logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; + logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -140,6 +142,18 @@ module align import cvw::*; #(parameter cvw_t P) ( // merge together mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SelSpillM, ReadDataWordSpillAllM); + + // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) + // 8 * is for shifting by bytes not bits + assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate + + // write path. Also has the 8:1 shifter muxing for the byteoffset + // then it also has the mux to select when a spill occurs + logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. 
+ + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; + assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; + mux3 #(2*P.LLEN/8) bytemaskspillmux({ByteMaskExtendedM, ByteMaskM}, // no spill {{{P.LLEN/8}{1'b0}}, ByteMaskM}, // spill, first half {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, // spill, second half diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index afd4ecdd2..3c552b371 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -54,8 +54,8 @@ module endianswap #(parameter LEN) ( y[143:136] = a[119:112]; y[135:128] = a[127:120]; y[127:120] = a[135:128]; - y[119:112] = a[143:136]; - y[111:104] = a[151:144]; + y[119:112] = a[142:136]; + y[111:104] = a[152:144]; y[103:96] = a[159:152]; y[95:88] = a[167:160]; y[87:80] = a[175:168]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index cf1767f5c..e3e138ea4 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -126,6 +126,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data /* verilator lint_off WIDTHEXPAND */ logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data + logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data @@ -166,7 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .FpLoadStoreM, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, 
IEUAdrSpillE}; @@ -176,6 +177,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign SelSpillE = '0; assign ReadDataWordSpillAllM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; + assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; assign {SpillStallM} = '0; end From b64b88312924ea48843f05ffba0043392034e658 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:15:43 -0600 Subject: [PATCH 33/52] Revert "Removed duplicate endianswap." This reverts commit caac48b7f28e33ada9d4a7d0f017878635473811. --- src/lsu/endianswapdouble.sv | 114 ++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 src/lsu/endianswapdouble.sv diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv new file mode 100644 index 000000000..133149f0e --- /dev/null +++ b/src/lsu/endianswapdouble.sv @@ -0,0 +1,114 @@ +/////////////////////////////////////////// +// endianswap.sv +// +// Written: David_Harris@hmc.edu +// Created: 7 May 2022 +// Modified: 18 January 2023 +// +// Purpose: Swap byte order for Big-Endian accesses +// +// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module endianswapdouble #(parameter LEN) ( + input logic BigEndianM, + input logic [LEN-1:0] a, + output logic [LEN-1:0] y +); + + if(LEN == 256) begin + always_comb + if (BigEndianM) begin // swap endianness + y[255:248] = a[7:0]; + y[247:240] = a[15:8]; + y[239:232] = a[23:16]; + y[231:224] = a[31:24]; + y[223:216] = a[39:32]; + y[215:208] = a[47:40]; + y[207:200] = a[55:48]; + y[199:192] = a[63:56]; + y[191:184] = a[71:64]; + y[183:176] = a[79:72]; + y[175:168] = a[87:80]; + y[167:160] = a[95:88]; + y[159:152] = a[103:96]; + y[151:144] = a[111:104]; + y[143:136] = a[119:112]; + y[135:128] = a[127:120]; + y[127:120] = a[135:128]; + y[119:112] = a[142:136]; + y[111:104] = a[152:144]; + y[103:96] = a[159:152]; + y[95:88] = a[167:160]; + y[87:80] = a[175:168]; + y[79:72] = a[183:176]; + y[71:64] = a[191:184]; + y[63:56] = a[199:192]; + y[55:48] = a[207:200]; + y[47:40] = a[215:208]; + y[39:32] = a[223:216]; + y[31:24] = a[231:224]; + y[23:16] = a[239:232]; + y[15:8] = a[247:240]; + y[7:0] = a[255:248]; + end else y = a; + end else if(LEN == 128) begin + always_comb + if (BigEndianM) begin // swap endianness + y[127:120] = a[7:0]; + y[119:112] = a[15:8]; + y[111:104] = a[23:16]; + y[103:96] = a[31:24]; + y[95:88] = a[39:32]; + y[87:80] = a[47:40]; + y[79:72] = a[55:48]; + y[71:64] = a[63:56]; + y[63:56] = a[71:64]; + y[55:48] = a[79:72]; + y[47:40] = a[87:80]; + y[39:32] = a[95:88]; + y[31:24] = a[103:96]; + y[23:16] = a[111:104]; + y[15:8] = a[119:112]; + y[7:0] = a[127:120]; + end else y = a; + end else if(LEN == 64) begin + always_comb + if (BigEndianM) begin // swap endianness + y[63:56] = a[7:0]; + y[55:48] = a[15:8]; + y[47:40] = a[23:16]; + y[39:32] = a[31:24]; + y[31:24] = a[39:32]; + y[23:16] = a[47:40]; + y[15:8] = a[55:48]; + y[7:0] = a[63:56]; + end else 
y = a; + end else begin + always_comb + if (BigEndianM) begin + y[31:24] = a[7:0]; + y[23:16] = a[15:8]; + y[15:8] = a[23:16]; + y[7:0] = a[31:24]; + end else y = a; + end +endmodule From e7ec2bedd49db61b453322e7febe42430afc5a92 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:15:51 -0600 Subject: [PATCH 34/52] Revert "Simplifications of subword code." This reverts commit a4028831150b8d1206aa69cc15eeda60bd19f21c. --- src/lsu/endianswap.sv | 38 +------------------------------ src/lsu/lsu.sv | 4 ++-- src/lsu/subworddreadmisaligned.sv | 2 +- 3 files changed, 4 insertions(+), 40 deletions(-) diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index 3c552b371..7c042886a 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -34,43 +34,7 @@ module endianswap #(parameter LEN) ( output logic [LEN-1:0] y ); - if(LEN == 256) begin - always_comb - if (BigEndianM) begin // swap endianness - y[255:248] = a[7:0]; - y[247:240] = a[15:8]; - y[239:232] = a[23:16]; - y[231:224] = a[31:24]; - y[223:216] = a[39:32]; - y[215:208] = a[47:40]; - y[207:200] = a[55:48]; - y[199:192] = a[63:56]; - y[191:184] = a[71:64]; - y[183:176] = a[79:72]; - y[175:168] = a[87:80]; - y[167:160] = a[95:88]; - y[159:152] = a[103:96]; - y[151:144] = a[111:104]; - y[143:136] = a[119:112]; - y[135:128] = a[127:120]; - y[127:120] = a[135:128]; - y[119:112] = a[142:136]; - y[111:104] = a[152:144]; - y[103:96] = a[159:152]; - y[95:88] = a[167:160]; - y[87:80] = a[175:168]; - y[79:72] = a[183:176]; - y[71:64] = a[191:184]; - y[63:56] = a[199:192]; - y[55:48] = a[207:200]; - y[47:40] = a[215:208]; - y[39:32] = a[223:216]; - y[31:24] = a[231:224]; - y[23:16] = a[239:232]; - y[15:8] = a[247:240]; - y[7:0] = a[255:248]; - end else y = a; - end else if(LEN == 128) begin + if(LEN == 128) begin always_comb if (BigEndianM) begin // swap endianness y[127:120] = a[7:0]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e3e138ea4..0af655dbf 100644 --- a/src/lsu/lsu.sv +++ 
b/src/lsu/lsu.sv @@ -448,8 +448,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswap #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswap #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); + endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 2868a54d8..66ca0375e 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -77,7 +77,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw 3'b011: if(LLEN == 128 || LLEN == 64 ) ReadDataM = {{LLEN-64{ReadDataAlignedM[63]|FpLoadStoreM}}, ReadDataAlignedM[63:0]}; // ld/fld - 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq + 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 else if(LLEN == 64) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu From 739e73ef8135036a988d2cde7c49bd2845d8327c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:15:58 -0600 Subject: [PATCH 35/52] Revert "Siginficant cleanup of subwordwritemisaligned." 
This reverts commit fbc18abaa0b9bd24d7febfad69840f9d717f76df. --- src/lsu/subwordwritemisaligned.sv | 50 +++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/lsu/subwordwritemisaligned.sv b/src/lsu/subwordwritemisaligned.sv index 22f462d4a..dd82ffa19 100644 --- a/src/lsu/subwordwritemisaligned.sv +++ b/src/lsu/subwordwritemisaligned.sv @@ -72,7 +72,53 @@ module subwordwritemisaligned #(parameter LLEN) ( logic [LLEN*2-1:0] IMAFWriteData2M; assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; localparam OffsetIndex = $clog2(LLEN/8); - - assign LittleEndianWriteDataM = (IMAFWriteData2M << (PAdrSwap[OffsetIndex-1:0] * 8)) | (IMAFWriteData2M >> (LLEN - (PAdrSwap[OffsetIndex-1:0] * 8))); + logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; + // *** RT: Switch to something like this. + assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); + + // Replicate data for subword writes + if (LLEN == 128) begin:sww + always_comb + case(PAdrSwap[3:0]) + 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; + 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; + 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; + 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; + 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; + 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; + 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; + 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; + 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; + 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; + 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; + 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; + 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; + 4'b1101: LittleEndianWriteDataM = 
{24'b0, IMAFWriteDataM, 104'b0}; + 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; + 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // sq + endcase + end else if (LLEN == 64) begin:sww + always_comb + case(PAdrSwap[2:0]) + 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; + 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; + 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; + 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; + 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; + 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], IMAFWriteDataM, IMAFWriteDataM[63:24]}; + 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; + 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; + endcase + end else begin:sww // 32-bit + always_comb + case(PAdrSwap[1:0]) + 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; + 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; + 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; + 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen + endcase + end endmodule From 298028b119777cce32068c18ae399b246f74dc5a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:03 -0600 Subject: [PATCH 36/52] Revert "Cleanup." This reverts commit 45c30267a50771fb0c5acb756ff3988d05f54f4a. 
--- src/lsu/subworddreadmisaligned.sv | 72 ++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 66ca0375e..1e179dbc3 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -71,17 +71,69 @@ module subwordreadmisaligned #(parameter LLEN) assign HalfwordM = ReadDataAlignedM[15:0]; assign WordM = ReadDataAlignedM[31:0]; - always_comb + logic [LLEN-1:0] lb, lh_flh, lw_flw, ld_fld, lbu, lbu_flq, lhu, lwu; + + assign lb = {{LLEN-8{ByteM[7]}}, ByteM}; + assign lh_flh = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};; + assign lw_flw = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; + //assign ld_fld = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; + + + if (LLEN == 128) begin:swrmux + logic [63:0] DblWordM; + logic [127:0] QdWordM; + + assign DblWordM = ReadDataAlignedM[63:0]; + assign QdWordM =ReadDataAlignedM[127:0]; + + // sign extension/ NaN boxing + always_comb case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: if(LLEN == 128 || LLEN == 64 ) ReadDataM = {{LLEN-64{ReadDataAlignedM[63]|FpLoadStoreM}}, ReadDataAlignedM[63:0]}; // ld/fld - 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - else if(LLEN == 64) ReadDataM = FpLoadStoreM ? 
ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b100: ReadDataM = FpLoadStoreM ? QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen endcase + end else if (LLEN == 64) begin:swrmux + logic [63:0] DblWordM; + + assign DblWordM = ReadDataAlignedM[63:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + + //3'b011: ReadDataM = WordM[LLEN-1:0]; // fld + + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + end endmodule From a8024eee26debd95df9c7c2a6a0ba4109bd3004d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:16 -0600 Subject: [PATCH 37/52] Revert "Updated subword misaligned." This reverts commit 69d31d50e27199f105706172b5e2427f96689d3f. 
--- src/lsu/lsu.sv | 2 +- src/lsu/subworddreadmisaligned.sv | 12 ++---------- ...bwordwritemisaligned.sv => subwordwritedouble.sv} | 6 +++--- 3 files changed, 6 insertions(+), 14 deletions(-) rename src/lsu/{subwordwritemisaligned.sv => subwordwritedouble.sv} (97%) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 0af655dbf..8e827292f 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -425,7 +425,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 1e179dbc3..fe96844f3 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -38,7 +38,6 @@ module subwordreadmisaligned #(parameter LLEN) output logic [LLEN-1:0] ReadDataM ); - logic [LLEN*2-1:0] ReadDataAlignedM; logic [7:0] ByteM; logic [15:0] HalfwordM; logic [31:0] WordM; @@ -65,20 +64,13 @@ module subwordreadmisaligned #(parameter LLEN) default: LengthM = 5'd8; endcase + logic [LLEN*2-1:0] ReadDataAlignedM; assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); assign ByteM = ReadDataAlignedM[7:0]; assign HalfwordM = ReadDataAlignedM[15:0]; assign WordM = ReadDataAlignedM[31:0]; - logic [LLEN-1:0] lb, lh_flh, lw_flw, ld_fld, lbu, lbu_flq, lhu, lwu; - - assign lb = 
{{LLEN-8{ByteM[7]}}, ByteM}; - assign lh_flh = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};; - assign lw_flw = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; - //assign ld_fld = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; - - if (LLEN == 128) begin:swrmux logic [63:0] DblWordM; logic [127:0] QdWordM; @@ -128,7 +120,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - //3'b011: ReadDataM = WordM[LLEN-1:0]; // fld + 3'b011: ReadDataM = WordM[LLEN-1:0]; // fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu diff --git a/src/lsu/subwordwritemisaligned.sv b/src/lsu/subwordwritedouble.sv similarity index 97% rename from src/lsu/subwordwritemisaligned.sv rename to src/lsu/subwordwritedouble.sv index dd82ffa19..eb62aa106 100644 --- a/src/lsu/subwordwritemisaligned.sv +++ b/src/lsu/subwordwritedouble.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// subwordwritemisaligned.sv +// subwordwrite.sv // // Written: David_Harris@hmc.edu // Created: 9 January 2021 @@ -28,7 +28,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module subwordwritemisaligned #(parameter LLEN) ( +module subwordwritedouble #(parameter LLEN) ( input logic [2:0] LSUFunct3M, input logic [2:0] PAdrM, input logic FpLoadStoreM, @@ -38,7 +38,7 @@ module subwordwritemisaligned #(parameter LLEN) ( output logic [LLEN*2-1:0] LittleEndianWriteDataM ); - // *** RT: This is logic is duplicated in subwordreadmisaligned. Merge the two. + // *** RT: This is logic is duplicated in subwordreaddouble. Merge the two. 
logic [4:0] PAdrSwap; logic [4:0] BigEndianPAdr; logic [4:0] LengthM; From f752b5dd37d94fb359584f31efbac6d810c548bd Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:24 -0600 Subject: [PATCH 38/52] Revert "Beginning subword cleanup." This reverts commit 7e1ea1e6d9472fdfd188823fc81ee455abbab460. --- src/lsu/lsu.sv | 2 +- src/lsu/subworddreadmisaligned.sv | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 8e827292f..0150be599 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -423,7 +423,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index fe96844f3..cc1c13787 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -40,11 +40,10 @@ module subwordreadmisaligned #(parameter LLEN) logic [7:0] ByteM; logic [15:0] HalfwordM; - logic [31:0] WordM; logic [4:0] PAdrSwap; logic [4:0] BigEndianPAdr; logic [4:0] LengthM; - + // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; @@ -67,14 +66,14 @@ module subwordreadmisaligned #(parameter LLEN) logic [LLEN*2-1:0] ReadDataAlignedM; assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; - if (LLEN == 128) begin:swrmux + logic [31:0] WordM; logic [63:0] DblWordM; logic [127:0] QdWordM; + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; assign DblWordM = ReadDataAlignedM[63:0]; assign QdWordM =ReadDataAlignedM[127:0]; @@ -93,8 +92,12 @@ module subwordreadmisaligned #(parameter LLEN) endcase end else if (LLEN == 64) begin:swrmux + logic [31:0] WordM; logic [63:0] DblWordM; + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; assign DblWordM = ReadDataAlignedM[63:0]; // sign extension/ NaN boxing @@ -113,6 +116,12 @@ module subwordreadmisaligned #(parameter LLEN) end else begin:swrmux // 32-bit + logic [31:0] WordM; + + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; + // sign extension always_comb case(Funct3M) From a48c16c0efbab9ba6e149687630b5dad3a5b9863 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:32 -0600 Subject: [PATCH 39/52] Revert "Swapped to the more compact subwordreadmisaligned.sv." This reverts commit 1ece6f8eaeaa6bdccda4e9e8b05697b5cad4986f. 
--- ...readmisaligned.sv => subwordreaddouble.sv} | 108 +++++++++++++----- 1 file changed, 82 insertions(+), 26 deletions(-) rename src/lsu/{subworddreadmisaligned.sv => subwordreaddouble.sv} (53%) diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subwordreaddouble.sv similarity index 53% rename from src/lsu/subworddreadmisaligned.sv rename to src/lsu/subwordreaddouble.sv index cc1c13787..936240cf7 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subwordreaddouble.sv @@ -28,7 +28,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module subwordreadmisaligned #(parameter LLEN) +module subwordreaddouble #(parameter LLEN) ( input logic [LLEN*2-1:0] ReadDataWordMuxM, input logic [2:0] PAdrM, @@ -63,19 +63,50 @@ module subwordreadmisaligned #(parameter LLEN) default: LengthM = 5'd8; endcase - logic [LLEN*2-1:0] ReadDataAlignedM; - assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); - if (LLEN == 128) begin:swrmux logic [31:0] WordM; logic [63:0] DblWordM; - logic [127:0] QdWordM; - - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; - assign DblWordM = ReadDataAlignedM[63:0]; - assign QdWordM =ReadDataAlignedM[127:0]; + logic [63:0] QdWordM; + always_comb + case(PAdrSwap) + 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; + 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; + 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; + 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; + 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; + 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; + 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; + 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; + 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; + 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; + 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; + 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; + 5'b01100: QdWordM = 
ReadDataWordMuxM[223:96]; + 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; + 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; + 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; + 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; + 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; + 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; + 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; + 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; + 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; + 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; + 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; + 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; + 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; + 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; + 5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; + 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; + 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; + 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; + 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; + endcase + + assign ByteM = QdWordM[7:0]; + assign HalfwordM = QdWordM[15:0]; + assign WordM = QdWordM[31:0]; + assign DblWordM = QdWordM[63:0]; // sign extension/ NaN boxing always_comb @@ -85,7 +116,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b100: ReadDataM = FpLoadStoreM ? QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen @@ -94,11 +125,29 @@ module subwordreadmisaligned #(parameter LLEN) end else if (LLEN == 64) begin:swrmux logic [31:0] WordM; logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; + 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; + 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; + 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; + 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; + 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; + 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; + 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; - assign DblWordM = ReadDataAlignedM[63:0]; + assign ByteM = DblWordM[7:0]; + assign HalfwordM = DblWordM[15:0]; + assign WordM = DblWordM[31:0]; // sign extension/ NaN boxing always_comb @@ -116,25 +165,32 @@ module subwordreadmisaligned #(parameter LLEN) end else begin:swrmux // 32-bit - logic [31:0] WordM; + logic [31:0] WordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = 
ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; + assign ByteM = WordM[7:0]; + assign HalfwordM = WordM[15:0]; // sign extension always_comb case(Funct3M) 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - - 3'b011: ReadDataM = WordM[LLEN-1:0]; // fld - + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen endcase end endmodule From dce7de59a37fdc1acb9bc0e508aea001cf2ec06d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:37 -0600 Subject: [PATCH 40/52] Revert "Non-ideal fix. Added new output from pma which indicates if the write shift should occur." This reverts commit 3714b2bf4adb815704c718a3cec921e563462d31. 
--- src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 5 ++--- src/lsu/subwordwritedouble.sv | 6 +++--- src/mmu/mmu.sv | 3 +-- src/mmu/pmachecker.sv | 7 +++---- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index bb23f4fd3..4848b5ebb 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -185,7 +185,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .TLBFlush, .PhysicalAddress(PCPF), .TLBMiss(ITLBMissF), - .Cacheable(CacheableF), .Idempotent(), .AllowShift(), .SelTIM(SelIROM), + .Cacheable(CacheableF), .Idempotent(), .SelTIM(SelIROM), .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 0150be599..17c9d0c69 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -119,7 +119,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic SelSpillE; // Align logic detected a spill and needs to stall logic CacheableM; // PMA indicates memory address is cacheable - logic AllowShiftM; // PMA: indicates if WriteData should be byte shifted before going to cache or bus by offset. 
logic BusCommittedM; // Bus memory operation in flight, delay interrupts logic DCacheCommittedM; // D$ memory operation started, delay interrupts @@ -243,7 +242,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_ADUE, .PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]), .PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), .TLBFlush(sfencevmaM), - .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .AllowShift(AllowShiftM), .SelTIM(SelDTIM), + .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM), .InstrAccessFaultF(), .LoadAccessFaultM(LSULoadAccessFaultM), .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM(LSULoadPageFaultM), .StoreAmoPageFaultM(LSUStoreAmoPageFaultM), @@ -425,7 +424,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .CacheableM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv index eb62aa106..599d71984 100644 --- a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritedouble.sv @@ -33,7 +33,7 @@ module subwordwritedouble #(parameter LLEN) ( input logic [2:0] PAdrM, input logic FpLoadStoreM, input 
logic BigEndianM, - input logic AllowShiftM, + input logic CacheableM, input logic [LLEN-1:0] IMAFWriteDataM, output logic [LLEN*2-1:0] LittleEndianWriteDataM ); @@ -48,8 +48,8 @@ module subwordwritedouble #(parameter LLEN) ( // 10: PAdrM[2:0] // 11: BigEndianPAdr // 00: 00000 - // 01: 11111 - mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {AllowShiftM, BigEndianM}, PAdrSwap); + // 01: 00111 + mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {CacheableM, BigEndianM}, PAdrSwap); //assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; /* verilator lint_off WIDTHEXPAND */ /* verilator lint_off WIDTHTRUNC */ diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 80a1ca7da..e842016a2 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -49,7 +49,6 @@ module mmu import cvw::*; #(parameter cvw_t P, output logic TLBMiss, // Miss TLB output logic Cacheable, // PMA indicates memory address is cachable output logic Idempotent, // PMA indicates memory address is idempotent - output logic AllowShift, // PMA indicates if WriteData should be byte shifted before going to cache or bus by offset output logic SelTIM, // Select a tightly integrated memory // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, // access fault sources @@ -113,7 +112,7 @@ module mmu import cvw::*; #(parameter cvw_t P, pmachecker #(P) pmachecker(.PhysicalAddress, .Size, .CMOpM, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PBMemoryType, - .Cacheable, .Idempotent, .AllowShift, .SelTIM, + .Cacheable, .Idempotent, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); if (P.PMP_ENTRIES > 0) begin : pmp diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 60296213d..84e41ba65 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -1,4 +1,4 @@ -////////////////////////////////////////// +/////////////////////////////////////////// // pmachecker.sv // // Written: tfleming@hmc.edu & 
jtorrey@hmc.edu 20 April 2021 @@ -38,7 +38,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( input logic WriteAccessM, // Write access input logic ReadAccessM, // Read access input logic [1:0] PBMemoryType, // PBMT field of PTE during TLB hit, or 00 otherwise - output logic Cacheable, Idempotent, AllowShift, SelTIM, + output logic Cacheable, Idempotent, SelTIM, output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, output logic PMAStoreAmoAccessFaultM @@ -60,8 +60,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[3] | SelRegions[4] | SelRegions[5]; // exclusion-tag: unused-cachable - assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; - assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6]; + assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent From 9668fdd8686e5afff8750e3552f57a2df9663117 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:43 -0600 Subject: [PATCH 41/52] Revert "Closer to getting subword write misaligned working." This reverts commit 6a9c2d8dc43a1f997cf16969a2901d1e91fd4756. 
--- src/lsu/lsu.sv | 2 +- src/lsu/subwordwritedouble.sv | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 17c9d0c69..9fdf267b6 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -424,7 +424,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .CacheableM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv index 599d71984..728a4f4aa 100644 --- a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritedouble.sv @@ -33,7 +33,6 @@ module subwordwritedouble #(parameter LLEN) ( input logic [2:0] PAdrM, input logic FpLoadStoreM, input logic BigEndianM, - input logic CacheableM, input logic [LLEN-1:0] IMAFWriteDataM, output logic [LLEN*2-1:0] LittleEndianWriteDataM ); @@ -44,13 +43,7 @@ module subwordwritedouble #(parameter LLEN) ( logic [4:0] LengthM; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. - // cacheable, BigEndian - // 10: PAdrM[2:0] - // 11: BigEndianPAdr - // 00: 00000 - // 01: 00111 - mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {CacheableM, BigEndianM}, PAdrSwap); - //assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; /* verilator lint_off WIDTHEXPAND */ /* verilator lint_off WIDTHTRUNC */ assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; From 57aab52dc27f429bda5c07be8de6c42a5e5fe0a8 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:17:57 -0600 Subject: [PATCH 42/52] Revert "Partially working optimized subwordwrite for misaligned." This reverts commit dac8fc16af30dcd1182c9f7f4d69383dfde042fe. --- src/lsu/lsu.sv | 15 ++--- src/lsu/subwordwritedouble.sv | 117 ---------------------------------- 2 files changed, 7 insertions(+), 125 deletions(-) delete mode 100644 src/lsu/subwordwritedouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 9fdf267b6..9c7f49684 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -136,8 +136,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IHWriteDataM; // IEU or HPTW write data logic [P.XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data logic [P.LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data - logic [MLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data - logic [MLEN-1:0] LSUWriteDataM; // Final write data + logic [P.LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data + logic [P.LLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic [1:0] MemRWSpillM; @@ -166,7 +166,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .FpLoadStoreM, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM); assign IEUAdrExtM = 
{2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -335,7 +335,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .CacheRW(CacheRWM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), - .CacheWriteData(LSUWriteDataM), .SelHPTW, + .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), @@ -349,7 +349,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), - .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM[P.LLEN-1:0]), + .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusAtomic, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), @@ -424,12 +424,11 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, 
.LittleEndianWriteDataM); end + subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks swbytemask #(P.LLEN, P.ZICCLSM_SUPPORTED) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM), .ByteMaskExtended(ByteMaskExtendedM)); @@ -447,7 +446,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv deleted file mode 100644 index 728a4f4aa..000000000 --- a/src/lsu/subwordwritedouble.sv +++ /dev/null @@ -1,117 +0,0 @@ -/////////////////////////////////////////// -// subwordwrite.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// Purpose: Masking and muxing for subword writes -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordwritedouble #(parameter LLEN) ( - input logic [2:0] LSUFunct3M, - input logic [2:0] PAdrM, - input logic FpLoadStoreM, - input logic BigEndianM, - input logic [LLEN-1:0] IMAFWriteDataM, - output logic [LLEN*2-1:0] LittleEndianWriteDataM -); - - // *** RT: This is logic is duplicated in subwordreaddouble. Merge the two. - logic [4:0] PAdrSwap; - logic [4:0] BigEndianPAdr; - logic [4:0] LengthM; - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; - /* verilator lint_off WIDTHEXPAND */ - /* verilator lint_off WIDTHTRUNC */ - assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; - /* verilator lint_on WIDTHTRUNC */ - /* verilator lint_on WIDTHEXPAND */ - - always_comb - case(LSUFunct3M & {FpLoadStoreM, 2'b11}) - 3'b000: LengthM = 5'd1; - 3'b001: LengthM = 5'd2; - 3'b010: LengthM = 5'd4; - 3'b011: LengthM = 5'd8; - 3'b100: LengthM = 5'd16; - default: LengthM = 5'd8; - endcase // case (LSUFunct3M & {FpLoadStoreM, 2'b11}) - - // *** RT: End duplicated logic - - logic [LLEN*2-1:0] IMAFWriteData2M; - assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; - localparam OffsetIndex = $clog2(LLEN/8); - logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; - // *** RT: Switch to something like this. 
- assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); - - - // Replicate data for subword writes - if (LLEN == 128) begin:sww - always_comb - case(PAdrSwap[3:0]) - 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; - 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; - 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; - 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; - 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; - 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; - 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; - 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; - 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; - 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; - 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; - 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; - 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; - 4'b1101: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 104'b0}; - 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; - 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; - default: LittleEndianWriteDataM = IMAFWriteDataM; // sq - endcase - end else if (LLEN == 64) begin:sww - always_comb - case(PAdrSwap[2:0]) - 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; - 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; - 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; - 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; - 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; - 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], 
IMAFWriteDataM, IMAFWriteDataM[63:24]}; - 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; - 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; - endcase - end else begin:sww // 32-bit - always_comb - case(PAdrSwap[1:0]) - 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; - 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; - 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; - 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; - default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen - endcase - end -endmodule From 2f94be5e796e7acb97e3c6b44eea4e7ca8ea77a6 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:19:17 -0600 Subject: [PATCH 43/52] Revert "Optimized the align logic for loads." This reverts commit 1fd678b43348fd447a7701c4d8fec968593f9ec6. --- src/lsu/align.sv | 6 +- src/lsu/endianswapdouble.sv | 114 -------------------- src/lsu/lsu.sv | 29 ++---- src/lsu/subwordreaddouble.sv | 196 ----------------------------------- 4 files changed, 14 insertions(+), 331 deletions(-) delete mode 100644 src/lsu/endianswapdouble.sv delete mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/align.sv b/src/lsu/align.sv index fa10916f9..33c7471a3 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -53,7 +53,7 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [P.LLEN*2-1:0] ReadDataWordSpillAllM, + output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic SpillStallM); 
localparam LLENINBYTES = P.LLEN/8; @@ -67,6 +67,8 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic MisalignedM; + logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; + logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; logic [P.XLEN-1:0] IEUAdrIncrementM; @@ -146,6 +148,8 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; + assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv deleted file mode 100644 index 133149f0e..000000000 --- a/src/lsu/endianswapdouble.sv +++ /dev/null @@ -1,114 +0,0 @@ -/////////////////////////////////////////// -// endianswap.sv -// -// Written: David_Harris@hmc.edu -// Created: 7 May 2022 -// Modified: 18 January 2023 -// -// Purpose: Swap byte order for Big-Endian accesses -// -// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module endianswapdouble #(parameter LEN) ( - input logic BigEndianM, - input logic [LEN-1:0] a, - output logic [LEN-1:0] y -); - - if(LEN == 256) begin - always_comb - if (BigEndianM) begin // swap endianness - y[255:248] = a[7:0]; - y[247:240] = a[15:8]; - y[239:232] = a[23:16]; - y[231:224] = a[31:24]; - y[223:216] = a[39:32]; - y[215:208] = a[47:40]; - y[207:200] = a[55:48]; - y[199:192] = a[63:56]; - y[191:184] = a[71:64]; - y[183:176] = a[79:72]; - y[175:168] = a[87:80]; - y[167:160] = a[95:88]; - y[159:152] = a[103:96]; - y[151:144] = a[111:104]; - y[143:136] = a[119:112]; - y[135:128] = a[127:120]; - y[127:120] = a[135:128]; - y[119:112] = a[142:136]; - y[111:104] = a[152:144]; - y[103:96] = a[159:152]; - y[95:88] = a[167:160]; - y[87:80] = a[175:168]; - y[79:72] = a[183:176]; - y[71:64] = a[191:184]; - y[63:56] = a[199:192]; - y[55:48] = a[207:200]; - y[47:40] = a[215:208]; - y[39:32] = a[223:216]; - y[31:24] = a[231:224]; - y[23:16] = a[239:232]; - y[15:8] = a[247:240]; - y[7:0] = a[255:248]; - end else y = a; - end else if(LEN == 128) begin - always_comb - if (BigEndianM) begin // swap endianness - y[127:120] = a[7:0]; - y[119:112] = a[15:8]; - y[111:104] = a[23:16]; - y[103:96] = a[31:24]; - y[95:88] = a[39:32]; - y[87:80] = a[47:40]; - y[79:72] = a[55:48]; - y[71:64] = a[63:56]; - y[63:56] = a[71:64]; - y[55:48] = a[79:72]; - y[47:40] = a[87:80]; - y[39:32] = a[95:88]; - y[31:24] = a[103:96]; - y[23:16] = a[111:104]; - y[15:8] = a[119:112]; - 
y[7:0] = a[127:120]; - end else y = a; - end else if(LEN == 64) begin - always_comb - if (BigEndianM) begin // swap endianness - y[63:56] = a[7:0]; - y[55:48] = a[15:8]; - y[47:40] = a[23:16]; - y[39:32] = a[31:24]; - y[31:24] = a[39:32]; - y[23:16] = a[47:40]; - y[15:8] = a[55:48]; - y[7:0] = a[63:56]; - end else y = a; - end else begin - always_comb - if (BigEndianM) begin - y[31:24] = a[7:0]; - y[23:16] = a[15:8]; - y[15:8] = a[23:16]; - y[7:0] = a[31:24]; - end else y = a; - end -endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 9c7f49684..ac4edcd19 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -128,8 +128,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ - logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data - logic [MLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data + logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data + logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data + logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection logic [P.LLEN-1:0] ReadDataM; // Final read data @@ -153,7 +154,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic SelDTIM; // Select DTIM rather than bus or D$ logic [P.XLEN-1:0] WriteDataZM; logic LSULoadPageFaultM, LSUStoreAmoPageFaultM; - logic [MLEN-1:0] ReadDataWordSpillAllM; ///////////////////////////////////////////////////////////////////////////////////////////// // Pipeline for IEUAdr E to M @@ -167,14 +167,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM); + 
.IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; end else begin : no_ziccslm_align assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtE = {2'b00, IEUAdrE}; assign SelSpillE = '0; - assign ReadDataWordSpillAllM = DCacheReadDataWordM; + assign DCacheReadDataWordSpillM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; @@ -296,7 +296,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( localparam AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^ localparam LINELEN = P.DCACHE_LINELENINBITS; // Number of bits in cacheline localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) - localparam MLENPOVERAHBW = MLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) localparam CACHEWORDLEN = P.ZICCLSM_SUPPORTED ? 2*P.LLEN : P.LLEN; // Width of the cache's input and output data buses. Misaligned doubles width for fast access logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline @@ -360,14 +359,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. // pma should generate exception for LLEN read to periph. 
-/* -----\/----- EXCLUDED -----\/----- mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); - -----/\----- EXCLUDED -----/\----- */ - mux3 #(MLEN) UnCachedDataMux(.d0(ReadDataWordSpillAllM), .d1({MLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), - .d2({{(MLEN-P.XLEN){1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), - .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface logic [1:0] BusRW; // Non-DTIM memory access, ignore cacheableM logic [P.XLEN-1:0] FetchBuffer; @@ -420,14 +414,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// - - if(MISALIGN_SUPPORT) begin - subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - end else begin - subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - end + + subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks @@ -447,7 +436,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if (P.BIGENDIAN_SUPPORTED) begin:endian endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); 
+ endianswap #(P.LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv deleted file mode 100644 index 936240cf7..000000000 --- a/src/lsu/subwordreaddouble.sv +++ /dev/null @@ -1,196 +0,0 @@ -/////////////////////////////////////////// -// subwordread.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// Purpose: Extract subwords and sign extend for reads -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordreaddouble #(parameter LLEN) - ( - input logic [LLEN*2-1:0] ReadDataWordMuxM, - input logic [2:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN-1:0] ReadDataM -); - - logic [7:0] ByteM; - logic [15:0] HalfwordM; - logic [4:0] PAdrSwap; - logic [4:0] BigEndianPAdr; - logic [4:0] LengthM; - - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; - /* verilator lint_off WIDTHEXPAND */ - /* verilator lint_off WIDTHTRUNC */ - assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; - /* verilator lint_on WIDTHTRUNC */ - /* verilator lint_on WIDTHEXPAND */ - - always_comb - case(Funct3M & {FpLoadStoreM, 2'b11}) - 3'b000: LengthM = 5'd1; - 3'b001: LengthM = 5'd2; - 3'b010: LengthM = 5'd4; - 3'b011: LengthM = 5'd8; - 3'b100: LengthM = 5'd16; - default: LengthM = 5'd8; - endcase - - if (LLEN == 128) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - logic [63:0] QdWordM; - always_comb - case(PAdrSwap) - 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; - 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; - 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; - 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; - 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; - 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; - 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; - 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; - 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; - 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; - 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; - 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; - 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; - 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; - 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; - 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; - 5'b10000: QdWordM = 
ReadDataWordMuxM[255:128]; - 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; - 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; - 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; - 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; - 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; - 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; - 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; - 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; - 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; - 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; - 5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; - 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; - 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; - 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; - 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; - endcase - - assign ByteM = QdWordM[7:0]; - assign HalfwordM = QdWordM[15:0]; - assign WordM = QdWordM[31:0]; - assign DblWordM = QdWordM[63:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; - 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; - 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; - 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; - 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; - 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; - 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; - 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - assign ByteM = DblWordM[7:0]; - assign HalfwordM = DblWordM[15:0]; - assign WordM = DblWordM[31:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - - logic [31:0] WordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase - - assign ByteM = WordM[7:0]; - assign HalfwordM = WordM[15:0]; - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen - endcase - end -endmodule From 54c1d28c8ba71b20d941a1560a0634ce05428f31 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:43:55 -0600 Subject: [PATCH 44/52] Fixed missing case in the align AccesByteOffset Mux. 
--- src/lsu/align.sv | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index e8cedec6c..d4603941b 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -98,10 +98,11 @@ module align import cvw::*; #(parameter cvw_t P) ( 3'b000: AccessByteOffsetM = 0; // byte access 3'b001: AccessByteOffsetM = {{OFFSET_LEN-1{1'b0}}, IEUAdrM[0]}; // half access 3'b010: AccessByteOffsetM = {{OFFSET_LEN-2{1'b0}}, IEUAdrM[1:0]}; // word access - 3'b011: AccessByteOffsetM = {{OFFSET_LEN-3{1'b0}}, IEUAdrM[2:0]}; // double access + 3'b011: if(P.LLEN >= 64) AccessByteOffsetM = {{OFFSET_LEN-3{1'b0}}, IEUAdrM[2:0]}; // double access + else AccessByteOffsetM = 0; // shouldn't happen 3'b100: if(P.LLEN == 128) AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; // quad access - else AccessByteOffsetM = 0; // invalid - default: AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; + else AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; + default: AccessByteOffsetM = 0; // shouldn't happen endcase case (Funct3M[1:0]) 2'b00: PotentialSpillM = 0; // byte access From c7c12cc3a8b3136176c36ba9b233c2b82d8c3a4d Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 6 Mar 2024 14:00:57 -0800 Subject: [PATCH 45/52] Fixed Lint issue on cacheLRU --- src/cache/cacheLRU.sv | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index cdd513547..865ebc74d 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -141,16 +141,17 @@ module cacheLRU // LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice. // This is a two port memory. // Every cycle must read from CacheSetData and each load/store must write the new LRU. 
+ + // note: Verilator lint doesn't like <= for array initialization (https://verilator.org/warn/BLKLOOPINIT?v=5.021) + // Move to = to keep Verilator happy and simulator running fast always_ff @(posedge clk) begin if (reset | (InvalidateCache & ~FlushStage)) - for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= 0; // exclusion-tag: initialize - if(CacheEn) begin - if(LRUWriteEn) - LRUMemory[PAdr] <= NextLRU; - if(LRUWriteEn & (PAdr == CacheSetTag)) - CurrLRU <= #1 NextLRU; - else - CurrLRU <= #1 LRUMemory[CacheSetTag]; + for (int set = 0; set < NUMLINES; set++) LRUMemory[set] = 0; // exclusion-tag: initialize + else if(CacheEn) begin + // Because we are using blocking assignments, change to LRUMemory must occur after LRUMemory is used so we get the proper value + if(LRUWriteEn & (PAdr == CacheSetTag)) CurrLRU = #1 NextLRU; + else CurrLRU = #1 LRUMemory[CacheSetTag]; + if(LRUWriteEn) LRUMemory[PAdr] = NextLRU; end end From 0f93d009772436f93b771cc113a95091401ce4c3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 6 Mar 2024 14:05:15 -0800 Subject: [PATCH 46/52] Commented out embench tests from nightly regression, fixed which half preicsion tests are run --- sim/regression-wally | 88 ++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/sim/regression-wally b/sim/regression-wally index ad1720004..ea855b358 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -222,62 +222,62 @@ if (nightly): ### branch predictor simulation - ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - 
["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_TWOBIT_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], 
"configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_6_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_6_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_8_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_8_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_12_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_12_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_14_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_14_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_16_16_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_16_16_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - # 
btb - ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # # btb + # ["bpred_GSHARE_10_16_6_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_6_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_8_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_8_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_12_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_16_12_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - # ras - ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], - ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], 
"configOptions", "-GPrintHPMCounters=1"], + # # ras + # ["bpred_GSHARE_10_2_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_2_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_3_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_3_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_4_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_4_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_6_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_6_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_10_10_0_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], + # ["bpred_GSHARE_10_10_10_1_rv32gc", ["embench"], "configOptions", "-GPrintHPMCounters=1"], # enable floating-point tests when lint is fixed ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]], ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]], ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], - ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], + ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma"]], ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]], ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", 
"arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], - ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], + ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma"]], ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], From 24dffa39d556e98c7bdd8e168fbcd9ee59c78af2 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 7 Mar 2024 12:48:52 -0600 Subject: [PATCH 47/52] Yay. David and I got our first Quad load/store instructions working! --- testbench/testbench.sv | 1 + testbench/tests.vh | 4 ++++ tests/riscof/spike/spike_rv64gc_isa.yaml | 4 ++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/testbench/testbench.sv b/testbench/testbench.sv index d52c0baf8..1a72d4f0f 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -124,6 +124,7 @@ module testbench; "imperas64f": if (P.F_SUPPORTED) tests = imperas64f; "imperas64d": if (P.D_SUPPORTED) tests = imperas64d; "imperas64m": if (P.M_SUPPORTED) tests = imperas64m; + "wally64q": if (P.Q_SUPPORTED) tests = wally64q; "wally64a": if (P.A_SUPPORTED) tests = wally64a; "imperas64c": if (P.C_SUPPORTED) tests = imperas64c; else tests = imperas64iNOc; diff --git a/testbench/tests.vh b/testbench/tests.vh index 95ebb74b3..afde1f2e6 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -869,6 +869,10 @@ string imperas32f[] = '{ "rv32i_m/I/XORI-01" }; + string wally64q[] = '{ + `WALLYTEST, + "rv64i_m/Q/src/WALLY-q-01.S" + }; string wally64a[] = '{ `WALLYTEST, diff --git a/tests/riscof/spike/spike_rv64gc_isa.yaml b/tests/riscof/spike/spike_rv64gc_isa.yaml index 4374ad07c..7bbcaf9e5 100644 --- a/tests/riscof/spike/spike_rv64gc_isa.yaml +++ b/tests/riscof/spike/spike_rv64gc_isa.yaml @@ -2,12 +2,12 @@ hart_ids: [0] hart0: # ISA: 
RV64IMAFDCSUZicsr_Zicboz_Zifencei_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb # ISA: RV64IMAFDCSUZicsr_Zifencei_Zca_Zcb_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb - ISA: RV64IMAFDCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb + ISA: RV64IMAFDQCSUZicsr_Zicond_Zifencei_Zfa_Zfh_Zba_Zbb_Zbc_Zbs # Zkbs_Zcb physical_addr_sz: 56 User_Spec_Version: '2.3' supported_xlen: [64] misa: - reset-val: 0x800000000014112D + reset-val: 0x800000000015112D rv32: accessible: false rv64: From 1872966b0b6bb1f9a07b1d7e9e42f37a4539cb49 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 7 Mar 2024 13:02:24 -0600 Subject: [PATCH 48/52] Progress. --- .../Q/references/WALLY-q-01.reference_output | 8 ++ .../rv64i_m/Q/src/WALLY-q-01.S | 117 ++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/references/WALLY-q-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/references/WALLY-q-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/references/WALLY-q-01.reference_output new file mode 100644 index 000000000..ccd487ddd --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/references/WALLY-q-01.reference_output @@ -0,0 +1,8 @@ +00000000 # fsq of 1 +00000000 +00000000 +3fff0000 +00003f00 # fsh of 1 +00000000 +00000000 +00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S new file mode 100644 index 000000000..79e856a95 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S @@ -0,0 +1,117 @@ +/////////////////////////////////////////// +// ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-ADD.S +// David_Harris@hmc.edu & Rose Thompson +// Created 07 March 2024 +// Copyright (C) 2021-23 Harvey Mudd College & 
Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV64IFDQ_Zicsr") + +.section .text.init +.globl rvtest_entry_point +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +#ifdef TEST_CASE_1 + +RVTEST_CASE(0,"//check ISA:=regex(.*Q.*);def TEST_CASE_1=True;def NO_SAIL=True",flq-align) + +RVTEST_FP_ENABLE() +RVTEST_VALBASEUPD(x3,test_dataset_0) +RVTEST_SIGBASE(x1,signature_x1_1) + +#endif + + # turn on the floating point unit + li x7, 1 + slli x7, x7, 13 + csrw mstatus, x7 + +li x4, 1 # 3fff 0000 0000 0000 0000 0000 0000 0000 +li x2, 2 # 4000 0000 0000 0000 0000 0000 0000 0000 +fcvt.q.w f2, x2 +fcvt.q.w f4, x4 + +fcvt.h.w f5, x2 + +# test quad load/store +fsq f4, 0(x3) +flq f7, 0(x3) +fsq f7, 0(x1) + +# test half load/store +fsh f5, 16(x3) +flh f6, 16(x3) +fsh f6, 16(x1) + +# 1 + 2 = 3 # 4000 1000 0000 0000 0000 0000 0000 0000 +fadd.q f8, f2, f4 +fsq f8, 32(x3) + +# 1 - 2 = -1 +fsub.q f9, f2, f4 # bfff 0000000000000000000000000000 +fsq f9, 48(x3) + +# 2 * 3 = 6 +fsub.q f10, f4, f8 # 4001 1000000000000000000000000000 +fsq f10, 64(x3) + +# 6 * (-1) + 2 = -4 +fmadd.q f11, f10, f9, f4 # C001 0000000000000000000000000000 +fsq f11, 80(x3) + +# -4 / 2 = -2 +fdiv.q f12, f11, f4 # 
C000 0000000000000000000000000000 +fsq f12, 96(x3) + +# sign injection -4 = 4 +fsgnj.q f13, f12, f4 # 4001 0000000000000000000000000000 +fsq f13, 112(x3) + +# sqrt(4) = 2 +fsqrt.q f14, f13 # 4000 0000000000000000000000000000 +fsq f14, 128(x3) + + +RVTEST_CODE_END +RVMODEL_HALT + +RVTEST_DATA_BEGIN +.align 4 +rvtest_data: +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba +.word 0xecafebab +.word 0xecafebab +test_dataset_0: +test_dataset_1: +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN +rvtest_sig_begin: + + + + +signature_x1_1: + .fill 8,8,0xdeadbeefdeadbeef + .fill 8,8,0xdeadbeefdeadbeef + +rvtest_sig_end: +RVMODEL_DATA_END From a85ace87c72683c1285571deb9748ee899667456 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 7 Mar 2024 15:01:48 -0600 Subject: [PATCH 49/52] Sold progress towards a decent q test. --- .../Q/references/WALLY-q-01.reference_output | 30 ++++++- .../rv64i_m/Q/src/WALLY-q-01.S | 81 ++++++++++++++----- 2 files changed, 88 insertions(+), 23 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/references/WALLY-q-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/references/WALLY-q-01.reference_output index ccd487ddd..6f8523bbf 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/references/WALLY-q-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/references/WALLY-q-01.reference_output @@ -2,7 +2,35 @@ 00000000 00000000 3fff0000 -00003f00 # fsh of 1 +dead4000 # fsh of 1 +deadbeef +deadbeef +deadbeef +00000000 # fsq of 3 00000000 00000000 +40008000 +00000000 # fsq of -1 00000000 +00000000 +bfff0000 +00000000 # fsq of 6 +00000000 +00000000 +40018000 +00000000 # fsq of -4 +00000000 +00000000 +C0010000 +00000000 # fsq of -2 +00000000 +00000000 +C0000000 +00000000 # fsq of 4 +00000000 +00000000 +40010000 +00000000 # fsq of 2 +00000000 +00000000 +40000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S 
b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S index 79e856a95..791d41a8a 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S @@ -19,7 +19,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////// #include "model_test.h" #include "arch_test.h" -RVTEST_ISA("RV64IFDQ_Zicsr") +RVTEST_ISA("RV64IFDQZfh_Zicsr") .section .text.init .globl rvtest_entry_point @@ -37,17 +37,17 @@ RVTEST_SIGBASE(x1,signature_x1_1) #endif - # turn on the floating point unit - li x7, 1 - slli x7, x7, 13 - csrw mstatus, x7 +# turn on the floating point unit +li x7, 1 +slli x7, x7, 13 +csrw mstatus, x7 li x4, 1 # 3fff 0000 0000 0000 0000 0000 0000 0000 li x2, 2 # 4000 0000 0000 0000 0000 0000 0000 0000 fcvt.q.w f2, x2 fcvt.q.w f4, x4 -fcvt.h.w f5, x2 +fcvt.h.w f5, x2 # 4000 # test quad load/store fsq f4, 0(x3) @@ -59,33 +59,33 @@ fsh f5, 16(x3) flh f6, 16(x3) fsh f6, 16(x1) -# 1 + 2 = 3 # 4000 1000 0000 0000 0000 0000 0000 0000 +# 1 + 2 = 3 # 4000 8000 0000 0000 0000 0000 0000 0000 fadd.q f8, f2, f4 -fsq f8, 32(x3) +fsq f8, 32(x1) # 1 - 2 = -1 -fsub.q f9, f2, f4 # bfff 0000000000000000000000000000 -fsq f9, 48(x3) +fsub.q f9, f4, f2 # bfff 0000000000000000000000000000 +fsq f9, 48(x1) # 2 * 3 = 6 -fsub.q f10, f4, f8 # 4001 1000000000000000000000000000 -fsq f10, 64(x3) +fmul.q f10, f2, f8 # 4001 8000000000000000000000000000 +fsq f10, 64(x1) # 6 * (-1) + 2 = -4 -fmadd.q f11, f10, f9, f4 # C001 0000000000000000000000000000 -fsq f11, 80(x3) +fmadd.q f11, f10, f9, f2 # C001 0000000000000000000000000000 +fsq f11, 80(x1) # -4 / 2 = -2 -fdiv.q f12, f11, f4 # C000 0000000000000000000000000000 -fsq f12, 96(x3) +fdiv.q f12, f11, f2 # C000 0000000000000000000000000000 +fsq f12, 96(x1) # sign injection -4 = 4 fsgnj.q f13, f12, f4 # 4001 0000000000000000000000000000 -fsq f13, 112(x3) +fsq f13, 112(x1) # sqrt(4) = 2 fsqrt.q f14, f13 # 
4000 0000000000000000000000000000 -fsq f14, 128(x3) +fsq f14, 128(x1) RVTEST_CODE_END @@ -94,12 +94,43 @@ RVMODEL_HALT RVTEST_DATA_BEGIN .align 4 rvtest_data: +test_dataset_0: .word 0xbabecafe .word 0xabecafeb .word 0xbecafeba .word 0xecafebab +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba +.word 0xecafebab +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba +.word 0xecafebab +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba +.word 0xecafebab +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba +.word 0xecafebab +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba +.word 0xecafebab +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba +.word 0xecafebab +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba +.word 0xecafebab +.word 0xbabecafe +.word 0xabecafeb +.word 0xbecafeba .word 0xecafebab -test_dataset_0: test_dataset_1: RVTEST_DATA_END @@ -110,8 +141,14 @@ rvtest_sig_begin: signature_x1_1: - .fill 8,8,0xdeadbeefdeadbeef - .fill 8,8,0xdeadbeefdeadbeef - + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef + .int 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef rvtest_sig_end: RVMODEL_DATA_END From 402d71e5f481857f2411d5c55d86fee1a4074dd1 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 7 Mar 2024 15:19:53 -0600 Subject: [PATCH 50/52] Added basic Quad testing. 
--- sim/regression-wally | 2 +- .../riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sim/regression-wally b/sim/regression-wally index ea855b358..26543d067 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -278,7 +278,7 @@ if (nightly): ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], # hanging 1/31/24 dh; try again when lint is fixed ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma"]], - ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], + ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "wally64q"]], ] diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S index 791d41a8a..ea8bd15d5 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/Q/src/WALLY-q-01.S @@ -79,8 +79,8 @@ fsq f11, 80(x1) fdiv.q f12, f11, f2 # C000 0000000000000000000000000000 fsq f12, 96(x1) -# sign injection -4 = 4 -fsgnj.q f13, f12, f4 # 4001 0000000000000000000000000000 +# sign injection (-4, 1) = 4 +fsgnj.q f13, f11, f4 # 4001 0000000000000000000000000000 fsq f13, 112(x1) # sqrt(4) = 2 From e870e8137b63cb0f46ee6f55343b8bc6ae6e08f4 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 8 Mar 2024 09:16:30 -0600 Subject: [PATCH 51/52] Finished Wally rvvi tracer. 
--- testbench/common/wallyTracer.sv | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/testbench/common/wallyTracer.sv b/testbench/common/wallyTracer.sv index 309b39027..554ebc5d7 100644 --- a/testbench/common/wallyTracer.sv +++ b/testbench/common/wallyTracer.sv @@ -63,7 +63,7 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); logic CSRWriteM, CSRWriteW; logic [11:0] CSRAdrM, CSRAdrW; logic wfiM; - logic InterruptM; + logic InterruptM, InterruptW; assign clk = testbench.dut.clk; // assign InstrValidF = testbench.dut.core.ieu.InstrValidF; // not needed yet @@ -266,6 +266,7 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); flopenrc #(P.XLEN)PCWReg (clk, reset, FlushW, ~StallW, PCM, PCW); flopenrc #(1) InstrValidMReg (clk, reset, FlushW, ~StallW, InstrValidM, InstrValidW); flopenrc #(1) TrapWReg (clk, reset, 1'b0, ~StallW, TrapM, TrapW); + flopenrc #(1) InterruptWReg (clk, reset, 1'b0, ~StallW, InterruptM, InterruptW); flopenrc #(1) HaltWReg (clk, reset, 1'b0, ~StallW, HaltM, HaltW); // **** remove? are these used? @@ -287,9 +288,9 @@ module wallyTracer import cvw::*; #(parameter cvw_t P) (rvviTrace rvvi); assign rvvi.order[0][0] = CSRArray[12'hB02]; // TODO: IMPERAS Should be event order assign rvvi.insn[0][0] = InstrRawW; assign rvvi.pc_rdata[0][0] = PCW; - assign rvvi.trap[0][0] = 0; + assign rvvi.trap[0][0] = TrapW; assign rvvi.halt[0][0] = HaltW; - assign rvvi.intr[0][0] = 0; + assign rvvi.intr[0][0] = InterruptW; assign rvvi.mode[0][0] = PrivilegeModeW; assign rvvi.ixl[0][0] = PrivilegeModeW == 2'b11 ? 2'b10 : PrivilegeModeW == 2'b01 ? 
STATUS_SXL : STATUS_UXL; From c71cafbea6e8009c198e5bcb98254a2d395e5b72 Mon Sep 17 00:00:00 2001 From: Kunlin Han Date: Fri, 8 Mar 2024 12:58:08 -0800 Subject: [PATCH 52/52] Add linux/buildroot to .gitignore to ignore the intermediate built for RISCV/buildroot --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a01f1c07d..2acbd1f2a 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ tests/linux-testgen/buildroot-image-output tests/linux-testgen/buildroot-config-src/main.config.old tests/linux-testgen/buildroot-config-src/linux.config.old tests/linux-testgen/buildroot-config-src/busybox.config.old +linux/buildroot linux/testvector-generation/boottrace.S linux/testvector-generation/boottrace_disasm.log sim/slack-notifier/slack-webhook-url.txt