From 1fd678b43348fd447a7701c4d8fec968593f9ec6 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 14 Feb 2024 12:14:19 -0600 Subject: [PATCH 01/40] Optimized the align logic for loads. --- src/lsu/align.sv | 6 +- src/lsu/endianswapdouble.sv | 114 ++++++++++++++++++++ src/lsu/lsu.sv | 29 ++++-- src/lsu/subwordreaddouble.sv | 196 +++++++++++++++++++++++++++++++++++ 4 files changed, 331 insertions(+), 14 deletions(-) create mode 100644 src/lsu/endianswapdouble.sv create mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/align.sv b/src/lsu/align.sv index d516dad2a..7c3703886 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -53,7 +53,7 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline output logic SelStoreDelay, //*** this is bad. really don't like moving this outside - output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction + output logic [P.LLEN*2-1:0] ReadDataWordSpillAllM, output logic SpillStallM); localparam LLENINBYTES = P.LLEN/8; @@ -67,8 +67,6 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic MisalignedM; - logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; - logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; logic [P.XLEN-1:0] IEUAdrIncrementM; @@ -148,8 +146,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate - assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; - assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. 
Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv new file mode 100644 index 000000000..133149f0e --- /dev/null +++ b/src/lsu/endianswapdouble.sv @@ -0,0 +1,114 @@ +/////////////////////////////////////////// +// endianswap.sv +// +// Written: David_Harris@hmc.edu +// Created: 7 May 2022 +// Modified: 18 January 2023 +// +// Purpose: Swap byte order for Big-Endian accesses +// +// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module endianswapdouble #(parameter LEN) ( + input logic BigEndianM, // 1: reverse the byte order of a; 0: pass a through unchanged + input logic [LEN-1:0] a, + output logic [LEN-1:0] y +); + + if(LEN == 256) begin + always_comb + if (BigEndianM) begin // swap endianness: byte i of a drives byte 31-i of y + y[255:248] = a[7:0]; + y[247:240] = a[15:8]; + y[239:232] = a[23:16]; + y[231:224] = a[31:24]; + y[223:216] = a[39:32]; + y[215:208] = a[47:40]; + y[207:200] = a[55:48]; + y[199:192] = a[63:56]; + y[191:184] = a[71:64]; + y[183:176] = a[79:72]; + y[175:168] = a[87:80]; + y[167:160] = a[95:88]; + y[159:152] = a[103:96]; + y[151:144] = a[111:104]; + y[143:136] = a[119:112]; + y[135:128] = a[127:120]; + y[127:120] = a[135:128]; + y[119:112] = a[143:136]; + y[111:104] = a[151:144]; + y[103:96] = a[159:152]; + y[95:88] = a[167:160]; + y[87:80] = a[175:168]; + y[79:72] = a[183:176]; + y[71:64] = a[191:184]; + y[63:56] = a[199:192]; + y[55:48] = a[207:200]; + y[47:40] = a[215:208]; + y[39:32] = a[223:216]; + y[31:24] = a[231:224]; + y[23:16] = a[239:232]; + y[15:8] = a[247:240]; + y[7:0] = a[255:248]; + end else y = a; + end else if(LEN == 128) begin + always_comb + if (BigEndianM) begin // swap endianness: byte i of a drives byte 15-i of y + y[127:120] = a[7:0]; + y[119:112] = a[15:8]; + y[111:104] = a[23:16]; + y[103:96] = a[31:24]; + y[95:88] = a[39:32]; + y[87:80] = a[47:40]; + y[79:72] = a[55:48]; + y[71:64] = a[63:56]; + y[63:56] = a[71:64]; + y[55:48] = a[79:72]; + y[47:40] = a[87:80]; + y[39:32] = a[95:88]; + y[31:24] = a[103:96]; + y[23:16] = a[111:104]; + y[15:8] = a[119:112]; + y[7:0] = a[127:120]; + end else y = a; + end else if(LEN == 64) begin + always_comb + if (BigEndianM) begin // swap endianness: byte i of a drives byte 7-i of y + y[63:56] = a[7:0]; + y[55:48] = a[15:8]; + y[47:40] = a[23:16]; + y[39:32] = a[31:24]; + y[31:24] = a[39:32]; + y[23:16] = a[47:40]; + y[15:8] = a[55:48]; + y[7:0] = a[63:56]; + end else y = a; + end else begin // any other LEN: only bits [31:0] are swapped + always_comb + if (BigEndianM) begin + y[31:24] = a[7:0]; + y[23:16] = 
a[15:8]; + y[15:8] = a[23:16]; + y[7:0] = a[31:24]; + end else y = a; + end +endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index f53bb9296..591353ac7 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -128,9 +128,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ - logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data - logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data - logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data + logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data + logic [MLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection logic [P.LLEN-1:0] ReadDataM; // Final read data @@ -155,6 +154,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic SelDTIM; // Select DTIM rather than bus or D$ logic [P.XLEN-1:0] WriteDataZM; logic LSULoadPageFaultM, LSUStoreAmoPageFaultM; + logic [MLEN-1:0] ReadDataWordSpillAllM; ///////////////////////////////////////////////////////////////////////////////////////////// // Pipeline for IEUAdr E to M @@ -168,7 +168,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM, + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -176,7 +176,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtE = {2'b00, IEUAdrE}; assign SelSpillE = '0; - assign DCacheReadDataWordSpillM = DCacheReadDataWordM; + assign 
ReadDataWordSpillAllM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; @@ -298,6 +298,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( localparam AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^ localparam LINELEN = P.DCACHE_LINELENINBITS; // Number of bits in cacheline localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) + localparam MLENPOVERAHBW = MLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) localparam CACHEWORDLEN = P.ZICCLSM_SUPPORTED ? 2*P.LLEN : P.LLEN; // Width of the cache's input and output data buses. Misaligned doubles width for fast access logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline @@ -361,9 +362,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. // pma should generate exception for LLEN read to periph. 
+/* -----\/----- EXCLUDED -----\/----- mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); + -----/\----- EXCLUDED -----/\----- */ + mux3 #(MLEN) UnCachedDataMux(.d0(ReadDataWordSpillAllM), .d1({MLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), + .d2({{(MLEN-P.XLEN){1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), + .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface logic [1:0] BusRW; // Non-DTIM memory access, ignore cacheableM logic [P.XLEN-1:0] FetchBuffer; @@ -416,9 +422,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// - - subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + + if(MISALIGN_SUPPORT) begin + subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + end else begin + subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + end subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks @@ -438,7 +449,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if (P.BIGENDIAN_SUPPORTED) begin:endian endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswap #(P.LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); + 
endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv new file mode 100644 index 000000000..936240cf7 --- /dev/null +++ b/src/lsu/subwordreaddouble.sv @@ -0,0 +1,196 @@ +/////////////////////////////////////////// +// subwordread.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Extract subwords and sign extend for reads +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordreaddouble #(parameter LLEN) + ( + input logic [LLEN*2-1:0] ReadDataWordMuxM, + input logic [2:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN-1:0] ReadDataM +); + + logic [7:0] ByteM; + logic [15:0] HalfwordM; + logic [4:0] PAdrSwap; + logic [4:0] BigEndianPAdr; + logic [4:0] LengthM; + + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; // LLEN/4 = byte count of the 2*LLEN-bit input, minus offset and access length + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + + always_comb + case(Funct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; + 3'b001: LengthM = 5'd2; + 3'b010: LengthM = 5'd4; + 3'b011: LengthM = 5'd8; + 3'b100: LengthM = 5'd16; + default: LengthM = 5'd8; + endcase + + if (LLEN == 128) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + logic [127:0] QdWordM; // 128-bit window of ReadDataWordMuxM starting at byte PAdrSwap + always_comb + case(PAdrSwap) + 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; + 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; + 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; + 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; + 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; + 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; + 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; + 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; + 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; + 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; + 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; + 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; + 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; + 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; + 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; + 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; + 5'b10000: QdWordM = 
ReadDataWordMuxM[255:128]; + 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; + 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; + 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; + 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; + 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; + 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; + 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; + 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; + 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; + 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; + 5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; + 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; + 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; + 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; + 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; + endcase + + assign ByteM = QdWordM[7:0]; + assign HalfwordM = QdWordM[15:0]; + assign WordM = QdWordM[31:0]; + assign DblWordM = QdWordM[63:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; + 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; + 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; + 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; + 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; + 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; + 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; + 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + assign ByteM = DblWordM[7:0]; + assign HalfwordM = DblWordM[15:0]; + assign WordM = DblWordM[31:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + + logic [31:0] WordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase + + assign ByteM = WordM[7:0]; + assign HalfwordM = WordM[15:0]; + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw; NOTE(review): reads bits [31:0] directly rather than WordM, so PAdrSwap is ignored here - confirm this is intended for misaligned/big-endian loads + 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen + endcase + end +endmodule From dac8fc16af30dcd1182c9f7f4d69383dfde042fe Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 19 Feb 2024 12:26:29 -0600 Subject: [PATCH 02/40] Partially working optimized subwordwrite for misaligned. 
--- src/lsu/lsu.sv | 15 +++-- src/lsu/subwordwritedouble.sv | 117 ++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 src/lsu/subwordwritedouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 591353ac7..e10183a9e 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -136,8 +136,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IHWriteDataM; // IEU or HPTW write data logic [P.XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data logic [P.LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data - logic [P.LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data - logic [P.LLEN-1:0] LSUWriteDataM; // Final write data + logic [MLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data + logic [MLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic [1:0] MemRWSpillM; @@ -167,7 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; @@ -337,7 +337,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .CacheRW(SelStoreDelay ? 
2'b00 : CacheRWM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), - .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, + .CacheWriteData(LSUWriteDataM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), @@ -351,7 +351,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), - .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), + .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM[P.LLEN-1:0]), .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusAtomic, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), @@ -426,11 +426,12 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); end - subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte 
masks swbytemask #(P.LLEN, P.ZICCLSM_SUPPORTED) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM), .ByteMaskExtended(ByteMaskExtendedM)); @@ -448,7 +449,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv new file mode 100644 index 000000000..728a4f4aa --- /dev/null +++ b/src/lsu/subwordwritedouble.sv @@ -0,0 +1,117 @@ +/////////////////////////////////////////// +// subwordwrite.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Masking and muxing for subword writes +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordwritedouble #(parameter LLEN) ( + input logic [2:0] LSUFunct3M, + input logic [2:0] PAdrM, + input logic FpLoadStoreM, + input logic BigEndianM, + input logic [LLEN-1:0] IMAFWriteDataM, + output logic [LLEN*2-1:0] LittleEndianWriteDataM +); + + // *** RT: This logic is duplicated in subwordreaddouble. Merge the two. + logic [4:0] PAdrSwap; + logic [4:0] BigEndianPAdr; + logic [4:0] LengthM; + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + + always_comb + case(LSUFunct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; + 3'b001: LengthM = 5'd2; + 3'b010: LengthM = 5'd4; + 3'b011: LengthM = 5'd8; + 3'b100: LengthM = 5'd16; + default: LengthM = 5'd8; + endcase // case (LSUFunct3M & {FpLoadStoreM, 2'b11}) + + // *** RT: End duplicated logic + + logic [LLEN*2-1:0] IMAFWriteData2M; + assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; + localparam OffsetIndex = $clog2(LLEN/8); + logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; + // *** RT: Switch to something like this. NOTE(review): LittleEndianWriteDataMTemp is not read anywhere in this module, and the shift amounts below are byte offsets applied as bit counts. 
+ assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); + + + // Place the write data at byte offset PAdrSwap: zero-filled shift for LLEN=128 and LLEN=32, rotation of the replicated data for LLEN=64 + if (LLEN == 128) begin:sww + always_comb + case(PAdrSwap[3:0]) + 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; + 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; + 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; + 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; + 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; + 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; + 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; + 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; + 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; + 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; + 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; + 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; + 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; + 4'b1101: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 104'b0}; + 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; + 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // sq + endcase + end else if (LLEN == 64) begin:sww + always_comb + case(PAdrSwap[2:0]) + 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; + 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; + 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; + 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; + 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; + 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], 
IMAFWriteDataM, IMAFWriteDataM[63:24]}; + 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; + 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; + endcase + end else begin:sww // 32-bit + always_comb + case(PAdrSwap[1:0]) + 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; + 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; + 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; + 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen + endcase + end +endmodule From 6a9c2d8dc43a1f997cf16969a2901d1e91fd4756 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 20 Feb 2024 20:23:42 -0600 Subject: [PATCH 03/40] Closer to getting subword write misaligned working. --- src/lsu/lsu.sv | 2 +- src/lsu/subwordwritedouble.sv | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e10183a9e..12ab9930e 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -426,7 +426,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .CacheableM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv index 728a4f4aa..599d71984 100644 --- 
a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritedouble.sv @@ -33,6 +33,7 @@ module subwordwritedouble #(parameter LLEN) ( input logic [2:0] PAdrM, input logic FpLoadStoreM, input logic BigEndianM, + input logic CacheableM, input logic [LLEN-1:0] IMAFWriteDataM, output logic [LLEN*2-1:0] LittleEndianWriteDataM ); @@ -43,7 +44,13 @@ module subwordwritedouble #(parameter LLEN) ( logic [4:0] LengthM; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + // cacheable, BigEndian + // 10: PAdrM[2:0] + // 11: BigEndianPAdr + // 00: 00000 + // 01: 00111 + mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {CacheableM, BigEndianM}, PAdrSwap); + //assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; /* verilator lint_off WIDTHEXPAND */ /* verilator lint_off WIDTHTRUNC */ assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; From 3714b2bf4adb815704c718a3cec921e563462d31 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 09:14:43 -0600 Subject: [PATCH 04/40] Non-ideal fix. Added new output from pma which indicates if the write shift should occur. The more ideal solution would be to have the pma indicate if the shift should occur and the maximum amount.. 
--- src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 5 +++-- src/lsu/subwordwritedouble.sv | 6 +++--- src/mmu/mmu.sv | 3 ++- src/mmu/pmachecker.sv | 7 ++++--- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index 4848b5ebb..bb23f4fd3 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -185,7 +185,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .TLBFlush, .PhysicalAddress(PCPF), .TLBMiss(ITLBMissF), - .Cacheable(CacheableF), .Idempotent(), .SelTIM(SelIROM), + .Cacheable(CacheableF), .Idempotent(), .AllowShift(), .SelTIM(SelIROM), .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 12ab9930e..053d2bbb0 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -119,6 +119,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic SelSpillE; // Align logic detected a spill and needs to stall logic CacheableM; // PMA indicates memory address is cacheable + logic AllowShiftM; // PMA: indicates if WriteData should be byte shifted before going to cache or bus by offset. 
logic BusCommittedM; // Bus memory operation in flight, delay interrupts logic DCacheCommittedM; // D$ memory operation started, delay interrupts @@ -244,7 +245,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_ADUE, .PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]), .PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), .TLBFlush(sfencevmaM), - .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM), + .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .AllowShift(AllowShiftM), .SelTIM(SelDTIM), .InstrAccessFaultF(), .LoadAccessFaultM(LSULoadAccessFaultM), .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM(LSULoadPageFaultM), .StoreAmoPageFaultM(LSUStoreAmoPageFaultM), @@ -426,7 +427,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .CacheableM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv index 599d71984..eb62aa106 100644 --- a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritedouble.sv @@ -33,7 +33,7 @@ module subwordwritedouble #(parameter LLEN) ( input logic [2:0] PAdrM, input logic FpLoadStoreM, input 
logic BigEndianM, - input logic CacheableM, + input logic AllowShiftM, input logic [LLEN-1:0] IMAFWriteDataM, output logic [LLEN*2-1:0] LittleEndianWriteDataM ); @@ -48,8 +48,8 @@ module subwordwritedouble #(parameter LLEN) ( // 10: PAdrM[2:0] // 11: BigEndianPAdr // 00: 00000 - // 01: 00111 - mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {CacheableM, BigEndianM}, PAdrSwap); + // 01: 11111 + mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {AllowShiftM, BigEndianM}, PAdrSwap); //assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; /* verilator lint_off WIDTHEXPAND */ /* verilator lint_off WIDTHTRUNC */ diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index e842016a2..80a1ca7da 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -49,6 +49,7 @@ module mmu import cvw::*; #(parameter cvw_t P, output logic TLBMiss, // Miss TLB output logic Cacheable, // PMA indicates memory address is cachable output logic Idempotent, // PMA indicates memory address is idempotent + output logic AllowShift, // PMA indicates if WriteData should be byte shifted before going to cache or bus by offset output logic SelTIM, // Select a tightly integrated memory // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, // access fault sources @@ -112,7 +113,7 @@ module mmu import cvw::*; #(parameter cvw_t P, pmachecker #(P) pmachecker(.PhysicalAddress, .Size, .CMOpM, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PBMemoryType, - .Cacheable, .Idempotent, .SelTIM, + .Cacheable, .Idempotent, .AllowShift, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); if (P.PMP_ENTRIES > 0) begin : pmp diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 84e41ba65..60296213d 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -1,4 +1,4 @@ -/////////////////////////////////////////// +////////////////////////////////////////// // pmachecker.sv // // Written: tfleming@hmc.edu & 
jtorrey@hmc.edu 20 April 2021 @@ -38,7 +38,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( input logic WriteAccessM, // Write access input logic ReadAccessM, // Read access input logic [1:0] PBMemoryType, // PBMT field of PTE during TLB hit, or 00 otherwise - output logic Cacheable, Idempotent, SelTIM, + output logic Cacheable, Idempotent, AllowShift, SelTIM, output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, output logic PMAStoreAmoAccessFaultM @@ -60,7 +60,8 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[3] | SelRegions[4] | SelRegions[5]; // exclusion-tag: unused-cachable - assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; + assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; + assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6]; // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent From 1ece6f8eaeaa6bdccda4e9e8b05697b5cad4986f Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 09:34:16 -0600 Subject: [PATCH 05/40] Swapped to the more compact subwordreadmisaligned.sv. --- ...eaddouble.sv => subworddreadmisaligned.sv} | 108 +++++------------- 1 file changed, 26 insertions(+), 82 deletions(-) rename src/lsu/{subwordreaddouble.sv => subworddreadmisaligned.sv} (53%) diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subworddreadmisaligned.sv similarity index 53% rename from src/lsu/subwordreaddouble.sv rename to src/lsu/subworddreadmisaligned.sv index 936240cf7..cc1c13787 100644 --- a/src/lsu/subwordreaddouble.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -28,7 +28,7 @@ // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// -module subwordreaddouble #(parameter LLEN) +module subwordreadmisaligned #(parameter LLEN) ( input logic [LLEN*2-1:0] ReadDataWordMuxM, input logic [2:0] PAdrM, @@ -63,50 +63,19 @@ module subwordreaddouble #(parameter LLEN) default: LengthM = 5'd8; endcase + logic [LLEN*2-1:0] ReadDataAlignedM; + assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); + if (LLEN == 128) begin:swrmux logic [31:0] WordM; logic [63:0] DblWordM; - logic [63:0] QdWordM; - always_comb - case(PAdrSwap) - 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; - 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; - 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; - 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; - 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; - 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; - 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; - 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; - 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; - 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; - 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; - 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; - 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; - 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; - 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; - 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; - 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; - 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; - 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; - 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; - 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; - 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; - 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; - 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; - 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; - 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; - 5'b11010: QdWordM = {80'b0, 
ReadDataWordMuxM[255:208]}; - 5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; - 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; - 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; - 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; - 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; - endcase - - assign ByteM = QdWordM[7:0]; - assign HalfwordM = QdWordM[15:0]; - assign WordM = QdWordM[31:0]; - assign DblWordM = QdWordM[63:0]; + logic [127:0] QdWordM; + + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; + assign DblWordM = ReadDataAlignedM[63:0]; + assign QdWordM =ReadDataAlignedM[127:0]; // sign extension/ NaN boxing always_comb @@ -116,7 +85,7 @@ module subwordreaddouble #(parameter LLEN) 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b100: ReadDataM = FpLoadStoreM ? 
QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen @@ -125,29 +94,11 @@ module subwordreaddouble #(parameter LLEN) end else if (LLEN == 64) begin:swrmux logic [31:0] WordM; logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; - 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; - 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; - 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; - 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; - 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; - 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; - 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - assign ByteM = DblWordM[7:0]; - assign HalfwordM = DblWordM[15:0]; - assign WordM = DblWordM[31:0]; + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; + assign DblWordM = ReadDataAlignedM[63:0]; // sign extension/ NaN boxing always_comb @@ -165,32 +116,25 @@ module subwordreaddouble #(parameter LLEN) end else begin:swrmux // 32-bit - logic [31:0] WordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, 
ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase + logic [31:0] WordM; - assign ByteM = WordM[7:0]; - assign HalfwordM = WordM[15:0]; + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; // sign extension always_comb case(Funct3M) 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + + 3'b011: ReadDataM = WordM[LLEN-1:0]; // fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen + + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen endcase end endmodule From 7e1ea1e6d9472fdfd188823fc81ee455abbab460 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 09:37:16 -0600 Subject: [PATCH 06/40] Beginning subword cleanup. 
--- src/lsu/lsu.sv | 2 +- src/lsu/subworddreadmisaligned.sv | 21 ++++++--------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 053d2bbb0..3e1974521 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -425,7 +425,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index cc1c13787..fe96844f3 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -40,10 +40,11 @@ module subwordreadmisaligned #(parameter LLEN) logic [7:0] ByteM; logic [15:0] HalfwordM; + logic [31:0] WordM; logic [4:0] PAdrSwap; logic [4:0] BigEndianPAdr; logic [4:0] LengthM; - + // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; @@ -66,14 +67,14 @@ module subwordreadmisaligned #(parameter LLEN) logic [LLEN*2-1:0] ReadDataAlignedM; assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; + if (LLEN == 128) begin:swrmux - logic [31:0] WordM; logic [63:0] DblWordM; logic [127:0] QdWordM; - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; assign DblWordM = ReadDataAlignedM[63:0]; assign QdWordM =ReadDataAlignedM[127:0]; @@ -92,12 +93,8 @@ module subwordreadmisaligned #(parameter LLEN) endcase end else if (LLEN == 64) begin:swrmux - logic [31:0] WordM; logic [63:0] DblWordM; - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; assign DblWordM = ReadDataAlignedM[63:0]; // sign extension/ NaN boxing @@ -116,12 +113,6 @@ module subwordreadmisaligned #(parameter LLEN) end else begin:swrmux // 32-bit - logic [31:0] WordM; - - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; - // sign extension always_comb case(Funct3M) From 69d31d50e27199f105706172b5e2427f96689d3f Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 13:29:39 -0600 Subject: [PATCH 07/40] Updated subword misaligned. 
--- src/lsu/lsu.sv | 2 +- src/lsu/subworddreadmisaligned.sv | 12 ++++++++++-- ...bwordwritedouble.sv => subwordwritemisaligned.sv} | 6 +++--- 3 files changed, 14 insertions(+), 6 deletions(-) rename src/lsu/{subwordwritedouble.sv => subwordwritemisaligned.sv} (97%) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 3e1974521..567dbdb79 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -427,7 +427,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index fe96844f3..1e179dbc3 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -38,6 +38,7 @@ module subwordreadmisaligned #(parameter LLEN) output logic [LLEN-1:0] ReadDataM ); + logic [LLEN*2-1:0] ReadDataAlignedM; logic [7:0] ByteM; logic [15:0] HalfwordM; logic [31:0] WordM; @@ -64,13 +65,20 @@ module subwordreadmisaligned #(parameter LLEN) default: LengthM = 5'd8; endcase - logic [LLEN*2-1:0] ReadDataAlignedM; assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); assign ByteM = ReadDataAlignedM[7:0]; assign HalfwordM = ReadDataAlignedM[15:0]; assign WordM = ReadDataAlignedM[31:0]; + logic [LLEN-1:0] lb, lh_flh, lw_flw, ld_fld, lbu, lbu_flq, lhu, lwu; + + assign lb = 
{{LLEN-8{ByteM[7]}}, ByteM}; + assign lh_flh = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};; + assign lw_flw = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; + //assign ld_fld = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; + + if (LLEN == 128) begin:swrmux logic [63:0] DblWordM; logic [127:0] QdWordM; @@ -120,7 +128,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = WordM[LLEN-1:0]; // fld + //3'b011: ReadDataM = WordM[LLEN-1:0]; // fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritemisaligned.sv similarity index 97% rename from src/lsu/subwordwritedouble.sv rename to src/lsu/subwordwritemisaligned.sv index eb62aa106..dd82ffa19 100644 --- a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritemisaligned.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// subwordwrite.sv +// subwordwritemisaligned.sv // // Written: David_Harris@hmc.edu // Created: 9 January 2021 @@ -28,7 +28,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module subwordwritedouble #(parameter LLEN) ( +module subwordwritemisaligned #(parameter LLEN) ( input logic [2:0] LSUFunct3M, input logic [2:0] PAdrM, input logic FpLoadStoreM, @@ -38,7 +38,7 @@ module subwordwritedouble #(parameter LLEN) ( output logic [LLEN*2-1:0] LittleEndianWriteDataM ); - // *** RT: This is logic is duplicated in subwordreaddouble. Merge the two. + // *** RT: This is logic is duplicated in subwordreadmisaligned. Merge the two. 
logic [4:0] PAdrSwap; logic [4:0] BigEndianPAdr; logic [4:0] LengthM; From 45c30267a50771fb0c5acb756ff3988d05f54f4a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 14:08:04 -0600 Subject: [PATCH 08/40] Cleanup. --- src/lsu/subworddreadmisaligned.sv | 72 +++++-------------------------- 1 file changed, 10 insertions(+), 62 deletions(-) diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 1e179dbc3..66ca0375e 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -71,69 +71,17 @@ module subwordreadmisaligned #(parameter LLEN) assign HalfwordM = ReadDataAlignedM[15:0]; assign WordM = ReadDataAlignedM[31:0]; - logic [LLEN-1:0] lb, lh_flh, lw_flw, ld_fld, lbu, lbu_flq, lhu, lwu; - - assign lb = {{LLEN-8{ByteM[7]}}, ByteM}; - assign lh_flh = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};; - assign lw_flw = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; - //assign ld_fld = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; - - - if (LLEN == 128) begin:swrmux - logic [63:0] DblWordM; - logic [127:0] QdWordM; - - assign DblWordM = ReadDataAlignedM[63:0]; - assign QdWordM =ReadDataAlignedM[127:0]; - - // sign extension/ NaN boxing - always_comb + always_comb case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b100: ReadDataM = FpLoadStoreM ? 
QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: if(LLEN == 128 || LLEN == 64 ) ReadDataM = {{LLEN-64{ReadDataAlignedM[63]|FpLoadStoreM}}, ReadDataAlignedM[63:0]}; // ld/fld + 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + else if(LLEN == 64) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen endcase - end else if (LLEN == 64) begin:swrmux - logic [63:0] DblWordM; - - assign DblWordM = ReadDataAlignedM[63:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - - //3'b011: ReadDataM = WordM[LLEN-1:0]; // fld - - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - end endmodule From fbc18abaa0b9bd24d7febfad69840f9d717f76df Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 22 Feb 2024 14:17:15 -0600 Subject: [PATCH 09/40] Significant cleanup of subwordwritemisaligned. --- src/lsu/subwordwritemisaligned.sv | 50 ++----------------------------- 1 file changed, 2 insertions(+), 48 deletions(-) diff --git a/src/lsu/subwordwritemisaligned.sv b/src/lsu/subwordwritemisaligned.sv index dd82ffa19..22f462d4a 100644 --- a/src/lsu/subwordwritemisaligned.sv +++ b/src/lsu/subwordwritemisaligned.sv @@ -72,53 +72,7 @@ module subwordwritemisaligned #(parameter LLEN) ( logic [LLEN*2-1:0] IMAFWriteData2M; assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; localparam OffsetIndex = $clog2(LLEN/8); - logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; - // *** RT: Switch to something like this. 
- assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); - - // Replicate data for subword writes - if (LLEN == 128) begin:sww - always_comb - case(PAdrSwap[3:0]) - 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; - 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; - 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; - 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; - 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; - 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; - 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; - 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; - 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; - 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; - 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; - 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; - 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; - 4'b1101: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 104'b0}; - 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; - 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; - default: LittleEndianWriteDataM = IMAFWriteDataM; // sq - endcase - end else if (LLEN == 64) begin:sww - always_comb - case(PAdrSwap[2:0]) - 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; - 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; - 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; - 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; - 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; - 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], 
IMAFWriteDataM, IMAFWriteDataM[63:24]}; - 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; - 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; - endcase - end else begin:sww // 32-bit - always_comb - case(PAdrSwap[1:0]) - 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; - 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; - 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; - 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; - default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen - endcase - end + assign LittleEndianWriteDataM = (IMAFWriteData2M << (PAdrSwap[OffsetIndex-1:0] * 8)) | (IMAFWriteData2M >> (LLEN - (PAdrSwap[OffsetIndex-1:0] * 8))); + endmodule From a4028831150b8d1206aa69cc15eeda60bd19f21c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 09:41:59 -0600 Subject: [PATCH 10/40] Simplifications of subword code. 
--- src/lsu/endianswap.sv | 38 ++++++++++++++++++++++++++++++- src/lsu/lsu.sv | 4 ++-- src/lsu/subworddreadmisaligned.sv | 2 +- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index 7c042886a..3c552b371 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -34,7 +34,43 @@ module endianswap #(parameter LEN) ( output logic [LEN-1:0] y ); - if(LEN == 128) begin + if(LEN == 256) begin + always_comb + if (BigEndianM) begin // swap endianness + y[255:248] = a[7:0]; + y[247:240] = a[15:8]; + y[239:232] = a[23:16]; + y[231:224] = a[31:24]; + y[223:216] = a[39:32]; + y[215:208] = a[47:40]; + y[207:200] = a[55:48]; + y[199:192] = a[63:56]; + y[191:184] = a[71:64]; + y[183:176] = a[79:72]; + y[175:168] = a[87:80]; + y[167:160] = a[95:88]; + y[159:152] = a[103:96]; + y[151:144] = a[111:104]; + y[143:136] = a[119:112]; + y[135:128] = a[127:120]; + y[127:120] = a[135:128]; + y[119:112] = a[142:136]; + y[111:104] = a[152:144]; + y[103:96] = a[159:152]; + y[95:88] = a[167:160]; + y[87:80] = a[175:168]; + y[79:72] = a[183:176]; + y[71:64] = a[191:184]; + y[63:56] = a[199:192]; + y[55:48] = a[207:200]; + y[47:40] = a[215:208]; + y[39:32] = a[223:216]; + y[31:24] = a[231:224]; + y[23:16] = a[239:232]; + y[15:8] = a[247:240]; + y[7:0] = a[255:248]; + end else y = a; + end else if(LEN == 128) begin always_comb if (BigEndianM) begin // swap endianness y[127:120] = a[7:0]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 567dbdb79..896af0b46 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -450,8 +450,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); + endianswap #(MLEN) 
storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswap #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 66ca0375e..2868a54d8 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -77,7 +77,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw 3'b011: if(LLEN == 128 || LLEN == 64 ) ReadDataM = {{LLEN-64{ReadDataAlignedM[63]|FpLoadStoreM}}, ReadDataAlignedM[63:0]}; // ld/fld - 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq else if(LLEN == 64) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu From caac48b7f28e33ada9d4a7d0f017878635473811 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 09:42:39 -0600 Subject: [PATCH 11/40] Removed duplicate endianswap. 
--- src/lsu/endianswapdouble.sv | 114 ------------------------------------ 1 file changed, 114 deletions(-) delete mode 100644 src/lsu/endianswapdouble.sv diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv deleted file mode 100644 index 133149f0e..000000000 --- a/src/lsu/endianswapdouble.sv +++ /dev/null @@ -1,114 +0,0 @@ -/////////////////////////////////////////// -// endianswap.sv -// -// Written: David_Harris@hmc.edu -// Created: 7 May 2022 -// Modified: 18 January 2023 -// -// Purpose: Swap byte order for Big-Endian accesses -// -// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module endianswapdouble #(parameter LEN) ( - input logic BigEndianM, - input logic [LEN-1:0] a, - output logic [LEN-1:0] y -); - - if(LEN == 256) begin - always_comb - if (BigEndianM) begin // swap endianness - y[255:248] = a[7:0]; - y[247:240] = a[15:8]; - y[239:232] = a[23:16]; - y[231:224] = a[31:24]; - y[223:216] = a[39:32]; - y[215:208] = a[47:40]; - y[207:200] = a[55:48]; - y[199:192] = a[63:56]; - y[191:184] = a[71:64]; - y[183:176] = a[79:72]; - y[175:168] = a[87:80]; - y[167:160] = a[95:88]; - y[159:152] = a[103:96]; - y[151:144] = a[111:104]; - y[143:136] = a[119:112]; - y[135:128] = a[127:120]; - y[127:120] = a[135:128]; - y[119:112] = a[142:136]; - y[111:104] = a[152:144]; - y[103:96] = a[159:152]; - y[95:88] = a[167:160]; - y[87:80] = a[175:168]; - y[79:72] = a[183:176]; - y[71:64] = a[191:184]; - y[63:56] = a[199:192]; - y[55:48] = a[207:200]; - y[47:40] = a[215:208]; - y[39:32] = a[223:216]; - y[31:24] = a[231:224]; - y[23:16] = a[239:232]; - y[15:8] = a[247:240]; - y[7:0] = a[255:248]; - end else y = a; - end else if(LEN == 128) begin - always_comb - if (BigEndianM) begin // swap endianness - y[127:120] = a[7:0]; - y[119:112] = a[15:8]; - y[111:104] = a[23:16]; - y[103:96] = a[31:24]; - y[95:88] = a[39:32]; - y[87:80] = a[47:40]; - y[79:72] = a[55:48]; - y[71:64] = a[63:56]; - y[63:56] = a[71:64]; - y[55:48] = a[79:72]; - y[47:40] = a[87:80]; - y[39:32] = a[95:88]; - y[31:24] = a[103:96]; - y[23:16] = a[111:104]; - y[15:8] = a[119:112]; - y[7:0] = a[127:120]; - end else y = a; - end else if(LEN == 64) begin - always_comb - if (BigEndianM) begin // swap endianness - y[63:56] = a[7:0]; - y[55:48] = a[15:8]; - y[47:40] = a[23:16]; - y[39:32] = a[31:24]; - y[31:24] = a[39:32]; - y[23:16] = a[47:40]; - y[15:8] = a[55:48]; - y[7:0] = a[63:56]; - end else y = a; - end else begin - always_comb - if (BigEndianM) begin - y[31:24] = a[7:0]; - y[23:16] = 
a[15:8]; - y[15:8] = a[23:16]; - y[7:0] = a[31:24]; - end else y = a; - end -endmodule From e84b7cc14782d6cb6676d94e987c15f63f87a604 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 13:00:21 -0600 Subject: [PATCH 12/40] Cleanup. --- src/lsu/align.sv | 14 -------------- src/lsu/endianswap.sv | 4 ++-- src/lsu/lsu.sv | 4 +--- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 7c3703886..29ba22c30 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -47,7 +47,6 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.LLEN-1:0] LSUWriteDataM, output logic [(P.LLEN*2-1)/8:0] ByteMaskSpillM, - output logic [P.LLEN*2-1:0] LSUWriteDataSpillM, output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill @@ -71,7 +70,6 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrIncrementM; logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; - logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ @@ -142,18 +140,6 @@ module align import cvw::*; #(parameter cvw_t P) ( // merge together mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SelSpillM, ReadDataWordSpillAllM); - - // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) - // 8 * is for shifting by bytes not bits - assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate - - // write path. Also has the 8:1 shifter muxing for the byteoffset - // then it also has the mux to select when a spill occurs - logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. 
- - assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; - assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; - mux3 #(2*P.LLEN/8) bytemaskspillmux({ByteMaskExtendedM, ByteMaskM}, // no spill {{{P.LLEN/8}{1'b0}}, ByteMaskM}, // spill, first half {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, // spill, second half diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index 3c552b371..afd4ecdd2 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -54,8 +54,8 @@ module endianswap #(parameter LEN) ( y[143:136] = a[119:112]; y[135:128] = a[127:120]; y[127:120] = a[135:128]; - y[119:112] = a[142:136]; - y[111:104] = a[152:144]; + y[119:112] = a[143:136]; + y[111:104] = a[151:144]; y[103:96] = a[159:152]; y[95:88] = a[167:160]; y[87:80] = a[175:168]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 896af0b46..28ef7ba08 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -126,7 +126,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data /* verilator lint_off WIDTHEXPAND */ logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data - logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data @@ -168,7 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; @@ -179,7 +178,6 @@ module lsu import 
cvw::*; #(parameter cvw_t P) ( assign SelSpillE = '0; assign ReadDataWordSpillAllM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; - assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; assign {SpillStallM, SelStoreDelay} = '0; end From a2d5618d889f882e0ceccb8c75708dc564bb7dae Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 13:46:04 -0600 Subject: [PATCH 13/40] Added sdc to pma allow shift. --- src/mmu/pmachecker.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 60296213d..e77cc74d0 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -61,7 +61,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[3] | SelRegions[4] | SelRegions[5]; // exclusion-tag: unused-cachable assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; - assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6]; + assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6] | SelRegions[10]; // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent From ab750e150f6a82a3c6d0d694d0f87c322ade44d1 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 23 Feb 2024 14:00:19 -0600 Subject: [PATCH 14/40] Fixed lint errors for alignment. 
--- src/lsu/align.sv | 20 ++++++++++++-------- src/lsu/lsu.sv | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 29ba22c30..ad0dbf238 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -37,6 +37,7 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.XLEN-1:0] IEUAdrM, // 2 byte aligned PC in Fetch stage input logic [P.XLEN-1:0] IEUAdrE, // The next IEUAdrM input logic [2:0] Funct3M, // Size of memory operation + input logic FpLoadStoreM, // Floating point Load or Store input logic [1:0] MemRWM, input logic [P.LLEN*2-1:0] DCacheReadDataWordM, // Instruction from the IROM, I$, or bus. Used to check if the instruction if compressed input logic CacheBusHPWTStall, // I$ or bus are stalled. Transition to second fetch of spill after the first is fetched @@ -69,8 +70,9 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrIncrementM; - logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; - logic PotentialSpillM; + localparam OFFSET_LEN = $clog2(LLENINBYTES); + logic [OFFSET_LEN-1:0] AccessByteOffsetM; + logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -89,12 +91,14 @@ module align import cvw::*; #(parameter cvw_t P) ( // compute misalignement always_comb begin - case (Funct3M[1:0]) - 2'b00: AccessByteOffsetM = '0; // byte access - 2'b01: AccessByteOffsetM = {2'b00, IEUAdrM[0]}; // half access - 2'b10: AccessByteOffsetM = {1'b0, IEUAdrM[1:0]}; // word access - 2'b11: AccessByteOffsetM = IEUAdrM[2:0]; // double access - default: AccessByteOffsetM = IEUAdrM[2:0]; + case (Funct3M & {FpLoadStoreM, 2'b11}) + 3'b000: AccessByteOffsetM = '0; // byte access + 3'b001: AccessByteOffsetM = {{OFFSET_LEN-1{1'b0}}, IEUAdrM[0]}; // half access + 3'b010: AccessByteOffsetM = {{OFFSET_LEN-2{1'b0}}, IEUAdrM[1:0]}; // word access + 3'b011: AccessByteOffsetM = {{OFFSET_LEN-3{1'b0}}, IEUAdrM[2:0]}; // double access + 3'b100: 
if(P.LLEN == 128) AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; // quad access + else AccessByteOffsetM = '0; // invalid + default: AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; endcase case (Funct3M[1:0]) 2'b00: PotentialSpillM = '0; // byte access diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 28ef7ba08..1712cdc19 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -164,7 +164,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( flopenrc #(P.XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); if(MISALIGN_SUPPORT) begin : ziccslm_align logic [P.XLEN-1:0] IEUAdrSpillE, IEUAdrSpillM; - align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, + align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .FpLoadStoreM, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, From 90ad5e7dab8fe04e3e214bfe9de5434c39fb594a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 28 Feb 2024 17:07:32 -0600 Subject: [PATCH 15/40] Updated the cache for book clarity. 
--- src/cache/cache.sv | 6 ++--- src/cache/cachefsm.sv | 62 +++++++++++++++++++++---------------------- src/cache/cacheway.sv | 33 +++++++++++------------ 3 files changed, 49 insertions(+), 52 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 4a97a29d4..6882110bd 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -98,7 +98,7 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN-1:0] ReadDataLine, ReadDataLineCache; logic SelFetchBuffer; logic CacheEn; - logic SelWay; + logic SelVictim; logic [LINELEN/8-1:0] LineByteMask; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; genvar index; @@ -120,7 +120,7 @@ module cache import cvw::*; #(parameter cvw_t P, // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( - .clk, .reset, .CacheEn, .CacheSetData, .CacheSetTag, .PAdr, .LineWriteData, .LineByteMask, .SelWay, + .clk, .reset, .CacheEn, .CacheSetData, .CacheSetTag, .PAdr, .LineWriteData, .LineByteMask, .SelVictim, .SetValid, .ClearValid, .SetDirty, .ClearDirty, .VictimWay, .FlushWay, .FlushCache, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .HitDirtyWay, .TagWay, .FlushStage, .InvalidateCache); @@ -227,7 +227,7 @@ module cache import cvw::*; #(parameter cvw_t P, cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .Stall, .CacheHit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted, - .CacheMiss, .CacheAccess, .SelAdrData, .SelAdrTag, .SelWay, + .CacheMiss, .CacheAccess, .SelAdrData, .SelAdrTag, .SelVictim, .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 4af89b08e..869789df5 100644 --- a/src/cache/cachefsm.sv +++ 
b/src/cache/cachefsm.sv @@ -63,7 +63,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback output logic LRUWriteEn, // Update the LRU state - output logic SelWay, // Controls which way to select a way data and tag, 00 = hitway, 10 = victimway, 11 = flushway + output logic SelVictim, // Overrides HitWay Tag matching. Selects the victim tag/data regardless of hit output logic FlushAdrCntEn, // Enable the counter for Flush Adr output logic FlushWayCntEn, // Enable the way counter during a flush output logic FlushCntRst, // Reset both flush counters @@ -79,12 +79,12 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic CMOZeroNoEviction; logic StallConditions; - typedef enum logic [3:0]{STATE_READY, // hit states + typedef enum logic [3:0]{STATE_HIT, // hit states // miss states STATE_FETCH, STATE_WRITEBACK, STATE_WRITE_LINE, - STATE_READ_HOLD, // required for back to back reads. structural hazard on writting SRAM + STATE_ADDRESS_SETUP, // required for back to back reads. structural hazard on writting SRAM // flush cache STATE_FLUSH, STATE_FLUSH_WRITEBACK @@ -101,51 +101,51 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. - assign CacheAccess = (|CacheRW) & ((CurrState == STATE_READY & ~Stall & ~FlushStage) | (CurrState == STATE_READ_HOLD & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW + assign CacheAccess = (|CacheRW) & ((CurrState == STATE_HIT & ~Stall & ~FlushStage) | (CurrState == STATE_ADDRESS_SETUP & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW assign CacheMiss = CacheAccess & ~CacheHit; - // special case on reset. When the fsm first exists reset the + // special case on reset.
When the fsm first exits reset the // PCNextF will no longer be pointing to the correct address. // But PCF will be the reset vector. flop #(1) resetDelayReg(.clk, .d(reset), .q(resetDelay)); always_ff @(posedge clk) - if (reset | FlushStage) CurrState <= #1 STATE_READY; + if (reset | FlushStage) CurrState <= #1 STATE_HIT; else CurrState <= #1 NextState; always_comb begin - NextState = STATE_READY; + NextState = STATE_HIT; case (CurrState) // exclusion-tag: icache state-case - STATE_READY: if(InvalidateCache) NextState = STATE_READY; // exclusion-tag: dcache InvalidateCheck + STATE_HIT: if(InvalidateCache) NextState = STATE_HIT; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; // exclusion-tag: icache FLUSHStatement else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement else if((AnyMiss | CMOWriteback) & ~READ_ONLY_CACHE) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement - else NextState = STATE_READY; + else NextState = STATE_HIT; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else NextState = STATE_FETCH; - STATE_WRITE_LINE: NextState = STATE_READ_HOLD; - STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; - else NextState = STATE_READY; + STATE_WRITE_LINE: NextState = STATE_ADDRESS_SETUP; + STATE_ADDRESS_SETUP: if(Stall) NextState = STATE_ADDRESS_SETUP; + else NextState = STATE_HIT; // exclusion-tag-start: icache case STATE_WRITEBACK: if(CacheBusAck & ~(|CMOpM[3:1])) NextState = STATE_FETCH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; // Read_hold lowers CacheStall + else if(CacheBusAck) NextState = STATE_ADDRESS_SETUP; // Read_hold lowers CacheStall else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack.
STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; - else if (FlushFlag) NextState = STATE_READ_HOLD; + else if (FlushFlag) NextState = STATE_ADDRESS_SETUP; else NextState = STATE_FLUSH; STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; + else if(CacheBusAck) NextState = STATE_ADDRESS_SETUP; else NextState = STATE_FLUSH_WRITEBACK; // exclusion-tag-end: icache case - default: NextState = STATE_READY; + default: NextState = STATE_HIT; endcase end // com back to CPU - assign CacheCommitted = (CurrState != STATE_READY) & ~(READ_ONLY_CACHE & (CurrState == STATE_READ_HOLD)); + assign CacheCommitted = (CurrState != STATE_HIT) & ~(READ_ONLY_CACHE & (CurrState == STATE_ADDRESS_SETUP)); assign StallConditions = FlushCache | AnyMiss | CMOWriteback; // exclusion-tag: icache FlushCache - assign CacheStall = (CurrState == STATE_READY & StallConditions) | // exclusion-tag: icache StallStates + assign CacheStall = (CurrState == STATE_HIT & StallConditions) | // exclusion-tag: icache StallStates (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. 
@@ -153,26 +153,26 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache assign SetValid = CurrState == STATE_WRITE_LINE | - (CurrState == STATE_READY & CMOZeroNoEviction) | + (CurrState == STATE_HIT & CMOZeroNoEviction) | (CurrState == STATE_WRITEBACK & CacheBusAck & CMOpM[3]); - assign ClearValid = (CurrState == STATE_READY & CMOpM[0]) | + assign ClearValid = (CurrState == STATE_HIT & CMOpM[0]) | (CurrState == STATE_WRITEBACK & CMOpM[2] & CacheBusAck); - assign LRUWriteEn = (((CurrState == STATE_READY & (AnyHit | CMOZeroNoEviction)) | + assign LRUWriteEn = (((CurrState == STATE_HIT & (AnyHit | CMOZeroNoEviction)) | (CurrState == STATE_WRITE_LINE)) & ~FlushStage) | (CurrState == STATE_WRITEBACK & CMOpM[3] & CacheBusAck); // exclusion-tag-start: icache flushdirtycontrols - assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty + assign SetDirty = (CurrState == STATE_HIT & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | (CurrState == STATE_WRITEBACK & (CMOpM[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty (CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. 
// Flush and eviction controls CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & CacheBusAck; - assign SelWay = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOpM[1] | CMOpM[2])) | (CacheBusAck & CMOpM[3]))) | - (CurrState == STATE_READY & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | + assign SelVictim = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOpM[1] | CMOpM[2])) | (CacheBusAck & CMOpM[3]))) | + (CurrState == STATE_HIT & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2] | ~CacheBusAck)) | - (CurrState == STATE_READY & AnyMiss & LineDirty); + (CurrState == STATE_HIT & AnyMiss & LineDirty); // coverage off -item e 1 -fecexprrow 1 // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | @@ -183,29 +183,29 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK & FlushFlag & CacheBusAck); // exclusion-tag-end: icache flushdirtycontrols // Bus interface controls - assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses + assign CacheBusRW[1] = (CurrState == STATE_HIT & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses (CurrState == STATE_FETCH & ~CacheBusAck) | (CurrState == STATE_WRITEBACK & CacheBusAck & ~(|CMOpM)); logic LoadMiss; assign LoadMiss = (CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign CacheBusRW[0] = (CurrState == STATE_READY & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW + assign CacheBusRW[0] = (CurrState == STATE_HIT & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & 
~CacheBusAck); - assign SelAdrData = (CurrState == STATE_READY & (CacheRW[0] | AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed + assign SelAdrData = (CurrState == STATE_HIT & (CacheRW[0] | AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | resetDelay; - assign SelAdrTag = (CurrState == STATE_READY & (AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrTag // changes if store delay hazard removed + assign SelAdrTag = (CurrState == STATE_HIT & (AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrTag // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | resetDelay; - assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_READ_HOLD; - assign CacheEn = (~Stall | StallConditions) | (CurrState != STATE_READY) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn + assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_ADDRESS_SETUP; + assign CacheEn = (~Stall | StallConditions) | (CurrState != STATE_HIT) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn endmodule // cachefsm diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 678f7acac..3c0f5df31 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -42,7 +42,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, input logic SetValid, // Set the valid bit in the selected way and set input logic ClearValid, // Clear the valid bit in the selected way and set input logic SetDirty, // Set the dirty bit in the selected way and set - input logic SelWay, // Controls which way to select a way data and tag, 00 = hitway, 10 = victimway, 11 = flushway + input logic SelVictim, // Overides HitWay Tag matching. 
Selects selects the victim tag/data regardless of hit input logic ClearDirty, // Clear the dirty bit in the selected way and set input logic FlushCache, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr input logic VictimWay, // LRU selected this way as victim to evict @@ -68,7 +68,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic [LINELEN-1:0] ReadDataLine; logic [TAGLEN-1:0] ReadTag; logic Dirty; - logic SelDirty; + logic SelecteDirty; logic SelectedWriteWordEn; logic [LINELEN/8-1:0] FinalByteMask; logic SetValidEN, ClearValidEN; @@ -77,33 +77,30 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic SetDirtyWay; logic ClearDirtyWay; logic SelNonHit; - logic SelData; + logic SelectedWay; logic InvalidateCacheDelay; if (!READ_ONLY_CACHE) begin:flushlogic - logic FlushWayEn; - mux2 #(1) seltagmux(VictimWay, FlushWay, FlushCache, SelDirty); - + mux2 #(1) seltagmux(VictimWay, FlushWay, FlushCache, SelecteDirty); + mux3 #(1) selectedmux(HitWay, FlushWay, VictimWay, {SelVictim, FlushCache}, SelectedWay); // FlushWay is part of a one hot way selection. Must clear it if FlushWay not selected. // coverage off -item e 1 -fecexprrow 3 // nonzero ways will never see FlushCache=0 while FlushWay=1 since FlushWay only advances on a subset of FlushCache assertion cases. - assign FlushWayEn = FlushWay & FlushCache; - assign SelNonHit = FlushWayEn | SelWay; end else begin:flushlogic // no flush operation for read-only caches. 
- assign SelDirty = VictimWay; - assign SelNonHit = SelWay; + assign SelecteDirty = VictimWay; + mux2 #(1) selectedwaymux(HitWay, SelecteDirty, SelVictim , SelectedWay); end - mux2 #(1) selectedwaymux(HitWay, SelDirty, SelNonHit , SelData); + ///////////////////////////////////////////////////////////////////////////////////////////// // Write Enable demux ///////////////////////////////////////////////////////////////////////////////////////////// - assign SetValidWay = SetValid & SelData; - assign ClearValidWay = ClearValid & SelData; // exclusion-tag: icache ClearValidWay - assign SetDirtyWay = SetDirty & SelData; // exclusion-tag: icache SetDirtyWay - assign ClearDirtyWay = ClearDirty & SelData; + assign SetValidWay = SetValid & SelectedWay; + assign ClearValidWay = ClearValid & SelectedWay; // exclusion-tag: icache ClearValidWay + assign SetDirtyWay = SetDirty & SelectedWay; // exclusion-tag: icache SetDirtyWay + assign ClearDirtyWay = ClearDirty & SelectedWay; assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage; // exclusion-tag: icache SelectedWiteWordEn assign SetValidEN = SetValidWay & ~FlushStage; // exclusion-tag: cache SetValidEN assign ClearValidEN = ClearValidWay & ~FlushStage; // exclusion-tag: cache ClearValidEN @@ -120,9 +117,9 @@ module cacheway import cvw::*; #(parameter cvw_t P, .din(PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); // AND portion of distributed tag multiplexer - assign TagWay = SelData ? ReadTag : '0; // AND part of AOMux + assign TagWay = SelectedWay ? 
ReadTag : '0; // AND part of AOMux assign HitDirtyWay = Dirty & ValidWay; - assign DirtyWay = SelDirty & HitDirtyWay; // exclusion-tag: icache DirtyWay + assign DirtyWay = SelecteDirty & HitDirtyWay; // exclusion-tag: icache DirtyWay assign HitWay = ValidWay & (ReadTag == PAdr[PA_BITS-1:OFFSETLEN+INDEXLEN]) & ~InvalidateCacheDelay; // exclusion-tag: dcache HitWay flop #(1) InvalidateCacheReg(clk, InvalidateCache, InvalidateCacheDelay); @@ -152,7 +149,7 @@ module cacheway import cvw::*; #(parameter cvw_t P, end // AND portion of distributed read multiplexers - assign ReadDataLineWay = SelData ? ReadDataLine : '0; // AND part of AO mux. + assign ReadDataLineWay = SelectedWay ? ReadDataLine : '0; // AND part of AO mux. ///////////////////////////////////////////////////////////////////////////////////////////// // Valid Bits From 85691f0e8bfb77df76f7df50aca04ebad708621d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Thu, 29 Feb 2024 17:18:01 -0600 Subject: [PATCH 16/40] Simplified and clarified names in cacheLRU. --- src/cache/cache.sv | 4 ++-- src/cache/cacheLRU.sv | 37 ++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 6882110bd..cf3e5e0d4 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -180,14 +180,14 @@ module cache import cvw::*; #(parameter cvw_t P, assign DemuxedByteMask = BlankByteMask << ((MUXINTERVAL/8) * WordOffsetAddr); - assign FetchBufferByteSel = SetValid & ~SetDirty ? '1 : ~DemuxedByteMask; // If load miss set all muxes to 1. + assign FetchBufferByteSel = SetDirty ? ~DemuxedByteMask : '1; // If load miss set all muxes to 1. 
// Merge write data into fetched cache line for store miss for(index = 0; index < LINELEN/8; index++) begin mux2 #(8) WriteDataMux(.d0(CacheWriteData[(8*index)%WORDLEN+7:(8*index)%WORDLEN]), .d1(FetchBuffer[8*index+7:8*index]), .s(FetchBufferByteSel[index] & ~CMOpM[3]), .y(LineWriteData[8*index+7:8*index])); end - assign LineByteMask = SetValid ? '1 : SetDirty ? DemuxedByteMask : '0; + assign LineByteMask = SetDirty ? DemuxedByteMask : '1; end else begin:WriteSelLogic diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index e795dd765..2670af93f 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -1,7 +1,7 @@ /////////////////////////////////////////// // cacheLRU.sv // -// Written: Ross Thompson ross1728@gmail.com +// Written: Rose Thompson ross1728@gmail.com // Created: 20 July 2021 // Modified: 20 January 2023 // @@ -36,8 +36,8 @@ module cacheLRU input logic CacheEn, // Enable the cache memory arrays. Disable hold read data constant input logic [NUMWAYS-1:0] HitWay, // Which way is valid and matches PAdr's tag input logic [NUMWAYS-1:0] ValidWay, // Which ways for a particular set are valid, ignores tag - input logic [SETLEN-1:0] CacheSetData, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr - input logic [SETLEN-1:0] CacheSetTag, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr + input logic [SETLEN-1:0] CacheSetData, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr + input logic [SETLEN-1:0] CacheSetTag, // Cache address, the output of the address select mux, NextAdr, PAdr, or FlushAdr input logic [SETLEN-1:0] PAdr, // Physical address input logic LRUWriteEn, // Update the LRU state input logic SetValid, // Set the dirty bit in the selected way and set @@ -51,23 +51,27 @@ module cacheLRU logic [NUMWAYS-2:0] LRUMemory [NUMLINES-1:0]; logic [NUMWAYS-2:0] CurrLRU; logic [NUMWAYS-2:0] NextLRU; - logic [NUMWAYS-1:0] Way; - logic [LOGNUMWAYS-1:0] 
WayEncoded; + logic [LOGNUMWAYS-1:0] HitWayEncoded, Way; logic [NUMWAYS-2:0] WayExpanded; logic AllValid; genvar row; /* verilator lint_off UNOPTFLAT */ - // Ross: For some reason verilator does not like this. I checked and it is not a circular path. + // Rose: For some reason verilator does not like this. I checked and it is not a circular path. logic [NUMWAYS-2:0] LRUUpdate; logic [LOGNUMWAYS-1:0] Intermediate [NUMWAYS-2:0]; /* verilator lint_on UNOPTFLAT */ + logic [NUMWAYS-1:0] FirstZero; + logic [LOGNUMWAYS-1:0] FirstZeroWay; + logic [LOGNUMWAYS-1:0] VictimWayEnc; + + binencoder #(NUMWAYS) hitwayencoder(HitWay, HitWayEncoded); + assign AllValid = &ValidWay; ///// Update replacement bits. - // coverage off // Excluded from coverage b/c it is untestable without varying NUMWAYS. function integer log2 (integer value); @@ -80,8 +84,7 @@ module cacheLRU // coverage on // On a miss we need to ignore HitWay and derive the new replacement bits with the VictimWay. - mux2 #(NUMWAYS) WayMux(HitWay, VictimWay, SetValid, Way); - binencoder #(NUMWAYS) encoder(Way, WayEncoded); + mux2 #(LOGNUMWAYS) WayMuxEnc(HitWayEncoded, VictimWayEnc, SetValid, Way); // bit duplication // expand HitWay as HitWay[3], {{2}{HitWay[2]}}, {{4}{HitWay[1]}, {{8{HitWay[0]}}, ... @@ -89,7 +92,7 @@ module cacheLRU localparam integer DuplicationFactor = 2**(LOGNUMWAYS-row-1); localparam StartIndex = NUMWAYS-2 - DuplicationFactor + 1; localparam EndIndex = NUMWAYS-2 - 2 * DuplicationFactor + 2; - assign WayExpanded[StartIndex : EndIndex] = {{DuplicationFactor}{WayEncoded[row]}}; + assign WayExpanded[StartIndex : EndIndex] = {{DuplicationFactor}{Way[row]}}; end genvar node; @@ -102,14 +105,14 @@ module cacheLRU localparam r = LOGNUMWAYS - ctr_depth; // the child node will be updated if its parent was updated and - // the WayEncoded bit was the correct value. + // the Way bit was the correct value. // The if statement is only there for coverage since LRUUpdate[root] is always 1. 
if (node == NUMWAYS-2) begin - assign LRUUpdate[lchild] = ~WayEncoded[r]; - assign LRUUpdate[rchild] = WayEncoded[r]; + assign LRUUpdate[lchild] = ~Way[r]; + assign LRUUpdate[rchild] = Way[r]; end else begin - assign LRUUpdate[lchild] = LRUUpdate[node] & ~WayEncoded[r]; - assign LRUUpdate[rchild] = LRUUpdate[node] & WayEncoded[r]; + assign LRUUpdate[lchild] = LRUUpdate[node] & ~Way[r]; + assign LRUUpdate[rchild] = LRUUpdate[node] & Way[r]; end end @@ -129,14 +132,10 @@ module cacheLRU assign Intermediate[node] = CurrLRU[node] ? int1[LOGNUMWAYS-1:0] : int0[LOGNUMWAYS-1:0]; end - logic [NUMWAYS-1:0] FirstZero; - logic [LOGNUMWAYS-1:0] FirstZeroWay; - logic [LOGNUMWAYS-1:0] VictimWayEnc; priorityonehot #(NUMWAYS) FirstZeroEncoder(~ValidWay, FirstZero); binencoder #(NUMWAYS) FirstZeroWayEncoder(FirstZero, FirstZeroWay); mux2 #(LOGNUMWAYS) VictimMux(FirstZeroWay, Intermediate[NUMWAYS-2], AllValid, VictimWayEnc); - //decoder #(LOGNUMWAYS) decoder (Intermediate[NUMWAYS-2], VictimWay); decoder #(LOGNUMWAYS) decoder (VictimWayEnc, VictimWay); // LRU storage must be reset for modelsim to run. However the reset value does not actually matter in practice. From e72880fd8905f7f3315510f141fa545ef42b7f65 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 1 Mar 2024 09:59:54 -0600 Subject: [PATCH 17/40] Changed cachefsm state STATE_HIT to STATE_ACCESS. 
--- src/cache/cachefsm.sv | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 869789df5..15eda55f5 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -79,7 +79,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic CMOZeroNoEviction; logic StallConditions; - typedef enum logic [3:0]{STATE_HIT, // hit states + typedef enum logic [3:0]{STATE_ACCESS, // hit states // miss states STATE_FETCH, STATE_WRITEBACK, @@ -101,7 +101,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. - assign CacheAccess = (|CacheRW) & ((CurrState == STATE_HIT & ~Stall & ~FlushStage) | (CurrState == STATE_ADDRESS_SETUP & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW + assign CacheAccess = (|CacheRW) & ((CurrState == STATE_ACCESS & ~Stall & ~FlushStage) | (CurrState == STATE_ADDRESS_SETUP & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW assign CacheMiss = CacheAccess & ~CacheHit; // special case on reset. 
When the fsm first exists reset twayhe @@ -110,22 +110,22 @@ module cachefsm import cvw::*; #(parameter cvw_t P, flop #(1) resetDelayReg(.clk, .d(reset), .q(resetDelay)); always_ff @(posedge clk) - if (reset | FlushStage) CurrState <= #1 STATE_HIT; + if (reset | FlushStage) CurrState <= #1 STATE_ACCESS; else CurrState <= #1 NextState; always_comb begin - NextState = STATE_HIT; + NextState = STATE_ACCESS; case (CurrState) // exclusion-tag: icache state-case - STATE_HIT: if(InvalidateCache) NextState = STATE_HIT; // exclusion-tag: dcache InvalidateCheck + STATE_ACCESS: if(InvalidateCache) NextState = STATE_ACCESS; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; // exclusion-tag: icache FLUSHStatement else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement else if((AnyMiss | CMOWriteback) & ~READ_ONLY_CACHE) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement - else NextState = STATE_HIT; + else NextState = STATE_ACCESS; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else NextState = STATE_FETCH; STATE_WRITE_LINE: NextState = STATE_ADDRESS_SETUP; STATE_ADDRESS_SETUP: if(Stall) NextState = STATE_ADDRESS_SETUP; - else NextState = STATE_HIT; + else NextState = STATE_ACCESS; // exclusion-tag-start: icache case STATE_WRITEBACK: if(CacheBusAck & ~(|CMOpM[3:1])) NextState = STATE_FETCH; else if(CacheBusAck) NextState = STATE_ADDRESS_SETUP; // Read_hold lowers CacheStall @@ -138,14 +138,14 @@ module cachefsm import cvw::*; #(parameter cvw_t P, else if(CacheBusAck) NextState = STATE_ADDRESS_SETUP; else NextState = STATE_FLUSH_WRITEBACK; // exclusion-tag-end: icache case - default: NextState = STATE_HIT; + default: NextState = STATE_ACCESS; endcase end // com back to CPU - assign CacheCommitted = (CurrState != STATE_HIT) & ~(READ_ONLY_CACHE & (CurrState == STATE_ADDRESS_SETUP)); + assign CacheCommitted = (CurrState != STATE_ACCESS) 
& ~(READ_ONLY_CACHE & (CurrState == STATE_ADDRESS_SETUP)); assign StallConditions = FlushCache | AnyMiss | CMOWriteback; // exclusion-tag: icache FlushCache - assign CacheStall = (CurrState == STATE_HIT & StallConditions) | // exclusion-tag: icache StallStates + assign CacheStall = (CurrState == STATE_ACCESS & StallConditions) | // exclusion-tag: icache StallStates (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. @@ -153,15 +153,15 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK); // write enables internal to cache assign SetValid = CurrState == STATE_WRITE_LINE | - (CurrState == STATE_HIT & CMOZeroNoEviction) | + (CurrState == STATE_ACCESS & CMOZeroNoEviction) | (CurrState == STATE_WRITEBACK & CacheBusAck & CMOpM[3]); - assign ClearValid = (CurrState == STATE_HIT & CMOpM[0]) | + assign ClearValid = (CurrState == STATE_ACCESS & CMOpM[0]) | (CurrState == STATE_WRITEBACK & CMOpM[2] & CacheBusAck); - assign LRUWriteEn = (((CurrState == STATE_HIT & (AnyHit | CMOZeroNoEviction)) | + assign LRUWriteEn = (((CurrState == STATE_ACCESS & (AnyHit | CMOZeroNoEviction)) | (CurrState == STATE_WRITE_LINE)) & ~FlushStage) | (CurrState == STATE_WRITEBACK & CMOpM[3] & CacheBusAck); // exclusion-tag-start: icache flushdirtycontrols - assign SetDirty = (CurrState == STATE_HIT & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty + assign SetDirty = (CurrState == STATE_ACCESS & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | (CurrState == STATE_WRITEBACK & (CMOpM[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty @@ -169,10 +169,10 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // Flush and eviction controls 
CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & CacheBusAck; assign SelVictim = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOpM[1] | CMOpM[2])) | (CacheBusAck & CMOpM[3]))) | - (CurrState == STATE_HIT & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | + (CurrState == STATE_ACCESS & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2] | ~CacheBusAck)) | - (CurrState == STATE_HIT & AnyMiss & LineDirty); + (CurrState == STATE_ACCESS & AnyMiss & LineDirty); // coverage off -item e 1 -fecexprrow 1 // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | @@ -183,29 +183,29 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK & FlushFlag & CacheBusAck); // exclusion-tag-end: icache flushdirtycontrols // Bus interface controls - assign CacheBusRW[1] = (CurrState == STATE_HIT & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses + assign CacheBusRW[1] = (CurrState == STATE_ACCESS & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses (CurrState == STATE_FETCH & ~CacheBusAck) | (CurrState == STATE_WRITEBACK & CacheBusAck & ~(|CMOpM)); logic LoadMiss; assign LoadMiss = (CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign CacheBusRW[0] = (CurrState == STATE_HIT & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW + assign CacheBusRW[0] = (CurrState == STATE_ACCESS & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & ~CacheBusAck); - assign SelAdrData = (CurrState == STATE_HIT & (CacheRW[0] | AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrCauses // changes if 
store delay hazard removed + assign SelAdrData = (CurrState == STATE_ACCESS & (CacheRW[0] | AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrCauses // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | resetDelay; - assign SelAdrTag = (CurrState == STATE_HIT & (AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrTag // changes if store delay hazard removed + assign SelAdrTag = (CurrState == STATE_ACCESS & (AnyMiss | (|CMOpM))) | // exclusion-tag: icache SelAdrTag // changes if store delay hazard removed (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | resetDelay; assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_ADDRESS_SETUP; - assign CacheEn = (~Stall | StallConditions) | (CurrState != STATE_HIT) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn + assign CacheEn = (~Stall | StallConditions) | (CurrState != STATE_ACCESS) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn endmodule // cachefsm From 60f96112db2707ba676a17321118afaeda0301a7 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 1 Mar 2024 10:23:55 -0600 Subject: [PATCH 18/40] Moved the zero stage boot loader to the fpga directory. 
--- {tests/custom => fpga}/zsbl/Makefile | 2 +- {tests/custom => fpga}/zsbl/bios.s | 2 +- {tests/custom => fpga}/zsbl/copyFlash.c | 0 {tests/custom => fpga}/zsbl/gpt.c | 0 {tests/custom => fpga}/zsbl/gpt.h | 0 {tests/custom => fpga}/zsbl/main.c | 0 {tests/custom => fpga}/zsbl/sdcDriver.c | 2 +- {tests/custom => fpga}/zsbl/sdcDriver.h | 0 {tests/custom => fpga}/zsbl/smp.h | 0 {tests/custom => fpga}/zsbl/uart.c | 0 {tests/custom => fpga}/zsbl/uart.h | 0 11 files changed, 3 insertions(+), 3 deletions(-) rename {tests/custom => fpga}/zsbl/Makefile (98%) rename {tests/custom => fpga}/zsbl/bios.s (97%) rename {tests/custom => fpga}/zsbl/copyFlash.c (100%) rename {tests/custom => fpga}/zsbl/gpt.c (100%) rename {tests/custom => fpga}/zsbl/gpt.h (100%) rename {tests/custom => fpga}/zsbl/main.c (100%) rename {tests/custom => fpga}/zsbl/sdcDriver.c (98%) rename {tests/custom => fpga}/zsbl/sdcDriver.h (100%) rename {tests/custom => fpga}/zsbl/smp.h (100%) rename {tests/custom => fpga}/zsbl/uart.c (100%) rename {tests/custom => fpga}/zsbl/uart.h (100%) diff --git a/tests/custom/zsbl/Makefile b/fpga/zsbl/Makefile similarity index 98% rename from tests/custom/zsbl/Makefile rename to fpga/zsbl/Makefile index 6dec9c797..bd30033fc 100644 --- a/tests/custom/zsbl/Makefile +++ b/fpga/zsbl/Makefile @@ -21,7 +21,7 @@ ROOT := .. 
LIBRARY_DIRS := LIBRARY_FILES := -MARCH :=-march=rv64imfdc +MARCH :=-march=rv64imfdc_zifencei MABI :=-mabi=lp64d LINK_FLAGS :=$(MARCH) $(MABI) -nostartfiles LINKER :=linker.x diff --git a/tests/custom/zsbl/bios.s b/fpga/zsbl/bios.s similarity index 97% rename from tests/custom/zsbl/bios.s rename to fpga/zsbl/bios.s index ebeadcf59..7954eab7a 100644 --- a/tests/custom/zsbl/bios.s +++ b/fpga/zsbl/bios.s @@ -94,5 +94,5 @@ end_of_bios: .globl _dtb .align 4, 0 _dtb: -.incbin "wally-vcu118.dtb" +#.incbin "wally-vcu118.dtb" diff --git a/tests/custom/zsbl/copyFlash.c b/fpga/zsbl/copyFlash.c similarity index 100% rename from tests/custom/zsbl/copyFlash.c rename to fpga/zsbl/copyFlash.c diff --git a/tests/custom/zsbl/gpt.c b/fpga/zsbl/gpt.c similarity index 100% rename from tests/custom/zsbl/gpt.c rename to fpga/zsbl/gpt.c diff --git a/tests/custom/zsbl/gpt.h b/fpga/zsbl/gpt.h similarity index 100% rename from tests/custom/zsbl/gpt.h rename to fpga/zsbl/gpt.h diff --git a/tests/custom/zsbl/main.c b/fpga/zsbl/main.c similarity index 100% rename from tests/custom/zsbl/main.c rename to fpga/zsbl/main.c diff --git a/tests/custom/zsbl/sdcDriver.c b/fpga/zsbl/sdcDriver.c similarity index 98% rename from tests/custom/zsbl/sdcDriver.c rename to fpga/zsbl/sdcDriver.c index edbe0677d..45caa42fa 100644 --- a/tests/custom/zsbl/sdcDriver.c +++ b/fpga/zsbl/sdcDriver.c @@ -1,7 +1,7 @@ /////////////////////////////////////////// // SDC.sv // -// Written: Ross Thompson September 25, 2021 +// Written: Rose Thompson September 25, 2021 // Modified: // // Purpose: driver for sdc reader. 
diff --git a/tests/custom/zsbl/sdcDriver.h b/fpga/zsbl/sdcDriver.h similarity index 100% rename from tests/custom/zsbl/sdcDriver.h rename to fpga/zsbl/sdcDriver.h diff --git a/tests/custom/zsbl/smp.h b/fpga/zsbl/smp.h similarity index 100% rename from tests/custom/zsbl/smp.h rename to fpga/zsbl/smp.h diff --git a/tests/custom/zsbl/uart.c b/fpga/zsbl/uart.c similarity index 100% rename from tests/custom/zsbl/uart.c rename to fpga/zsbl/uart.c diff --git a/tests/custom/zsbl/uart.h b/fpga/zsbl/uart.h similarity index 100% rename from tests/custom/zsbl/uart.h rename to fpga/zsbl/uart.h From 4c3d927474180bcbb0913672c791d63eb6d45332 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Fri, 1 Mar 2024 11:00:24 -0600 Subject: [PATCH 19/40] Renamed CacheHit to Hit. --- src/cache/cache.sv | 6 +++--- src/cache/cachefsm.sv | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index cf3e5e0d4..3cf2f28a6 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -82,7 +82,7 @@ module cache import cvw::*; #(parameter cvw_t P, logic ClearDirty, SetDirty, SetValid, ClearValid; logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] HitWay, ValidWay; - logic CacheHit; + logic Hit; logic [NUMWAYS-1:0] VictimWay, DirtyWay, HitDirtyWay; logic LineDirty, HitLineDirty; logic [TAGLEN-1:0] TagWay [NUMWAYS-1:0]; @@ -132,7 +132,7 @@ module cache import cvw::*; #(parameter cvw_t P, end else assign VictimWay = 1'b1; // one hot. 
- assign CacheHit = |HitWay; + assign Hit = |HitWay; assign LineDirty = |DirtyWay; assign HitLineDirty = |HitDirtyWay; @@ -226,7 +226,7 @@ module cache import cvw::*; #(parameter cvw_t P, cachefsm #(P, READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, .FlushStage, .CacheRW, .Stall, - .CacheHit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted, + .Hit, .LineDirty, .HitLineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdrData, .SelAdrTag, .SelVictim, .ClearDirty, .SetDirty, .SetValid, .ClearValid, .SelWriteback, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 15eda55f5..0059bb81d 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -50,7 +50,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic CacheAccess, // Cache access // cache internals - input logic CacheHit, // Exactly 1 way hits + input logic Hit, // Exactly 1 way hits input logic LineDirty, // The selected line and way is dirty input logic HitLineDirty, // The cache hit way is dirty input logic FlushAdrFlag, // On last set of a cache flush @@ -92,17 +92,17 @@ module cachefsm import cvw::*; #(parameter cvw_t P, statetype CurrState, NextState; - assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 - assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit + assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~Hit & ~InvalidateCache; // exclusion-tag: cache AnyMiss + assign AnyUpdateHit = (CacheRW[0]) & Hit; // exclusion-tag: icache storeAMO1 + assign AnyHit = AnyUpdateHit | (CacheRW[1] & Hit); // exclusion-tag: icache AnyUpdateHit assign CMOZeroNoEviction = CMOpM[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now - assign CMOWriteback = ((CMOpM[1] | CMOpM[2]) & CacheHit & HitLineDirty) | 
CMOpM[3] & LineDirty; + assign CMOWriteback = ((CMOpM[1] | CMOpM[2]) & Hit & HitLineDirty) | CMOpM[3] & LineDirty; assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. assign CacheAccess = (|CacheRW) & ((CurrState == STATE_ACCESS & ~Stall & ~FlushStage) | (CurrState == STATE_ADDRESS_SETUP & ~Stall & ~FlushStage)); // exclusion-tag: icache CacheW - assign CacheMiss = CacheAccess & ~CacheHit; + assign CacheMiss = CacheAccess & ~Hit; // special case on reset. When the fsm first exists reset twayhe // PCNextF will no longer be pointing to the correct address. @@ -169,7 +169,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // Flush and eviction controls CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2]) & CacheBusAck; assign SelVictim = (CurrState == STATE_WRITEBACK & ((~CacheBusAck & ~(CMOpM[1] | CMOpM[2])) | (CacheBusAck & CMOpM[3]))) | - (CurrState == STATE_ACCESS & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~CacheHit))) | + (CurrState == STATE_ACCESS & ((AnyMiss & LineDirty) | (CMOZeroNoEviction & ~Hit))) | (CurrState == STATE_WRITE_LINE); assign SelWriteback = (CurrState == STATE_WRITEBACK & (CMOpM[1] | CMOpM[2] | ~CacheBusAck)) | (CurrState == STATE_ACCESS & AnyMiss & LineDirty); @@ -188,7 +188,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_WRITEBACK & CacheBusAck & ~(|CMOpM)); logic LoadMiss; - assign LoadMiss = (CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss + assign LoadMiss = (CacheRW[1]) & ~Hit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign CacheBusRW[0] = (CurrState == STATE_ACCESS & LoadMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | From cba3209e7fd7851aa57233f8771133bd29b7527e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Sat, 2 Mar 2024 11:38:33 -0600 Subject: [PATCH 20/40] Trying an experiment. 
Use the less compact subwordreaddouble in the fpga synthesize rather than subwordreadmisaligned. --- src/lsu/lsu.sv | 3 +- src/lsu/subwordreaddouble.sv | 196 +++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 1712cdc19..70adc90ce 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -423,7 +423,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + //subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv new file mode 100644 index 000000000..936240cf7 --- /dev/null +++ b/src/lsu/subwordreaddouble.sv @@ -0,0 +1,196 @@ +/////////////////////////////////////////// +// subwordread.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Extract subwords and sign extend for reads +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. 
+// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordreaddouble #(parameter LLEN) + ( + input logic [LLEN*2-1:0] ReadDataWordMuxM, + input logic [2:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN-1:0] ReadDataM +); + + logic [7:0] ByteM; + logic [15:0] HalfwordM; + logic [4:0] PAdrSwap; + logic [4:0] BigEndianPAdr; + logic [4:0] LengthM; + + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + + always_comb + case(Funct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; + 3'b001: LengthM = 5'd2; + 3'b010: LengthM = 5'd4; + 3'b011: LengthM = 5'd8; + 3'b100: LengthM = 5'd16; + default: LengthM = 5'd8; + endcase + + if (LLEN == 128) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + logic [63:0] QdWordM; + always_comb + case(PAdrSwap) + 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; + 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; + 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; + 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; + 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; + 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; + 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; + 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; + 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; + 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; + 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; + 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; + 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; + 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; + 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; + 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; + 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; + 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; + 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; + 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; + 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; + 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; + 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; + 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; + 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; + 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; + 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; + 
5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; + 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; + 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; + 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; + 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; + endcase + + assign ByteM = QdWordM[7:0]; + assign HalfwordM = QdWordM[15:0]; + assign WordM = QdWordM[31:0]; + assign DblWordM = QdWordM[63:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; + 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; + 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; + 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; + 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; + 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; + 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; + 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: 
DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + assign ByteM = DblWordM[7:0]; + assign HalfwordM = DblWordM[15:0]; + assign WordM = DblWordM[31:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + + logic [31:0] WordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase + + assign ByteM = WordM[7:0]; + assign HalfwordM = WordM[15:0]; + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, 
ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen + endcase + end +endmodule From 8136b45ca7f8dedb83fd97e152bbb8765436894e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Sat, 2 Mar 2024 11:55:43 -0600 Subject: [PATCH 21/40] Revert "Trying an experiment. Use the less compact subwordreaddouble in the fpga synthesize rather than subwordreadmisaligned." This reverts commit cba3209e7fd7851aa57233f8771133bd29b7527e. --- src/lsu/lsu.sv | 3 +- src/lsu/subwordreaddouble.sv | 196 ----------------------------------- 2 files changed, 1 insertion(+), 198 deletions(-) delete mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 70adc90ce..1712cdc19 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -423,8 +423,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - //subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv deleted file mode 100644 index 936240cf7..000000000 --- a/src/lsu/subwordreaddouble.sv +++ /dev/null @@ -1,196 +0,0 @@ -/////////////////////////////////////////// -// subwordread.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// 
Purpose: Extract subwords and sign extend for reads -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordreaddouble #(parameter LLEN) - ( - input logic [LLEN*2-1:0] ReadDataWordMuxM, - input logic [2:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN-1:0] ReadDataM -); - - logic [7:0] ByteM; - logic [15:0] HalfwordM; - logic [4:0] PAdrSwap; - logic [4:0] BigEndianPAdr; - logic [4:0] LengthM; - - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; - /* verilator lint_off WIDTHEXPAND */ - /* verilator lint_off WIDTHTRUNC */ - assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; - /* verilator lint_on WIDTHTRUNC */ - /* verilator lint_on WIDTHEXPAND */ - - always_comb - case(Funct3M & {FpLoadStoreM, 2'b11}) - 3'b000: LengthM = 5'd1; - 3'b001: LengthM = 5'd2; - 3'b010: LengthM = 5'd4; - 3'b011: LengthM = 5'd8; - 3'b100: LengthM = 5'd16; - default: LengthM = 5'd8; - endcase - - if (LLEN == 128) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - logic [63:0] QdWordM; - always_comb - case(PAdrSwap) - 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; - 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; - 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; - 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; - 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; - 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; - 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; - 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; - 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; - 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; - 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; - 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; - 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; - 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; - 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; - 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; - 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; - 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; - 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; - 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; - 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; - 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; - 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; - 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; - 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; - 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; - 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; - 
5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; - 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; - 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; - 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; - 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; - endcase - - assign ByteM = QdWordM[7:0]; - assign HalfwordM = QdWordM[15:0]; - assign WordM = QdWordM[31:0]; - assign DblWordM = QdWordM[63:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; - 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; - 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; - 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; - 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; - 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; - 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; - 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: 
DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - assign ByteM = DblWordM[7:0]; - assign HalfwordM = DblWordM[15:0]; - assign WordM = DblWordM[31:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - - logic [31:0] WordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase - - assign ByteM = WordM[7:0]; - assign HalfwordM = WordM[15:0]; - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, 
ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen - endcase - end -endmodule From a22de456312ea45ff77612eec9a73c1eed625c2e Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Sat, 2 Mar 2024 16:20:31 -0600 Subject: [PATCH 22/40] Removed unused storedelay from align. --- src/lsu/align.sv | 10 +++------- src/lsu/lsu.sv | 8 +++----- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index ad0dbf238..094da4a15 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -52,7 +52,6 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic SelStoreDelay, //*** this is bad. 
really don't like moving this outside output logic [P.LLEN*2-1:0] ReadDataWordSpillAllM, output logic SpillStallM); @@ -118,20 +117,17 @@ module align import cvw::*; #(parameter cvw_t P) ( always_comb begin case (CurrState) - STATE_READY: if (ValidSpillM & ~MemRWM[0]) NextState = STATE_SPILL; // load spill - else if(ValidSpillM) NextState = STATE_STORE_DELAY; // store spill + STATE_READY: if (ValidSpillM) NextState = STATE_SPILL; // load spill else NextState = STATE_READY; // no spill STATE_SPILL: if(StallM) NextState = STATE_SPILL; else NextState = STATE_READY; - STATE_STORE_DELAY: NextState = STATE_SPILL; default: NextState = STATE_READY; endcase end - assign SelSpillM = (CurrState == STATE_SPILL | CurrState == STATE_STORE_DELAY); - assign SelSpillE = (CurrState == STATE_READY & ValidSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall) | (CurrState == STATE_STORE_DELAY); + assign SelSpillM = CurrState == STATE_SPILL; + assign SelSpillE = (CurrState == STATE_READY & ValidSpillM) | (CurrState == STATE_SPILL & CacheBusHPWTStall); assign SpillSaveM = (CurrState == STATE_READY) & ValidSpillM & ~FlushM; - assign SelStoreDelay = (CurrState == STATE_STORE_DELAY); // *** Can this be merged into the PreLSURWM logic? 
assign SpillStallM = SelSpillE; //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 1712cdc19..cf1767f5c 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -142,7 +142,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic [1:0] MemRWSpillM; logic SpillStallM; - logic SelStoreDelay; logic DTLBMissM; // DTLB miss causes HPTW walk logic DTLBWriteM; // Writes PTE and PageType to DTLB @@ -168,8 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, - .SelStoreDelay); + .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; end else begin : no_ziccslm_align @@ -179,7 +177,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign ReadDataWordSpillAllM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign MemRWSpillM = MemRWM; - assign {SpillStallM, SelStoreDelay} = '0; + assign {SpillStallM} = '0; end if(P.ZICBOZ_SUPPORTED) begin : cboz @@ -333,7 +331,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), - .CacheRW(SelStoreDelay ? 
2'b00 : CacheRWM), + .CacheRW(CacheRWM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataM), .SelHPTW, From 0222e8f42add640fd8f5d16bc32bf6d6890dec68 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 4 Mar 2024 17:52:41 -0600 Subject: [PATCH 23/40] Don't want to clear the lru bits on invalidation (clearvalid). --- src/cache/cacheLRU.sv | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index 2670af93f..71b11abee 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -144,9 +144,7 @@ module cacheLRU always_ff @(posedge clk) begin if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] = '0; // exclusion-tag: initialize if(CacheEn) begin - if(ClearValid & ~FlushStage) - LRUMemory[PAdr] <= '0; - else if(LRUWriteEn) + if(LRUWriteEn) LRUMemory[PAdr] <= NextLRU; if(LRUWriteEn & (PAdr == CacheSetTag)) CurrLRU <= #1 NextLRU; From 457d3481e76a686973dd39cb74dee9c4b5abd63a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 4 Mar 2024 17:58:41 -0600 Subject: [PATCH 24/40] How did this error get past for so long. --- src/cache/cacheLRU.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache/cacheLRU.sv b/src/cache/cacheLRU.sv index 71b11abee..2d59ccfd7 100644 --- a/src/cache/cacheLRU.sv +++ b/src/cache/cacheLRU.sv @@ -142,7 +142,7 @@ module cacheLRU // This is a two port memory. // Every cycle must read from CacheSetData and each load/store must write the new LRU. 
always_ff @(posedge clk) begin - if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] = '0; // exclusion-tag: initialize + if (reset) for (int set = 0; set < NUMLINES; set++) LRUMemory[set] <= '0; // exclusion-tag: initialize if(CacheEn) begin if(LRUWriteEn) LRUMemory[PAdr] <= NextLRU; From 068ffda5fb8a7ecdffb87555cce02039d688776b Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 13:28:47 -0600 Subject: [PATCH 25/40] Revert "Revert "Trying an experiment. Use the less compact subwordreaddouble in the fpga synthesize rather than subwordreadmisaligned."" This reverts commit 8136b45ca7f8dedb83fd97e152bbb8765436894e. --- src/lsu/lsu.sv | 3 +- src/lsu/subwordreaddouble.sv | 196 +++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index cf1767f5c..efb5d9307 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -421,7 +421,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + //subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv new file mode 100644 index 000000000..936240cf7 --- /dev/null +++ b/src/lsu/subwordreaddouble.sv @@ -0,0 +1,196 @@ +/////////////////////////////////////////// +// 
subwordread.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Extract subwords and sign extend for reads +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module subwordreaddouble #(parameter LLEN) + ( + input logic [LLEN*2-1:0] ReadDataWordMuxM, + input logic [2:0] PAdrM, + input logic [2:0] Funct3M, + input logic FpLoadStoreM, + input logic BigEndianM, + output logic [LLEN-1:0] ReadDataM +); + + logic [7:0] ByteM; + logic [15:0] HalfwordM; + logic [4:0] PAdrSwap; + logic [4:0] BigEndianPAdr; + logic [4:0] LengthM; + + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + + always_comb + case(Funct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; + 3'b001: LengthM = 5'd2; + 3'b010: LengthM = 5'd4; + 3'b011: LengthM = 5'd8; + 3'b100: LengthM = 5'd16; + default: LengthM = 5'd8; + endcase + + if (LLEN == 128) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + logic [63:0] QdWordM; + always_comb + case(PAdrSwap) + 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; + 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; + 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; + 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; + 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; + 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; + 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; + 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; + 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; + 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; + 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; + 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; + 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; + 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; + 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; + 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; + 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; + 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; + 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; + 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; + 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; + 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; + 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; + 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; + 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; + 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; + 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; + 
5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; + 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; + 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; + 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; + 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; + endcase + + assign ByteM = QdWordM[7:0]; + assign HalfwordM = QdWordM[15:0]; + assign WordM = QdWordM[31:0]; + assign DblWordM = QdWordM[63:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else if (LLEN == 64) begin:swrmux + logic [31:0] WordM; + logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; + 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; + 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; + 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; + 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; + 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; + 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; + 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: 
DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase + + assign ByteM = DblWordM[7:0]; + assign HalfwordM = DblWordM[15:0]; + assign WordM = DblWordM[31:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + + logic [31:0] WordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase + + assign ByteM = WordM[7:0]; + assign HalfwordM = WordM[15:0]; + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, 
ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen + endcase + end +endmodule From 2ea01343293a8bb2fc16d63005a01fcc5c8e2cb3 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 13:28:59 -0600 Subject: [PATCH 26/40] Revert "Trying an experiment. Use the less compact subwordreaddouble in the fpga synthesize rather than subwordreadmisaligned." This reverts commit cba3209e7fd7851aa57233f8771133bd29b7527e. --- src/lsu/lsu.sv | 3 +- src/lsu/subwordreaddouble.sv | 196 ----------------------------------- 2 files changed, 1 insertion(+), 198 deletions(-) delete mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index efb5d9307..cf1767f5c 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -421,8 +421,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - //subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv deleted file mode 100644 index 936240cf7..000000000 --- a/src/lsu/subwordreaddouble.sv +++ /dev/null @@ -1,196 +0,0 @@ -/////////////////////////////////////////// -// subwordread.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// 
Purpose: Extract subwords and sign extend for reads -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordreaddouble #(parameter LLEN) - ( - input logic [LLEN*2-1:0] ReadDataWordMuxM, - input logic [2:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN-1:0] ReadDataM -); - - logic [7:0] ByteM; - logic [15:0] HalfwordM; - logic [4:0] PAdrSwap; - logic [4:0] BigEndianPAdr; - logic [4:0] LengthM; - - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; - /* verilator lint_off WIDTHEXPAND */ - /* verilator lint_off WIDTHTRUNC */ - assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; - /* verilator lint_on WIDTHTRUNC */ - /* verilator lint_on WIDTHEXPAND */ - - always_comb - case(Funct3M & {FpLoadStoreM, 2'b11}) - 3'b000: LengthM = 5'd1; - 3'b001: LengthM = 5'd2; - 3'b010: LengthM = 5'd4; - 3'b011: LengthM = 5'd8; - 3'b100: LengthM = 5'd16; - default: LengthM = 5'd8; - endcase - - if (LLEN == 128) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - logic [63:0] QdWordM; - always_comb - case(PAdrSwap) - 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; - 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; - 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; - 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; - 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; - 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; - 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; - 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; - 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; - 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; - 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; - 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; - 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; - 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; - 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; - 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; - 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; - 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; - 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; - 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; - 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; - 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; - 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; - 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; - 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; - 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; - 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; - 
5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; - 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; - 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; - 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; - 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; - endcase - - assign ByteM = QdWordM[7:0]; - assign HalfwordM = QdWordM[15:0]; - assign WordM = QdWordM[31:0]; - assign DblWordM = QdWordM[63:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; - 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; - 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; - 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; - 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; - 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; - 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; - 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: 
DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - assign ByteM = DblWordM[7:0]; - assign HalfwordM = DblWordM[15:0]; - assign WordM = DblWordM[31:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - - logic [31:0] WordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase - - assign ByteM = WordM[7:0]; - assign HalfwordM = WordM[15:0]; - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, 
ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen - endcase - end -endmodule From 3fa5faa6cf236db0870187020bba4a1ea8f7230a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 13:29:08 -0600 Subject: [PATCH 27/40] Revert "Added sdc to pma allow shift." This reverts commit a2d5618d889f882e0ceccb8c75708dc564bb7dae. --- src/mmu/pmachecker.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index e77cc74d0..60296213d 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -61,7 +61,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[3] | SelRegions[4] | SelRegions[5]; // exclusion-tag: unused-cachable assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; - assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6] | SelRegions[10]; + assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6]; // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent From 5447159cfd03801dc4f520885b719c5c4c3a91e4 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:15:26 -0600 Subject: [PATCH 28/40] Revert "Cleanup." This reverts commit e84b7cc14782d6cb6676d94e987c15f63f87a604. 
--- src/lsu/align.sv | 18 ++++++++++++++++-- src/lsu/endianswap.sv | 4 ++-- src/lsu/lsu.sv | 4 +++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index 094da4a15..fa10916f9 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -48,6 +48,7 @@ module align import cvw::*; #(parameter cvw_t P) ( input logic [P.LLEN-1:0] LSUWriteDataM, output logic [(P.LLEN*2-1)/8:0] ByteMaskSpillM, + output logic [P.LLEN*2-1:0] LSUWriteDataSpillM, output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill @@ -70,8 +71,9 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IEUAdrIncrementM; localparam OFFSET_LEN = $clog2(LLENINBYTES); - logic [OFFSET_LEN-1:0] AccessByteOffsetM; - logic PotentialSpillM; + logic [$clog2(LLENINBYTES)-1:0] AccessByteOffsetM; + logic [$clog2(LLENINBYTES)+2:0] ShiftAmount; + logic PotentialSpillM; /* verilator lint_off WIDTHEXPAND */ assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; @@ -140,6 +142,18 @@ module align import cvw::*; #(parameter cvw_t P) ( // merge together mux2 #(2*P.LLEN) postspillmux(DCacheReadDataWordM, {DCacheReadDataWordM[P.LLEN-1:0], ReadDataWordFirstHalfM}, SelSpillM, ReadDataWordSpillAllM); + + // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) + // 8 * is for shifting by bytes not bits + assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate + + // write path. Also has the 8:1 shifter muxing for the byteoffset + // then it also has the mux to select when a spill occurs + logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I've extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. 
+ + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << ShiftAmount; + assign LSUWriteDataSpillM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; + mux3 #(2*P.LLEN/8) bytemaskspillmux({ByteMaskExtendedM, ByteMaskM}, // no spill {{{P.LLEN/8}{1'b0}}, ByteMaskM}, // spill, first half {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, // spill, second half diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index afd4ecdd2..3c552b371 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -54,8 +54,8 @@ module endianswap #(parameter LEN) ( y[143:136] = a[119:112]; y[135:128] = a[127:120]; y[127:120] = a[135:128]; - y[119:112] = a[143:136]; - y[111:104] = a[151:144]; + y[119:112] = a[142:136]; + y[111:104] = a[152:144]; y[103:96] = a[159:152]; y[95:88] = a[167:160]; y[87:80] = a[175:168]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index cf1767f5c..e3e138ea4 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -126,6 +126,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN-1:0] DTIMReadDataWordM; // DTIM read data /* verilator lint_off WIDTHEXPAND */ logic [MLEN-1:0] DCacheReadDataWordM; // D$ read data + logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data @@ -166,7 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .FpLoadStoreM, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, 
IEUAdrSpillE}; @@ -176,6 +177,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( assign SelSpillE = '0; assign ReadDataWordSpillAllM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; + assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; assign {SpillStallM} = '0; end From b64b88312924ea48843f05ffba0043392034e658 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:15:43 -0600 Subject: [PATCH 29/40] Revert "Removed duplicate endianswap." This reverts commit caac48b7f28e33ada9d4a7d0f017878635473811. --- src/lsu/endianswapdouble.sv | 114 ++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 src/lsu/endianswapdouble.sv diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv new file mode 100644 index 000000000..133149f0e --- /dev/null +++ b/src/lsu/endianswapdouble.sv @@ -0,0 +1,114 @@ +/////////////////////////////////////////// +// endianswap.sv +// +// Written: David_Harris@hmc.edu +// Created: 7 May 2022 +// Modified: 18 January 2023 +// +// Purpose: Swap byte order for Big-Endian accesses +// +// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module endianswapdouble #(parameter LEN) ( + input logic BigEndianM, + input logic [LEN-1:0] a, + output logic [LEN-1:0] y +); + + if(LEN == 256) begin + always_comb + if (BigEndianM) begin // swap endianness + y[255:248] = a[7:0]; + y[247:240] = a[15:8]; + y[239:232] = a[23:16]; + y[231:224] = a[31:24]; + y[223:216] = a[39:32]; + y[215:208] = a[47:40]; + y[207:200] = a[55:48]; + y[199:192] = a[63:56]; + y[191:184] = a[71:64]; + y[183:176] = a[79:72]; + y[175:168] = a[87:80]; + y[167:160] = a[95:88]; + y[159:152] = a[103:96]; + y[151:144] = a[111:104]; + y[143:136] = a[119:112]; + y[135:128] = a[127:120]; + y[127:120] = a[135:128]; + y[119:112] = a[142:136]; + y[111:104] = a[152:144]; + y[103:96] = a[159:152]; + y[95:88] = a[167:160]; + y[87:80] = a[175:168]; + y[79:72] = a[183:176]; + y[71:64] = a[191:184]; + y[63:56] = a[199:192]; + y[55:48] = a[207:200]; + y[47:40] = a[215:208]; + y[39:32] = a[223:216]; + y[31:24] = a[231:224]; + y[23:16] = a[239:232]; + y[15:8] = a[247:240]; + y[7:0] = a[255:248]; + end else y = a; + end else if(LEN == 128) begin + always_comb + if (BigEndianM) begin // swap endianness + y[127:120] = a[7:0]; + y[119:112] = a[15:8]; + y[111:104] = a[23:16]; + y[103:96] = a[31:24]; + y[95:88] = a[39:32]; + y[87:80] = a[47:40]; + y[79:72] = a[55:48]; + y[71:64] = a[63:56]; + y[63:56] = a[71:64]; + y[55:48] = a[79:72]; + y[47:40] = a[87:80]; + y[39:32] = a[95:88]; + y[31:24] = a[103:96]; + y[23:16] = a[111:104]; + y[15:8] = a[119:112]; + y[7:0] = a[127:120]; + end else y = a; + end else if(LEN == 64) begin + always_comb + if (BigEndianM) begin // swap endianness + y[63:56] = a[7:0]; + y[55:48] = a[15:8]; + y[47:40] = a[23:16]; + y[39:32] = a[31:24]; + y[31:24] = a[39:32]; + y[23:16] = a[47:40]; + y[15:8] = a[55:48]; + y[7:0] = a[63:56]; + end else 
y = a; + end else begin + always_comb + if (BigEndianM) begin + y[31:24] = a[7:0]; + y[23:16] = a[15:8]; + y[15:8] = a[23:16]; + y[7:0] = a[31:24]; + end else y = a; + end +endmodule From e7ec2bedd49db61b453322e7febe42430afc5a92 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:15:51 -0600 Subject: [PATCH 30/40] Revert "Simplifications of subword code." This reverts commit a4028831150b8d1206aa69cc15eeda60bd19f21c. --- src/lsu/endianswap.sv | 38 +------------------------------ src/lsu/lsu.sv | 4 ++-- src/lsu/subworddreadmisaligned.sv | 2 +- 3 files changed, 4 insertions(+), 40 deletions(-) diff --git a/src/lsu/endianswap.sv b/src/lsu/endianswap.sv index 3c552b371..7c042886a 100644 --- a/src/lsu/endianswap.sv +++ b/src/lsu/endianswap.sv @@ -34,43 +34,7 @@ module endianswap #(parameter LEN) ( output logic [LEN-1:0] y ); - if(LEN == 256) begin - always_comb - if (BigEndianM) begin // swap endianness - y[255:248] = a[7:0]; - y[247:240] = a[15:8]; - y[239:232] = a[23:16]; - y[231:224] = a[31:24]; - y[223:216] = a[39:32]; - y[215:208] = a[47:40]; - y[207:200] = a[55:48]; - y[199:192] = a[63:56]; - y[191:184] = a[71:64]; - y[183:176] = a[79:72]; - y[175:168] = a[87:80]; - y[167:160] = a[95:88]; - y[159:152] = a[103:96]; - y[151:144] = a[111:104]; - y[143:136] = a[119:112]; - y[135:128] = a[127:120]; - y[127:120] = a[135:128]; - y[119:112] = a[142:136]; - y[111:104] = a[152:144]; - y[103:96] = a[159:152]; - y[95:88] = a[167:160]; - y[87:80] = a[175:168]; - y[79:72] = a[183:176]; - y[71:64] = a[191:184]; - y[63:56] = a[199:192]; - y[55:48] = a[207:200]; - y[47:40] = a[215:208]; - y[39:32] = a[223:216]; - y[31:24] = a[231:224]; - y[23:16] = a[239:232]; - y[15:8] = a[247:240]; - y[7:0] = a[255:248]; - end else y = a; - end else if(LEN == 128) begin + if(LEN == 128) begin always_comb if (BigEndianM) begin // swap endianness y[127:120] = a[7:0]; diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index e3e138ea4..0af655dbf 100644 --- a/src/lsu/lsu.sv +++ 
b/src/lsu/lsu.sv @@ -448,8 +448,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswap #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswap #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); + endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 2868a54d8..66ca0375e 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -77,7 +77,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw 3'b011: if(LLEN == 128 || LLEN == 64 ) ReadDataM = {{LLEN-64{ReadDataAlignedM[63]|FpLoadStoreM}}, ReadDataAlignedM[63:0]}; // ld/fld - 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq + 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 else if(LLEN == 64) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu From 739e73ef8135036a988d2cde7c49bd2845d8327c Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:15:58 -0600 Subject: [PATCH 31/40] Revert "Siginficant cleanup of subwordwritemisaligned." 
This reverts commit fbc18abaa0b9bd24d7febfad69840f9d717f76df. --- src/lsu/subwordwritemisaligned.sv | 50 +++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/lsu/subwordwritemisaligned.sv b/src/lsu/subwordwritemisaligned.sv index 22f462d4a..dd82ffa19 100644 --- a/src/lsu/subwordwritemisaligned.sv +++ b/src/lsu/subwordwritemisaligned.sv @@ -72,7 +72,53 @@ module subwordwritemisaligned #(parameter LLEN) ( logic [LLEN*2-1:0] IMAFWriteData2M; assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; localparam OffsetIndex = $clog2(LLEN/8); - - assign LittleEndianWriteDataM = (IMAFWriteData2M << (PAdrSwap[OffsetIndex-1:0] * 8)) | (IMAFWriteData2M >> (LLEN - (PAdrSwap[OffsetIndex-1:0] * 8))); + logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; + // *** RT: Switch to something like this. + assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); + + // Replicate data for subword writes + if (LLEN == 128) begin:sww + always_comb + case(PAdrSwap[3:0]) + 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; + 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; + 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; + 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; + 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; + 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; + 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; + 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; + 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; + 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; + 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; + 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; + 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; + 4'b1101: LittleEndianWriteDataM = 
{24'b0, IMAFWriteDataM, 104'b0}; + 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; + 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // sq + endcase + end else if (LLEN == 64) begin:sww + always_comb + case(PAdrSwap[2:0]) + 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; + 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; + 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; + 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; + 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; + 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], IMAFWriteDataM, IMAFWriteDataM[63:24]}; + 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; + 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; + endcase + end else begin:sww // 32-bit + always_comb + case(PAdrSwap[1:0]) + 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; + 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; + 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; + 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen + endcase + end endmodule From 298028b119777cce32068c18ae399b246f74dc5a Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:03 -0600 Subject: [PATCH 32/40] Revert "Cleanup." This reverts commit 45c30267a50771fb0c5acb756ff3988d05f54f4a. 
--- src/lsu/subworddreadmisaligned.sv | 72 ++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 10 deletions(-) diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 66ca0375e..1e179dbc3 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -71,17 +71,69 @@ module subwordreadmisaligned #(parameter LLEN) assign HalfwordM = ReadDataAlignedM[15:0]; assign WordM = ReadDataAlignedM[31:0]; - always_comb + logic [LLEN-1:0] lb, lh_flh, lw_flw, ld_fld, lbu, lbu_flq, lhu, lwu; + + assign lb = {{LLEN-8{ByteM[7]}}, ByteM}; + assign lh_flh = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};; + assign lw_flw = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; + //assign ld_fld = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; + + + if (LLEN == 128) begin:swrmux + logic [63:0] DblWordM; + logic [127:0] QdWordM; + + assign DblWordM = ReadDataAlignedM[63:0]; + assign QdWordM =ReadDataAlignedM[127:0]; + + // sign extension/ NaN boxing + always_comb case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: if(LLEN == 128 || LLEN == 64 ) ReadDataM = {{LLEN-64{ReadDataAlignedM[63]|FpLoadStoreM}}, ReadDataAlignedM[63:0]}; // ld/fld - 3'b100: if(LLEN == 128) ReadDataM = FpLoadStoreM ? ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - else if(LLEN == 64) ReadDataM = FpLoadStoreM ? 
ReadDataAlignedM[LLEN-1:0] : {{LLEN-8{1'b0}}, ByteM[7:0]}; - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b100: ReadDataM = FpLoadStoreM ? QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen endcase + end else if (LLEN == 64) begin:swrmux + logic [63:0] DblWordM; + + assign DblWordM = ReadDataAlignedM[63:0]; + + // sign extension/ NaN boxing + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + + end else begin:swrmux // 32-bit + + // sign extension + always_comb + case(Funct3M) + 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb + 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + + //3'b011: ReadDataM = WordM[LLEN-1:0]; // fld + + 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu + + default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + endcase + end endmodule From a8024eee26debd95df9c7c2a6a0ba4109bd3004d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:16 -0600 Subject: [PATCH 33/40] Revert "Updated subword misaligned." This reverts commit 69d31d50e27199f105706172b5e2427f96689d3f. 
--- src/lsu/lsu.sv | 2 +- src/lsu/subworddreadmisaligned.sv | 12 ++---------- ...bwordwritemisaligned.sv => subwordwritedouble.sv} | 6 +++--- 3 files changed, 6 insertions(+), 14 deletions(-) rename src/lsu/{subwordwritemisaligned.sv => subwordwritedouble.sv} (97%) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 0af655dbf..8e827292f 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -425,7 +425,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritemisaligned #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index 1e179dbc3..fe96844f3 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -38,7 +38,6 @@ module subwordreadmisaligned #(parameter LLEN) output logic [LLEN-1:0] ReadDataM ); - logic [LLEN*2-1:0] ReadDataAlignedM; logic [7:0] ByteM; logic [15:0] HalfwordM; logic [31:0] WordM; @@ -65,20 +64,13 @@ module subwordreadmisaligned #(parameter LLEN) default: LengthM = 5'd8; endcase + logic [LLEN*2-1:0] ReadDataAlignedM; assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); assign ByteM = ReadDataAlignedM[7:0]; assign HalfwordM = ReadDataAlignedM[15:0]; assign WordM = ReadDataAlignedM[31:0]; - logic [LLEN-1:0] lb, lh_flh, lw_flw, ld_fld, lbu, lbu_flq, lhu, lwu; - - assign lb = 
{{LLEN-8{ByteM[7]}}, ByteM}; - assign lh_flh = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};; - assign lw_flw = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; - //assign ld_fld = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; - - if (LLEN == 128) begin:swrmux logic [63:0] DblWordM; logic [127:0] QdWordM; @@ -128,7 +120,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - //3'b011: ReadDataM = WordM[LLEN-1:0]; // fld + 3'b011: ReadDataM = WordM[LLEN-1:0]; // fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu diff --git a/src/lsu/subwordwritemisaligned.sv b/src/lsu/subwordwritedouble.sv similarity index 97% rename from src/lsu/subwordwritemisaligned.sv rename to src/lsu/subwordwritedouble.sv index dd82ffa19..eb62aa106 100644 --- a/src/lsu/subwordwritemisaligned.sv +++ b/src/lsu/subwordwritedouble.sv @@ -1,5 +1,5 @@ /////////////////////////////////////////// -// subwordwritemisaligned.sv +// subwordwrite.sv // // Written: David_Harris@hmc.edu // Created: 9 January 2021 @@ -28,7 +28,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module subwordwritemisaligned #(parameter LLEN) ( +module subwordwritedouble #(parameter LLEN) ( input logic [2:0] LSUFunct3M, input logic [2:0] PAdrM, input logic FpLoadStoreM, @@ -38,7 +38,7 @@ module subwordwritemisaligned #(parameter LLEN) ( output logic [LLEN*2-1:0] LittleEndianWriteDataM ); - // *** RT: This is logic is duplicated in subwordreadmisaligned. Merge the two. + // *** RT: This is logic is duplicated in subwordreaddouble. Merge the two. 
logic [4:0] PAdrSwap; logic [4:0] BigEndianPAdr; logic [4:0] LengthM; From f752b5dd37d94fb359584f31efbac6d810c548bd Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:24 -0600 Subject: [PATCH 34/40] Revert "Beginning subword cleanup." This reverts commit 7e1ea1e6d9472fdfd188823fc81ee455abbab460. --- src/lsu/lsu.sv | 2 +- src/lsu/subworddreadmisaligned.sv | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 8e827292f..0150be599 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -423,7 +423,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if(MISALIGN_SUPPORT) begin - subwordreadmisaligned #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subworddreadmisaligned.sv index fe96844f3..cc1c13787 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subworddreadmisaligned.sv @@ -40,11 +40,10 @@ module subwordreadmisaligned #(parameter LLEN) logic [7:0] ByteM; logic [15:0] HalfwordM; - logic [31:0] WordM; logic [4:0] PAdrSwap; logic [4:0] BigEndianPAdr; logic [4:0] LengthM; - + // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; @@ -67,14 +66,14 @@ module subwordreadmisaligned #(parameter LLEN) logic [LLEN*2-1:0] ReadDataAlignedM; assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; - if (LLEN == 128) begin:swrmux + logic [31:0] WordM; logic [63:0] DblWordM; logic [127:0] QdWordM; + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; assign DblWordM = ReadDataAlignedM[63:0]; assign QdWordM =ReadDataAlignedM[127:0]; @@ -93,8 +92,12 @@ module subwordreadmisaligned #(parameter LLEN) endcase end else if (LLEN == 64) begin:swrmux + logic [31:0] WordM; logic [63:0] DblWordM; + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; assign DblWordM = ReadDataAlignedM[63:0]; // sign extension/ NaN boxing @@ -113,6 +116,12 @@ module subwordreadmisaligned #(parameter LLEN) end else begin:swrmux // 32-bit + logic [31:0] WordM; + + assign ByteM = ReadDataAlignedM[7:0]; + assign HalfwordM = ReadDataAlignedM[15:0]; + assign WordM = ReadDataAlignedM[31:0]; + // sign extension always_comb case(Funct3M) From a48c16c0efbab9ba6e149687630b5dad3a5b9863 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:32 -0600 Subject: [PATCH 35/40] Revert "Swapped to the more compact subwordreadmisaligned.sv." This reverts commit 1ece6f8eaeaa6bdccda4e9e8b05697b5cad4986f. 
--- ...readmisaligned.sv => subwordreaddouble.sv} | 108 +++++++++++++----- 1 file changed, 82 insertions(+), 26 deletions(-) rename src/lsu/{subworddreadmisaligned.sv => subwordreaddouble.sv} (53%) diff --git a/src/lsu/subworddreadmisaligned.sv b/src/lsu/subwordreaddouble.sv similarity index 53% rename from src/lsu/subworddreadmisaligned.sv rename to src/lsu/subwordreaddouble.sv index cc1c13787..936240cf7 100644 --- a/src/lsu/subworddreadmisaligned.sv +++ b/src/lsu/subwordreaddouble.sv @@ -28,7 +28,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module subwordreadmisaligned #(parameter LLEN) +module subwordreaddouble #(parameter LLEN) ( input logic [LLEN*2-1:0] ReadDataWordMuxM, input logic [2:0] PAdrM, @@ -63,19 +63,50 @@ module subwordreadmisaligned #(parameter LLEN) default: LengthM = 5'd8; endcase - logic [LLEN*2-1:0] ReadDataAlignedM; - assign ReadDataAlignedM = ReadDataWordMuxM >> (PAdrSwap[$clog2(LLEN/4)-1:0] * 8); - if (LLEN == 128) begin:swrmux logic [31:0] WordM; logic [63:0] DblWordM; - logic [127:0] QdWordM; - - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; - assign DblWordM = ReadDataAlignedM[63:0]; - assign QdWordM =ReadDataAlignedM[127:0]; + logic [63:0] QdWordM; + always_comb + case(PAdrSwap) + 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; + 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; + 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; + 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; + 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; + 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; + 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; + 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; + 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; + 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; + 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; + 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; + 5'b01100: QdWordM = 
ReadDataWordMuxM[223:96]; + 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; + 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; + 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; + 5'b10000: QdWordM = ReadDataWordMuxM[255:128]; + 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; + 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; + 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; + 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; + 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; + 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; + 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; + 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; + 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; + 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; + 5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; + 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; + 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; + 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; + 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; + endcase + + assign ByteM = QdWordM[7:0]; + assign HalfwordM = QdWordM[15:0]; + assign WordM = QdWordM[31:0]; + assign DblWordM = QdWordM[63:0]; // sign extension/ NaN boxing always_comb @@ -85,7 +116,7 @@ module subwordreadmisaligned #(parameter LLEN) 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b100: ReadDataM = FpLoadStoreM ? QdWordM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 + //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen @@ -94,11 +125,29 @@ module subwordreadmisaligned #(parameter LLEN) end else if (LLEN == 64) begin:swrmux logic [31:0] WordM; logic [63:0] DblWordM; + always_comb + case(PAdrSwap[3:0]) + 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; + 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; + 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; + 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; + 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; + 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; + 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; + 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; + 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; + 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; + 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; + 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; + 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; + 4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; + 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; + 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; + endcase - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; - assign DblWordM = ReadDataAlignedM[63:0]; + assign ByteM = DblWordM[7:0]; + assign HalfwordM = DblWordM[15:0]; + assign WordM = DblWordM[31:0]; // sign extension/ NaN boxing always_comb @@ -116,25 +165,32 @@ module subwordreadmisaligned #(parameter LLEN) end else begin:swrmux // 32-bit - logic [31:0] WordM; + logic [31:0] WordM; + always_comb + case(PAdrSwap[2:0]) + 3'b000: WordM = ReadDataWordMuxM[31:0]; + 3'b001: WordM = ReadDataWordMuxM[39:8]; + 3'b010: WordM = ReadDataWordMuxM[47:16]; + 3'b011: WordM = ReadDataWordMuxM[55:24]; + 3'b100: WordM = 
ReadDataWordMuxM[63:32]; + 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; + 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; + 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; + endcase - assign ByteM = ReadDataAlignedM[7:0]; - assign HalfwordM = ReadDataAlignedM[15:0]; - assign WordM = ReadDataAlignedM[31:0]; + assign ByteM = WordM[7:0]; + assign HalfwordM = WordM[15:0]; // sign extension always_comb case(Funct3M) 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - - 3'b011: ReadDataM = WordM[LLEN-1:0]; // fld - + 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw + 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen + default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen endcase end endmodule From dce7de59a37fdc1acb9bc0e508aea001cf2ec06d Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:37 -0600 Subject: [PATCH 36/40] Revert "Non-ideal fix. Added new output from pma which indicates if the write shift should occur." This reverts commit 3714b2bf4adb815704c718a3cec921e563462d31. 
--- src/ifu/ifu.sv | 2 +- src/lsu/lsu.sv | 5 ++--- src/lsu/subwordwritedouble.sv | 6 +++--- src/mmu/mmu.sv | 3 +-- src/mmu/pmachecker.sv | 7 +++---- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/ifu/ifu.sv b/src/ifu/ifu.sv index bb23f4fd3..4848b5ebb 100644 --- a/src/ifu/ifu.sv +++ b/src/ifu/ifu.sv @@ -185,7 +185,7 @@ module ifu import cvw::*; #(parameter cvw_t P) ( .TLBFlush, .PhysicalAddress(PCPF), .TLBMiss(ITLBMissF), - .Cacheable(CacheableF), .Idempotent(), .AllowShift(), .SelTIM(SelIROM), + .Cacheable(CacheableF), .Idempotent(), .SelTIM(SelIROM), .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 0150be599..17c9d0c69 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -119,7 +119,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic SelSpillE; // Align logic detected a spill and needs to stall logic CacheableM; // PMA indicates memory address is cacheable - logic AllowShiftM; // PMA: indicates if WriteData should be byte shifted before going to cache or bus by offset. 
logic BusCommittedM; // Bus memory operation in flight, delay interrupts logic DCacheCommittedM; // D$ memory operation started, delay interrupts @@ -243,7 +242,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .ENVCFG_PBMTE, .ENVCFG_ADUE, .PrivilegeModeW, .DisableTranslation, .VAdr(IHAdrM), .Size(LSUFunct3M[1:0]), .PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), .TLBFlush(sfencevmaM), - .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .AllowShift(AllowShiftM), .SelTIM(SelDTIM), + .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), .Idempotent(), .SelTIM(SelDTIM), .InstrAccessFaultF(), .LoadAccessFaultM(LSULoadAccessFaultM), .StoreAmoAccessFaultM(LSUStoreAmoAccessFaultM), .InstrPageFaultF(), .LoadPageFaultM(LSULoadPageFaultM), .StoreAmoPageFaultM(LSUStoreAmoPageFaultM), @@ -425,7 +424,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .AllowShiftM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .CacheableM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv index eb62aa106..599d71984 100644 --- a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritedouble.sv @@ -33,7 +33,7 @@ module subwordwritedouble #(parameter LLEN) ( input logic [2:0] PAdrM, input logic FpLoadStoreM, input 
logic BigEndianM, - input logic AllowShiftM, + input logic CacheableM, input logic [LLEN-1:0] IMAFWriteDataM, output logic [LLEN*2-1:0] LittleEndianWriteDataM ); @@ -48,8 +48,8 @@ module subwordwritedouble #(parameter LLEN) ( // 10: PAdrM[2:0] // 11: BigEndianPAdr // 00: 00000 - // 01: 11111 - mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {AllowShiftM, BigEndianM}, PAdrSwap); + // 01: 00111 + mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {CacheableM, BigEndianM}, PAdrSwap); //assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; /* verilator lint_off WIDTHEXPAND */ /* verilator lint_off WIDTHTRUNC */ diff --git a/src/mmu/mmu.sv b/src/mmu/mmu.sv index 80a1ca7da..e842016a2 100644 --- a/src/mmu/mmu.sv +++ b/src/mmu/mmu.sv @@ -49,7 +49,6 @@ module mmu import cvw::*; #(parameter cvw_t P, output logic TLBMiss, // Miss TLB output logic Cacheable, // PMA indicates memory address is cachable output logic Idempotent, // PMA indicates memory address is idempotent - output logic AllowShift, // PMA indicates if WriteData should be byte shifted before going to cache or bus by offset output logic SelTIM, // Select a tightly integrated memory // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, // access fault sources @@ -113,7 +112,7 @@ module mmu import cvw::*; #(parameter cvw_t P, pmachecker #(P) pmachecker(.PhysicalAddress, .Size, .CMOpM, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, .PBMemoryType, - .Cacheable, .Idempotent, .AllowShift, .SelTIM, + .Cacheable, .Idempotent, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); if (P.PMP_ENTRIES > 0) begin : pmp diff --git a/src/mmu/pmachecker.sv b/src/mmu/pmachecker.sv index 60296213d..84e41ba65 100644 --- a/src/mmu/pmachecker.sv +++ b/src/mmu/pmachecker.sv @@ -1,4 +1,4 @@ -////////////////////////////////////////// +/////////////////////////////////////////// // pmachecker.sv // // Written: tfleming@hmc.edu & 
jtorrey@hmc.edu 20 April 2021 @@ -38,7 +38,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( input logic WriteAccessM, // Write access input logic ReadAccessM, // Read access input logic [1:0] PBMemoryType, // PBMT field of PTE during TLB hit, or 00 otherwise - output logic Cacheable, Idempotent, AllowShift, SelTIM, + output logic Cacheable, Idempotent, SelTIM, output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, output logic PMAStoreAmoAccessFaultM @@ -60,8 +60,7 @@ module pmachecker import cvw::*; #(parameter cvw_t P) ( // Only non-core RAM/ROM memory regions are cacheable. PBMT can override cachable; NC and IO are uncachable assign CacheableRegion = SelRegions[3] | SelRegions[4] | SelRegions[5]; // exclusion-tag: unused-cachable - assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; - assign AllowShift = SelRegions[1] | SelRegions[2] | SelRegions[3] | SelRegions[5] | SelRegions[6]; + assign Cacheable = (PBMemoryType == 2'b00) ? CacheableRegion : 0; // Nonidemdempotent means access could have side effect and must not be done speculatively or redundantly // I/O is nonidempotent. PBMT can override PMA; NC is idempotent and IO is non-idempotent From 9668fdd8686e5afff8750e3552f57a2df9663117 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:16:43 -0600 Subject: [PATCH 37/40] Revert "Closer to getting subword write misaligned working." This reverts commit 6a9c2d8dc43a1f997cf16969a2901d1e91fd4756. 
--- src/lsu/lsu.sv | 2 +- src/lsu/subwordwritedouble.sv | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 17c9d0c69..9fdf267b6 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -424,7 +424,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .CacheableM, .IMAFWriteDataM, .LittleEndianWriteDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv index 599d71984..728a4f4aa 100644 --- a/src/lsu/subwordwritedouble.sv +++ b/src/lsu/subwordwritedouble.sv @@ -33,7 +33,6 @@ module subwordwritedouble #(parameter LLEN) ( input logic [2:0] PAdrM, input logic FpLoadStoreM, input logic BigEndianM, - input logic CacheableM, input logic [LLEN-1:0] IMAFWriteDataM, output logic [LLEN*2-1:0] LittleEndianWriteDataM ); @@ -44,13 +43,7 @@ module subwordwritedouble #(parameter LLEN) ( logic [4:0] LengthM; // Funct3M[2] is the unsigned bit. mask upper bits. // Funct3M[1:0] is the size of the memory access. - // cacheable, BigEndian - // 10: PAdrM[2:0] - // 11: BigEndianPAdr - // 00: 00000 - // 01: 00111 - mux4 #(5) OffsetMux(5'b0, 5'b11111, {2'b0, PAdrM}, BigEndianPAdr, {CacheableM, BigEndianM}, PAdrSwap); - //assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + assign PAdrSwap = BigEndianM ? 
BigEndianPAdr : {2'b0, PAdrM}; /* verilator lint_off WIDTHEXPAND */ /* verilator lint_off WIDTHTRUNC */ assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; From 57aab52dc27f429bda5c07be8de6c42a5e5fe0a8 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:17:57 -0600 Subject: [PATCH 38/40] Revert "Partially working optimized subwordwrite for misaligned." This reverts commit dac8fc16af30dcd1182c9f7f4d69383dfde042fe. --- src/lsu/lsu.sv | 15 ++--- src/lsu/subwordwritedouble.sv | 117 ---------------------------------- 2 files changed, 7 insertions(+), 125 deletions(-) delete mode 100644 src/lsu/subwordwritedouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 9fdf267b6..9c7f49684 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -136,8 +136,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IHWriteDataM; // IEU or HPTW write data logic [P.XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data logic [P.LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data - logic [MLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data - logic [MLEN-1:0] LSUWriteDataM; // Final write data + logic [P.LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data + logic [P.LLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic [1:0] MemRWSpillM; @@ -166,7 +166,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .FpLoadStoreM, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM); assign IEUAdrExtM = 
{2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; @@ -335,7 +335,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .CacheRW(CacheRWM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), - .CacheWriteData(LSUWriteDataM), .SelHPTW, + .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), @@ -349,7 +349,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), - .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM[P.LLEN-1:0]), + .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusAtomic, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), @@ -424,12 +424,11 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, 
.LittleEndianWriteDataM); end + subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks swbytemask #(P.LLEN, P.ZICCLSM_SUPPORTED) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM), .ByteMaskExtended(ByteMaskExtendedM)); @@ -447,7 +446,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv deleted file mode 100644 index 728a4f4aa..000000000 --- a/src/lsu/subwordwritedouble.sv +++ /dev/null @@ -1,117 +0,0 @@ -/////////////////////////////////////////// -// subwordwrite.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// Purpose: Masking and muxing for subword writes -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordwritedouble #(parameter LLEN) ( - input logic [2:0] LSUFunct3M, - input logic [2:0] PAdrM, - input logic FpLoadStoreM, - input logic BigEndianM, - input logic [LLEN-1:0] IMAFWriteDataM, - output logic [LLEN*2-1:0] LittleEndianWriteDataM -); - - // *** RT: This is logic is duplicated in subwordreaddouble. Merge the two. - logic [4:0] PAdrSwap; - logic [4:0] BigEndianPAdr; - logic [4:0] LengthM; - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; - /* verilator lint_off WIDTHEXPAND */ - /* verilator lint_off WIDTHTRUNC */ - assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; - /* verilator lint_on WIDTHTRUNC */ - /* verilator lint_on WIDTHEXPAND */ - - always_comb - case(LSUFunct3M & {FpLoadStoreM, 2'b11}) - 3'b000: LengthM = 5'd1; - 3'b001: LengthM = 5'd2; - 3'b010: LengthM = 5'd4; - 3'b011: LengthM = 5'd8; - 3'b100: LengthM = 5'd16; - default: LengthM = 5'd8; - endcase // case (LSUFunct3M & {FpLoadStoreM, 2'b11}) - - // *** RT: End duplicated logic - - logic [LLEN*2-1:0] IMAFWriteData2M; - assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; - localparam OffsetIndex = $clog2(LLEN/8); - logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; - // *** RT: Switch to something like this. 
- assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); - - - // Replicate data for subword writes - if (LLEN == 128) begin:sww - always_comb - case(PAdrSwap[3:0]) - 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; - 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; - 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; - 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; - 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; - 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; - 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; - 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; - 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; - 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; - 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; - 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; - 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; - 4'b1101: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 104'b0}; - 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; - 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; - default: LittleEndianWriteDataM = IMAFWriteDataM; // sq - endcase - end else if (LLEN == 64) begin:sww - always_comb - case(PAdrSwap[2:0]) - 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; - 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; - 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; - 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; - 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; - 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], 
IMAFWriteDataM, IMAFWriteDataM[63:24]}; - 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; - 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; - endcase - end else begin:sww // 32-bit - always_comb - case(PAdrSwap[1:0]) - 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; - 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; - 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; - 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; - default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen - endcase - end -endmodule From 2f94be5e796e7acb97e3c6b44eea4e7ca8ea77a6 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:19:17 -0600 Subject: [PATCH 39/40] Revert "Optimized the align logic for loads." This reverts commit 1fd678b43348fd447a7701c4d8fec968593f9ec6. --- src/lsu/align.sv | 6 +- src/lsu/endianswapdouble.sv | 114 -------------------- src/lsu/lsu.sv | 29 ++---- src/lsu/subwordreaddouble.sv | 196 ----------------------------------- 4 files changed, 14 insertions(+), 331 deletions(-) delete mode 100644 src/lsu/endianswapdouble.sv delete mode 100644 src/lsu/subwordreaddouble.sv diff --git a/src/lsu/align.sv b/src/lsu/align.sv index fa10916f9..33c7471a3 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -53,7 +53,7 @@ module align import cvw::*; #(parameter cvw_t P) ( output logic [P.XLEN-1:0] IEUAdrSpillE, // The next PCF for one of the two memory addresses of the spill output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline - output logic [P.LLEN*2-1:0] ReadDataWordSpillAllM, + output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction output logic SpillStallM); 
localparam LLENINBYTES = P.LLEN/8; @@ -67,6 +67,8 @@ module align import cvw::*; #(parameter cvw_t P) ( logic SpillSaveM; logic [P.LLEN-1:0] ReadDataWordFirstHalfM; logic MisalignedM; + logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; + logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; logic [P.XLEN-1:0] IEUAdrIncrementM; @@ -146,6 +148,8 @@ module align import cvw::*; #(parameter cvw_t P) ( // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit) // 8 * is for shifting by bytes not bits assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate + assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount; + assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0]; // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv deleted file mode 100644 index 133149f0e..000000000 --- a/src/lsu/endianswapdouble.sv +++ /dev/null @@ -1,114 +0,0 @@ -/////////////////////////////////////////// -// endianswap.sv -// -// Written: David_Harris@hmc.edu -// Created: 7 May 2022 -// Modified: 18 January 2023 -// -// Purpose: Swap byte order for Big-Endian accesses -// -// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. 
You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. -//////////////////////////////////////////////////////////////////////////////////////////////// - -module endianswapdouble #(parameter LEN) ( - input logic BigEndianM, - input logic [LEN-1:0] a, - output logic [LEN-1:0] y -); - - if(LEN == 256) begin - always_comb - if (BigEndianM) begin // swap endianness - y[255:248] = a[7:0]; - y[247:240] = a[15:8]; - y[239:232] = a[23:16]; - y[231:224] = a[31:24]; - y[223:216] = a[39:32]; - y[215:208] = a[47:40]; - y[207:200] = a[55:48]; - y[199:192] = a[63:56]; - y[191:184] = a[71:64]; - y[183:176] = a[79:72]; - y[175:168] = a[87:80]; - y[167:160] = a[95:88]; - y[159:152] = a[103:96]; - y[151:144] = a[111:104]; - y[143:136] = a[119:112]; - y[135:128] = a[127:120]; - y[127:120] = a[135:128]; - y[119:112] = a[142:136]; - y[111:104] = a[152:144]; - y[103:96] = a[159:152]; - y[95:88] = a[167:160]; - y[87:80] = a[175:168]; - y[79:72] = a[183:176]; - y[71:64] = a[191:184]; - y[63:56] = a[199:192]; - y[55:48] = a[207:200]; - y[47:40] = a[215:208]; - y[39:32] = a[223:216]; - y[31:24] = a[231:224]; - y[23:16] = a[239:232]; - y[15:8] = a[247:240]; - y[7:0] = a[255:248]; - end else y = a; - end else if(LEN == 128) begin - always_comb - if (BigEndianM) begin // swap endianness - y[127:120] = a[7:0]; - y[119:112] = a[15:8]; - y[111:104] = a[23:16]; - y[103:96] = a[31:24]; - y[95:88] = a[39:32]; - y[87:80] = a[47:40]; - y[79:72] = a[55:48]; - y[71:64] = a[63:56]; - y[63:56] = a[71:64]; - y[55:48] = a[79:72]; - y[47:40] = a[87:80]; - y[39:32] = a[95:88]; - y[31:24] = a[103:96]; - y[23:16] = a[111:104]; - y[15:8] = a[119:112]; - 
y[7:0] = a[127:120]; - end else y = a; - end else if(LEN == 64) begin - always_comb - if (BigEndianM) begin // swap endianness - y[63:56] = a[7:0]; - y[55:48] = a[15:8]; - y[47:40] = a[23:16]; - y[39:32] = a[31:24]; - y[31:24] = a[39:32]; - y[23:16] = a[47:40]; - y[15:8] = a[55:48]; - y[7:0] = a[63:56]; - end else y = a; - end else begin - always_comb - if (BigEndianM) begin - y[31:24] = a[7:0]; - y[23:16] = a[15:8]; - y[15:8] = a[23:16]; - y[7:0] = a[31:24]; - end else y = a; - end -endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 9c7f49684..ac4edcd19 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -128,8 +128,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write /* verilator lint_on WIDTHEXPAND */ - logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data - logic [MLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data + logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data + logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data + logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection logic [P.LLEN-1:0] ReadDataM; // Final read data @@ -153,7 +154,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic SelDTIM; // Select DTIM rather than bus or D$ logic [P.XLEN-1:0] WriteDataZM; logic LSULoadPageFaultM, LSUStoreAmoPageFaultM; - logic [MLEN-1:0] ReadDataWordSpillAllM; ///////////////////////////////////////////////////////////////////////////////////////////// // Pipeline for IEUAdr E to M @@ -167,14 +167,14 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, - .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM); + 
.IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; assign IEUAdrExtE = {2'b00, IEUAdrSpillE}; end else begin : no_ziccslm_align assign IEUAdrExtM = {2'b00, IEUAdrM}; assign IEUAdrExtE = {2'b00, IEUAdrE}; assign SelSpillE = '0; - assign ReadDataWordSpillAllM = DCacheReadDataWordM; + assign DCacheReadDataWordSpillM = DCacheReadDataWordM; assign ByteMaskSpillM = ByteMaskM; assign LSUWriteDataSpillM = LSUWriteDataM; assign MemRWSpillM = MemRWM; @@ -296,7 +296,6 @@ module lsu import cvw::*; #(parameter cvw_t P) ( localparam AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^ localparam LINELEN = P.DCACHE_LINELENINBITS; // Number of bits in cacheline localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) - localparam MLENPOVERAHBW = MLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation) localparam CACHEWORDLEN = P.ZICCLSM_SUPPORTED ? 2*P.LLEN : P.LLEN; // Width of the cache's input and output data buses. Misaligned doubles width for fast access logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline @@ -360,14 +359,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times. // *** DTIMReadDataWordM should be increased to LLEN. // pma should generate exception for LLEN read to periph. 
-/* -----\/----- EXCLUDED -----\/----- mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); - -----/\----- EXCLUDED -----/\----- */ - mux3 #(MLEN) UnCachedDataMux(.d0(ReadDataWordSpillAllM), .d1({MLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}), - .d2({{(MLEN-P.XLEN){1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}), - .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM)); end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface logic [1:0] BusRW; // Non-DTIM memory access, ignore cacheableM logic [P.XLEN-1:0] FetchBuffer; @@ -420,14 +414,9 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// - - if(MISALIGN_SUPPORT) begin - subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - end else begin - subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, - .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); - end + + subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, + .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte masks @@ -447,7 +436,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if (P.BIGENDIAN_SUPPORTED) begin:endian endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); - endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); 
+ endianswap #(P.LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; assign LittleEndianReadDataWordM = ReadDataWordMuxM; diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv deleted file mode 100644 index 936240cf7..000000000 --- a/src/lsu/subwordreaddouble.sv +++ /dev/null @@ -1,196 +0,0 @@ -/////////////////////////////////////////// -// subwordread.sv -// -// Written: David_Harris@hmc.edu -// Created: 9 January 2021 -// Modified: 18 January 2023 -// -// Purpose: Extract subwords and sign extend for reads -// -// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) -// -// A component of the CORE-V-WALLY configurable RISC-V project. -// https://github.com/openhwgroup/cvw -// -// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 -// -// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file -// except in compliance with the License, or, at your option, the Apache License version 2.0. You -// may obtain a copy of the License at -// -// https://solderpad.org/licenses/SHL-2.1/ -// -// Unless required by applicable law or agreed to in writing, any work distributed under the -// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////////////////////// - -module subwordreaddouble #(parameter LLEN) - ( - input logic [LLEN*2-1:0] ReadDataWordMuxM, - input logic [2:0] PAdrM, - input logic [2:0] Funct3M, - input logic FpLoadStoreM, - input logic BigEndianM, - output logic [LLEN-1:0] ReadDataM -); - - logic [7:0] ByteM; - logic [15:0] HalfwordM; - logic [4:0] PAdrSwap; - logic [4:0] BigEndianPAdr; - logic [4:0] LengthM; - - // Funct3M[2] is the unsigned bit. mask upper bits. - // Funct3M[1:0] is the size of the memory access. - assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; - /* verilator lint_off WIDTHEXPAND */ - /* verilator lint_off WIDTHTRUNC */ - assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; - /* verilator lint_on WIDTHTRUNC */ - /* verilator lint_on WIDTHEXPAND */ - - always_comb - case(Funct3M & {FpLoadStoreM, 2'b11}) - 3'b000: LengthM = 5'd1; - 3'b001: LengthM = 5'd2; - 3'b010: LengthM = 5'd4; - 3'b011: LengthM = 5'd8; - 3'b100: LengthM = 5'd16; - default: LengthM = 5'd8; - endcase - - if (LLEN == 128) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - logic [63:0] QdWordM; - always_comb - case(PAdrSwap) - 5'b00000: QdWordM = ReadDataWordMuxM[127:0]; - 5'b00001: QdWordM = ReadDataWordMuxM[135:8]; - 5'b00010: QdWordM = ReadDataWordMuxM[143:16]; - 5'b00011: QdWordM = ReadDataWordMuxM[151:24]; - 5'b00100: QdWordM = ReadDataWordMuxM[159:32]; - 5'b00101: QdWordM = ReadDataWordMuxM[167:40]; - 5'b00110: QdWordM = ReadDataWordMuxM[175:48]; - 5'b00111: QdWordM = ReadDataWordMuxM[183:56]; - 5'b01000: QdWordM = ReadDataWordMuxM[191:64]; - 5'b01001: QdWordM = ReadDataWordMuxM[199:72]; - 5'b01010: QdWordM = ReadDataWordMuxM[207:80]; - 5'b01011: QdWordM = ReadDataWordMuxM[215:88]; - 5'b01100: QdWordM = ReadDataWordMuxM[223:96]; - 5'b01101: QdWordM = ReadDataWordMuxM[231:104]; - 5'b01110: QdWordM = ReadDataWordMuxM[239:112]; - 5'b01111: QdWordM = ReadDataWordMuxM[247:120]; - 5'b10000: QdWordM = 
ReadDataWordMuxM[255:128]; - 5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]}; - 5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]}; - 5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]}; - 5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]}; - 5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]}; - 5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]}; - 5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]}; - 5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]}; - 5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]}; - 5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]}; - 5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]}; - 5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]}; - 5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]}; - 5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]}; - 5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]}; - endcase - - assign ByteM = QdWordM[7:0]; - assign HalfwordM = QdWordM[15:0]; - assign WordM = QdWordM[31:0]; - assign DblWordM = QdWordM[63:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else if (LLEN == 64) begin:swrmux - logic [31:0] WordM; - logic [63:0] DblWordM; - always_comb - case(PAdrSwap[3:0]) - 4'b0000: DblWordM = ReadDataWordMuxM[63:0]; - 4'b0001: DblWordM = ReadDataWordMuxM[71:8]; - 4'b0010: DblWordM = ReadDataWordMuxM[79:16]; - 4'b0011: DblWordM = ReadDataWordMuxM[87:24]; - 4'b0100: DblWordM = ReadDataWordMuxM[95:32]; - 4'b0101: DblWordM = ReadDataWordMuxM[103:40]; - 4'b0110: DblWordM = ReadDataWordMuxM[111:48]; - 4'b0111: DblWordM = ReadDataWordMuxM[119:56]; - 4'b1000: DblWordM = ReadDataWordMuxM[127:64]; - 4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]}; - 4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]}; - 4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]}; - 4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]}; - 4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]}; - 4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]}; - 4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]}; - endcase - - assign ByteM = DblWordM[7:0]; - assign HalfwordM = DblWordM[15:0]; - assign WordM = DblWordM[31:0]; - - // sign extension/ NaN boxing - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - 3'b011: ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - //3'b100: ReadDataM = FpLoadStoreM ? 
ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128 - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - 3'b110: ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]}; // lwu - default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // Shouldn't happen - endcase - - end else begin:swrmux // 32-bit - - logic [31:0] WordM; - always_comb - case(PAdrSwap[2:0]) - 3'b000: WordM = ReadDataWordMuxM[31:0]; - 3'b001: WordM = ReadDataWordMuxM[39:8]; - 3'b010: WordM = ReadDataWordMuxM[47:16]; - 3'b011: WordM = ReadDataWordMuxM[55:24]; - 3'b100: WordM = ReadDataWordMuxM[63:32]; - 3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]}; - 3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]}; - 3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]}; - endcase - - assign ByteM = WordM[7:0]; - assign HalfwordM = WordM[15:0]; - - // sign extension - always_comb - case(Funct3M) - 3'b000: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - 3'b010: ReadDataM = {{LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - 3'b011: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // fld - 3'b100: ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu - 3'b101: ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu - default: ReadDataM = ReadDataWordMuxM[LLEN-1:0]; // Shouldn't happen - endcase - end -endmodule From 54c1d28c8ba71b20d941a1560a0634ce05428f31 Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Wed, 6 Mar 2024 15:43:55 -0600 Subject: [PATCH 40/40] Fixed missing case in the align AccesByteOffset Mux. 
--- src/lsu/align.sv | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index e8cedec6c..d4603941b 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -98,10 +98,11 @@ module align import cvw::*; #(parameter cvw_t P) ( 3'b000: AccessByteOffsetM = 0; // byte access 3'b001: AccessByteOffsetM = {{OFFSET_LEN-1{1'b0}}, IEUAdrM[0]}; // half access 3'b010: AccessByteOffsetM = {{OFFSET_LEN-2{1'b0}}, IEUAdrM[1:0]}; // word access - 3'b011: AccessByteOffsetM = {{OFFSET_LEN-3{1'b0}}, IEUAdrM[2:0]}; // double access + 3'b011: if(P.LLEN >= 64) AccessByteOffsetM = {{OFFSET_LEN-3{1'b0}}, IEUAdrM[2:0]}; // double access + else AccessByteOffsetM = 0; // shouldn't happen 3'b100: if(P.LLEN == 128) AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; // quad access - else AccessByteOffsetM = 0; // invalid - default: AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; + else AccessByteOffsetM = IEUAdrM[OFFSET_LEN-1:0]; + default: AccessByteOffsetM = 0; // shouldn't happen endcase case (Funct3M[1:0]) 2'b00: PotentialSpillM = 0; // byte access