From 1fd678b43348fd447a7701c4d8fec968593f9ec6 Mon Sep 17 00:00:00 2001
From: Rose Thompson
Date: Wed, 14 Feb 2024 12:14:19 -0600
Subject: [PATCH] Optimized the align logic for loads.

align.sv no longer shifts the 2*LLEN-bit spill data by the byte offset;
it exports the unshifted ReadDataWordSpillAllM bus in place of
DCacheReadDataWordSpillM. lsu.sv carries the wider bus through the
read-data mux and the load endian swap (endianswapdouble) and, when
MISALIGN_SUPPORT is set, selects the bytes in subwordreaddouble.
---
 src/lsu/align.sv             |   6 +-
 src/lsu/endianswapdouble.sv  | 114 ++++++++++++++++++++
 src/lsu/lsu.sv               |  29 ++++--
 src/lsu/subwordreaddouble.sv | 196 +++++++++++++++++++++++++++++++++++
 4 files changed, 331 insertions(+), 14 deletions(-)
 create mode 100644 src/lsu/endianswapdouble.sv
 create mode 100644 src/lsu/subwordreaddouble.sv

diff --git a/src/lsu/align.sv b/src/lsu/align.sv
index d516dad2a..7c3703886 100644
--- a/src/lsu/align.sv
+++ b/src/lsu/align.sv
@@ -53,7 +53,7 @@ module align import cvw::*; #(parameter cvw_t P) (
   output logic [P.XLEN-1:0] IEUAdrSpillM, // IEUAdrM for one of the two memory addresses of the spill
   output logic SelSpillE, // During the transition between the two spill operations, the IFU should stall the pipeline
   output logic SelStoreDelay, //*** this is bad. really don't like moving this outside
-  output logic [P.LLEN-1:0] DCacheReadDataWordSpillM, // The final 32 bit instruction after merging the two spilled fetches into 1 instruction
+  output logic [P.LLEN*2-1:0] ReadDataWordSpillAllM, // Unshifted 2*LLEN-bit read data; the byte-offset shift formerly applied in this module is removed by this patch
   output logic SpillStallM);
 
   localparam LLENINBYTES = P.LLEN/8;
@@ -67,8 +67,6 @@ module align import cvw::*; #(parameter cvw_t P) (
   logic SpillSaveM;
   logic [P.LLEN-1:0] ReadDataWordFirstHalfM;
   logic MisalignedM;
-  logic [P.LLEN*2-1:0] ReadDataWordSpillAllM;
-  logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM;
 
   logic [P.XLEN-1:0] IEUAdrIncrementM;
 
@@ -148,8 +146,6 @@ module align import cvw::*; #(parameter cvw_t P) (
   // shifter (4:1 mux for 32 bit, 8:1 mux for 64 bit)
   // 8 * is for shifting by bytes not bits
   assign ShiftAmount = SelHPTW ? '0 : {AccessByteOffsetM, 3'b0}; // AND gate
-  assign ReadDataWordSpillShiftedM = ReadDataWordSpillAllM >> ShiftAmount;
-  assign DCacheReadDataWordSpillM = ReadDataWordSpillShiftedM[P.LLEN-1:0];
 
   // write path. Also has the 8:1 shifter muxing for the byteoffset
   // then it also has the mux to select when a spill occurs
diff --git a/src/lsu/endianswapdouble.sv b/src/lsu/endianswapdouble.sv
new file mode 100644
index 000000000..133149f0e
--- /dev/null
+++ b/src/lsu/endianswapdouble.sv
@@ -0,0 +1,114 @@
+///////////////////////////////////////////
+// endianswapdouble.sv
+//
+// Written: David_Harris@hmc.edu
+// Created: 7 May 2022
+// Modified: 18 January 2023
+//
+// Purpose: Swap byte order for Big-Endian accesses (explicit cases for LEN = 256, 128, 64; any other LEN is handled as 32 bits)
+//
+// Documentation: RISC-V System on Chip Design Chapter 5 (Figure 5.9)
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module endianswapdouble #(parameter LEN) (   // LEN: bus width in bits (256, 128, 64; anything else is handled as 32)
+  input  logic           BigEndianM,          // 1: reverse the byte order of a; 0: pass a through unchanged
+  input  logic [LEN-1:0] a,                   // data in
+  output logic [LEN-1:0] y                    // data out
+);
+
+  if(LEN == 256) begin                        // 32 bytes: byte i of a drives byte 31-i of y
+    always_comb
+      if (BigEndianM) begin // swap endianness
+        y[255:248] = a[7:0];
+        y[247:240] = a[15:8];
+        y[239:232] = a[23:16];
+        y[231:224] = a[31:24];
+        y[223:216] = a[39:32];
+        y[215:208] = a[47:40];
+        y[207:200] = a[55:48];
+        y[199:192] = a[63:56];
+        y[191:184] = a[71:64];
+        y[183:176] = a[79:72];
+        y[175:168] = a[87:80];
+        y[167:160] = a[95:88];
+        y[159:152] = a[103:96];
+        y[151:144] = a[111:104];
+        y[143:136] = a[119:112];
+        y[135:128] = a[127:120];
+        y[127:120] = a[135:128];
+        y[119:112] = a[143:136]; // was a[142:136]: 7-bit slice dropped a[143]
+        y[111:104] = a[151:144]; // was a[152:144]: 9-bit slice overlapped the next byte
+        y[103:96]  = a[159:152];
+        y[95:88]   = a[167:160];
+        y[87:80]   = a[175:168];
+        y[79:72]   = a[183:176];
+        y[71:64]   = a[191:184];
+        y[63:56]   = a[199:192];
+        y[55:48]   = a[207:200];
+        y[47:40]   = a[215:208];
+        y[39:32]   = a[223:216];
+        y[31:24]   = a[231:224];
+        y[23:16]   = a[239:232];
+        y[15:8]    = a[247:240];
+        y[7:0]     = a[255:248];
+      end else y = a;
+  end else if(LEN == 128) begin               // 16 bytes
+    always_comb
+      if (BigEndianM) begin // swap endianness
+        y[127:120] = a[7:0];
+        y[119:112] = a[15:8];
+        y[111:104] = a[23:16];
+        y[103:96]  = a[31:24];
+        y[95:88]   = a[39:32];
+        y[87:80]   = a[47:40];
+        y[79:72]   = a[55:48];
+        y[71:64]   = a[63:56];
+        y[63:56]   = a[71:64];
+        y[55:48]   = a[79:72];
+        y[47:40]   = a[87:80];
+        y[39:32]   = a[95:88];
+        y[31:24]   = a[103:96];
+        y[23:16]   = a[111:104];
+        y[15:8]    = a[119:112];
+        y[7:0]     = a[127:120];
+      end else y = a;
+  end else if(LEN == 64) begin                // 8 bytes
+    always_comb
+      if (BigEndianM) begin // swap endianness
+        y[63:56] = a[7:0];
+        y[55:48] = a[15:8];
+        y[47:40] = a[23:16];
+        y[39:32] = a[31:24];
+        y[31:24] = a[39:32];
+        y[23:16] = a[47:40];
+        y[15:8]  = a[55:48];
+        y[7:0]   = a[63:56];
+      end else y = a;
+  end else begin                              // any other LEN: 4-byte swap
+    always_comb
+      if (BigEndianM) begin
+        y[31:24] = a[7:0];
+        y[23:16] = a[15:8];
+        y[15:8]  = a[23:16];
+        y[7:0]   = a[31:24];
+      end else y = a;
+  end
+endmodule
diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv
index f53bb9296..591353ac7 100644
--- a/src/lsu/lsu.sv
+++ b/src/lsu/lsu.sv
@@ -128,9 +128,8 @@ module lsu import cvw::*; #(parameter cvw_t P) (
   logic [MLEN-1:0] LSUWriteDataSpillM; // Final write data
   logic [MLEN/8-1:0] ByteMaskSpillM; // Selects which bytes within a word to write
   /* verilator lint_on WIDTHEXPAND */
-  logic [P.LLEN-1:0] DCacheReadDataWordSpillM; // D$ read data
-  logic [P.LLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data
-  logic [P.LLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data
+  logic [MLEN-1:0] ReadDataWordMuxM; // DTIM or D$ read data
+  logic [MLEN-1:0] LittleEndianReadDataWordM; // Endian-swapped read data
   logic [P.LLEN-1:0] ReadDataWordM; // Read data before subword selection
   logic [P.LLEN-1:0] ReadDataM; // Final read data
 
@@ -155,6 +154,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
   logic SelDTIM; // Select DTIM rather than bus or D$
   logic [P.XLEN-1:0] WriteDataZM;
   logic LSULoadPageFaultM, LSUStoreAmoPageFaultM;
+  logic [MLEN-1:0] ReadDataWordSpillAllM; // Unshifted read data from align (or D$ read data when align is not instantiated)
 
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Pipeline for IEUAdr E to M
@@ -168,7 +168,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
                      .MemRWM,
                      .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW,
                      .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM,
-                     .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .DCacheReadDataWordSpillM, .SpillStallM,
+                     .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM,
                      .SelStoreDelay);
     assign IEUAdrExtM = {2'b00, IEUAdrSpillM};
     assign IEUAdrExtE = {2'b00, IEUAdrSpillE};
@@ -176,7 +176,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
     assign IEUAdrExtM = {2'b00, IEUAdrM};
     assign IEUAdrExtE = {2'b00, IEUAdrE};
     assign SelSpillE = '0;
-    assign DCacheReadDataWordSpillM = DCacheReadDataWordM;
+    assign ReadDataWordSpillAllM = DCacheReadDataWordM;
     assign ByteMaskSpillM = ByteMaskM;
     assign LSUWriteDataSpillM = LSUWriteDataM;
     assign MemRWSpillM = MemRWM;
@@ -298,6 +298,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
       localparam AHBWLOGBWPL = $clog2(BEATSPERLINE); // Log2 of ^
       localparam LINELEN = P.DCACHE_LINELENINBITS; // Number of bits in cacheline
       localparam LLENPOVERAHBW = P.LLEN / P.AHBW; // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. (implementation limitation)
+      localparam MLENPOVERAHBW = MLEN / P.AHBW; // Number of AHB beats in an MLEN word; replication count for the MLEN-wide uncached data mux input
       localparam CACHEWORDLEN = P.ZICCLSM_SUPPORTED ? 2*P.LLEN : P.LLEN; // Width of the cache's input and output data buses. Misaligned doubles width for fast access
 
       logic [LINELEN-1:0] FetchBuffer; // Temporary buffer to hold partially fetched cacheline
@@ -361,9 +362,14 @@ module lsu import cvw::*; #(parameter cvw_t P) (
       // Uncache bus access may be smaller width than LLEN. Duplicate LLENPOVERAHBW times.
       // *** DTIMReadDataWordM should be increased to LLEN.
       // pma should generate exception for LLEN read to periph.
+/* -----\/----- EXCLUDED (superseded by the MLEN-wide mux below) -----\/-----
       mux3 #(P.LLEN) UnCachedDataMux(.d0(DCacheReadDataWordSpillM), .d1({LLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}),
                                      .d2({{P.LLEN-P.XLEN{1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}),
                                      .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM));
+ -----/\----- EXCLUDED -----/\----- */
+      mux3 #(MLEN) UnCachedDataMux(.d0(ReadDataWordSpillAllM), .d1({MLENPOVERAHBW{FetchBuffer[P.XLEN-1:0]}}),
+                                   .d2({{(MLEN-P.XLEN){1'b0}}, DTIMReadDataWordM[P.XLEN-1:0]}),
+                                   .s({SelDTIM, ~(CacheableOrFlushCacheM)}), .y(ReadDataWordMuxM));
     end else begin : passthrough // No Cache, use simple ahbinterface instad of ahbcacheinterface
       logic [1:0] BusRW; // Non-DTIM memory access, ignore cacheableM
       logic [P.XLEN-1:0] FetchBuffer;
@@ -416,9 +422,14 @@ module lsu import cvw::*; #(parameter cvw_t P) (
   /////////////////////////////////////////////////////////////////////////////////////////////
   // Subword Accesses
   /////////////////////////////////////////////////////////////////////////////////////////////
-
-  subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM,
-                                    .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
+
+  if(MISALIGN_SUPPORT) begin // double-width read word: byte-offset selection happens inside subwordreaddouble
+    subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM,
+                                            .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
+  end else begin
+    subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM,
+                                      .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM);
+  end
   subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM);
 
   // Compute byte masks
@@ -438,7 +449,7 @@ module lsu import cvw::*; #(parameter cvw_t P) (
 
   if (P.BIGENDIAN_SUPPORTED) begin:endian
     endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM));
-    endianswap #(P.LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM));
+    endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM));
   end else begin
     assign LSUWriteDataM = LittleEndianWriteDataM;
     assign LittleEndianReadDataWordM = ReadDataWordMuxM;
diff --git a/src/lsu/subwordreaddouble.sv b/src/lsu/subwordreaddouble.sv
new file mode 100644
index 000000000..936240cf7
--- /dev/null
+++ b/src/lsu/subwordreaddouble.sv
@@ -0,0 +1,196 @@
+///////////////////////////////////////////
+// subwordreaddouble.sv
+//
+// Written: David_Harris@hmc.edu
+// Created: 9 January 2021
+// Modified: 18 January 2023
+//
+// Purpose: Extract subwords at a byte offset from a double-width (2*LLEN) read word and sign extend for reads
+//
+// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9)
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// https://github.com/openhwgroup/cvw
+//
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+module subwordreaddouble #(parameter LLEN)      // LLEN: width of ReadDataM; explicit branches for 128 and 64, anything else is handled as 32
+  (
+   input  logic [LLEN*2-1:0] ReadDataWordMuxM,  // Double-width read word; the load is taken at a byte offset inside it
+   input  logic [2:0]        PAdrM,             // Low address bits, used as the byte offset of the access
+   input  logic [2:0]        Funct3M,           // [1:0] access size, [2] unsigned bit
+   input  logic              FpLoadStoreM,      // FP load: upper result bits are forced to 1 (NaN boxing)
+   input  logic              BigEndianM,        // Selects the big-endian byte offset instead of PAdrM
+   output logic [LLEN-1:0]   ReadDataM          // Sign-/zero-extended or NaN-boxed load result
+);
+
+  logic [7:0]  ByteM;                           // Offset-selected byte
+  logic [15:0] HalfwordM;                       // Offset-selected halfword
+  logic [4:0]  PAdrSwap;                        // Byte offset actually used by the selection muxes
+  logic [4:0]  BigEndianPAdr;                   // Byte offset when BigEndianM is set
+  logic [4:0]  LengthM;                         // Access length in bytes
+
+  // Funct3M[2] is the unsigned bit. It is masked off below for integer loads when computing the length.
+  // Funct3M[1:0] is the size of the memory access.
+  assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM};
+  /* verilator lint_off WIDTHEXPAND */
+  /* verilator lint_off WIDTHTRUNC */
+  assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; // same bytes counted from the other end of the LLEN/4-byte (2*LLEN-bit) word
+  /* verilator lint_on WIDTHTRUNC */
+  /* verilator lint_on WIDTHEXPAND */
+
+  always_comb
+    case(Funct3M & {FpLoadStoreM, 2'b11})       // integer loads: bit 2 cleared so lbu/lhu/lwu get the same length as lb/lh/lw
+      3'b000:  LengthM = 5'd1;
+      3'b001:  LengthM = 5'd2;
+      3'b010:  LengthM = 5'd4;
+      3'b011:  LengthM = 5'd8;
+      3'b100:  LengthM = 5'd16;                 // reachable only when FpLoadStoreM is set
+      default: LengthM = 5'd8;
+    endcase
+
+  if (LLEN == 128) begin:swrmux
+    logic [31:0]  WordM;
+    logic [63:0]  DblWordM;
+    logic [127:0] QdWordM;                      // was [63:0]: too narrow for the 128-bit slices assigned below
+    always_comb
+      case(PAdrSwap)
+        5'b00000: QdWordM = ReadDataWordMuxM[127:0];
+        5'b00001: QdWordM = ReadDataWordMuxM[135:8];
+        5'b00010: QdWordM = ReadDataWordMuxM[143:16];
+        5'b00011: QdWordM = ReadDataWordMuxM[151:24];
+        5'b00100: QdWordM = ReadDataWordMuxM[159:32];
+        5'b00101: QdWordM = ReadDataWordMuxM[167:40];
+        5'b00110: QdWordM = ReadDataWordMuxM[175:48];
+        5'b00111: QdWordM = ReadDataWordMuxM[183:56];
+        5'b01000: QdWordM = ReadDataWordMuxM[191:64];
+        5'b01001: QdWordM = ReadDataWordMuxM[199:72];
+        5'b01010: QdWordM = ReadDataWordMuxM[207:80];
+        5'b01011: QdWordM = ReadDataWordMuxM[215:88];
+        5'b01100: QdWordM = ReadDataWordMuxM[223:96];
+        5'b01101: QdWordM = ReadDataWordMuxM[231:104];
+        5'b01110: QdWordM = ReadDataWordMuxM[239:112];
+        5'b01111: QdWordM = ReadDataWordMuxM[247:120];
+        5'b10000: QdWordM = ReadDataWordMuxM[255:128];
+        5'b10001: QdWordM = {8'b0, ReadDataWordMuxM[255:136]};
+        5'b10010: QdWordM = {16'b0, ReadDataWordMuxM[255:144]};
+        5'b10011: QdWordM = {24'b0, ReadDataWordMuxM[255:152]};
+        5'b10100: QdWordM = {32'b0, ReadDataWordMuxM[255:160]};
+        5'b10101: QdWordM = {40'b0, ReadDataWordMuxM[255:168]};
+        5'b10110: QdWordM = {48'b0, ReadDataWordMuxM[255:176]};
+        5'b10111: QdWordM = {56'b0, ReadDataWordMuxM[255:184]};
+        5'b11000: QdWordM = {64'b0, ReadDataWordMuxM[255:192]};
+        5'b11001: QdWordM = {72'b0, ReadDataWordMuxM[255:200]};
+        5'b11010: QdWordM = {80'b0, ReadDataWordMuxM[255:208]};
+        5'b11011: QdWordM = {88'b0, ReadDataWordMuxM[255:216]};
+        5'b11100: QdWordM = {96'b0, ReadDataWordMuxM[255:224]};
+        5'b11101: QdWordM = {104'b0, ReadDataWordMuxM[255:232]};
+        5'b11110: QdWordM = {112'b0, ReadDataWordMuxM[255:240]};
+        5'b11111: QdWordM = {120'b0, ReadDataWordMuxM[255:248]};
+      endcase
+
+    assign ByteM = QdWordM[7:0];
+    assign HalfwordM = QdWordM[15:0];
+    assign WordM = QdWordM[31:0];
+    assign DblWordM = QdWordM[63:0];
+
+    // sign extension/ NaN boxing
+    always_comb
+      case(Funct3M)
+        3'b000:  ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM};                               // lb
+        3'b001:  ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};  // lh/flh
+        3'b010:  ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]};          // lw/flw
+        3'b011:  ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]};    // ld/fld
+        3'b100:  ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]};                              // lbu
+        //3'b100:  ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128
+        3'b101:  ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]};                        // lhu
+        3'b110:  ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]};                            // lwu
+        default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM};                               // Shouldn't happen
+      endcase
+
+  end else if (LLEN == 64) begin:swrmux
+    logic [31:0] WordM;
+    logic [63:0] DblWordM;
+    always_comb
+      case(PAdrSwap[3:0])
+        4'b0000: DblWordM = ReadDataWordMuxM[63:0];
+        4'b0001: DblWordM = ReadDataWordMuxM[71:8];
+        4'b0010: DblWordM = ReadDataWordMuxM[79:16];
+        4'b0011: DblWordM = ReadDataWordMuxM[87:24];
+        4'b0100: DblWordM = ReadDataWordMuxM[95:32];
+        4'b0101: DblWordM = ReadDataWordMuxM[103:40];
+        4'b0110: DblWordM = ReadDataWordMuxM[111:48];
+        4'b0111: DblWordM = ReadDataWordMuxM[119:56];
+        4'b1000: DblWordM = ReadDataWordMuxM[127:64];
+        4'b1001: DblWordM = {8'b0, ReadDataWordMuxM[127:72]};
+        4'b1010: DblWordM = {16'b0, ReadDataWordMuxM[127:80]};
+        4'b1011: DblWordM = {24'b0, ReadDataWordMuxM[127:88]};
+        4'b1100: DblWordM = {32'b0, ReadDataWordMuxM[127:96]};
+        4'b1101: DblWordM = {40'b0, ReadDataWordMuxM[127:104]};
+        4'b1110: DblWordM = {48'b0, ReadDataWordMuxM[127:112]};
+        4'b1111: DblWordM = {56'b0, ReadDataWordMuxM[127:120]};
+      endcase
+
+    assign ByteM = DblWordM[7:0];
+    assign HalfwordM = DblWordM[15:0];
+    assign WordM = DblWordM[31:0];
+
+    // sign extension/ NaN boxing
+    always_comb
+      case(Funct3M)
+        3'b000:  ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM};                               // lb
+        3'b001:  ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};  // lh/flh
+        3'b010:  ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]};          // lw/flw
+        3'b011:  ReadDataM = {{LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]};    // ld/fld
+        3'b100:  ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]};                              // lbu
+        //3'b100:  ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - only needed when LLEN=128
+        3'b101:  ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]};                        // lhu
+        3'b110:  ReadDataM = {{LLEN-32{1'b0}}, WordM[31:0]};                            // lwu
+        default: ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM};                               // Shouldn't happen
+      endcase
+
+  end else begin:swrmux // 32-bit
+
+    logic [31:0] WordM;
+    always_comb
+      case(PAdrSwap[2:0])
+        3'b000: WordM = ReadDataWordMuxM[31:0];
+        3'b001: WordM = ReadDataWordMuxM[39:8];
+        3'b010: WordM = ReadDataWordMuxM[47:16];
+        3'b011: WordM = ReadDataWordMuxM[55:24];
+        3'b100: WordM = ReadDataWordMuxM[63:32];
+        3'b101: WordM = {8'b0, ReadDataWordMuxM[63:40]};
+        3'b110: WordM = {16'b0, ReadDataWordMuxM[63:48]};
+        3'b111: WordM = {24'b0, ReadDataWordMuxM[63:56]};
+      endcase
+
+    assign ByteM = WordM[7:0];
+    assign HalfwordM = WordM[15:0];
+
+    // sign extension
+    always_comb
+      case(Funct3M)
+        3'b000:  ReadDataM = {{LLEN-8{ByteM[7]}}, ByteM};                               // lb
+        3'b001:  ReadDataM = {{LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]};  // lh/flh
+        3'b010:  ReadDataM = {{LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]};          // lw/flw; was ReadDataWordMuxM[31:0], which ignored the byte offset
+        3'b011:  ReadDataM = WordM[LLEN-1:0];                                           // fld; offset-selected like the other sizes
+        3'b100:  ReadDataM = {{LLEN-8{1'b0}}, ByteM[7:0]};                              // lbu
+        3'b101:  ReadDataM = {{LLEN-16{1'b0}}, HalfwordM[15:0]};                        // lhu
+        default: ReadDataM = WordM[LLEN-1:0];                                           // Shouldn't happen
+      endcase
+  end
+endmodule