From dac8fc16af30dcd1182c9f7f4d69383dfde042fe Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Mon, 19 Feb 2024 12:26:29 -0600 Subject: [PATCH] Partially working optimized subwordwrite for misaligned. --- src/lsu/lsu.sv | 15 +++-- src/lsu/subwordwritedouble.sv | 117 ++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 7 deletions(-) create mode 100644 src/lsu/subwordwritedouble.sv diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index 591353ac7..e10183a9e 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -136,8 +136,8 @@ module lsu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] IHWriteDataM; // IEU or HPTW write data logic [P.XLEN-1:0] IMAWriteDataM; // IEU, HPTW, or AMO write data logic [P.LLEN-1:0] IMAFWriteDataM; // IEU, HPTW, AMO, or FPU write data - logic [P.LLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data - logic [P.LLEN-1:0] LSUWriteDataM; // Final write data + logic [MLEN-1:0] LittleEndianWriteDataM; // Ending-swapped write data + logic [MLEN-1:0] LSUWriteDataM; // Final write data logic [(P.LLEN-1)/8:0] ByteMaskM; // Selects which bytes within a word to write logic [(P.LLEN-1)/8:0] ByteMaskExtendedM; // Selects which bytes within a word to write logic [1:0] MemRWSpillM; @@ -167,7 +167,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( align #(P) align(.clk, .reset, .StallM, .FlushM, .IEUAdrE, .IEUAdrM, .Funct3M, .MemRWM, .DCacheReadDataWordM, .CacheBusHPWTStall, .SelHPTW, - .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM, .ByteMaskSpillM, .LSUWriteDataSpillM, + .ByteMaskM, .ByteMaskExtendedM, .LSUWriteDataM(LSUWriteDataM[P.LLEN-1:0]), .ByteMaskSpillM, .LSUWriteDataSpillM, .IEUAdrSpillE, .IEUAdrSpillM, .SelSpillE, .ReadDataWordSpillAllM, .SpillStallM, .SelStoreDelay); assign IEUAdrExtM = {2'b00, IEUAdrSpillM}; @@ -337,7 +337,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .CacheRW(SelStoreDelay ? 
2'b00 : CacheRWM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), - .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, + .CacheWriteData(LSUWriteDataM), .SelHPTW, .CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), @@ -351,7 +351,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( .HCLK(clk), .HRESETn(~reset), .Flush(FlushW | IgnoreRequestTLB), .HRDATA, .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), - .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM), + .BeatCount, .SelBusBeat, .CacheReadDataWordM(DCacheReadDataWordM[P.LLEN-1:0]), .WriteDataM(LSUWriteDataM[P.LLEN-1:0]), .Funct3(LSUFunct3M), .HADDR(LSUHADDR), .CacheBusAdr(DCacheBusAdr), .CacheBusRW, .BusAtomic, .BusCMOZero, .CacheableOrFlushCacheM, .CacheBusAck(DCacheBusAck), .FetchBuffer, .PAdr(PAdrM), .Cacheable(CacheableOrFlushCacheM), .BusRW, .Stall(GatedStallW), @@ -426,11 +426,12 @@ module lsu import cvw::*; #(parameter cvw_t P) ( if(MISALIGN_SUPPORT) begin subwordreaddouble #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + subwordwritedouble #(P.LLEN) subwordwrite(.LSUFunct3M, .PAdrM(PAdrM[2:0]), .FpLoadStoreM, .BigEndianM, .IMAFWriteDataM, .LittleEndianWriteDataM); end else begin subwordread #(P.LLEN) subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); + subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); end - subwordwrite #(P.LLEN) subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); // Compute byte 
masks swbytemask #(P.LLEN, P.ZICCLSM_SUPPORTED) swbytemask(.Size(LSUFunct3M), .Adr(PAdrM[$clog2(P.LLEN/8)-1:0]), .ByteMask(ByteMaskM), .ByteMaskExtended(ByteMaskExtendedM)); @@ -448,7 +449,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( ///////////////////////////////////////////////////////////////////////////////////////////// if (P.BIGENDIAN_SUPPORTED) begin:endian - endianswap #(P.LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); + endianswapdouble #(MLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); endianswapdouble #(MLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); end else begin assign LSUWriteDataM = LittleEndianWriteDataM; diff --git a/src/lsu/subwordwritedouble.sv b/src/lsu/subwordwritedouble.sv new file mode 100644 index 000000000..728a4f4aa --- /dev/null +++ b/src/lsu/subwordwritedouble.sv @@ -0,0 +1,117 @@ +/////////////////////////////////////////// +// subwordwritedouble.sv +// +// Written: David_Harris@hmc.edu +// Created: 9 January 2021 +// Modified: 18 January 2023 +// +// Purpose: Masking and muxing for subword writes +// +// Documentation: RISC-V System on Chip Design Chapter 4 (Figure 4.9) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. 
See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// +// subwordwritedouble: shifts the LLEN-bit write data to byte offset PAdrSwap inside a 2*LLEN-bit output field. +module subwordwritedouble #(parameter LLEN) ( + input logic [2:0] LSUFunct3M, // funct3 of the memory access; decoded below into LengthM + input logic [2:0] PAdrM, // byte offset of the access (lsu.sv connects PAdrM[2:0]) + input logic FpLoadStoreM, // when 0, LSUFunct3M[2] is masked off before the length decode + input logic BigEndianM, // selects BigEndianPAdr instead of PAdrM as the offset + input logic [LLEN-1:0] IMAFWriteDataM, // unshifted write data + output logic [LLEN*2-1:0] LittleEndianWriteDataM // write data placed at the selected byte offset +); + + // *** RT: This logic is duplicated in subwordreaddouble. Merge the two. + logic [4:0] PAdrSwap; // byte offset actually used to position the data + logic [4:0] BigEndianPAdr; // offset used when BigEndianM is set + logic [4:0] LengthM; // access length decoded from LSUFunct3M (presumably bytes; it is subtracted from a byte offset) + // Funct3M[2] is the unsigned bit. mask upper bits. + // Funct3M[1:0] is the size of the memory access. + assign PAdrSwap = BigEndianM ? BigEndianPAdr : {2'b0, PAdrM}; + /* verilator lint_off WIDTHEXPAND */ + /* verilator lint_off WIDTHTRUNC */ + assign BigEndianPAdr = (LLEN/4) - PAdrM - LengthM; // LLEN/4 is the byte count of the 2*LLEN-bit output; the result is truncated to 5 bits (presumably why the lint pragmas are here) + /* verilator lint_on WIDTHTRUNC */ + /* verilator lint_on WIDTHEXPAND */ + // Length decode: LSUFunct3M[2] only takes part when FpLoadStoreM is set. + always_comb + case(LSUFunct3M & {FpLoadStoreM, 2'b11}) + 3'b000: LengthM = 5'd1; + 3'b001: LengthM = 5'd2; + 3'b010: LengthM = 5'd4; + 3'b011: LengthM = 5'd8; + 3'b100: LengthM = 5'd16; + default: LengthM = 5'd8; + endcase // case (LSUFunct3M & {FpLoadStoreM, 2'b11}) + + // *** RT: End duplicated logic + + logic [LLEN*2-1:0] IMAFWriteData2M; // write data repeated twice + assign IMAFWriteData2M = {IMAFWriteDataM, IMAFWriteDataM}; + localparam OffsetIndex = $clog2(LLEN/8); // address bits needed to index a byte within LLEN bits + logic [LLEN*2-1:0] LittleEndianWriteDataMTemp; + // *** RT: Switch to something like this. NOTE(review): LittleEndianWriteDataMTemp is not read anywhere in this module, and it shifts by the raw byte offset whereas the case tables below shift by 8 bits per offset step. 
+ assign LittleEndianWriteDataMTemp = (IMAFWriteData2M << PAdrSwap[OffsetIndex-1:0]) | (IMAFWriteData2M >> ~PAdrSwap[OffsetIndex-1:0]); + + + // Position the write data at byte offset PAdrSwap in the 2*LLEN-bit output: LLEN 128 and 32 zero-fill the other bits, LLEN 64 rotates {data, data} so the other bits hold wrapped copies + if (LLEN == 128) begin:sww // 128-bit: data shifted left by 8*PAdrSwap[3:0] bits, zero-filled + always_comb + case(PAdrSwap[3:0]) + 4'b0000: LittleEndianWriteDataM = {128'b0, IMAFWriteDataM }; + 4'b0001: LittleEndianWriteDataM = {120'b0, IMAFWriteDataM, 8'b0 }; + 4'b0010: LittleEndianWriteDataM = {112'b0, IMAFWriteDataM, 16'b0}; + 4'b0011: LittleEndianWriteDataM = {104'b0, IMAFWriteDataM, 24'b0}; + 4'b0100: LittleEndianWriteDataM = {96'b0, IMAFWriteDataM, 32'b0}; + 4'b0101: LittleEndianWriteDataM = {88'b0, IMAFWriteDataM, 40'b0}; + 4'b0110: LittleEndianWriteDataM = {80'b0, IMAFWriteDataM, 48'b0}; + 4'b0111: LittleEndianWriteDataM = {72'b0, IMAFWriteDataM, 56'b0}; + 4'b1000: LittleEndianWriteDataM = {64'b0, IMAFWriteDataM, 64'b0}; + 4'b1001: LittleEndianWriteDataM = {56'b0, IMAFWriteDataM, 72'b0 }; + 4'b1010: LittleEndianWriteDataM = {48'b0, IMAFWriteDataM, 80'b0}; + 4'b1011: LittleEndianWriteDataM = {40'b0, IMAFWriteDataM, 88'b0}; + 4'b1100: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM, 96'b0}; + 4'b1101: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 104'b0}; + 4'b1110: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 112'b0}; + 4'b1111: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 120'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // sq; every 4-bit offset is listed above, and this LLEN-bit value is implicitly zero-extended to 2*LLEN bits + endcase + end else if (LLEN == 64) begin:sww // 64-bit: {data, data} rotated left by 8*PAdrSwap[2:0] bits + always_comb + case(PAdrSwap[2:0]) + 3'b000: LittleEndianWriteDataM = {IMAFWriteDataM, IMAFWriteDataM}; + 3'b001: LittleEndianWriteDataM = {IMAFWriteDataM[55:0], IMAFWriteDataM, IMAFWriteDataM[63:56]}; + 3'b010: LittleEndianWriteDataM = {IMAFWriteDataM[47:0], IMAFWriteDataM, IMAFWriteDataM[63:48]}; + 3'b011: LittleEndianWriteDataM = {IMAFWriteDataM[39:0], IMAFWriteDataM, IMAFWriteDataM[63:40]}; + 3'b100: LittleEndianWriteDataM = {IMAFWriteDataM[31:0], IMAFWriteDataM, IMAFWriteDataM[63:32]}; + 3'b101: LittleEndianWriteDataM = {IMAFWriteDataM[23:0], 
IMAFWriteDataM, IMAFWriteDataM[63:24]}; + 3'b110: LittleEndianWriteDataM = {IMAFWriteDataM[15:0], IMAFWriteDataM, IMAFWriteDataM[63:16]}; + 3'b111: LittleEndianWriteDataM = {IMAFWriteDataM[7:0], IMAFWriteDataM, IMAFWriteDataM[63:8] }; + endcase + end else begin:sww // 32-bit: data shifted left by 8*PAdrSwap[1:0] bits, zero-filled + always_comb + case(PAdrSwap[1:0]) + 2'b00: LittleEndianWriteDataM = {32'b0, IMAFWriteDataM }; + 2'b01: LittleEndianWriteDataM = {24'b0, IMAFWriteDataM, 8'b0 }; + 2'b10: LittleEndianWriteDataM = {16'b0, IMAFWriteDataM, 16'b0}; + 2'b11: LittleEndianWriteDataM = {8'b0, IMAFWriteDataM, 24'b0}; + default: LittleEndianWriteDataM = IMAFWriteDataM; // shouldn't happen: all four offsets are listed above; this LLEN-bit value would be implicitly zero-extended + endcase + end +endmodule