From b5ecae205604c64cc5150f65a1fde9adec041ddf Mon Sep 17 00:00:00 2001 From: Rose Thompson Date: Tue, 31 Oct 2023 18:50:13 -0500 Subject: [PATCH] Working through issues with the spill logic. --- src/lsu/align.sv | 50 +++++--- src/lsu/lsu.sv | 2 +- ...ALLY-misaligned-access-01.reference_output | 65 ++++++----- .../src/WALLY-misaligned-access-01.S | 110 +++++++++--------- 4 files changed, 124 insertions(+), 103 deletions(-) diff --git a/src/lsu/align.sv b/src/lsu/align.sv index b517dfcdb..3708674aa 100644 --- a/src/lsu/align.sv +++ b/src/lsu/align.sv @@ -68,14 +68,25 @@ module align import cvw::*; #(parameter cvw_t P) ( logic [P.LLEN*2-1:0] ReadDataWordSpillAllM; logic [P.LLEN*2-1:0] ReadDataWordSpillShiftedM; - //////////////////////////////////////////////////////////////////////////////////////////////////// - // PC logic - //////////////////////////////////////////////////////////////////////////////////////////////////// - localparam LLENINBYTES = P.LLEN/8; logic [P.XLEN-1:0] IEUAdrIncrementM; + logic [3:0] IncrementAmount; + + logic [(P.LLEN-1)*2/8:0] ByteMaskSaveM; + logic [(P.LLEN-1)*2/8:0] ByteMaskMuxM; + + always_comb begin + case(MemRWM) + 2'b00: IncrementAmount = 4'd0; + 2'b01: IncrementAmount = 4'd1; + 2'b10: IncrementAmount = 4'd3; + 2'b11: IncrementAmount = 4'd7; + default: IncrementAmount = 4'd7; + endcase + end /* verilator lint_off WIDTHEXPAND */ - assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + //assign IEUAdrIncrementM = IEUAdrM + LLENINBYTES; + assign IEUAdrIncrementM = IEUAdrM + IncrementAmount; /* verilator lint_on WIDTHEXPAND */ mux2 #(P.XLEN) ieuadrspillemux(.d0(IEUAdrE), .d1(IEUAdrIncrementM), .s(SelSpillE), .y(IEUAdrSpillE)); mux2 #(P.XLEN) ieuadrspillmmux(.d0(IEUAdrM), .d1(IEUAdrIncrementM), .s(SelSpillM), .y(IEUAdrSpillM)); @@ -88,15 +99,16 @@ module align import cvw::*; #(parameter cvw_t P) ( // 1) operation size // 2) offset // 3) access location within the cacheline - logic [$clog2(P.DCACHE_LINELENINBITS/8)-1:$clog2(LLENINBYTES)] 
WordOffsetM; + localparam OFFSET_BIT_POS = $clog2(P.DCACHE_LINELENINBITS/8); + logic [OFFSET_BIT_POS-1:$clog2(LLENINBYTES)] WordOffsetM; logic [$clog2(LLENINBYTES)-1:0] ByteOffsetM; logic HalfSpillM, WordSpillM; - assign {WordOffsetM, ByteOffsetM} = IEUAdrM[$clog2(P.DCACHE_LINELENINBITS/8)-1:0]; - assign HalfSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b01 & ByteOffsetM[0] != 1'b0; - assign WordSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b10 & ByteOffsetM[1:0] != 2'b00; + assign {WordOffsetM, ByteOffsetM} = IEUAdrM[OFFSET_BIT_POS-1:0]; + assign HalfSpillM = (IEUAdrM[OFFSET_BIT_POS-1:0] == '1) & Funct3M[1:0] == 2'b01; + assign WordSpillM = (IEUAdrM[OFFSET_BIT_POS-1:1] == '1) & Funct3M[1:0] == 2'b10; if(P.LLEN == 64) begin logic DoubleSpillM; - assign DoubleSpillM = (WordOffsetM == '1) & Funct3M[1:0] == 2'b11 & ByteOffsetM[2:0] != 3'b00; + assign DoubleSpillM = (IEUAdrM[OFFSET_BIT_POS-1:2] == '1) & Funct3M[1:0] == 2'b11; assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM | DoubleSpillM); end else begin assign SpillM = (|MemRWM) & CacheableM & (HalfSpillM | WordSpillM); @@ -154,10 +166,18 @@ module align import cvw::*; #(parameter cvw_t P) ( // write path. Also has the 8:1 shifter muxing for the byteoffset // then it also has the mux to select when a spill occurs logic [P.LLEN*2-1:0] LSUWriteDataShiftedM; - assign LSUWriteDataShiftedM = {LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 8 * ByteOffsetM : '0); - mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {{{P.LLEN}{1'b0}}, LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + logic [P.LLEN*3-1:0] LSUWriteDataShiftedExtM; // *** RT: Find a better way. I'm extending in both directions so we don't shift in zeros. The cache expects the writedata to not have any zero data, but instead replicated data. + + assign LSUWriteDataShiftedExtM = {LSUWriteDataM, LSUWriteDataM, LSUWriteDataM} << (MisalignedM ? 
8 * ByteOffsetM : '0); + assign LSUWriteDataShiftedM = LSUWriteDataShiftedExtM[P.LLEN*3-1:P.LLEN]; + assign LSUWriteDataSpillM = LSUWriteDataShiftedM; + //mux2 #(2*P.LLEN) writedataspillmux(LSUWriteDataShiftedM, {LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN], LSUWriteDataShiftedM[P.LLEN*2-1:P.LLEN]}, SelSpillM, LSUWriteDataSpillM); + logic [P.LLEN*2/8-1:0] ByteMaskShiftedM; - assign ByteMaskShiftedM = {ByteMaskExtendedM, ByteMaskM}; - mux2 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskExtendedM}, SelSpillM, ByteMaskSpillM); - + assign ByteMaskShiftedM = ByteMaskMuxM; + mux3 #(2*P.LLEN/8) bytemaskspillmux(ByteMaskShiftedM, {{{P.LLEN/8}{1'b0}}, ByteMaskM}, + {{{P.LLEN/8}{1'b0}}, ByteMaskMuxM[P.LLEN*2/8-1:P.LLEN/8]}, {SelSpillM, SelSpillE}, ByteMaskSpillM); + + flopenr #(P.LLEN*2/8) bytemaskreg(clk, reset, SelSpillE, {ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM); + mux2 #(P.LLEN*2/8) bytemasksavemux({ByteMaskExtendedM, ByteMaskM}, ByteMaskSaveM, SelSpillM, ByteMaskMuxM); endmodule diff --git a/src/lsu/lsu.sv b/src/lsu/lsu.sv index ef9edb72b..44689a1d1 100644 --- a/src/lsu/lsu.sv +++ b/src/lsu/lsu.sv @@ -297,7 +297,7 @@ module lsu import cvw::*; #(parameter cvw_t P) ( cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN), .NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(CACHEWORDLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache( - .clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), + .clk, .reset, .Stall(GatedStallW & ~SelSpillE), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM), .FlushCache(FlushDCache), .NextSet(IEUAdrExtE[11:0]), .PAdr(PAdrM), .ByteMask(ByteMaskSpillM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .CacheWriteData(LSUWriteDataSpillM), .SelHPTW, diff --git 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 9c1539122..dd8a642fc 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -62,38 +62,39 @@ 77767574 7b7a7978 7f7e7d7c -04030201 # Half1DstData -08070605 -0c0b0a09 -100f0e0d -14130211 -18171615 -1c1b1a19 -201f1e1d -24232221 -28272625 -2c2b2a29 -302f2e2d -34330231 -38373635 -3c3b3a39 -403f3e3d -44434241 -48474645 -4c4b4a49 -504f4e4d -54530251 -58575655 -5c5b5a59 -605f5e5d -64636261 -68676665 -6c6b6a69 -706f6e6d -74730271 -78777675 -7c7b7a79 -de7f7e7d +020100ef # Half1DstData +06050403 +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +7fdeadbe 03020100 # Word0DstData 07060504 0b0a0908 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 9ceff3694..d6ae2603f 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -66,14 +66,14 @@ TEST_HALF0: jal ra, CheckAllWriteSignature TEST_HALF1: - la a0, SourceData+1 - la a1, Half1DstData + la a0, SourceData + la a1, Half1DstData+1 li a2, 16 jal ra, memcpy8_2 # check if the values are write for all sizes and offsets of misaligned 
loads. - la a0, SourceData+1 - la a1, Half1DstData + la a0, SourceData + la a1, Half1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature @@ -90,38 +90,38 @@ TEST_WORD0: jal ra, CheckAllWriteSignature TEST_WORD1: - la a0, SourceData+1 - la a1, Word1DstData + la a0, SourceData + la a1, Word1DstData+1 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+1 - la a1, Word1DstData + la a0, SourceData + la a1, Word1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature TEST_WORD2: - la a0, SourceData+2 - la a1, Word2DstData + la a0, SourceData + la a1, Word2DstData+2 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+2 - la a1, Word2DstData + la a0, SourceData + la a1, Word2DstData+2 li a2, 16 jal ra, CheckAllWriteSignature TEST_WORD3: - la a0, SourceData+3 - la a1, Word3DstData + la a0, SourceData + la a1, Word3DstData+3 li a2, 16 jal ra, memcpy8_4 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+3 - la a1, Word3DstData + la a0, SourceData + la a1, Word3DstData+3 li a2, 16 jal ra, CheckAllWriteSignature @@ -138,86 +138,86 @@ TEST_DOUBLE0: jal ra, CheckAllWriteSignature TEST_DOUBLE1: - la a0, SourceData+1 - la a1, Double1DstData + la a0, SourceData + la a1, Double1DstData+1 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+1 - la a1, Double1DstData + la a0, SourceData + la a1, Double1DstData+1 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE2: - la a0, SourceData+2 - la a1, Double2DstData + la a0, SourceData + la a1, Double2DstData+2 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. 
- la a0, SourceData+2 - la a1, Double2DstData + la a0, SourceData + la a1, Double2DstData+2 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE3: - la a0, SourceData+3 - la a1, Double3DstData + la a0, SourceData + la a1, Double3DstData+3 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+3 - la a1, Double3DstData + la a0, SourceData + la a1, Double3DstData+3 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE4: - la a0, SourceData+4 - la a1, Double4DstData + la a0, SourceData + la a1, Double4DstData+4 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+4 - la a1, Double4DstData + la a0, SourceData + la a1, Double4DstData+4 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE5: - la a0, SourceData+5 - la a1, Double5DstData + la a0, SourceData + la a1, Double5DstData+5 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+5 - la a1, Double5DstData + la a0, SourceData + la a1, Double5DstData+5 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE6: - la a0, SourceData+6 - la a1, Double6DstData + la a0, SourceData + la a1, Double6DstData+6 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. - la a0, SourceData+6 - la a1, Double6DstData + la a0, SourceData + la a1, Double6DstData+6 li a2, 16 jal ra, CheckAllWriteSignature TEST_DOUBLE7: - la a0, SourceData+7 - la a1, Double7DstData + la a0, SourceData + la a1, Double7DstData+7 li a2, 16 jal ra, memcpy8_8 # check if the values are write for all sizes and offsets of misaligned loads. 
- la a0, SourceData+7 - la a1, Double7DstData + la a0, SourceData + la a1, Double7DstData+7 li a2, 16 jal ra, CheckAllWriteSignature @@ -672,7 +672,7 @@ Half1DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word0DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef @@ -684,19 +684,19 @@ Word1DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word2DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Word3DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double0DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef @@ -708,43 +708,43 @@ Double1DstData: .8byte 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double2DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double3DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double4DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double5DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double6DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef Double7DstData: .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef .8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef - +.8byte 0xdeadbeefdeadbeef signature: .fill 225, 1, 0x00