diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output index 7e1ab4344..b0078f9ac 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-misaligned-access-01.reference_output @@ -1,24 +1,537 @@ -00000000 -00000000 -00000001 -00000000 -ffffffff -ffffffff -00000001 -00000000 -00000002 +03020100 # ByteDstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +03020100 # Half0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # Half1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +03020100 # Word0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # 
Word1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +05040302 # Word2DstData +09080706 +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +06050403 # Word3DstData +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +03020100 # Double0DstData +07060504 +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +04030201 # Double1DstData +08070605 +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +05040302 # Double2DstData +09080706 +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a 
+dead7f7e +06050403 # Double3DstData +0a090807 +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +07060504 # Double4DestData +0b0a0908 +0f0e0d0c +13021110 +17161514 +1b1a1918 +1f1e1d1c +23222120 +27262524 +2b2a2928 +2f2e2d2c +33023130 +37363534 +3b3a3938 +3f3e3d3c +43424140 +47464544 +4b4a4948 +4f4e4d4c +53025150 +57565554 +5b5a5958 +5f5e5d5c +63626160 +67666564 +6b6a6968 +6f6e6d6c +73027170 +77767574 +7b7a7978 +7f7e7d7c +deadbeef +08070605 # Double5DestData +0c0b0a09 +100f0e0d +14130211 +18171615 +1c1b1a19 +201f1e1d +24232221 +28272625 +2c2b2a29 +302f2e2d +34330231 +38373635 +3c3b3a39 +403f3e3d +44434241 +48474645 +4c4b4a49 +504f4e4d +54530251 +58575655 +5c5b5a59 +605f5e5d +64636261 +68676665 +6c6b6a69 +706f6e6d +74730271 +78777675 +7c7b7a79 +de7f7e7d +deadbeef +09080706 # Double6DstData +0d0c0b0a +11100f0e +15141302 +19181716 +1d1c1b1a +21201f1e +25242322 +29282726 +2d2c2b2a +31302f2e +35343302 +39383736 +3d3c3b3a +41403f3e +45444342 +49484746 +4d4c4b4a +51504f4e +55545302 +59585756 +5d5c5b5a +61605f5e +65646362 +69686766 +6d6c6b6a +71706f6e +75747302 +79787776 +7d7c7b7a +dead7f7e +deadbeef +0a090807 # Double7DstData +0e0d0c0b +0211100f +16151413 +1a191817 +1e1d1c1b +2221201f +26252423 +2a292827 +2e2d2c2b +0231302f +36353433 +3a393837 +3e3d3c3b +4241403f +46454443 +4a494847 +4e4d4c4b +0251504f +56555453 +5a595857 +5e5d5c5b +6261605f +66656463 +6a696867 +6e6d6c6b +0271706f +76757473 +7a797877 +7e7d7c7b +deadbe7f +deadbeef +00000000 #signature 00000000 00000000 00000000 -ffffffff -ffffffff +00000000 00000000 00000000 -fffffffe -ffffffff -393cb5d1 -72ca6f49 -7b12609b -245889d8 -7f42ac28 -af17a2d3 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 
+00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S index 792acc715..76496ff47 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-misaligned-access-01.S @@ -27,113 +27,709 @@ rvtest_entry_point: RVMODEL_BOOT RVTEST_CODE_BEGIN -RVTEST_SIGBASE( x6, wally_signature) - RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",ld) -# Testcase 0: rs1:x18(0x0000000000000000), rs2:x9(0x0000000000000000), result rd:x5(0x0000000000000000) -li x18, MASK_XLEN(0x0000000000000000) -li x9, MASK_XLEN(0x0000000000000000) -SLT x5, x18, x9 -sd x5, 0(x6) + # This test checks the misaligned load and stores work correctly and across D$ line spills. + # The general approach is to + # 1. load a region of memory using load doubles equal to two cache lines. And copy to a new + # region but using stores of bytes, half, word, or doubles. Each are repeated for all possible + # misaligned access. Bytes are always aligned, halves are 0, and 1, words are 0, 1, 2, and 3, and + # doubles are 0 through 7. Then the new region is compared against the reference region. Because + # of the misalignment the last few bytes will not be written so they will be some portion of deadbeef. + # The comparison is done using using same abyte, half, word, and double misaligned approach. 
-# Testcase 1: rs1:x8(0x0000000000000000), rs2:x25(0x0000000000000001), result rd:x31(0x0000000000000001)
-li x8, MASK_XLEN(0x0000000000000000)
-li x25, MASK_XLEN(0x0000000000000001)
-SLT x31, x8, x25
-sd x31, 8(x6)
+    # Every step below has the same shape: copy 16 doubles out of SourceData
+    # (optionally starting at a byte offset) into a dedicated destination
+    # region with one of the memcpy8_N helpers, then hand the same source and
+    # destination to CheckAllWriteSignature, which advances a3.
+    la a3, signature # does not get overwritten by any functions
-# Testcase 2: rs1:x16(0x0000000000000000), rs2:x12(0xffffffffffffffff), result rd:x20(0x0000000000000000)
-li x16, MASK_XLEN(0x0000000000000000)
-li x12, MASK_XLEN(0xffffffffffffffff)
-SLT x20, x16, x12
-sd x20, 16(x6)
+    # byte copy region. always naturally aligned
+    la a0, SourceData
+    la a1, ByteDstData
+    li a2, 16
+    jal ra, memcpy8_1
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData
+    la a1, ByteDstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # halfword-store copy, source offset 0
+    la a0, SourceData
+    la a1, Half0DstData
+    li a2, 16
+    jal ra, memcpy8_2
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData
+    la a1, Half0DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
-# Testcase 3: rs1:x10(0x0000000000000001), rs2:x22(0x0000000000000000), result rd:x12(0x0000000000000000)
-li x10, MASK_XLEN(0x0000000000000001)
-li x22, MASK_XLEN(0x0000000000000000)
-SLT x12, x10, x22
-sd x12, 24(x6)
+    # halfword-store copy, source offset 1
+    la a0, SourceData+1
+    la a1, Half1DstData
+    li a2, 16
+    jal ra, memcpy8_2
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+1
+    la a1, Half1DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # word-store copy, source offset 0
+    la a0, SourceData
+    la a1, Word0DstData
+    li a2, 16
+    jal ra, memcpy8_4
-# Testcase 4: rs1:x19(0x0000000000000001), rs2:x31(0x0000000000000001), result rd:x29(0x0000000000000000)
-li x19, MASK_XLEN(0x0000000000000001)
-li x31, MASK_XLEN(0x0000000000000001)
-SLT x29, x19, x31
-sd x29, 32(x6)
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData
+    la a1, Word0DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # word-store copy, source offset 1
+    la a0, SourceData+1
+    la a1, Word1DstData
+    li a2, 16
+    jal ra, memcpy8_4
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+1
+    la a1, Word1DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # word-store copy, source offset 2
+    la a0, SourceData+2
+    la a1, Word2DstData
+    li a2, 16
+    jal ra, memcpy8_4
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+2
+    la a1, Word2DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # word-store copy, source offset 3
+    la a0, SourceData+3
+    la a1, Word3DstData
+    li a2, 16
+    jal ra, memcpy8_4
-# Testcase 5: rs1:x21(0x0000000000000001), rs2:x28(0xffffffffffffffff), result rd:x20(0x0000000000000000)
-li x21, MASK_XLEN(0x0000000000000001)
-li x28, MASK_XLEN(0xffffffffffffffff)
-SLT x20, x21, x28
-sd x20, 40(x6)
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+3
+    la a1, Word3DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # double-store copy, source offset 0
+    la a0, SourceData
+    la a1, Double0DstData
+    li a2, 16
+    jal ra, memcpy8_8
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData
+    la a1, Double0DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # double-store copy, source offset 1
+    la a0, SourceData+1
+    la a1, Double1DstData
+    li a2, 16
+    jal ra, memcpy8_8
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+1
+    la a1, Double1DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # double-store copy, source offset 2
+    la a0, SourceData+2
+    la a1, Double2DstData
+    li a2, 16
+    jal ra, memcpy8_8
-# Testcase 6: rs1:x5(0xffffffffffffffff), rs2:x23(0x0000000000000000), result rd:x10(0x0000000000000001)
-li x5, MASK_XLEN(0xffffffffffffffff)
-li x23, MASK_XLEN(0x0000000000000000)
-SLT x10, x5, x23
-sd x10, 48(x6)
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+2
+    la a1, Double2DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    la a0, SourceData+3
+    la a1, Double3DstData
+    li a2, 16
+    jal ra, memcpy8_8
-# Testcase 7: rs1:x13(0xffffffffffffffff), rs2:x24(0x0000000000000001), result rd:x14(0x0000000000000001)
-li x13, MASK_XLEN(0xffffffffffffffff)
-li x24, MASK_XLEN(0x0000000000000001)
-SLT x14, x13, x24
-sd x14, 56(x6)
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+3
+    la a1, Double3DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    la a0, SourceData+4
+    la a1, Double4DstData
+    li a2, 16
+    jal ra, memcpy8_8
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+4
+    la a1, Double4DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    la a0, SourceData+5
+    la a1, Double5DstData
+    li a2, 16
+    jal ra, memcpy8_8
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+5
+    la a1, Double5DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    la a0, SourceData+6
+    la a1, Double6DstData
+    li a2, 16
+    jal ra, memcpy8_8
+
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+6
+    la a1, Double6DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    la a0, SourceData+7
+    la a1, Double7DstData
+    li a2, 16
+    jal ra, memcpy8_8
-# Testcase 8: rs1:x27(0xffffffffffffffff), rs2:x21(0xffffffffffffffff), result rd:x3(0x0000000000000000)
-li x27, MASK_XLEN(0xffffffffffffffff)
-li x21, MASK_XLEN(0xffffffffffffffff)
-SLT x3, x27, x21
-sd x3, 64(x6)
+    # check that the values are right for all sizes and offsets of misaligned loads.
+    la a0, SourceData+7
+    la a1, Double7DstData
+    li a2, 16
+    jal ra, CheckAllWriteSignature
+
+    # All copies and checks are done.  Jump over the helper routines below.
+    # Without this jump execution falls straight into CheckAllWriteSignature
+    # with ra pointing at CheckAllWriteSignature itself, so the test would
+    # never reach RVTEST_CODE_END.
+    j misaligned_test_done
+
+# CHECK_ONE cmpfn, shift, off, slot
+#   Runs one comparison pass for CheckAllWriteSignature.
+#   cmpfn  memcmp1/2/4/8 routine to call
+#   shift  log2 of the number of cmpfn-sized elements per double
+#   off    byte offset added to the pointer of the data under test
+#   slot   byte index inside the current 15-byte signature record
+#   Expects s0 = golden data, s1 = data under test, s2 = number of doubles,
+#   s3 = signature pointer, s5 = mismatch accumulator.  Clobbers a0-a2, ra
+#   and whatever cmpfn clobbers (t0-t4).
+.macro CHECK_ONE cmpfn, shift, off, slot
+    mv a0, s0               # SourceData (golden)
+    addi a1, s1, \off       # ie: ByteDstData + off
+    slli a2, s2, \shift     # doubles -> number of elements of this size
+    .if \off
+    addi a2, a2, -1         # offset pass: skip the last element, it would overrun
+    .endif
+    jal ra, \cmpfn
+    sb a0, \slot(s3)        # 0xff when memcmp found no mismatch, else low byte of the index
+    addi t0, a0, 1          # zero only when memcmp returned -1 (no mismatch)
+    or s5, s5, t0
+.endm
+
+.type CheckAllWriteSignature, @function
+# a0 is the SourceData, (golden), a1 is the data to be checked.
+# a2 is the number of doubles
+# a3 is the signature pointer
+# returns a0 as 0 for no mismatch, 1 for mismatch,
+# returns a3 as incremented signature pointer
+#
+# The data is compared 15 times: bytes, halves at offsets 0-1, words at
+# offsets 0-3 and doubles at offsets 0-7.  Each pass writes one signature
+# byte, so a3 advances by 15 per call.
+# There is no stack, so s0-s5 are used as scratch and are not preserved.
+# NOTE(review): the offset passes compare SourceData+0 against data+off, as
+# the original code did.  If the intent is to read both regions with the
+# same misalignment, a0 needs the same offset - TODO confirm.
+CheckAllWriteSignature:
+    mv s0, a0
+    mv s1, a1
+    mv s2, a2
+    mv s3, a3
+    mv s4, ra               # no stack: ra stays in s4 until the final ret
+    li s5, 0                # nonzero once any pass reports a mismatch
-# Testcase 9: rs1:x8(0x983631890063e42f), rs2:x21(0xb2d650af313b32b7), result rd:x15(0x0000000000000001)
-li x8, MASK_XLEN(0x983631890063e42f)
-li x21, MASK_XLEN(0xb2d650af313b32b7)
-SLT x15, x8, x21
-sd x15, 72(x6)
+    # check values byte by byte (8 per double)
+    CHECK_ONE memcmp1, 3, 0, 0
-# Testcase 10: rs1:x19(0xb5d97ef760ef1471), rs2:x28(0xac7c8803e01bbf50), result rd:x14(0x0000000000000000)
-li x19, MASK_XLEN(0xb5d97ef760ef1471)
-li x28, MASK_XLEN(0xac7c8803e01bbf50)
-SLT x14, x19, x28
-sd x14, 80(x6)
+    # check values half by half (4 per double), offsets 0 and 1
+    CHECK_ONE memcmp2, 2, 0, 1
-# Testcase 11: rs1:x19(0x66faf98908135d58), rs2:x14(0xb3ab1b2cdf26f517), result rd:x25(0x0000000000000000)
-li x19, MASK_XLEN(0x66faf98908135d58)
-li x14, MASK_XLEN(0xb3ab1b2cdf26f517)
-SLT x25, x19, x14
-sd x25, 88(x6)
+    CHECK_ONE memcmp2, 2, 1, 2
+
+    # check values word by word (2 per double), offsets 0 through 3
+    CHECK_ONE memcmp4, 1, 0, 3
-.EQU NUMTESTS,12
+    CHECK_ONE memcmp4, 1, 1, 4
+    CHECK_ONE memcmp4, 1, 2, 5
+    CHECK_ONE memcmp4, 1, 3, 6
+
+    # check values double by double, offsets 0 through 7
+    CHECK_ONE memcmp8, 0, 0, 7
+    CHECK_ONE memcmp8, 0, 1, 8
+    CHECK_ONE memcmp8, 0, 2, 9
+    CHECK_ONE memcmp8, 0, 3, 10
+    CHECK_ONE memcmp8, 0, 4, 11
+    CHECK_ONE memcmp8, 0, 5, 12
+    CHECK_ONE memcmp8, 0, 6, 13
+    CHECK_ONE memcmp8, 0, 7, 14
+
+    addi a3, s3, 15         # one signature byte per pass
+    snez a0, s5             # 0 for no mismatch, 1 for mismatch
+    mv ra, s4
+    ret
+
+
+.type memcmp1, @function
+# returns which index mismatch, -1 if none
+# compares byte by byte with lbu; always compares at least one element
+# clobbers t0-t4
+memcmp1:
+    # a0 is the source1
+    # a1 is the source2
+    # a2 is the number of 1 byte words
+    mv t0, a0
+    mv t1, a1
+    li t2, 0
+memcmp1_loop:
+    lbu t3, 0(t0)
+    lbu t4, 0(t1)
+    bne t3, t4, memcmp1_ne
+    addi t0, t0, 1
+    addi t1, t1, 1
+    addi t2, t2, 1
+    blt t2, a2, memcmp1_loop
+    li a0, -1
+    ret
+memcmp1_ne:
+    mv a0, t2
+    ret
+
+.type memcmp2, @function
+# returns which index mismatch, -1 if none
+# compares half by half with lhu; always compares at least one element
+# clobbers t0-t4
+memcmp2:
+    # a0 is the source1
+    # a1 is the source2
+    # a2 is the number of 2 byte words
+    mv t0, a0
+    mv t1, a1
+    li t2, 0
+memcmp2_loop:
+    lhu t3, 0(t0)
+    lhu t4, 0(t1)
+    bne t3, t4, memcmp2_ne
+    addi t0, t0, 2
+    addi t1, t1, 2
+    addi t2, t2, 1
+    blt t2, a2, memcmp2_loop
+    li a0, -1
+    ret
+memcmp2_ne:
+    mv a0, t2
+    ret
+
+.type memcmp4, @function
+# returns which index mismatch, -1 if none
+# compares word by word with lwu; always compares at least one element
+# clobbers t0-t4
+memcmp4:
+    # a0 is the source1
+    # a1 is the source2
+    # a2 is the number of 4 byte words
+    mv t0, a0
+    mv t1, a1
+    li t2, 0
+memcmp4_loop:
+    lwu t3, 0(t0)
+    lwu t4, 0(t1)
+    bne t3, t4, memcmp4_ne
+    addi t0, t0, 4
+    addi t1, t1, 4
+    addi t2, t2, 1
+    blt t2, a2, memcmp4_loop
+    li a0, -1
+    ret
+memcmp4_ne:
+    mv a0, t2
+    ret
+
+.type memcmp8, @function
+# returns which index mismatch, -1 if none
+# compares double by double with ld; always compares at least one element
+# clobbers t0-t4
+memcmp8:
+    # a0 is the source1
+    # a1 is the source2
+    # a2 is the number of 8 byte words
+    mv t0, a0
+    mv t1, a1
+    li t2, 0
+memcmp8_loop:
+    ld t3, 0(t0)
+    ld t4, 0(t1)
+    bne t3, t4, memcmp8_ne
+    addi t0, t0, 8
+    addi t1, t1, 8
+    addi t2, t2, 1
+    blt t2, a2, memcmp8_loop
+    li a0, -1
+    ret
+memcmp8_ne:
+    mv a0, t2
+    ret
+
+
+# target of the jump at the end of the test sequence
+misaligned_test_done:
 RVTEST_CODE_END
 RVMODEL_HALT
+.type memcpy8_1, @function
+# load 8 bytes using load double then store using 8 sb
+# clobbers t0-t4
+memcpy8_1:
+    # a0 is the source
+    # a1 is the dst
+    # a2 is the number of 8 byte words
+    mv t0, a0
+    mv t1, a1
+    li t2, 0
+memcpy8_1_loop:
+    ld t3, 0(t0)
+    # sb only writes bits 7:0 of its source register, so each byte is
+    # brought down with a right shift and needs no extra masking.
+    sb t3, 0(t1)
+
+    srli t4, t3, 8
+    sb t4, 1(t1)
+
+    srli t4, t3, 16
+    sb t4, 2(t1)
+
+    srli t4, t3, 24
+    sb t4, 3(t1)
+
+    srli t4, t3, 32
+    sb t4, 4(t1)
+
+    srli t4, t3, 40
+    sb t4, 5(t1)
+
+    srli t4, t3, 48
+    sb t4, 6(t1)
+
+    srli t4, t3, 56
+    sb t4, 7(t1)
+
+    addi t0, t0, 8
+    addi t1, t1, 8
+    addi t2, t2, 1
+    blt t2, a2, memcpy8_1_loop
+    ret
+
+.type 
memcpy8_2, @function
+# load 8 bytes using load double then store using 4 sh
+# clobbers t0-t4
+memcpy8_2:
+    # a0 is the source
+    # a1 is the dst
+    # a2 is the number of 8 byte words
+    mv t0, a0
+    mv t1, a1
+    li t2, 0
+
+memcpy8_2_loop:
+    ld t3, 0(t0)
+    # sh only writes bits 15:0 of its source register, so each halfword is
+    # brought down with a right shift and no mask register is needed.
+    sh t3, 0(t1)
+
+    srli t4, t3, 16
+    sh t4, 2(t1)
+
+    srli t4, t3, 32
+    sh t4, 4(t1)
+
+    srli t4, t3, 48
+    sh t4, 6(t1)
+
+
+    addi t0, t0, 8
+    addi t1, t1, 8
+    addi t2, t2, 1
+    blt t2, a2, memcpy8_2_loop
+    ret
+
+.type memcpy8_4, @function
+# load 8 bytes using load double then store using 2 sw
+# clobbers t0-t4
+memcpy8_4:
+    # a0 is the source
+    # a1 is the dst
+    # a2 is the number of 8 byte words
+    mv t0, a0
+    mv t1, a1
+    li t2, 0
+
+memcpy8_4_loop:
+    ld t3, 0(t0)
+    # sw only writes bits 31:0 of its source register, so the upper word
+    # is brought down with a right shift and no mask register is needed.
+    sw t3, 0(t1)
+
+    srli t4, t3, 32
+    sw t4, 4(t1)
+
+    addi t0, t0, 8
+    addi t1, t1, 8
+    addi t2, t2, 1
+    blt t2, a2, memcpy8_4_loop
+    ret
+
+.type memcpy8_8, @function
+# load 8 bytes using load double then store using 1 sd
+# clobbers t0-t3
+memcpy8_8:
+    # a0 is the source
+    # a1 is the dst
+    # a2 is the number of 8 byte words
+    mv t0, a0
+    mv t1, a1
+    li t2, 0
+
+memcpy8_8_loop:
+    ld t3, 0(t0)
+    sd t3, 0(t1)            # store the double that was just loaded
+
+    addi t0, t0, 8
+    addi t1, t1, 8
+    addi t2, t2, 1
+    blt t2, a2, memcpy8_8_loop
+    ret
+
+
 RVTEST_DATA_BEGIN
-.align 4
+.align 3
 rvtest_data:
-.word 0x98765432
+SourceData:
+.8byte 0x0706050403020100, 0x0f0e0d0c0b0a0908, 0x1716151413021110, 0x1f1e1d1c1b1a1918
+.8byte 0x2726252423222120, 0x2f2e2d2c2b2a2928, 0x3736353433023130, 0x3f3e3d3c3b3a3938
+.8byte 0x4746454443424140, 0x4f4e4d4c4b4a4948, 0x5756555453025150, 0x5f5e5d5c5b5a5958
+.8byte 0x6766656463626160, 0x6f6e6d6c6b6a6968, 0x7776757473027170, 0x7f7e7d7c7b7a7978
+.8byte 0xdeadbeefdeadbeef
+
+Response1ByteOffsetData:
+.8byte 0x0807060504030201, 0x100f0e0d0c0b0a09, 0x1817161514130211, 0x201f1e1d1c1b1a19
+.8byte 0x2827262524232221, 
0x302f2e2d2c2b2a29, 0x3837363534330231, 0x403f3e3d3c3b3a39 +.8byte 0x4847464544434241, 0x504f4e4d4c4b4a49, 0x5857565554530251, 0x605f5e5d5c5b5a59 +.8byte 0x6867666564636261, 0x706f6e6d6c6b6a69, 0x7877767574730271, 0xde7f7e7d7c7b7a79 + +Response2ByteOffsetData: +.8byte 0x0908070605040302, 0x11100f0e0d0c0b0a, 0x1918171615141302, 0x21201f1e1d1c1b1a +.8byte 0x2928272625242322, 0x31302f2e2d2c2b2a, 0x3938373635343302, 0x41403f3e3d3c3b3a +.8byte 0x4948474645444342, 0x51504f4e4d4c4b4a, 0x5958575655545302, 0x61605f5e5d5c5b5a +.8byte 0x6968676665646362, 0x71706f6e6d6c6b6a, 0x7978777675747302, 0xdead7f7e7d7c7b7a + +Response3ByteOffsetData: +.8byte 0x0a09080706050403, 0x0211100f0e0d0c0b, 0x1a19181716151413, 0x2221201f1e1d1c1b +.8byte 0x2a29282726252423, 0x0231302f2e2d2c2b, 0x3a39383736353433, 0x4241403f3e3d3c3b +.8byte 0x4a49484746454443, 0x0251504f4e4d4c4b, 0x5a59585756555453, 0x6261605f5e5d5c5b +.8byte 0x6a69686766656463, 0x0271706f6e6d6c6b, 0x7a79787776757473, 0xdeadbe7f7e7d7c7b + +Response4ByteOffsetData: +.8byte 0x0b0a090807060504, 0x130211100f0e0d0c, 0x1b1a191817161514, 0x232221201f1e1d1c +.8byte 0x2b2a292827262524, 0x330231302f2e2d2c, 0x3b3a393837363534, 0x434241403f3e3d3c +.8byte 0x4b4a494847464544, 0x530251504f4e4d4c, 0x5b5a595857565554, 0x636261605f5e5d5c +.8byte 0x6b6a696867666564, 0x730271706f6e6d6c, 0x7b7a797877767574, 0xdeadbeef7f7e7d7c + +Response5ByteOffsetData: +.8byte 0x0c0b0a0908070605, 0x14130211100f0e0d, 0x1c1b1a1918171615, 0x24232221201f1e1d +.8byte 0x2c2b2a2928272625, 0x34330231302f2e2d, 0x3c3b3a3938373635, 0x44434241403f3e3d +.8byte 0x4c4b4a4948474645, 0x54530251504f4e4d, 0x5c5b5a5958575655, 0x64636261605f5e5d +.8byte 0x6c6b6a6968676665, 0x74730271706f6e6d, 0x7c7b7a7978777675, 0xdeadbeefde7f7e7d + +Response6ByteOffsetData: +.8byte 0x0d0c0b0a09080706, 0x1514130211100f0e, 0x1d1c1b1a19181716, 0x2524232221201f1e +.8byte 0x2d2c2b2a29282726, 0x3534330231302f2e, 0x3d3c3b3a39383736, 0x4544434241403f3e +.8byte 0x4d4c4b4a49484746, 0x5554530251504f4e, 
0x5d5c5b5a59585756, 0x6564636261605f5e +.8byte 0x6d6c6b6a69686766, 0x7574730271706f6e, 0x7d7c7b7a79787776, 0xdeadbeefdead7f7e + +Response7ByteOffsetData: +.8byte 0x0e0d0c0b0a090807, 0x161514130211100f, 0x1e1d1c1b1a191817, 0x262524232221201f +.8byte 0x2e2d2c2b2a292827, 0x363534330231302f, 0x3e3d3c3b3a393837, 0x464544434241403f +.8byte 0x4e4d4c4b4a494847, 0x565554530251504f, 0x5e5d5c5b5a595857, 0x666564636261605f +.8byte 0x6e6d6c6b6a696867, 0x767574730271706f, 0x7e7d7c7b7a797877, 0xdeadbeefdeadbe7f + RVTEST_DATA_END RVMODEL_DATA_BEGIN +ByteDstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -wally_signature: - .fill NUMTESTS*(XLEN/32),4,0xdeadbeef +Half0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Half1DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#ifdef rvtest_mtrap_routine +Word0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -mtrap_sigptr: - .fill 64*(XLEN/32),4,0xdeadbeef +Word1DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#endif +Word2DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Word3DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double0DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef -#ifdef rvtest_gpr_save - -gpr_save: - .fill 32*(XLEN/32),4,0xdeadbeef - -#endif +Double1DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 
0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double2DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double3DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double4DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double5DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double6DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef + +Double7DstData: +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef 
+.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +.8byte 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef +signature: + .fill 225, 1, 0xff + RVMODEL_DATA_END // ../wally-riscv-arch-test/riscv-test-suite/rv64i_m/I/src/WALLY-SLT.S // David_Harris@hmc.edu & Katherine Parry