diff --git a/config/rv32gc/config.vh b/config/rv32gc/config.vh index b08bf06d8..39992605d 100644 --- a/config/rv32gc/config.vh +++ b/config/rv32gc/config.vh @@ -46,7 +46,7 @@ localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; -localparam ZICBOZ_SUPPORTED = 0; +localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index 939ce72c8..ddbca789a 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -48,7 +48,7 @@ localparam ZIHPM_SUPPORTED = 1; localparam ZFH_SUPPORTED = 0; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; -localparam ZICBOZ_SUPPORTED = 0; +localparam ZICBOZ_SUPPORTED = 1; localparam ZICBOP_SUPPORTED = 0; localparam SVPBMT_SUPPORTED = 0; localparam SVINVAL_SUPPORTED = 1; diff --git a/fpga/constraints/debug4.xdc b/fpga/constraints/debug4.xdc index f26ce6c91..e8a7e061f 100644 --- a/fpga/constraints/debug4.xdc +++ b/fpga/constraints/debug4.xdc @@ -107,9 +107,9 @@ connect_debug_port u_ila_0/probe19 [get_nets [list {wallypipelinedsocwrapper/wal create_debug_port u_ila_0 probe -set_property port_width 63 [get_debug_ports u_ila_0/probe20] +set_property port_width 64 [get_debug_ports u_ila_0/probe20] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe20] -connect_debug_port u_ila_0/probe20 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[5]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[6]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[27]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[28]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[47]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[49]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[50]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[63]} ]] +connect_debug_port u_ila_0/probe20 [get_nets [list {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[0]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[1]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[2]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[3]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[4]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[5]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[6]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[7]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[8]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[9]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[10]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[11]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[12]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[13]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[14]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[15]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[16]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[17]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[18]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[19]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[20]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[21]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[22]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[23]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[24]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[25]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[26]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[27]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[28]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[29]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[30]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[31]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[32]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[33]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[34]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[35]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[36]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[37]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[38]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[39]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[40]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[41]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[42]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[43]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[44]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[45]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[46]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[47]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[48]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[49]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[50]} 
{wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[51]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[52]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[53]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[54]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[55]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[56]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[57]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[58]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[59]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[60]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[61]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[62]} {wallypipelinedsocwrapper/wallypipelinedsoc/core/priv.priv/csr/csrs.csrs/STVEC_REGW[63]} ]] create_debug_port u_ila_0 probe diff --git a/fpga/generator/Makefile b/fpga/generator/Makefile index f481e2c81..2a1d4a60e 100644 --- a/fpga/generator/Makefile +++ b/fpga/generator/Makefile @@ -6,20 +6,20 @@ dst := IP #export board := vcu118 # vcu108 -#export XILINX_PART := xcvu095-ffva2104-2-e -#export XILINX_BOARD := xilinx.com:vcu108:part0:1.2 -#export board := vcu108 +export XILINX_PART := xcvu095-ffva2104-2-e +export XILINX_BOARD := xilinx.com:vcu108:part0:1.2 +export board := vcu108 # Arty A7 -export XILINX_PART := xc7a100tcsg324-1 -export XILINX_BOARD := digilentinc.com:arty-a7-100:part0:1.1 -export board := ArtyA7 +# export XILINX_PART := xc7a100tcsg324-1 +# export XILINX_BOARD := digilentinc.com:arty-a7-100:part0:1.1 +# export board := ArtyA7 # for Arty A7 and S7 boards -all: FPGA_Arty +# all: FPGA_Arty # VCU 108 and 
VCU 118 boards -#all: FPGA_VCU +all: FPGA_VCU FPGA_Arty: PreProcessFiles IP_Arty vivado -mode tcl -source wally.tcl 2>&1 | tee wally.log diff --git a/fpga/generator/wally.tcl b/fpga/generator/wally.tcl index 32baade79..f28825fae 100644 --- a/fpga/generator/wally.tcl +++ b/fpga/generator/wally.tcl @@ -96,7 +96,7 @@ if {$board=="ArtyA7"} { source ../constraints/small-debug.xdc } else { - #source ../constraints/vcu-small-debug.xdc + # source ../constraints/vcu-small-debug.xdc source ../constraints/debug4.xdc } diff --git a/fpga/src/fpgaTop.v b/fpga/src/fpgaTop.v index 43cbb1a8b..6038bb677 100644 --- a/fpga/src/fpgaTop.v +++ b/fpga/src/fpgaTop.v @@ -95,41 +95,41 @@ module fpgaTop // wire SDCCmdOE; // wire SDCCmdOut; - wire [3:0] m_axi_awid; - wire [7:0] m_axi_awlen; - wire [2:0] m_axi_awsize; - wire [1:0] m_axi_awburst; - wire [3:0] m_axi_awcache; - wire [31:0] m_axi_awaddr; - wire [2:0] m_axi_awprot; - wire m_axi_awvalid; - wire m_axi_awready; - wire m_axi_awlock; - wire [63:0] m_axi_wdata; - wire [7:0] m_axi_wstrb; - wire m_axi_wlast; - wire m_axi_wvalid; - wire m_axi_wready; - wire [3:0] m_axi_bid; - wire [1:0] m_axi_bresp; - wire m_axi_bvalid; - wire m_axi_bready; - wire [3:0] m_axi_arid; - wire [7:0] m_axi_arlen; - wire [2:0] m_axi_arsize; - wire [1:0] m_axi_arburst; - wire [2:0] m_axi_arprot; - wire [3:0] m_axi_arcache; - wire m_axi_arvalid; - wire [31:0] m_axi_araddr; - wire m_axi_arlock; - wire m_axi_arready; - wire [3:0] m_axi_rid; - wire [63:0] m_axi_rdata; - wire [1:0] m_axi_rresp; - wire m_axi_rvalid; - wire m_axi_rlast; - wire m_axi_rready; + (* mark_debug = "true" *)wire [3:0] m_axi_awid; + (* mark_debug = "true" *)wire [7:0] m_axi_awlen; + (* mark_debug = "true" *)wire [2:0] m_axi_awsize; + (* mark_debug = "true" *)wire [1:0] m_axi_awburst; + (* mark_debug = "true" *)wire [3:0] m_axi_awcache; + (* mark_debug = "true" *)wire [31:0] m_axi_awaddr; + (* mark_debug = "true" *)wire [2:0] m_axi_awprot; + (* mark_debug = "true" *)wire m_axi_awvalid; + (* 
mark_debug = "true" *)wire m_axi_awready; + (* mark_debug = "true" *)wire m_axi_awlock; + (* mark_debug = "true" *)wire [63:0] m_axi_wdata; + (* mark_debug = "true" *)wire [7:0] m_axi_wstrb; + (* mark_debug = "true" *)wire m_axi_wlast; + (* mark_debug = "true" *)wire m_axi_wvalid; + (* mark_debug = "true" *)wire m_axi_wready; + (* mark_debug = "true" *)wire [3:0] m_axi_bid; + (* mark_debug = "true" *)wire [1:0] m_axi_bresp; + (* mark_debug = "true" *)wire m_axi_bvalid; + (* mark_debug = "true" *)wire m_axi_bready; + (* mark_debug = "true" *)wire [3:0] m_axi_arid; + (* mark_debug = "true" *)wire [7:0] m_axi_arlen; + (* mark_debug = "true" *)wire [2:0] m_axi_arsize; + (* mark_debug = "true" *)wire [1:0] m_axi_arburst; + (* mark_debug = "true" *)wire [2:0] m_axi_arprot; + (* mark_debug = "true" *)wire [3:0] m_axi_arcache; + (* mark_debug = "true" *)wire m_axi_arvalid; + (* mark_debug = "true" *)wire [31:0] m_axi_araddr; + (* mark_debug = "true" *)wire m_axi_arlock; + (* mark_debug = "true" *)wire m_axi_arready; + (* mark_debug = "true" *)wire [3:0] m_axi_rid; + (* mark_debug = "true" *)wire [63:0] m_axi_rdata; + (* mark_debug = "true" *)wire [1:0] m_axi_rresp; + (* mark_debug = "true" *)wire m_axi_rvalid; + (* mark_debug = "true" *)wire m_axi_rlast; + (* mark_debug = "true" *)wire m_axi_rready; // Extra Bus signals wire [3:0] BUS_axi_arregion; diff --git a/linux/buildroot-scripts/Makefile b/linux/buildroot-scripts/Makefile index d8da7092f..6cb7f8f37 100644 --- a/linux/buildroot-scripts/Makefile +++ b/linux/buildroot-scripts/Makefile @@ -1,4 +1,4 @@ -BUILDROOT := ${RISCV}/buildroot +BUILDROOT := ${RISCV}/buildroot2023 IMAGES := ${BUILDROOT}/output/images DIS := ${IMAGES}/disassembly diff --git a/linux/sdcard/flash-sd.sh b/linux/sdcard/flash-sd.sh index d57347698..8b1b6ebdc 100755 --- a/linux/sdcard/flash-sd.sh +++ b/linux/sdcard/flash-sd.sh @@ -3,42 +3,47 @@ # Exit on any error (return code != 0) # set -e +usage() { echo "Usage: $0 [-zh] [-b ] " 1>&2; exit 1; } + 
+help() { + echo "Usage: $0 [OPTIONS] " + echo " -z wipes card with zeros" + echo " -b get images from given buildroot" + echo " -d specify device tree to use" + exit 0; +} + # Output colors GREEN="\e[32m" RED="\e[31m" BOLDRED="\e[1;91m" BOLDGREEN="\e[1;32m" +BOLDYELLOW="\e[1;33m" NC="\e[0m" NAME="$BOLDGREEN"${0:2}:"$NC" ERRORTEXT="$BOLDRED"ERROR:"$NC" -# File location variables +# Default values for buildroot and device tree RISCV=/opt/riscv BUILDROOT=$RISCV/buildroot -IMAGES=$BUILDROOT/output/images -FW_JUMP=$IMAGES/fw_jump.bin -LINUX_KERNEL=$IMAGES/Image -DEVICE_TREE=$IMAGES/wally-vcu108.dtb - -# Mount Directory +DEVICE_TREE=wally-vcu108.dtb MNT_DIR=wallyimg -# Usage function -usage() { echo "Usage: $0 [-z] [-b ] " 1>&2; exit 1; } - # Process options and arguments. The following code grabs the single # sdcard device argument no matter where it is in the positional # parameters list. ARGS=() while [ $OPTIND -le "$#" ] ; do - if getopts "hzb:" arg ; then + if getopts "hzb:d:" arg ; then case "${arg}" in - h) usage + h) help ;; z) WIPECARD=y ;; b) BUILDROOT=${OPTARG} ;; + d) DEVICE_TREE=${OPTARG} + ;; esac else ARGS+=("${!OPTIND}") @@ -46,14 +51,21 @@ while [ $OPTIND -le "$#" ] ; do fi done +# File location variables +IMAGES=$BUILDROOT/output/images +FW_JUMP=$IMAGES/fw_jump.bin +LINUX_KERNEL=$IMAGES/Image +DEVICE_TREE=$IMAGES/$DEVICE_TREE + SDCARD=${ARGS[0]} +# User Error Checks =================================================== + if [ "$#" -eq "0" ] ; then usage fi # Check to make sure sd card device exists - if [ ! -e "$SDCARD" ] ; then echo -e "$NAME $ERRORTEXT SD card device does not exist." exit 1 @@ -76,10 +88,13 @@ fi # Ensure device tree binaries exist if [ ! 
-e $DEVICE_TREE ] ; then - echo -e '$ERRORTEXT Missing device tree files' + echo -e "$NAME $ERRORTEXT Missing device tree files" + echo -e "$NAME generating all device tree files into buildroot" make -C ../ generate BUILDROOT=$BUILDROOT fi +# Calculate partition information ===================================== + # Size of OpenSBI and the Kernel in 512B blocks DST_SIZE=$(ls -la --block-size=512 $DEVICE_TREE | cut -d' ' -f 5 ) FW_JUMP_SIZE=$(ls -la --block-size=512 $FW_JUMP | cut -d' ' -f 5 ) diff --git a/linux/sdcard/make-img.sh b/linux/sdcard/make-img.sh index a635d04db..12a9084d4 100755 --- a/linux/sdcard/make-img.sh +++ b/linux/sdcard/make-img.sh @@ -68,12 +68,19 @@ if [ ! -e $1 ] ; then # --new=3:$FS_START:-0 --change-name=3:'filesystem' \ # $1 + # echo -e "$NAME: Creating GUID Partition Table" + # sudo sgdisk -g --clear --set-alignment=1 \ + # --new=1:34:+$DST_SIZE: --change-name=1:'fdt' \ + # --new=2:$FW_JUMP_START:+$FW_JUMP_SIZE --change-name=2:'opensbi' --typecode=1:2E54B353-1271-4842-806F-E436D6AF6985 \ + # --new=3:$KERNEL_START:+$KERNEL_SIZE --change-name=3:'kernel' \ + # --new=4:$FS_START:-0 --change-name=4:'filesystem' \ + # $1 + echo -e "$NAME: Creating GUID Partition Table" sudo sgdisk -g --clear --set-alignment=1 \ --new=1:34:+$DST_SIZE: --change-name=1:'fdt' \ --new=2:$FW_JUMP_START:+$FW_JUMP_SIZE --change-name=2:'opensbi' --typecode=1:2E54B353-1271-4842-806F-E436D6AF6985 \ --new=3:$KERNEL_START:+$KERNEL_SIZE --change-name=3:'kernel' \ - --new=4:$FS_START:-0 --change-name=4:'filesystem' \ $1 LOOPDEVICE=$(sudo losetup -f) @@ -94,14 +101,14 @@ if [ ! 
-e $1 ] ; then echo -e "$NAME: Copying Kernel" sudo dd if=$LINUX_KERNEL of="$LOOPDEVICE"p3 $DD_FLAGS - sudo mkfs.ext4 "$LOOPDEVICE"p4 - sudo mkdir /mnt/$MNT_DIR + # sudo mkfs.ext4 "$LOOPDEVICE"p4 + # sudo mkdir /mnt/$MNT_DIR - sudo mount -v "$LOOPDEVICE"p4 /mnt/$MNT_DIR + # sudo mount -v "$LOOPDEVICE"p4 /mnt/$MNT_DIR - sudo umount -v /mnt/$MNT_DIR + # sudo umount -v /mnt/$MNT_DIR - sudo rmdir /mnt/$MNT_DIR + # sudo rmdir /mnt/$MNT_DIR sudo losetup -d $LOOPDEVICE fi diff --git a/linux/testvector-generation/genInitMem.sh b/linux/testvector-generation/genInitMem.sh index b48a64896..c78485958 100755 --- a/linux/testvector-generation/genInitMem.sh +++ b/linux/testvector-generation/genInitMem.sh @@ -1,6 +1,6 @@ #!/bin/bash tcpPort=1235 -imageDir=/home/ross/repos/buildroot/output/images +imageDir=/home/jpease/repos/buildroot3/output/images tvDir=linux-testvectors rawRamFile="$tvDir/ramGDB.bin" ramFile="$tvDir/ram.bin" @@ -36,7 +36,7 @@ then echo "Launching QEMU in replay mode!" (qemu-system-riscv64 \ - -M virt -dtb /home/ross/repos/buildroot/output/images/wally-artya7.dtb \ + -M virt -dtb /home/jpease/repos/buildroot3/output/images/wally-artya7.dtb \ -nographic \ -bios $imageDir/fw_jump.elf -kernel $imageDir/Image -append "root=/dev/vda ro" -initrd $imageDir/rootfs.cpio \ -gdb tcp::$tcpPort -S) \ diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 52b54029f..1714544ec 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -101,6 +101,7 @@ module cache import cvw::*; #(parameter cvw_t P, logic [LINELEN/8-1:0] LineByteMask; logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; logic ZeroCacheLine; + logic CMOZeroHit; logic [LINELEN-1:0] PreLineWriteData; genvar index; @@ -119,7 +120,7 @@ module cache import cvw::*; #(parameter cvw_t P, // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(P, PA_BITS, XLEN, NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN, READ_ONLY_CACHE) CacheWays[NUMWAYS-1:0]( .clk, .reset, 
.CacheEn, .CMOp, .CacheSet, .PAdr, .LineWriteData, .LineByteMask, - .SetValid, .ClearValid, .SetDirty, .ClearDirty, .ZeroCacheLine, .SelWriteback, .SelCMOWriteback, .VictimWay, + .SetValid, .ClearValid, .SetDirty, .ClearDirty, .CMOZeroHit, .SelWriteback, .SelCMOWriteback, .VictimWay, .FlushWay, .SelFlush, .ReadDataLineWay, .HitWay, .ValidWay, .DirtyWay, .TagWay, .FlushStage, .InvalidateCache); // Select victim way for associative caches @@ -225,7 +226,7 @@ module cache import cvw::*; #(parameter cvw_t P, .FlushStage, .CacheRW, .CacheAtomic, .Stall, .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, - .ClearDirty, .SetDirty, .SetValid, .ClearValid, .ZeroCacheLine, .SelWriteback, .SelCMOWriteback, .SelFlush, + .ClearDirty, .SetDirty, .SetValid, .ClearValid, .ZeroCacheLine, .CMOZeroHit, .SelWriteback, .SelCMOWriteback, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, .CMOp, .CacheEn, .LRUWriteEn); diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 5be35a1a8..124b92678 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -60,6 +60,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, output logic SetDirty, // Set the dirty bit in the selected way and set output logic ClearDirty, // Clear the dirty bit in the selected way and set output logic ZeroCacheLine, // Write zeros to all bytes of cacheline + output logic CMOZeroHit, // CMOZ hit output logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback output logic SelCMOWriteback, // Overrides cached tag check to select a specific way and set for writeback for both data and tag output logic LRUWriteEn, // Update the LRU state @@ -75,7 +76,10 @@ module cachefsm import cvw::*; #(parameter cvw_t P, logic AnyUpdateHit, AnyHit; logic AnyMiss; logic FlushFlag; - + logic CMOWritebackHit; + logic CMOZeroNoEviction; + logic CMOZeroEviction; + 
typedef enum logic [3:0]{STATE_READY, // hit states // miss states STATE_FETCH, @@ -93,8 +97,12 @@ module cachefsm import cvw::*; #(parameter cvw_t P, statetype CurrState, NextState; assign AnyMiss = (CacheRW[0] | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss - assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 + assign AnyUpdateHit = (CacheRW[0]) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit + assign CMOWritebackHit = (CMOp[1] | CMOp[2]) & CacheHit; + assign CMOZeroNoEviction = CMOp[3] & ~LineDirty; // (hit or miss) with no writeback store zeros now + assign CMOZeroEviction = CMOp[3] & LineDirty; // (hit or miss) with writeback dirty line + assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. @@ -117,8 +125,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READY: if(InvalidateCache) NextState = STATE_READY; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; // exclusion-tag: icache FETCHStatement - else if(AnyMiss) /* & LineDirty */ NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement - else if((CMOp[1] | CMOp[2]) & CacheHit) NextState = STATE_CMO_WRITEBACK; + else if(AnyMiss | CMOZeroEviction) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement + else if(CMOWritebackHit) NextState = STATE_CMO_WRITEBACK; else NextState = STATE_READY; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else if(CacheBusAck) NextState = STATE_READY; @@ -127,7 +135,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; else NextState = STATE_READY; // exclusion-tag-start: icache case - STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH; + 
STATE_WRITEBACK: if(CacheBusAck & ~CMOp[3]) NextState = STATE_FETCH; + else if(CacheBusAck) NextState = STATE_CMO_DONE; else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; @@ -139,6 +148,8 @@ module cachefsm import cvw::*; #(parameter cvw_t P, STATE_CMO_WRITEBACK: if(CacheBusAck & (CMOp[1] | CMOp[2])) NextState = STATE_CMO_DONE; else NextState = STATE_CMO_WRITEBACK; + STATE_CMO_DONE: if(Stall) NextState = STATE_CMO_DONE; + else NextState = STATE_READY; // exclusion-tag-end: icache case default: NextState = STATE_READY; endcase @@ -146,7 +157,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // com back to CPU assign CacheCommitted = (CurrState != STATE_READY) & ~(READ_ONLY_CACHE & (CurrState == STATE_READ_HOLD | CurrState == STATE_CMO_DONE)); - assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss | ((CMOp[1] | CMOp[2]) & CacheHit))) | // exclusion-tag: icache StallStates + assign CacheStall = (CurrState == STATE_READY & (FlushCache | AnyMiss | CMOWritebackHit | CMOZeroEviction)) | // exclusion-tag: icache StallStates (CurrState == STATE_FETCH) | (CurrState == STATE_WRITEBACK) | (CurrState == STATE_WRITE_LINE) | // this cycle writes the sram, must keep stalling so the next cycle can read the next hit/miss unless its a write. 
@@ -154,21 +165,26 @@ module cachefsm import cvw::*; #(parameter cvw_t P, (CurrState == STATE_FLUSH_WRITEBACK) | (CurrState == STATE_CMO_WRITEBACK); // write enables internal to cache + assign CMOZeroHit = CurrState == STATE_READY & CMOp[3] & CacheHit ; assign SetValid = CurrState == STATE_WRITE_LINE | - (CurrState == STATE_READY & CMOp[3]); // *** RT: NOT completely right has to be a hit - assign ClearValid = P.ZICBOM_SUPPORTED & ((CurrState == STATE_READY & CMOp[0]) | + (CurrState == STATE_READY & CMOZeroNoEviction) | + (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CacheBusAck & CMOp[3]); + assign ClearValid = P.ZICBOM_SUPPORTED & ((CurrState == STATE_READY & CMOp[0] & CacheHit) | (CurrState == STATE_CMO_WRITEBACK & CMOp[2] & CacheBusAck)); // coverage off -item e 1 -fecexprrow 8 - assign LRUWriteEn = (CurrState == STATE_READY & AnyHit) | + assign LRUWriteEn = (CurrState == STATE_READY & (AnyHit | CMOZeroNoEviction)) | + (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & CMOp[3] & CacheBusAck) | (CurrState == STATE_WRITE_LINE) & ~FlushStage; // exclusion-tag-start: icache flushdirtycontrols - assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOp[3])) | // exclusion-tag: icache SetDirty *** NOT completely right has to be a hit for CMOp[3] - (CurrState == STATE_WRITE_LINE & (CacheRW[0])); + assign SetDirty = (CurrState == STATE_READY & (AnyUpdateHit | CMOZeroNoEviction)) | // exclusion-tag: icache SetDirty + (CurrState == STATE_WRITE_LINE & (CacheRW[0])) | + (P.ZICBOZ_SUPPORTED & CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck)); assign ClearDirty = (CurrState == STATE_WRITE_LINE & ~(CacheRW[0])) | // exclusion-tag: icache ClearDirty (CurrState == STATE_FLUSH & LineDirty) | // This is wrong in a multicore snoop cache protocal. Dirty must be cleared concurrently and atomically with writeback. For single core cannot clear after writeback on bus ack and change flushadr. Clears the wrong set. 
// Flush and eviction controls (P.ZICBOM_SUPPORTED & CurrState == STATE_CMO_WRITEBACK & (CMOp[1] | CMOp[2]) & CacheBusAck); - assign ZeroCacheLine = CurrState == STATE_READY & CMOp[3]; // *** RT: NOT completely right + assign ZeroCacheLine = P.ZICBOZ_SUPPORTED & ((CurrState == STATE_READY & CMOZeroNoEviction) | + (CurrState == STATE_WRITEBACK & (CMOp[3] & CacheBusAck))); assign SelWriteback = (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_READY & AnyMiss & LineDirty); assign SelCMOWriteback = CurrState == STATE_CMO_WRITEBACK; @@ -188,7 +204,7 @@ module cachefsm import cvw::*; #(parameter cvw_t P, // Bus interface controls assign CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) | // exclusion-tag: icache CacheBusRCauses (CurrState == STATE_FETCH & ~CacheBusAck) | - (CurrState == STATE_WRITEBACK & CacheBusAck); + (CurrState == STATE_WRITEBACK & CacheBusAck & ~CMOp[3]); assign CacheBusRW[0] = (CurrState == STATE_READY & AnyMiss & LineDirty) | // exclusion-tag: icache CacheBusW (CurrState == STATE_WRITEBACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITEBACK & ~CacheBusAck) | diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 85d2b36ab..216cd82d2 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -41,10 +41,10 @@ module cacheway import cvw::*; #(parameter cvw_t P, input logic SetValid, // Set the valid bit in the selected way and set input logic ClearValid, // Clear the valid bit in the selected way and set input logic SetDirty, // Set the dirty bit in the selected way and set - input logic ZeroCacheLine, // Write zeros to all bytes of a cache line + input logic CMOZeroHit, // Write zeros to all bytes of a cache line input logic ClearDirty, // Clear the dirty bit in the selected way and set input logic SelWriteback, // Overrides cached tag check to select a specific way and set for writeback - input logic SelCMOWriteback, // Overrides cached tag check to select a specific way and set for writeback for 
both data and tag + input logic SelCMOWriteback,// Overrides cached tag check to select a specific way and set for writeback for both data and tag input logic SelFlush, // [0] Use SelAdr, [1] SRAM reads/writes from FlushAdr input logic VictimWay, // LRU selected this way as victim to evict input logic FlushWay, // This way is selected for flush and possible writeback if dirty @@ -81,7 +81,9 @@ module cacheway import cvw::*; #(parameter cvw_t P, logic SelNotHit2; if (P.ZICBOZ_SUPPORTED) begin : cbologic - assign SelNotHit2 = SetValid & ~(ZeroCacheLine & HitWay); + assign SelNotHit2 = SetValid & ~CMOZeroHit; + //assign SelNotHit2 = SetValid; + end else begin : cbologic assign SelNotHit2 = SetValid; end @@ -96,7 +98,8 @@ module cacheway import cvw::*; #(parameter cvw_t P, // nonzero ways will never see SelFlush=0 while FlushWay=1 since FlushWay only advances on a subset of SelFlush assertion cases. assign FlushWayEn = FlushWay & SelFlush; // *** RT: This is slopy. I should refactor to have the fsm issue two types of writeback commands - assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; + assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; // *** this is not correct + //assign SelNonHit = FlushWayEn | SelNotHit2 | SelWriteback; end else begin:flushlogic // no flush operation for read-only caches. 
assign SelTag = VictimWay; diff --git a/src/privileged/csrs.sv b/src/privileged/csrs.sv index a15be83e6..fbe40c13c 100644 --- a/src/privileged/csrs.sv +++ b/src/privileged/csrs.sv @@ -88,7 +88,8 @@ module csrs import cvw::*; #(parameter cvw_t P) ( assign WriteSEPCM = STrapM | (CSRSWriteM & (CSRAdrM == SEPC)); assign WriteSCAUSEM = STrapM | (CSRSWriteM & (CSRAdrM == SCAUSE)); assign WriteSTVALM = STrapM | (CSRSWriteM & (CSRAdrM == STVAL)); - assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM); + if(P.XLEN == 64) assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM) & (CSRWriteValM[63:60] != 4'hA); + else assign WriteSATPM = CSRSWriteM & (CSRAdrM == SATP) & (PrivilegeModeW == P.M_MODE | ~STATUS_TVM); assign WriteSCOUNTERENM = CSRSWriteM & (CSRAdrM == SCOUNTEREN); assign WriteSENVCFGM = CSRSWriteM & (CSRAdrM == SENVCFG); assign WriteSTIMECMPM = CSRSWriteM & (CSRAdrM == STIMECMP) & STCE; diff --git a/testbench/testbench-linux.sv b/testbench/testbench-linux.sv index af6a123db..683f55952 100644 --- a/testbench/testbench-linux.sv +++ b/testbench/testbench-linux.sv @@ -153,7 +153,7 @@ module testbench; `define SSCRATCH `CSR_BASE.csrs.csrs.SSCRATCHreg.q `define MTVEC `CSR_BASE.csrm.MTVECreg.q `define STVEC `CSR_BASE.csrs.csrs.STVECreg.q - `define SATP `CSR_BASE.csrs.csrs.genblk1.SATPreg.q + `define SATP `CSR_BASE.csrs.csrs.genblk2.SATPreg.q `define INSTRET `CSR_BASE.counters.counters.HPMCOUNTER_REGW[2] `define MSTATUS `CSR_BASE.csrsr.MSTATUS_REGW `define SSTATUS `CSR_BASE.csrsr.SSTATUS_REGW diff --git a/testbench/tests.vh b/testbench/tests.vh index 51d5c00b4..88a862d0e 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1936,6 +1936,7 @@ string arch64zbs[] = '{ string wally64priv[] = '{ `WALLYTEST, "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S", + "rv64i_m/privilege/src/WALLY-cboz-01.S", "rv64i_m/privilege/src/WALLY-cbom-01.S", 
"rv64i_m/privilege/src/WALLY-csr-permission-u-01.S", "rv64i_m/privilege/src/WALLY-mie-01.S", @@ -2030,6 +2031,7 @@ string arch64zbs[] = '{ "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S", "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S", "rv32i_m/privilege/src/WALLY-cbom-01.S", + "rv32i_m/privilege/src/WALLY-cboz-01.S", "rv32i_m/privilege/src/WALLY-mie-01.S", "rv32i_m/privilege/src/WALLY-minfo-01.S", "rv32i_m/privilege/src/WALLY-misa-01.S", diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag index 472157f0d..837668c3c 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag @@ -58,6 +58,7 @@ target_tests_nosim = \ WALLY-plic-01 \ WALLY-uart-01 \ WALLY-cbom-01 \ + WALLY-cboz-01 \ rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests)) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cbom-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cbom-01.reference_output new file mode 100644 index 000000000..faf3bf658 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cbom-01.reference_output @@ -0,0 +1,428 @@ +deadbeef # begin_signature +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef # destination 1 +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef 
+deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +00000000 # destination 2 +00000001 +00000002 +00000003 +00000004 +00000005 +00000006 +00000007 +00000008 +00000009 +0000000a +0000000b +0000000c +0000000d +0000000e +0000000f +00000010 +00000011 +00000012 +00000013 +00000014 +00000015 +00000016 +00000017 +00000018 +00000019 +0000001a +0000001b +0000001c +0000001d +0000001e +0000001f +00000020 +00000021 +00000022 +00000023 +00000024 +00000025 +00000026 +00000027 +00000028 +00000029 +0000002a +0000002b +0000002c +0000002d +0000002e +0000002f +00000030 +00000031 +00000032 +00000033 +00000034 +00000035 +00000036 +00000037 +00000038 +00000039 +0000003a +0000003b +0000003c +0000003d +0000003e +0000003f +00000040 +00000041 +00000042 +00000043 +00000044 +00000045 +00000046 +00000047 +00000048 +00000049 +0000004a +0000004b +0000004c +0000004d +0000004e +0000004f +00000050 +00000051 +00000052 +00000053 +00000054 +00000055 +00000056 +00000057 +00000058 +00000059 +0000005a +0000005b +0000005c +0000005d +0000005e +0000005f +00000060 +00000061 +00000062 +00000063 +00000064 
+00000065 +00000066 +00000067 +00000068 +00000069 +0000006a +0000006b +0000006c +0000006d +0000006e +0000006f +00000070 +00000071 +00000072 +00000073 +00000074 +00000075 +00000076 +00000077 +00000078 +00000079 +0000007a +0000007b +0000007c +0000007d +0000007e +0000007f +00000000 # destination 3 +00000001 +00000002 +00000003 +00000004 +00000005 +00000006 +00000007 +00000008 +00000009 +0000000a +0000000b +0000000c +0000000d +0000000e +0000000f +00000010 +00000011 +00000012 +00000013 +00000014 +00000015 +00000016 +00000017 +00000018 +00000019 +0000001a +0000001b +0000001c +0000001d +0000001e +0000001f +00000020 +00000021 +00000022 +00000023 +00000024 +00000025 +00000026 +00000027 +00000028 +00000029 +0000002a +0000002b +0000002c +0000002d +0000002e +0000002f +00000030 +00000031 +00000032 +00000033 +00000034 +00000035 +00000036 +00000037 +00000038 +00000039 +0000003a +0000003b +0000003c +0000003d +0000003e +0000003f +00000040 +00000041 +00000042 +00000043 +00000044 +00000045 +00000046 +00000047 +00000048 +00000049 +0000004a +0000004b +0000004c +0000004d +0000004e +0000004f +00000050 +00000051 +00000052 +00000053 +00000054 +00000055 +00000056 +00000057 +00000058 +00000059 +0000005a +0000005b +0000005c +0000005d +0000005e +0000005f +00000060 +00000061 +00000062 +00000063 +00000064 +00000065 +00000066 +00000067 +00000068 +00000069 +0000006a +0000006b +0000006c +0000006d +0000006e +0000006f +00000070 +00000071 +00000072 +00000073 +00000074 +00000075 +00000076 +00000077 +00000078 +00000079 +0000007a +0000007b +0000007c +0000007d +0000007e +0000007f +ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies. 
+ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +0bad0bad # controls +0bad0bad +0bad0bad diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cboz-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cboz-01.reference_output new file mode 100644 index 000000000..ef91aa32f --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-cboz-01.reference_output @@ -0,0 +1,188 @@ +deadbeef # begin_signature +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +00000000 # destination 1 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 
+00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 # destination 2 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies. +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +0bad0bad +0bad0bad diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cbom-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cbom-01.S new file mode 100644 index 000000000..3c129b998 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cbom-01.S @@ -0,0 +1,472 @@ +/////////////////////////////////////////// +// +// WALLY-cache-management-tests +// invalidate, clean, and flush +// +// Author: Rose Thompson +// +// Created 18 August 2023 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# Purpose: Tests the 3 Zicbom cache instructions which all operate on cacheline +# granularity blocks of memory. Invalidate: Clears valid and dirty bits +# and does not write back. Clean: Writes back dirty cacheline if needed +# and clears dirty bit. Does NOT clear valid bit. Flush: Cleans and then +# Invalidates. These operations apply to all caches in the memory system. +# The tests are divided into three parts one for the data cache, instruction cache +# and checks to verify the uncached regions of memory cause exceptions. +# ----------- +# Copyright (c) 2020. RISC-V International. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# ----------- +# +# This assembly file tests the cbo.inval, cbo.clean, and cbo.flush instructions of the RISC-V Zicbom extension. +# + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV32I_Zicbom") +# Test code region +.section .text.init +.globl rvtest_entry_point + +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.inval) + +RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n") + +CBOMTest: + # *** TODO + # first need to discover the length of the cacheline. + # for now assume it is 64 bytes + + #addi sp, sp, -16 + #sd s0, 0(sp) + #sd ra, 8(sp) + + la s0, signature + + ################################################################################ + # INVALIDATE D$ + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Invalidate the second region + # 4. Verify the second region has the original invalid data + # DON'T batch each step. We want to see the transition between cachelines. The current should be invalidated + # but the next should have the copied data. 
+ + # step 1 +CBOMTest_inval_step1: + la a0, SourceData + la a1, Destination1 + li a2, 128 + jal ra, memcpy4 + + # step 2 +CBOMTest_inval_step2: + la a0, SourceData + la a1, Destination1 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 +CBOMTest_inval_step3: + la a1, Destination1 + cbo.inval (a1) + # step 4 (should be Invalid) + la a0, DeadBeafData1 + la a1, Destination1 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 4 next line (should still be valid) +CBOMTest_inval_step4: + la a0, SourceData+64 + la a1, Destination1+64 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 (Invalidate all remaining lines) +CBOMTest_inval_step3_all: + la a1, Destination1+64 + cbo.inval (a1) + cbo.inval (a1) # verify invalidating an already non present line does not cause an issue. + la a1, Destination1+128 + cbo.inval (a1) + la a1, Destination1+192 + cbo.inval (a1) + la a1, Destination1+256 + cbo.inval (a1) + la a1, Destination1+320 + cbo.inval (a1) + la a1, Destination1+384 + cbo.inval (a1) + la a1, Destination1+448 + cbo.inval (a1) + + # step 4 All should be invalid +CBOMTest_inval_step4_all: + la a0, DeadBeafData1 + la a1, Destination1 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + ################################################################################ + # Clean D$ + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Invalidate the second region + # 4. Verify the second region has the original invalid data + # 5. Repeat step 1 + # 6. Clean cachelines + # 7. Verify the second region has the same data + # 8. Invalidate the second region + # 9. 
Verify again but this time it should contain the same data + # DON'T batch each step. We want to see the transition between cachelines. The current should be invalidated + # but the next should have the copied data. + + # step 1 +CBOMTest_clean_step1: + la a0, SourceData + la a1, Destination2 + li a2, 128 + jal ra, memcpy4 + + # step 2 +CBOMTest_clean_step2: + la a0, SourceData + la a1, Destination2 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 +CBOMTest_clean_step3: + la a1, Destination2 + cbo.inval (a1) + la a1, Destination2+64 + cbo.inval (a1) + la a1, Destination2+128 + cbo.inval (a1) + la a1, Destination2+192 + cbo.inval (a1) + la a1, Destination2+256 + cbo.inval (a1) + la a1, Destination2+320 + cbo.inval (a1) + la a1, Destination2+384 + cbo.inval (a1) + la a1, Destination2+448 + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + cbo.inval (a1) + + # step 4 All should be invalid +CBOMTest_clean_step4: + la a0, DeadBeafData1 + la a1, Destination2 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 5 +CBOMTest_clean_step5: + la a0, SourceData + la a1, Destination2 + li a2, 128 + jal ra, memcpy4 + + # step 6 only clean 1 line +CBOMTest_clean_step6: + la a1, Destination2 + cbo.clean (a1) + + # step 7 only check that 1 line +CBOMTest_clean_step7: + la a0, SourceData + la a1, Destination2 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 8 invalidate that 1 line and the next +CBOMTest_clean_step8: + la a1, Destination2 + cbo.inval (a1) + la a1, Destination2+64 + cbo.inval (a1) + + # step 9 that 1 line should contain the valid data +CBOMTest_clean_step9_line1: + la a0, SourceData + la a1, Destination2 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 9 the next should contain the invalid data 
+CBOMTest_clean_step9_line2: + la a0, DeadBeafData1 + la a1, Destination2+64 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 5 # now recopy the one we just corrupted +CBOMTest_clean_step5_recopy_line2: + la a0, SourceData+64 + la a1, Destination2+64 + li a2, 16 + jal ra, memcpy4 + + # step 6 # clean the remaining +CBOMTest_clean_step6_clean_all: + la a1, Destination2+64 + cbo.clean (a1) + la a1, Destination2+128 + cbo.clean (a1) + la a1, Destination2+192 + cbo.clean (a1) + la a1, Destination2+256 + cbo.clean (a1) + la a1, Destination2+320 + cbo.clean (a1) + la a1, Destination2+384 + cbo.clean (a1) + la a1, Destination2+448 + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + cbo.clean (a1) + + # step 8 # invalidate all remaining +CBOMTest_clean_step7_invalidate_all: + la a1, Destination2 + cbo.inval (a1) + la a1, Destination2+64 + cbo.inval (a1) + la a1, Destination2+128 + cbo.inval (a1) + la a1, Destination2+192 + cbo.inval (a1) + la a1, Destination2+256 + cbo.inval (a1) + la a1, Destination2+320 + cbo.inval (a1) + la a1, Destination2+384 + cbo.inval (a1) + la a1, Destination2+448 + cbo.inval (a1) + + # step 9 # check all +CBOMTest_clean_step9_check_all: + la a0, SourceData + la a1, Destination2 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + ################################################################################ + # Flush D$ line + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. For flush there is no way to create a negative control. We will flush 1 cache line + # 4. Verify whole region + # 5. Flush the remaining lines + # 6. 
Verify whole region + + # step 1 +CBOMTest_flush_step1: + la a0, SourceData + la a1, Destination3 + li a2, 128 + jal ra, memcpy4 + + # step 2 All should be valid +CBOMTest_flush_step2_verify: + la a0, SourceData + la a1, Destination3 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 # flush 1 line +CBOMTest_flush_step3: + la a1, Destination3 + cbo.flush (a1) + + # step 4 +CBOMTest_flush_step4_verify: + la a0, SourceData + la a1, Destination3 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 5 +CBOMTest_flush_step5_flush_all: + la a1, Destination3 + cbo.flush (a1) + la a1, Destination3+64 + cbo.flush (a1) + la a1, Destination3+128 + cbo.flush (a1) + la a1, Destination3+192 + cbo.flush (a1) + la a1, Destination3+256 + cbo.flush (a1) + la a1, Destination3+320 + cbo.flush (a1) + la a1, Destination3+384 + cbo.flush (a1) + la a1, Destination3+448 + cbo.flush (a1) + cbo.flush (a1) + cbo.flush (a1) + cbo.flush (a1) + cbo.flush (a1) + cbo.flush (a1) + + # step 6 +CBOMTest_flush_step6_verify: + la a0, SourceData + la a1, Destination3 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + + #lw s0, 0(sp) + #lw ra, 8(sp) + #addi sp, sp, 16 + #ret +RVMODEL_HALT + + +.type memcpy4, @function +memcpy4: + # a0 is the source + # a1 is the dst + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcpy4_loop: + lw t3, 0(t0) + sw t3, 0(t1) + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcpy4_loop + ret + +.type memcmp4, @function +# returns which index mismatch, -1 if none +memcmp4: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp4_loop: + lw t3, 0(t0) + lw t4, 0(t1) + bne t3, t4, memcmp4_ne + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcmp4_loop + li a0, -1 + ret +memcmp4_ne: + mv a0, t2 + ret + +RVTEST_CODE_END + + +RVTEST_DATA_BEGIN +# Input 
data section. +#.data +.align 7 + +DeadBeafData1: + .fill 128, 4, 0xdeadbeef +SourceData: + .int 0, 1, 2, 3, 4, 5, 6, 7 + .int 8, 9, 10, 11, 12, 13, 14, 15 + .int 16, 17, 18, 19, 20, 21, 22, 23 + .int 24, 25, 26, 27, 28, 29, 30, 31 + .int 32, 33, 34, 35, 36, 37, 38, 39 + .int 40, 41, 42, 43, 44, 45, 46, 47 + .int 48, 49, 50, 51, 52, 53, 54, 55 + .int 56, 57, 58, 59, 60, 61, 62, 63 + .int 64, 65, 66, 67, 68, 69, 70, 71 + .int 72, 73, 74, 75, 76, 77, 78, 79 + .int 80, 81, 82, 83, 84, 85, 86, 87 + .int 88, 89, 90, 91, 92, 93, 94, 95 + .int 96, 97, 98, 99, 100, 101, 102, 103 + .int 104, 105, 106, 107, 108, 109, 110, 111 + .int 112, 113, 114, 115, 116, 117, 118, 119 + .int 120, 121, 122, 123, 124, 125, 126, 127 + +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + .fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert + # 4 bytes. This needs to be aligned to a cacheline + + .align 6 +Destination1: + .fill 128, 4, 0xdeadbeef +Destination2: + .fill 128, 4, 0xdeadbeef +Destination3: + .fill 128, 4, 0xdeadbeef +signature: + .fill 16, 4, 0x0bad0bad + +RVMODEL_DATA_END + diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cboz-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cboz-01.S new file mode 100644 index 000000000..207c727ec --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-cboz-01.S @@ -0,0 +1,377 @@ +/////////////////////////////////////////// +// +// WALLY-cache-management-tests +// invalidate, clean, and flush +// +// Author: Rose Thompson +// +// Created 22 August 2023 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# Purpose: Tests the Zicboz cache instruction which all operate on cacheline +# granularity blocks of memory. The instruction cbo.zero allocates a cacheline +# and writes 0 to each byte. A dirty cacheline is overwritten, any data in main +# memory is over written. +# ----------- +# Copyright (c) 2020. 
RISC-V International. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------- +# +# This assembly file tests the cbo.zero instruction of the RISC-V Zicboz extension (cbo.flush from Zicbom is used only to force a miss). +# + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV32I_Zicboz_Zicbom") +# Test code region +.section .text.init +.globl rvtest_entry_point + +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.zero) + +RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n") + +CBOZTest: + # *** TODO + # first need to discover the length of the cacheline. + # for now assume it is 64 bytes + + #addi sp, sp, -16 + #sd s0, 0(sp) + #sd ra, 8(sp) + + la s0, signature + + ################################################################################ + # Zero cache line hit overwrites + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Zero that region of memory + # 4. Verify the second region is all zero. 
+ + # step 1 +CBOZTest_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 128 + jal ra, memcpy4 + + # step 2 +CBOZTest_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 +CBOZTest_zero_step3: + la a1, Destination1 + cbo.zero (a1) + la a1, Destination1+64 + cbo.zero (a1) + la a1, Destination1+128 + cbo.zero (a1) + la a1, Destination1+192 + cbo.zero (a1) + la a1, Destination1+256 + cbo.zero (a1) + la a1, Destination1+320 + cbo.zero (a1) + la a1, Destination1+384 + cbo.zero (a1) + la a1, Destination1+448 + cbo.zero (a1) + +CBOZTest_zero_step4: + # step 4 (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 128 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + ################################################################################ + # Verify cbo.zero miss overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Flush that one line + # 4. Zero that one line + # 5. 
Verify the second region is zero + + # step 1 +CBOZTest_miss_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 16 + jal ra, memcpy4 + + # step 2 +CBOZTest_miss_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 3 +CBOZTest_miss_zero_step3: + la a1, Destination1 + cbo.flush (a1) + cbo.zero (a1) + +CBOZTest_miss_zero_step4: + # step 4 (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + ################################################################################ + # Verify cbo.zero miss with eviction overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Repeat step 1 four more times at 4KiB intervals + # 3. Then verify the second region has the same data + # 4. Zero each line + # 5. 
Verify the second region is zero + + # step 1 +CBOZTest_eviction_zero_step1_0: + la a0, SourceData + la a1, Destination2 + li a2, 16 + jal ra, memcpy4 + +CBOZTest_eviction_zero_step2_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 16 + jal ra, memcpy4 + +CBOZTest_eviction_zero_step2_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 16 + jal ra, memcpy4 + +CBOZTest_eviction_zero_step2_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 16 + jal ra, memcpy4 + +CBOZTest_eviction_zero_step2_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 16 + jal ra, memcpy4 + + # step 3 +CBOZTest_eviction_zero_step3_0: + la a0, SourceData + la a1, Destination2 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step3_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step3_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step3_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step3_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + # step 4 +CBOZTest_eviction_zero_step4: + la a1, Destination2 + cbo.zero (a1) + la a1, Destination2+4096 + cbo.zero (a1) + la a1, Destination2+8192 + cbo.zero (a1) + la a1, Destination2+12288 + cbo.zero (a1) + la a1, Destination2+16384 + cbo.zero (a1) + +CBOZTest_eviction_zero_step5_0: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step5_4096: + # step 5 (should be zero) + la a0, ZeroData + la a1, 
Destination2+4096 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step5_8192: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+8192 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step5_12288: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+12288 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + +CBOZTest_eviction_zero_step5_16384: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+16384 + li a2, 16 + jal ra, memcmp4 + sw a0, 0(s0) # should be -1 + addi s0, s0, 4 + + + #ld s0, 0(sp) + #ld ra, 8(sp) + #addi sp, sp, 16 + #ret +RVMODEL_HALT + + +.type memcpy4, @function +memcpy4: + # a0 is the source + # a1 is the dst + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcpy4_loop: + lw t3, 0(t0) + sw t3, 0(t1) + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcpy4_loop + ret + +.type memcmp4, @function +# returns which index mismatch, -1 if none +memcmp4: + # a0 is the source1 + # a1 is the source2 + # a2 is the number of 4 byte words + mv t0, a0 + mv t1, a1 + li t2, 0 +memcmp4_loop: + lw t3, 0(t0) + lw t4, 0(t1) + bne t3, t4, memcmp4_ne + addi t0, t0, 4 + addi t1, t1, 4 + addi t2, t2, 1 + blt t2, a2, memcmp4_loop + li a0, -1 + ret +memcmp4_ne: + mv a0, t2 + ret + +RVTEST_CODE_END + + +RVTEST_DATA_BEGIN +# Input data section. 
+#.data +.align 7 + +ZeroData: + .fill 128, 4, 0x0 +SourceData: + .int 0, 1, 2, 3, 4, 5, 6, 7 + .int 8, 9, 10, 11, 12, 13, 14, 15 + .int 16, 17, 18, 19, 20, 21, 22, 23 + .int 24, 25, 26, 27, 28, 29, 30, 31 + .int 32, 33, 34, 35, 36, 37, 38, 39 + .int 40, 41, 42, 43, 44, 45, 46, 47 + .int 48, 49, 50, 51, 52, 53, 54, 55 + .int 56, 57, 58, 59, 60, 61, 62, 63 + .int 64, 65, 66, 67, 68, 69, 70, 71 + .int 72, 73, 74, 75, 76, 77, 78, 79 + .int 80, 81, 82, 83, 84, 85, 86, 87 + .int 88, 89, 90, 91, 92, 93, 94, 95 + .int 96, 97, 98, 99, 100, 101, 102, 103 + .int 104, 105, 106, 107, 108, 109, 110, 111 + .int 112, 113, 114, 115, 116, 117, 118, 119 + .int 120, 121, 122, 123, 124, 125, 126, 127 + +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + .fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert + # 4 bytes. This needs to be aligned to a cacheline + + .align 6 +Destination1: + .fill 128, 4, 0xdeadbeef +Destination2: + .fill 16, 4, 0xdeadbeef +signature: + .fill 16, 4, 0x0bad0bad + +RVMODEL_DATA_END + diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag index 6b13612ce..bc5f454bb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/Makefrag @@ -57,6 +57,7 @@ target_tests_nosim = \ WALLY-status-fp-enabled-01 \ WALLY-wfi-01 \ WALLY-cbom-01 \ + WALLY-cboz-01 \ # unclear why status-fp-enabled and wfi aren't simulating ok diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output new file mode 100644 index 000000000..644fa6f0b --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-cboz-01.reference_output @@ -0,0 +1,204 @@ +deadbeef # begin_signature 
+deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +deadbeef +00000000 # destination 1 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 # destination 2 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +00000000 +ffffffff # signature The test writes -1 for correct answers and the a positive integer for incorrect copies. 
+ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +ffffffff +0bad0bad +0bad0bad +0bad0bad +0bad0bad diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S new file mode 100644 index 000000000..22b076261 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-cboz-01.S @@ -0,0 +1,377 @@ +/////////////////////////////////////////// +// +// WALLY-cache-management-tests +// cbo.zero (cache block zero) +// +// Author: Rose Thompson +// +// Created 22 August 2023 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +# Purpose: Tests the Zicboz cache instruction, which operates on cacheline +# granularity blocks of memory. The instruction cbo.zero allocates a cacheline +# and writes 0 to each byte. A dirty cacheline is overwritten; any data in main +# memory is overwritten. +# ----------- +# Copyright (c) 2020. RISC-V International. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# ----------- +# +# This assembly file tests the cbo.zero instruction of the RISC-V Zicboz extension (cbo.flush from Zicbom is used only in the miss test). +# + +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV64I_Zicboz_Zicbom") +# Test code region +.section .text.init +.globl rvtest_entry_point + +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +RVTEST_CASE(0,"//check ISA:=regex(.*64.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;def NO_SAIL=True;",cbo.zero) + +RVMODEL_IO_WRITE_STR(x31, "# Test Begin\n") + +CBOZTest: + # *** TODO + # first need to discover the length of the cacheline. 
+ # for now assume it is 64 bytes + + #addi sp, sp, -16 + #sd s0, 0(sp) + #sd ra, 8(sp) + + la s0, signature # s0 points at the next free signature slot; each check below stores one 8-byte result and advances it + + ################################################################################ + # Zero cache line hit overwrites + ################################################################################ + + # theory of operation + # 1. Read several cachelines of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Zero that region of memory + # 4. Verify the second region is all zero. + + # step 1: copy 64 doublewords (512 bytes) from SourceData to Destination1 +CBOZTest_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 64 + jal ra, memcpy8 + + # step 2: compare the copy against SourceData and record the result +CBOZTest_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 64 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + # step 3: cbo.zero at eight addresses 64 bytes apart, spanning the 512 bytes of Destination1 +CBOZTest_zero_step3: + la a1, Destination1 + cbo.zero (a1) + la a1, Destination1+64 + cbo.zero (a1) + la a1, Destination1+128 + cbo.zero (a1) + la a1, Destination1+192 + cbo.zero (a1) + la a1, Destination1+256 + cbo.zero (a1) + la a1, Destination1+320 + cbo.zero (a1) + la a1, Destination1+384 + cbo.zero (a1) + la a1, Destination1+448 + cbo.zero (a1) + +CBOZTest_zero_step4: + # step 4: compare Destination1 against ZeroData (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 64 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + ################################################################################ + # Verify cbo.zero miss overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Then verify the second region has the same data + # 3. Flush that one line + # 4. Zero that one line + # 5. 
Verify the second region is zero + + # step 1: copy 8 doublewords (64 bytes) from SourceData to Destination1 +CBOZTest_miss_zero_step1: + la a0, SourceData + la a1, Destination1 + li a2, 8 + jal ra, memcpy8 + + # step 2: compare the copy against SourceData and record the result +CBOZTest_miss_zero_step2: + la a0, SourceData + la a1, Destination1 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + # steps 3 and 4: flush the line, then zero it +CBOZTest_miss_zero_step3: + la a1, Destination1 + cbo.flush (a1) + cbo.zero (a1) + +CBOZTest_miss_zero_step4: + # step 5: compare Destination1 against ZeroData (should be zero) + la a0, ZeroData + la a1, Destination1 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + ################################################################################ + # Verify cbo.zero miss with eviction overwrites + ################################################################################ + + # theory of operation + # 1. Read 1 cacheline of data from memory into the d cache and copy to a second region of memory + # 2. Repeat 1 four times at 4KiB intervals + # 3. Then verify the second region has the same data + # 4. Zero each line + # 5. 
Verify the second region is zero + + # steps 1 and 2: copy 8 doublewords (64 bytes) to each of five destinations 4KiB apart +CBOZTest_eviction_zero_step1_0: + la a0, SourceData + la a1, Destination2 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_4096: + la a0, SourceData+8 # byte offset, so successive 64-byte source blocks overlap + la a1, Destination2+4096 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 8 + jal ra, memcpy8 + +CBOZTest_eviction_zero_step2_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 8 + jal ra, memcpy8 + + # step 3: compare each of the five copies against its source and record each result +CBOZTest_eviction_zero_step3_0: + la a0, SourceData + la a1, Destination2 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_4096: + la a0, SourceData+8 + la a1, Destination2+4096 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_8192: + la a0, SourceData+16 + la a1, Destination2+8192 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_12288: + la a0, SourceData+24 + la a1, Destination2+12288 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step3_16384: + la a0, SourceData+32 + la a1, Destination2+16384 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + # step 4: cbo.zero at each of the five destination addresses +CBOZTest_eviction_zero_step4: + la a1, Destination2 + cbo.zero (a1) + la a1, Destination2+4096 + cbo.zero (a1) + la a1, Destination2+8192 + cbo.zero (a1) + la a1, Destination2+12288 + cbo.zero (a1) + la a1, Destination2+16384 + cbo.zero (a1) + +CBOZTest_eviction_zero_step5_0: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_4096: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+4096 + li a2, 
8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_8192: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+8192 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_12288: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+12288 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + +CBOZTest_eviction_zero_step5_16384: + # step 5 (should be zero) + la a0, ZeroData + la a1, Destination2+16384 + li a2, 8 + jal ra, memcmp8 + sd a0, 0(s0) # should be -1 + addi s0, s0, 8 + + + #ld s0, 0(sp) + #ld ra, 8(sp) + #addi sp, sp, 16 + #ret +RVMODEL_HALT + + +.type memcpy8, @function +memcpy8: + # a0 is the source address + # a1 is the destination address; t0-t3 are used as scratch + # a2 is the number of 8-byte words to copy; the count is tested after each word, so at least one word is always copied + mv t0, a0 + mv t1, a1 + li t2, 0 # t2 = number of words copied so far +memcpy8_loop: + ld t3, 0(t0) + sd t3, 0(t1) + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcpy8_loop # repeat until a2 words have been copied + ret + +.type memcmp8, @function +# returns in a0 the zero-based index of the first mismatching word, -1 if none +memcmp8: + # a0 is the address of the first region + # a1 is the address of the second region; t0-t4 are used as scratch + # a2 is the number of 8-byte words to compare; the count is tested after each word, so at least one word is always compared + mv t0, a0 + mv t1, a1 + li t2, 0 # t2 = index of the word being compared +memcmp8_loop: + ld t3, 0(t0) + ld t4, 0(t1) + bne t3, t4, memcmp8_ne # stop at the first differing word + addi t0, t0, 8 + addi t1, t1, 8 + addi t2, t2, 1 + blt t2, a2, memcmp8_loop + li a0, -1 # every compared word matched + ret +memcmp8_ne: + mv a0, t2 # report the index of the mismatch + ret + +RVTEST_CODE_END + + +RVTEST_DATA_BEGIN +# Input data section. 
+#.data +.align 7 + +ZeroData: + .fill 128, 4, 0x0 # 512 bytes of zeros; the reference operand for the zero checks +SourceData: + .int 0, 1, 2, 3, 4, 5, 6, 7 # 128 consecutive values, 0 through 127 + .int 8, 9, 10, 11, 12, 13, 14, 15 + .int 16, 17, 18, 19, 20, 21, 22, 23 + .int 24, 25, 26, 27, 28, 29, 30, 31 + .int 32, 33, 34, 35, 36, 37, 38, 39 + .int 40, 41, 42, 43, 44, 45, 46, 47 + .int 48, 49, 50, 51, 52, 53, 54, 55 + .int 56, 57, 58, 59, 60, 61, 62, 63 + .int 64, 65, 66, 67, 68, 69, 70, 71 + .int 72, 73, 74, 75, 76, 77, 78, 79 + .int 80, 81, 82, 83, 84, 85, 86, 87 + .int 88, 89, 90, 91, 92, 93, 94, 95 + .int 96, 97, 98, 99, 100, 101, 102, 103 + .int 104, 105, 106, 107, 108, 109, 110, 111 + .int 112, 113, 114, 115, 116, 117, 118, 119 + .int 120, 121, 122, 123, 124, 125, 126, 127 + +RVTEST_DATA_END + +RVMODEL_DATA_BEGIN + .fill 28, 4, 0xdeadbeef # this is annoying, but RVMODEL_DATA_END and BEGIN insert + # 4 bytes. This needs to be aligned to a cacheline + + .align 6 +Destination1: + .fill 128, 4, 0xdeadbeef # 512 bytes, pre-filled so untouched words are recognisable +Destination2: + .fill 16, 4, 0xdeadbeef # NOTE(review): only 64 bytes are reserved here, but the eviction test also accesses Destination2+4096 through Destination2+16384; confirm that memory is safe to overwrite +signature: + .fill 32, 4, 0x0bad0bad # 128 bytes; the test stores 14 8-byte results (112 bytes), the rest keeps the fill pattern + +RVMODEL_DATA_END +