diff --git a/fpga/constraints/constraints.xdc b/fpga/constraints/constraints.xdc index 6b0a0363..b21b1cc5 100644 --- a/fpga/constraints/constraints.xdc +++ b/fpga/constraints/constraints.xdc @@ -35,12 +35,14 @@ set_output_delay -clock [get_clocks mmcm_clkout1] -max -add_delay 0.000 [get_por ##### UART ##### -set_property PACKAGE_PIN AW25 [get_ports UARTSin] -set_property PACKAGE_PIN BB21 [get_ports UARTSout] +#set_property PACKAGE_PIN AW25 [get_ports UARTSin] +set_property PACKAGE_PIN R29 [get_ports UARTSin] +#set_property PACKAGE_PIN BB21 [get_ports UARTSout] +set_property PACKAGE_PIN M31 [get_ports UARTSout] set_max_delay -from [get_ports UARTSin] 10.000 set_max_delay -to [get_ports UARTSout] 10.000 -set_property IOSTANDARD LVCMOS18 [get_ports UARTSin] -set_property IOSTANDARD LVCMOS18 [get_ports UARTSout] +set_property IOSTANDARD LVCMOS12 [get_ports UARTSin] +set_property IOSTANDARD LVCMOS12 [get_ports UARTSout] set_property DRIVE 6 [get_ports UARTSout] set_input_delay -clock [get_clocks mmcm_clkout1] -min -add_delay 2.000 [get_ports UARTSin] set_input_delay -clock [get_clocks mmcm_clkout1] -max -add_delay 2.000 [get_ports UARTSin] diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh index a0ea4607..03a58d1a 100644 --- a/pipelined/config/rv32i/wally-config.vh +++ b/pipelined/config/rv32i/wally-config.vh @@ -38,11 +38,11 @@ `define IEEE754 0 // I -`define MISA (32'h00000100 | 1 << 20 | 1 << 18 ) +`define MISA (32'h00000104) `define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 1 +`define ZIFENCEI_SUPPORTED 0 `define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 1 +`define ZICOUNTERS_SUPPORTED 0 `define ZFH_SUPPORTED 0 // Microarchitectural Features @@ -50,11 +50,11 @@ `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 // LSU microarchitectural Features -`define BUS 1 -`define DCACHE 1 -`define ICACHE 1 -`define VIRTMEM_SUPPORTED 1 -`define VECTORED_INTERRUPTS_SUPPORTED 1 +`define BUS 0 +`define DCACHE 0 +`define ICACHE 0 +`define VIRTMEM_SUPPORTED 0 +`define VECTORED_INTERRUPTS_SUPPORTED 1 `define BIGENDIAN_SUPPORTED 0 // TLB configuration. Entries should be a power of 2 @@ -86,31 +86,31 @@ // Peripheral Addresses // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits -`define DTIM_SUPPORTED 1'b0 +`define DTIM_SUPPORTED 1'b1 `define DTIM_BASE 34'h80000000 -`define DTIM_RANGE 34'h00001FFF -`define IROM_SUPPORTED 1'b0 +`define DTIM_RANGE 34'h000007FF +`define IROM_SUPPORTED 1'b1 `define IROM_BASE 34'h80000000 -`define IROM_RANGE 34'h00001FFF -`define BOOTROM_SUPPORTED 1'b1 +`define IROM_RANGE 34'h000007FF +`define BOOTROM_SUPPORTED 1'b0 `define BOOTROM_BASE 34'h00001000 `define BOOTROM_RANGE 34'h00000FFF -`define UNCORE_RAM_SUPPORTED 1'b1 +`define UNCORE_RAM_SUPPORTED 1'b0 `define UNCORE_RAM_BASE 34'h80000000 `define UNCORE_RAM_RANGE 34'h07FFFFFF `define EXT_MEM_SUPPORTED 1'b0 `define EXT_MEM_BASE 34'h80000000 `define EXT_MEM_RANGE 34'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 +`define CLINT_SUPPORTED 1'b0 `define CLINT_BASE 34'h02000000 `define CLINT_RANGE 34'h0000FFFF -`define GPIO_SUPPORTED 1'b1 +`define GPIO_SUPPORTED 1'b0 `define GPIO_BASE 34'h10060000 `define GPIO_RANGE 34'h000000FF -`define UART_SUPPORTED 1'b1 +`define UART_SUPPORTED 1'b0 `define UART_BASE 34'h10000000 `define UART_RANGE 34'h00000007 -`define PLIC_SUPPORTED 1'b1 +`define PLIC_SUPPORTED 1'b0 `define PLIC_BASE 34'h0C000000 `define PLIC_RANGE 34'h03FFFFFF `define SDC_SUPPORTED 1'b0 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index d865623d..75242578 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -37,11 +37,11 @@ // IEEE 754 compliance `define IEEE754 0 -`define MISA (32'h00000104) +`define MISA (32'h00000104 | 1 << 20 | 1 << 18 ) `define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 0 +`define ZIFENCEI_SUPPORTED 1 `define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 0 +`define ZICOUNTERS_SUPPORTED 1 `define ZFH_SUPPORTED 0 // Microarchitectural Features @@ -49,7 +49,7 @@ `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 // LSU microarchitectural Features -`define BUS 0 +`define BUS 1 `define DCACHE 0 `define ICACHE 0 `define VIRTMEM_SUPPORTED 0 @@ -87,10 +87,10 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits `define DTIM_SUPPORTED 1'b1 `define DTIM_BASE 34'h80000000 -`define DTIM_RANGE 34'h007FFFFF +`define DTIM_RANGE 34'h00000FFF `define IROM_SUPPORTED 1'b1 `define IROM_BASE 34'h80000000 -`define IROM_RANGE 34'h007FFFFF +`define IROM_RANGE 34'h00003FFF `define BOOTROM_SUPPORTED 1'b0 `define BOOTROM_BASE 34'h00001000 `define BOOTROM_RANGE 34'h00000FFF @@ -103,13 +103,13 @@ `define CLINT_SUPPORTED 1'b1 `define CLINT_BASE 34'h02000000 `define CLINT_RANGE 34'h0000FFFF -`define GPIO_SUPPORTED 1'b0 +`define GPIO_SUPPORTED 1'b1 `define GPIO_BASE 34'h10060000 `define GPIO_RANGE 34'h000000FF -`define UART_SUPPORTED 1'b0 +`define UART_SUPPORTED 1'b1 `define UART_BASE 34'h10000000 `define UART_RANGE 34'h00000007 -`define PLIC_SUPPORTED 1'b0 +`define PLIC_SUPPORTED 1'b1 `define PLIC_BASE 34'h0C000000 `define PLIC_RANGE 34'h03FFFFFF `define SDC_SUPPORTED 1'b0 diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh deleted file mode 100644 index b708bbb5..00000000 --- a/pipelined/config/rv64fp/wally-config.vh +++ /dev/null @@ -1,147 +0,0 @@ -////////////////////////////////////////// -// wally-config.vh -// -// Written: David_Harris@hmc.edu 4 January 2021 -// Modified: -// -// Purpose: Specify which features are configured -// Macros to determine which modes are supported based on MISA -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -// include shared configuration -`include "wally-shared.vh" - -`define FPGA 0 -`define QEMU 0 -`define DESIGN_COMPILER 0 - -// RV32 or RV64: XLEN = 32 or 64 -`define XLEN 32 - -// IEEE 754 compliance -`define IEEE754 0 - -// MISA RISC-V configuration per specification -// ZYXWVUTSRQPONMLKJIHGFEDCBA -`define MISA 32'b0000000000101000001000100101101 -`define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 1 -`define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 1 -`define ZFH_SUPPORTED 0 - -/// Microarchitectural Features -`define UARCH_PIPELINED 1 -`define UARCH_SUPERSCALR 0 -`define UARCH_SINGLECYCLE 0 - -// LSU microarchitectural Features -`define BUS 1 -`define DCACHE 1 -`define ICACHE 1 -`define VIRTMEM_SUPPORTED 1 -`define VECTORED_INTERRUPTS_SUPPORTED 1 -`define BIGENDIAN_SUPPORTED 1 - -// TLB configuration. Entries should be a power of 2 -`define ITLB_ENTRIES 32 -`define DTLB_ENTRIES 32 - -// Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines -`define DCACHE_NUMWAYS 4 -`define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_LINELENINBITS 512 -`define ICACHE_NUMWAYS 4 -`define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_LINELENINBITS 512 - -// Integer Divider Configuration -// DIV_BITSPERCYCLE must be 1, 2, or 4 -`define DIV_BITSPERCYCLE 4 - -// Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 64 - -// Address space -`define RESET_VECTOR 64'h0000000080000000 - -// Bus Interface width -`define AHBW 64 - -// WFI Timeout Wait -`define WFI_TIMEOUT_BIT 16 - -// Peripheral Physiccal Addresses -// Peripheral memory space extends from BASE to BASE+RANGE -// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits - -// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? -`define DTIM_SUPPORTED 1'b0 -`define DTIM_BASE 56'h80000000 -`define DTIM_RANGE 56'h00001FFF -`define IROM_SUPPORTED 1'b0 -`define IROM_BASE 56'h80000000 -`define IROM_RANGE 56'h00001FFF -`define BOOTROM_SUPPORTED 1'b1 -`define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTROM_RANGE 56'h00000FFF -`define UNCORE_RAM_SUPPORTED 1'b1 -`define UNCORE_RAM_BASE 56'h80000000 -`define UNCORE_RAM_RANGE 56'h7FFFFFFF -`define EXT_MEM_SUPPORTED 1'b0 -`define EXT_MEM_BASE 56'h80000000 -`define EXT_MEM_RANGE 56'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 56'h02000000 -`define CLINT_RANGE 56'h0000FFFF -`define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 56'h10060000 -`define GPIO_RANGE 56'h000000FF -`define UART_SUPPORTED 1'b1 -`define UART_BASE 56'h10000000 -`define UART_RANGE 56'h00000007 -`define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 56'h0C000000 -`define PLIC_RANGE 56'h03FFFFFF -`define SDC_SUPPORTED 1'b0 -`define SDC_BASE 56'h00012100 -`define SDC_RANGE 56'h0000001F - -// Test modes - -// Tie GPIO outputs back to inputs -`define GPIO_LOOPBACK_TEST 1 - -// Hardware configuration -`define UART_PRESCALE 1 - -// Interrupt configuration -`define PLIC_NUM_SRC 10 -// comment out the following if >=32 sources -`define PLIC_NUM_SRC_LT_32 -`define PLIC_GPIO_ID 3 -`define PLIC_UART_ID 10 - -`define BPRED_ENABLED 1 -`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE -`define TESTSBP 0 -`define BPRED_SIZE 10 - -`define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh index 05af9011..b3b547cc 100644 --- a/pipelined/config/rv64i/wally-config.vh +++ b/pipelined/config/rv64i/wally-config.vh @@ -37,30 +37,30 @@ // IEEE 754 compliance `define IEEE754 0 -// MISA RISC-V configuration per specification I -`define MISA (32'h00000100 | 1 << 20 | 1 << 18 ) -`define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 1 +// MISA RISC-V configuration per specification +`define MISA (32'h00000104) +`define ZICSR_SUPPORTED 0 +`define ZIFENCEI_SUPPORTED 0 `define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 1 +`define ZICOUNTERS_SUPPORTED 0 `define ZFH_SUPPORTED 0 -/// Microarchitectural Features +// Microarchitectural Features `define UARCH_PIPELINED 1 `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 // LSU microarchitectural Features -`define BUS 1 -`define DCACHE 1 -`define ICACHE 1 -`define VIRTMEM_SUPPORTED 1 +`define BUS 0 +`define DCACHE 0 +`define ICACHE 0 +`define VIRTMEM_SUPPORTED 0 `define VECTORED_INTERRUPTS_SUPPORTED 1 `define BIGENDIAN_SUPPORTED 0 // TLB configuration. Entries should be a power of 2 -`define ITLB_ENTRIES 32 -`define DTLB_ENTRIES 32 +`define ITLB_ENTRIES 0 +`define DTLB_ENTRIES 0 // Cache configuration. Sizes should be a power of two // typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines @@ -76,13 +76,13 @@ `define DIV_BITSPERCYCLE 4 // Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 64 +`define PMP_ENTRIES 0 // Address space `define RESET_VECTOR 64'h0000000080000000 // Bus Interface width -`define AHBW 64 +`define AHBW (`XLEN) // WFI Timeout Wait `define WFI_TIMEOUT_BIT 16 @@ -92,31 +92,31 @@ // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits // *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? -`define DTIM_SUPPORTED 1'b0 +`define DTIM_SUPPORTED 1'b1 `define DTIM_BASE 56'h80000000 -`define DTIM_RANGE 56'h00001FFF -`define IROM_SUPPORTED 1'b0 +`define DTIM_RANGE 56'h000007FF +`define IROM_SUPPORTED 1'b1 `define IROM_BASE 56'h80000000 -`define IROM_RANGE 56'h00001FFF -`define BOOTROM_SUPPORTED 1'b1 +`define IROM_RANGE 56'h000007FF +`define BOOTROM_SUPPORTED 1'b0 `define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder `define BOOTROM_RANGE 56'h00000FFF -`define UNCORE_RAM_SUPPORTED 1'b1 +`define UNCORE_RAM_SUPPORTED 1'b0 `define UNCORE_RAM_BASE 56'h80000000 `define UNCORE_RAM_RANGE 56'h7FFFFFFF `define EXT_MEM_SUPPORTED 1'b0 `define EXT_MEM_BASE 56'h80000000 `define EXT_MEM_RANGE 56'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 +`define CLINT_SUPPORTED 1'b0 `define CLINT_BASE 56'h02000000 `define CLINT_RANGE 56'h0000FFFF -`define GPIO_SUPPORTED 1'b1 +`define GPIO_SUPPORTED 1'b0 `define GPIO_BASE 56'h10060000 `define GPIO_RANGE 56'h000000FF -`define UART_SUPPORTED 1'b1 +`define UART_SUPPORTED 1'b0 `define UART_BASE 56'h10000000 `define UART_RANGE 56'h00000007 -`define PLIC_SUPPORTED 1'b1 +`define PLIC_SUPPORTED 1'b0 `define PLIC_BASE 56'h0C000000 `define PLIC_RANGE 56'h03FFFFFF `define SDC_SUPPORTED 1'b0 @@ -138,7 +138,7 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define BPRED_ENABLED 1 +`define BPRED_ENABLED 0 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 `define BPRED_SIZE 10 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh deleted file mode 100644 index e820e57c..00000000 --- a/pipelined/config/rv64ic/wally-config.vh +++ /dev/null @@ -1,146 +0,0 @@ -////////////////////////////////////////// -// wally-config.vh -// -// Written: David_Harris@hmc.edu 4 January 2021 -// Modified: -// -// Purpose: Specify which features are configured -// Macros to determine which modes are supported based on MISA -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -// include shared configuration -`include "wally-shared.vh" - -`define FPGA 0 -`define QEMU 0 -`define DESIGN_COMPILER 0 - -// RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 - -// IEEE 754 compliance -`define IEEE754 0 - -// MISA RISC-V configuration per specification -`define MISA (32'h00000104) -`define ZICSR_SUPPORTED 1 -`define ZIFENCEI_SUPPORTED 0 -`define COUNTERS 32 -`define ZICOUNTERS_SUPPORTED 0 -`define ZFH_SUPPORTED 0 - -// Microarchitectural Features -`define UARCH_PIPELINED 1 -`define UARCH_SUPERSCALR 0 -`define UARCH_SINGLECYCLE 0 - -// LSU microarchitectural Features -`define BUS 0 -`define DCACHE 0 -`define ICACHE 0 -`define VIRTMEM_SUPPORTED 0 -`define VECTORED_INTERRUPTS_SUPPORTED 1 -`define BIGENDIAN_SUPPORTED 0 - -// TLB configuration. Entries should be a power of 2 -`define ITLB_ENTRIES 0 -`define DTLB_ENTRIES 0 - -// Cache configuration. Sizes should be a power of two -// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines -`define DCACHE_NUMWAYS 4 -`define DCACHE_WAYSIZEINBYTES 4096 -`define DCACHE_LINELENINBITS 512 -`define ICACHE_NUMWAYS 4 -`define ICACHE_WAYSIZEINBYTES 4096 -`define ICACHE_LINELENINBITS 512 - -// Integer Divider Configuration -// DIV_BITSPERCYCLE must be 1, 2, or 4 -`define DIV_BITSPERCYCLE 4 - -// Legal number of PMP entries are 0, 16, or 64 -`define PMP_ENTRIES 0 - -// Address space -`define RESET_VECTOR 64'h0000000080000000 - -// Bus Interface width -`define AHBW 64 - -// WFI Timeout Wait -`define WFI_TIMEOUT_BIT 16 - -// Peripheral Physiccal Addresses -// Peripheral memory space extends from BASE to BASE+RANGE -// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits - -// *** each of these is `PA_BITS wide. is this paramaterizable INSIDE the config file? -`define DTIM_SUPPORTED 1'b1 -`define DTIM_BASE 56'h80000000 -`define DTIM_RANGE 56'h007FFFFF -`define IROM_SUPPORTED 1'b1 -`define IROM_BASE 56'h80000000 -`define IROM_RANGE 56'h007FFFFF -`define BOOTROM_SUPPORTED 1'b0 -`define BOOTROM_BASE 56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder -`define BOOTROM_RANGE 56'h00000FFF -`define UNCORE_RAM_SUPPORTED 1'b0 -`define UNCORE_RAM_BASE 56'h80000000 -`define UNCORE_RAM_RANGE 56'h7FFFFFFF -`define EXT_MEM_SUPPORTED 1'b0 -`define EXT_MEM_BASE 56'h80000000 -`define EXT_MEM_RANGE 56'h07FFFFFF -`define CLINT_SUPPORTED 1'b1 -`define CLINT_BASE 56'h02000000 -`define CLINT_RANGE 56'h0000FFFF -`define GPIO_SUPPORTED 1'b1 -`define GPIO_BASE 56'h10060000 -`define GPIO_RANGE 56'h000000FF -`define UART_SUPPORTED 1'b1 -`define UART_BASE 56'h10000000 -`define UART_RANGE 56'h00000007 -`define PLIC_SUPPORTED 1'b1 -`define PLIC_BASE 56'h0C000000 -`define PLIC_RANGE 56'h03FFFFFF -`define SDC_SUPPORTED 1'b0 -`define SDC_BASE 56'h00012100 -`define SDC_RANGE 56'h0000001F - -// Test modes - -// Tie GPIO outputs back to inputs -`define GPIO_LOOPBACK_TEST 1 - -// Hardware configuration -`define UART_PRESCALE 1 - -// Interrupt configuration -`define PLIC_NUM_SRC 10 -// comment out the following if >=32 sources -`define PLIC_NUM_SRC_LT_32 -`define PLIC_GPIO_ID 3 -`define PLIC_UART_ID 10 - -`define BPRED_ENABLED 1 -`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE -`define TESTSBP 0 -`define BPRED_SIZE 10 - -`define HPTW_WRITES_SUPPORTED 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index cd5bb05e..b4fc2cea 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -73,11 +73,18 @@ `define H_FMT 2'd2 // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits +`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `S_LEN) +`define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `S_NE) +`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `S_NF) +`define FMT (`Q_SUPPORTED ? 2'd3 : `D_SUPPORTED ? 2'd1 : 2'd0) +`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `S_BIAS) +/* Delete once tested dh 10/10/22 + `define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN) `define NE (`Q_SUPPORTED ? `Q_NE : `D_SUPPORTED ? `D_NE : `F_SUPPORTED ? `S_NE : `H_NE) -`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF) +`define NF (`Q_SUPPORTED ? `Q_NF : `D_SUPPORTED ? `D_NF : `F_SUPPORTED ? `S_NF : `H_NF) `define FMT (`Q_SUPPORTED ? 2'd3 : `D_SUPPORTED ? 2'd1 : `F_SUPPORTED ? 2'd0 : 2'd2) -`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS) +`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)*/ // Floating point constants needed for FPU paramerterization `define FPSIZES ((32)'(`Q_SUPPORTED)+(32)'(`D_SUPPORTED)+(32)'(`F_SUPPORTED)+(32)'(`ZFH_SUPPORTED)) diff --git a/pipelined/regression/lint-wally b/pipelined/regression/lint-wally index 750486c4..705fbd61 100755 --- a/pipelined/regression/lint-wally +++ b/pipelined/regression/lint-wally @@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/ verilator=`which verilator` basepath=$(dirname $0)/.. -for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do +for config in rv32e rv64gc rv32gc rv32ic rv32i rv64i rv64fpquad; do echo "$config linting..." if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then echo "Exiting after $config lint due to errors or warnings" diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 48dd7c26..5318a0f7 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -73,6 +73,15 @@ for test in tests64gc: grepstr="All tests ran without failures") configs.append(tc) +tests64i = ["arch64i"] +for test in tests64i: + tc = TestCase( + name=test, + variant="rv64i", + cmd="vsim > {} -c < 1) begin:vict cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( - .clk, .reset, .HitWay, .VictimWay, .RAdr, .LRUWriteEn); + .clk, .reset, .ce(SRAMEnable), .HitWay, .VictimWay, .RAdr, .LRUWriteEn); end else assign VictimWay = 1'b1; // one hot. assign CacheHit = | HitWay; assign VictimDirty = | VictimDirtyWay; diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 58ae7afb..94bf36a6 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -133,17 +133,17 @@ module cachefsm else if(DoFlush) NextState = STATE_FLUSH; // Delayed LRU update. Cannot check if victim line is dirty on this cycle. // To optimize do the fetch first, then eviction if necessary. - else if(DoAnyMiss) NextState = STATE_MISS_FETCH_WDV; + else if(DoAnyMiss & ~VictimDirty) NextState = STATE_MISS_FETCH_WDV; + else if(DoAnyMiss & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; else NextState = STATE_READY; - STATE_MISS_FETCH_WDV: if(CacheBusAck & ~VictimDirty) NextState = STATE_MISS_WRITE_CACHE_LINE; - else if(CacheBusAck & VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; + STATE_MISS_FETCH_WDV: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; else NextState = STATE_MISS_FETCH_WDV; //STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_READY; - STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_MISS_READ_DELAY; + STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_MISS_READ_DELAY; //else NextState = STATE_READY; STATE_MISS_READ_DELAY: if(CPUBusy) NextState = STATE_MISS_READ_DELAY; else NextState = STATE_READY; - STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; + STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_FETCH_WDV; else NextState = STATE_MISS_EVICT_DIRTY; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. STATE_FLUSH: NextState = STATE_FLUSH_CHECK; @@ -174,15 +174,15 @@ module cachefsm // write enables internal to cache assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign SetDirty = (CurrState == STATE_READY & DoAnyUpdateHit) | - (CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0])); + (CurrState == STATE_MISS_WRITE_CACHE_LINE & (AMO | CacheRW[0])); assign ClearValid = '0; assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE & ~(AMO | CacheRW[0])) | (CurrState == STATE_FLUSH_WRITE_BACK & CacheBusAck); assign LRUWriteEn = (CurrState == STATE_READY & DoAnyHit) | (CurrState == STATE_MISS_WRITE_CACHE_LINE); // Flush and eviction controls - assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY) | - (CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty); + assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | + (CurrState == STATE_READY & DoAnyMiss & VictimDirty); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK); assign FlushWayAndNotAdrFlag = FlushWayFlag & ~FlushAdrFlag; @@ -193,9 +193,11 @@ module cachefsm assign FlushAdrCntRst = (CurrState == STATE_READY); assign FlushWayCntRst = (CurrState == STATE_READY) | (CurrState == STATE_FLUSH_INCR); // Bus interface controls - assign CacheBusRW[1] = (CurrState == STATE_READY & DoAnyMiss) | (CurrState == STATE_MISS_FETCH_WDV & ~CacheBusAck); + assign CacheBusRW[1] = (CurrState == STATE_READY & DoAnyMiss & ~VictimDirty) | + (CurrState == STATE_MISS_FETCH_WDV & ~CacheBusAck) | + (CurrState == STATE_MISS_EVICT_DIRTY & CacheBusAck); // assign CacheBusRW[1] = CurrState == STATE_READY & DoAnyMiss; - assign CacheBusRW[0] = (CurrState == STATE_MISS_FETCH_WDV & CacheBusAck & VictimDirty) | + assign CacheBusRW[0] = (CurrState == STATE_READY & DoAnyMiss & VictimDirty) | (CurrState == STATE_MISS_EVICT_DIRTY & ~CacheBusAck) | (CurrState == STATE_FLUSH_WRITE_BACK & ~CacheBusAck) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); diff --git a/pipelined/src/cache/cachereplacementpolicy.sv b/pipelined/src/cache/cachereplacementpolicy.sv index d407bc28..a791e9fc 100644 --- a/pipelined/src/cache/cachereplacementpolicy.sv +++ b/pipelined/src/cache/cachereplacementpolicy.sv @@ -31,7 +31,7 @@ module cachereplacementpolicy #(parameter NUMWAYS = 4, SETLEN = 9, OFFSETLEN = 5, NUMLINES = 128)( - input logic clk, reset, + input logic clk, reset, ce, input logic [NUMWAYS-1:0] HitWay, output logic [NUMWAYS-1:0] VictimWay, input logic [SETLEN-1:0] RAdr, @@ -51,17 +51,19 @@ module cachereplacementpolicy assert (NUMWAYS == 2 || NUMWAYS == 4) else $error("Only 2 or 4 ways supported"); end - // Pipeline Delay Registers - flopr #(SETLEN) RAdrDelayReg(clk, reset, RAdr, RAdrD); - flopr #(1) LRUWriteEnDelayReg(clk, reset, LRUWriteEn, LRUWriteEnD); - flopr #(NUMWAYS-1) NewReplacementDelayReg(clk, reset, NewReplacement, NewReplacementD); - // Replacement Bits: Register file // Needs to be resettable for simulation, but could omit reset for synthesis *** - always_ff @(posedge clk) + always_ff @(posedge clk) begin if (reset) for (int set = 0; set < NUMLINES; set++) ReplacementBits[set] <= '0; - else if (LRUWriteEnD) ReplacementBits[RAdrD] <= NewReplacementD; - assign LineReplacementBits = ReplacementBits[RAdrD]; + if(ce) begin + if (LRUWriteEn) begin + ReplacementBits[RAdr] <= NewReplacement; + LineReplacementBits <= #1 NewReplacement; + end else begin + LineReplacementBits <= #1 ReplacementBits[RAdr]; + end + end + end genvar index; if(NUMWAYS == 2) begin : PseudoLRU diff --git a/pipelined/src/ebu/ebu.sv b/pipelined/src/ebu/ebu.sv index c4cb882d..d4c99242 100644 --- a/pipelined/src/ebu/ebu.sv +++ b/pipelined/src/ebu/ebu.sv @@ -94,7 +94,7 @@ module ebu logic BeatCntEn; logic [4-1:0] NextBeatCount, BeatCount, BeatCountDelayed; - logic FinalBeat; + logic FinalBeat, FinalBeatD; logic [2:0] LocalBurstType; logic CntReset; logic [3:0] Threshold; @@ -145,7 +145,7 @@ module ebu case (CurrState) IDLE: if (both) NextState = ARBITRATE; else NextState = IDLE; - ARBITRATE: if (HREADY & FinalBeat & ~(LSUReq & IFUReq)) NextState = IDLE; + ARBITRATE: if (HREADY & FinalBeatD & ~(LSUReq & IFUReq)) NextState = IDLE; else NextState = ARBITRATE; default: NextState = IDLE; endcase @@ -158,27 +158,25 @@ module ebu .en(BeatCntEn), .d(NextBeatCount), .q(BeatCount)); - - // Used to store data from data phase of AHB. - flopenr #(4) - BeatCountDelayedReg(.clk(HCLK), - .reset(~HRESETn | CntReset), - .en(BeatCntEn), - .d(BeatCount), - .q(BeatCountDelayed)); assign NextBeatCount = BeatCount + 1'b1; assign CntReset = NextState == IDLE; - assign FinalBeat = (BeatCountDelayed == Threshold); // Detect when we are waiting on the final access. + assign FinalBeat = (BeatCount == Threshold); // Detect when we are waiting on the final access. assign BeatCntEn = (NextState == ARBITRATE & HREADY); logic [2:0] HBURSTD; - flopenr #(3) HBURSTReg(.clk(HCLK), .reset(~HRESETn), .en(HTRANS == 2'b10), .d(HBURST), .q(HBURSTD)); + // Used to store data from data phase of AHB. + flopenr #(1) + FinalBeatReg(.clk(HCLK), + .reset(~HRESETn | CntReset), + .en(BeatCntEn), + .d(FinalBeat), + .q(FinalBeatD)); // unlike the bus fsm in lsu/ifu, we need to derive the number of beats from HBURST. always_comb begin - case(HBURSTD) + case(HBURST) 0: Threshold = 4'b0000; 3: Threshold = 4'b0011; // INCR4 5: Threshold = 4'b0111; // INCR8 @@ -196,7 +194,7 @@ module ebu assign IFUDisable = CurrState == ARBITRATE; assign IFUSelect = (NextState == ARBITRATE) ? 1'b0 : IFUReq; // Controller 1 (LSU) - assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeat)); + assign LSUDisable = CurrState == ARBITRATE ? 1'b0 : (IFUReqD & ~(HREADY & FinalBeatD)); assign LSUSelect = NextState == ARBITRATE ? 1'b1: LSUReq; flopr #(1) ifureqreg(clk, ~HRESETn, IFUReq, IFUReqD); diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 290ebf16..cde357bf 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -55,7 +55,6 @@ module fdivsqrt( // output logic [`XLEN-1:0] RemM, ); - logic [`DIVb+3:0] NextWSN, NextWCN; logic [`DIVb+3:0] WS, WC; logic [`DIVb+3:0] X; logic [`DIVN-2:0] D; // U0.N-1 @@ -77,7 +76,7 @@ module fdivsqrt( .XInfE, .YInfE, .WZero, .SpecialCaseM); fdivsqrtiter fdivsqrtiter( .clk, .Firstun, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .SqrtM, - .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, + .X,.Dpreproc, .FirstWS(WS), .FirstWC(WC), .DivStartE, .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, .DivBusy); fdivsqrtpostproc fdivsqrtpostproc(.WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZero, .DivSM); diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv index d13d706f..5c067796 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -41,7 +41,6 @@ module fdivsqrtiter( input logic [`DIVb+3:0] X, input logic [`DIVN-2:0] Dpreproc, output logic [`DIVN-2:0] D, // U0.N-1 - output logic [`DIVb+3:0] NextWSN, NextWCN, output logic [`DIVb:0] FirstU, FirstUM, output logic [`DIVb+1:0] FirstC, output logic Firstun, @@ -56,12 +55,12 @@ module fdivsqrtiter( // U/UM should be 1.b so b+1 bits or b:0 // C needs to be the lenght of the final fraction 0.b so b or b-1:0 /* verilator lint_off UNOPTFLAT */ - logic [`DIVb+3:0] WSA[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WCA[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WS[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb+3:0] WC[`DIVCOPIES-1:0]; // Q4.b - logic [`DIVb:0] U[`DIVCOPIES-1:0]; // U1.b - logic [`DIVb:0] UM[`DIVCOPIES-1:0];// 1.b + logic [`DIVb+3:0] WSNext[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WCNext[`DIVCOPIES-1:0]; // Q4.b + logic [`DIVb+3:0] WS[`DIVCOPIES:0]; // Q4.b + logic [`DIVb+3:0] WC[`DIVCOPIES:0]; // Q4.b + logic [`DIVb:0] U[`DIVCOPIES:0]; // U1.b + logic [`DIVb:0] UM[`DIVCOPIES:0];// 1.b logic [`DIVb:0] UNext[`DIVCOPIES-1:0];// U1.b logic [`DIVb:0] UMNext[`DIVCOPIES-1:0];// U1.b logic [`DIVb+1:0] C[`DIVCOPIES:0]; // Q2.b @@ -79,31 +78,35 @@ module fdivsqrtiter( // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. - // Otherwise, the divisor is retained and the partial remainder - // is fed back for the next iteration. - // - when the start signal is asserted X and 0 are loaded into WS and WC - // - otherwise load WSA into the flipflop - // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) - // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - assign NextWSN = WSA[`DIVCOPIES-1] << `LOGR; - assign NextWCN = WCA[`DIVCOPIES-1] << `LOGR; - - // Initialize C to -1 for sqrt and -R for division - logic [1:0] initCSqrt, initCDiv2, initCDiv4, initCUpper; - assign initCSqrt = 2'b11; // -1 - assign initCDiv2 = 2'b10; // -2 - assign initCDiv4 = 2'b00; // -4 - assign initCUpper = SqrtE ? initCSqrt : (`RADIX == 4) ? initCDiv4 : initCDiv2; - assign initC = {initCUpper, {`DIVb{1'b0}}}; - - mux2 #(`DIVb+4) wsmux(NextWSN, X, DivStartE, WSN); + // Otherwise, the divisor is retained and the residual and result + // are fed back for the next iteration. + + // Residual WS/SC registers/initializaiton mux + mux2 #(`DIVb+4) wsmux(WS[`DIVCOPIES], X, DivStartE, WSN); + mux2 #(`DIVb+4) wcmux(WC[`DIVCOPIES], '0, DivStartE, WCN); flopen #(`DIVb+4) wsflop(clk, DivStartE|DivBusy, WSN, WS[0]); - mux2 #(`DIVb+4) wcmux(NextWCN, '0, DivStartE, WCN); flopen #(`DIVb+4) wcflop(clk, DivStartE|DivBusy, WCN, WC[0]); - flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D); + + // UOTFC Result U and UM registers/initialization mux + // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division + assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0; + assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; + mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux); + mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux); + flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]); + flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]); + + // C register/initialization mux + // Initialize C to -1 for sqrt and -R for division + logic [1:0] initCUpper; + assign initCUpper = SqrtE ? 2'b11 : (`RADIX == 4) ? 2'b00 : 2'b10; + assign initC = {initCUpper, {`DIVb{1'b0}}}; mux2 #(`DIVb+2) Cmux(C[`DIVCOPIES], initC, DivStartE, CMux); flopen #(`DIVb+2) cflop(clk, DivStartE|DivBusy, CMux, C[0]); + // Divisior register + flopen #(`DIVN-1) dflop(clk, DivStartE, Dpreproc, D); + // Divisor Selections // - choose the negitive version of what's being selected // - D is only the fraction @@ -113,37 +116,29 @@ module fdivsqrtiter( assign D2 = {2'b0, 1'b1, D, {`DIVb+2-`DIVN{1'b0}}}; end + // k=DIVCOPIES of the recurrence logic genvar i; generate for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations if (`RADIX == 2) begin: stage fdivsqrtstage2 fdivsqrtstage(.D, .DBar, .SqrtM, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), + .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end else begin: stage logic j1; assign j1 = (i == 0 & ~C[0][`DIVb-1]); fdivsqrtstage4 fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtM, .j1, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), + .WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]), .C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i])); end - if(i<(`DIVCOPIES-1)) begin - assign WS[i+1] = WSA[i] << `LOGR; - assign WC[i+1] = WCA[i] << `LOGR; - assign U[i+1] = UNext[i]; - assign UM[i+1] = UMNext[i]; - end + assign WS[i+1] = WSNext[i]; + assign WC[i+1] = WCNext[i]; + assign U[i+1] = UNext[i]; + assign UM[i+1] = UMNext[i]; end endgenerate - // Initialize U to 1.0 and UM to 0 for square root; U to 0 and UM to -1 for division - assign initU = SqrtE ? {1'b1, {(`DIVb){1'b0}}} : 0; - assign initUM = SqrtE ? 0 : {1'b1, {(`DIVb){1'b0}}}; - mux2 #(`DIVb+1) Umux(UNext[`DIVCOPIES-1], initU, DivStartE, UMux); - mux2 #(`DIVb+1) UMmux(UMNext[`DIVCOPIES-1], initUM, DivStartE, UMMux); - flopen #(`DIVb+1) UReg(clk, DivStartE|DivBusy, UMux, U[0]); - flopen #(`DIVb+1) UMReg(clk, DivStartE|DivBusy, UMMux, UM[0]); - + // Send values from start of cycle for postprocessing assign FirstWS = WS[0]; assign FirstWC = WC[0]; assign FirstU = U[0]; diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv index 4379724f..73f4e442 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4.sv @@ -31,19 +31,18 @@ `include "wally-config.vh" module fdivsqrtqsel4 ( - input logic [`DIVN-2:0] D, + input logic [2:0] Dmsbs, input logic [4:0] Smsbs, - input logic [`DIVb+3:0] WS, WC, + input logic [7:0] WSmsbs, WCmsbs, input logic Sqrt, j1, output logic [3:0] udigit ); logic [6:0] Wmsbs; logic [7:0] PreWmsbs; - logic [2:0] Dmsbs, A; + logic [2:0] A; - assign PreWmsbs = WC[`DIVb+3:`DIVb-4] + WS[`DIVb+3:`DIVb-4]; + assign PreWmsbs = WCmsbs + WSmsbs; assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVN-2:`DIVN-4];//|{3{D[`DIVN-2]&Sqrt}}; // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... @@ -51,6 +50,7 @@ module fdivsqrtqsel4 ( logic [3:0] USel4[1023:0]; + // Prepopulate selection table; this is constant at compile time always_comb begin integer a, w, i, w2; for(a=0; a<8; a++) @@ -101,12 +101,15 @@ module fdivsqrtqsel4 ( endcase end end + + // Select A always_comb if (Sqrt) begin if (j1) A = 3'b101; else if (Smsbs == 5'b10000) A = 3'b111; else A = Smsbs[2:0]; end else A = Dmsbs; + + // Select quotient digit from lookup table based on A and W assign udigit = USel4[{A,Wmsbs}]; - endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv new file mode 100644 index 00000000..de4c22a1 --- /dev/null +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv @@ -0,0 +1,93 @@ +/////////////////////////////////////////// +// fdivsqrtqsel4cmp.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu +// Modified:13 January 2022 +// +// Purpose: Comparator-based Radix 4 Quotient Digit Selection +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fdivsqrtqsel4cmp ( + input logic [2:0] Dmsbs, + input logic [4:0] Smsbs, + input logic [7:0] WSmsbs, WCmsbs, + input logic Sqrt, j1, + output logic [3:0] udigit +); + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] A; + + assign PreWmsbs = WCmsbs + WSmsbs; + assign Wmsbs = PreWmsbs[7:1]; + // D = 0001.xxx... + // Dmsbs = | | + // W = xxxx.xxx... + // Wmsbs = | | + + logic [6:0] mk2, mk1, mk0, mkm1; + logic [6:0] mks2[7:0], mks1[7:0]; + + // Prepopulate table of mks0 + assign mks2[0] = 12; + assign mks2[1] = 14; + assign mks2[2] = 16; + assign mks2[3] = 17; + assign mks2[4] = 18; + assign mks2[5] = 20; + assign mks2[6] = 22; + assign mks2[7] = 23; + assign mks1[0] = 4; + assign mks1[1] = 4; + assign mks1[2] = 6; + assign mks1[3] = 6; + assign mks1[4] = 6; + assign mks1[5] = 8; // is the logic any cheaper if this is a 6? + assign mks1[6] = 8; + assign mks1[7] = 8; + + // Choose A for current operation + always_comb + if (Sqrt) begin + if (j1) A = 3'b101; + else if (Smsbs == 5'b10000) A = 3'b111; + else A = Smsbs[2:0]; + end else A = Dmsbs; + + // Choose selection constants based on a + assign mk2 = mks2[A]; + assign mk1 = mks1[A]; + assign mk0 = -mks1[A]; + assign mkm1 = (A == 3'b000) ? -13 : -mks2[A]; // asymmetry in table + + // Compare residual W to selection constants to choose digit + always_comb + if ($signed(Wmsbs) >= $signed(mk2)) udigit = 4'b1000; // choose 2 + else if ($signed(Wmsbs) >= $signed(mk1)) udigit = 4'b0100; // choose 1 + else if ($signed(Wmsbs) >= $signed(mk0)) udigit = 4'b0000; // choose 0 + else if ($signed(Wmsbs) >= $signed(mkm1)) udigit = 4'b0010; // choose -1 + else udigit = 4'b0001; // choose -2 +endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 987f2357..8ed1664a 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -41,7 +41,7 @@ module fdivsqrtstage2 ( output logic un, output logic [`DIVb+1:0] CNext, output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSA, WCA + output logic [`DIVb+3:0] WSNext, WCNext ); /* verilator lint_on UNOPTFLAT */ @@ -49,8 +49,7 @@ module fdivsqrtstage2 ( logic up, uz; logic [`DIVb+3:0] F; logic [`DIVb+3:0] AddIn; - - assign CNext = {1'b1, C[`DIVb+1:1]}; + logic [`DIVb+3:0] WSA, WCA; // Qmient Selection logic // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un) @@ -61,8 +60,11 @@ module fdivsqrtstage2 ( // 0010 = -1 // 0001 = -2 fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); + + // Sqrt F generatin fdivsqrtfgen2 fgen2(.up, .uz, .C(CNext), .U, .UM, .F); + // Divisor multiple always_comb if (up) Dsel = DBar; else if (uz) Dsel = '0; // qz @@ -72,7 +74,13 @@ module fdivsqrtstage2 ( // WSA, WCA = WS + WC - qD assign AddIn = SqrtM ? F : Dsel; csa #(`DIVb+4) csa(WS, WC, AddIn, up&~SqrtM, WSA, WCA); + assign WSNext = WSA << 1; + assign WCNext = WCA << 1; + // Shift thermometer code C + assign CNext = {1'b1, C[`DIVb+1:1]}; + + // Unified On-The-Fly Converter to accumulate result fdivsqrtuotfc2 uotfc2(.up, .uz, .C(CNext), .U, .UM, .UNext, .UMNext); endmodule diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv index e463762a..05792293 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -30,7 +30,6 @@ `include "wally-config.vh" -/* verilator lint_off UNOPTFLAT */ module fdivsqrtstage4 ( input logic [`DIVN-2:0] D, input logic [`DIVb+3:0] DBar, D2, DBar2, @@ -41,17 +40,18 @@ module fdivsqrtstage4 ( input logic SqrtM, j1, output logic un, output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSA, WCA + output logic [`DIVb+3:0] WSNext, WCNext ); - /* verilator lint_on UNOPTFLAT */ logic [`DIVb+3:0] Dsel; logic [3:0] udigit; logic [`DIVb+3:0] F; logic [`DIVb+3:0] AddIn; logic [4:0] Smsbs; + logic [2:0] Dmsbs; + logic [7:0] WCmsbs, WSmsbs; logic CarryIn; - assign CNext = {2'b11, C[`DIVb+1:2]}; + logic [`DIVb+3:0] WSA, WCA; // Digit Selection logic // u encoding: @@ -61,28 +61,40 @@ module fdivsqrtstage4 ( // 0010 = -1 // 0001 = -2 assign Smsbs = U[`DIVb:`DIVb-4]; - fdivsqrtqsel4 qsel4(.D, .Smsbs, .WS, .WC, .Sqrt(SqrtM), .j1, .udigit); + assign Dmsbs = D[`DIVN-2:`DIVN-4]; + assign WCmsbs = WC[`DIVb+3:`DIVb-4]; + assign WSmsbs = WS[`DIVb+3:`DIVb-4]; + + fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .Sqrt(SqrtM), .j1, .udigit); + assign un = 0; // unused for radix 4 + + // F generation logic fdivsqrtfgen4 fgen4(.udigit, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F); + // Divisor multiple logic always_comb - case (udigit) - 4'b1000: Dsel = DBar2; - 4'b0100: Dsel = DBar; - 4'b0000: Dsel = '0; - 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; - 4'b0001: Dsel = D2; - default: Dsel = 'x; - endcase + case (udigit) + 4'b1000: Dsel = DBar2; + 4'b0100: Dsel = DBar; + 4'b0000: Dsel = '0; + 4'b0010: Dsel = {3'b0, 1'b1, D, {`DIVb-`DIVN+1{1'b0}}}; + 4'b0001: Dsel = D2; + default: Dsel = 'x; + endcase - // Partial Product Generation - // WSA, WCA = WS + WC - qD + // Residual Update + // {WS, WC}}Next = (WS + WC - qD or F) << 2 assign AddIn = SqrtM ? F : Dsel; assign CarryIn = ~SqrtM & (udigit[3] | udigit[2]); // +1 for 2's complement of -D and -2D csa #(`DIVb+4) csa(WS, WC, AddIn, CarryIn, WSA, WCA); - - fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); + assign WSNext = WSA << 2; + assign WCNext = WCA << 2; - assign un = 0; // unused for radix 4 + // Shift thermometer code C + assign CNext = {2'b11, C[`DIVb+1:2]}; + + // On-the-fly converter to accumulate result + fdivsqrtuotfc4 fdivsqrtuotfc4(.udigit, .Sqrt(SqrtM), .C(CNext[`DIVb:0]), .U, .UM, .UNext, .UMNext); endmodule diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 8bda30b0..a011a9e8 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -91,7 +91,7 @@ module ifu ( logic [`XLEN-1:0] PCPlus2or4F, PCLinkD; logic [`XLEN-3:0] PCPlusUpperF; logic CompressedF; - logic [31:0] InstrRawD, InstrRawF; + logic [31:0] InstrRawD, InstrRawF, IROMInstrF, ICacheInstrF; logic [31:0] FinalInstrRawF; logic [1:0] IFURWF; @@ -118,6 +118,8 @@ module ifu ( // branch predictor signal logic [`XLEN-1:0] PCNext1F, PCNext2F, PCNext0F; logic BusCommittedF, CacheCommittedF; + logic SelIROM; + assign PCFExt = {2'b00, PCFSpill}; @@ -128,7 +130,7 @@ module ifu ( if(`C_SUPPORTED) begin : SpillSupport - spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .PCF, .PCPlusUpperF, .PCNextF, .InstrRawF, + spillsupport #(`ICACHE) spillsupport(.clk, .reset, .StallF, .PCF, .PCPlusUpperF, .PCNextF, .InstrRawF(InstrRawF), .InstrDAPageFaultF, .IFUCacheBusStallF, .ITLBMissF, .PCNextFSpill, .PCFSpill, .SelNextSpillF, .PostSpillInstrRawF, .CompressedF); end else begin : NoSpillSupport @@ -166,7 +168,7 @@ module ifu ( .TLBFlush, .PhysicalAddress(PCPF), .TLBMiss(ITLBMissF), - .Cacheable(CacheableF), .Idempotent(), .AtomicAllowed(), + .Cacheable(CacheableF), .Idempotent(), .AtomicAllowed(), .SelTIM(SelIROM), .InstrAccessFaultF, .LoadAccessFaultM(), .StoreAmoAccessFaultM(), .InstrPageFaultF, .LoadPageFaultM(), .StoreAmoPageFaultM(), .LoadMisalignedFaultM(), .StoreAmoMisalignedFaultM(), @@ -178,6 +180,7 @@ module ifu ( assign {ITLBMissF, InstrAccessFaultF, InstrPageFaultF, InstrDAPageFaultF} = '0; assign PCPF = PCFExt[`PA_BITS-1:0]; assign CacheableF = '1; + assign SelIROM = '0; end //////////////////////////////////////////////////////////////////////////////////////////////// @@ -190,13 +193,13 @@ module ifu ( // delay the interrupt until the LSU is in a clean state. assign CommittedF = CacheCommittedF | BusCommittedF; -// logic [`XLEN-1:0] InstrRawF; -// assign InstrRawF = InstrRawF[31:0]; + logic IgnoreRequest; + assign IgnoreRequest = ITLBMissF | TrapM; // The IROM uses untranslated addresses, so it is not compatible with virtual memory. if (`IROM_SUPPORTED) begin : irom assign IFURWF = 2'b10; - irom irom(.clk, .reset, .ce(~CPUBusy), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(FinalInstrRawF)); + irom irom(.clk, .reset, .ce(~CPUBusy | reset), .Adr(PCNextFSpill[`XLEN-1:0]), .ReadData(IROMInstrF)); end else begin assign IFURWF = 2'b10; @@ -211,10 +214,10 @@ module ifu ( logic ICacheBusAck; logic SelUncachedAdr; logic [1:0] CacheBusRW, BusRW; - logic IgnoreRequest; + - assign IgnoreRequest = ITLBMissF | TrapM; - assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableF, CacheableF}; + //assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableF, CacheableF} & ~{SelIROM, SelIROM}; + assign BusRW = ~IgnoreRequest & ~CacheableF & ~SelIROM ? IFURWF : '0; cache #(.LINELEN(`ICACHE_LINELENINBITS), .NUMLINES(`ICACHE_WAYSIZEINBYTES*8/`ICACHE_LINELENINBITS), .NUMWAYS(`ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .DCACHE(0)) @@ -222,7 +225,7 @@ module ifu ( .FetchBuffer, .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheBusRW, - .ReadDataWord(FinalInstrRawF), + .ReadDataWord(ICacheInstrF), .Cacheable(CacheableF), .SelReplay('0), .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), @@ -244,20 +247,24 @@ module ifu ( .BusRW, .CPUBusy, .BusStall, .BusCommitted(BusCommittedF)); - mux2 #(32) UnCachedDataMux(.d0(FinalInstrRawF), .d1(FetchBuffer[32-1:0]), - .s(SelUncachedAdr), .y(InstrRawF[31:0])); + mux3 #(32) UnCachedDataMux(.d0(ICacheInstrF), .d1(FetchBuffer[32-1:0]), .d2(IROMInstrF), + .s({SelIROM, SelUncachedAdr}), .y(InstrRawF[31:0])); end else begin : passthrough assign IFUHADDR = PCPF; logic CaptureEn; + logic [31:0] FetchBuffer; logic [1:0] BusRW; - assign BusRW = IFURWF & ~{ITLBMissF, ITLBMissF} & ~{TrapM, TrapM}; + assign BusRW = ~IgnoreRequest & ~SelIROM ? IFURWF : '0; +// assign BusRW = IFURWF & ~{IgnoreRequest, IgnoreRequest} & ~{SelIROM, SelIROM}; assign IFUHSIZE = 3'b010; ahbinterface #(0) ahbinterface(.HCLK(clk), .HRESETn(~reset), .HREADY(IFUHREADY), .HRDATA(HRDATA), .HTRANS(IFUHTRANS), .HWRITE(IFUHWRITE), .HWDATA(), .HWSTRB(), .BusRW, .ByteMask(), .WriteData('0), - .CPUBusy, .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(InstrRawF[31:0])); + .CPUBusy, .BusStall, .BusCommitted(BusCommittedF), .FetchBuffer(FetchBuffer)); + if(`IROM_SUPPORTED) mux2 #(32) UnCachedDataMux2(FetchBuffer, IROMInstrF, SelIROM, InstrRawF); + else assign InstrRawF = FetchBuffer; assign IFUHBURST = 3'b0; assign {ICacheFetchLine, ICacheStallF, FinalInstrRawF} = '0; assign {ICacheMiss, ICacheAccess} = '0; @@ -265,7 +272,7 @@ module ifu ( end else begin : nobus // block: bus assign BusStall = '0; assign {ICacheStallF, ICacheMiss, ICacheAccess} = '0; - assign InstrRawF = FinalInstrRawF; + assign InstrRawF = IROMInstrF; end assign IFUCacheBusStallF = ICacheStallF | BusStall; diff --git a/pipelined/src/ifu/irom.sv b/pipelined/src/ifu/irom.sv index f136e64f..af262ba8 100644 --- a/pipelined/src/ifu/irom.sv +++ b/pipelined/src/ifu/irom.sv @@ -36,8 +36,17 @@ module irom( ); localparam ADDR_WDITH = $clog2(`IROM_RANGE/8); - localparam OFFSET = $clog2(`LLEN/8); + localparam OFFSET = $clog2(`XLEN/8); - rom1p1r #(ADDR_WDITH, 32) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadData)); + logic [`XLEN-1:0] ReadDataFull; + + rom1p1r #(ADDR_WDITH, `XLEN) rom(.clk, .ce, .addr(Adr[ADDR_WDITH+OFFSET-1:OFFSET]), .dout(ReadDataFull)); + if (`XLEN == 32) assign ReadData = ReadDataFull; + // have to delay Ardr[OFFSET-1] by 1 cycle + else begin + logic AdrD; + flopen #(1) AdrReg(clk, ce, Adr[OFFSET-1], AdrD); + assign ReadData = AdrD ? ReadDataFull[63:32] : ReadDataFull[31:0]; + end endmodule diff --git a/pipelined/src/lsu/bigendianswap.sv b/pipelined/src/lsu/endianswap.sv similarity index 100% rename from pipelined/src/lsu/bigendianswap.sv rename to pipelined/src/lsu/endianswap.sv diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 0d08e3fa..d7bfa9d1 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -117,6 +117,7 @@ module lsu ( logic [`LLEN-1:0] ReadDataM; logic [(`LLEN-1)/8:0] ByteMaskM; logic SelReplay; + logic SelDTIM; flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); assign IEUAdrExtM = {2'b00, IEUAdrM}; @@ -153,7 +154,6 @@ module lsu ( assign CommittedM = SelHPTW | DCacheCommittedM | BusCommittedM; // MMU and Misalignment fault logic required if privileged unit exists - // *** DH: This is too strong a requirement. Separate MMU in `VIRTMEM_SUPPORTED from simpler faults in `ZICSR_SUPPORTED if(`ZICSR_SUPPORTED == 1) begin : dmmu logic DisableTranslation; assign DisableTranslation = SelHPTW | FlushDCacheM; @@ -168,7 +168,7 @@ module lsu ( .TLBFlush(sfencevmaM), .PhysicalAddress(PAdrM), .TLBMiss(DTLBMissM), - .Cacheable(CacheableM), .Idempotent(), .AtomicAllowed(), + .Cacheable(CacheableM), .Idempotent(), .AtomicAllowed(), .SelTIM(SelDTIM), .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAmoAccessFaultM, .InstrPageFaultF(),.LoadPageFaultM, .StoreAmoPageFaultM, .LoadMisalignedFaultM, .StoreAmoMisalignedFaultM, // *** these faults need to be supressed during hptw. @@ -190,8 +190,10 @@ module lsu ( assign {DTLBMissM, LoadAccessFaultM, StoreAmoAccessFaultM, LoadMisalignedFaultM, StoreAmoMisalignedFaultM} = '0; assign {LoadPageFaultM, StoreAmoPageFaultM} = '0; - assign PAdrM = IHAdrM; + assign PAdrM = IHAdrM[`PA_BITS-1:0]; assign CacheableM = '1; + assign SelDTIM = `DTIM_SUPPORTED & ~`BUS; // if no pma then select dtim if there is a DTIM. If there is + // a bus then this is always 0. Cannot have both without PMA. end ///////////////////////////////////////////////////////////////////////////////////////////// @@ -200,51 +202,56 @@ module lsu ( ///////////////////////////////////////////////////////////////////////////////////////////// logic [`LLEN-1:0] LSUWriteDataM, LittleEndianWriteDataM; logic [`LLEN-1:0] ReadDataWordM, LittleEndianReadDataWordM; - logic [`LLEN-1:0] ReadDataWordMuxM; + logic [`LLEN-1:0] ReadDataWordMuxM, DTIMReadDataWordM, DCacheReadDataWordM; logic IgnoreRequest; assign IgnoreRequest = IgnoreRequestTLB | TrapM; if (`DTIM_SUPPORTED) begin : dtim logic [`PA_BITS-1:0] DTIMAdr; - + logic [1:0] DTIMMemRWM; + // The DTIM uses untranslated addresses, so it is not compatible with virtual memory. - assign DTIMAdr = MemRWM[0] ? IEUAdrExtM : IEUAdrExtE; // zero extend or contract to PA_BITS - dtim dtim(.clk, .reset, .ce(~CPUBusy), .MemRWM, + assign DTIMAdr = MemRWM[0] ? IEUAdrExtM[`PA_BITS-1:0] : IEUAdrExtE[`PA_BITS-1:0]; // zero extend or contract to PA_BITS + assign DTIMMemRWM = SelDTIM & ~IgnoreRequest ? LSURWM : '0; + dtim dtim(.clk, .reset, .ce(~CPUBusy), .MemRWM(DTIMMemRWM), .Adr(DTIMAdr), .TrapM, .WriteDataM(LSUWriteDataM), - .ReadDataWordM(ReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); + .ReadDataWordM(DTIMReadDataWordM[`XLEN-1:0]), .ByteMaskM(ByteMaskM[`XLEN/8-1:0])); end else begin end if (`BUS) begin : bus - localparam integer WORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`XLEN : 1; - localparam integer LOGBWPL = `DCACHE ? $clog2(WORDSPERLINE) : 1; + localparam integer LLENWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`LLEN : 1; + localparam integer LLENLOGBWPL = `DCACHE ? $clog2(LLENWORDSPERLINE) : 1; + localparam integer AHBWWORDSPERLINE = `DCACHE ? `DCACHE_LINELENINBITS/`AHBW : 1; + localparam integer AHBWLOGBWPL = `DCACHE ? $clog2(AHBWWORDSPERLINE) : 1; if(`DCACHE) begin : dcache localparam integer LINELEN = `DCACHE ? `DCACHE_LINELENINBITS : `XLEN; logic [LINELEN-1:0] FetchBuffer; logic [`PA_BITS-1:0] DCacheBusAdr; logic DCacheWriteLine; logic DCacheFetchLine; - logic [LOGBWPL-1:0] WordCount; + logic [AHBWLOGBWPL-1:0] WordCount; logic SelUncachedAdr, DCacheBusAck; logic SelBusWord; logic [`XLEN-1:0] PreHWDATA; //*** change name logic [`XLEN/8-1:0] ByteMaskMDelay; logic [1:0] CacheBusRW, BusRW; + localparam integer LLENPOVERAHBW = `LLEN / `AHBW; - assign BusRW = LSURWM & ~{IgnoreRequest, IgnoreRequest} & ~{CacheableM, CacheableM}; + assign BusRW = ~CacheableM & ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), - .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`XLEN), .DCACHE(1)) dcache( + .NUMWAYS(`DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(`LLEN), .MUXINTERVAL(`LLEN), .DCACHE(1)) dcache( .clk, .reset, .CPUBusy, .SelBusWord, .RW(LSURWM), .Atomic(LSUAtomicM), .FlushCache(FlushDCacheM), .NextAdr(IEUAdrE[11:0]), .PAdr(PAdrM), - .ByteMask(ByteMaskM), .WordCount, + .ByteMask(ByteMaskM), .WordCount(WordCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]), .FinalWriteData(LSUWriteDataM), .Cacheable(CacheableM), .SelReplay, .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequestTLB, .TrapM, .CacheCommitted(DCacheCommittedM), - .CacheBusAdr(DCacheBusAdr), .ReadDataWord(ReadDataWordM), + .CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM), .FetchBuffer, .CacheBusRW, .CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0)); - ahbcacheinterface #(WORDSPERLINE, LINELEN, LOGBWPL, `DCACHE) ahbcacheinterface( + ahbcacheinterface #(.WORDSPERLINE(AHBWWORDSPERLINE), .LINELEN(LINELEN), .LOGWPL(AHBWLOGBWPL), .CACHE_ENABLED(`DCACHE)) ahbcacheinterface( .HCLK(clk), .HRESETn(~reset), .HRDATA, .HSIZE(LSUHSIZE), .HBURST(LSUHBURST), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HREADY(LSUHREADY), @@ -254,24 +261,40 @@ module lsu ( .SelUncachedAdr, .BusRW, .CPUBusy, .BusStall, .BusCommitted(BusCommittedM)); - mux2 #(`LLEN) UnCachedDataMux(.d0(ReadDataWordM), .d1({{`LLEN-`XLEN{1'b0}}, FetchBuffer[`XLEN-1:0] }), - .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - mux2 #(`XLEN) LSUHWDATAMux(.d0(ReadDataWordM[`XLEN-1:0]), .d1(LSUWriteDataM[`XLEN-1:0]), + // FetchBuffer[`AHBW-1:0] needs to be duplicated LLENPOVERAHBW times. + // DTIMReadDataWordM should be increased to LLEN. + mux3 #(`LLEN) UnCachedDataMux(.d0(DCacheReadDataWordM), .d1({LLENPOVERAHBW{FetchBuffer[`XLEN-1:0]}}), + .d2({{`LLEN-`XLEN{1'b0}}, DTIMReadDataWordM[`XLEN-1:0]}), + .s({SelDTIM, SelUncachedAdr}), .y(ReadDataWordMuxM)); + + // When AHBW is less than LLEN need extra muxes to select the subword from cache's read data. + logic [`AHBW-1:0] DCacheReadDataWordAHB; + if(LLENPOVERAHBW > 1) begin + logic [`AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0]; + genvar index; + for (index = 0; index < LLENPOVERAHBW; index++) begin:readdatalinesetsmux + assign AHBWordSets[index] = DCacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)]; + end + assign DCacheReadDataWordAHB = AHBWordSets[WordCount[$clog2(LLENPOVERAHBW)-1:0]]; + end else assign DCacheReadDataWordAHB = DCacheReadDataWordM[`AHBW-1:0]; + mux2 #(`XLEN) LSUHWDATAMux(.d0(DCacheReadDataWordAHB), .d1(LSUWriteDataM[`AHBW-1:0]), .s(SelUncachedAdr), .y(PreHWDATA)); - flopen #(`XLEN) wdreg(clk, LSUHREADY, PreHWDATA, LSUHWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN + flopen #(`AHBW) wdreg(clk, LSUHREADY, PreHWDATA, LSUHWDATA); // delay HWDATA by 1 cycle per spec - // *** bummer need a second byte mask for bus as it is XLEN rather than LLEN. + // *** bummer need a second byte mask for bus as it is AHBW rather than LLEN. // probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0. - logic [`XLEN/8-1:0] BusByteMaskM; - swbytemask #(`XLEN) busswbytemask(.Size(LSUHSIZE), .Adr(PAdrM[$clog2(`XLEN/8)-1:0]), .ByteMask(BusByteMaskM)); + logic [`AHBW/8-1:0] BusByteMaskM; + swbytemask #(`AHBW) busswbytemask(.Size(LSUHSIZE), .Adr(PAdrM[$clog2(`AHBW/8)-1:0]), .ByteMask(BusByteMaskM)); - flop #(`XLEN/8) HWSTRBReg(clk, BusByteMaskM[`XLEN/8-1:0], LSUHWSTRB); + flop #(`AHBW/8) HWSTRBReg(clk, BusByteMaskM[`AHBW/8-1:0], LSUHWSTRB); end else begin : passthrough // just needs a register to hold the value from the bus logic CaptureEn; logic [1:0] BusRW; - assign BusRW = LSURWM & ~{IgnoreRequest, IgnoreRequest}; + logic [`XLEN-1:0] FetchBuffer; + assign BusRW = ~IgnoreRequest & ~SelDTIM ? LSURWM : '0; +// assign BusRW = LSURWM & ~{IgnoreRequest, IgnoreRequest} & ~{SelDTIM, SelDTIM}; assign LSUHADDR = PAdrM; assign LSUHSIZE = LSUFunct3M; @@ -279,15 +302,16 @@ module lsu ( ahbinterface #(1) ahbinterface(.HCLK(clk), .HRESETn(~reset), .HREADY(LSUHREADY), .HRDATA(HRDATA), .HTRANS(LSUHTRANS), .HWRITE(LSUHWRITE), .HWDATA(LSUHWDATA), .HWSTRB(LSUHWSTRB), .BusRW, .ByteMask(ByteMaskM), .WriteData(LSUWriteDataM), - .CPUBusy, .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(ReadDataWordM)); - - assign ReadDataWordMuxM = ReadDataWordM; // from byte swapping + .CPUBusy, .BusStall, .BusCommitted(BusCommittedM), .FetchBuffer(FetchBuffer)); + + if(`DTIM_SUPPORTED) mux2 #(`XLEN) ReadDataMux2(FetchBuffer, DTIMReadDataWordM, SelDTIM, ReadDataWordMuxM); + else assign ReadDataWordMuxM = FetchBuffer[`XLEN-1:0]; assign LSUHBURST = 3'b0; assign {DCacheStallM, DCacheCommittedM, DCacheMiss, DCacheAccess} = '0; end end else begin: nobus // block: bus assign LSUHWDATA = '0; - assign ReadDataWordMuxM = ReadDataWordM; + assign ReadDataWordMuxM = DTIMReadDataWordM; assign {BusStall, BusCommittedM} = '0; assign {DCacheMiss, DCacheAccess} = '0; assign {DCacheStallM, DCacheCommittedM} = '0; @@ -311,9 +335,6 @@ module lsu ( ///////////////////////////////////////////////////////////////////////////////////////////// // Subword Accesses ///////////////////////////////////////////////////////////////////////////////////////////// - // *** Ross Thompson: I think swr needs to be modified to support bigendian. Both the subword - // selected and the sign extension are probably wrong. I think it should be an invertion of - // the address bits and a different bit selected for extension. subwordread subwordread(.ReadDataWordMuxM(LittleEndianReadDataWordM), .PAdrM(PAdrM[2:0]), .BigEndianM, .FpLoadStoreM, .Funct3M(LSUFunct3M), .ReadDataM); subwordwrite subwordwrite(.LSUFunct3M, .IMAFWriteDataM, .LittleEndianWriteDataM); @@ -332,6 +353,7 @@ module lsu ( // hart works little-endian internally // swap the bytes when read from big-endian memory ///////////////////////////////////////////////////////////////////////////////////////////// + if (`BIGENDIAN_SUPPORTED) begin:endian endianswap #(`LLEN) storeswap(.BigEndianM, .a(LittleEndianWriteDataM), .y(LSUWriteDataM)); endianswap #(`LLEN) loadswap(.BigEndianM, .a(ReadDataWordMuxM), .y(LittleEndianReadDataWordM)); diff --git a/pipelined/src/lsu/subwordread.sv b/pipelined/src/lsu/subwordread.sv index 8d5ed4de..61034574 100644 --- a/pipelined/src/lsu/subwordread.sv +++ b/pipelined/src/lsu/subwordread.sv @@ -47,7 +47,7 @@ module subwordread // Funct3M[1:0] is the size of the memory access. assign PAdrSwap = PAdrM ^ {3{BigEndianM}}; - if (`XLEN == 64) begin:swrmux + if (`LLEN == 64) begin:swrmux // ByteMe mux always_comb case(PAdrSwap[2:0]) @@ -85,19 +85,10 @@ module subwordread always_comb case(Funct3M) 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: if(`ZFH_SUPPORTED) - ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh - 3'b010: if(`F_SUPPORTED) - ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw - else ReadDataM = {{`LLEN-32{WordM[31]}}, WordM[31:0]}; // lw - 3'b011: if(`D_SUPPORTED) - ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld - else ReadDataM = {{`LLEN-64{DblWordM[63]}}, DblWordM[63:0]}; // ld/fld - 3'b100: if(`Q_SUPPORTED) - ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq - else - ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu + 3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{`LLEN-32{WordM[31]|FpLoadStoreM}}, WordM[31:0]}; // lw/flw + 3'b011: ReadDataM = {{`LLEN-64{DblWordM[63]|FpLoadStoreM}}, DblWordM[63:0]}; // ld/fld + 3'b100: ReadDataM = FpLoadStoreM ? ReadDataWordMuxM : {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu/flq 3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu 3'b110: ReadDataM = {{`LLEN-32{1'b0}}, WordM[31:0]}; // lwu default: ReadDataM = ReadDataWordMuxM; // Shouldn't happen @@ -124,12 +115,8 @@ module subwordread always_comb case(Funct3M) 3'b000: ReadDataM = {{`LLEN-8{ByteM[7]}}, ByteM}; // lb - 3'b001: if(`ZFH_SUPPORTED) - ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh - else ReadDataM = {{`LLEN-16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh - 3'b010: if(`F_SUPPORTED) - ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw - else ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; // lw + 3'b001: ReadDataM = {{`LLEN-16{HalfwordM[15]|FpLoadStoreM}}, HalfwordM[15:0]}; // lh/flh + 3'b010: ReadDataM = {{`LLEN-32{ReadDataWordMuxM[31]|FpLoadStoreM}}, ReadDataWordMuxM[31:0]}; // lw/flw 3'b011: ReadDataM = ReadDataWordMuxM; // fld 3'b100: ReadDataM = {{`LLEN-8{1'b0}}, ByteM[7:0]}; // lbu 3'b101: ReadDataM = {{`LLEN-16{1'b0}}, HalfwordM[15:0]}; // lhu diff --git a/pipelined/src/mmu/mmu.sv b/pipelined/src/mmu/mmu.sv index dbf23e98..da90ee2c 100644 --- a/pipelined/src/mmu/mmu.sv +++ b/pipelined/src/mmu/mmu.sv @@ -66,7 +66,7 @@ module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries // Physical address outputs output logic [`PA_BITS-1:0] PhysicalAddress, output logic TLBMiss, - output logic Cacheable, Idempotent, AtomicAllowed, + output logic Cacheable, Idempotent, AtomicAllowed, SelTIM, // Faults output logic InstrAccessFaultF, LoadAccessFaultM, StoreAmoAccessFaultM, @@ -126,7 +126,7 @@ module mmu #(parameter TLB_ENTRIES = 8, // number of TLB Entries pmachecker pmachecker(.PhysicalAddress, .Size, .AtomicAccessM, .ExecuteAccessF, .WriteAccessM, .ReadAccessM, - .Cacheable, .Idempotent, .AtomicAllowed, + .Cacheable, .Idempotent, .AtomicAllowed, .SelTIM, .PMAInstrAccessFaultF, .PMALoadAccessFaultM, .PMAStoreAmoAccessFaultM); pmpchecker pmpchecker(.PhysicalAddress, .PrivilegeModeW, diff --git a/pipelined/src/mmu/pmachecker.sv b/pipelined/src/mmu/pmachecker.sv index 455f510d..df6eb271 100644 --- a/pipelined/src/mmu/pmachecker.sv +++ b/pipelined/src/mmu/pmachecker.sv @@ -38,7 +38,7 @@ module pmachecker ( input logic [`PA_BITS-1:0] PhysicalAddress, input logic [1:0] Size, input logic AtomicAccessM, ExecuteAccessF, WriteAccessM, ReadAccessM, // *** atomicaccessM is unused but might want to stay in for future use. - output logic Cacheable, Idempotent, AtomicAllowed, + output logic Cacheable, Idempotent, AtomicAllowed, SelTIM, output logic PMAInstrAccessFaultF, output logic PMALoadAccessFaultM, output logic PMAStoreAmoAccessFaultM @@ -60,6 +60,7 @@ module pmachecker ( assign Cacheable = SelRegions[8] | SelRegions[7] | SelRegions[6]; assign Idempotent = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[6]; assign AtomicAllowed = SelRegions[10] | SelRegions[9] | SelRegions[8] | SelRegions[6]; + assign SelTIM = SelRegions[10] | SelRegions[9]; // Detect access faults assign PMAAccessFault = (SelRegions[0]) & AccessRWX; diff --git a/pipelined/src/privileged/trap.sv b/pipelined/src/privileged/trap.sv index 2c3c0f21..03994491 100644 --- a/pipelined/src/privileged/trap.sv +++ b/pipelined/src/privileged/trap.sv @@ -50,6 +50,8 @@ module trap ( logic MIntGlobalEnM, SIntGlobalEnM; logic ExceptionM; + logic Committed; + (* mark_debug = "true" *) logic [11:0] PendingIntsM, ValidIntsM; /////////////////////////////////////////// @@ -62,8 +64,9 @@ module trap ( assign SIntGlobalEnM = (PrivilegeModeW == `U_MODE) | ((PrivilegeModeW == `S_MODE) & STATUS_SIE); // if in lower priv mode, or if S ints enabled and not in higher priv mode 3.1.9 assign PendingIntsM = MIP_REGW & MIE_REGW; assign IntPendingM = |PendingIntsM; - assign ValidIntsM = {12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW; - assign InterruptM = (|ValidIntsM) && InstrValidM && ~(CommittedM | CommittedF); // *** RT. CommittedM is a temporary hack to prevent integer division from having an interrupt during divide. + assign Committed = CommittedM | CommittedF; + assign ValidIntsM = {12{~Committed}} & ({12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW); + assign InterruptM = (|ValidIntsM) && InstrValidM && ~Committed; // suppress interrupt if the memory system has partially processed a request. assign DelegateM = `S_SUPPORTED & (InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM]) & (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE); diff --git a/pipelined/src/wally/wallypipelinedcore.sv b/pipelined/src/wally/wallypipelinedcore.sv index 0f61f452..277ca426 100644 --- a/pipelined/src/wally/wallypipelinedcore.sv +++ b/pipelined/src/wally/wallypipelinedcore.sv @@ -414,5 +414,6 @@ module wallypipelinedcore ( assign FDivBusyE = 0; assign IllegalFPUInstrM = 1; assign SetFflagsM = 0; + assign FpLoadStoreM = 0; end endmodule diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 61e76ff1..b6c7c790 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1818,7 +1818,8 @@ string imperas32f[] = '{ "rv64i_m/privilege/src/WALLY-trap-sret-01.S", "rv64i_m/privilege/src/WALLY-trap-u-01.S", "rv64i_m/privilege/src/WALLY-wfi-01.S", - "rv64i_m/privilege/src/WALLY-endianness-01.S" + "rv64i_m/privilege/src/WALLY-endianness-01.S", + "rv64i_m/privilege/src/WALLY-status-xlen-01.S" }; string wally64periph[] = '{ @@ -1896,7 +1897,8 @@ string imperas32f[] = '{ "rv32i_m/privilege/src/WALLY-trap-s-01.S", "rv32i_m/privilege/src/WALLY-trap-sret-01.S", "rv32i_m/privilege/src/WALLY-trap-u-01.S", - "rv32i_m/privilege/src/WALLY-wfi-01.S" + "rv32i_m/privilege/src/WALLY-wfi-01.S", + "rv32i_m/privilege/src/WALLY-endianness-01.S" }; string wally32periph[] = '{ diff --git a/setup.sh b/setup.sh index b3f9fb11..2ae92f1b 100755 --- a/setup.sh +++ b/setup.sh @@ -31,7 +31,7 @@ export PATH=/cad/mentor/questa_sim-2022.1_1/questasim/bin:$PATH # Change this export PATH=/cad/mentor/questa_sim-2021.2_1/questasim/bin:$PATH # Change this for your path to Modelsim, or delete export MGLS_LICENSE_FILE=1717@solidworks.eng.hmc.edu # Change this to your Siemens license server export PATH=/cad/synopsys/SYN/bin:$PATH # Change this for your path to Design Compiler -export SNPSLMD_LICENSE_FILE=27020@134.173.38.214 +export SNPSLMD_LICENSE_FILE=27020@134.173.38.184 # Change this to your license manager file # Imperas; put this in if you are using it #export PATH=$RISCV/imperas-riscv-tests/riscv-ovpsim-plus/bin/Linux64:$PATH diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-endianness-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-endianness-01.S index e51a6b65..c13dde25 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-endianness-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-endianness-01.S @@ -58,7 +58,7 @@ addi t1, t1, 4 addi a6, a6, 4 li x28, 0x20 -csrs mstatush, x28 // turn on big endianness for M mode +csrs 0x310, x28 // turn on big endianness for M mode // using '0x310' instead of mstatush because GCC doesn't recognize just mstatush // M mode Big endianness tests // In big endian modes, all values are sign extended to the right, rather than left @@ -83,7 +83,7 @@ addi t1, t1, 4 addi a6, a6, 4 li x28, 0x20 -csrc mstatush, x28 // Turn off big endianness for M mode before going into the trap handler +csrc 0x310, x28 // Turn off big endianness for M mode before going into the trap handler // using '0x310' instead of mstatush because GCC doesn't recognize just mstatush GOTO_S_MODE @@ -91,7 +91,7 @@ GOTO_S_MODE li x28, 0xAABBCCDD li x29, 0x8000F000 -sd x28, 0(x29) // value stored in memory as 0xAABBCCDD +sw x28, 0(x29) // value stored in memory as 0xAABBCCDD lw x30, 0(x29) // test load word, should read out 0xAABBCCDD sw x30, 0(t1) // test store word, should save 0xAABBCCDD @@ -111,7 +111,7 @@ addi a6, a6, 4 GOTO_M_MODE // Go back to M mode to be able to toggle SBE bit of mstatus li x28, 0x10 -csrs mstatush, x28 // turn on big endianness for S mode +csrs 0x310, x28 // turn on big endianness for S mode // using '0x310' instead of mstatush because GCC doesn't recognize just mstatush GOTO_S_MODE @@ -119,7 +119,7 @@ GOTO_S_MODE li x28, 0xAABBCCDD li x29, 0x8000F000 -sd x28, 0(x29) // value stored in memory as 0xDDCCBBAA +sw x28, 0(x29) // value stored in memory as 0xDDCCBBAA lw x30, 0(x29) // test load word, should read out 0xAABBCCDD sw x30, 0(t1) // test store word, should save 0xDDCCBBAA @@ -142,7 +142,7 @@ GOTO_U_MODE li x28, 0xAABBCCDD li x29, 0x8000F000 -sd x28, 0(x29) // value stored in memory as 0xAABBCCDD +sw x28, 0(x29) // value stored in memory as 0xAABBCCDD lw x30, 0(x29) // test load word, should read out 0xAABBCCDD sw x30, 0(t1) // test store word, should save 0xAABBCCDD @@ -170,7 +170,7 @@ GOTO_U_MODE li x28, 0xAABBCCDD li x29, 0x8000F000 -sd x28, 0(x29) // value stored in memory as 0xDDCCBBAA +sw x28, 0(x29) // value stored in memory as 0xDDCCBBAA lw x30, 0(x29) // test load word, should read out 0xAABBCCDD sw x30, 0(t1) // test store word, should save 0xDDCCBBAA @@ -187,6 +187,9 @@ sb x30, 0(t1) // test store byte, should save 0xAA addi t1, t1, 4 addi a6, a6, 4 +//store_location: +//.fill + END_TESTS TEST_STACK_AND_DATA \ No newline at end of file